R code for fitting a finite mixture model to fatigue data

The following R code fits a Finite Mixture Fatigue Limit Model to fatigue data. The fatigue data may contain right, left, and interval censored observations. These censored observations are also referred to as runouts.

The Finite Mixture Fatigue Limit Model is fitted by using the Expectation-Maximization (EM) algorithm. The fitted model assumes that the fatigue observations follow either a Weibull, lognormal, or Gaussian distribution.

fatigue limit model mixture

See this blog post for computing the quantiles of a mixture fitted by the Finite Mixture Fatigue Limit Model.

Fatigue limit models are discussed by Pascual and Meeker in their 1997 paper Analysis of fatigue data with runouts based on a model with nonconstant standard deviation and a fatigue limit parameter, and in their 1998 paper Estimating fatigue curves with the random fatigue-limit model.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(survival)
library(SPREDA)

#function for fitting the fatigue limit model
flm <- function (cycles, cycles2=rep(NA, length(cycles)), event,
                 stress, wt=rep(1, length(cycles)),
                 startGamma=.5*min(stress), dist) {
  #-cycles is the exact number of cycles to failure, the right/left censored value,
  # or for interval censoring the lower value of the censoring interval
  # note that censored observations are also referred to as runouts
  #-cycles2 is the upper value of the censoring interval
  #-event is the censoring indicator (0=right censored, 1=failure observed,
  # 2=left censored, 3=interval censored)
  #-stress is the stress level
  #-wt are the weights for the observations
  #-startGamma is a starting value for the fatigue limit (=gamma)
  # a plausible starting value may be obtained from the log-log S-N curve
  #-dist: either the "weibull", "lognormal", or "gaussian" distribution
  
  #optimization (obtain estimate for gamma)
  gammaMLE <- optim(par=startGamma, profile.gamma, method="Brent",
                    lower=0, upper=(1-(1/1e+6))*min(stress),
                    control=list(fnscale=-1),
                    cycles=cycles, cycles2=cycles2, event=event,
                    stress=stress, wt=wt, dist=dist)
  gamma <- gammaMLE$par
  
  #obtain MLEs for remaining parameters in the fatigue limit model
  mod <- survreg(Surv(time=cycles, time2=cycles2,
                      event=event, type="interval") ~ log(stress-gamma),
                 weights=wt, dist=dist)
  parms <- c(coef(mod)[1], coef(mod)[2], mod$scale, gamma, mod$loglik[2])
  names(parms) <- c("beta0mu", "beta1mu", "sigma", "gamma", "loglik")
  parms}

#function for computing the limit (=gamma) of the fatigue limit model
profile.gamma <- function(gammahat, cycles, cycles2, event,
                          stress, wt, dist) {
  stressG <- stress-gammahat
  fit <- survreg(Surv(time=cycles, time2=cycles2,
                      event=event, type="interval") ~ log(stressG),
                 dist=dist, weights=wt)
  fit$loglik[2]
}

#function for computing the log-likelihood of fatigue limit model
llikfm <- function (parms, cycles, cycles2=rep(NA, length(cycles)),
                    event, stress, wt=rep(1, length(cycles)),
                    postWt=rep(1, length(cycles)), dist) {
  #-parms are the MLEs for the parameters of the fatigue limit model
  #-cycles is the exact number of cycles to failure, the right/left censored value,
  # or for interval censoring the lower value of the censoring interval
  # note that censored observations are also referred to as runouts
  #-cycles2 is the upper value of the censoring interval
  #-event is the censoring indicator (0=right censored, 1=failure observed,
  # 2=left censored, 3=interval censored)
  #-stress is the stress level
  #-wt are the weights for the observations
  #-postWt are the posterior probabilities as returned by a finite mixture
  # fatigue limit model
  #-dist: either the "weibull", "lognormal", or "gaussian" distribution
  
  mu <- parms[1] + parms[2]*log(stress-parms[4])
  sigma <- parms[3]
  
  if (dist=="lognormal") {
    z1 <- (log(cycles)-mu)/sigma
    z2 <- (log(cycles2)-mu)/sigma
    
    l <- ifelse(event==0, 1-pnorm(z1), #right censoring
                ifelse(event==1, 1/(sigma*cycles) * dnorm(z1), #exact observations
                       ifelse(event==2, pnorm(z1), #left censoring
                              pnorm(z2)-pnorm(z1) ))) #interval censoring
    (wt*postWt)%*%log(l)} #compute log-likelihood
  
  else if (dist=="weibull") {
    z1 <- (log(cycles)-mu)/sigma
    z2 <- (log(cycles2)-mu)/sigma
    
    l <- ifelse(event==0, 1-psev(z1), #right censoring
                ifelse(event==1, 1/(sigma*cycles) * dsev(z1), #exact observations
                       ifelse(event==2, psev(z1), #left censoring
                              psev(z2)-psev(z1) ))) #interval censoring
    (wt*postWt)%*%log(l)} #compute log-likelihood
  
  else {
    z1 <- (cycles-mu)/sigma
    z2 <- (cycles2-mu)/sigma
    
    l <- ifelse(event==0, 1-pnorm(z1), #right censoring
                ifelse(event==1, 1/(sigma) * dnorm(z1), #exact observations
                       ifelse(event==2, pnorm(z1), #left censoring
                              pnorm(z2)-pnorm(z1) ))) #interval censoring
    (wt*postWt)%*%log(l)} #compute log-likelihood
}





#EXAMPLE 1
#fit fatigue limit model to data

#generate data
N <- 150 #number of observations

stress <- 1000*exp(1.5 + rnorm(N)*.15) #in kilo units
cycles <- exp( (13.6 - .82*(log(stress-2900))) + rnorm(N)*.15)

#any NA's generated?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>10000, 0, 1)
N-sum(cens, na.rm=TRUE) #number of censored observations
cycles <- ifelse(cens==0, 10000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data and remove observations with NA for cycles
cs <- cbind(cycles, stress, cens)
cF <- data.frame(cs[!is.na(cycles),])

#fit fatigue limit model to data
(cpL <- flm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, dist="lognormal"))
(cpW <- flm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, dist="weibull"))
(cpG <- flm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, dist="gaussian"))

#log-log S-N curve (with fit included)
plot(x=cycles, y=stress, log="xy", pch=cens, xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))
Stress <- seq(2000, 7000, by=1)
cl <- exp(cpL[1] + cpL[2]*log(Stress-cpL[4])) #lognormal
lines(x=cl, y=Stress, type="l", col="blue", lty=2)
cl <- exp(cpW[1] + cpW[2]*log(Stress-cpW[4])) #weibull
lines(x=cl, y=Stress, type="l", col="darkgreen", lty=2)
cl <- cpG[1] + cpG[2]*log(Stress-cpG[4]) #Gaussian
lines(x=cl, y=Stress, type="l", col="red", lty=2)

#compute normal-approximation confidence intervals for the MLE-parameters
#of the fatigue limit model
lognormal.mle <- optim(cpL[-length(cpL)], llikfm,
                       method='BFGS', control=list(fnscale=-1),
                       cycles=cF$cycles, event=cF$cens, stress=cF$stress,
                       dist="lognormal", hessian=TRUE)
#MLEs for parameters of the fatigue limit model (assuming lognormal distribution)
lognormal.mle$par
#compare with the original MLEs
cpL
#standard errors of the MLEs
SEparms <- sqrt(diag(solve(-1*lognormal.mle$hessian)))

#normal-approximation 95% confidence intervals
lognormal.mle$par[1] + c(1,-1)*qnorm(.05/2)*SEparms[1] #beta0mu
lognormal.mle$par[2] + c(-1,1)*qnorm(.05/2)*SEparms[2] #beta1mu
lognormal.mle$par[4] + c(1,-1)*qnorm(.05/2)*SEparms[4] #gamma
#95% confidence interval for sigma
lognormal.mle$par[3]*exp((c(1,-1)*qnorm(.05/2)*SEparms[3])/lognormal.mle$par[3])





#EXAMPLE 2
#fit a finite mixture fatigue limit model to data

#function for fitting the finite mixture fatigue limit model
library(LearnBayes)
mixflm <- function (cycles, cycles2=rep(NA, length(cycles)),
                    event=rep(1, length(cycles)), stress,
                    wt=rep(1, length(cycles)), startGamma=.5*min(stress),
                    dist="lognormal", n, cluster=NULL, maxiter=100, tol=1e-6) {
  #-cycles is the exact number of cycles to failure, the right/left censored value,
  # or for interval censoring the lower value of the censoring interval
  # note that censored observations are also referred to as runouts
  #-cycles2 is the upper value of the censoring interval
  #-event is the censoring indicator (0=right censored, 1=failure observed,
  # 2=left censored, 3=interval censored)
  #-stress is the stress level
  #-wt are the weights for the observations
  #-startGamma is a starting value for the fatigue limit (=gamma)
  # a plausible starting value may be obtained from the log-log S-N curve
  #-dist: either the "weibull", "lognormal", or "gaussian" distribution
  #-n is the number of components to be estimated
  #-cluster: start with random initialization of posterior probabilities (=NULL),
  # or a matrix with n columns of initial posterior probabilities for the
  # observations
  #-maxiter is the maximum number of iterations
  #-tol is the convergence criterion
  
  nobs <- sum(wt) #number of observations
  
  parms <- matrix(NA, ncol=5, nrow=n)
  colnames(parms) <- c("beta0mu", "beta1mu", "sigma", "gamma", "logLik")
  mus <- matrix(NA, ncol=n, nrow=length(cycles))
  posteriorP <- matrix(NA, ncol=n, nrow=length(cycles))
  P <- matrix(NA, ncol=n, nrow=length(cycles))
  
  iteration <- 0 #initialize iteration counter
  loglikInit <- logLikC <- 0 #initialize log-likelihood estimates
  
  #initialize posterior probabilities
  if (is.null(cluster)) {
    alpha <- rep(.1, n)
    posteriorP <- rdirichlet(length(cycles), alpha)}
  else {posteriorP <- cluster}
  
  while (iteration < maxiter) {
    #estimate prior probabilities
    priorP <- apply(wt*posteriorP, 2, sum)/nobs
    
    #estimate parameters for each component
    for (i in 1:n) {
      wtp <- ifelse(wt*posteriorP[,i]<=0, 1e-15, wt*posteriorP[,i])
      cp <- flm(cycles=cycles, cycles2=cycles2, event=event,
                stress=stress, wt=wtp, startGamma=startGamma, dist=dist)
      parms[i,] <- cp
      mus[,i] <- cp[1] + cp[2]*log(stress-cp[4])}
    
    #compute the (complete) log-likelihood value
    logLikC <- sum(parms[,5]) + sum(wt*(posteriorP%*%log(priorP)))
    logLikC <- ifelse(is.na(logLikC), 0, logLikC)
    
    #estimate posterior probabilities
    if (dist=="weibull") {
      for (j in 1:n) {
        mu <- mus[,j]; sigma <- parms[j, 3]
        
        z1 <- (log(cycles)-mu)/sigma
        z2 <- (log(cycles2)-mu)/sigma
        
        cp <- ifelse(event==0, 1-psev(z1), #right censoring
                     ifelse(event==1, 1/(sigma*cycles) * dsev(z1), #exact observations
                            ifelse(event==2, psev(z1), #left censoring
                                   psev(z2)-psev(z1) ))) #interval censoring
        P[,j] <- priorP[j]*cp}
    }
    
    else if (dist=="lognormal") {
      for (j in 1:n) {
        mu <- mus[,j]; sigma <- parms[j, 3]
        
        z1 <- (log(cycles)-mu)/sigma
        z2 <- (log(cycles2)-mu)/sigma
        
        cp <- ifelse(event==0, 1-pnorm(z1), #right censoring
                     ifelse(event==1, 1/(sigma*cycles) * dnorm(z1), #exact observations
                            ifelse(event==2, pnorm(z1), #left censoring
                                   pnorm(z2)-pnorm(z1) )))
        P[,j] <- priorP[j]*cp}
    }
    
    else if (dist=="gaussian") {
      for (j in 1:n) {
        mu <- mus[,j]; sigma <- parms[j, 3]
        
        z1 <- (cycles-mu)/sigma
        z2 <- (cycles2-mu)/sigma
        
        cp <- ifelse(event==0, 1-pnorm(z1), #right censoring
                     ifelse(event==1, 1/(sigma) * dnorm(z1), #exact observations
                            ifelse(event==2, pnorm(z1), #left censoring
                                   pnorm(z2)-pnorm(z1) )))
        P[,j] <- priorP[j]*cp}
    }
    
    posteriorP <- P/rowSums(P)
    
    #check convergence criterion
    if (( abs(logLikC-loglikInit) / (abs(loglikInit)+.001*tol) ) < tol) break
    loglikInit <- logLikC #update log-likelihood
    iteration <- iteration + 1 #increment counter
  }
  
  if (iteration == maxiter) warning("Maximum number of iterations exceeded")
  list(components=parms[,1:4], prior=priorP, loglik=logLikC,
       AIC=-2*logLikC + 2*(n-1+n*(ncol(parms)-1)),
       BIC=-2*logLikC + (n-1+n*(ncol(parms)-1))*log(nobs),
       distribution=dist, iterations=iteration, posterior=posteriorP)
}



#generate data for mixture
N <- 300 #number of observations
probs <- c(.7, .3) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)

stress <- 1000*exp(1.5 + rnorm(N)*.15) #in kilo units

cycles <- rep(NA, N)

for(i in 1:N){
  if(component[i]==1){
    cycles[i] <- exp( (14.5 - 1*(log(stress[i]-2700))) + rnorm(1)*.20)
  }else if(component[i]==2){
    cycles[i] <- exp( (13.6 - .82*(log(stress[i]-2900))) + rnorm(1)*.15)}
}

#any NA's?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>10000, 0, 1)
N-sum(cens, na.rm=TRUE) #number of censored observations
cycles <- ifelse(cens==0, 10000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
     xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data
cs <- cbind(cycles, stress, cens, component)
c1 <- data.frame(cs[which(component==1),])
c2 <- data.frame(cs[which(component==2),])

#remove observations with NA for cycles
cF <- data.frame(cs[!is.na(cycles),])
c1 <- c1[!is.na(c1$cycles),]
c2 <- c2[!is.na(c2$cycles),]

#fit separate fatigue limit models to the data of each component
(c1p <- flm(cycles=c1$cycles, event=c1$cens, stress=c1$stress, dist="lognormal"))
(c2p <- flm(cycles=c2$cycles, event=c2$cens, stress=c2$stress, dist="lognormal"))

#log-log S-N curve (with fit included)
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
     xlab="Cycles", ylab="Stress")
Stress <- seq(2000, 7000, by=1)
c1l <- exp(c1p[1] + c1p[2]*log(Stress-c1p[4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(c2p[1] + c2p[2]*log(Stress-c2p[4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))



#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=cF$cycles, event=cF$cens,
                                stress=cF$stress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, n=2,
              cluster=mixmult$posterior, maxiter=300)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=cF$cycles, y=cF$stress, type="n", log="xy", xlab="Cycles", ylab="Stress")
points(x=cF$cycles, y=cF$stress, xlog=TRUE, ylog=TRUE, pch=cF$cens, col=clusterPost)
Stress <- seq(2500, 7000, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#separation of components
par(mfrow=c(1, 2))
hist(mcF$posterior[,1][which(clusterPost==1)], xlab="Component 1", main="")
hist(mcF$posterior[,2][which(clusterPost==2)], xlab="Component 2", main="")
par(mfrow=c(1, 1))

#in case the components are well separated then it is possible to obtain valid
#normal-approximation confidence intervals for the components' parameters
#these intervals are obtained by using the posterior probabilities in the
#following way:
lognormal.mle <- optim(mcF$components[1,], #MLEs for component 1
                       llikfm, method='BFGS', control=list(fnscale=-1),
                       cycles=cF$cycles, event=cF$cens, stress=cF$stress,
                       #posterior probabilities for component 1:
                       postWt=mcF$posterior[,1],
                       dist="lognormal", hessian=TRUE)
#MLEs for component 1
lognormal.mle$par
#original MLEs
mcF$components[1,]
#standard errors
SEparms <- sqrt(diag(solve(-1*lognormal.mle$hessian)))

#normal-approximation 95% confidence intervals
lognormal.mle$par[1] + c(1,-1)*qnorm(.05/2)*SEparms[1] #beta0mu
lognormal.mle$par[2] + c(-1,1)*qnorm(.05/2)*SEparms[2] #beta1mu
lognormal.mle$par[4] + c(1,-1)*qnorm(.05/2)*SEparms[4] #gamma
#95% confidence interval for sigma
lognormal.mle$par[3]*exp((c(1,-1)*qnorm(.05/2)*SEparms[3])/lognormal.mle$par[3])





#EXAMPLE 3
#fit finite mixture fatigue limit model

#generate mixture
N <- 300 #number of observations
probs <- c(.5, .5) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)

stress <- 1000*exp(-1 + rnorm(N)*.5) #in kilo units

cycles <- rep(NA, N)

for(i in 1:N){
  if(component[i]==1){
    cycles[i] <- exp( (9.7 - .7*(log(stress[i]-50))) + rnorm(1)*.2)
  }else if(component[i]==2){
    cycles[i] <- exp( (8.1 - .5*(log(stress[i]-35))) + rnorm(1)*.1)}
}

#any NA's?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>1000, 0, 1)
N-sum(cens) #number of censored observations
cycles <- ifelse(cens==0, 1000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
     xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data
cs <- cbind(cycles, stress, cens, component)
c1 <- data.frame(cs[which(component==1),])
c2 <- data.frame(cs[which(component==2),])

#remove observations with NA for cycles
cF <- data.frame(cs[!is.na(cycles),])
c1 <- c1[!is.na(c1$cycles),]
c2 <- c2[!is.na(c2$cycles),]

#fit separate fatigue limit models to the data of each component
(c1p <- flm(cycles=c1$cycles, event=c1$cens, stress=c1$stress, dist="lognormal"))
(c2p <- flm(cycles=c2$cycles, event=c2$cens, stress=c2$stress, dist="lognormal"))

#log-log S-N curve (with fit included)
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
     xlab="Cycles", ylab="Stress")
Stress <- seq(50, 2000, by=1)
c1l <- exp(c1p[1] + c1p[2]*log(Stress-c1p[4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(c2p[1] + c2p[2]*log(Stress-c2p[4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))



#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=cF$cycles, event=cF$cens,
                                stress=cF$stress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, n=2,
              cluster=mixmult$posterior)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=cF$cycles, y=cF$stress, type="n", log="xy", xlab="Cycles", ylab="Stress")
points(x=cF$cycles, y=cF$stress, xlog=TRUE, ylog=TRUE, pch=cF$cens, col=clusterPost)
Stress <- seq(50, 2000, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#separation of components
par(mfrow=c(1, 2))
hist(mcF$posterior[,1][which(clusterPost==1)], xlab="Component 1", main="")
hist(mcF$posterior[,2][which(clusterPost==2)], xlab="Component 2", main="")
par(mfrow=c(1, 1))

#normal-approximation confidence intervals for the parameters of component 2
lognormal.mle <- optim(mcF$components[2,], llikfm,
                       method='BFGS', control=list(fnscale=-1),
                       cycles=cF$cycles, event=cF$cens, stress=cF$stress,
                       postWt=mcF$posterior[,2],
                       dist="lognormal", hessian=TRUE)
#MLEs
lognormal.mle$par
#original MLEs
mcF$components[2,]
#standard errors
SEparms <- sqrt(diag(solve(-1*lognormal.mle$hessian)))

#normal-approximation 95% confidence intervals
lognormal.mle$par[1] + c(1,-1)*qnorm(.05/2)*SEparms[1] #beta0mu
lognormal.mle$par[2] + c(-1,1)*qnorm(.05/2)*SEparms[2] #beta1mu
lognormal.mle$par[4] + c(1,-1)*qnorm(.05/2)*SEparms[4] #gamma
#95% confidence interval for sigma
lognormal.mle$par[3]*exp((c(1,-1)*qnorm(.05/2)*SEparms[3])/lognormal.mle$par[3])





#EXAMPLE 4

#Superalloy data from Pascual and Meeker (1997)
alloy <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/superalloy.txt", header=T)
#failure=1, runout=0
alloy$status <- abs(as.numeric(alloy$Status)-2)
alloy$Cycles <- 1000*alloy$kCycles

#log-log S-N curve
plot(x=alloy$kCycles, y=alloy$PseudoSress, log="xy", pch=alloy$status,
     xlab="Thousands of Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=alloy$kCycles, event=alloy$status,
                                stress=alloy$PseudoSress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=alloy$kCycles, event=alloy$status,
              stress=alloy$PseudoSress, n=2, cluster=mixmult$posterior)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=alloy$kCycles, y=alloy$PseudoSress, type="n", log="xy",
     xlab="Thousands of Cycles", ylab="Stress")
points(x=alloy$kCycles, y=alloy$PseudoSress, xlog=TRUE, ylog=TRUE,
       col=clusterPost, pch=alloy$status)
Stress <- seq(50, 200, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#separation of components
par(mfrow=c(1, 2))
hist(mcF$posterior[,1][which(clusterPost==1)], xlab="Component 1", main="")
hist(mcF$posterior[,2][which(clusterPost==2)], xlab="Component 2", main="")
par(mfrow=c(1, 1))



#try fitting 3 components

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(20, mixflm(cycles=alloy$kCycles, event=alloy$status,
                                stress=alloy$PseudoSress, n=3, maxiter=20))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=alloy$kCycles, event=alloy$status,
              stress=alloy$PseudoSress, n=3, cluster=mixmult$posterior)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=alloy$kCycles, y=alloy$PseudoSress, type="n", log="xy",
     xlab="Thousands of Cycles", ylab="Stress")
points(x=alloy$kCycles, y=alloy$PseudoSress, xlog=TRUE, ylog=TRUE,
       col=clusterPost, pch=alloy$status)
Stress <- seq(50, 200, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
c3l <- exp(mcF$components[3,1] + mcF$components[3,2]*log(Stress-mcF$components[3,4]))
lines(x=c3l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#separation of components
par(mfrow=c(1, 3))
hist(mcF$posterior[,1][which(clusterPost==1)], xlab="Component 1", main="")
hist(mcF$posterior[,2][which(clusterPost==2)], xlab="Component 2", main="")
hist(mcF$posterior[,3][which(clusterPost==3)], xlab="Component 3", main="")
par(mfrow=c(1, 1))