R code for computing quantiles of a Finite Mixture Fatigue Limit Model

In one of my previous posts I demonstrated how to fit a Finite Mixture Fatigue Limit Model to fatigue data.
The following R code computes the quantiles of a mixture fitted by the Finite Mixture Fatigue Limit Model.

fatigue limit model mixture quantiles

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(survival)
library(SPREDA)

##---define functions
#all of the following functions are needed for fitting the
#Finite Mixture Fatigue Limit Model and subsequently computing
#the quantiles of the fitted mixture

#function for fitting the fatigue limit model
flm <- function (cycles, cycles2=rep(NA, length(cycles)), event,
                 stress, wt=rep(1, length(cycles)),
                 startGamma=.5*min(stress), dist) {
  #-cycles is the exact number of cycles to failure, the right/left censored value,
  # or for interval censoring the lower value of the censoring interval
  # note that censored observations are also referred to as runouts
  #-cycles2 is the upper value of the censoring interval
  #-event is the censoring indicator (0=right censored, 1=failure observed,
  # 2=left censored, 3=interval censored)
  #-stress is the stress level
  #-wt are the weights for the observations
  #-startGamma is a starting value for the fatigue limit (=gamma)
  # a plausible starting value may be obtained from the log-log S-N curve
  #-dist: either the "weibull", "lognormal", or "gaussian" distribution
  
  #optimization (obtain estimate for gamma)
  gammaMLE <- optim(par=startGamma, profile.gamma, method="Brent",
                    lower=0, upper=(1-(1/1e+6))*min(stress),
                    control=list(fnscale=-1),
                    cycles=cycles, cycles2=cycles2, event=event,
                    stress=stress, wt=wt, dist=dist)
  gamma <- gammaMLE$par
  
  #obtain MLEs for remaining parameters in the fatigue limit model
  mod <- survreg(Surv(time=cycles, time2=cycles2,
                      event=event, type="interval") ~ log(stress-gamma),
                 weights=wt, dist=dist)
  parms <- c(coef(mod)[1], coef(mod)[2], mod$scale, gamma, mod$loglik[2])
  names(parms) <- c("beta0mu", "beta1mu", "sigma", "gamma", "loglik")
  parms}

#function for computing the limit (=gamma) of the fatigue limit model
profile.gamma <- function(gammahat, cycles, cycles2, event,
                          stress, wt, dist) {
  stressG <- stress-gammahat
  fit <- survreg(Surv(time=cycles, time2=cycles2,
                      event=event, type="interval") ~ log(stressG),
                 dist=dist, weights=wt)
  fit$loglik[2]
}

#function for computing the log-likelihood of fatigue limit model
llikfm <- function (parms, cycles, cycles2=rep(NA, length(cycles)),
                    event, stress, wt=rep(1, length(cycles)),
                    postWt=rep(1, length(cycles)), dist) {
  #-parms are the MLEs for the parameters of the fatigue limit model
  #-cycles is the exact number of cycles to failure, the right/left censored value,
  # or for interval censoring the lower value of the censoring interval
  # note that censored observations are also referred to as runouts
  #-cycles2 is the upper value of the censoring interval
  #-event is the censoring indicator (0=right censored, 1=failure observed,
  # 2=left censored, 3=interval censored)
  #-stress is the stress level
  #-wt are the weights for the observations
  #-postWt are the posterior probabilities as returned by a finite mixture
  # fatigue limit model
  #-dist: either the "weibull", "lognormal", or "gaussian" distribution
  
  mu <- parms[1] + parms[2]*log(stress-parms[4])
  sigma <- parms[3]
  
  if (dist=="lognormal") {
    z1 <- (log(cycles)-mu)/sigma
    z2 <- (log(cycles2)-mu)/sigma
    
    l <- ifelse(event==0, 1-pnorm(z1), #right censoring
                ifelse(event==1, 1/(sigma*cycles) * dnorm(z1), #exact observations
                       ifelse(event==2, pnorm(z1), #left censoring
                              pnorm(z2)-pnorm(z1) ))) #interval censoring
    (wt*postWt)%*%log(l)} #compute log-likelihood
  
  else if (dist=="weibull") {
    z1 <- (log(cycles)-mu)/sigma
    z2 <- (log(cycles2)-mu)/sigma
    
    l <- ifelse(event==0, 1-psev(z1), #right censoring
                ifelse(event==1, 1/(sigma*cycles) * dsev(z1), #exact observations
                       ifelse(event==2, psev(z1), #left censoring
                              psev(z2)-psev(z1) ))) #interval censoring
    (wt*postWt)%*%log(l)} #compute log-likelihood
  
  else {
    z1 <- (cycles-mu)/sigma
    z2 <- (cycles2-mu)/sigma
    
    l <- ifelse(event==0, 1-pnorm(z1), #right censoring
                ifelse(event==1, 1/(sigma) * dnorm(z1), #exact observations
                       ifelse(event==2, pnorm(z1), #left censoring
                              pnorm(z2)-pnorm(z1) ))) #interval censoring
    (wt*postWt)%*%log(l)} #compute log-likelihood
}

#function for fitting the Finite Mixture Fatigue Limit Model
library(LearnBayes)
mixflm <- function (cycles, cycles2=rep(NA, length(cycles)),
                    event=rep(1, length(cycles)), stress,
                    wt=rep(1, length(cycles)), startGamma=.5*min(stress),
                    dist="lognormal", n, cluster=NULL, maxiter=100, tol=1e-6) {
  #-cycles is the exact number of cycles to failure, the right/left censored value,
  # or for interval censoring the lower value of the censoring interval
  # note that censored observations are also referred to as runouts
  #-cycles2 is the upper value of the censoring interval
  #-event is the censoring indicator (0=right censored, 1=failure observed,
  # 2=left censored, 3=interval censored)
  #-stress is the stress level
  #-wt are the weights for the observations
  #-startGamma is a starting value for the fatigue limit (=gamma)
  # a plausible starting value may be obtained from the log-log S-N curve
  #-dist: either the "weibull", "lognormal", or "gaussian" distribution
  #-n is the number of components to be estimated
  #-cluster: start with random initialization of posterior probabilities (=NULL),
  # or a matrix with n columns of initial posterior probabilities for the
  # observations
  #-maxiter is the maximum number of iterations
  #-tol is the convergence criterion
  
  nobs <- sum(wt) #number of observations
  
  parms <- matrix(NA, ncol=5, nrow=n)
  colnames(parms) <- c("beta0mu", "beta1mu", "sigma", "gamma", "logLik")
  mus <- matrix(NA, ncol=n, nrow=length(cycles))
  posteriorP <- matrix(NA, ncol=n, nrow=length(cycles))
  P <- matrix(NA, ncol=n, nrow=length(cycles))
  
  iteration <- 0 #initialize iteration counter
  loglikInit <- logLikC <- 0 #initialize log-likelihood estimates
  
  #initialize posterior probabilities
  if (is.null(cluster)) {
    alpha <- rep(.1, n)
    posteriorP <- rdirichlet(length(cycles), alpha)}
  else {posteriorP <- cluster}
  
  while (iteration < maxiter) {
    #estimate prior probabilities
    priorP <- apply(wt*posteriorP, 2, sum)/nobs
    
    #estimate parameters for each component
    for (i in 1:n) {
      wtp <- ifelse(wt*posteriorP[,i]<=0, 1e-15, wt*posteriorP[,i])
      cp <- flm(cycles=cycles, cycles2=cycles2, event=event,
                stress=stress, wt=wtp, startGamma=startGamma, dist=dist)
      parms[i,] <- cp
      mus[,i] <- cp[1] + cp[2]*log(stress-cp[4])}
    
    #compute the (complete) log-likelihood value
    logLikC <- sum(parms[,5]) + sum(wt*(posteriorP%*%log(priorP)))
    logLikC <- ifelse(is.na(logLikC), 0, logLikC)
    
    #estimate posterior probabilities
    if (dist=="weibull") {
      for (j in 1:n) {
        mu <- mus[,j]; sigma <- parms[j, 3]
        
        z1 <- (log(cycles)-mu)/sigma
        z2 <- (log(cycles2)-mu)/sigma
        
        cp <- ifelse(event==0, 1-psev(z1), #right censoring
                     ifelse(event==1, 1/(sigma*cycles) * dsev(z1), #exact observations
                            ifelse(event==2, psev(z1), #left censoring
                                   psev(z2)-psev(z1) ))) #interval censoring
        P[,j] <- priorP[j]*cp}
    }
    
    else if (dist=="lognormal") {
      for (j in 1:n) {
        mu <- mus[,j]; sigma <- parms[j, 3]
        
        z1 <- (log(cycles)-mu)/sigma
        z2 <- (log(cycles2)-mu)/sigma
        
        cp <- ifelse(event==0, 1-pnorm(z1), #right censoring
                     ifelse(event==1, 1/(sigma*cycles) * dnorm(z1), #exact observations
                            ifelse(event==2, pnorm(z1), #left censoring
                                   pnorm(z2)-pnorm(z1) )))
        P[,j] <- priorP[j]*cp}
    }
    
    else if (dist=="gaussian") {
      for (j in 1:n) {
        mu <- mus[,j]; sigma <- parms[j, 3]
        
        z1 <- (cycles-mu)/sigma
        z2 <- (cycles2-mu)/sigma
        
        cp <- ifelse(event==0, 1-pnorm(z1), #right censoring
                     ifelse(event==1, 1/(sigma) * dnorm(z1), #exact observations
                            ifelse(event==2, pnorm(z1), #left censoring
                                   pnorm(z2)-pnorm(z1) )))
        P[,j] <- priorP[j]*cp}
    }
    
    posteriorP <- P/rowSums(P)
    
    #check convergence criterion
    if (( abs(logLikC-loglikInit) / (abs(loglikInit)+.001*tol) ) < tol) break
    loglikInit <- logLikC #update log-likelihood
    iteration <- iteration + 1 #increment counter
  }
  
  if (iteration == maxiter) warning("Maximum number of iterations exceeded")
  list(components=parms[,1:4], prior=priorP, loglik=logLikC,
       AIC=-2*logLikC + 2*(n-1+n*(ncol(parms)-1)),
       BIC=-2*logLikC + (n-1+n*(ncol(parms)-1))*log(nobs),
       distribution=dist, iterations=iteration, posterior=posteriorP)
}


#function for computing quantiles of a mixture fitted by
#the Finite Mixture Fatigue Limit Model
predictQMix <- function (object, stress, p) {
  #- object is a finite mixture model as returned by mixflm
  #- stress is the stress level
  #- p is the proportion at which to compute the quantile
  parms <- object$components
  prior <- object$prior
  
  if (object$distribution=="lognormal") {
    fun <- function(p, parms, prior, stress, cycles) {
      sum(sapply(1:nrow(parms),
                 function (x) {
                   if (stress-parms[x,4]>=0) {
                     mu <- parms[x,1] + parms[x,2]*log(stress-parms[x,4])
                     prior[x]*pnorm( (log(cycles)-mu) / parms[x,3])}
                   else 0 #0% fails at stress levels below the fatigue limit
                 }))-p}
    if (all(stress-parms[,4]<0)) cycEst <- NA
    else cycEst <- try(uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms,
                               prior=prior, stress=stress)$root, TRUE)
    ifelse(is.na(suppressWarnings(as.numeric(cycEst))), NA, as.numeric(cycEst))}
  
  else if (object$distribution=="weibull") {
    fun <- function(p, parms, prior, stress) {
      sum(sapply(1:nrow(parms),
                 function (x) {
                   if (stress-parms[x,4]>=0) {
                     mu <- parms[x,1] + parms[x,2]*log(stress-parms[x,4])
                     prior[x]*psev( (log(cycles)-mu) / parms[x,3])}
                   else 0 #0% fails at stress levels below the fatigue limit
                 }))-p}
    if (all(stress-parms[,4]<0)) cycEst <- NA
    else cycEst <- try(uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms,
                               prior=prior, stress=stress)$root, TRUE)
    ifelse(is.na(suppressWarnings(as.numeric(cycEst))), NA, as.numeric(cycEst))}
  
  else if (object$distribution=="gaussian") {
    fun <- function(p, parms, prior, stress) {
      sum(sapply(1:nrow(parms),
                 function (x) {
                   if (stress-parms[x,4]>=0) {
                     mu <- parms[x,1] + parms[x,2]*log(stress-parms[x,4])
                     prior[x]*pnorm( (cycles-mu) / parms[x,3])}
                   else 0 #0% fails at stress levels below the fatigue limit
                 }))-p}
    if (all(stress-parms[,4]<0)) cycEst <- NA
    else cycEst <- try(uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms,
                               prior=prior, stress=stress)$root, TRUE)
    ifelse(is.na(suppressWarnings(as.numeric(cycEst))), NA, as.numeric(cycEst))}
}

##---end of function definitions





#EXAMPLE 1

#generate data for mixture
N <- 300 #number of observations
probs <- c(.7, .3) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)

stress <- 1000*exp(1.5 + rnorm(N)*.15) #in kilo units

cycles <- rep(NA, N)

for(i in 1:N){
  if(component[i]==1){
    cycles[i] <- exp( (14.5 - 1*(log(stress[i]-2700))) + rnorm(1)*.20)
  }else if(component[i]==2){
    cycles[i] <- exp( (13.6 - .82*(log(stress[i]-2900))) + rnorm(1)*.15)}
}

#any NA's?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>7000, 0, 1)
N-sum(cens, na.rm=TRUE) #number of censored observations
cycles <- ifelse(cens==0, 7000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
     xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data
cs <- cbind(cycles, stress, cens, component)
#remove observations with NA for cycles
cF <- data.frame(cs[!is.na(cycles),])

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=cF$cycles, event=cF$cens,
                                stress=cF$stress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, n=2,
              cluster=mixmult$posterior, maxiter=300)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=cF$cycles, y=cF$stress, type="n", log="xy", xlab="Cycles", ylab="Stress")
points(x=cF$cycles, y=cF$stress, xlog=TRUE, ylog=TRUE, pch=cF$cens, col=clusterPost)
Stress <- seq(2500, 7000, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))



#compute quantiles of the fitted mixture and plot results
stresses <- seq(1500, 7000,length.out=200)
Qp.05 <- sapply(stresses, predictQMix, object=mcF, p=.05) #.05th quantile
Qp.5 <- sapply(stresses, predictQMix, object=mcF, p=.5) #.5th quantile
Qp.95 <- sapply(stresses, predictQMix, object=mcF, p=.95) #.95th quantile
plot(x=cF$cycles, y=cF$stress, log="xy", pch=cF$cens,
     xlab="Cycles", ylab="Stress", col="gray")
lines(Qp.05, stresses, col="blue", lty=2) #.05th quantile
lines(Qp.5, stresses, col="blue", lty=2) #.5th quantile
lines(Qp.95, stresses, col="blue", lty=2) #.95th quantile
legend("topright", pch=c(1,0), legend=c("failure","runout"))





#EXAMPLE 2

#generate data for mixture
N <- 300 #number of observations
probs <- c(.7, .3) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)

stress <- 1000*exp(1.5 + rnorm(N)*.15) #in kilo units

cycles <- rep(NA, N)

#notice the substantial difference in fatigue limits between the
#two distributions from which the observations originate
for(i in 1:N){
  if(component[i]==1){
    cycles[i] <- exp( (14.5 - 1*(log(stress[i]-2100))) + rnorm(1)*.20)
  }else if(component[i]==2){
    cycles[i] <- exp( (11.5 - .82*(log(stress[i]-3500))) + rnorm(1)*.25)}
}

#any NA's?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>5000, 0, 1)
N-sum(cens, na.rm=TRUE) #number of censored observations
cycles <- ifelse(cens==0, 5000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
     xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data
cs <- cbind(cycles, stress, cens, component)
#remove observations with NA for cycles
cF <- data.frame(cs[!is.na(cycles),])

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=cF$cycles, event=cF$cens,
                                stress=cF$stress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=cF$cycles, event=cF$cens, stress=cF$stress, n=2,
              cluster=mixmult$posterior, maxiter=300)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=cF$cycles, y=cF$stress, type="n", log="xy", xlab="Cycles", ylab="Stress")
points(x=cF$cycles, y=cF$stress, xlog=TRUE, ylog=TRUE, pch=cF$cens, col=clusterPost)
Stress <- seq(2500, 7000, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))



#compute quantiles of the fitted mixture and plot results
stresses <- seq(1500, 7000,length.out=200)
Qp.05 <- sapply(stresses, predictQMix, object=mcF, p=.05) #.05th quantile
Qp.5 <- sapply(stresses, predictQMix, object=mcF, p=.5) #.5th quantile
Qp.95 <- sapply(stresses, predictQMix, object=mcF, p=.95) #.95th quantile
plot(x=cF$cycles, y=cF$stress, log="xy", pch=cF$cens,
     xlab="Cycles", ylab="Stress", col="gray",
     ylim=c(1000, 7000), xlim=c(100, 100000))
lines(Qp.05, stresses, col="blue", lty=2) #.05th quantile
lines(Qp.5, stresses, col="blue", lty=2) #.5th quantile
lines(Qp.95, stresses, col="blue", lty=2) #.95th quantile
abline(h=mcF$components[,4], col="red", lty=2) #fatigue limits
legend("topright", pch=c(1,0), legend=c("failure","runout"))





#EXAMPLE 3

#Superalloy data from Pascual and Meeker's (1997) paper "Analysis of fatigue data
#with runouts based on a model with nonconstant standard deviation and a
#fatigue limit parameter"
alloy <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/superalloy.txt", header=T)
#failure=1, runout=0
alloy$status <- abs(as.numeric(alloy$Status)-2)
alloy$Cycles <- 1000*alloy$kCycles

#log-log S-N curve
plot(x=alloy$kCycles, y=alloy$PseudoSress, log="xy", pch=alloy$status,
     xlab="Thousands of Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=alloy$kCycles, event=alloy$status,
                                stress=alloy$PseudoSress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=alloy$kCycles, event=alloy$status,
              stress=alloy$PseudoSress, n=2, cluster=mixmult$posterior)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=alloy$kCycles, y=alloy$PseudoSress, log="xy", col=clusterPost,
     pch=alloy$status, xlab="Thousands of Cycles", ylab="Stress")
Stress <- seq(50, 200, by=1)
c1l <- exp(mcF$components[1,1] + mcF$components[1,2]*log(Stress-mcF$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF$components[2,1] + mcF$components[2,2]*log(Stress-mcF$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))


#compute quantiles of the fitted mixture and plot results
stresses <- seq(40, 180,length.out=200)
Qp.05 <- sapply(stresses, predictQMix, object=mcF, p=.05) #.05th quantile
Qp.5 <- sapply(stresses, predictQMix, object=mcF, p=.5) #.5th quantile
stresses2 <- c(seq(80.22, 80.5, length.out=5), seq(80.5, 180,length.out=195))
Qp.95 <- sapply(stresses2, predictQMix, object=mcF, p=.95) #.95th quantile
plot(x=alloy$kCycles, y=alloy$PseudoSress, log="xy", pch=alloy$status,
     xlab="Thousands of Cycles", ylab="Stress", col="gray",
     xlim=c(5,5000), ylim=c(45, 180))
lines(Qp.05, stresses, col="blue", lty=2) #.05th quantile
lines(Qp.5, stresses, col="blue", lty=2) #.5th quantile
lines(Qp.95, stresses2, col="blue", lty=2) #.95th quantile
abline(h=mcF$components[,4], col="red", lty=2) #fatigue limits
legend("topright", pch=c(1,0), legend=c("failure","runout"))