# R code for computing quantiles of a Finite Mixture Fatigue Limit Model

In one of my previous posts I demonstrated how to fit a Finite Mixture Fatigue Limit Model to fatigue data.
The following R code computes the quantiles of a mixture fitted by the Finite Mixture Fatigue Limit Model. See this page for an overview of all of Stefan’s R code blog posts.

R code

```library(survival)
library(SPREDA)

##---define functions
#all of the following functions are needed for fitting the
#Finite Mixture Fatigue Limit Model and subsequently computing
#the quantiles of the fitted mixture

#function for fitting the fatigue limit model
flm <- function (cycles, cycles2=rep(NA, length(cycles)), event,
stress, wt=rep(1, length(cycles)),
startGamma=.5*min(stress), dist) {
#-cycles is the exact number of cycles to failure, the right/left censored value,
# or for interval censoring the lower value of the censoring interval
# note that censored observations are also referred to as runouts
#-cycles2 is the upper value of the censoring interval
#-event is the censoring indicator (0=right censored, 1=failure observed,
# 2=left censored, 3=interval censored)
#-stress is the stress level
#-wt are the weights for the observations
#-startGamma is a starting value for the fatigue limit (=gamma)
# a plausible starting value may be obtained from the log-log S-N curve
#-dist: either the "weibull", "lognormal", or "gaussian" distribution

#optimization (obtain estimate for gamma)
gammaMLE <- optim(par=startGamma, profile.gamma, method="Brent",
lower=0, upper=(1-(1/1e+6))*min(stress),
control=list(fnscale=-1),
cycles=cycles, cycles2=cycles2, event=event,
stress=stress, wt=wt, dist=dist)
gamma <- gammaMLE\$par

#obtain MLEs for remaining parameters in the fatigue limit model
mod <- survreg(Surv(time=cycles, time2=cycles2,
event=event, type="interval") ~ log(stress-gamma),
weights=wt, dist=dist)
parms <- c(coef(mod), coef(mod), mod\$scale, gamma, mod\$loglik)
names(parms) <- c("beta0mu", "beta1mu", "sigma", "gamma", "loglik")
parms}

#function for computing the limit (=gamma) of the fatigue limit model
profile.gamma <- function(gammahat, cycles, cycles2, event,
stress, wt, dist) {
stressG <- stress-gammahat
fit <- survreg(Surv(time=cycles, time2=cycles2,
event=event, type="interval") ~ log(stressG),
dist=dist, weights=wt)
fit\$loglik
}

#function for computing the log-likelihood of fatigue limit model
llikfm <- function (parms, cycles, cycles2=rep(NA, length(cycles)),
event, stress, wt=rep(1, length(cycles)),
postWt=rep(1, length(cycles)), dist) {
#-parms are the MLEs for the parameters of the fatigue limit model
#-cycles is the exact number of cycles to failure, the right/left censored value,
# or for interval censoring the lower value of the censoring interval
# note that censored observations are also referred to as runouts
#-cycles2 is the upper value of the censoring interval
#-event is the censoring indicator (0=right censored, 1=failure observed,
# 2=left censored, 3=interval censored)
#-stress is the stress level
#-wt are the weights for the observations
#-postWt are the posterior probabilities as returned by a finite mixture
# fatigue limit model
#-dist: either the "weibull", "lognormal", or "gaussian" distribution

mu <- parms + parms*log(stress-parms)
sigma <- parms

if (dist=="lognormal") {
z1 <- (log(cycles)-mu)/sigma
z2 <- (log(cycles2)-mu)/sigma

l <- ifelse(event==0, 1-pnorm(z1), #right censoring
ifelse(event==1, 1/(sigma*cycles) * dnorm(z1), #exact observations
ifelse(event==2, pnorm(z1), #left censoring
pnorm(z2)-pnorm(z1) ))) #interval censoring
(wt*postWt)%*%log(l)} #compute log-likelihood

else if (dist=="weibull") {
z1 <- (log(cycles)-mu)/sigma
z2 <- (log(cycles2)-mu)/sigma

l <- ifelse(event==0, 1-psev(z1), #right censoring
ifelse(event==1, 1/(sigma*cycles) * dsev(z1), #exact observations
ifelse(event==2, psev(z1), #left censoring
psev(z2)-psev(z1) ))) #interval censoring
(wt*postWt)%*%log(l)} #compute log-likelihood

else {
z1 <- (cycles-mu)/sigma
z2 <- (cycles2-mu)/sigma

l <- ifelse(event==0, 1-pnorm(z1), #right censoring
ifelse(event==1, 1/(sigma) * dnorm(z1), #exact observations
ifelse(event==2, pnorm(z1), #left censoring
pnorm(z2)-pnorm(z1) ))) #interval censoring
(wt*postWt)%*%log(l)} #compute log-likelihood
}

#function for fitting the Finite Mixture Fatigue Limit Model
library(LearnBayes)
mixflm <- function (cycles, cycles2=rep(NA, length(cycles)),
event=rep(1, length(cycles)), stress,
wt=rep(1, length(cycles)), startGamma=.5*min(stress),
dist="lognormal", n, cluster=NULL, maxiter=100, tol=1e-6) {
#-cycles is the exact number of cycles to failure, the right/left censored value,
# or for interval censoring the lower value of the censoring interval
# note that censored observations are also referred to as runouts
#-cycles2 is the upper value of the censoring interval
#-event is the censoring indicator (0=right censored, 1=failure observed,
# 2=left censored, 3=interval censored)
#-stress is the stress level
#-wt are the weights for the observations
#-startGamma is a starting value for the fatigue limit (=gamma)
# a plausible starting value may be obtained from the log-log S-N curve
#-dist: either the "weibull", "lognormal", or "gaussian" distribution
#-n is the number of components to be estimated
# or a matrix with n columns of initial posterior probabilities for the
# observations
#-maxiter is the maximum number of iterations
#-tol is the convergence criterion

nobs <- sum(wt) #number of observations

parms <- matrix(NA, ncol=5, nrow=n)
colnames(parms) <- c("beta0mu", "beta1mu", "sigma", "gamma", "logLik")
mus <- matrix(NA, ncol=n, nrow=length(cycles))
posteriorP <- matrix(NA, ncol=n, nrow=length(cycles))
P <- matrix(NA, ncol=n, nrow=length(cycles))

iteration <- 0 #initialize iteration counter
loglikInit <- logLikC <- 0 #initialize log-likelihood estimates

#initialize posterior probabilities
if (is.null(cluster)) {
alpha <- rep(.1, n)
posteriorP <- rdirichlet(length(cycles), alpha)}
else {posteriorP <- cluster}

while (iteration < maxiter) {
#estimate prior probabilities
priorP <- apply(wt*posteriorP, 2, sum)/nobs

#estimate parameters for each component
for (i in 1:n) {
wtp <- ifelse(wt*posteriorP[,i]<=0, 1e-15, wt*posteriorP[,i])
cp <- flm(cycles=cycles, cycles2=cycles2, event=event,
stress=stress, wt=wtp, startGamma=startGamma, dist=dist)
parms[i,] <- cp
mus[,i] <- cp + cp*log(stress-cp)}

#compute the (complete) log-likelihood value
logLikC <- sum(parms[,5]) + sum(wt*(posteriorP%*%log(priorP)))
logLikC <- ifelse(is.na(logLikC), 0, logLikC)

#estimate posterior probabilities
if (dist=="weibull") {
for (j in 1:n) {
mu <- mus[,j]; sigma <- parms[j, 3]

z1 <- (log(cycles)-mu)/sigma
z2 <- (log(cycles2)-mu)/sigma

cp <- ifelse(event==0, 1-psev(z1), #right censoring
ifelse(event==1, 1/(sigma*cycles) * dsev(z1), #exact observations
ifelse(event==2, psev(z1), #left censoring
psev(z2)-psev(z1) ))) #interval censoring
P[,j] <- priorP[j]*cp}
}

else if (dist=="lognormal") {
for (j in 1:n) {
mu <- mus[,j]; sigma <- parms[j, 3]

z1 <- (log(cycles)-mu)/sigma
z2 <- (log(cycles2)-mu)/sigma

cp <- ifelse(event==0, 1-pnorm(z1), #right censoring
ifelse(event==1, 1/(sigma*cycles) * dnorm(z1), #exact observations
ifelse(event==2, pnorm(z1), #left censoring
pnorm(z2)-pnorm(z1) )))
P[,j] <- priorP[j]*cp}
}

else if (dist=="gaussian") {
for (j in 1:n) {
mu <- mus[,j]; sigma <- parms[j, 3]

z1 <- (cycles-mu)/sigma
z2 <- (cycles2-mu)/sigma

cp <- ifelse(event==0, 1-pnorm(z1), #right censoring
ifelse(event==1, 1/(sigma) * dnorm(z1), #exact observations
ifelse(event==2, pnorm(z1), #left censoring
pnorm(z2)-pnorm(z1) )))
P[,j] <- priorP[j]*cp}
}

posteriorP <- P/rowSums(P)

#check convergence criterion
if (( abs(logLikC-loglikInit) / (abs(loglikInit)+.001*tol) ) < tol) break
loglikInit <- logLikC #update log-likelihood
iteration <- iteration + 1 #increment counter
}

if (iteration == maxiter) warning("Maximum number of iterations exceeded")
list(components=parms[,1:4], prior=priorP, loglik=logLikC,
AIC=-2*logLikC + 2*(n-1+n*(ncol(parms)-1)),
BIC=-2*logLikC + (n-1+n*(ncol(parms)-1))*log(nobs),
distribution=dist, iterations=iteration, posterior=posteriorP)
}

#function for computing quantiles of a mixture fitted by
#the Finite Mixture Fatigue Limit Model
predictQMix <- function (object, stress, p) {
#- object is a finite mixture model as returned by mixflm
#- stress is the stress level
#- p is the proportion at which to compute the quantile
parms <- object\$components
prior <- object\$prior

if (object\$distribution=="lognormal") {
fun <- function(p, parms, prior, stress, cycles) {
sum(sapply(1:nrow(parms),
function (x) {
if (stress-parms[x,4]>=0) {
mu <- parms[x,1] + parms[x,2]*log(stress-parms[x,4])
prior[x]*pnorm( (log(cycles)-mu) / parms[x,3])}
else 0 #0% fails at stress levels below the fatigue limit
}))-p}
if (all(stress-parms[,4]<0)) cycEst <- NA
else cycEst <- try(uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms,
prior=prior, stress=stress)\$root, TRUE)
ifelse(is.na(suppressWarnings(as.numeric(cycEst))), NA, as.numeric(cycEst))}

else if (object\$distribution=="weibull") {
fun <- function(p, parms, prior, stress) {
sum(sapply(1:nrow(parms),
function (x) {
if (stress-parms[x,4]>=0) {
mu <- parms[x,1] + parms[x,2]*log(stress-parms[x,4])
prior[x]*psev( (log(cycles)-mu) / parms[x,3])}
else 0 #0% fails at stress levels below the fatigue limit
}))-p}
if (all(stress-parms[,4]<0)) cycEst <- NA
else cycEst <- try(uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms,
prior=prior, stress=stress)\$root, TRUE)
ifelse(is.na(suppressWarnings(as.numeric(cycEst))), NA, as.numeric(cycEst))}

else if (object\$distribution=="gaussian") {
fun <- function(p, parms, prior, stress) {
sum(sapply(1:nrow(parms),
function (x) {
if (stress-parms[x,4]>=0) {
mu <- parms[x,1] + parms[x,2]*log(stress-parms[x,4])
prior[x]*pnorm( (cycles-mu) / parms[x,3])}
else 0 #0% fails at stress levels below the fatigue limit
}))-p}
if (all(stress-parms[,4]<0)) cycEst <- NA
else cycEst <- try(uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms,
prior=prior, stress=stress)\$root, TRUE)
ifelse(is.na(suppressWarnings(as.numeric(cycEst))), NA, as.numeric(cycEst))}
}

##---end of function definitions

#EXAMPLE 1

#generate data for mixture
N <- 300 #number of observations
probs <- c(.7, .3) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)

stress <- 1000*exp(1.5 + rnorm(N)*.15) #in kilo units

cycles <- rep(NA, N)

for(i in 1:N){
if(component[i]==1){
cycles[i] <- exp( (14.5 - 1*(log(stress[i]-2700))) + rnorm(1)*.20)
}else if(component[i]==2){
cycles[i] <- exp( (13.6 - .82*(log(stress[i]-2900))) + rnorm(1)*.15)}
}

#any NA's?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>7000, 0, 1)
N-sum(cens, na.rm=TRUE) #number of censored observations
cycles <- ifelse(cens==0, 7000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data
cs <- cbind(cycles, stress, cens, component)
#remove observations with NA for cycles
cF <- data.frame(cs[!is.na(cycles),])

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=cF\$cycles, event=cF\$cens,
stress=cF\$stress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=cF\$cycles, event=cF\$cens, stress=cF\$stress, n=2,
cluster=mixmult\$posterior, maxiter=300)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF\$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=cF\$cycles, y=cF\$stress, type="n", log="xy", xlab="Cycles", ylab="Stress")
points(x=cF\$cycles, y=cF\$stress, xlog=TRUE, ylog=TRUE, pch=cF\$cens, col=clusterPost)
Stress <- seq(2500, 7000, by=1)
c1l <- exp(mcF\$components[1,1] + mcF\$components[1,2]*log(Stress-mcF\$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF\$components[2,1] + mcF\$components[2,2]*log(Stress-mcF\$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#compute quantiles of the fitted mixture and plot results
stresses <- seq(1500, 7000,length.out=200)
Qp.05 <- sapply(stresses, predictQMix, object=mcF, p=.05) #.05th quantile
Qp.5 <- sapply(stresses, predictQMix, object=mcF, p=.5) #.5th quantile
Qp.95 <- sapply(stresses, predictQMix, object=mcF, p=.95) #.95th quantile
plot(x=cF\$cycles, y=cF\$stress, log="xy", pch=cF\$cens,
xlab="Cycles", ylab="Stress", col="gray")
lines(Qp.05, stresses, col="blue", lty=2) #.05th quantile
lines(Qp.5, stresses, col="blue", lty=2) #.5th quantile
lines(Qp.95, stresses, col="blue", lty=2) #.95th quantile
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#EXAMPLE 2

#generate data for mixture
N <- 300 #number of observations
probs <- c(.7, .3) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)

stress <- 1000*exp(1.5 + rnorm(N)*.15) #in kilo units

cycles <- rep(NA, N)

#notice the substantial difference in fatigue limits between the
#two distributions from which the observations originate
for(i in 1:N){
if(component[i]==1){
cycles[i] <- exp( (14.5 - 1*(log(stress[i]-2100))) + rnorm(1)*.20)
}else if(component[i]==2){
cycles[i] <- exp( (11.5 - .82*(log(stress[i]-3500))) + rnorm(1)*.25)}
}

#any NA's?
sum(is.na(cycles))

#apply censoring (here: Type I censoring)
#failure=1, censored=0
cens <- ifelse(cycles>5000, 0, 1)
N-sum(cens, na.rm=TRUE) #number of censored observations
cycles <- ifelse(cens==0, 5000, cycles)

#log-log S-N curve
plot(x=cycles, y=stress, log="xy", pch=cens, col=component,
xlab="Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#combine data
cs <- cbind(cycles, stress, cens, component)
#remove observations with NA for cycles
cF <- data.frame(cs[!is.na(cycles),])

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=cF\$cycles, event=cF\$cens,
stress=cF\$stress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=cF\$cycles, event=cF\$cens, stress=cF\$stress, n=2,
cluster=mixmult\$posterior, maxiter=300)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF\$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=cF\$cycles, y=cF\$stress, type="n", log="xy", xlab="Cycles", ylab="Stress")
points(x=cF\$cycles, y=cF\$stress, xlog=TRUE, ylog=TRUE, pch=cF\$cens, col=clusterPost)
Stress <- seq(2500, 7000, by=1)
c1l <- exp(mcF\$components[1,1] + mcF\$components[1,2]*log(Stress-mcF\$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF\$components[2,1] + mcF\$components[2,2]*log(Stress-mcF\$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#compute quantiles of the fitted mixture and plot results
stresses <- seq(1500, 7000,length.out=200)
Qp.05 <- sapply(stresses, predictQMix, object=mcF, p=.05) #.05th quantile
Qp.5 <- sapply(stresses, predictQMix, object=mcF, p=.5) #.5th quantile
Qp.95 <- sapply(stresses, predictQMix, object=mcF, p=.95) #.95th quantile
plot(x=cF\$cycles, y=cF\$stress, log="xy", pch=cF\$cens,
xlab="Cycles", ylab="Stress", col="gray",
ylim=c(1000, 7000), xlim=c(100, 100000))
lines(Qp.05, stresses, col="blue", lty=2) #.05th quantile
lines(Qp.5, stresses, col="blue", lty=2) #.5th quantile
lines(Qp.95, stresses, col="blue", lty=2) #.95th quantile
abline(h=mcF\$components[,4], col="red", lty=2) #fatigue limits
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#EXAMPLE 3

#Superalloy data from Pascual and Meeker's (1997) paper "Analysis of fatigue data
#with runouts based on a model with nonconstant standard deviation and a
#fatigue limit parameter"
#failure=1, runout=0
alloy\$status <- abs(as.numeric(alloy\$Status)-2)
alloy\$Cycles <- 1000*alloy\$kCycles

#log-log S-N curve
plot(x=alloy\$kCycles, y=alloy\$PseudoSress, log="xy", pch=alloy\$status,
xlab="Thousands of Cycles", ylab="Stress")
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#run repeatedly short runs for obtaining starting values, and
#start each short run with a different random initialization of
#the posterior probabilities
#note 1: this may be computationally intensive
#note 2: sometimes it is useful to increase maxiter to, for instance, 20
mixmult <- replicate(10, mixflm(cycles=alloy\$kCycles, event=alloy\$status,
stress=alloy\$PseudoSress, n=2, maxiter=10))

#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmult <- mixmult[, which.max(mixmult[3,])]
mixmult[1:6]

#fit finite mixture fatigue limit model (using the starting values)
mcF <- mixflm(cycles=alloy\$kCycles, event=alloy\$status,
stress=alloy\$PseudoSress, n=2, cluster=mixmult\$posterior)
mcF[1:7]

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mcF\$posterior, 1, which.max)

#log-log S-N curve (with fit of mixture included)
plot(x=alloy\$kCycles, y=alloy\$PseudoSress, log="xy", col=clusterPost,
pch=alloy\$status, xlab="Thousands of Cycles", ylab="Stress")
Stress <- seq(50, 200, by=1)
c1l <- exp(mcF\$components[1,1] + mcF\$components[1,2]*log(Stress-mcF\$components[1,4]))
lines(x=c1l, y=Stress, type="l", col="blue", lty=2)
c2l <- exp(mcF\$components[2,1] + mcF\$components[2,2]*log(Stress-mcF\$components[2,4]))
lines(x=c2l, y=Stress, type="l", col="blue", lty=2)
legend("topright", pch=c(1,0), legend=c("failure","runout"))

#compute quantiles of the fitted mixture and plot results
stresses <- seq(40, 180,length.out=200)
Qp.05 <- sapply(stresses, predictQMix, object=mcF, p=.05) #.05th quantile
Qp.5 <- sapply(stresses, predictQMix, object=mcF, p=.5) #.5th quantile
stresses2 <- c(seq(80.22, 80.5, length.out=5), seq(80.5, 180,length.out=195))
Qp.95 <- sapply(stresses2, predictQMix, object=mcF, p=.95) #.95th quantile
plot(x=alloy\$kCycles, y=alloy\$PseudoSress, log="xy", pch=alloy\$status,
xlab="Thousands of Cycles", ylab="Stress", col="gray",
xlim=c(5,5000), ylim=c(45, 180))
lines(Qp.05, stresses, col="blue", lty=2) #.05th quantile
lines(Qp.5, stresses, col="blue", lty=2) #.5th quantile
lines(Qp.95, stresses2, col="blue", lty=2) #.95th quantile
abline(h=mcF\$components[,4], col="red", lty=2) #fatigue limits
legend("topright", pch=c(1,0), legend=c("failure","runout"))```