R code for constructing likelihood-based confidence intervals when summing two random variables (part 2)

The following R code may be used for computing likelihood-based confidence intervals for quantiles when adding two random variables.

A practical example
A device is known to fail due to material cracks. The failure time of this device is the sum of the crack onset time (X) and the crack growth time (Y). Both X and Y are random variables and are assumed to follow a Weibull distribution.

What is the failure time at which the probability of failure for X+Y equals, say, 0.1%? Stated differently: when observing both the onset and growth times of the material cracks, at what time will a proportion of .001 of these devices have failed? This time corresponds to the .001 quantile of the distribution of X+Y.
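
For independent, nonnegative random variables X and Y, the cumulative distribution function of the sum is the convolution F_{X+Y}(q) = ∫_0^q f_Y(u)*F_X(q-u) du, and the quantile Q_p is the value solving F_{X+Y}(Q_p) = p (here with p = .001). The R code below evaluates this integral numerically (in the function called integrand) and subsequently solves for Q_p.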

The method for computing likelihood-based confidence intervals for these quantiles is similar to the one used in this blog post. That post discusses how to sum two random variables and also demonstrates how to compute likelihood-based confidence intervals for cumulative probabilities.

[Figure: profile log-likelihood for a quantile when summing two random variables, with likelihood-based confidence limits]

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(survival) #provides survreg
library(SPREDA) #provides dsev and psev
library(pracma) #provides fminsearch and fzero

##data

#X-data (crack onset times in seconds)
#lower limit of time interval
XL <- c(112,276,1,323,80,368,618,627,673,824,833,72.5)
#upper limit of time interval
XU <- c(328,563,704,528,NA,NA,NA,NA,NA,NA,NA,NA)
#failure=1, censored=0
XStatus <- c(rep(1,4), rep(0,8))
#weights (case counts: the last entry represents 75 identical right-censored units)
XCount <- c(rep(1,11),75)
#construct data frame
Xdata <- data.frame(XL,XU,XStatus,XCount)

#Y-data (crack growth times in seconds)
Ydata <- c(279,431) #crack growth times
Yd <- c(1,1) #failure=1, censored=0



##fit distributions to data
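#(in the Surv-function, type="interval2" treats an NA upper limit as a right-censored observation)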
modX <- survreg(Surv(XL, XU, type="interval2") ~ 1,
                weights=XCount, data=Xdata,
                dist="weibull")
modY <- survreg(Surv(Ydata, Yd)~1, dist="weibull")

#obtain MLE for mu and sigma
Xmu <- coef(modX)[1]
names(Xmu) <- "Xmu"
Xsigma <- modX$scale
names(Xsigma) <- "Xsigma"
c(Xmu, Xsigma)

Ymu <- coef(modY)[1]
names(Ymu) <- "Ymu"
Ysigma <- modY$scale
names(Ysigma) <- "Ysigma"
c(Ymu, Ysigma)
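
#optional check: mu and sigma correspond to the shape/scale
#parameterization of R's dweibull/pweibull as shape = 1/sigma
#and scale = exp(mu)
c(shape=1/unname(Xsigma), scale=exp(unname(Xmu)))
c(shape=1/unname(Ysigma), scale=exp(unname(Ymu)))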



##likelihood-based interval for quantiles when summing 2 random variables X and Y
#Qp is the quantile (Q) at which a specified proportion (p) of the units fails

#specify proportion
p <- .001

#specify integrand for the numerical integration of P(X+Y <= Qphat)
integrand <- function(u, Xmu, Ymu, Xsigma, Ysigma, Qphat) {
  #pdf of Y at u (where Y follows a Weibull distribution)
  f1 <- dsev((log(u)-Ymu)/Ysigma) / (Ysigma*u)
  #cdf of X at Qphat-u (where X follows a Weibull distribution)
  f2 <- psev((log(Qphat-u)-Xmu)/Xsigma)
  f1*f2
}

#objective function: abs(p - P(X+Y <= Qphat)), to be minimized over Xmu
XmuMin <- function (XmuPar, Xsigma, Ymu, Ysigma, Qphat){
  abs(p-integrate(integrand, 0, Qphat, Qphat=Qphat, Xmu=XmuPar, Ymu=Ymu,
                  Xsigma=Xsigma, Ysigma=Ysigma)$value)}

XmuSE <- sqrt(diag(vcov(modX)))[1] #extract standard error for Xmu

#log-likelihood function
llik.sum <- function (modpar, xlower, xupper, xd, wtx=rep(1,length(xlower)),
                      y, yd, wty=rep(1,length(y)), Qphat) {
  xsigma <- exp(modpar[1])
  ysigma <- exp(modpar[2])
  ymu <- modpar[3]
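  #profile out xmu: given Qphat and the remaining parameters, xmu is
  #determined by the constraint P(X+Y <= Qphat) = p (solved with Brent)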
  xmu <- optim(par=Xmu, XmuMin, method="Brent",
               lower=Xmu-20*XmuSE, upper=Xmu+20*XmuSE,
               Qphat=Qphat, Xsigma=xsigma, Ymu=ymu, Ysigma=ysigma)$par
  
  zy <- (log(y)-ymu)/ysigma
  zxLower <- (log(xlower)-xmu)/xsigma
  zxUpper <- (log(xupper)-xmu)/xsigma
  
  #assuming that the random variables X and Y are independent,
  #the log-likelihood is the sum of the X and Y log-likelihoods
  #(for a right-censored X observation xd=0, so the NA upper limit
  #drops out because NA^0 equals 1 in R):
  sum(wty*log((( 1/(ysigma*y) * dsev(zy)) ^yd)*( (1-psev(zy)) ^(1-yd) ))) +
    sum(wtx*log( ((psev(zxUpper)-psev(zxLower))^xd)*( (1-psev(zxLower)) ^(1-xd) ) ))
}

#profile function (using the Nelder-Mead method of pracma's fminsearch)
#note that the Nelder-Mead implementation in the optim-function
#is not stable for these data (where both random variables are
#assumed to follow a Weibull distribution), hence fminsearch is used
profile.sum <- function(xlower, xupper, xd, wtx=rep(1,length(xlower)),
                        y, yd, wty=rep(1,length(y)), Qphats) {
  plkqhat <- rep(0, length(Qphats))
  
  for (i in 1:length(Qphats)) {
    temp <- fminsearch(llik.sum, c(log(Xsigma), log(Ysigma), Ymu),
                       minimize=FALSE, dfree=TRUE,
                       xlower=xlower, xupper=xupper, xd=xd, wtx=wtx,
                       y=y, yd=yd, wty=wty, Qphat=Qphats[i] )
    plkqhat[i] <- temp$fval
  }
  plkqhat
}

#MLE for Qp: find qhat such that P(X+Y <= qhat) = p
Qss <- function(qhat, Xmu, Xsigma, Ymu, Ysigma, p) {
  p - integrate(integrand, lower=0, upper=qhat, Qphat=qhat,
                Xmu=Xmu, Ymu=Ymu, Xsigma=Xsigma, Ysigma=Ysigma)$value}

#compute MLE for the quantile (at specified p)
(QpHat <- fzero(f=Qss, x=c(1e-5, 1e+5), Xmu=Xmu, Xsigma=Xsigma, Ymu=Ymu, Ysigma=Ysigma, p=p)$x)
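
#sanity check: the cdf of X+Y evaluated at QpHat should return p
#(up to numerical integration error)
integrate(integrand, lower=0, upper=QpHat, Qphat=QpHat,
          Xmu=Xmu, Ymu=Ymu, Xsigma=Xsigma, Ysigma=Ysigma)$value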

#generate fixed values for the quantiles
Qphats <- seq(120, 500, length.out=50)

#compute the likelihood profile (computationally intensive)
ll <- profile.sum(Qphats=Qphats, xlower=XL, xupper=XU, xd=XStatus, wtx=XCount,
                  y=Ydata, yd=Yd)

#plot profile likelihood for Qphat
plot(Qphats,ll, type='l', xlab=bquote(Q[.(p)]), ylab="log-likelihood")

#include lower limit for log-likelihood values
loglikw <- llik.sum(modpar=c(log(Xsigma), log(Ysigma), Ymu),
                    xlower=XL, xupper=XU, xd=XStatus, wtx=XCount,
                    y=Ydata, yd=Yd, Qphat=QpHat)
alpha <- .10 #90% confidence interval
limit <- loglikw - .5*qchisq(1-alpha, df=1)
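#(values of Qp whose profile log-likelihood falls below this limit lie
#outside the two-sided 90% confidence interval)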
abline(h=limit,col="blue",lty=2)
#include MLE for Qphat
abline(v=QpHat, col="red", lty=2)


#compute limits of confidence interval by hand
limitlkh <- function(xlower, xupper, xd, y, yd, Qphats,
                     wtx=rep(1,length(xlower)), wty=rep(1,length(y))) {
  profile.sum(xlower, xupper, xd, wtx, y, yd, wty, Qphats) - limit
}

#lower and upper limits of the two-sided confidence interval
lowerci <- fzero(limitlkh, c(120,QpHat), xlower=XL, xupper=XU, xd=XStatus, wtx=XCount, y=Ydata, yd=Yd)
upperci <- fzero(limitlkh, c(QpHat, 500), xlower=XL, xupper=XU, xd=XStatus, wtx=XCount, y=Ydata, yd=Yd)
round(c(lowerci$x, upperci$x), 6)

#include confidence bounds in plot
abline(v=lowerci$x, col="darkgreen", lty=2)
abline(v=upperci$x, col="darkgreen", lty=2)


#lower bound of a one-sided confidence interval
alpha <- .10 #90% one-sided confidence bound
#a one-sided bound uses the chi-square quantile at 1-2*alpha
limit <- loglikw - .5*qchisq(1-2*alpha, df=1)

lowerci <- fzero(limitlkh, c(120, QpHat), xlower=XL, xupper=XU, xd=XStatus, wtx=XCount, y=Ydata, yd=Yd)
lowerci$x

In the following example, the random variables X and Y are assumed to follow lognormal distributions. Furthermore, in this example the failure times for X are exact (instead of the interval-censored failure times for X in the example above).

library(survival) #provides survreg
library(pracma) #provides fzero

##some artificial data

Xdata <- c(94, 191, 339, 371, 391, 421, 428, 462, 478, 502, 503, 510, 512,
           529, 531, 546, 585, 611, 618, 636, 704, 705, 712, 718, 776, 781,
           782, 788, 792, 805, 808, 836, 883, 886, 899, 904, 915, 1002, 1009,
           1017, 1061, 1132, 1148, 1222, 1300, 1316, 1446, 1493, 1504, 1533)
Xd <- rep(1, length(Xdata)) #failure=1, censored=0

Ydata <- c(178, 222, 240, 247, 251, 265, 271, 279, 287, 294, 308, 313,
           320, 324, 338, 347, 348, 351, 352, 354, 356, 357, 357, 358,
           394, 407, 410, 415, 418, 462)
Yd <- rep(1, length(Ydata)) #failure=1, censored=0



##fit distributions to data
modX <- survreg(Surv(Xdata, Xd)~1, dist="lognormal")
modY <- survreg(Surv(Ydata, Yd)~1, dist="lognormal")

#obtain MLE for mu and sigma
Xmu <- coef(modX)[1]
names(Xmu) <- "Xmu"
Xsigma <- modX$scale
names(Xsigma) <- "Xsigma"

Ymu <- coef(modY)[1]
names(Ymu) <- "Ymu"
Ysigma <- modY$scale
names(Ysigma) <- "Ysigma"
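
#optional check of the parameterization: for the lognormal fits,
#pnorm((log(t)-Xmu)/Xsigma) equals plnorm(t, meanlog=Xmu, sdlog=Xsigma)
t0 <- 500 #arbitrary illustrative time
c(pnorm((log(t0)-unname(Xmu))/unname(Xsigma)),
  plnorm(t0, meanlog=unname(Xmu), sdlog=unname(Xsigma)))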



##likelihood-based interval for quantiles when summing 2 random variables X and Y
#Qp is the quantile (Q) at which a specified proportion (p) of the units fails

#specify proportion
p <- .001

#specify integrand for the numerical integration of P(X+Y <= Qphat)
integrand <- function(u, Xmu, Ymu, Xsigma, Ysigma, Qphat) {
  #pdf of Y at u (where Y follows a lognormal distribution)
  f1 <- dnorm((log(u)-Ymu)/Ysigma) / (Ysigma*u)
  #cdf of X at Qphat-u (where X follows a lognormal distribution)
  f2 <- pnorm((log(Qphat-u)-Xmu)/Xsigma)
  f1*f2
}

#objective function: abs(p - P(X+Y <= Qphat)), to be minimized over Xmu
XmuMin <- function (XmuPar, Xsigma, Ymu, Ysigma, Qphat){
  abs(p-integrate(integrand, lower=0, upper=Qphat, Qphat=Qphat, Xmu=XmuPar, Ymu=Ymu,
                  Xsigma=Xsigma, Ysigma=Ysigma)$value)}

XmuSE <- sqrt(diag(vcov(modX)))[1] #extract standard error for Xmu

#log-likelihood function
llik.sum <- function (modpar, x, xd, wtx=rep(1,length(x)),
                      y, yd, wty=rep(1,length(y)), Qphat) {
  xsigma <- exp(modpar[1])
  ysigma <- exp(modpar[2])
  ymu <- modpar[3]
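  #profile out xmu via the quantile constraint, as in the first example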
  xmu <- optim(par=Xmu, XmuMin, method="Brent",
               lower=Xmu-20*XmuSE, upper=Xmu+20*XmuSE,
               Qphat=Qphat, Xsigma=xsigma, Ymu=ymu, Ysigma=ysigma)$par
  
  z1 <- (log(y)-ymu)/ysigma
  z2 <- (log(x)-xmu)/xsigma
  
  #assuming that the random variables X and Y are independent,
  #the log-likelihood function is given by:
  sum(wty*log((( 1/(ysigma*y) * dnorm(z1)) ^yd)*( (1-pnorm(z1)) ^(1-yd) ))) +
    sum(wtx*log((( 1/(xsigma*x) * dnorm(z2)) ^xd)*( (1-pnorm(z2)) ^(1-xd) )))
}

#profile function (using the Nelder-Mead optimization method of optim)
#note that the Nelder-Mead implementation in pracma's fminsearch
#is not stable for these data (where both random variables are
#assumed to follow a lognormal distribution), hence optim is used
profile.sum <- function(x, xd, wtx=rep(1,length(x)),
                        y, yd, wty=rep(1,length(y)), Qphats) {
  plkqhat <- rep(0, length(Qphats))
  
  for (i in 1:length(Qphats)) {
    temp <- optim(c(log(Xsigma), log(Ysigma), Ymu), llik.sum,
                  method='Nelder-Mead', control=list(fnscale=-1),
                  x=x, xd=xd, wtx=wtx, y=y, yd=yd, wty=wty, Qphat=Qphats[i] )
    plkqhat[i] <- temp$value
  }
  plkqhat
}


#MLE for Qp: find qhat such that P(X+Y <= qhat) = p
Qss <- function(qhat, Xmu, Xsigma, Ymu, Ysigma, p) {
  p - integrate(integrand, lower=0, upper=qhat, Qphat=qhat,
                Xmu=Xmu, Ymu=Ymu, Xsigma=Xsigma, Ysigma=Ysigma)$value}

#compute MLE for the quantile (at specified p)
(QpHat <- fzero(f=Qss, x=c(1e-5, 1e+5), Xmu=Xmu, Xsigma=Xsigma, Ymu=Ymu, Ysigma=Ysigma, p=p)$x)
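
#Monte Carlo cross-check of QpHat (a rough sketch, subject to sampling error)
set.seed(1)
xsim <- rlnorm(1e6, meanlog=Xmu, sdlog=Xsigma)
ysim <- rlnorm(1e6, meanlog=Ymu, sdlog=Ysigma)
quantile(xsim+ysim, probs=p)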

#generate fixed values for the quantiles
Qphats <- seq(350, 500, length.out=50)

#compute the likelihood profile (computationally intensive)
ll <- profile.sum(Qphats=Qphats, x=Xdata, xd=Xd, y=Ydata, yd=Yd)

#plot profile likelihood for Qphat
plot(Qphats,ll, type='l', xlab=bquote(Q[.(p)]), ylab="log-likelihood")

#include lower limit for log-likelihood values
loglikw <- llik.sum(modpar=c(log(Xsigma), log(Ysigma), Ymu),
                    x=Xdata, xd=Xd,
                    y=Ydata, yd=Yd, Qphat=QpHat)
alpha <- .10 #90% confidence interval
limit <- loglikw - .5*qchisq(1-alpha, df=1)
abline(h=limit,col="blue",lty=2)
#include MLE for Qphat
abline(v=QpHat, col="red", lty=2)


#compute limits of confidence interval by hand
limitlkh <- function(xobs, xd, yobs, yd, Qphats,
                     wtx=rep(1,length(xobs)), wty=rep(1,length(yobs))) {
  profile.sum(x=xobs, xd=xd, wtx=wtx, y=yobs, yd=yd, wty=wty, Qphats=Qphats) - limit
}

#lower and upper limits of the two-sided confidence interval
lowerci <- fzero(limitlkh, c(350, QpHat), xobs=Xdata, xd=Xd, yobs=Ydata, yd=Yd)
upperci <- fzero(limitlkh, c(QpHat, 500), xobs=Xdata, xd=Xd, yobs=Ydata, yd=Yd)
round(c(lowerci$x, upperci$x), 6)

#include confidence bounds in plot
abline(v=lowerci$x, col="darkgreen", lty=2)
abline(v=upperci$x, col="darkgreen", lty=2)