R code for constructing bootstrap confidence intervals for the hazard function

The following R code may be used for computing the hazard function (also known as the hazard rate) of the Accelerated Failure Time model. The code computes the hazard function for failure times that follow either a Weibull or lognormal distribution.

The code also computes normal-approximation and bootstrap confidence intervals for the hazard function. For calculating the latter confidence intervals the code employs the nonparametric bootstrap-t method.

AFT model hazard function bootstrap ci

An introduction to the bootstrap-t method can be found in Meeker and Escobar’s 1998 book Statistical methods for reliability data (Chapter 9).

See this blog post for computing likelihood based confidence intervals for the hazard function.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(survival)

##data

#Shock absorber failure data (see: Meeker & Escobar, Example 3.8, p. 59)
ShAbs <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/ShockAbsorber.txt", header=T)

#failure=1, censored=0
ShAbs$Cens <- as.numeric(ShAbs$Censor)-1
head(ShAbs)



##Weibull distribution

##fit Weibull model to data
mod <- survreg(Surv(Distance,Cens)~1, data=ShAbs, dist="weibull")
summary(mod)
#extract MLE of mu and sigma
mu <- coef(mod)[1]; names(mu) <- "mu"
sigma <- mod$scale; names(sigma) <- "sigma"



##function for calculating the standard error of the hazard rate (Weibull distribution)

#partial derivatives of hazard function with respect to mu and sigma, respectively
#(these are needed for calculating the standard error of the hazard rate)
dh.dmu <- deriv(~(1/(sigma*exp(mu)))*(x/exp(mu))^((1/sigma)-1), "mu")
dh.dsigma <- deriv(~(1/(sigma*exp(mu)))*(x/exp(mu))^((1/sigma)-1), "sigma")

SEhazard <- function(x, mu, sigma, vcov){
  #apply the delta method for "undoing" the log transformation of sigma
  vcov[1,2] <- vcov[2,1] <- t(c(0,exp(log(sigma))))%*%vcov%*%c(1,0)
  vcov[2,2] <- t(c(0,exp(log(sigma))))%*%vcov%*%c(0,exp(log(sigma)))
  #hazard rate estimate
  esthazardEst <- as.vector((1/(sigma*exp(mu)))*(x/exp(mu))^((1/sigma)-1))
  #calculate standard error of hazard rate (using the delta method)
  dhEval <- c(attr(eval(dh.dmu),"gradient"), attr(eval(dh.dsigma),"gradient"))
  esthazardSE <- sqrt(t(dhEval)%*%vcov%*%dhEval)
  c(esthazardEst, esthazardSE)}



##function for computing normal-approximation confidence intervals for the hazard rate
ciHazardNormal <- function(x, mu, sigma, vcov, alpha=.05){
  hpred <- SEhazard(x, mu, sigma, vcov)
  w <- exp(qnorm(1-alpha/2)*hpred[2]/hpred[1])
  c(x=x, Hazard=hpred[1], std.err=hpred[2], lcl=hpred[1]/w, ucl=hpred[1]*w)}



##compute hazard rate (including normal-approximation confidence intervals)
#generate sequence of values at which to calculate the hazard rate
xs <- seq(6000, 30000, by=2000)
hazards <- data.frame(t(sapply(xs, ciHazardNormal, mu=mu, sigma=sigma, vcov=vcov(mod))))
hazards
#plot hazard rate (including confidence intervals)
plot(hazards$x, hazards$Hazard, type="l", col="blue",
     ylim=c(min(hazards$lcl), max(hazards$ucl)),
     xlab="Kilometers", ylab="Hazard Function")
lines(hazards$x, hazards$lcl, lty=2, col="red")
lines(hazards$x, hazards$ucl, lty=2, col="red")



##nonparametric bootstrap confidence intervals (Weibull distribution)
B <- 2000 #number of bootstrap samples

#function for (1) generating bootstrap samples, and (2) estimating mu, sigma,
#and the variance-covariance matrix of mu and log(sigma) for each bootstrap sample
modEst <- function (time, event){
  obs <- data.frame(t=time, e=event)
  n <- nrow(obs)
  s <- obs[sample(n, size=n, replace=TRUE), ]
  mods <- survreg(Surv(t, e)~1, data=s, dist="weibull")[c(1,3,8)]
  mods}

#apply function to the actual data
modBoot <- replicate(B, modEst(time=ShAbs$Distance, event=ShAbs$Cens))



##apply bootstrap-t method for calculating confidence intervals for the hazard rate

#calculate the hazard rate (and its std.err.) for the actual sample
#at a specified value (i.e., at 20000 kilometers for the shock absorber data)
hazHat <- SEhazard(x=20000, mu=mu, sigma=sigma, vcov=vcov(mod))[1]
hazHatSE <- SEhazard(x=20000, mu=mu, sigma=sigma, vcov=vcov(mod))[2]

#calculate the hazard rate (and its std.err.) for each bootstrap sample
#at a specified value (i.e., at 20000 kilometers for the shock absorber data)
hazardBoot <- sapply(1:B, function(i) {
  SEhazard(x=20000, mu=as.numeric(modBoot[1,i]), sigma=as.numeric(modBoot[3,i]),
           vcov=matrix(unlist(modBoot[2,i]), nrow=2))})

#for each bootstrap sample, take log transformation of the hazard rate estimate
#(this log transformation ensures that the limits of the confidence interval do not take negative values)
logHazard <- log(hazardBoot[1,])
#compute for each bootstrap sample the std.err. of log transformed hazard rates
logHazardSE <- hazardBoot[2,]/hazardBoot[1,]
#compute for each bootstrap sample the Z-score of the log transformed hazard rate
ZlogHazard <- (logHazard-log(hazHat))/logHazardSE

#inspect the distribution of the hazard rate estimates of bootstrap samples
hist(hazardBoot[1,])
#inspect the distribution of the Z-scores
hist(ZlogHazard)

#confidence intervals (using the computed Z-scores of the bootstrap samples)
alpha <- .05 #two-sided 95% confidence interval
wl <- exp(quantile(ZlogHazard, probs=1-alpha/2)*(hazHatSE/hazHat))
wu <- exp(quantile(ZlogHazard, probs=alpha/2)*(hazHatSE/hazHat))
as.vector(c(hazHat/wl, hazHat/wu))

#compare bootstrap interval with the normal-approximation confidence interval
ciHazardNormal(x=20000, mu=mu, sigma=sigma, vcov=vcov(mod))





##lognormal distribution

##fit lognormal model to data
mod <- survreg(Surv(Distance,Cens)~1, data=ShAbs, dist="lognormal")
summary(mod)
#extract MLE of mu and sigma
mu <- coef(mod)[1]; names(mu) <- "mu"
sigma <- mod$scale; names(sigma) <- "sigma"



##function for calculating the standard error of the hazard rate (lognormal distribution)

#partial derivatives of hazard function with respect to mu and sigma, respectively
#(these are needed for calculating the standard error of the hazard rate)
dh.dmu <- deriv(~(dnorm((log(x)-mu)/sigma))/((sigma*x)*(1-pnorm((log(x)-mu)/sigma))), "mu")
dh.dsigma <- deriv(~(dnorm((log(x)-mu)/sigma))/((sigma*x)*(1-pnorm((log(x)-mu)/sigma))), "sigma")

SEhazard <- function(x, mu, sigma, vcov){
  #apply the delta method for "undoing" the log transformation of sigma
  vcov[1,2] <- vcov[2,1] <- t(c(0,exp(log(sigma))))%*%vcov%*%c(1,0)
  vcov[2,2] <- t(c(0,exp(log(sigma))))%*%vcov%*%c(0,exp(log(sigma)))
  #hazard rate estimate
  esthazardEst <- as.vector(dnorm((log(x)-mu)/sigma)/(x*sigma*(1-pnorm((log(x)-mu)/sigma))))
  #calculate standard error of hazard rate (using the delta method)
  dhEval <- c(attr(eval(dh.dmu),"gradient"), attr(eval(dh.dsigma),"gradient"))
  esthazardSE <- sqrt(t(dhEval)%*%vcov%*%dhEval)
  c(esthazardEst, esthazardSE)}



##function for computing normal-approximation confidence intervals for the hazard rate
ciHazardNormal <- function(x, mu, sigma, vcov, alpha=.05){
  hpred <- SEhazard(x, mu, sigma, vcov)
  w <- exp(qnorm(1-alpha/2)*hpred[2]/hpred[1])
  c(x=x, Hazard=hpred[1], std.err=hpred[2], lcl=hpred[1]/w, ucl=hpred[1]*w)}



##compute hazard rate (including normal-approximation confidence intervals)
#generate sequence of values at which to calculate the hazard rate
xs <- seq(6000, 30000, by=2000)
hazards <- data.frame(t(sapply(xs, ciHazardNormal, mu=mu, sigma=sigma, vcov=vcov(mod))))
hazards
#plot hazard rate (including confidence intervals)
plot(hazards$x, hazards$Hazard, type="l", col="blue",
     ylim=c(min(hazards$lcl), max(hazards$ucl)),
     xlab="Kilometers", ylab="Hazard Function")
lines(hazards$x, hazards$lcl, lty=2, col="red")
lines(hazards$x, hazards$ucl, lty=2, col="red")



##nonparametric bootstrap confidence intervals (lognormal distribution)
B <- 2000 #number of bootstrap samples

#function for (1) generating bootstrap samples, and (2) estimating mu, sigma,
#and the variance-covariance matrix of mu and log(sigma) for each bootstrap sample
modEst <- function (time, event){
  obs <- data.frame(t=time, e=event)
  n <- nrow(obs)
  s <- obs[sample(n, size=n, replace=TRUE), ]
  mods <- survreg(Surv(t, e)~1, data=s, dist="lognormal")[c(1,3,8)]
  mods}

#apply function to the actual data
modBoot <- replicate(B, modEst(time=ShAbs$Distance, event=ShAbs$Cens))



##apply bootstrap-t method for calculating confidence intervals for the hazard rate

#calculate the hazard rate (and its std.err.) for the actual sample
#at a specified value (i.e., at 20000 kilometers for the shock absorber data)
hazHat <- SEhazard(x=20000, mu=mu, sigma=sigma, vcov=vcov(mod))[1]
hazHatSE <- SEhazard(x=20000, mu=mu, sigma=sigma, vcov=vcov(mod))[2]

#calculate the hazard rate (and its std.err.) for each bootstrap sample
#at a specified value (i.e., at 20000 kilometers for the shock absorber data)
hazardBoot <- sapply(1:B, function(i) {
  SEhazard(x=20000, mu=as.numeric(modBoot[1,i]), sigma=as.numeric(modBoot[3,i]),
           vcov=matrix(unlist(modBoot[2,i]), nrow=2))})

#for each bootstrap sample, take log transformation of the hazard rate estimate
#(this log transformation ensures that the limits of the confidence interval do not take negative values)
logHazard <- log(hazardBoot[1,])
#compute for each bootstrap sample the std.err. of log transformed hazard rates
logHazardSE <- hazardBoot[2,]/hazardBoot[1,]
#compute for each bootstrap sample the Z-score of the log transformed hazard rate
ZlogHazard <- (logHazard-log(hazHat))/logHazardSE

#inspect the distribution of the hazard rate estimates of bootstrap samples
hist(hazardBoot[1,])
#inspect the distribution of the Z-scores
hist(ZlogHazard)

#confidence intervals (using the computed Z-scores of the bootstrap samples)
alpha <- .05 #two-sided 95% confidence interval
wl <- exp(quantile(ZlogHazard, probs=1-alpha/2)*(hazHatSE/hazHat))
wu <- exp(quantile(ZlogHazard, probs=alpha/2)*(hazHatSE/hazHat))
as.vector(c(hazHat/wl, hazHat/wu))

#compare bootstrap interval with the normal-approximation confidence interval
ciHazardNormal(x=20000, mu=mu, sigma=sigma, vcov=vcov(mod))