R code for fitting a three-parameter Weibull distribution

The following code fits the three-parameter Weibull distribution to (right) censored or complete (uncensored) data in R.

The R code implements a fitting strategy proposed by Jerry Lawless in his 2003 book Statistical models and methods for lifetime data (pp. 187-188). A similar strategy is suggested by Terry Therneau in this comment.

Weibull 3 parameter

The 3-parameter Weibull distribution in the R code is fitted to data reported at this page of the SAS website.

For an alternative way of fitting the 3-parameter Weibull distribution see this blog post and this post.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(survival)
library(SPREDA)

#data
#failure times
times <- c(94, 96, 99, 99, 104, 108, 112, 114, 117, 117,
           118, 121, 121, 123, 129, 131, 133, 135, 136, 139,
           139, 140, 141, 141, 143, 144, 149, 149, 152, 153,
           159, 159, 159, 159, 162, 168, 168, 169, 170, 170,
           171, 172, 173, 176, 177, 180, 180, 184, 187, 188,
           189, 190, 196, 197, 203, 205, 211, 213, 224, 226,
           227, 256, 257, 269, 271, 274, 291, 300, 300, 300,
           300, 300)
#failure=1, censored=0
event <- rep(c(1,0), times=c(67, 5))



##probability plot
#check first whether fitting a 3-parameter Weibull distribution is appropriate
#that is, does a probability plot show curvature?
#more specifically, is there any concave behavior in the lower tail?
#if no such concavity is apparent, then consider fitting a 2-parameter Weibull distribution
timesSorted <- times[order(times)]
eventSorted <- event[order(times)]
timesi <- unique(timesSorted[which(eventSorted==1)])

cumSurvRaw <- survfit(Surv(times, event)~ 1)
cumSurv <- unique(rep(cumSurvRaw$surv, cumSurvRaw$n.event))
cumFail <- 1-cumSurv
lagFail <- c(0, cumFail[-length(cumFail)])
Prob <- .5*(cumFail+lagFail)
yi <- log(log((1-Prob)^-1))

#probability plot
plot(timesi, yi, log="x", xlab="Time", ylab="Linearized CDF")



##determine threshold of 3-parameter Weibull distribution

#profile function for threshold (=gamma)
profile.t <- function(gammahat, times, event) {
  time <- times-gammahat
  fit <- survreg(Surv(time, event) ~ 1, dist="weibull", subset=(time>0))
  fit$loglik[1]
}

#optimization
#note: for an estimate of gamma which coincides with the maximum in the profile likelihood
#plot (see below) you may sometimes have to vary the value of 1e+6 (e.g., set it to 1e+5)
gammaMLE <- optim(par=0, profile.t, method="L-BFGS-B",
                  upper=(1-(1/1e+6))*min(times[event==1]),
                  control=list(fnscale=-1),
                  times=times, event=event)
gamma <- gammaMLE$par
names(gamma) <- "gamma"

#obtain MLEs for mu and sigma of 3-parameter Weibull distribution
sgamma <- Surv(times-gamma, event)
musigma <- survreg(sgamma ~ 1, dist="weibull", subset=(times-gamma>0))

mu <- coef(musigma)
names(mu) <- "mu"
sigma <- musigma$scale
names(sigma) <- "sigma"

#summary of all estimated parameters
c(mu, sigma, gamma)
#log-likelihood of 3-parameter Weibull distribution
(loglikw <- musigma$loglik[1])



##normal-approximation confidence intervals for MLEs

#log-likelihood function of 3-parameter Weibull model
llikweibull <- function (theta, y, d) {
  mu <- theta[1]
  sigma <- theta[2]
  gamma <- theta[3]
  subset <- y-gamma>0
  ys <- y[subset]
  ds <- d[subset]
  sum(log((( 1/(sigma*(ys-gamma)) * dsev( ((log(ys-gamma)-mu)/sigma) )) ^ds)*
            ( (1-psev( ((log(ys-gamma)-mu)/sigma) )) ^(1-ds) )))
}

#optimization of log-likelihood
weibull.mle <- optim(c(mu, sigma, gamma), llikweibull,
                     method='BFGS', control=list(fnscale=-1),
                     y=times, d=event, hessian=TRUE)
#MLEs for mu, sigma, and gamma
c(weibull.mle$par[1], weibull.mle$par[2], weibull.mle$par[3])
#assess whether the MLEs are identical to the start values
#(this is usually a reliable indicator of the model's stability)
c(mu, sigma, gamma)
#log-likelihood
weibull.mle$value
#variance covariance matrix
solve(-1*weibull.mle$hessian)
#standard errors
SEparms <- sqrt(diag(solve(-1*weibull.mle$hessian)))

#normal-approximation 95% confidence intervals
#these intervals are also known as Fisher matrix interval bounds
#95% confidence interval for mu
weibull.mle$par[1] + c(1,-1)*qnorm(.05/2)*SEparms[1]
#95% confidence interval for sigma
#note: taking the log transformation of sigma guarrantees that the confidence
#bounds always take positive values
weibull.mle$par[2]*exp((c(1,-1)*qnorm(.05/2)*SEparms[2])/weibull.mle$par[2])
#95% confidence interval for gamma
weibull.mle$par[3] + c(1,-1)*qnorm(.05/2)*SEparms[3]



#shape and scale parameters
#beta (=shape)
(beta <- 1/sigma)
#eta (=scale)
exp(mu)



##likelihood based interval for gamma

#MLE for gamma
gamma
#fixed values of gamma
gammas <- seq(75, min(times[event==1])-1e-10, length.out=100)
#likelihood profile
ll <-sapply(gammas, profile.t, times=times, event=event)

#profile likelihood plot
plot(gammas,ll, type='l', xlab=expression(gamma), ylab="log-likelihood")
#include lower limit for log-likelihood values
#Lawless (p. 188) suggests that if beta>2 then use qchisq(.95, df=1),
#and if 1<beta<2, then employ qchisq(.95, df=2)
beta
#thus the limit becomes
limit <- loglikw - .5*qchisq(.95, df=2)
abline(h=limit,col="blue",lty=2)
#MLE of gamma
abline(v=gamma, lty=2, col="red")
#minimum of observed times
abline(v=min(times[event==1]), lty=2, col="darkgray")



#calculate the lower limit of the 95% confidence interval by hand
limitlkh <- function(gammas, times, event) {
  profile.t(gammas, times, event) - limit
}

uniroot(limitlkh, c(76,80), times=times, event=event)[1]



##predictions of fitted 3-parameter Weibull model

#predict quantiles (including normal-approximation confidence intervals)
weibullQp <- function (vcov, mu, sigma, gamma, p, alpha=.05){
  Qp <- as.vector(exp(qsev(p)*sigma + mu) + gamma)
  #take partial derivatives of the function q=exp(mu+qsev(p)*sigma)+gamma
  dq.dmu <- exp(mu+qsev(p)*sigma) #with respect to mu
  dq.dsigma <- qsev(p)*exp(mu+qsev(p)*sigma) #with respect to sigma
  dq.dgamma <- 1 #with respect to gamma
  dq.dtheta <- c(dq.dmu, dq.dsigma, dq.dgamma)
  #delta method
  seQ <- sqrt(t(dq.dtheta)%*%vcov%*%dq.dtheta)
  #compute confidence interval
  lcl <- Qp-qnorm(1-alpha/2)*seQ; ucl <- Qp+qnorm(1-alpha/2)*seQ
  c(p=p, Quantile=Qp, std.err=seQ, lcl=lcl, ucl=ucl)
}

#specify proportions at which to predict the quantiles
ps <- seq(.001, .999, length.out=200)
#predict quantiles at specified proportions
Qps <- data.frame(t(sapply(ps, weibullQp, vcov=solve(-1*weibull.mle$hessian),
                           mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
     xlim=c(gamma, max(times)))
#predictions
lines(Qps$Quantile, qsev(Qps$p), col="blue")
#confidence intervals
lines(Qps$lcl, qsev(Qps$p), lty=2, col="red")
lines(Qps$ucl, qsev(Qps$p), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)



#predict cumulative proportions (including normal-approximation confidence intervals)
weibullFx <- function (vcov, mu, sigma, gamma, x, alpha=.05){
  Fx <- as.vector(psev( (log(x-gamma)-mu ) / sigma))
  Zx <- qsev(Fx)
  #take partial derivatives of the function z=(log(x-gamma)-mu)/sigma
  dz.dmu <- -1/sigma #with respect to mu
  dz.dsigma <- (-1/sigma)*Zx #with respect to sigma
  dz.dgamma <- (-1/sigma)*(1/(x-gamma)) #with respect to gamma
  dz.dtheta <- c(dz.dmu, dz.dsigma, dz.dgamma)
  #delta method
  seZ <- sqrt(t(dz.dtheta)%*%vcov%*%dz.dtheta)
  #compute confidence interval
  w <- qnorm(1-alpha/2)*seZ
  lcl <- (psev(Zx-w)); ucl <- (psev(Zx+w))
  c(x=x, Fhat=Fx, std.err=seZ, lcl=lcl, ucl=ucl)
}

#specify times at which to predict the cumulative proportions
xs <- seq(min(times[event==1]), max(times[event==1]), length.out=200)
#predict cumulative proportions at specified times
Fxs <- data.frame(t(sapply(xs, weibullFx, vcov=solve(-1*weibull.mle$hessian),
                           mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
     xlim=c(gamma, max(times)))
#predictions
lines(Fxs$x, qsev(Fxs$Fhat), col="blue")
#confidence intervals
lines(Fxs$x, qsev(Fxs$lcl), lty=2, col="red")
lines(Fxs$x, qsev(Fxs$ucl), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)