# R code for fitting a three-parameter Weibull distribution

The following code fits the three-parameter Weibull distribution to (right) censored or complete (uncensored) data in R.

The R code implements a fitting strategy proposed by Jerry Lawless in his 2003 book Statistical models and methods for lifetime data (pp. 187-188). A similar strategy is suggested by Terry Therneau in this comment. The 3-parameter Weibull distribution in the R code is fitted to data reported at this page of the SAS website.

For an alternative way of fitting the 3-parameter Weibull distribution see this blog post and this post.

See this page for an overview of all of Stefan’s R code blog posts.

R code

```library(survival)
library(SPREDA)

#data
#failure times
times <- c(94, 96, 99, 99, 104, 108, 112, 114, 117, 117,
118, 121, 121, 123, 129, 131, 133, 135, 136, 139,
139, 140, 141, 141, 143, 144, 149, 149, 152, 153,
159, 159, 159, 159, 162, 168, 168, 169, 170, 170,
171, 172, 173, 176, 177, 180, 180, 184, 187, 188,
189, 190, 196, 197, 203, 205, 211, 213, 224, 226,
227, 256, 257, 269, 271, 274, 291, 300, 300, 300,
300, 300)
#failure=1, censored=0
event <- rep(c(1,0), times=c(67, 5))

##probability plot
#check first whether fitting a 3-parameter Weibull distribution is appropriate
#that is, does a probability plot show curvature?
#more specifically, is there any concave behavior in the lower tail?
#if no such concavity is apparent, then consider fitting a 2-parameter Weibull distribution
timesSorted <- times[order(times)]
eventSorted <- event[order(times)]
timesi <- unique(timesSorted[which(eventSorted==1)])

cumSurvRaw <- survfit(Surv(times, event)~ 1)
cumSurv <- unique(rep(cumSurvRaw\$surv, cumSurvRaw\$n.event))
cumFail <- 1-cumSurv
lagFail <- c(0, cumFail[-length(cumFail)])
Prob <- .5*(cumFail+lagFail)
yi <- log(log((1-Prob)^-1))

#probability plot
plot(timesi, yi, log="x", xlab="Time", ylab="Linearized CDF")

##determine threshold of 3-parameter Weibull distribution

#profile function for threshold (=gamma)
profile.t <- function(gammahat, times, event) {
time <- times-gammahat
fit <- survreg(Surv(time, event) ~ 1, dist="weibull", subset=(time>0))
fit\$loglik
}

#optimization
#note: for an estimate of gamma which coincides with the maximum in the profile likelihood
#plot (see below) you may sometimes have to vary the value of 1e+6 (e.g., set it to 1e+5)
gammaMLE <- optim(par=0, profile.t, method="L-BFGS-B",
upper=(1-(1/1e+6))*min(times[event==1]),
control=list(fnscale=-1),
times=times, event=event)
gamma <- gammaMLE\$par
names(gamma) <- "gamma"

#obtain MLEs for mu and sigma of 3-parameter Weibull distribution
sgamma <- Surv(times-gamma, event)
musigma <- survreg(sgamma ~ 1, dist="weibull", subset=(times-gamma>0))

mu <- coef(musigma)
names(mu) <- "mu"
sigma <- musigma\$scale
names(sigma) <- "sigma"

#summary of all estimated parameters
c(mu, sigma, gamma)
#log-likelihood of 3-parameter Weibull distribution
(loglikw <- musigma\$loglik)

##normal-approximation confidence intervals for MLEs

#log-likelihood function of 3-parameter Weibull model
llikweibull <- function (theta, y, d) {
mu <- theta
sigma <- theta
gamma <- theta
subset <- y-gamma>0
ys <- y[subset]
ds <- d[subset]
sum(log((( 1/(sigma*(ys-gamma)) * dsev( ((log(ys-gamma)-mu)/sigma) )) ^ds)*
( (1-psev( ((log(ys-gamma)-mu)/sigma) )) ^(1-ds) )))
}

#optimization of log-likelihood
weibull.mle <- optim(c(mu, sigma, gamma), llikweibull,
method='BFGS', control=list(fnscale=-1),
y=times, d=event, hessian=TRUE)
#MLEs for mu, sigma, and gamma
c(weibull.mle\$par, weibull.mle\$par, weibull.mle\$par)
#assess whether the MLEs are identical to the start values
#(this is usually a reliable indicator of the model's stability)
c(mu, sigma, gamma)
#log-likelihood
weibull.mle\$value
#variance covariance matrix
solve(-1*weibull.mle\$hessian)
#standard errors
SEparms <- sqrt(diag(solve(-1*weibull.mle\$hessian)))

#normal-approximation 95% confidence intervals
#these intervals are also known as Fisher matrix interval bounds
#95% confidence interval for mu
weibull.mle\$par + c(1,-1)*qnorm(.05/2)*SEparms
#95% confidence interval for sigma
#note: taking the log transformation of sigma guarrantees that the confidence
#bounds always take positive values
weibull.mle\$par*exp((c(1,-1)*qnorm(.05/2)*SEparms)/weibull.mle\$par)
#95% confidence interval for gamma
weibull.mle\$par + c(1,-1)*qnorm(.05/2)*SEparms

#shape and scale parameters
#beta (=shape)
(beta <- 1/sigma)
#eta (=scale)
exp(mu)

##likelihood based interval for gamma

#MLE for gamma
gamma
#fixed values of gamma
gammas <- seq(75, min(times[event==1])-1e-10, length.out=100)
#likelihood profile
ll <-sapply(gammas, profile.t, times=times, event=event)

#profile likelihood plot
plot(gammas,ll, type='l', xlab=expression(gamma), ylab="log-likelihood")
#include lower limit for log-likelihood values
#Lawless (p. 188) suggests that if beta>2 then use qchisq(.95, df=1),
#and if 1<beta<2, then employ qchisq(.95, df=2)
beta
#thus the limit becomes
limit <- loglikw - .5*qchisq(.95, df=2)
abline(h=limit,col="blue",lty=2)
#MLE of gamma
abline(v=gamma, lty=2, col="red")
#minimum of observed times
abline(v=min(times[event==1]), lty=2, col="darkgray")

#calculate the lower limit of the 95% confidence interval by hand
limitlkh <- function(gammas, times, event) {
profile.t(gammas, times, event) - limit
}

uniroot(limitlkh, c(76,80), times=times, event=event)

##predictions of fitted 3-parameter Weibull model

#predict quantiles (including normal-approximation confidence intervals)
weibullQp <- function (vcov, mu, sigma, gamma, p, alpha=.05){
Qp <- as.vector(exp(qsev(p)*sigma + mu) + gamma)
#take partial derivatives of the function q=exp(mu+qsev(p)*sigma)+gamma
dq.dmu <- exp(mu+qsev(p)*sigma) #with respect to mu
dq.dsigma <- qsev(p)*exp(mu+qsev(p)*sigma) #with respect to sigma
dq.dgamma <- 1 #with respect to gamma
dq.dtheta <- c(dq.dmu, dq.dsigma, dq.dgamma)
#delta method
seQ <- sqrt(t(dq.dtheta)%*%vcov%*%dq.dtheta)
#compute confidence interval
lcl <- Qp-qnorm(1-alpha/2)*seQ; ucl <- Qp+qnorm(1-alpha/2)*seQ
c(p=p, Quantile=Qp, std.err=seQ, lcl=lcl, ucl=ucl)
}

#specify proportions at which to predict the quantiles
ps <- seq(.001, .999, length.out=200)
#predict quantiles at specified proportions
Qps <- data.frame(t(sapply(ps, weibullQp, vcov=solve(-1*weibull.mle\$hessian),
mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
xlim=c(gamma, max(times)))
#predictions
lines(Qps\$Quantile, qsev(Qps\$p), col="blue")
#confidence intervals
lines(Qps\$lcl, qsev(Qps\$p), lty=2, col="red")
lines(Qps\$ucl, qsev(Qps\$p), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)

#predict cumulative proportions (including normal-approximation confidence intervals)
weibullFx <- function (vcov, mu, sigma, gamma, x, alpha=.05){
Fx <- as.vector(psev( (log(x-gamma)-mu ) / sigma))
Zx <- qsev(Fx)
#take partial derivatives of the function z=(log(x-gamma)-mu)/sigma
dz.dmu <- -1/sigma #with respect to mu
dz.dsigma <- (-1/sigma)*Zx #with respect to sigma
dz.dgamma <- (-1/sigma)*(1/(x-gamma)) #with respect to gamma
dz.dtheta <- c(dz.dmu, dz.dsigma, dz.dgamma)
#delta method
seZ <- sqrt(t(dz.dtheta)%*%vcov%*%dz.dtheta)
#compute confidence interval
w <- qnorm(1-alpha/2)*seZ
lcl <- (psev(Zx-w)); ucl <- (psev(Zx+w))
c(x=x, Fhat=Fx, std.err=seZ, lcl=lcl, ucl=ucl)
}

#specify times at which to predict the cumulative proportions
xs <- seq(min(times[event==1]), max(times[event==1]), length.out=200)
#predict cumulative proportions at specified times
Fxs <- data.frame(t(sapply(xs, weibullFx, vcov=solve(-1*weibull.mle\$hessian),
mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
xlim=c(gamma, max(times)))
#predictions
lines(Fxs\$x, qsev(Fxs\$Fhat), col="blue")
#confidence intervals
lines(Fxs\$x, qsev(Fxs\$lcl), lty=2, col="red")
lines(Fxs\$x, qsev(Fxs\$ucl), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)```