R code for fitting a three-parameter Fréchet distribution

The following code fits the three-parameter Fréchet distribution to (right) censored or complete (uncensored) data in R.

The R code implements a fitting strategy proposed by Jerry Lawless in his 2003 book Statistical models and methods for lifetime data (pp. 187-188). A similar strategy is suggested by Terry Therneau in this comment.

For an alternative way of fitting the 3-parameter Fréchet distribution see this blog post and this post.

See this page for an overview of all of Stefan’s R code blog posts.

R code

```library(SPREDA)

##data

#generate artificial data
nobs <- 40 #number of observations
obs <- exp(2.5 + rlev(nobs)*.80) + 50 #draw sample
#apply some kind of censoring (here: type II censoring)
#note: failure/death/event=1, censored=0
cens <- as.numeric(obs<sort(obs)[nobs-5])
maxobs <- sort(obs)[(nobs-5)]
obs <- ifelse(cens==0, maxobs, obs)

times <- obs
event <- cens

##probability plot
#check first whether fitting a 3-parameter Fréchet distribution is appropriate
#that is, does a probability plot show curvature?
#more specifically, is there any concave behavior in the lower tail?
#if no such concavity is visible, then consider fitting a 2-parameter Fréchet distribution
timesSorted <- times[order(times)]
eventSorted <- event[order(times)]
timesi <- unique(timesSorted[which(eventSorted==1)])

cumSurvRaw <- survfit(Surv(times, event)~ 1)
cumSurv <- unique(rep(cumSurvRaw\$surv, cumSurvRaw\$n.event))
cumFail <- 1-cumSurv
lagFail <- c(0, cumFail[-length(cumFail)])
Prob <- .5*(cumFail+lagFail)
yi <- qlev(Prob)

#probability plot
plot(timesi, yi, log="x", xlab="Time", ylab="Linearized CDF")

##determine threshold of 3-parameter Fréchet distribution

#profile function for threshold (=gamma)
profile.t <- function(gammahat, times, event) {
time <- times-gammahat
fit <- Lifedata.MLE(Surv(time, event) ~ 1, dist="frechet", subset=(time>0))
-fit\$min
}

#optimization
#note: for an estimate of gamma which coincides with the maximum in the profile likelihood
#plot (see below) you may sometimes have to vary the value of 1e+3 (e.g., set it to 1e+2)
gammaMLE <- optim(par=0, profile.t, method="L-BFGS-B",
upper=(1-(1/1e+3))*min(times[event==1]),
control=list(fnscale=-1),
times=times, event=event)

gamma <- gammaMLE\$par
names(gamma) <- "gamma"

#MLEs for mu and sigma
sgamma <- Surv(times-gamma, event)
musigma <- Lifedata.MLE(sgamma ~ 1, dist="frechet", subset=(times-gamma>0))

mu <- coef(musigma)[1]
names(mu) <- "mu"
sigma <- coef(musigma)[2]
names(sigma) <- "sigma"

#summary
c(mu, sigma, gamma)
#log-likelihood
(loglikw <- -musigma\$min)

##normal-approximation confidence intervals for MLEs

#log-likelihood function of 3-parameter Fréchet model
llikfrechet <- function (theta, y, d) {
mu <- theta[1]
sigma <- theta[2]
gamma <- theta[3]
subset <- y-gamma>0
ys <- y[subset]
ds <- d[subset]
sum(log((( 1/(sigma*(ys-gamma)) * dlev( ((log(ys-gamma)-mu)/sigma) )) ^ds)*
( (1-plev( ((log(ys-gamma)-mu)/sigma) )) ^(1-ds) )))
}

#optimization of log-likelihood
frechet.mle <- optim(c(mu, sigma, gamma), llikfrechet,
method='BFGS', control=list(fnscale=-1),
y=times, d=event, hessian=TRUE)
#MLEs for mu, sigma, and gamma
c(frechet.mle\$par[1], frechet.mle\$par[2], frechet.mle\$par[3])
#assess whether the MLEs are identical to the start values
#(this is usually a reliable indicator of the model's stability)
c(mu, sigma, gamma)
#log-likelihood
frechet.mle\$value
#variance covariance matrix
solve(-1*frechet.mle\$hessian)
#standard errors
SEparms <- sqrt(diag(solve(-1*frechet.mle\$hessian)))

#normal-approximation 95% confidence intervals
#these intervals are also known as Fisher matrix interval bounds
#95% confidence interval for mu
frechet.mle\$par[1] + c(1,-1)*qnorm(.05/2)*SEparms[1]
#95% confidence interval for sigma
#note: taking the log transformation of sigma guarrantees that the confidence
#bounds always take positive values
frechet.mle\$par[2]*exp((c(1,-1)*qnorm(.05/2)*SEparms[2])/frechet.mle\$par[2])
#95% confidence interval for gamma
frechet.mle\$par[3] + c(1,-1)*qnorm(.05/2)*SEparms[3]

##likelihood based interval for gamma

#MLE for gamma
gamma
#fixed values of gamma
gammas <- seq(0, min(times[event==1])-1e-1, length.out=100)
#likelihood profile
ll <-sapply(gammas, profile.t, times=times, event=event)

#plot profile likelihood
plot(gammas,ll, type='l', xlab=expression(gamma), ylab="log-likelihood")
#include lower limit for log-likelihood values
limit <- loglikw - .5*qchisq(.95, df=2)
abline(h=limit,col="blue",lty=2)
#MLE of gamma
abline(v=gamma, lty=2, col="red")
#minimum of observed times
abline(v=min(times[event==1]), lty=2, col="darkgray")

#calculate the lower limit of the 95% confidence interval by hand
limitlkh <- function(gammas, times, event) {
profile.t(gammas, times, event) - limit
}

lowerci <- uniroot(limitlkh, c(0, gamma), times=times, event=event)
upperci <- uniroot(limitlkh, c(gamma, min(times[event==1])-1e-1),
times=times, event=event)
c(lowerci\$root, upperci\$root)

##predictions of 3-parameter Fréchet model

#predict quantiles (including normal-approximation confidence intervals)
frechetQp <- function (vcov, mu, sigma, gamma, p, alpha=.05){
Qp <- as.vector(exp(qlev(p)*sigma + mu) + gamma)
#take partial derivatives of the function q=exp(mu+qlev(p)*sigma)+gamma
dq.dmu <- exp(mu+qlev(p)*sigma) #with respect to mu
dq.dsigma <- qlev(p)*exp(mu+qlev(p)*sigma) #with respect to sigma
dq.dgamma <- 1 #with respect to gamma
dq.dtheta <- c(dq.dmu, dq.dsigma, dq.dgamma)
#delta method
seQ <- sqrt(t(dq.dtheta)%*%vcov%*%dq.dtheta)
#compute confidence interval
lcl <- Qp-qnorm(1-alpha/2)*seQ; ucl <- Qp+qnorm(1-alpha/2)*seQ
c(p=p, Quantile=Qp, std.err=seQ, lcl=lcl, ucl=ucl)
}

#specify proportions at which to predict the quantiles
ps <- seq(.001, .999, length.out=200)
#predict quantiles at specified proportions
Qps <- data.frame(t(sapply(ps, frechetQp, vcov=solve(-1*frechet.mle\$hessian),
mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
xlim=c(gamma, max(times)))
#predictions
lines(Qps\$Quantile, qlev(Qps\$p), col="blue")
#confidence intervals
lines(Qps\$lcl, qlev(Qps\$p), lty=2, col="red")
lines(Qps\$ucl, qlev(Qps\$p), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)

#predict cumulative proportions (including normal-approximation confidence intervals)
frechetFx <- function (vcov, mu, sigma, gamma, x, alpha=.05){
Fx <- as.vector(plev( (log(x-gamma)-mu ) / sigma))
Zx <- qlev(Fx)
#take partial derivatives of the function z=(log(x-gamma)-mu)/sigma
dz.dmu <- -1/sigma #with respect to mu
dz.dsigma <- (-1/sigma)*Zx #with respect to sigma
dz.dgamma <- (-1/sigma)*(1/(x-gamma)) #with respect to gamma
dz.dtheta <- c(dz.dmu, dz.dsigma, dz.dgamma)
#delta method
seZ <- sqrt(t(dz.dtheta)%*%vcov%*%dz.dtheta)
#compute confidence interval
w <- qnorm(1-alpha/2)*seZ
lcl <- (plev(Zx-w)); ucl <- (plev(Zx+w))
c(x=x, Fhat=Fx, std.err=seZ, lcl=lcl, ucl=ucl)
}

#specify times at which to predict the cumulative proportions
xs <- seq(min(times[event==1]), max(times[event==1]), length.out=200)
#predict cumulative proportions at specified times
Fxs <- data.frame(t(sapply(xs, frechetFx, vcov=solve(-1*frechet.mle\$hessian),
mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
xlim=c(gamma, max(times)))
#predictions
lines(Fxs\$x, qlev(Fxs\$Fhat), col="blue")
#confidence intervals
lines(Fxs\$x, qlev(Fxs\$lcl), lty=2, col="red")
lines(Fxs\$x, qlev(Fxs\$ucl), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)```