R code for fitting a three-parameter Fréchet distribution

The following code fits the three-parameter Fréchet distribution to (right) censored or complete (uncensored) data in R.

The R code implements a fitting strategy proposed by Jerry Lawless in his 2003 book Statistical models and methods for lifetime data (pp. 187-188). A similar strategy is suggested by Terry Therneau in this comment.

Frechet 3 parameter

For an alternative way of fitting the 3-parameter Fréchet distribution see this blog post and this post.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(SPREDA)

##data

#generate artificial data
nobs <- 40 #number of observations
obs <- exp(2.5 + rlev(nobs)*.80) + 50 #draw sample
#apply some kind of censoring (here: type II censoring)
#note: failure/death/event=1, censored=0
cens <- as.numeric(obs<sort(obs)[nobs-5])
maxobs <- sort(obs)[(nobs-5)]
obs <- ifelse(cens==0, maxobs, obs)

times <- obs
event <- cens



##probability plot
#check first whether fitting a 3-parameter Fréchet distribution is appropriate
#that is, does a probability plot show curvature?
#more specifically, is there any concave behavior in the lower tail?
#if no such concavity is visible, then consider fitting a 2-parameter Fréchet distribution
timesSorted <- times[order(times)]
eventSorted <- event[order(times)]
timesi <- unique(timesSorted[which(eventSorted==1)])

cumSurvRaw <- survfit(Surv(times, event)~ 1)
cumSurv <- unique(rep(cumSurvRaw$surv, cumSurvRaw$n.event))
cumFail <- 1-cumSurv
lagFail <- c(0, cumFail[-length(cumFail)])
Prob <- .5*(cumFail+lagFail)
yi <- qlev(Prob)

#probability plot
plot(timesi, yi, log="x", xlab="Time", ylab="Linearized CDF")



##determine threshold of 3-parameter Fréchet distribution

#profile function for threshold (=gamma)
profile.t <- function(gammahat, times, event) {
  time <- times-gammahat
  fit <- Lifedata.MLE(Surv(time, event) ~ 1, dist="frechet", subset=(time>0))
  -fit$min
}

#optimization
#note: for an estimate of gamma which coincides with the maximum in the profile likelihood
#plot (see below) you may sometimes have to vary the value of 1e+3 (e.g., set it to 1e+2)
gammaMLE <- optim(par=0, profile.t, method="L-BFGS-B",
                  upper=(1-(1/1e+3))*min(times[event==1]),
                  control=list(fnscale=-1),
                  times=times, event=event)

gamma <- gammaMLE$par
names(gamma) <- "gamma"

#MLEs for mu and sigma
sgamma <- Surv(times-gamma, event)
musigma <- Lifedata.MLE(sgamma ~ 1, dist="frechet", subset=(times-gamma>0))

mu <- coef(musigma)[1]
names(mu) <- "mu"
sigma <- coef(musigma)[2]
names(sigma) <- "sigma"

#summary
c(mu, sigma, gamma)
#log-likelihood
(loglikw <- -musigma$min)



##normal-approximation confidence intervals for MLEs

#log-likelihood function of 3-parameter Fréchet model
llikfrechet <- function (theta, y, d) {
  mu <- theta[1]
  sigma <- theta[2]
  gamma <- theta[3]
  subset <- y-gamma>0
  ys <- y[subset]
  ds <- d[subset]
  sum(log((( 1/(sigma*(ys-gamma)) * dlev( ((log(ys-gamma)-mu)/sigma) )) ^ds)*
            ( (1-plev( ((log(ys-gamma)-mu)/sigma) )) ^(1-ds) )))
}

#optimization of log-likelihood
frechet.mle <- optim(c(mu, sigma, gamma), llikfrechet,
                     method='BFGS', control=list(fnscale=-1),
                     y=times, d=event, hessian=TRUE)
#MLEs for mu, sigma, and gamma
c(frechet.mle$par[1], frechet.mle$par[2], frechet.mle$par[3])
#assess whether the MLEs are identical to the start values
#(this is usually a reliable indicator of the model's stability)
c(mu, sigma, gamma)
#log-likelihood
frechet.mle$value
#variance covariance matrix
solve(-1*frechet.mle$hessian)
#standard errors
SEparms <- sqrt(diag(solve(-1*frechet.mle$hessian)))

#normal-approximation 95% confidence intervals
#these intervals are also known as Fisher matrix interval bounds
#95% confidence interval for mu
frechet.mle$par[1] + c(1,-1)*qnorm(.05/2)*SEparms[1]
#95% confidence interval for sigma
#note: taking the log transformation of sigma guarrantees that the confidence
#bounds always take positive values
frechet.mle$par[2]*exp((c(1,-1)*qnorm(.05/2)*SEparms[2])/frechet.mle$par[2])
#95% confidence interval for gamma
frechet.mle$par[3] + c(1,-1)*qnorm(.05/2)*SEparms[3]



##likelihood based interval for gamma

#MLE for gamma
gamma
#fixed values of gamma
gammas <- seq(0, min(times[event==1])-1e-1, length.out=100)
#likelihood profile
ll <-sapply(gammas, profile.t, times=times, event=event)

#plot profile likelihood
plot(gammas,ll, type='l', xlab=expression(gamma), ylab="log-likelihood")
#include lower limit for log-likelihood values
limit <- loglikw - .5*qchisq(.95, df=2)
abline(h=limit,col="blue",lty=2)
#MLE of gamma
abline(v=gamma, lty=2, col="red")
#minimum of observed times
abline(v=min(times[event==1]), lty=2, col="darkgray")

#calculate the lower limit of the 95% confidence interval by hand
limitlkh <- function(gammas, times, event) {
  profile.t(gammas, times, event) - limit
}

lowerci <- uniroot(limitlkh, c(0, gamma), times=times, event=event)
upperci <- uniroot(limitlkh, c(gamma, min(times[event==1])-1e-1),
                   times=times, event=event)
c(lowerci$root, upperci$root)



##predictions of 3-parameter Fréchet model

#predict quantiles (including normal-approximation confidence intervals)
frechetQp <- function (vcov, mu, sigma, gamma, p, alpha=.05){
  Qp <- as.vector(exp(qlev(p)*sigma + mu) + gamma)
  #take partial derivatives of the function q=exp(mu+qlev(p)*sigma)+gamma
  dq.dmu <- exp(mu+qlev(p)*sigma) #with respect to mu
  dq.dsigma <- qlev(p)*exp(mu+qlev(p)*sigma) #with respect to sigma
  dq.dgamma <- 1 #with respect to gamma
  dq.dtheta <- c(dq.dmu, dq.dsigma, dq.dgamma)
  #delta method
  seQ <- sqrt(t(dq.dtheta)%*%vcov%*%dq.dtheta)
  #compute confidence interval
  lcl <- Qp-qnorm(1-alpha/2)*seQ; ucl <- Qp+qnorm(1-alpha/2)*seQ
  c(p=p, Quantile=Qp, std.err=seQ, lcl=lcl, ucl=ucl)
}

#specify proportions at which to predict the quantiles
ps <- seq(.001, .999, length.out=200)
#predict quantiles at specified proportions
Qps <- data.frame(t(sapply(ps, frechetQp, vcov=solve(-1*frechet.mle$hessian),
                           mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
     xlim=c(gamma, max(times)))
#predictions
lines(Qps$Quantile, qlev(Qps$p), col="blue")
#confidence intervals
lines(Qps$lcl, qlev(Qps$p), lty=2, col="red")
lines(Qps$ucl, qlev(Qps$p), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)



#predict cumulative proportions (including normal-approximation confidence intervals)
frechetFx <- function (vcov, mu, sigma, gamma, x, alpha=.05){
  Fx <- as.vector(plev( (log(x-gamma)-mu ) / sigma))
  Zx <- qlev(Fx)
  #take partial derivatives of the function z=(log(x-gamma)-mu)/sigma
  dz.dmu <- -1/sigma #with respect to mu
  dz.dsigma <- (-1/sigma)*Zx #with respect to sigma
  dz.dgamma <- (-1/sigma)*(1/(x-gamma)) #with respect to gamma
  dz.dtheta <- c(dz.dmu, dz.dsigma, dz.dgamma)
  #delta method
  seZ <- sqrt(t(dz.dtheta)%*%vcov%*%dz.dtheta)
  #compute confidence interval
  w <- qnorm(1-alpha/2)*seZ
  lcl <- (plev(Zx-w)); ucl <- (plev(Zx+w))
  c(x=x, Fhat=Fx, std.err=seZ, lcl=lcl, ucl=ucl)
}

#specify times at which to predict the cumulative proportions
xs <- seq(min(times[event==1]), max(times[event==1]), length.out=200)
#predict cumulative proportions at specified times
Fxs <- data.frame(t(sapply(xs, frechetFx, vcov=solve(-1*frechet.mle$hessian),
                           mu=mu, sigma=sigma, gamma=gamma, alpha=.05)))

#plot predictions
plot(timesi, yi, type="p", xlab="Time", log="x", ylab="Linearized CDF",
     xlim=c(gamma, max(times)))
#predictions
lines(Fxs$x, qlev(Fxs$Fhat), col="blue")
#confidence intervals
lines(Fxs$x, qlev(Fxs$lcl), lty=2, col="red")
lines(Fxs$x, qlev(Fxs$ucl), lty=2, col="red")
#include threshold (=gamma)
abline(v=gamma, col="darkgray", lty=2)