R code for computing likelihood based confidence intervals for the predicted probabilities of a logistic regression model

The following R code may be used for constructing two-sided likelihood based intervals for the predicted probabilities of a logistic regression model. These likelihood based intervals are also referred to as likelihood ratio bounds, or profile likelihood intervals.

logistic regression predicted probability

The example data in the R code was taken from this 2005 paper by Xu and Zhao. The data can be found in Example 1 of the paper.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(dfoptim)
library(pracma)


##data
x1 <- c(0.0333, 0.0233, 0.0137, 0.0156, 0.0159, 0.0286, 0.0333, 0.0385,
        0.0270, 0.0156, 0.0230, 0.0222, 0.0250, 0.0234, 0.0210, 0.0217)
x2 <- c(0.0217, 0.0345, 0.0454, 0.0385, 0.0392, 0.0263, 0.0313, 0.0357,
        0.0354, 0.0294, 0.0353, 0.0387, 0.0454, 0.0294, 0.0333, 0.0355)
y <- c(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1)
degfail <- data.frame(y,x1,x2)

#fit logistic regression model to data
mod <- glm(y ~ x1+x2, data=degfail, family=binomial)
summary(mod)



##compute two-sided likelihood based confidence intervals for predicted probabilities

#log-likelihood function
llikpv <- function (theta, y, xobs, wt=rep(1,length(y)), newvec, pve) {
  betas <- theta
  dotprod <- betas%*%newvec
  beta0 <- qlogis(pve) - dotprod
  betaNew <- c(beta0, betas)
  mu <- xobs%*%betaNew
  #log-likelihood function
  -(sum(wt*(y*mu)) - sum(wt*(log(1+exp(mu)))))
}

#profiling using Nelder-Mead
profileLogLik <- function(y, xobs, wt, parms, newvec, pves) {
  plke <- rep(0, length(pves))
  
  for (i in 1:length(pves)) {
    temp <- nmk(parms, llikpv,
                control=list(tol=1e-10),
                y=y, xobs=xobs, wt=wt, newvec=newvec, pve=pves[i] )
    plke[i] <- temp$value
  }
  -plke
}

#profile function for predicted probability
profilePv <- function(object, range, nvalues=50, alpha=.95, newdata) {
  y <- object$y
  xobs <- model.matrix(object)
  wt <- object$prior.weights
  parms <- coef(object)[-1]
  pves <- seq(range[1], range[2], length.out=nvalues)
  newvec <- model.matrix(object=object$formula[-2],
                         model.frame(formula=object$formula[-2], data=newdata))[,-1]
  
  Pv <- predict(object, newdata=newdata, type="response")
  
  #profile predicted probability
  ll <- profileLogLik(y, xobs, wt, parms, newvec, pves)
  
  #compute confidence bounds
  loglikw <- logLik(object)[1]
  limit <- loglikw - .5*qchisq(alpha, df=1)
  
  limitlkh <- function(y, xobs, wt, parms, newvec, pves) {
    profileLogLik(y, xobs, wt, parms, newvec, pves) - limit}
  
  lowerci <- NA
  upperci <- NA
  
  try(lowerci <- round(fzero(limitlkh, c(range[1], Pv), y=y, xobs=xobs, wt=wt,
                             parms=parms, newvec=newvec)$x, 6), TRUE)
  try(upperci <- round(fzero(limitlkh, c(Pv, range[2]), y=y, xobs=xobs, wt=wt,
                             parms=parms, newvec=newvec)$x, 6), TRUE)
  
  #plot profile
  plot(pves, ll, type='l', xlab="Predicted probability",
       ylab="log-likelihood")
  abline(h=limit, col="blue",lty=2) #log-likelihood limit
  abline(v=Pv, col="red", lty=2) #include MLE for predicted probability
  #include interval bounds
  if (!is.na(lowerci)) abline(v=lowerci, col="darkgreen", lty=2)
  if (!is.na(upperci)) abline(v=upperci, col="darkgreen", lty=2)
  #return bounds
  c(lcl=lowerci, ucl=upperci)
}



#construct two-sided likelihood based confidence intervals for the
#predicted probability of a logistic regression model

#predicted probability at x1=0.0265 and x2=0.0377
predict(mod, newdata=data.frame(x1=0.0265, x2=0.0377), type="response")
#compute the lower (lcl) and upper (ucl) confidence limits
#at x1=0.0265 and x2=0.0377
#note: inspect whether the curve of the log-likelihood function
#intersects the blue line
profilePv(object=mod, range=c(.7, .9999), nvalues=200,
          newdata=data.frame(x1=0.0265, x2=0.0377))
#on the left, the curve does not intersect with the blue line and lcl=NA
#therefore, increase the width of the range
profilePv(object=mod, range=c(.55, .9999), nvalues=200,
          newdata=data.frame(x1=0.0265, x2=0.0377))

#compare this interval with the normal-approximation confidence interval
predval <- predict(mod, newdata=data.frame(x1=0.0265, x2=0.0377),
                   type="link", se.fit=TRUE)
alpha <- .95
zstar <- qnorm((1-alpha)/2)
plogis(predval$fit + c(1,-1)*zstar*predval$se.fit)

#Note that the above normal-approximation interval is wider than the
#likelihood based confidence interval.
#However, simulations performed by the author of this blog post demonstrated that
#for small sample sizes, the coverage probabilities of the likelihood based confidence
#intervals were closer to the nominal coverage probability in comparison to
#the normal-approximation confidence intervals.
#Furthermore, these simulations showed that for small sample sizes the
#normal-approximation confidence intervals were wider compared to the
#likelihood based confidence intervals.
#These simulations are available from the author on request.





##compute the likelihood based confidence intervals at some other data points

#predicted probability at x1=0.022 and x2=0.035
predict(mod, newdata=data.frame(x1=0.022, x2=0.035), type="response")
#compute 95% likelihood based confidence limits at x1=0.022 and x2=0.035
profilePv(object=mod, range=c(.3, .9), newdata=data.frame(x1=0.022, x2=0.035))

#compute 90% likelihood based confidence limits at x1=0.022 and x2=0.035
profilePv(object=mod, range=c(.3, .9), alpha=.90,
          newdata=data.frame(x1=0.022, x2=0.035))

#predicted probability at x1=0.015 and x2=0.025
predict(mod, newdata=data.frame(x1=0.015, x2=0.025), type="response")
#compute 95% likelihood based confidence limits at x1=0.015 and x2=0.025
profilePv(object=mod, range=c(1e-6, .5), nvalues=300,
          newdata=data.frame(x1=0.015, x2=0.025))