# R code for computing likelihood based confidence intervals for the predicted probabilities of a logistic regression model

The following R code may be used for constructing two-sided likelihood based intervals for the predicted probabilities of a logistic regression model. These likelihood based intervals are also referred to as likelihood ratio bounds, or profile likelihood intervals. The example data in the R code was taken from this 2005 paper by Xu and Zhao. The data can be found in Example 1 of the paper.

See this page for an overview of all of Stefan’s R code blog posts.

R code

```library(dfoptim)
library(pracma)

##data
x1 <- c(0.0333, 0.0233, 0.0137, 0.0156, 0.0159, 0.0286, 0.0333, 0.0385,
0.0270, 0.0156, 0.0230, 0.0222, 0.0250, 0.0234, 0.0210, 0.0217)
x2 <- c(0.0217, 0.0345, 0.0454, 0.0385, 0.0392, 0.0263, 0.0313, 0.0357,
0.0354, 0.0294, 0.0353, 0.0387, 0.0454, 0.0294, 0.0333, 0.0355)
y <- c(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1)
degfail <- data.frame(y,x1,x2)

#fit logistic regression model to data
mod <- glm(y ~ x1+x2, data=degfail, family=binomial)
summary(mod)

##compute two-sided likelihood based confidence intervals for predicted probabilities

#log-likelihood function
llikpv <- function (theta, y, xobs, wt=rep(1,length(y)), newvec, pve) {
betas <- theta
dotprod <- betas%*%newvec
beta0 <- qlogis(pve) - dotprod
betaNew <- c(beta0, betas)
mu <- xobs%*%betaNew
#log-likelihood function
-(sum(wt*(y*mu)) - sum(wt*(log(1+exp(mu)))))
}

profileLogLik <- function(y, xobs, wt, parms, newvec, pves) {
plke <- rep(0, length(pves))

for (i in 1:length(pves)) {
temp <- nmk(parms, llikpv,
control=list(tol=1e-10),
y=y, xobs=xobs, wt=wt, newvec=newvec, pve=pves[i] )
plke[i] <- temp\$value
}
-plke
}

#profile function for predicted probability
profilePv <- function(object, range, nvalues=50, alpha=.95, newdata) {
y <- object\$y
xobs <- model.matrix(object)
wt <- object\$prior.weights
parms <- coef(object)[-1]
pves <- seq(range, range, length.out=nvalues)
newvec <- model.matrix(object=object\$formula[-2],
model.frame(formula=object\$formula[-2], data=newdata))[,-1]

Pv <- predict(object, newdata=newdata, type="response")

#profile predicted probability
ll <- profileLogLik(y, xobs, wt, parms, newvec, pves)

#compute confidence bounds
loglikw <- logLik(object)
limit <- loglikw - .5*qchisq(alpha, df=1)

limitlkh <- function(y, xobs, wt, parms, newvec, pves) {
profileLogLik(y, xobs, wt, parms, newvec, pves) - limit}

lowerci <- NA
upperci <- NA

try(lowerci <- round(fzero(limitlkh, c(range, Pv), y=y, xobs=xobs, wt=wt,
parms=parms, newvec=newvec)\$x, 6), TRUE)
try(upperci <- round(fzero(limitlkh, c(Pv, range), y=y, xobs=xobs, wt=wt,
parms=parms, newvec=newvec)\$x, 6), TRUE)

#plot profile
plot(pves, ll, type='l', xlab="Predicted probability",
ylab="log-likelihood")
abline(h=limit, col="blue",lty=2) #log-likelihood limit
abline(v=Pv, col="red", lty=2) #include MLE for predicted probability
#include interval bounds
if (!is.na(lowerci)) abline(v=lowerci, col="darkgreen", lty=2)
if (!is.na(upperci)) abline(v=upperci, col="darkgreen", lty=2)
#return bounds
c(lcl=lowerci, ucl=upperci)
}

#construct two-sided likelihood based confidence intervals for the
#predicted probability of a logistic regression model

#predicted probability at x1=0.0265 and x2=0.0377
predict(mod, newdata=data.frame(x1=0.0265, x2=0.0377), type="response")
#compute the lower (lcl) and upper (ucl) confidence limits
#at x1=0.0265 and x2=0.0377
#note: inspect whether the curve of the log-likelihood function
#intersects the blue line
profilePv(object=mod, range=c(.7, .9999), nvalues=200,
newdata=data.frame(x1=0.0265, x2=0.0377))
#on the left, the curve does not intersect with the blue line and lcl=NA
#therefore, increase the width of the range
profilePv(object=mod, range=c(.55, .9999), nvalues=200,
newdata=data.frame(x1=0.0265, x2=0.0377))

#compare this interval with the normal-approximation confidence interval
predval <- predict(mod, newdata=data.frame(x1=0.0265, x2=0.0377),
alpha <- .95
zstar <- qnorm((1-alpha)/2)
plogis(predval\$fit + c(1,-1)*zstar*predval\$se.fit)

#Note that the above normal-approximation interval is wider than the
#likelihood based confidence interval.
#However, simulations performed by the author of this blog post demonstrated that
#for small sample sizes, the coverage probabilities of the likelihood based confidence
#intervals were closer to the nominal coverage probability in comparison to
#the normal-approximation confidence intervals.
#Furthermore, these simulations showed that for small sample sizes the
#normal-approximation confidence intervals were wider compared to the
#likelihood based confidence intervals.
#These simulations are available from the author on request.

##compute the likelihood based confidence intervals at some other data points

#predicted probability at x1=0.022 and x2=0.035
predict(mod, newdata=data.frame(x1=0.022, x2=0.035), type="response")
#compute 95% likelihood based confidence limits at x1=0.022 and x2=0.035
profilePv(object=mod, range=c(.3, .9), newdata=data.frame(x1=0.022, x2=0.035))

#compute 90% likelihood based confidence limits at x1=0.022 and x2=0.035
profilePv(object=mod, range=c(.3, .9), alpha=.90,
newdata=data.frame(x1=0.022, x2=0.035))

#predicted probability at x1=0.015 and x2=0.025
predict(mod, newdata=data.frame(x1=0.015, x2=0.025), type="response")
#compute 95% likelihood based confidence limits at x1=0.015 and x2=0.025
profilePv(object=mod, range=c(1e-6, .5), nvalues=300,
newdata=data.frame(x1=0.015, x2=0.025))```