The following R code may be used for constructing two-sided likelihood based intervals for the predicted probabilities of a logistic regression model. These likelihood based intervals are also referred to as likelihood ratio bounds, or profile likelihood intervals.
The example data in the R code was taken from this 2005 paper by Xu and Zhao. The data can be found in Example 1 of the paper.
Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.
R code
library(dfoptim)
library(pracma)
##data
x1 <- c(0.0333, 0.0233, 0.0137, 0.0156, 0.0159, 0.0286, 0.0333, 0.0385,
0.0270, 0.0156, 0.0230, 0.0222, 0.0250, 0.0234, 0.0210, 0.0217)
x2 <- c(0.0217, 0.0345, 0.0454, 0.0385, 0.0392, 0.0263, 0.0313, 0.0357,
0.0354, 0.0294, 0.0353, 0.0387, 0.0454, 0.0294, 0.0333, 0.0355)
y <- c(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1)
degfail <- data.frame(y,x1,x2)
#fit logistic regression model to data
mod <- glm(y ~ x1+x2, data=degfail, family=binomial)
summary(mod)
##compute two-sided likelihood based confidence intervals for predicted probabilities
#log-likelihood function
llikpv <- function (theta, y, xobs, wt=rep(1,length(y)), newvec, pve) {
betas <- theta
dotprod <- betas%*%newvec
beta0 <- qlogis(pve) - dotprod
betaNew <- c(beta0, betas)
mu <- xobs%*%betaNew
#log-likelihood function
-(sum(wt*(y*mu)) - sum(wt*(log(1+exp(mu)))))
}
#profiling using Nelder-Mead
profileLogLik <- function(y, xobs, wt, parms, newvec, pves) {
plke <- rep(0, length(pves))
for (i in 1:length(pves)) {
temp <- nmk(parms, llikpv,
control=list(tol=1e-10),
y=y, xobs=xobs, wt=wt, newvec=newvec, pve=pves[i] )
plke[i] <- temp$value
}
-plke
}
#profile function for predicted probability
profilePv <- function(object, range, nvalues=50, alpha=.95, newdata) {
y <- object$y
xobs <- model.matrix(object)
wt <- object$prior.weights
parms <- coef(object)[-1]
pves <- seq(range[1], range[2], length.out=nvalues)
newvec <- model.matrix(object=object$formula[-2],
model.frame(formula=object$formula[-2], data=newdata))[,-1]
Pv <- predict(object, newdata=newdata, type="response")
#profile predicted probability
ll <- profileLogLik(y, xobs, wt, parms, newvec, pves)
#compute confidence bounds
loglikw <- logLik(object)[1]
limit <- loglikw - .5*qchisq(alpha, df=1)
limitlkh <- function(y, xobs, wt, parms, newvec, pves) {
profileLogLik(y, xobs, wt, parms, newvec, pves) - limit}
lowerci <- NA
upperci <- NA
try(lowerci <- round(fzero(limitlkh, c(range[1], Pv), y=y, xobs=xobs, wt=wt,
parms=parms, newvec=newvec)$x, 6), TRUE)
try(upperci <- round(fzero(limitlkh, c(Pv, range[2]), y=y, xobs=xobs, wt=wt,
parms=parms, newvec=newvec)$x, 6), TRUE)
#plot profile
plot(pves, ll, type='l', xlab="Predicted probability",
ylab="log-likelihood")
abline(h=limit, col="blue",lty=2) #log-likelihood limit
abline(v=Pv, col="red", lty=2) #include MLE for predicted probability
#include interval bounds
if (!is.na(lowerci)) abline(v=lowerci, col="darkgreen", lty=2)
if (!is.na(upperci)) abline(v=upperci, col="darkgreen", lty=2)
#return bounds
c(lcl=lowerci, ucl=upperci)
}
#construct two-sided likelihood based confidence intervals for the
#predicted probability of a logistic regression model
#predicted probability at x1=0.0265 and x2=0.0377
predict(mod, newdata=data.frame(x1=0.0265, x2=0.0377), type="response")
#compute the lower (lcl) and upper (ucl) confidence limits
#at x1=0.0265 and x2=0.0377
#note: inspect whether the curve of the log-likelihood function
#intersects the blue line
profilePv(object=mod, range=c(.7, .9999), nvalues=200,
newdata=data.frame(x1=0.0265, x2=0.0377))
#on the left, the curve does not intersect with the blue line and lcl=NA
#therefore, increase the width of the range
profilePv(object=mod, range=c(.55, .9999), nvalues=200,
newdata=data.frame(x1=0.0265, x2=0.0377))
#compare this interval with the normal-approximation confidence interval
predval <- predict(mod, newdata=data.frame(x1=0.0265, x2=0.0377),
type="link", se.fit=TRUE)
alpha <- .95
zstar <- qnorm((1-alpha)/2)
plogis(predval$fit + c(1,-1)*zstar*predval$se.fit)
#Note that the above normal-approximation interval is wider than the
#likelihood based confidence interval.
#However, simulations performed by the author of this blog post demonstrated that
#for small sample sizes, the coverage probabilities of the likelihood based confidence
#intervals were closer to the nominal coverage probability in comparison to
#the normal-approximation confidence intervals.
#Furthermore, these simulations showed that for small sample sizes the
#normal-approximation confidence intervals were wider compared to the
#likelihood based confidence intervals.
#These simulations are available from the author on request.
##compute the likelihood based confidence intervals at some other data points
#predicted probability at x1=0.022 and x2=0.035
predict(mod, newdata=data.frame(x1=0.022, x2=0.035), type="response")
#compute 95% likelihood based confidence limits at x1=0.022 and x2=0.035
profilePv(object=mod, range=c(.3, .9), newdata=data.frame(x1=0.022, x2=0.035))
#compute 90% likelihood based confidence limits at x1=0.022 and x2=0.035
profilePv(object=mod, range=c(.3, .9), alpha=.90,
newdata=data.frame(x1=0.022, x2=0.035))
#predicted probability at x1=0.015 and x2=0.025
predict(mod, newdata=data.frame(x1=0.015, x2=0.025), type="response")
#compute 95% likelihood based confidence limits at x1=0.015 and x2=0.025
profilePv(object=mod, range=c(1e-6, .5), nvalues=300,
newdata=data.frame(x1=0.015, x2=0.025))