# R code for fitting a loglinear-NHPP model to grouped temporal data

The following R code fits a NonHomogeneous Poisson Process (NHPP) model to grouped temporal data. Grouped temporal data occurs when only the number of recurrences within a given time interval are known.
The R code fits a NHPP model with a loglinear intensity function, where the intensity at time t is given by $e^{\gamma_{0} + \gamma_{1}t}$ . See this post for fitting a NHPP model with a power law intensity function to grouped temporal data.

See this page for an overview of all of Stefan’s R code blog posts.

R code

library(car)

##data
#generate temporal data for a nonhomogeneous Poisson process
#having a loglinear intensity function
#(with the intensity at time t defined as: exp[gamma0 + gamma1*t] )

#function for computing the intensity measure of a loglinear-NHPP
intensityMeasure <- function (gamma0, gamma1, t) {
exp(gamma0)*(exp(gamma1*t)-1)/gamma1}

#function for computing the inverse of the specified intensity measure
intensityMeasureInv <- function(gamma0, gamma1, s) {
(1/gamma1)*log(gamma1*exp(-gamma0)*s + 1)}

Tend <- 26 #upper bound of observation window
t <- 0 #start of observation window
i <- 2 #initialize counter

#population values for gamma0 and gamma1 of the intensity measure
gamma0 <- -1.5
gamma1 <- 0.15

#sequence of event times generated by the loglinear-NHPP
while (t[length(t)] <= Tend) {
Ti <- t[i-1]
s <- intensityMeasure(gamma0, gamma1, Ti) - log(runif(1,0,1))
Tnew <- intensityMeasureInv(gamma0, gamma1, s)
t <- c(t, Tnew)
i <- i+1
}
#remove t=0 and the final event time, since the latter time > Tend
t <- t[-c(1,length(t))]

#plot generated data
plot(t, 1:length(t), type="p", xlim=c(0,Tend),
xlab="Time", ylab="Cumulative recurrences")

#create grouped temporal data

#construct time intervals
int <- seq(0, 25, 5)
#lower and upper bound of each time interval
tL <- int #lower bounds
tU <- c(int[-1], max(t)) #upper bounds

#plot the generated data with the boundaries of the time intervals included
plot(t, 1:length(t), type="p", xlim=c(0,Tend),
xlab="Time", ylab="Cumulative recurrences")
abline(v=tU, lty=2, col="blue")

#assign observations to the time intervals
assignT <- findInterval(t, int)

#number of recurrences within each time interval
recurrences <- sapply(1:length(tU), function (i) sum(assignT==i))

##fit loglinear-NHPP model to grouped temporal data

#function for computing the log-likelihood of a grouped loglinear-NHPP model
llikNHPPg <- function (theta, LowerTimes, UpperTimes, Ns) {
gamma0 <- theta
gamma1 <- theta
mu <- (exp(gamma0+gamma1*UpperTimes)-exp(gamma0+gamma1*LowerTimes))/gamma1
sum(Ns*log(mu) - mu)
}

#function for fitting a grouped loglinear-NHPP model
groupedNHPP <- function (LowerTimes, UpperTimes, Ns, starts=NULL) {
#-LowerTimes is a vector with the lower values of the time intervals
#-UpperTimes is a vector containing the upper values of the time intervals
#-Ns is a vector with the total number of recurrences within each time interval
#-starts is a vector with the starting values for gamma0 and gamma1, respectively

#obtain starting values for gamma0 and gamma1
if (is.null(starts)) {
iwd <- UpperTimes-LowerTimes #width of time interval
midp <- (UpperTimes-LowerTimes)/2 #midpoint of time interval
ti <- LowerTimes+midp
modstart <- glm(Ns~ti, family=quasi(link="log", variance="mu"), offset=log(iwd))
starts <- as.vector(coef(modstart)) #starting values
}

#optimization of log-likelihood
gloglin <- optim(starts, llikNHPPg,
method="BFGS", control=list(fnscale=-1),
LowerTimes=LowerTimes, UpperTimes=UpperTimes, Ns=Ns,
hessian=TRUE)
parameters <- gloglin$par names(parameters) <- c("gamma0", "gamma1") loglik <- gloglin$value
hessian <- gloglin$hessian #return values list(parameters=parameters, loglik=loglik, AIC=-2*loglik+2*length(parameters), hessian=hessian, LowerTimes=LowerTimes, UpperTimes=UpperTimes, Ns=Ns, starts=starts) } #fit loglinear-NHPP to grouped temporal data (ppgLoglin <- groupedNHPP(LowerTimes=tL, UpperTimes=tU, Ns=recurrences))[1:3] #95% confidence intervals (normal-approximation) for gamma0 and gamma1 #of the fitted loglinear-NHPP model SEparms <- sqrt(diag(solve(-1*ppgLoglin$hessian)))
ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma0 ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1
#compare these with the population values for gamma0 and gamma1
#remember, the population values for gamma0 and gamma1 were -1.5 and 0.15, respectively

#prediction of the number of recurrences:
#observed/predicted cumulative number of recurrences at the end of a time interval

upperBoundInterval <- tU #upper bounds of time intervals
#compute predicted number of recurrences (and their standard errors)
#at the end of each time interval
SEs <- sapply(1:length(upperBoundInterval),
function (i) deltaMethod(ppgLoglin$parameters, g="(exp(gamma0)*(exp(gamma1*ts)-1))/gamma1", vcov.=solve(-1*ppgLoglin$hessian),
constants=c(ts=upperBoundInterval[i])))
#95% confidence interval for the predicted number of recurrences
llci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
luci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot observed cumulative number of recurrences at the end of each time interval
plot(upperBoundInterval, cumsum(recurrences),
xlab="Upper bound time interval", ylab="Cumulative recurrences",
ylim=c(min(llci), max(luci)))
#add predicted cumulative number of recurrences to the plot
points(upperBoundInterval, unlist(SEs[1,]), col="red")
#add error bars representing the 95% confindence intervals
arrows(upperBoundInterval, llci, upperBoundInterval , luci,
code=3, length=0.1, angle=90, col="red")
legend("topleft", pch=1, col=c("black", "red"),
legend=c("Observed", "Predicted"))

#finally, explore the log-likelihood function in the neighborhood
#of the starting values that were used for fitting the loglinear-NHPP model
#more specifically, locate the positions of both the starting values and the
#MLEs for gamma0 and gamma1 in a contour plot of log-likelihood values

#construct grid and calculate log-likelihood values
gamma0values <- seq(-3.5,-.5,length.out=75)
gamma1values <- seq(.05,.25,length.out=75)
lbgrid <- expand.grid(gamma0=gamma0values, gamma1=gamma1values)
lbgrid$ll <- apply(lbgrid, 1, llikNHPPg, LowerTimes=tL, UpperTimes=tU, Ns=recurrences) ll.matrix <- matrix(lbgrid$ll, nrow=length(gamma0values))

#contour plot of log-likelihood values
contour(gamma0values, gamma1values, ll.matrix,
levels=seq(max(ll.matrix)+55, min(ll.matrix), length.out=20),
xlab=bquote(gamma[.(0)]), ylab=bquote(gamma[.(1)]))
#position of starting values for gamma0 and gamma1 (blue cross)
points(x=ppgLoglin$starts, y=ppgLoglin$starts, col="blue", pch=4)
#position of MLEs for gamma0 and gamma1 (red dot)
points(x=ppgLoglin$parameters, y=ppgLoglin$parameters, col="red", pch=20)