R code for fitting a loglinear-NHPP model to grouped temporal data

The following R code fits a nonhomogeneous Poisson process (NHPP) model to grouped temporal data. Grouped temporal data arise when only the number of recurrences within each time interval is known, rather than the exact recurrence times.
The R code fits an NHPP model with a loglinear intensity function, where the intensity at time t is given by e^{\gamma_{0} + \gamma_{1}t}.
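Integrating this intensity from 0 to t gives the intensity measure of the process, that is, the expected cumulative number of recurrences by time t: e^{\gamma_{0}}(e^{\gamma_{1}t} - 1)/\gamma_{1}. This is the expression implemented by the intensityMeasure function in the code below, and it reappears in the delta-method predictions of the cumulative number of recurrences.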
The parameters \gamma_{0} and \gamma_{1} are estimated by maximizing the grouped-data log-likelihood (up to an additive constant not depending on the parameters), \sum_{i} (N_{i} \log \mu_{i} - \mu_{i}), where N_{i} is the number of recurrences observed in the i-th time interval (t_{L,i}, t_{U,i}] and \mu_{i} = (e^{\gamma_{0} + \gamma_{1} t_{U,i}} - e^{\gamma_{0} + \gamma_{1} t_{L,i}})/\gamma_{1} is the expected number of recurrences in that interval.

See this post for fitting an NHPP model with a power law intensity function to grouped temporal data.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(car)

##data
#generate temporal data for a nonhomogeneous Poisson process
#having a loglinear intensity function
#(with the intensity at time t defined as: exp[gamma0 + gamma1*t] )

#function for computing the intensity measure of a loglinear-NHPP
intensityMeasure <- function (gamma0, gamma1, t) {
  exp(gamma0)*(exp(gamma1*t)-1)/gamma1}

#function for computing the inverse of the specified intensity measure
intensityMeasureInv <- function(gamma0, gamma1, s) {
  (1/gamma1)*log(gamma1*exp(-gamma0)*s + 1)}

Tend <- 26 #upper bound of observation window
t <- 0 #start of observation window
i <- 2 #initialize counter

#population values for gamma0 and gamma1 of the intensity measure
gamma0 <- -1.5
gamma1 <- 0.15
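
#optional sanity checks (a minimal illustration): the intensity measure should
#equal the integral of the intensity function over (0, t], and intensityMeasureInv
#should undo intensityMeasure (here checked at an arbitrary time t=10)
all.equal(intensityMeasure(gamma0, gamma1, 10),
          integrate(function(u) exp(gamma0 + gamma1*u), lower=0, upper=10)$value,
          tolerance=1e-6)
all.equal(intensityMeasureInv(gamma0, gamma1,
                              intensityMeasure(gamma0, gamma1, 10)), 10)

#for reproducible simulated data, a random seed may be fixed here (e.g., set.seed(123))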

#sequence of event times generated by the loglinear-NHPP
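#(simulation by inversion: the intensity measure evaluated at successive event
#times increases by independent Exp(1) increments, generated here as -log(runif(1)))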
while (t[length(t)] <= Tend) {
  Ti <- t[i-1]
  s <- intensityMeasure(gamma0, gamma1, Ti) - log(runif(1,0,1))
  Tnew <- intensityMeasureInv(gamma0, gamma1, s)
  t <- c(t, Tnew)
  i <- i+1
}
#remove t=0 and the final event time, since the latter time > Tend
t <- t[-c(1,length(t))]

#plot generated data
plot(t, 1:length(t), type="p", xlim=c(0,Tend),
     xlab="Time", ylab="Cumulative recurrences")



#create grouped temporal data

#construct time intervals
int <- seq(0, 25, 5)
#lower and upper bound of each time interval
tL <- int #lower bounds
tU <- c(int[-1], max(t)) #upper bounds

#plot the generated data with the boundaries of the time intervals included
plot(t, 1:length(t), type="p", xlim=c(0,Tend),
     xlab="Time", ylab="Cumulative recurrences")
abline(v=tU, lty=2, col="blue")

#assign observations to the time intervals
assignT <- findInterval(t, int)

#number of recurrences within each time interval
recurrences <- sapply(1:length(tU), function (i) sum(assignT==i))
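
#every simulated event time should fall into exactly one interval, so the
#interval counts should add up to the total number of simulated events
sum(recurrences) == length(t)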





##fit loglinear-NHPP model to grouped temporal data

#function for computing the log-likelihood of a grouped loglinear-NHPP model
llikNHPPg <- function (theta, LowerTimes, UpperTimes, Ns) {
  gamma0 <- theta[1]
  gamma1 <- theta[2]
  mu <- (exp(gamma0+gamma1*UpperTimes)-exp(gamma0+gamma1*LowerTimes))/gamma1
  sum(Ns*log(mu) - mu)
}
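
#quick illustration: evaluate the log-likelihood at the population values of
#gamma0 and gamma1 that were used to simulate the data
llikNHPPg(c(-1.5, 0.15), LowerTimes=tL, UpperTimes=tU, Ns=recurrences)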

#function for fitting a grouped loglinear-NHPP model
groupedNHPP <- function (LowerTimes, UpperTimes, Ns, starts=NULL) {
  #-LowerTimes is a vector with the lower values of the time intervals
  #-UpperTimes is a vector containing the upper values of the time intervals
  #-Ns is a vector with the total number of recurrences within each time interval
  #-starts is a vector with the starting values for gamma0 and gamma1, respectively
  
  #obtain starting values for gamma0 and gamma1
  if (is.null(starts)) {
    iwd <- UpperTimes-LowerTimes #width of each time interval
    midp <- (UpperTimes-LowerTimes)/2 #half-width of each time interval
    ti <- LowerTimes+midp #midpoint of each time interval
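    #a quasi-Poisson regression (log link) of the counts on the interval midpoints,
    #with the log of the interval width as offset, approximates the loglinear
    #intensity and thus yields rough starting values for gamma0 and gamma1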
    modstart <- glm(Ns~ti, family=quasi(link="log", variance="mu"), offset=log(iwd))
    starts <- as.vector(coef(modstart)) #starting values
  }
  
  #optimization of log-likelihood
  gloglin <- optim(starts, llikNHPPg,
                   method="BFGS", control=list(fnscale=-1),
                   LowerTimes=LowerTimes, UpperTimes=UpperTimes, Ns=Ns,
                   hessian=TRUE)
  parameters <- gloglin$par
  names(parameters) <- c("gamma0", "gamma1")
  loglik <- gloglin$value
  hessian <- gloglin$hessian
  
  #return values
  list(parameters=parameters, loglik=loglik, AIC=-2*loglik+2*length(parameters),
       hessian=hessian, LowerTimes=LowerTimes, UpperTimes=UpperTimes,
       Ns=Ns, starts=starts)
}



#fit loglinear-NHPP to grouped temporal data
(ppgLoglin <- groupedNHPP(LowerTimes=tL, UpperTimes=tU, Ns=recurrences))[1:3]

#95% confidence intervals (normal-approximation) for gamma0 and gamma1
#of the fitted loglinear-NHPP model
SEparms <- sqrt(diag(solve(-1*ppgLoglin$hessian)))
ppgLoglin$parameters[1] + c(1,-1)*qnorm(.05/2)*SEparms[1] #95% ci for gamma0
ppgLoglin$parameters[2] + c(1,-1)*qnorm(.05/2)*SEparms[2] #95% ci for gamma1
#compare these with the population values for gamma0 and gamma1
#remember, the population values for gamma0 and gamma1 were -1.5 and 0.15, respectively
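#one way of displaying the MLEs next to these population values:
rbind(MLE=ppgLoglin$parameters, population=c(gamma0=-1.5, gamma1=0.15))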



#prediction of the number of recurrences:
#observed/predicted cumulative number of recurrences at the end of a time interval

upperBoundInterval <- tU #upper bounds of time intervals
#compute predicted number of recurrences (and their standard errors)
#at the end of each time interval
SEs <- sapply(1:length(upperBoundInterval),
              function (i) deltaMethod(ppgLoglin$parameters,
                                       g="(exp(gamma0)*(exp(gamma1*ts)-1))/gamma1",
                                       vcov.=solve(-1*ppgLoglin$hessian),
                                       constants=c(ts=upperBoundInterval[i])))
#95% confidence interval for the predicted number of recurrences
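#(the interval is constructed on the log scale, which keeps the lower bound positive)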
llci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
luci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot observed cumulative number of recurrences at the end of each time interval
plot(upperBoundInterval, cumsum(recurrences),
     xlab="Upper bound time interval", ylab="Cumulative recurrences",
     ylim=c(min(llci), max(luci)))
#add predicted cumulative number of recurrences to the plot
points(upperBoundInterval, unlist(SEs[1,]), col="red")
#add error bars representing the 95% confidence intervals
arrows(upperBoundInterval, llci, upperBoundInterval , luci,
       code=3, length=0.1, angle=90, col="red")
legend("topleft", pch=1, col=c("black", "red"),
       legend=c("Observed", "Predicted"))





#finally, explore the log-likelihood function in the neighborhood
#of the starting values that were used for fitting the loglinear-NHPP model
#more specifically, locate the positions of both the starting values and the
#MLEs for gamma0 and gamma1 in a contour plot of log-likelihood values

#construct grid and calculate log-likelihood values
gamma0values <- seq(-3.5,-.5,length.out=75)
gamma1values <- seq(.05,.25,length.out=75)
lbgrid <- expand.grid(gamma0=gamma0values, gamma1=gamma1values)
lbgrid$ll <- apply(lbgrid, 1, llikNHPPg, LowerTimes=tL,
                   UpperTimes=tU, Ns=recurrences)
ll.matrix <- matrix(lbgrid$ll, nrow=length(gamma0values))

#contour plot of log-likelihood values
contour(gamma0values, gamma1values, ll.matrix,
        levels=seq(max(ll.matrix)+55, min(ll.matrix), length.out=20),
        xlab=bquote(gamma[.(0)]), ylab=bquote(gamma[.(1)]))
#position of starting values for gamma0 and gamma1 (blue cross)
points(x=ppgLoglin$starts[1], y=ppgLoglin$starts[2], col="blue", pch=4)
#position of MLEs for gamma0 and gamma1 (red dot)
points(x=ppgLoglin$parameters[1], y=ppgLoglin$parameters[2], col="red", pch=20)
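
#as a check, the grid point with the largest log-likelihood should lie
#close to the MLEs marked in the contour plot
lbgrid[which.max(lbgrid$ll), ]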