The following R code fits a NonHomogeneous Poisson Process (NHPP) model to grouped temporal data. Grouped temporal data occurs when only the number of recurrences within a given time interval are known.
The R code fits a NHPP model with a loglinear intensity function, where the intensity at time t is given by .
See this post for fitting a NHPP model with a power law intensity function to grouped temporal data.
Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.
R code
library(car)
##data
#generate temporal data for a nonhomogeneous Poisson process
#having a loglinear intensity function
#(with the intensity at time t defined as: exp[gamma0 + gamma1*t] )
#function for computing the intensity measure of a loglinear-NHPP
intensityMeasure <- function (gamma0, gamma1, t) {
exp(gamma0)*(exp(gamma1*t)-1)/gamma1}
#function for computing the inverse of the specified intensity measure
intensityMeasureInv <- function(gamma0, gamma1, s) {
(1/gamma1)*log(gamma1*exp(-gamma0)*s + 1)}
Tend <- 26 #upper bound of observation window
t <- 0 #start of observation window
i <- 2 #initialize counter
#population values for gamma0 and gamma1 of the intensity measure
gamma0 <- -1.5
gamma1 <- 0.15
#sequence of event times generated by the loglinear-NHPP
while (t[length(t)] <= Tend) {
Ti <- t[i-1]
s <- intensityMeasure(gamma0, gamma1, Ti) - log(runif(1,0,1))
Tnew <- intensityMeasureInv(gamma0, gamma1, s)
t <- c(t, Tnew)
i <- i+1
}
#remove t=0 and the final event time, since the latter time > Tend
t <- t[-c(1,length(t))]
#plot generated data
plot(t, 1:length(t), type="p", xlim=c(0,Tend),
xlab="Time", ylab="Cumulative recurrences")
#create grouped temporal data
#construct time intervals
int <- seq(0, 25, 5)
#lower and upper bound of each time interval
tL <- int #lower bounds
tU <- c(int[-1], max(t)) #upper bounds
#plot the generated data with the boundaries of the time intervals included
plot(t, 1:length(t), type="p", xlim=c(0,Tend),
xlab="Time", ylab="Cumulative recurrences")
abline(v=tU, lty=2, col="blue")
#assign observations to the time intervals
assignT <- findInterval(t, int)
#number of recurrences within each time interval
recurrences <- sapply(1:length(tU), function (i) sum(assignT==i))
##fit loglinear-NHPP model to grouped temporal data
#function for computing the log-likelihood of a grouped loglinear-NHPP model
llikNHPPg <- function (theta, LowerTimes, UpperTimes, Ns) {
gamma0 <- theta[1]
gamma1 <- theta[2]
mu <- (exp(gamma0+gamma1*UpperTimes)-exp(gamma0+gamma1*LowerTimes))/gamma1
sum(Ns*log(mu) - mu)
}
#function for fitting a grouped loglinear-NHPP model
groupedNHPP <- function (LowerTimes, UpperTimes, Ns, starts=NULL) {
#-LowerTimes is a vector with the lower values of the time intervals
#-UpperTimes is a vector containing the upper values of the time intervals
#-Ns is a vector with the total number of recurrences within each time interval
#-starts is a vector with the starting values for gamma0 and gamma1, respectively
#obtain starting values for gamma0 and gamma1
if (is.null(starts)) {
iwd <- UpperTimes-LowerTimes #width of time interval
midp <- (UpperTimes-LowerTimes)/2 #midpoint of time interval
ti <- LowerTimes+midp
modstart <- glm(Ns~ti, family=quasi(link="log", variance="mu"), offset=log(iwd))
starts <- as.vector(coef(modstart)) #starting values
}
#optimization of log-likelihood
gloglin <- optim(starts, llikNHPPg,
method="BFGS", control=list(fnscale=-1),
LowerTimes=LowerTimes, UpperTimes=UpperTimes, Ns=Ns,
hessian=TRUE)
parameters <- gloglin$par
names(parameters) <- c("gamma0", "gamma1")
loglik <- gloglin$value
hessian <- gloglin$hessian
#return values
list(parameters=parameters, loglik=loglik, AIC=-2*loglik+2*length(parameters),
hessian=hessian, LowerTimes=LowerTimes, UpperTimes=UpperTimes,
Ns=Ns, starts=starts)
}
#fit loglinear-NHPP to grouped temporal data
(ppgLoglin <- groupedNHPP(LowerTimes=tL, UpperTimes=tU, Ns=recurrences))[1:3]
#95% confidence intervals (normal-approximation) for gamma0 and gamma1
#of the fitted loglinear-NHPP model
SEparms <- sqrt(diag(solve(-1*ppgLoglin$hessian)))
ppgLoglin$parameters[1] + c(1,-1)*qnorm(.05/2)*SEparms[1] #95% ci for gamma0
ppgLoglin$parameters[2] + c(1,-1)*qnorm(.05/2)*SEparms[2] #95% ci for gamma1
#compare these with the population values for gamma0 and gamma1
#remember, the population values for gamma0 and gamma1 were -1.5 and 0.15, respectively
#prediction of the number of recurrences:
#observed/predicted cumulative number of recurrences at the end of a time interval
upperBoundInterval <- tU #upper bounds of time intervals
#compute predicted number of recurrences (and their standard errors)
#at the end of each time interval
SEs <- sapply(1:length(upperBoundInterval),
function (i) deltaMethod(ppgLoglin$parameters,
g="(exp(gamma0)*(exp(gamma1*ts)-1))/gamma1",
vcov.=solve(-1*ppgLoglin$hessian),
constants=c(ts=upperBoundInterval[i])))
#95% confidence interval for the predicted number of recurrences
llci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
luci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot observed cumulative number of recurrences at the end of each time interval
plot(upperBoundInterval, cumsum(recurrences),
xlab="Upper bound time interval", ylab="Cumulative recurrences",
ylim=c(min(llci), max(luci)))
#add predicted cumulative number of recurrences to the plot
points(upperBoundInterval, unlist(SEs[1,]), col="red")
#add error bars representing the 95% confindence intervals
arrows(upperBoundInterval, llci, upperBoundInterval , luci,
code=3, length=0.1, angle=90, col="red")
legend("topleft", pch=1, col=c("black", "red"),
legend=c("Observed", "Predicted"))
#finally, explore the log-likelihood function in the neighborhood
#of the starting values that were used for fitting the loglinear-NHPP model
#more specifically, locate the positions of both the starting values and the
#MLEs for gamma0 and gamma1 in a contour plot of log-likelihood values
#construct grid and calculate log-likelihood values
gamma0values <- seq(-3.5,-.5,length.out=75)
gamma1values <- seq(.05,.25,length.out=75)
lbgrid <- expand.grid(gamma0=gamma0values, gamma1=gamma1values)
lbgrid$ll <- apply(lbgrid, 1, llikNHPPg, LowerTimes=tL,
UpperTimes=tU, Ns=recurrences)
ll.matrix <- matrix(lbgrid$ll, nrow=length(gamma0values))
#contour plot of log-likelihood values
contour(gamma0values, gamma1values, ll.matrix,
levels=seq(max(ll.matrix)+55, min(ll.matrix), length.out=20),
xlab=bquote(gamma[.(0)]), ylab=bquote(gamma[.(1)]))
#position of starting values for gamma0 and gamma1 (blue cross)
points(x=ppgLoglin$starts[1], y=ppgLoglin$starts[2], col="blue", pch=4)
#position of MLEs for gamma0 and gamma1 (red dot)
points(x=ppgLoglin$parameters[1], y=ppgLoglin$parameters[2], col="red", pch=20)