R code for fitting a nonhomogeneous temporal Poisson process model

The following R code demonstrates how to fit a nonhomogeneous Poisson process (NHPP) model to temporal data.

The R code may be used to fit 1) a NHPP model with a loglinear intensity function, with the intensity at time t defined by e^{\gamma_{0} + \gamma_{1}t} , or 2) a NHPP model with a power law intensity function, with the intensity at time t defined by \frac{\beta}{\eta}(\frac{t}{\eta})^{\beta-1}. The latter NHPP model is sometimes referred to as the Crow-AMSAA model.

NHPP loglinear

Both the loglinear-NHPP and the power-NHPP model are discussed by Meeker and Escobar in their 1998 book Statistical methods for reliability data (Chapter 16).

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(pracma)
library(car)

##data

#U.S.S. Halfbeak diesel engine data from Meeker & Escobar (1998), Example 16.12, pp. 415-416
halfb <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/halfbeak.txt", header=T)
head(halfb)

#plot of time versus cumulative recurrences
ct <- halfb$Hours[2:72]
cf <- 1:length(ct)
plot(ct, cf, type="p", xlab="Time", ylab="Cumulative recurrences")

#non-parametric estimate of the intensity
startT <- 0 #lower bound of the observation window
endT <- max(ct) #upper bound of the observation window
sigma <- 4 #bandwidth for Gaussian kernel
densityEstimate <- density(ct, from=startT, to=endT, n=200, bw=sigma) #Gaussian kernel
timesD <- densityEstimate$x #extract times
nRecurrences <- length(ct) #total number of recurrences
intensityEstimate <- densityEstimate$y*nRecurrences #estimate of the intensity
#implement edge correction
edgeCorrection <- pnorm(endT, timesD, sigma) - pnorm(startT, timesD, sigma)
#plot log(intensity) against time
plot(timesD, log(intensityEstimate/edgeCorrection), type="l",
     xlim=c(startT, endT), xlab="Time", ylab="log(intensity)")



##fit NHPP-model to temporal data

#function for fitting a nonhomogeneous Poisson process (NHPP) model
fitNHPP <- function (times, Tend=max(times), intensity) {
  #-times is a vector with the observed event times
  #-Tend is the upper bound of the observation window
  #-intensity specifies the type of intensity function;
  # a power law ("power"), loglinear ("loglinear"), or a homogeneous
  # ("homogeneous") intensity function
  
  nobs <- length(times)
  
  if (intensity=="power") {
    #MLE for beta and eta
    beta <- nobs/sum(log(Tend/times))
    eta <- Tend/(nobs^(1/beta))
    #log-likelihood of the power-NHPP
    llp <- function (theta, times, Tend) {
      beta <- theta[1]
      eta <- theta[2]
      nobs*log(beta)-nobs*beta*log(eta)+(beta-1)*sum(log(times))-(Tend/eta)^beta}
    #refit the power-NHPP by optimization of the log-likelihood function
    pow <- optim(c(beta, eta), llp,
                 method="BFGS", control=list(fnscale=-1),
                 times=times, Tend=Tend, hessian=TRUE)
    parameters <- pow$par
    names(parameters) <- c("beta", "eta")
    loglik <- pow$value
    hessian <- pow$hessian
  }
  
  else if (intensity=="loglinear") {
    #MLE for gamma0 and gamma1
    rootgamma1 <- function (nobs, times, Tend, gamma1) {
      sum(times) + nobs/gamma1 - (nobs*Tend*exp(gamma1*Tend))/(exp(gamma1*Tend)-1)}
    gamma1 <- fzero(rootgamma1, x=1e-3, nobs=nobs, times=times, Tend=Tend)$x
    gamma0 <- log((nobs*gamma1)/(exp(Tend*gamma1)-1))
    #log-likelihood of the loglinear-NHPP
    lll <- function (theta, times, Tend) {
      gamma0 <- theta[1]
      gamma1 <- theta[2]
      nobs*gamma0 + gamma1*sum(times)-(exp(gamma0)*(exp(gamma1*Tend)-1))/gamma1}
    #refit the loglinear-NHPP by optimization of the log-likelihood function
    loglin <- optim(c(gamma0, gamma1), lll,
                    method="BFGS", control=list(fnscale=-1),
                    times=times, Tend=Tend, hessian=TRUE)
    parameters <- loglin$par
    names(parameters) <- c("gamma0", "gamma1")
    loglik <- loglin$value
    hessian <- loglin$hessian}
  
  else if (intensity=="homogeneous") {
    #MLE for lambda
    lambda <- length(times)/Tend
    #log-likelihood of the homogeneous-NHPP
    lll <- function (theta, times, Tend) {
      lambda <- theta[1]
      ni <- length(times)
      ni*log(lambda) - lambda*Tend}
    #refit the homogeneous-NHPP by optimization of the log-likelihood function
    hom <- optim(lambda, lll,
                 method="BFGS", control=list(fnscale=-1),
                 times=times, Tend=Tend, hessian=TRUE)
    parameters <- hom$par
    names(parameters) <- c("lambda")
    loglik <- hom$value
    hessian <- hom$hessian}
  
  list(parameters=parameters, loglik=loglik, AIC=-2*loglik+2*length(parameters),
       intensity=intensity, hessian=hessian, times=times, Tend=Tend)
}



#fit the power-NHPP (that is, NHPP model with a power law function as intensity)
(ppPower <- fitNHPP(times=halfb$Hours[2:72], Tend=halfb$Hours[73],
                    intensity="power"))[1:4]
#95% confidence intervals for beta and eta (normal-approximation)
SEparms <- sqrt(diag(solve(-1*ppPower$hessian)))
ppPower$parameters[1]*exp(c(1,-1)*qnorm(.05/2)*SEparms[1]/ppPower$parameters[1])
ppPower$parameters[2]*exp(c(1,-1)*qnorm(.05/2)*SEparms[2]/ppPower$parameters[2])

#fit loglinear-NHPP (that is, NHPP model with a loglinear function as intensity)
(ppLoglin <- fitNHPP(times=halfb$Hours[2:72], Tend=halfb$Hours[73],
                     intensity="loglinear"))[1:4]
#95% confidence intervals for gamma0 and gamma1 (normal-approximation)
SEparms <- sqrt(diag(solve(-1*ppLoglin$hessian)))
ppLoglin$parameters[1] + c(1,-1)*qnorm(.05/2)*SEparms[1]
ppLoglin$parameters[2] + c(1,-1)*qnorm(.05/2)*SEparms[2]

#plot the fitted NHPP models
ct <- halfb$Hours[2:72]
cf <- 1:length(ct)
timesseq <- seq(1, 30, length.out=100)
plot(ct, cf, type="p", xlab="Time", ylab="Cumulative recurrences")
lines(timesseq, (timesseq/ppPower$parameters[2])^ppPower$parameters[1], col="red", lty=2)
lines(timesseq, exp(ppLoglin$parameters[1])*(exp(ppLoglin$parameters[2]*timesseq)-1)/ppLoglin$parameters[2], col="blue", lty=2)
legend("topleft", pch="-", col=c("red", "blue"),
       legend=c("Power-NHPP", "Loglinear-NHPP"), bty="n", y.intersp=1.5)





##diagnostics

#function for transforming NHPP-times into HPP-times
tHPP <- function (object) {
  timesNHPP <- sort(object$times)
  Tstart <- 0
  Tend <- min(object$Tend)
  
  if(object$intensity=="loglinear"){
    gamma0 <- object$parameters[1]
    gamma1 <- object$parameters[2]
    #compute transformed time (homogeneous Poisson process with unit intensity)
    hpp <- (1/gamma1)*(exp(gamma0+gamma1*timesNHPP)-exp(gamma0+gamma1*Tstart))}
  
  else if(object$intensity=="power"){
    beta <- object$parameters[1]
    eta <- object$parameters[2]
    #compute transformed time (homogeneous Poisson process with unit intensity)
    hpp <- (timesNHPP/eta)^beta-(Tstart/eta)^beta}
  
  else if(object$intensity=="homogeneous"){
    lambda <- object$parameters[1]
    #compute transformed time (homogeneous Poisson process with unit intensity)
    hpp <- lambda*timesNHPP-lambda*Tstart}
  
  #return values
  list(eventIndex=seq_along(hpp), hppTimes=hpp, nhppTimes=timesNHPP)
}



#Use the fitted models to transform the NHPP-times into corresponding
#Homogeneous Poisson Process (HPP) times,
#where the HPP is a unit-rate process (i.e., with intensity equal to 1).
hppPower <- tHPP(ppPower)
hppLoglin <- tHPP(ppLoglin)

#For a unit-rate HPP, the number of cumulative recurrences is expected
#to be 1 at t=1, the number of cumulative recurrences at t=2 is expected
#to be 2, et cetera.
#In other words, when plotting the HPP-times against the cumulative recurrences
#for a unit-rate HPP, points are expected to lie close to the line y=x.
plot(hppPower$eventIndex, hppPower$hppTimes, type="l", col="red",
     xlab="Cumulative recurrences", ylab="HPP-time")
lines(hppLoglin$eventIndex, hppLoglin$hppTimes, type="l", col="blue")
abline(a=0, b=1, col="gray", lty=2) #line y=x
legend("topleft", pch="-", col=c("red", "blue"),
       legend=c("Power-NHPP", "Loglinear-NHPP"), bty="n", y.intersp=1.5)

#For a HPP, the interrecurrence times should be independent.
acf(diff(hppPower$hppTimes), main="Interrecurrence times HPP") #Power-NHPP
acf(diff(hppLoglin$hppTimes), main="Interrecurrence times HPP") #Loglinear-NHPP





##predicted cumulative recurrences

#plot again the fitted power-NHPP model, but this time include confidence intervals
ts <- seq(1, 30, length.out=100)
SEs <- sapply(1:length(ts), function (i) deltaMethod(ppPower$parameters,
                                                     g="(ts/eta)^beta",
                                                     vcov.=solve(-1*ppPower$hessian),
                                                     constants=c(ts=ts[i])))
#95% confidence intervals for cumulative number of recurrences
plci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
puci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot fit
plot(ct, cf, type="p", xlab="Time",
     ylab="Cumulative recurrences", main="Power-NHPP")
lines(timesseq, (timesseq/ppPower$parameters[2])^ppPower$parameters[1],
      col="red", lty=2)
lines(x=ts, y=plci, col="blue", lty=2)
lines(x=ts, y=puci, col="blue", lty=2)



#plot the fitted loglinear-NHPP model including confidence intervals
ts <- seq(1, 30, length.out=100)
SEs <- sapply(1:length(ts), function (i) deltaMethod(ppLoglin$parameters,
                                                     g="(exp(gamma0)*(exp(gamma1*ts)-1))/gamma1",
                                                     vcov.=solve(-1*ppLoglin$hessian),
                                                     constants=c(ts=ts[i])))
#95% confidence intervals for cumulative number of recurrences
llci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
luci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot fit
plot(ct, cf, type="p", xlab="Time",
     ylab="Cumulative recurrences", main="Loglinear-NHPP")
lines(timesseq, exp(ppLoglin$parameters[1])*(exp(ppLoglin$parameters[2]*timesseq)-1)/ppLoglin$parameters[2],
      col="red", lty=2)
lines(x=ts, y=llci, col="blue", lty=2)
lines(x=ts, y=luci, col="blue", lty=2)





#compute future number of recurrences (including 95% confidence interval)
#in the interval [a,b]
ta <- 25 #lower limit of interval [a,b]
tb <- 35 #upper limit of interval [a,b]

#power-NHPP
fnrp <- deltaMethod(ppPower$parameters, g="((1/eta)^beta)*(tb^beta-ta^beta)",
                    vcov.=solve(-1*ppPower$hessian), constants=c(ta=ta, tb=tb))
fnrp$Estimate #point estimate
fnrp$Estimate*exp(c(1,-1)*((qnorm(.05/2)*fnrp$SE)/fnrp$Estimate)) #confidence interval

#loglinear-NHPP
fnrl <- deltaMethod(ppLoglin$parameters,
                    g="(exp(gamma0)/gamma1)*(exp(gamma1*tb)-exp(gamma1*ta))",
                    vcov.=solve(-1*ppLoglin$hessian), constants=c(ta=ta, tb=tb))
fnrl$Estimate #point estimate
fnrl$Estimate*exp(c(1,-1)*((qnorm(.05/2)*fnrl$SE)/fnrl$Estimate)) #confidence interval



#compute likelihood based confidence interval for the future number
#of recurrences (=FNR) in the interval [a,b]

#function for computing the log-likelihood
llikFNR <- function (theta, times, Tend, timeInterval, N, intensity) {
  nobs <- length(times)
  if (intensity=="power"){
    beta <- theta[1]
    eta <- (N/(timeInterval[2]^beta-timeInterval[1]^beta))^-(1/beta)
    nobs*log(beta)-nobs*beta*log(eta)+(beta-1)*sum(log(times))-(Tend/eta)^beta}
  else if (intensity=="loglinear"){
    gamma1 <- theta[1]
    gamma0 <- log(gamma1*(N/(exp(gamma1*timeInterval[2])-exp(gamma1*timeInterval[1]))))
    nobs*gamma0 + gamma1*sum(times)-(exp(gamma0)*(exp(gamma1*Tend)-1))/gamma1}
}

#function for profiling FNR (using BFGS)
profileLogLik <- function(parms, times, Tend, timeInterval, Ns, intensity) {
  plke <- rep(0, length(Ns))
  
  for (i in 1:length(Ns)) {
    temp <- optim(parms, llikFNR,
                  method="BFGS", control=list(fnscale=-1),
                  times=times, Tend=Tend, timeInterval=timeInterval,
                  intensity=intensity, N=Ns[i])
    plke[i] <- temp$value
  }
  plke
}

#function for computing the confidence interval for FNR
profileFNR <- function(object, rangeFNR, nvalues=50, alpha=.95, timeInterval) {
  times <- object$times
  Tend <- object$Tend
  intensity <- object$intensity
  Ns <- seq(rangeFNR[1], rangeFNR[2], length.out=nvalues)
  
  if (intensity=="power"){
    beta <- object$parameters[1]
    eta <- object$parameters[2]
    parms <- object$parameters[1]
    Np <- ((1/eta)^beta)*(timeInterval[2]^beta-timeInterval[1]^beta)
  }
  
  else if (intensity=="loglinear"){
    gamma0 <- object$parameters[1]
    gamma1 <- object$parameters[2]
    parms <- object$parameters[2]
    Np <- (exp(gamma0)/gamma1)*(exp(gamma1*timeInterval[2])-exp(gamma1*timeInterval[1]))
  }
  
  #profile the future number of recurrences
  ll <- profileLogLik(parms, times, Tend, timeInterval, Ns, intensity)
  
  #compute confidence bounds
  loglikw <- object$loglik
  limit <- loglikw - .5*qchisq(alpha, df=1)
  
  limitlkh <- function(parms, times, Tend, timeInterval, Ns, intensity) {
    profileLogLik(parms, times, Tend, timeInterval, Ns, intensity) - limit}
  
  lowerci <- NA
  upperci <- NA
  
  try(lowerci <- round(fzero(limitlkh, c(rangeFNR[1], Np), parms=parms,
                             times=times, Tend=Tend, timeInterval=timeInterval,
                             intensity=intensity)$x, 6), TRUE)
  try(upperci <- round(fzero(limitlkh, c(Np, rangeFNR[2]), parms=parms,
                             times=times, Tend=Tend, timeInterval=timeInterval,
                             intensity=intensity)$x, 6), TRUE)
  
  #plot profile
  plot(Ns, ll, type='l',
       xlab=paste("Future number of recurrences in time interval [",
                  timeInterval[1],",",timeInterval[2],"]", sep=""),
       ylab="log-likelihood")
  abline(h=limit, col="blue",lty=2) #log-likelihood limit
  abline(v=Np, col="red", lty=2) #include MLE for FNR
  #include interval bounds
  if (!is.na(lowerci)) abline(v=lowerci, col="darkgreen", lty=2)
  if (!is.na(upperci)) abline(v=upperci, col="darkgreen", lty=2)
  #return bounds
  data.frame(Estimate=as.vector(Np), lcl=lowerci, ucl=upperci)
}

#compute the likelihood based 95% confidence intervals for the future
#number of recurrences (=FNR) in the time interval [a,b]

#power-NHPP model
#note: inspect whether the curve of the log-likelihood function
#intersects the blue line
profileFNR(object=ppPower, rangeFNR=c(60, 140), timeInterval=c(25, 35))
#on the right, the curve does not intersect with the blue line and ucl=NA
#therefore, increase the width of the range
profileFNR(object=ppPower, rangeFNR=c(60, 160), timeInterval=c(25, 35))

#loglinear-NHPP model
profileFNR(object=ppLoglin, rangeFNR=c(130, 410), timeInterval=c(25, 35))