The following R code demonstrates how to fit a nonhomogeneous Poisson process (NHPP) model to temporal data.
The R code may be used to fit 1) a NHPP model with a loglinear intensity function, with the intensity at time t defined by , or 2) a NHPP model with a power law intensity function, with the intensity at time t defined by
. The latter NHPP model is sometimes referred to as the Crow-AMSAA model.
Both the loglinear-NHPP and the power-NHPP model are discussed by Meeker and Escobar in their 1998 book Statistical methods for reliability data (Chapter 16).
Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.
R code
library(pracma)
library(car)
##data
#U.S.S. Halfbeak diesel engine data from Meeker & Escobar (1998), Example 16.12, pp. 415-416
halfb <- read.table("http://www.datall-analyse.nl/blog_data/halfbeak.txt", header=T)
head(halfb)
#plot of time versus cumulative recurrences
ct <- halfb$Hours[2:72]
cf <- 1:length(ct)
plot(ct, cf, type="p", xlab="Time", ylab="Cumulative recurrences")
#non-parametric estimate of the intensity
startT <- 0 #lower bound of the observation window
endT <- max(ct) #upper bound of the observation window
sigma <- 4 #bandwidth for Gaussian kernel
densityEstimate <- density(ct, from=startT, to=endT, n=200, bw=sigma) #Gaussian kernel
timesD <- densityEstimate$x #extract times
nRecurrences <- length(ct) #total number of recurrences
intensityEstimate <- densityEstimate$y*nRecurrences #estimate of the intensity
#implement edge correction
edgeCorrection <- pnorm(endT, timesD, sigma) - pnorm(startT, timesD, sigma)
#plot log(intensity) against time
plot(timesD, log(intensityEstimate/edgeCorrection), type="l",
xlim=c(startT, endT), xlab="Time", ylab="log(intensity)")
##fit NHPP-model to temporal data
#function for fitting a nonhomogeneous Poisson process (NHPP) model
fitNHPP <- function (times, Tend=max(times), intensity) {
#-times is a vector with the observed event times
#-Tend is the upper bound of the observation window
#-intensity specifies the type of intensity function;
# a power law ("power"), loglinear ("loglinear"), or a homogeneous
# ("homogeneous") intensity function
nobs <- length(times)
if (intensity=="power") {
#MLE for beta and eta
beta <- nobs/sum(log(Tend/times))
eta <- Tend/(nobs^(1/beta))
#log-likelihood of the power-NHPP
llp <- function (theta, times, Tend) {
beta <- theta[1]
eta <- theta[2]
nobs*log(beta)-nobs*beta*log(eta)+(beta-1)*sum(log(times))-(Tend/eta)^beta}
#refit the power-NHPP by optimization of the log-likelihood function
pow <- optim(c(beta, eta), llp,
method="BFGS", control=list(fnscale=-1),
times=times, Tend=Tend, hessian=TRUE)
parameters <- pow$par
names(parameters) <- c("beta", "eta")
loglik <- pow$value
hessian <- pow$hessian
}
else if (intensity=="loglinear") {
#MLE for gamma0 and gamma1
rootgamma1 <- function (nobs, times, Tend, gamma1) {
sum(times) + nobs/gamma1 - (nobs*Tend*exp(gamma1*Tend))/(exp(gamma1*Tend)-1)}
gamma1 <- fzero(rootgamma1, x=1e-3, nobs=nobs, times=times, Tend=Tend)$x
gamma0 <- log((nobs*gamma1)/(exp(Tend*gamma1)-1))
#log-likelihood of the loglinear-NHPP
lll <- function (theta, times, Tend) {
gamma0 <- theta[1]
gamma1 <- theta[2]
nobs*gamma0 + gamma1*sum(times)-(exp(gamma0)*(exp(gamma1*Tend)-1))/gamma1}
#refit the loglinear-NHPP by optimization of the log-likelihood function
loglin <- optim(c(gamma0, gamma1), lll,
method="BFGS", control=list(fnscale=-1),
times=times, Tend=Tend, hessian=TRUE)
parameters <- loglin$par
names(parameters) <- c("gamma0", "gamma1")
loglik <- loglin$value
hessian <- loglin$hessian}
else if (intensity=="homogeneous") {
#MLE for lambda
lambda <- length(times)/Tend
#log-likelihood of the homogeneous-NHPP
lll <- function (theta, times, Tend) {
lambda <- theta[1]
ni <- length(times)
ni*log(lambda) - lambda*Tend}
#refit the homogeneous-NHPP by optimization of the log-likelihood function
hom <- optim(lambda, lll,
method="BFGS", control=list(fnscale=-1),
times=times, Tend=Tend, hessian=TRUE)
parameters <- hom$par
names(parameters) <- c("lambda")
loglik <- hom$value
hessian <- hom$hessian}
list(parameters=parameters, loglik=loglik, AIC=-2*loglik+2*length(parameters),
intensity=intensity, hessian=hessian, times=times, Tend=Tend)
}
#fit the power-NHPP (that is, NHPP model with a power law function as intensity)
(ppPower <- fitNHPP(times=halfb$Hours[2:72], Tend=halfb$Hours[73],
intensity="power"))[1:4]
#95% confidence intervals for beta and eta (normal-approximation)
SEparms <- sqrt(diag(solve(-1*ppPower$hessian)))
ppPower$parameters[1]*exp(c(1,-1)*qnorm(.05/2)*SEparms[1]/ppPower$parameters[1])
ppPower$parameters[2]*exp(c(1,-1)*qnorm(.05/2)*SEparms[2]/ppPower$parameters[2])
#fit loglinear-NHPP (that is, NHPP model with a loglinear function as intensity)
(ppLoglin <- fitNHPP(times=halfb$Hours[2:72], Tend=halfb$Hours[73],
intensity="loglinear"))[1:4]
#95% confidence intervals for gamma0 and gamma1 (normal-approximation)
SEparms <- sqrt(diag(solve(-1*ppLoglin$hessian)))
ppLoglin$parameters[1] + c(1,-1)*qnorm(.05/2)*SEparms[1]
ppLoglin$parameters[2] + c(1,-1)*qnorm(.05/2)*SEparms[2]
#plot the fitted NHPP models
ct <- halfb$Hours[2:72]
cf <- 1:length(ct)
timesseq <- seq(1, 30, length.out=100)
plot(ct, cf, type="p", xlab="Time", ylab="Cumulative recurrences")
lines(timesseq, (timesseq/ppPower$parameters[2])^ppPower$parameters[1], col="red", lty=2)
lines(timesseq, exp(ppLoglin$parameters[1])*(exp(ppLoglin$parameters[2]*timesseq)-1)/ppLoglin$parameters[2], col="blue", lty=2)
legend("topleft", pch="-", col=c("red", "blue"),
legend=c("Power-NHPP", "Loglinear-NHPP"), bty="n", y.intersp=1.5)
##diagnostics
#function for transforming NHPP-times into HPP-times
tHPP <- function (object) {
timesNHPP <- sort(object$times)
Tstart <- 0
Tend <- min(object$Tend)
if(object$intensity=="loglinear"){
gamma0 <- object$parameters[1]
gamma1 <- object$parameters[2]
#compute transformed time (homogeneous Poisson process with unit intensity)
hpp <- (1/gamma1)*(exp(gamma0+gamma1*timesNHPP)-exp(gamma0+gamma1*Tstart))}
else if(object$intensity=="power"){
beta <- object$parameters[1]
eta <- object$parameters[2]
#compute transformed time (homogeneous Poisson process with unit intensity)
hpp <- (timesNHPP/eta)^beta-(Tstart/eta)^beta}
else if(object$intensity=="homogeneous"){
lambda <- object$parameters[1]
#compute transformed time (homogeneous Poisson process with unit intensity)
hpp <- lambda*timesNHPP-lambda*Tstart}
#return values
list(eventIndex=seq_along(hpp), hppTimes=hpp, nhppTimes=timesNHPP)
}
#Use the fitted models to transform the NHPP-times into corresponding
#Homogeneous Poisson Process (HPP) times,
#where the HPP is a unit-rate process (i.e., with intensity equal to 1).
hppPower <- tHPP(ppPower)
hppLoglin <- tHPP(ppLoglin)
#For a unit-rate HPP, the number of cumulative recurrences is expected
#to be 1 at t=1, the number of cumulative recurrences at t=2 is expected
#to be 2, et cetera.
#In other words, when plotting the HPP-times against the cumulative recurrences
#for a unit-rate HPP, points are expected to lie close to the line y=x.
plot(hppPower$eventIndex, hppPower$hppTimes, type="l", col="red",
xlab="Cumulative recurrences", ylab="HPP-time")
lines(hppLoglin$eventIndex, hppLoglin$hppTimes, type="l", col="blue")
abline(a=0, b=1, col="gray", lty=2) #line y=x
legend("topleft", pch="-", col=c("red", "blue"),
legend=c("Power-NHPP", "Loglinear-NHPP"), bty="n", y.intersp=1.5)
#For a HPP, the interrecurrence times should be independent.
acf(diff(hppPower$hppTimes), main="Interrecurrence times HPP") #Power-NHPP
acf(diff(hppLoglin$hppTimes), main="Interrecurrence times HPP") #Loglinear-NHPP
##predicted cumulative recurrences
#plot again the fitted power-NHPP model, but this time include confidence intervals
ts <- seq(1, 30, length.out=100)
SEs <- sapply(1:length(ts), function (i) deltaMethod(ppPower$parameters,
g="(ts/eta)^beta",
vcov.=solve(-1*ppPower$hessian),
constants=c(ts=ts[i])))
#95% confidence intervals for cumulative number of recurrences
plci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
puci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot fit
plot(ct, cf, type="p", xlab="Time",
ylab="Cumulative recurrences", main="Power-NHPP")
lines(timesseq, (timesseq/ppPower$parameters[2])^ppPower$parameters[1],
col="red", lty=2)
lines(x=ts, y=plci, col="blue", lty=2)
lines(x=ts, y=puci, col="blue", lty=2)
#plot the fitted loglinear-NHPP model including confidence intervals
ts <- seq(1, 30, length.out=100)
SEs <- sapply(1:length(ts), function (i) deltaMethod(ppLoglin$parameters,
g="(exp(gamma0)*(exp(gamma1*ts)-1))/gamma1",
vcov.=solve(-1*ppLoglin$hessian),
constants=c(ts=ts[i])))
#95% confidence intervals for cumulative number of recurrences
llci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
luci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot fit
plot(ct, cf, type="p", xlab="Time",
ylab="Cumulative recurrences", main="Loglinear-NHPP")
lines(timesseq, exp(ppLoglin$parameters[1])*(exp(ppLoglin$parameters[2]*timesseq)-1)/ppLoglin$parameters[2],
col="red", lty=2)
lines(x=ts, y=llci, col="blue", lty=2)
lines(x=ts, y=luci, col="blue", lty=2)
#compute future number of recurrences (including 95% confidence interval)
#in the interval [a,b]
ta <- 25 #lower limit of interval [a,b]
tb <- 35 #upper limit of interval [a,b]
#power-NHPP
fnrp <- deltaMethod(ppPower$parameters, g="((1/eta)^beta)*(tb^beta-ta^beta)",
vcov.=solve(-1*ppPower$hessian), constants=c(ta=ta, tb=tb))
fnrp$Estimate #point estimate
fnrp$Estimate*exp(c(1,-1)*((qnorm(.05/2)*fnrp$SE)/fnrp$Estimate)) #confidence interval
#loglinear-NHPP
fnrl <- deltaMethod(ppLoglin$parameters,
g="(exp(gamma0)/gamma1)*(exp(gamma1*tb)-exp(gamma1*ta))",
vcov.=solve(-1*ppLoglin$hessian), constants=c(ta=ta, tb=tb))
fnrl$Estimate #point estimate
fnrl$Estimate*exp(c(1,-1)*((qnorm(.05/2)*fnrl$SE)/fnrl$Estimate)) #confidence interval
#compute likelihood based confidence interval for the future number
#of recurrences (=FNR) in the interval [a,b]
#function for computing the log-likelihood
llikFNR <- function (theta, times, Tend, timeInterval, N, intensity) {
nobs <- length(times)
if (intensity=="power"){
beta <- theta[1]
eta <- (N/(timeInterval[2]^beta-timeInterval[1]^beta))^-(1/beta)
nobs*log(beta)-nobs*beta*log(eta)+(beta-1)*sum(log(times))-(Tend/eta)^beta}
else if (intensity=="loglinear"){
gamma1 <- theta[1]
gamma0 <- log(gamma1*(N/(exp(gamma1*timeInterval[2])-exp(gamma1*timeInterval[1]))))
nobs*gamma0 + gamma1*sum(times)-(exp(gamma0)*(exp(gamma1*Tend)-1))/gamma1}
}
#function for profiling FNR (using BFGS)
profileLogLik <- function(parms, times, Tend, timeInterval, Ns, intensity) {
plke <- rep(0, length(Ns))
for (i in 1:length(Ns)) {
temp <- optim(parms, llikFNR,
method="BFGS", control=list(fnscale=-1),
times=times, Tend=Tend, timeInterval=timeInterval,
intensity=intensity, N=Ns[i])
plke[i] <- temp$value
}
plke
}
#function for computing the confidence interval for FNR
profileFNR <- function(object, rangeFNR, nvalues=50, alpha=.95, timeInterval) {
times <- object$times
Tend <- object$Tend
intensity <- object$intensity
Ns <- seq(rangeFNR[1], rangeFNR[2], length.out=nvalues)
if (intensity=="power"){
beta <- object$parameters[1]
eta <- object$parameters[2]
parms <- object$parameters[1]
Np <- ((1/eta)^beta)*(timeInterval[2]^beta-timeInterval[1]^beta)
}
else if (intensity=="loglinear"){
gamma0 <- object$parameters[1]
gamma1 <- object$parameters[2]
parms <- object$parameters[2]
Np <- (exp(gamma0)/gamma1)*(exp(gamma1*timeInterval[2])-exp(gamma1*timeInterval[1]))
}
#profile the future number of recurrences
ll <- profileLogLik(parms, times, Tend, timeInterval, Ns, intensity)
#compute confidence bounds
loglikw <- object$loglik
limit <- loglikw - .5*qchisq(alpha, df=1)
limitlkh <- function(parms, times, Tend, timeInterval, Ns, intensity) {
profileLogLik(parms, times, Tend, timeInterval, Ns, intensity) - limit}
lowerci <- NA
upperci <- NA
try(lowerci <- round(fzero(limitlkh, c(rangeFNR[1], Np), parms=parms,
times=times, Tend=Tend, timeInterval=timeInterval,
intensity=intensity)$x, 6), TRUE)
try(upperci <- round(fzero(limitlkh, c(Np, rangeFNR[2]), parms=parms,
times=times, Tend=Tend, timeInterval=timeInterval,
intensity=intensity)$x, 6), TRUE)
#plot profile
plot(Ns, ll, type='l',
xlab=paste("Future number of recurrences in time interval [",
timeInterval[1],",",timeInterval[2],"]", sep=""),
ylab="log-likelihood")
abline(h=limit, col="blue",lty=2) #log-likelihood limit
abline(v=Np, col="red", lty=2) #include MLE for FNR
#include interval bounds
if (!is.na(lowerci)) abline(v=lowerci, col="darkgreen", lty=2)
if (!is.na(upperci)) abline(v=upperci, col="darkgreen", lty=2)
#return bounds
data.frame(Estimate=as.vector(Np), lcl=lowerci, ucl=upperci)
}
#compute the likelihood based 95% confidence intervals for the future
#number of recurrences (=FNR) in the time interval [a,b]
#power-NHPP model
#note: inspect whether the curve of the log-likelihood function
#intersects the blue line
profileFNR(object=ppPower, rangeFNR=c(60, 140), timeInterval=c(25, 35))
#on the right, the curve does not intersect with the blue line and ucl=NA
#therefore, increase the width of the range
profileFNR(object=ppPower, rangeFNR=c(60, 160), timeInterval=c(25, 35))
#loglinear-NHPP model
profileFNR(object=ppLoglin, rangeFNR=c(130, 410), timeInterval=c(25, 35))