# R code for fitting a multitype NHPP model to grouped temporal data

The following R code demonstrates how to fit a multitype NonHomogeneous Poisson Process (NHPP) model to grouped temporal data. Multitype Poisson processes belong to the family of marked point processes.
Note that grouped temporal data occurs when only the number of recurrences within a given time interval are known.

The R code may be used to fit 1) a multitype NHPP model with a loglinear intensity function, with the intensity at time t defined by $e^{\gamma_{0} + \gamma_{1}t}$, or 2) a multitype NHPP model with a power law intensity function, with the intensity at time t defined by $\gamma_{0}*t^{\gamma_{1}}$.

Furthermore, the R code also shows how to fit a multitype Homogeneous Poisson Process (HPP) model to grouped temporal data. See this page for an overview of all of Stefan’s R code blog posts.

R code

library(car)

#The R functions for fitting a multitype temporal NHPP model
source("http://www.datall-analyse.nl/R/NHPP_multitype_temporal_grouped.R")
#Have a look at the function groupedNHPP and notice that at the beginning
#of the script you will find a description of the function's arguments.

##EXAMPLE 1: "parallel systems"
#Grouped data for three different types of temporal points are generated.
#The three different types of points are labeled "marks1", "marks2", and
#"marks3". The intensity of all three types is described by a loglinear
#function, with the intensity at time t defined by exp(gamma0 + gamma1*t).
#Furthermore, the observation windows for the three different types
#all start at t=0 and end at t=50.
#Finally, note that this data also can be regarded as three parallel
#operating systems. These three parallel systems all start operating at t=0,
#and stop operating at t=50. The three parallel systems are labeled "marks1",
#"marks2", and "marks3", respectively.

##Generate data

#Sequence of event times generated by a loglinear-NHPP for marks1.
#The observation window for marks1 starts at t=0 and ends at t=30.
t1 <- rloglinNHPP(Tstart=0, Tend=30, gamma0=-1.5, gamma1=0.10)
nr1 <- 1:length(t1) #cumulative recurrences

#Sequence of event times generated by a loglinear-NHPP for marks2.
#The observation window for marks2 also starts at t=0 and ends at t=30.
t2 <- rloglinNHPP(Tstart=0, Tend=30, gamma0=-3, gamma1=0.15)
nr2 <- 1:length(t2) #cumulative recurrences

#Sequence of event times generated by a loglinear-NHPP for marks3.
#Again, the observation window starts at t=0 and ends at t=30.
t3 <- rloglinNHPP(Tstart=0, Tend=30, gamma0=-2.1, gamma1=0.12)
nr3 <- 1:length(t3) #cumulative recurrences

#combine the generated data
tm <- data.frame(t=c(t1,t2,t3), nr=c(nr1,nr2,nr3),
marks= c(rep("marks1", length(t1)),
rep("marks2", length(t2)),
rep("marks3", length(t3))))

#plot the generated data
plot(tm$t, tm$nr, type="p", xlim=c(0, 30), pch=as.numeric(tm$marks), col=as.numeric(tm$marks), xlab="Cumulative time", ylab="Cumulative recurrences")
legend("topleft", pch=as.numeric(unique(tm$marks)), col=as.numeric(unique(tm$marks)),
legend=as.character(unique(tm$marks)), bty="n", y.intersp=1.5) #Create grouped temporal data. #construct time intervals int <- seq(0, 25, 5) #lower and upper bound of each time interval tL <- int #lower bounds tU <- c(int[-1], 30) #upper bounds #plot generated data with the boundaries of the time intervals included plot(tm$t, tm$nr, type="p", xlim=c(0, 30), pch=as.numeric(tm$marks),
col=as.numeric(tm$marks), xlab="Cumulative time", ylab="Number of recurrences") abline(v=tU, lty=2, col="blue") legend("topleft", pch=as.numeric(unique(tm$marks)), col=as.numeric(unique(tm$marks)), legend=as.character(unique(tm$marks)), bty="n", y.intersp=1.5)

#assign observations to the time intervals
assignT <- tapply(tm$t, as.factor(tm$marks), function (i) findInterval(i, int))

#number of recurrences within each time interval
recurrences <-lapply(assignT, function (i) sapply(1:length(tU), function (j) sum(i==j)))

#assign mark labels to time intervals
marksc <- c(rep("marks1", 6), rep("marks2", 6), rep("marks3", 6))
recurrencesc <- as.vector(simplify2array(recurrences))
tLc <- rep(tL, times=length(unique(tm$marks))) tUc <- rep(tU, times=length(unique(tm$marks)))

##Fit a multitype temporal NHPP model

#Fit an interaction model with a loglinear intensity function:
#The interaction model assumes that the three marks (=types) all have different
#values for the gamma0 and gamma1 parameters of the loglinear intensity function.
#Note that all models estimated by the multiNHPP function are fitted
#by using Maximum Likelihood Estimation (MLE).
ppgLoglin <- groupedNHPP(LowerTimes=tLc, UpperTimes=tUc, Ns=recurrencesc,
marks=marksc, model="interaction", intensity="loglinear")
ppgLoglin[1:3]
#Note that the parameter values for gamma0 and gamma1 of "marks1" are labeled
#gamma0.marks1 and gamma1.marks1, respectively, in the output.
#The parameter values for the other two marks are labeled in a similar way.

#Finally, compare the MLEs with the starting values
ppgLoglin$starts ##Variance covariance matrix for the model parameters round(solve(-1*ppgLoglin$hessian), 6)
#Note that the zero's in the matrix reflect that the temporal points (=event times)
#of each of the three types (=marks) are generated by three
#independent temporal point processes

##Compute confidence intervals for the gamma0 and gamma1 parameter estimates
#95% confidence intervals (normal-approximation) for gamma0 and gamma1
#of the fitted loglinear-NHPP model

#compute standard errors
SEparms <- sqrt(diag(solve(-1*ppgLoglin$hessian))) #marks1 ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma0
ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1 #compare these with the population values for gamma0 and gamma1 of marks1 #remember, the population values for gamma0 and gamma1 were -1.5 and 0.1, respectively #marks2 ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma0
ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1 #population values for gamma0 and gamma1 of marks2 were -3 and 0.15, respectively #marks3 ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma0
ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1 #population values for gamma0 and gamma1 of marks3 were -2.1 and 0.12, respectively ##Predictions: cumulative recurrences #Separate fits for the three types (i.e., "marks1", "marks2", and "marks3"). plot(tm$t, tm$nr, type="p", xlim=c(0, 30), pch=as.numeric(tm$marks),
col=as.numeric(tm$marks), xlab="Time", ylab="Cumulative recurrences") abline(v=tU, lty=2, col="blue") parms <- ppgLoglin$parameters
lines(tU, (exp(parms)*(exp(parms*tU)-1))/parms, col="black")
lines(tU, (exp(parms)*(exp(parms*tU)-1))/parms, col="red")
lines(tU, (exp(parms)*(exp(parms*tU)-1))/parms, col="green")

#Observed/predicted cumulative number of recurrences at the end of a time interval.

#Remember that the data from the three types may be seen as a
#"parallel systems configuration".
#As a consequence, it is possible to compute the cumulative recurrences
#of the three systems together by using the superposition principle.

#Cumulative recurrences of the superimposed systems.
recurrencesT <- cumsum(rowSums(simplify2array(recurrences)))
upperBoundInterval <- tU #upper bounds of time intervals

#Compute the predicted number of recurrences (and their standard errors)
#at the end of each time interval, using the superposition principle.
sp <- "(exp(gamma0.marks1)*(exp(gamma1.marks1*ts)-1))/gamma1.marks1+
(exp(gamma0.marks2)*(exp(gamma1.marks2*ts)-1))/gamma1.marks2+
(exp(gamma0.marks3)*(exp(gamma1.marks3*ts)-1))/gamma1.marks3"

SEs <- sapply(1:length(upperBoundInterval),
function (i) deltaMethod(ppgLoglin$parameters, g=sp, vcov.=solve(-1*ppgLoglin$hessian),
constants=c(ts=upperBoundInterval[i])))
#95% confidence interval for the predicted cumulative recurrences
llci <- unlist(SEs[1,])*exp((qnorm(.025)*unlist(SEs[2,]))/unlist(SEs[1,]))
luci <- unlist(SEs[1,])*exp((qnorm(.975)*unlist(SEs[2,]))/unlist(SEs[1,]))
#plot observed cumulative number of recurrences at the end of each time interval
plot(upperBoundInterval, recurrencesT,
xlab="Upper bound time interval", ylab="Cumulative recurrences",
ylim=c(min(llci), max(luci)))
#add predicted cumulative recurrences to the plot
points(upperBoundInterval, unlist(SEs[1,]), col="red")
#add error bars representing the 95% confindence intervals
arrows(upperBoundInterval, llci, upperBoundInterval , luci,
code=3, length=0.1, angle=90, col="red")
legend("topleft", pch=1, col=c("black", "red"),
legend=c("Observed", "Predicted"), bty="n", y.intersp=1.5)

##Fit alternative models to the grouped temporal data

#1) Fit a simple model
#The simple model assumes that the three types (=marks) all have different
#values for the gamma0 parameter of the loglinear intensity function. However,
#in contrast to the interaction model, the simple model assumes that the gamma1
#parameter is identical for all three types.
groupedNHPP(LowerTimes=tLc, UpperTimes=tUc, Ns=recurrencesc,
marks=marksc, model="simple", intensity="loglinear")[1:3]

#2) Fit a homogenous model
#The homogeneous model assumes that the three types (=marks) all have different
#values for the gamma0 parameter of the loglinear intensity function.
#However, this model assumes that the intensity is constant (homogeneous) over
#time. In other words, the homogeneous model assumes that the gamma1
#parameter is equal to zero.
#The intensity for each type is thus defined as: exp(gamma0).
(modhom <- groupedNHPP(LowerTimes=tLc, UpperTimes=tUc, Ns=recurrencesc,
marks=marksc, model="homogeneous"))[1:3]
exp(modhom$parameters) #intensity estimate for the three types #EXAMPLE 2: "two systems in series with a change point" #Grouped data for two different types of temporal points are generated. #The two types of points are labeled "marks1" and "marks2". #The intensity of both types is described by a loglinear #function, with the intensity at time t defined by exp(gamma0 + gamma1*t). #The observation window for "marks1" starts at t=0 and ends at t=25. #The observation window for "marks2" starts at t=25 and ends at t=50. #Note that this data also can be regarded as two systems operating in series. #System 1 (labeled as "marks1") is the only system operating up to the #change point at t=25. However, at the change point, System 2 (="marks2") #takes over and from then on will be the only operating system. ##Generate data #Sequence of event times generated by a loglinear-NHPP for marks1. #The observation window for marks1 starts at t=0 and ends at t=25. t1 <- rloglinNHPP(Tstart=0, Tend=25, gamma0=-1.5, gamma1=0.10) #Sequence of event times generated by a loglinear-NHPP for marks2. #The observation window for marks2 starts at t=25 and ends at t=50. t2 <- rloglinNHPP(Tstart=25, Tend=50, gamma0=-6.5, gamma1=0.15) #combine the generated data tm <- data.frame(t=c(t1,t2), marks= c(rep("marks1", length(t1)), rep("marks2", length(t2)))) #plot generated data plot(tm$t, 1:nrow(tm), type="p", pch=as.numeric(tm$marks), xlim=c(0, 50), xlab="Time", ylab="Cumulative recurrences") legend("topleft", pch=as.numeric(unique(tm$marks)),
legend=as.character(unique(tm$marks)), bty="n", y.intersp=1.5) #Create grouped temporal data. #construct time intervals int <- seq(0, 45, 5) #lower and upper bound of each time interval tL <- int #lower bounds tU <- c(int[-1], 50) #upper bounds #plot generated data with the boundaries of the time intervals included plot(tm$t, 1:nrow(tm), type="p", pch=as.numeric(tm$marks), xlim=c(0,50), xlab="Time", ylab="Cumulative recurrences") abline(v=tU, lty=2, col="blue") legend("topleft", pch=as.numeric(unique(tm$marks)),
legend=as.character(unique(tm$marks)), bty="n", y.intersp=1.5) #assign observations to the time intervals assignT <- findInterval(tm$t, int)

#number of recurrences within each time interval
recurrences <- sapply(1:length(tU), function (i) sum(assignT==i))

#assign mark labels to time intervals
markLabels <- c(rep("marks1", 5), rep("marks2", 5))

##Fit a multitype temporal NHPP model

#Fit an interaction model with a loglinear intensity function
ppgLoglin <- groupedNHPP(LowerTimes=tL, UpperTimes=tU, Ns=recurrences,
marks=markLabels,
model="interaction", intensity="loglinear")
ppgLoglin[1:3]

#compare the MLEs with the starting values
ppgLoglin$starts ##Compute confidence intervals for the gamma0 and gamma1 parameter estimates #95% confidence intervals (normal-approximation) for gamma0 and gamma1 #of the fitted loglinear-NHPP model #compute standard errors SEparms <- sqrt(diag(solve(-1*ppgLoglin$hessian)))

#system operating before the change point (labeled as "marks1")
ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma0 ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1
#compare these with the population values for gamma0 and gamma1 of "marks1"
#remember, the population values for gamma0 and gamma1 were -1.5 and 0.1, respectively

#system operating after the change point (labeled as "marks2")
ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma0 ppgLoglin$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1
#population values for gamma0 and gamma1 of System 2 were -6.5 and 0.15, respectively

##Predictions: cumulative recurrences

#Observed/predicted cumulative number of recurrences at the end of a time interval.
recurrencesT <- cumsum(recurrences) #observed cumulative number of recurrences
upperBoundInterval <- tU #upper bounds of time intervals
upperBoundIntervalMarks1 <- tU[1:5] #upper bounds of time intervals for marks1
upperBoundIntervalMarks2 <- tU[6:10] #upper bounds of time intervals for marks1

#Compute predicted cumulative recurrences (and their standard errors):

#At the end of each time interval of System 1 ("marks1").
sp1 <- "(exp(gamma0.marks1)*(exp(gamma1.marks1*ts)-1))/gamma1.marks1"
SEs1 <- sapply(1:length(upperBoundIntervalMarks1),
function (i) deltaMethod(ppgLoglin$parameters, g=sp1, vcov.=solve(-1*ppgLoglin$hessian),
constants=c(ts=upperBoundIntervalMarks1[i])))
#95% confidence interval for the predicted number of recurrences
predrec1 <- unlist(SEs1[1,])
llci1 <- unlist(SEs1[1,])*exp((qnorm(.025)*unlist(SEs1[2,]))/unlist(SEs1[1,]))
luci1 <- unlist(SEs1[1,])*exp((qnorm(.975)*unlist(SEs1[2,]))/unlist(SEs1[1,]))

#At the end of each time interval of #System 2 ("marks2").
#Make sure that the cumulative recurrences at the ending time of System 1 and
#the starting time of System 2 (that is, at the change point) are identical.
sp2 <- "((exp(gamma0.marks1)*(exp(gamma1.marks1*tcp)-1))/gamma1.marks1-
(exp(gamma0.marks2)*(exp(gamma1.marks2*tcp)-1))/gamma1.marks2)+
(exp(gamma0.marks2)*(exp(gamma1.marks2*ts2)-1))/gamma1.marks2"
tcp <- upperBoundIntervalMarks1[length(upperBoundIntervalMarks1)]

SEs2 <- sapply(1:length(upperBoundIntervalMarks2),
function (i) deltaMethod(ppgLoglin$parameters, g=sp2, vcov.=solve(-1*ppgLoglin$hessian),
constants=c(tcp=tcp,
ts2=upperBoundIntervalMarks2[i])))
#95% confidence interval for the predicted number of recurrences
predrec2 <- unlist(SEs2[1,])
llci2 <- unlist(SEs2[1,])*exp((qnorm(.025)*unlist(SEs2[2,]))/unlist(SEs2[1,]))
luci2 <- unlist(SEs2[1,])*exp((qnorm(.975)*unlist(SEs2[2,]))/unlist(SEs2[1,]))

#combine data
predrec <- c(predrec1, predrec2)
lci <- c(llci1, llci2)
uci <- c(luci1, luci2)

#plot observed cumulative number of recurrences at the end of each time interval
plot(upperBoundInterval, recurrencesT,
xlab="Upper bound time interval", ylab="Cumulative recurrences",
ylim=c(min(lci), max(uci)))
#add predicted cumulative number of recurrences to the plot
points(upperBoundInterval, predrec, col="red")
#add error bars representing the 95% confindence intervals
arrows(upperBoundInterval, lci, upperBoundInterval , uci,
code=3, length=0.1, angle=90, col="red")
abline(v=25, lty=2, col="gray")
legend("topleft", pch=1, col=c("black", "red"),
legend=c("Observed", "Predicted"), bty="n", y.intersp=1.5)

#EXAMPLE 3: "two systems in series with a change point"
#Similar to Example 2, but this time the intensity of both types
#(i.e., "marks1" and "marks2") is described by a power law intensity
#function, with the intensity at time t defined as gamma0*t^gamma1.

##Generate data

#Sequence of event times generated by a power-NHPP for marks1.
#The observation window for marks1 starts at t=0 and ends at t=30.
t1 <- rpowerNHPP(Tstart=0, Tend=30, gamma0=0.3, gamma1=0.7)

#Sequence of event times generated by a power-NHPP for marks2.
#The observation window for marks2 starts at t=30 and ends at t=60.
t2 <- rpowerNHPP(Tstart=30, Tend=60, gamma0=0.008, gamma1=1.4)

#combine data
tm <- data.frame(t=c(t1,t2),
marks= c(rep("marks1", length(t1)), rep("marks2", length(t2))))

#plot generated data
plot(tm$t, 1:nrow(tm), type="p", pch=as.numeric(tm$marks), xlim=c(0, 60),
xlab="Time", ylab="Cumulative recurrences")
legend("topleft", pch=as.numeric(unique(tm$marks)), legend=as.character(unique(tm$marks)), bty="n", y.intersp=1.5)

#Create grouped temporal data.

#construct time intervals
int <- seq(0, 55, 5)
#lower and upper bound of each time interval
tL <- int #lower bounds
tU <- c(int[-1], 60) #upper bounds

#plot generated data with the boundaries of the time intervals included
plot(tm$t, 1:nrow(tm), type="p", pch=as.numeric(tm$marks), xlim=c(0,60),
xlab="Time", ylab="Cumulative recurrences")
abline(v=tU, lty=2, col="blue")
legend("topleft", pch=as.numeric(unique(tm$marks)), legend=as.character(unique(tm$marks)), bty="n", y.intersp=1.5)

#assign observations to the time intervals
assignT <- findInterval(tm$t, int) #number of recurrences within each time interval recurrences <- sapply(1:length(tU), function (i) sum(assignT==i)) #assign mark labels to time intervals markLabels <- c(rep("marks1", 6), rep("marks2", 6)) ##Fit a multitype temporal NHPP model #Fit an interaction model with a power law intensity function ppgPower <- groupedNHPP(LowerTimes=tL, UpperTimes=tU, Ns=recurrences, marks=markLabels, model="interaction", intensity="power") ppgPower[1:3] #compare the MLEs with the starting values ppgPower$starts

##Compute confidence intervals for the gamma0 and gamma1 parameter estimates
#95% confidence intervals (normal-approximation) for gamma0 and gamma1
#of the fitted power law NHPP model

#compute standard errors
SEparms <- sqrt(diag(solve(-1*ppgPower$hessian))) #system before the change point (labeled as "marks1") exp(ppgPower$parameters + c(1,-1)*qnorm(.05/2)*SEparms) #95% ci for gamma0
ppgPower$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1 #compare these with the population values for gamma0 and gamma1 of "marks1" #remember, the population values for gamma0 and gamma1 were 0.3 and 0.7, respectively #system after the change point (labeled as "marks2") round(exp(ppgPower$parameters + c(1,-1)*qnorm(.05/2)*SEparms), 5) #95% ci for gamma0
ppgPower$parameters + c(1,-1)*qnorm(.05/2)*SEparms #95% ci for gamma1 #population values for gamma0 and gamma1 of system 2 were 0.008 and 1.4, respectively ##Predictions: cumulative recurrences #Observed/predicted cumulative number of recurrences at the end of a time interval. recurrencesT <- cumsum(recurrences) #observed cumulative number of recurrences upperBoundInterval <- tU #upper bounds of time intervals upperBoundIntervalMarks1 <- tU[1:6] #upper bounds of time intervals for marks1 upperBoundIntervalMarks2 <- tU[7:12] #upper bounds of time intervals for marks1 #compute predicted cumulative recurrences (and their standard errors) #System 1 ("marks1") sp1 <- "((exp(gamma0.marks1)/(gamma1.marks1+1))*ts^(gamma1.marks1+1))" SEs1 <- sapply(1:length(upperBoundIntervalMarks1), function (i) deltaMethod(ppgPower$parameters,
g=sp1,
vcov.=solve(-1*ppgPower$hessian), constants=c(ts=upperBoundIntervalMarks1[i]))) #95% confidence interval for the predicted number of recurrences predrec1 <- unlist(SEs1[1,]) llci1 <- unlist(SEs1[1,])*exp((qnorm(.025)*unlist(SEs1[2,]))/unlist(SEs1[1,])) luci1 <- unlist(SEs1[1,])*exp((qnorm(.975)*unlist(SEs1[2,]))/unlist(SEs1[1,])) #System 2 ("marks2") #Make sure that the cumulative recurrences at the ending time of System 1 and #the starting time of System 2 (that is, at the change point) are identical. sp2 <- "(((exp(gamma0.marks1)/(gamma1.marks1+1))*tcp^(gamma1.marks1+1))- ((exp(gamma0.marks2)/(gamma1.marks2+1))*tcp^(gamma1.marks2+1)))+ ((exp(gamma0.marks2)/(gamma1.marks2+1))*ts2^(gamma1.marks2+1))" tcp <- upperBoundIntervalMarks1[length(upperBoundIntervalMarks1)] SEs2 <- sapply(1:length(upperBoundIntervalMarks2), function (i) deltaMethod(ppgPower$parameters,
g=sp2,
vcov.=solve(-1*ppgPower\$hessian),
constants=c(tcp=tcp,
ts2=upperBoundIntervalMarks2[i])))
#95% confidence interval for the predicted number of recurrences
predrec2 <- unlist(SEs2[1,])
llci2 <- unlist(SEs2[1,])*exp((qnorm(.025)*unlist(SEs2[2,]))/unlist(SEs2[1,]))
luci2 <- unlist(SEs2[1,])*exp((qnorm(.975)*unlist(SEs2[2,]))/unlist(SEs2[1,]))

#combine data
predrec <- c(predrec1, predrec2)
lci <- c(llci1, llci2)
uci <- c(luci1, luci2)

#plot observed cumulative number of recurrences at the end of each time interval
plot(upperBoundInterval, recurrencesT,
xlab="Upper bound time interval", ylab="Cumulative recurrences",
ylim=c(min(lci), max(uci)))
#add predicted cumulative number of recurrences to the plot
points(upperBoundInterval, predrec, col="red")
#add error bars representing the 95% confindence intervals
arrows(upperBoundInterval, lci, upperBoundInterval , uci,
code=3, length=0.1, angle=90, col="red")
legend=c("Observed", "Predicted"), bty="n", y.intersp=1.5)