# R code for estimating the parameters of a Gauss-Hermite Kalman filter model using likelihood maximization

In one of my previous blog posts I demonstrated how to implement and apply the Gauss-Hermite Kalman Filter (GHKF) in R.
In this post I will show how to fit unknown parameters of a GHKF model by means of likelihood maximization. See this page for an overview of all of Stefan’s R code blog posts.

R code

```r
library(fastGHQuad)

#The R functions for the Gauss-Hermite Kalman filter (GHKF)
source("http://www.datall-analyse.nl/R/GHKF.R")
#Take a look at the function GHKF (=the GHKF), and notice that
#at the beginning of the script you will find
#a description of the function's arguments.
GHKF

##EXAMPLE 1

#The following example is similar to an example discussed
#by Arasaratnam, Haykin, and Elliott in their 2007
#paper "Discrete-Time Nonlinear Filtering Algorithms Using
#Gauss-Hermite Quadrature".

#In this example we will estimate the state of the following
#nonlinear discrete-time system:
#x(k+1) = 0.2*x(k) + 0.01*x(k)^2 + 8*cos(1.2*(k+1)) + W
#where x(k+1) is the state at time step k+1, x(k) the state at time
#step k, and W the process noise (variance). We will set W to 10 in
#this example.

#The measurement equation is:
#y(k) = x(k)^2 + V
#where V is the measurement noise (variance). We will set V to 0.01.

##TRUE STATES AND MEASUREMENTS

#sample time
dT <- 1
#you may change the value of dT and see how it influences
#the behavior of the Gauss-Hermite Kalman filter

#observation times
t <- seq(1, 150, dT)

#state at t=0
X0 <- 0

#process noise (standard deviation)
Wk <- sqrt(10)

#measurement noise (standard deviation)
Vk <- sqrt(.01)

#parameters in the dynamic model
phi1 <- 0.2
phi2 <- 0.01

#simulate true state for x at the observation times
Xk <- matrix(NA, nrow=length(t)+1, ncol=1)
Yk <- matrix(NA, nrow=length(t), ncol=1)
#assign the initial state INTO the preallocated matrix
#(Xk <- X0 would discard the preallocation and grow a vector instead)
Xk[1] <- X0

for (i in 2:(length(t)+1)) {
  Xk[i] <- phi1*Xk[i-1] + phi2*Xk[i-1]^2 + 8*cos(1.2*(i)) + rnorm(1)*Wk
  Yk[i-1] <- Xk[i]^2 + rnorm(1)*Vk
}

#plot simulated states
plot(c(0,t), Xk, type="o", xlab="Time, k", ylab="x(k)")

#plot measurements
plot(t, Yk, type="o", xlab="Time, k", ylab="y(k)")

#store measurement data
dataEx1 <- Yk

##ESTIMATING THE PARAMETERS OF A GAUSS-HERMITE KALMAN FILTER (GHKF) MODEL

#State transition function for the scalar state x at time step k.
#WARNING: always use arguments x and k when specifying the GGfunction
GGfunction <- function(x, k) {
  phi1 * x + phi2 * x^2 + 8 * cos(1.2 * (k + 1))
}

#Observation/measurement function: y(k) = x(k)^2.
#WARNING: always use arguments x and k when specifying the FFfunction
FFfunction <- function(x, k) {
  x^2
}

#Function for computing the negative log-likelihood of a GHKF model.
#Arguments:
# x: numeric vector of length 2 with the log of the measurement
#    noise variance (x[1]) and the log of the process noise
#    variance (x[2]); the log scale keeps the variances positive.
# data: the measurement series to filter.
llikss <- function (x, data) {
  #Specify dynamic model having 1 state, namely [x].
  mod <- list(
    #Initial state estimate for x.
    #Note that the specified initial state estimate (at t=0) below for x
    #deviates from the value that was used for generating the data (i.e., 0).
    #You may change the initial state estimate too and see how it
    #influences the behavior of the Gauss-Hermite Kalman filter.
    m0=.5,
    #Error covariance of the initial state estimate:
    #this vector reflects the uncertainty in our initial state estimate.
    #You may change the value in this vector and see how it influences
    #the behavior of the Kalman filter.
    C0=5,
    #Measurement noise (to be estimated)
    V=exp(x[1]),
    #Process noise (to be estimated)
    W=exp(x[2]))

  #compute and return the negative log-likelihood
  GHKF(y=data, mod=mod,
       FFfunction=FFfunction, GGfunction=GGfunction,
       simplify=TRUE, logLik=TRUE)$logLik
}

#Minimize the negative log-likelihood:
#instead of maximizing the log-likelihood, we will minimize the negative
#log-likelihood.
#Use some arbitrary starting values for the to be estimated process and
#measurement noise (here we will use log(1e-1) for the measurement noise,
#and log(5) for the process noise, but it is possible to use
#some other arbitrary starting values).
mod <- optim(c(log(1e-1), log(5)), llikss,
             lower=log(1e-8),
             method="L-BFGS-B", hessian=TRUE,
             data=dataEx1)
#In case the BFGS method generates an error, then try using the
#Nelder-Mead method. For one-dimensional optimization problems
#use the Brent method.

#Check for convergence
mod$message
mod$convergence #successful optimization yields value 0

#Estimated measurement and process noise
exp(mod$par)

#Construct 95% confidence intervals for the estimated noises
seParms <- sqrt(diag(solve(mod$hessian)))
exp(mod$par + qnorm(.05/2)*seParms%o%c(1,-1))

#True value for the measurement noise
Vk^2

#True value for the process noise
Wk^2

#finally, since this is a two-dimensional optimization problem, we may
#explore the log-likelihood function in the neighborhood
#of the optimal values by means of a contour plot

#construct grid and calculate log-likelihood values
Vvalues <- log(seq(.001, .5, length.out=5))
Wvalues <- log(seq(5, 15, length.out=10))
lbgrid <- expand.grid(v=Vvalues, w=Wvalues)
lbgrid$ll <- apply(lbgrid, 1, llikss, data=dataEx1)
ll.matrix <- matrix(lbgrid$ll, nrow=length(Vvalues))

#contour plot of log-likelihood values
contour(exp(Vvalues), exp(Wvalues), ll.matrix,
        xlab="v", ylab="w")
#position of start values for V and W (blue cross)
points(x=1e-1, y=5, col="blue", pch=4)
#position of MLEs for V and W (red dot):
#plot the first parameter (V) on the x-axis and the second (W) on the y-axis
points(x=exp(mod$par[1]), y=exp(mod$par[2]), col="red", pch=20)
#Notice that for the measurement noise V, the log-likelihood function is
#practically flat.
#As a result, the confidence interval for V will be (very) wide.

##EXAMPLE 2

#In this example we will apply the GHKF to the
#Lorenz system of nonlinear differential equations.
#en.wikipedia.org/wiki/Lorenz_system

##TRUE STATES

#Generate the true states for x1, x2, and x3 with the
#forward Euler method.
dt <- 1/100                 #time step for Euler integration
tt <- 5                     #upper bound of time window
st <- seq(0, tt, by=dt)     #lower time bounds of the integration intervals
ns <- length(st)            #number of Euler integrations
x <- matrix(0, ncol=3, nrow=ns)  #matrix holding the simulated states
x[1,] <- c(.6, .8, .4)      #state values at t=0
colnames(x) <- c("x1", "x2", "x3")

#parameters in the Lorenz model
sigma <- 10
rho <- 28
beta <- 1.25

#simulate the true states, one Euler step at a time
for (i in 2:ns) {
  prev <- x[i - 1, ]
  x[i, 1] <- prev[1] + (sigma * (prev[2] - prev[1])) * dt        #x1
  x[i, 2] <- prev[2] + (prev[1] * (rho - prev[3]) - prev[2]) * dt #x2
  x[i, 3] <- prev[3] + (prev[1] * prev[2] - beta * prev[3]) * dt  #x3
}

#plot each state against time
par(mfrow=c(3, 1))
plot(st, x[,1], type="l", ylab="x1", xlab="Time")
plot(st, x[,2], type="l", ylab="x2", xlab="Time")
plot(st, x[,3], type="l", ylab="x3", xlab="Time")
par(mfrow=c(1, 1))

##MEASUREMENTS

#Take measurements with a sample time of .01
dT <- .01 #sample time for measurements
#you may change the value of dT and see how it influences
#the behavior of the Gauss-Hermite Kalman filter
nm <- tt/dT #total number of measurements
mt <- seq(dT, tt, dT) #measurement times

#Standard deviations for the measurement noise of x1 and x2
sigmaX1 <- sqrt(25) #measurement noise x1
sigmaX2 <- sqrt(30) #measurement noise x2

#Measurements for x1 and x2 at specified measurement times

#x1
mx1 <- sapply(1:nm, function(i) x[i*((ns-1)/nm) + 1, 1] + rnorm(1, 0, sigmaX1))
head(cbind(mt, mx1), n=15) #check the generated measurements

#x2
mx2 <- sapply(1:nm, function(i) x[i*((ns-1)/nm) + 1, 2] + rnorm(1, 0, sigmaX2))
head(cbind(mt, mx2), n=15) #check the generated measurements

#However, we suppose that the actual measurements are made with a frequency
#of 20 Hz (and not with the sample time of .01 that was used above).
#Therefore, we are going to replace the values in between the actual
#measurements with NA.
#Note, however, that the Kalman filter can handle missing values (NA's).
#Moreover, the Kalman filter will actually compute the state
#values (for x1, x2, and x3) at the steps in between the
#measurements (i.e., where we will now set the values to NA).

#indices of the actual 20 Hz measurements (one every .05 time units)
obsIdx <- seq(.05/dT, length(mx1), .05/dT)

#set the values at intermediate steps to NA for the x1 measurements
dataEx2x1 <- rep(NA, length(mx1))
dataEx2x1[obsIdx] <- mx1[obsIdx]

#set the values at intermediate steps to NA for the x2 measurements
dataEx2x2 <- rep(NA, length(mx2))
dataEx2x2[obsIdx] <- mx2[obsIdx]

#Store measurements
dataEx2 <- cbind(dataEx2x1, dataEx2x2)
head(cbind(mt, dataEx2), n=15) #check the generated measurements

##ESTIMATING THE PARAMETERS OF A GAUSS-HERMITE KALMAN FILTER (GHKF) MODEL

#Specify the state transition function:
#note that we will use as state functions the difference equations
#given by Euler's forward method. These difference equations will yield valid
#estimates for x1, x2, and x3 at each time step as long
#as the specified value for dT above is relatively small.
#WARNING: always use arguments x and k when specifying the GGfunction
GGfunction <- function (x, k){
  #extract the three states from the state vector
  x1 <- x[1]; x2 <- x[2]; x3 <- x[3]
  c(x1 + (sigma*(x2-x1))*dT,
    x2 + (x1*(rho-x3) - x2)*dT,
    x3 + (x1*x2 - beta*x3)*dT)}

#Specify the observation/measurement function:
#only the first two states (x1 and x2) are observed.
#WARNING: always use arguments x and k when specifying the FFfunction
FFfunction <- function (x, k){
  #extract the two observed states from the state vector
  x1 <- x[1]; x2 <- x[2]
  c(x1,
    x2)}

#Function for computing the negative log-likelihood of a GHKF model.
#Arguments:
# x: numeric vector of length 2 with the log-variances of the x1
#    measurement noise (x[1]) and the x2 measurement noise (x[2]).
# data: the (possibly NA-containing) measurement matrix to filter.
llikss <- function (x, data) {
  #Specify dynamic model having 3 states, namely [x1, x2, and x3].
  mod <- list(
    #Initial state estimates for x1, x2, and x3.
    #Note that the specified initial state estimates (at t=0) below for x1 and
    #x2 deviate from the values that were used for generating the
    #data (i.e., .6 and .8).
    #You may change these initial state estimates too and see how they
    #influence the behavior of the Gauss-Hermite Kalman filter.
    m0=c(4, .2, .4),
    #Error covariances of the initial state estimates:
    #this matrix reflects the uncertainty in our initial state estimates.
    #You may change the values in this matrix and see how they influence
    #the behavior of the Kalman filter.
    C0=diag(c(1e+2, 1e+2, 1e+1)),
    #Measurement noise (to be estimated)
    V=matrix(c(exp(x[1]), 0,
               0, exp(x[2])), nrow=2, byrow=TRUE),
    #Process noise
    W=diag(rep(0,3)))
  #Note that all covariances in the process noise matrix (W) are set to zero.
  #This makes sense since the change in x1, x2 and x3 at each time step is
  #fully explained by the Lorenz model.
  #In other words, we assume that we correctly specified the model describing our
  #states, and we furthermore assume that there are no random disturbances
  #influencing the three states (x1, x2, and x3) at each specified time step.

  #Before computing the negative log-likelihood for the current parameter
  #estimates, first check if the resulting covariance matrix
  #for the measurement noise (=V) is positive semidefinite.
  #If the resulting covariance matrix appears not to be positive
  #semidefinite, then assign NaN to the negative log-likelihood value.
  if (all(eigen(mod$V)$values >= 0))
    GHKF(data, mod,
         FFfunction=FFfunction, GGfunction=GGfunction,
         simplify=TRUE, logLik=TRUE)$logLik
  else NaN
}

#Minimize the negative log-likelihood:
#instead of maximizing the log-likelihood, we will minimize the negative
#log-likelihood.
#Use some arbitrary starting values for the to be estimated measurement
#noise (here we will use log(10), but it is possible to use
#some other arbitrary starting values).
mod <- optim(c(log(10), log(10)), llikss,
             lower=log(1e-8),
             method="L-BFGS-B", hessian=TRUE,
             data=dataEx2)
#In case the BFGS method generates an error, then try using the
#Nelder-Mead method instead.

#Check for convergence
mod$message
mod$convergence #successful optimization yields value 0

#Estimated measurement covariances
exp(mod$par)

#Construct 95% confidence interval for the estimated measurement covariances
seParms <- sqrt(diag(solve(mod$hessian)))
exp(mod$par + qnorm(.05/2)*seParms%o%c(1,-1))

#True values for the measurement covariances
c(sigmaX1^2, sigmaX2^2)

#Check by hand if the resulting V-matrix is indeed positive semidefinite
v <- matrix(c(exp(mod$par[1]), 0,
              0, exp(mod$par[2])), nrow=2, byrow=TRUE)
all(eigen(v)$values >= 0)

##Compute the filtered (a posteriori) state estimates

#Using the estimated measurement variances (one per observed state)
estVarPop <- diag(exp(mod$par))

#Specify dynamic model having 3 states, namely [x1, x2, and x3].
ex2 <- list(m0=c(4, .2, .4), #initial state estimates
            #error covariances of the initial state estimates:
            C0=diag(c(1e+2, 1e+2, 1e+1)),
            #estimated measurement noise
            V=estVarPop,
            #process noise
            W=diag(rep(0,3)))

##Compute the filtered (a posteriori) state estimates with the GHKF
#If the filter generates an error, then try to decrease the
#error covariances of the initial state estimates (as specified in C0).
#(For instance, set the value for x1 from 1e+2 to 1e+1)
ghkf2 <- GHKF(y=dataEx2, mod=ex2, order=3, sqrtMethod="Cholesky",
              GGfunction=GGfunction, FFfunction=FFfunction)

#We will plot the results for state x1

#x1: true state (gray) versus filtered state estimates (blue, dashed)
plot(st, x[,1], type="l", col=c(gray(level=.5)),
     ylim=range(cbind(x[,1], ghkf2$m[,1])),
     ylab="x1(k)", xlab="Time, k")
lines(c(0,mt), ghkf2$m[,1], lty=2, col="blue", lwd=1)
legend("topright", lty=c(1, 2),
       col=c(gray(level=.5), "blue"),
       legend=c("true state", "GHKF"),
       bty="n", y.intersp=1.2, cex=.7)

#95% ci for x1:
#take the variance of x1 (the first diagonal element) from each
#filtered error covariance matrix
seFX1 <- sqrt(unlist(lapply(ghkf2$C, function(x) diag(x)[1])))
ciFX1 <- ghkf2$m[,1] + qnorm(.05/2)*seFX1%o%c(1, -1)

#plot confidence intervals for x1
plot(st, x[,1], type="l", col=c(gray(level=.7)), lwd=1.5,
     ylim=range(cbind(x[,1], ciFX1)),
     ylab="x1(k)", xlab="Time, k")
lines(c(0,mt), ciFX1[,1], lty=2, col="blue")
lines(c(0,mt), ciFX1[,2], lty=2, col="blue")
legend("topright", lty=c(1, 2),
       col=c(gray(level=.5), "blue"),
       legend=c("true state", "filtered state"),
       bty="n", y.intersp=1.2, cex=.7)
```