R code for implementing a Gaussian sum filter

The code below implements a Gaussian sum filter (GSF) in R. The implemented GSF employs a bank of Gauss-Hermite Kalman filters (GHKF).

A GSF may be used when the process noise, measurement noise, or a posteriori state distribution is expected to follow some non-Gaussian distribution. The GSF approximates such non-Gaussian distributions with a mixture (or sum) of Gaussian distributions.

If necessary, the implemented Gaussian sum filter performs pruning as a Gaussian mixture reduction technique.

Gaussian sum filter

A derivation of the implemented GSF can be found in Arasaratnam, Haykin, and Elliott’s 2007 paper Discrete-Time Nonlinear Filtering Algorithms Using Gauss-Hermite Quadrature.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(fastGHQuad)

#The R functions for the Gaussian sum filter (GSF) that uses a bank of
#Gauss-Hermite Kalman filters can be downloaded from: 
source("http://www.datall-analyse.nl/R/GSF.R")
#Take a look at the function GSFgh (=the GSF), and notice that at the beginning
#of the script you will find a description of the function's arguments.
GSFgh




##EXAMPLE 1

#The following example is similar to an example discussed 
#by Arasaratnam, Haykin, and Elliott in their 2007
#paper "Discrete-Time Nonlinear Filtering Algorithms Using
#Gauss-Hermite Quadrature".

#In this example we will estimate the state of the following
#nonlinear discrete-time system:
#x(k+1) = 0.2*x(k) + 0.01*x(k)^2 + 8*cos(1.2*k+1) + W
#where x(k+1) is the state at time step k+1, x(k) the state at time
#step k, and W the process noise (variance). We will set W to 10 in
#this example.

#The measurement equation is:
#y(k) = x(k)^2 + V
#where V is the measurement noise (variance). We will set V to 0.01.



##TRUE STATES AND MEASUREMENTS

#sample time
dT <- 1
#you may change the value of dT and see how it influences
#the behavior of the Gaussian sum filter

#observation times
t <- seq(1, 50, dT)

#state at t=0
X0 <- runif(1, 0, 1)

#process noise (standard deviation)
Qk <- sqrt(10)

#measurement noise (standard deviation)
Rk <- sqrt(.01)

#parameters in the dynamic model
phi1 <- 0.2
phi2 <- 0.01

#simulate true state for x at the observation times
Xk <- matrix(NA, nrow=length(t)+1, ncol=1)
Yk <- matrix(NA, nrow=length(t), ncol=1)
Xk[1] <- X0

for (i in 2:(length(t)+1)) {
  Xk[i] <- phi1*Xk[i-1] + phi2*Xk[i-1]^2 + 8*cos(1.2*(i)) + rnorm(1)*Qk
  Yk[i-1] <- Xk[i]^2 + rnorm(1)*Rk}

#plot simulated states
plot(c(0,t), Xk, type="o", xlab="Time, k", ylab="x(k)")

#plot measurements
plot(t, Yk, type="o", xlab="Time, k", ylab="y(k)")

#store measurement data
dataEx1 <- Yk



##GAUSSIAN SUM FILTER

#Initial state estimates for the GSF with a bank of 5 GHKF's
m0init <- c(-3, -1.5, .5, 2.5, 4)

#Error covariances of the initial state estimates for the bank of 5 GHKF's
C0init <- replicate(5, list(2))


#or use a for loop
C0init <- vector(10, mode = "list")
C011 <- seq(8e+5, 1.2e+6, length.out=10)
for (i in 1:10) {
  C0init[[i]] <- diag(c(C011[i], 4e+6, 1))
}

#Process noise
vm <- 0 #mean
Q <- list(Qk^2) #variance

#Measurement noise
wm <- 0 #mean
R <- list(Rk^2) #variance


#Dynamic model:
#specifying 1 state, namely [x].
#You may change these initial state estimates and see how they
#influence the behavior of the Gaussian sum filter.
ex1 <- list(m0=m0init, #initial state estimates
            #error covariances of the initial state estimates:
            #this vector reflects the uncertainty in our initial state estimates
            #you may change the values in this vector and see how they influence
            #the behavior of the Gaussian sum filter
            C0=C0init,
            #process noise
            vm=vm,
            Q=Q,
            #measurement noise
            wm=wm,
            R=R)

#Specify the state transition function:
#WARNING: always use arguments x and k when specifying the GGfunction
GGfunction <- function (x, k){
  xk <- x[1]
  phi1*xk + phi2*xk^2 + 8*cos(1.2*(k+1))}

#Specify the observation/measurement function:
#WARNING: always use arguments x and k when specifying the FFfunction
FFfunction <- function (x, k){
  xk <- x[1]
  xk^2}



##Compute the filtered (a posteriori) state estimates with the GSF
gsf1 <- GSFgh(y=dataEx1, mod=ex1, order=3, sqrtMethod="Cholesky",
              GGfunction=GGfunction, FFfunction=FFfunction,
              pruning=FALSE)



##As a comparison, we will also fit the Gauss-Hermite Kalman Filter (GHKF)
#to the data

source("http://www.datall-analyse.nl/R/GHKF.R") #download GHKF

#Dynamic model:
ex1b <- list(m0=0.5, #initial state estimate
             #error covariance of the initial state estimate:
             C0=2,
             #measurement noise
             V=Rk^2,
             #process noise
             W=Qk^2)

#Compute the filtered (a posteriori) state estimates with the GHKF
ghkf1 <- GHKF(y=dataEx1, mod=ex1b, order=3, sqrtMethod="Cholesky",
              GGfunction=GGfunction, FFfunction=FFfunction)



##plot the filtered state estimate for x
plot(c(0,t), Xk, type="o", col=c(gray(level=.5)),
     ylim=range(cbind(ghkf1$m, gsf1$m, Xk)),
     ylab="x(k)", xlab="Time, k")
lines(c(0,t), ghkf1$m, lty=2, col="blue", lwd=1)
lines(c(0,t), gsf1$m, lty=2, col="red", lwd=1)
legend("topleft", lty=c(1, 2, 2),
       col=c(gray(level=.5), "blue", "red"),
       legend=c("true state", "GHKF", "GSF"),
       bty="n", y.intersp=1.2, cex=.7)



##compute the confidence intervals for the filtered state estimates of the GSF

#95% ci for x
seFX <- sqrt(unlist(lapply(gsf1$C, function(x) diag(x)[1])))
ciFX <- gsf1$m[,1] + qnorm(.05/2)*seFX%o%c(1, -1)

#plot confidence intervals for x
plot(c(0,t), Xk, type="l", col=c(gray(level=.7)), lwd=1.5,
     ylim=(range(ciFX)),
     ylab="x(k)", xlab="Time, k")
lines(c(0,t), ciFX[,1], lty=2, col="blue")
lines(c(0,t), ciFX[,2], lty=2, col="blue")
legend("topright", lty=c(1, 2), 
       col=c(gray(level=.5), "blue"),
       legend=c("true state", "filtered state"),
       bty="n", y.intersp=1.2, cex=.7)



##compute the confidence intervals for the filtered state estimates of the GHKF
seFX <- sqrt(unlist(lapply(ghkf1$C, function(x) diag(x)[1])))
ciFX <- ghkf1$m[,1] + qnorm(.05/2)*seFX%o%c(1, -1)

#plot confidence intervals for x
plot(c(0,t), Xk, type="l", col=c(gray(level=.7)), lwd=1.5,
     ylim=(range(ciFX)),
     ylab="x(k)", xlab="Time, k")
lines(c(0,t), ciFX[,1], lty=2, col="blue")
lines(c(0,t), ciFX[,2], lty=2, col="blue")
legend("topright", lty=c(1, 2), 
       col=c(gray(level=.5), "blue"),
       legend=c("true state", "filtered state"),
       bty="n", y.intersp=1.2, cex=.7)





#EXAMPLE 2

#Similar to Example 1, but this time the process noise is assumed
#to follow a Gamma distribution with shape=3 and scale=2.


##TRUE STATES AND MEASUREMENTS

#sample time
dT <- 1

#observation times
t <- seq(1, 50, dT)

#state at t=0
X0 <- runif(1, 0, 1)

#process noise (standard deviation)
Qk <- rgamma(length(t), shape=3, scale=2)

#measurement noise (standard deviation)
Rk <- sqrt(.01)

#parameters in the dynamic model
phi1 <- 0.2
phi2 <- 0.01

#simulate true state for x at the observation times
Xk <- matrix(NA, nrow=length(t)+1, ncol=1)
Yk <- matrix(NA, nrow=length(t), ncol=1)
Xk[1] <- X0

for (i in 2:(length(t)+1)) {
  Xk[i] <- phi1*Xk[i-1] + phi2*Xk[i-1]^2 + 8*cos(1.2*(i)) + Qk[i-1]
  Yk[i-1] <- Xk[i]^2 + rnorm(1)*Rk}

#plot simulated states
plot(c(0,t), Xk, type="o", xlab="Time, k", ylab="x(k)")

#plot measurements
plot(t, Yk, type="o", xlab="Time, k", ylab="y(k)")

#store measurement data
dataEx2 <- Yk

#furthermore, we assume that the observations for time steps 5 to 10 are
#missing
dataEx2[5:10] <- NA




##GAUSSIAN SUM FILTER

#Initial state estimates for a bank of 5 GHKF's
m0init <- c(-3, -1.5, .5, 2.5, 4)

#Error covariances of the initial state estimates for the 5 GHKF's
C0init <- replicate(5, list(2))


#or use a for loop
C0init <- vector(10, mode = "list")
C011 <- seq(8e+5, 1.2e+6, length.out=10)
for (i in 1:10) {
  C0init[[i]] <- diag(c(C011[i], 4e+6, 1))
}

#Process noise:
#approximate the Gamma distribution with a 3-component Gaussian mixture 
Qp <- rgamma(1000, shape=3, scale=2)
plot(density(Qp)) #inspect the Gamma density

#use the EM-algorithm for fitting a 3-component Gaussian mixture
library(flexmix)
gm3 <- stepFlexmix(Qp~1, k=3, nrep=15)

gm3@prior #estimated mixing proportions
parameters(gm3, component=1:3) #mean and standard deviations of the 3 components

vm3 <- as.vector(parameters(gm3, component=1:3)[1,])
Q3 <- as.list(as.vector(parameters(gm3, component=1:3)[2,])^2)
wV3 <- gm3@prior

#Measurement noise
wm <- 0 #mean
R <- list(Rk^2) #variance


#Dynamic model:
#specifying 1 state, namely [x].
#You may change these initial state estimates and see how they
#influence the behavior of the Gaussian sum filter.
ex2 <- list(m0=m0init, #initial state estimates
            #error covariances of the initial state estimates:
            #this vector reflects the uncertainty in our initial state estimates
            #you may change the values in this vector and see how they influence
            #the behavior of the Gaussian sum filter
            C0=C0init,
            #process noise
            vm=vm3,
            Q=Q3,
            #measurement noise
            wm=wm,
            R=R)

#Specify the state transition function:
#WARNING: always use arguments x and k when specifying the GGfunction
GGfunction <- function (x, k){
  xk <- x[1]
  phi1*xk + phi2*xk^2 + 8*cos(1.2*(k+1))}

#Specify the observation/measurement function:
#WARNING: always use arguments x and k when specifying the FFfunction
FFfunction <- function (x, k){
  xk <- x[1]
  xk^2}


##Compute the filtered (a posteriori) state estimates with the GSF.
#Note that at the end of n-th time step, 5*(3*1)^n Gaussian densities are used
#for computing the a posteriori state estimate (where 5 is the number of Gaussian
#components used for approximating the initial density, 3 the number of components
#for the process noise, and 1 the number of components for the measurement noise).
#Such an exponential growth of densities makes the use of the GSF impractical.
#Therefore, pruning will be applied. More specifically, in this example we will
#retain the 5 densities with the largest weights after computing the a posteriori
#estimate, and discard (prune) the remaining ones at the end of each time step. 
gsf2 <- GSFgh(y=dataEx2, mod=ex2, order=3, sqrtMethod="Cholesky",
              wV=wV3, #weights for the process noise Gaussian mixture
              GGfunction=GGfunction, FFfunction=FFfunction,
              pruning=TRUE, retain=5)



##As a comparison, we will approximate the Gamma distribution with
#a single Gaussian distribution 
gm1 <- stepFlexmix(Qp~1, k=1, nrep=15)

vm1 <- as.vector(parameters(gm1, component=1)[1,])
Q1 <- as.list(as.vector(parameters(gm1, component=1)[2,])^2)

#Dynamic model:
#specifying 1 state, namely [x].
#You may change change these initial state estimates and see how they
#influence the behavior of the Gaussian sum filter.
ex2b <- list(m0=m0init, #initial state estimates
             #error covariances of the initial state estimates:
             C0=C0init,
             #process noise
             vm=vm1,
             Q=Q1,
             #measurement noise
             wm=wm,
             R=R)


##Compute the filtered (a posteriori) state estimates with the GSF.
#Note that at the end of the n-th time step, 5*(1*1)^n densities are used for
#computing the a posteriori state estimate. In other words, the number of
#densities for computing the a posteriori estimate will not grow at
#each time step. As a consequence, pruning is not needed anymore. 
gsf3 <- GSFgh(y=dataEx2, mod=ex2b, order=3, sqrtMethod="Cholesky",
              GGfunction=GGfunction, FFfunction=FFfunction,
              pruning=FALSE)



#Plot the filtered state estimate for x
plot(c(0,t), Xk, type="o", col=c(gray(level=.5)),
     ylim=range(cbind(gsf2$m, gsf3$m, Xk)),
     ylab="x(k)", xlab="Time, k")
lines(c(0,t), gsf2$m, lty=2, col="blue", lwd=1)
lines(c(0,t), gsf3$m, lty=2, col="red", lwd=1)
legend("topleft", lty=c(1, 2, 2),
       col=c(gray(level=.5), "blue", "red"),
       legend=c("true state", "GSF 5-3-1", "GSF 5-1-1"),
       bty="n", y.intersp=1.2, cex=.7)



##Compute the confidence intervals for the filtered state estimates
#of the GSF 5-3-1.

#95% ci for x
seFX <- sqrt(unlist(lapply(gsf2$C, function(x) diag(x)[1])))
ciFX <- gsf2$m[,1] + qnorm(.05/2)*seFX%o%c(1, -1)

#plot confidence intervals for x
plot(c(0,t), Xk, type="l", col=c(gray(level=.7)), lwd=1.5,
     ylim=(range(ciFX)),
     ylab="x(k)", xlab="Time, k", main="GSF 5-3-1")
lines(c(0,t), ciFX[,1], lty=2, col="blue")
lines(c(0,t), ciFX[,2], lty=2, col="blue")
legend("topright", lty=c(1, 2), 
       col=c(gray(level=.5), "blue"),
       legend=c("true state", "filtered state"),
       bty="n", y.intersp=1.2, cex=.7)

#Compute the confidence intervals for the filtered state estimates
#of the GSF 5-1-1.
seFX <- sqrt(unlist(lapply(gsf3$C, function(x) diag(x)[1])))
ciFX <- gsf3$m[,1] + qnorm(.05/2)*seFX%o%c(1, -1)

#plot confidence intervals for x
plot(c(0,t), Xk, type="l", col=c(gray(level=.7)), lwd=1.5,
     ylim=(range(ciFX)),
     ylab="x(k)", xlab="Time, k", main="GSF 5-1-1")
lines(c(0,t), ciFX[,1], lty=2, col="blue")
lines(c(0,t), ciFX[,2], lty=2, col="blue")
legend("topright", lty=c(1, 2), 
       col=c(gray(level=.5), "blue"),
       legend=c("true state", "filtered state"),
       bty="n", y.intersp=1.2, cex=.7)