R code for fitting a mixture distribution to censored data

The following code fits a mixture distribution to right censored, interval censored, or complete (uncensored) data in R. The mixture distribution is fitted with the Expectation-Maximization (EM) algorithm.
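
In outline, the EM algorithm alternates between two steps until the log-likelihood converges: an E-step, which computes for each observation the posterior probability of belonging to each component, and an M-step, which re-estimates the mixing proportions and the location and scale parameters (mu, sigma) of each component by weighted maximum likelihood. The fitted mixture density is the weighted sum f(t) = p1*f1(t) + ... + pn*fn(t), where the pj are the estimated mixing proportions.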

The R code demonstrates how to fit (1) a mixture of Weibull distributions, (2) a mixture of lognormal distributions, and (3) a mixture of Gaussian distributions.

[Figure: a finite mixture model fitted to censored data]

See this blog post for fitting a Finite Mixture Model to reliability (or survival) data in R.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s R code blog posts.

R code

library(survival) #provides Surv, survreg, and survfit
library(SPREDA) #provides the smallest extreme value functions dsev, psev, qsev, and rsev
library(LearnBayes) #provides rdirichlet


##RIGHT CENSORED DATA

#function for fitting a mixture distribution to RIGHT censored or complete data
mixcensored <- function (y, d=rep(1, length(y)), wt=rep(1, length(y)),
                         dist="weibull", n, cluster=NULL, classify="EM",
                         maxiter=100, tol=1e-6) {
  #y are the observations
  #d is the censoring indicator (1=failure; 0=censored)
  #wt are the weights for the observations
  #dist: either the "weibull", "lognormal", or "gaussian" distribution
  #n is the number of components
  #cluster: start with random initialization of posterior probabilities (=NULL), or
  # a matrix with n columns of initial posterior probabilities for the observations
  #classify: "EM", "CEM", or "SEM" strategy (CEM assigns each observation
  # to its most probable component; SEM draws component memberships at
  # random from the posterior probabilities)
  #maxiter is the maximum number of iterations
  #tol is the convergence criterion
  
  nobs <- sum(wt) #number of observations
  
  parms <- matrix(NA, ncol=3, nrow=n)
  colnames(parms) <- c("mu", "sigma", "logLik")
  stdErr <- matrix(NA, ncol=2, nrow=n)
  colnames(stdErr) <- c("mu", "log(sigma)")
  posteriorP <- matrix(NA, ncol=n, nrow=length(y))
  P <- matrix(NA, ncol=n, nrow=length(y))
  
  iteration <- 0 #initialize iteration counter
  loglikInit <- logLikC <- 0 #initialize log-likelihood estimates
  
  #initialize posterior probabilities
  if (is.null(cluster)) {
    alpha <- rep(.1, n)
    posteriorP <- rdirichlet(length(y), alpha)}
  else {posteriorP <- cluster}
  
  while (iteration < maxiter) {
    #estimate prior probabilities
    priorP <- apply(wt*posteriorP, 2, sum)/nobs
    
    #estimate distribution parameters for each component (M-step)
    if (classify=="EM"){
      for (i in 1:n) {
        wtp <- ifelse(wt*posteriorP[,i]<=0, 1e-15, wt*posteriorP[,i])
        cp <- survreg(Surv(y,d)~1, weights=wtp, dist=dist)
        parms[i,] <- c(coef(cp)[1], cp$scale, logLik(cp)[1])
        stdErr[i,] <- sqrt(diag(vcov(cp)))}
    }
    
    else if (classify=="CEM"){
      compPost <- apply(posteriorP, 1, which.max)
      for (i in 1:n) {
        cp <- survreg(Surv(y,d)~1, weights=wt, dist=dist, subset=(compPost==i))
        parms[i,] <- c(coef(cp)[1], cp$scale, logLik(cp)[1])
        stdErr[i,] <- sqrt(diag(vcov(cp)))}
    }
    
    else if (classify=="SEM"){
      compPost <- apply(posteriorP, 1, function (p) sample(x=1:n, size=1, prob=p))
      for (i in 1:n) {
        cp <- survreg(Surv(y,d)~1, weights=wt, dist=dist, subset=(compPost==i))
        parms[i,] <- c(coef(cp)[1], cp$scale, logLik(cp)[1])
        stdErr[i,] <- sqrt(diag(vcov(cp)))}
    }
    
    #compute the (complete) log-likelihood value: the sum of the weighted
    #component log-likelihoods plus the expected log mixing proportions
    logLikC <- sum(parms[,3]) + sum(wt*(posteriorP%*%log(priorP)))
    logLikC <- ifelse(is.na(logLikC), 0, logLikC)
    
    #estimate posterior probabilities (E-step); a failure contributes its
    #density, a right censored observation its survival probability
    if (dist=="weibull") {
      for (j in 1:n) {
        mu <- parms[j,1]; sigma <- parms[j,2]
        z <- (log(y)-mu)/sigma
        P[,j] <- priorP[j]*((1/(sigma*y)*dsev(z))^d)*((1-psev(z))^(1-d))}
    }
    
    else if (dist=="lognormal") {
      for (j in 1:n) {
        mu <- parms[j,1]; sigma <- parms[j,2]
        z <- (log(y)-mu)/sigma
        P[,j] <- priorP[j]*((1/(sigma*y)*dnorm(z))^d)*((1-pnorm(z))^(1-d))}
    }
    
    else if (dist=="gaussian") {
      for (j in 1:n) {
        mu <- parms[j,1]; sigma <- parms[j,2]
        z <- (y-mu)/sigma
        P[,j] <- priorP[j]*((1/(sigma)*dnorm(z))^d)*((1-pnorm(z))^(1-d))}
    }
    
    posteriorP <- P/rowSums(P)
    
    #check convergence criterion: stop when the relative change in
    #the log-likelihood is smaller than tol
    if (( abs(logLikC-loglikInit) / (abs(loglikInit)+.001*tol) ) < tol) break
    loglikInit <- logLikC #update log-likelihood
    iteration <- iteration + 1 #increment counter
  }
  
  if (iteration == maxiter) warning("Maximum number of iterations reached")
  #return the estimated component parameters, mixing proportions (prior),
  #log-likelihood, AIC, and BIC (the model has n-1+2*n free parameters:
  #n-1 mixing proportions plus mu and sigma for each of the n components),
  #together with the standard errors and the posterior probabilities
  list(components=parms[,1:2], prior=priorP, loglik=logLikC,
       AIC=-2*logLikC + 2*(n-1+2*n), BIC=-2*logLikC + (n-1+2*n)*log(nobs),
       strategy=classify, distribution=dist, iterations=iteration,
       standardError=stdErr, posterior=posteriorP)
}





##mixture of WEIBULL distributions

#generate mixture of Weibull distributions
N <- 200 #number of observations
probs <- c(.3, .7) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)
mus <- c(4, 8)
sigmas <- c(.7, .35)

dataT <- exp(mus[component] + rsev(N)*sigmas[component])
plot(density(dataT)) #plot mixture of distributions

#apply censoring (here: Type I censoring, with a fixed censoring time of 4000)
#failure=1, censored=0
Cens <- ifelse(dataT>4000, 0, 1)
N-sum(Cens) #number of censored observations
dataT <- ifelse(Cens==0, 4000, dataT)

#fit distribution to components
(comp1 <- survreg(Surv(dataT, Cens)~1, subset=(component==1), dist="weibull"))
(comp2 <- survreg(Surv(dataT, Cens)~1, subset=(component==2), dist="weibull"))



#fit mixture distribution to data
mix <- mixcensored(y=dataT, d=Cens, dist="weibull", n=2)
mix[1:6]
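
#mixcensored starts by default from a random initialization of the
#posterior probabilities; alternatively, supply initial posterior
#probabilities via the cluster argument
#(a minimal sketch, assuming a k-means split on the log failure times as
#starting point; this is just one possible choice, and kmeans is part of
#R's stats package)
km <- kmeans(log(dataT), centers=2)
initP <- outer(km$cluster, 1:2, "==")*1 #matrix with n columns of 0/1 memberships
mixInit <- mixcensored(y=dataT, d=Cens, dist="weibull", n=2, cluster=initP)
mixInit[1:6]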

#component membership based on posterior probabilities
classificationPost <- apply(mix$posterior, 1, which.max)

#separation of components
par(mfrow=c(1, 2))
hist(mix$posterior[,1][which(classificationPost==1)], xlab="Component 1", main="")
hist(mix$posterior[,2][which(classificationPost==2)], xlab="Component 2", main="")
par(mfrow=c(1, 1))

#classification table of true component membership and the component
#membership based on the estimated posterior probability
table(component, classificationPost)

#use the standard errors for computing normal-approximation confidence intervals
mix[9]
#95% confidence interval for mu of component 1
mix$components[1,1] + c(1,-1)*qnorm(.05/2)*mix$standardError[1,1]
#95% confidence interval for sigma of component 2
exp(log(mix$components[2,2]) + c(1,-1)*qnorm(.05/2)*mix$standardError[2,2])



#run mixcensored repeatedly (applying the EM strategy)
#that is, fit repeatedly a mixture distribution to the data,
#and start each fit with a different random initialization of
#the posterior probabilities
mixmult <- replicate(5, mixcensored(y=dataT, d=Cens, dist="weibull", n=2,
                                    classify="EM", maxiter=200))
#log-likelihood values for the different solutions
mixmult[3,]
#select the solution with the maximum value of the log-likelihood
mixmultEM <- mixmult[, which.max(mixmult[3,])]
mixmultEM[1:6]



#run mixcensored repeatedly (applying the CEM strategy)
mixmult <- replicate(5, mixcensored(y=dataT, d=Cens, dist="weibull", n=2,
                                    classify="CEM", maxiter=200))
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmultCEM <- mixmult[, which.max(mixmult[3,])]
mixmultCEM[1:6]



#run mixcensored repeatedly (applying the SEM strategy)
mixmult <- replicate(5, mixcensored(y=dataT, d=Cens, dist="weibull", n=2,
                                    classify="SEM", maxiter=200))
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmultSEM <- mixmult[, which.max(mixmult[3,])]
mixmultSEM[1:6]



#compare EM, CEM, and SEM
mixmultEM[1:6]
mixmultCEM[1:6]
mixmultSEM[1:6]

#compare with the distributions fitted to the original components
comp1
comp2





##mixture of LOGNORMAL distributions

#generate mixture of lognormal distributions
N <- 200 #number of observations
probs <- c(.6, .4) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)
mus <- c(4, 7)
sigmas <- c(.8, .5)

dataT <- exp(mus[component] + rnorm(N)*sigmas[component])
plot(density(dataT)) #plot mixture of distributions

#apply censoring (here: Type I censoring, with a fixed censoring time of 1500)
#failure=1, censored=0
Cens <- ifelse(dataT>1500, 0, 1)
N-sum(Cens) #number of censored observations
dataT <- ifelse(Cens==0, 1500, dataT)

#fit distribution to components
(comp1 <- survreg(Surv(dataT, Cens)~1, subset=(component==1), dist="lognormal"))
(comp2 <- survreg(Surv(dataT, Cens)~1, subset=(component==2), dist="lognormal"))



#fit mixture distribution to data (applying the CEM strategy)
mix <- mixcensored(y=dataT, d=Cens, dist="lognormal", n=2,
                   classify="CEM", maxiter=200)
mix[1:6]

#run mixcensored repeatedly (applying the EM strategy)
mixmult <- replicate(5, mixcensored(y=dataT, d=Cens, dist="lognormal", n=2,
                                    classify="EM", maxiter=200))
#log-likelihood values for the different solutions
mixmult[3,]
#select the solution with the maximum value of the log-likelihood
mixmultEM <- mixmult[, which.max(mixmult[3,])]
mixmultEM[1:6]


#run mixcensored repeatedly (applying the CEM strategy)
mixmult <- replicate(10, mixcensored(y=dataT, d=Cens, dist="lognormal", n=2,
                                     classify="CEM", maxiter=200))
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmultCEM <- mixmult[, which.max(mixmult[3,])]
mixmultCEM[1:6]



#compare EM with CEM
mixmultEM[1:6]
mixmultCEM[1:6]

#compare with the distributions fitted to the original components
comp1
comp2





##mixture of WEIBULL distributions
#this time, assess the fit of the mixture distribution with a probability plot

#generate mixture of Weibull distributions
N <- 100 #number of observations
probs <- c(.3, .7) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)
mus <- c(4, 8)
sigmas <- c(.7, .35)

dataT <- exp(mus[component] + rsev(N)*sigmas[component])
plot(density(dataT)) #plot mixture of distributions

#apply censoring (here: Type I censoring, with a fixed censoring time of 4000)
#failure=1, censored=0
Cens <- ifelse(dataT>4000, 0, 1)
N-sum(Cens) #number of censored observations
dataT <- ifelse(Cens==0, 4000, dataT)

#fit distribution to components
(comp1 <- survreg(Surv(dataT, Cens)~1, subset=(component==1), dist="weibull"))
(comp2 <- survreg(Surv(dataT, Cens)~1, subset=(component==2), dist="weibull"))



#fit mixture distribution to data (applying the CEM strategy)
mixmult <- replicate(10, mixcensored(y=dataT, d=Cens, dist="weibull", n=2,
                                     classify="CEM"))
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mix <- mixmult[, which.max(mixmult[3,])]
mix[1:6]

#compare with the distributions fitted to the original components
comp1
comp2



##construct probability plot for assessing the fit of the mixture distribution

#function for calculating probability plotting positions
ProbPos <- function (times, event=rep(1, length(times)),
                     weight=rep(1, length(times)), dist){
  #unique failure times
  timesSorted <- times[order(times)]
  eventSorted <- event[order(times)]
  timesi <- unique(timesSorted[which(eventSorted==1)])
  
  #Kaplan-Meier estimate of the survival probabilities at the failure times
  cumSurvRaw <- survfit(Surv(times, event)~ 1, weights=weight)
  cumSurv <- unique(rep(cumSurvRaw$surv, cumSurvRaw$n.event))
  cumFail <- 1-cumSurv #cumulative failure probabilities
  lagFail <- c(0, cumFail[-length(cumFail)])
  Prob <- .5*(cumFail+lagFail) #midpoint plotting positions
  #linearize the plotting positions with the corresponding quantile function
  if (dist=="lognormal") {yi <- qnorm(Prob)}
  else if (dist=="weibull") {yi <- qsev(Prob)}
  data.frame(timesi, yi, Prob)
}

#function for predicting quantiles
predictTime <- function (p, mu, sigma, dist) {
  if (dist=="lognormal") {exp(qnorm(p)*sigma + mu)}
  else if (dist=="weibull") {exp(qsev(p)*sigma + mu)}
}

##components of mixture distributions

#assign observations to components (based on estimated posterior probabilities)
clusterPost <- apply(mix$posterior, 1, which.max)

#compute probability positions for component 1
dataC1 <- dataT[which(clusterPost==1)]
CensC1 <- Cens[which(clusterPost==1)]
c1 <- ProbPos(dataC1, CensC1, dist="weibull")

#compute probability positions for component 2
dataC2 <- dataT[which(clusterPost==2)]
CensC2 <- Cens[which(clusterPost==2)]
c2 <- ProbPos(dataC2, CensC2, dist="weibull")

#generate predictions for components
ps <- seq(0,1,length.out=200)
plotposps <- qsev(ps) #caution: in case of lognormal distribution use qnorm(ps)

#generate predictions for component 1
predTimeC1 <- sapply(ps, predictTime, mu=mix$components[1,1],
                     sigma=mix$components[1,2], dist="weibull")

#generate predictions for component 2
predTimeC2 <- sapply(ps, predictTime, mu=mix$components[2,1],
                     sigma=mix$components[2,2], dist="weibull")

##combined data
#that is, without separating the observations into components

#fit distribution to the combined data
combMod <- survreg(Surv(dataT, Cens)~1, dist="weibull")
#AIC (smaller is better)
extractAIC(combMod)[2]
#BIC (smaller is better)
extractAIC(combMod, k=log(combMod$df+combMod$df.residual))[2]
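#for comparison: the AIC and BIC of the fitted mixture distribution,
#as returned by mixcensored
mix$AIC
mix$BIC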
#probability plotting positions
comb <- ProbPos(dataT, Cens, dist="weibull")
#predictions
predTimeComb <- sapply(ps, predictTime, mu=coef(combMod),
                       sigma=combMod$scale, dist="weibull")

##probability plot
#determine plot ranges
minProb <- min(c(c1$yi, c2$yi, comb$yi))
maxProb <- max(c(c1$yi, c2$yi, comb$yi))
minTime <- min(c(c1$timesi, c2$timesi, comb$timesi))
maxTime <- max(c(c1$timesi, c2$timesi, comb$timesi))

#construct plot
plot(0, type="n", log="x", xlim=c(minTime, maxTime), ylim=c(minProb, maxProb),
     xlab="Time", ylab="Linearized CDF")
#component 1
points(c1$timesi, c1$yi, col="blue")
lines(predTimeC1, plotposps, col="blue")
#component 2
points(c2$timesi, c2$yi, col="darkgreen")
lines(predTimeC2, plotposps, col="darkgreen")
#combined data
points(comb$timesi, comb$yi, col="darkgray")
lines(predTimeComb, plotposps, col="darkgray")
legend("topleft", pch=1, col=c("blue", "darkgreen", "darkgray"),
       legend=c("Comp. 1", "Comp. 2", "Combined"))



##function for predicting cumulative probabilities (at time t) of a mixture distribution
predictFMix <- function (t, parms, prior, dist) {
  parms <- matrix(parms, ncol=2) #each row contains (mu, sigma) of a component
  #the mixture CDF is the prior-weighted sum of the component CDFs
  if (dist=="lognormal") {
    sum(sapply(1:nrow(parms), function (x)
      prior[x]*pnorm( (log(t)-parms[x,1]) / parms[x,2])))}
  
  else if (dist=="weibull") {
    sum(sapply(1:nrow(parms), function (x)
      prior[x]*psev( (log(t)-parms[x,1]) / parms[x,2])))}
}

#generate predictions and plot results
ts <- seq(min(dataT[which(Cens==1)]), max(dataT[which(Cens==1)]), length.out=1000)
Ft <- sapply(ts, predictFMix, parms=mix$components, prior=mix$prior, dist="weibull")
plot(comb$timesi, comb$yi, log="x", col="darkgray",
     xlab="Time", ylab="Linearized CDF")
lines(ts, qsev(Ft), col="blue") #note: use qnorm(Ft) for a lognormal distribution
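
#for instance, the estimated fraction failing by t=1000
predictFMix(t=1000, parms=mix$components, prior=mix$prior, dist="weibull")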



##function for predicting quantiles (at probability p) of a mixture distribution
predictQMix <- function (p, parms, prior, dist) {
  parms <- matrix(parms, ncol=2) #each row contains (mu, sigma) of a component
  #numerically invert the mixture CDF with uniroot
  if (dist=="lognormal") {
    fun <- function(p, parms, prior, t) {
      sum(sapply(1:nrow(parms),
                 function (x) prior[x]*pnorm( (log(t)-parms[x,1]) /
                                                parms[x,2])))-p}
    uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms, prior=prior)$root}
  
  else if (dist=="weibull") {
    fun <- function(p, parms, prior, t) {
      sum(sapply(1:nrow(parms),
                 function (x) prior[x]*psev( (log(t)-parms[x,1]) /
                                               parms[x,2])))-p}
    uniroot(fun, c(1e-50, 1e+50), p=p, parms=parms, prior=prior)$root}
}

#generate predictions and plot results
ps <- seq(0,1,length.out=1000)
Qp <- sapply(ps, predictQMix, parms=mix$components, prior=mix$prior, dist="weibull")
plot(comb$timesi, comb$yi, log="x", col="darkgray",
     xlab="Time", ylab="Linearized CDF")
lines(Qp, qsev(ps), col="blue") #note: use qnorm(ps) for a lognormal distribution
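
#for instance, the estimated median time of the mixture distribution
predictQMix(p=.5, parms=mix$components, prior=mix$prior, dist="weibull")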





##mixture of GAUSSIAN distributions
library(flexmix)

#generate mixture of Gaussian distributions
N <- 500 #number of observations
probs <- c(.3, .2, .5) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)
mus <- c(20, 110, 50)
sigmas <- c(5, 15, 7)

dataT <- mus[component] + rnorm(N)*sigmas[component]
plot(density(dataT)) #plot mixture of distributions

#fit distribution to components
(comp1 <- survreg(Surv(dataT)~1, subset=(component==1), dist="gaussian"))
(comp2 <- survreg(Surv(dataT)~1, subset=(component==2), dist="gaussian"))
(comp3 <- survreg(Surv(dataT)~1, subset=(component==3), dist="gaussian"))

#these data are complete (uncensored), so we can compare the results of
#the mixcensored function with those of the flexmix function from
#the "flexmix" package (see: https://cran.r-project.org/web/packages/flexmix)



#fit mixture distribution to data (applying the EM strategy)
#flexmix:
em1 <- stepFlexmix(dataT ~ 1, k=3, nrep=5,
                   control=list(iter=100, tol=1e-6, classify="weighted"))
summary(em1)
parameters(em1, component=1:3)

#mixcensored:
mixmult <- replicate(5, mixcensored(y=dataT, dist="gaussian", n=3))
#select the solution with the maximum value of the log-likelihood
mixEM <- mixmult[, which.max(mixmult[3,])]
mixEM[1:6]
sqrt(N/(N-1))*mixEM$components[,2] #N-1 based (unbiased variance) estimate of sigma for each component



#fit mixture distribution to data (applying the CEM strategy)
#flexmix:
cem1 <- stepFlexmix(dataT ~ 1, k=3, nrep=20,
                    control=list(iter=100, tol=1e-6, classify="CEM"))
summary(cem1)
parameters(cem1, component=1:3)

#mixcensored:
mixmult <- replicate(20, mixcensored(y=dataT, dist="gaussian", classify="CEM",
                                     n=3, maxiter=200))
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixCEM <- mixmult[,which.max(mixmult[3,])]
mixCEM[1:6]
sqrt(N/(N-1))*mixCEM$components[,2] #N-1 based (unbiased variance) estimate of sigma for each component

#compare with the distributions fitted to the original components
comp1
comp2
comp3





##INTERVAL CENSORED DATA

#function for fitting a mixture distribution to INTERVAL censored data

#NOTE: this function employs the type="interval" coding scheme as used
#in the "Surv" function (from the survival package) for interval censored data

mixcensoredInt <- function (y1, y2, d=rep(1, length(y1)), wt=rep(1, length(y1)),
                            dist="weibull", n, cluster=NULL, classify="EM",
                            maxiter=100, tol=1e-6) {
  #y1 is the right/left censored value, the exact lifetime observation, or for
  # interval censoring the lower value of the censoring interval
  #y2 is the upper value of the censoring interval
  #d is the censoring indicator (0=right censored, 1=event at time,
  # 2=left censored, 3=interval censored)
  #wt are the weights for the observations
  #dist: either the "weibull", "lognormal", or "gaussian" distribution
  #n is the number of components
  #cluster: start with random initialization of posterior probabilities (=NULL), or
  # a matrix with n columns of initial posterior probabilities for the observations
  #classify: "EM", "CEM", or "SEM" strategy (CEM assigns each observation
  # to its most probable component; SEM draws component memberships at
  # random from the posterior probabilities)
  #maxiter is the maximum number of iterations
  #tol is the convergence criterion
  
  nobs <- sum(wt) #number of observations
  
  parms <- matrix(NA, ncol=3, nrow=n)
  colnames(parms) <- c("mu", "sigma", "logLik")
  posteriorP <- matrix(NA, ncol=n, nrow=length(y1))
  stdErr <- matrix(NA, ncol=2, nrow=n)
  colnames(stdErr) <- c("mu", "log(sigma)")
  P <- matrix(NA, ncol=n, nrow=length(y1))
  
  iteration <- 0 #initialize iteration counter
  loglikInit <- logLikC <- 0 #initialize log-likelihood estimates
  
  #initialize posterior probabilities
  if (is.null(cluster)) {
    alpha <- rep(.1, n)
    posteriorP <- rdirichlet(length(y1), alpha)}
  else {posteriorP <- cluster}
  
  while (iteration < maxiter) {
    #estimate prior probabilities
    priorP <- apply(wt*posteriorP, 2, sum)/nobs
    
    #estimate distribution parameters for each component (M-step)
    if (classify=="EM"){
      for (i in 1:n) {
        wtp <- ifelse(wt*posteriorP[,i]<=0, 1e-15, wt*posteriorP[,i])
        cp <- survreg(Surv(y1,y2,d, type="interval")~1, weights=wtp, dist=dist)
        parms[i,] <- c(coef(cp)[1], cp$scale, logLik(cp)[1])
        stdErr[i,] <- sqrt(diag(vcov(cp)))}
    }
    
    else if (classify=="CEM"){
      compPost <- apply(posteriorP, 1, which.max)
      for (i in 1:n) {
        cp <- survreg(Surv(y1,y2,d, type="interval")~1, weights=wt, dist=dist,
                      subset=(compPost==i))
        parms[i,] <- c(coef(cp)[1], cp$scale, logLik(cp)[1])
        stdErr[i,] <- sqrt(diag(vcov(cp)))}
    }
    
    else if (classify=="SEM"){
      compPost <- apply(posteriorP, 1, function (p) sample(x=1:n, size=1, prob=p))
      for (i in 1:n) {
        cp <- survreg(Surv(y1,y2,d, type="interval")~1, weights=wt, dist=dist,
                      subset=(compPost==i))
        parms[i,] <- c(coef(cp)[1], cp$scale, logLik(cp)[1])
        stdErr[i,] <- sqrt(diag(vcov(cp)))}
    }
    
    #compute the (complete) log-likelihood value: the sum of the weighted
    #component log-likelihoods plus the expected log mixing proportions
    logLikC <- sum(parms[,3]) + sum(wt*(posteriorP%*%log(priorP)))
    logLikC <- ifelse(is.na(logLikC), 0, logLikC)
    
    #estimate posterior probabilities (E-step)
    if (dist=="weibull") {
      for (j in 1:n) {
        mu <- parms[j,1]; sigma <- parms[j,2]
        
        z1 <- (log(y1)-mu)/sigma
        z2 <- (log(y2)-mu)/sigma
        
        cp <- ifelse(d==0, 1-psev(z1), #right censoring
                     ifelse(d==1, 1/(sigma*y1) * dsev(z1), #exact observations
                            ifelse(d==2, psev(z1), #left censoring
                                   psev(z2)-psev(z1) ))) #interval censoring
        P[,j] <- priorP[j]*cp}
    }
    
    else if (dist=="lognormal") {
      for (j in 1:n) {
        mu <- parms[j,1]; sigma <- parms[j,2]
        
        z1 <- (log(y1)-mu)/sigma
        z2 <- (log(y2)-mu)/sigma
        
        cp <- ifelse(d==0, 1-pnorm(z1), #right censoring
                     ifelse(d==1, 1/(sigma*y1) * dnorm(z1), #exact observations
                            ifelse(d==2, pnorm(z1), #left censoring
                                   pnorm(z2)-pnorm(z1) )))
        P[,j] <- priorP[j]*cp}
    }
    
    else if (dist=="gaussian") {
      for (j in 1:n) {
        mu <- parms[j,1]; sigma <- parms[j,2]
        
        z1 <- (y1-mu)/sigma
        z2 <- (y2-mu)/sigma
        
        cp <- ifelse(d==0, 1-pnorm(z1), #right censoring
                     ifelse(d==1, 1/(sigma) * dnorm(z1), #exact observations
                            ifelse(d==2, pnorm(z1), #left censoring
                                   pnorm(z2)-pnorm(z1) )))
        P[,j] <- priorP[j]*cp}
    }
    
    posteriorP <- P/rowSums(P)
    
    #check convergence criterion: stop when the relative change in
    #the log-likelihood is smaller than tol
    if (( abs(logLikC-loglikInit) / (abs(loglikInit)+.001*tol) ) < tol) break
    loglikInit <- logLikC #update log-likelihood
    iteration <- iteration + 1 #increment counter
  }
  
  if (iteration == maxiter) warning("Maximum number of iterations reached")
  #return the same output structure as the mixcensored function above
  list(components=parms[,1:2], prior=priorP, loglik=logLikC,
       AIC=-2*logLikC + 2*(n-1+2*n), BIC=-2*logLikC + (n-1+2*n)*log(nobs),
       strategy=classify, distribution=dist, iterations=iteration,
       standardError=stdErr, posterior=posteriorP)
}



##mixture of LOGNORMAL distributions

#generate mixture of lognormal distributions
N <- 200 #number of observations
probs <- c(.3, .7) #mixing proportions
n <- length(probs) #number of components
component <- sample(1:n, prob=probs, size=N, replace=TRUE)
mus <- c(5, 8)
sigmas <- c(.7, .35)

dataT <- exp(mus[component] + rnorm(N)*sigmas[component])
plot(density(dataT)) #plot mixture of distributions

#generate intervals for censoring
intCens <- seq(0,4000,200)

#assign observations to censoring intervals
assignCens <- findInterval(dataT, intCens)

#observations greater than 4000 are right censored and get the value NA for y2
intCensRC <- c(intCens, NA)

#lower bound of censoring interval
L <- sapply(assignCens, function (k) intCensRC[k])

#upper bound of censoring interval
U <- sapply(assignCens, function (k) intCensRC[k+1])

#indicator for type of censoring
Cens <- ifelse(is.na(U), 0, 3)

#censoring in the interval [0,200] is identical to left censoring at t=200
Cens <- ifelse(L==0, 2, Cens) #set censoring indicator to 2 (=left censoring)
#for these left censored observations, set L to 200, and U to NA
L <- ifelse(Cens==2, U, L)
U <- ifelse(Cens==2, NA, U)

#inspect data
head(cbind(L, U, dataT, Cens, component), n=15)

#censoring distribution
table(Cens)



#fit distribution to components
(comp1 <- survreg(Surv(L,U,Cens, type="interval")~1,
                  subset=(component==1), dist="lognormal"))
(comp2 <- survreg(Surv(L,U,Cens, type="interval")~1,
                  subset=(component==2), dist="lognormal"))



#fit a lognormal mixture distribution to the interval censored data
mixmult <- replicate(5, mixcensoredInt(y1=L, y2=U, d=Cens,
                                       dist="lognormal", n=2,
                                       classify="SEM", maxiter=200))
#log-likelihood values for the different solutions
mixmult[3,]
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmultSEM <- mixmult[, which.max(mixmult[3,])]
mixmultSEM[1:6]

#compare with the distributions fitted to the original components
comp1
comp2





#some runs may generate an error (for instance, when a component ends up
#with too few observations under the CEM strategy)
#the following is a way of handling these errors
mixmult <- replicate(20, try(mixcensoredInt(y1=L, y2=U, d=Cens,
                                            dist="lognormal", n=2,
                                            classify="CEM", maxiter=200),
                             silent=TRUE),
                     simplify=FALSE)
#number of runs that generated an error
length(which(lapply(mixmult, length)==1))
#discard runs that generated an error
mixmult <- simplify2array(mixmult[which(lapply(mixmult, length)>1)])
#log-likelihood values for the different solutions
mixmult[3,]
#discard models that did not converge
mixmult[3,] <- ifelse(mixmult[3,]==0, NA, mixmult[3,])
#number of discarded models
length(which(is.na(mixmult[3,])))
#select the solution with the maximum value of the log-likelihood
mixmultCEM <- mixmult[, which.max(mixmult[3,])]
mixmultCEM[1:6]