R code for constructing probability plots

Probability plots are a tool for assessing whether the observed data follow a particular distribution.

probability-plotExample of a probability plot for a Beta distribution

In short, if all data points in a probability plot fall on an approximate straight line, then you may assume that the data fit to the distribution. In the figure above, for instance, all points seem to fall on a straight line in a Beta probability plot. As a result, we may assume that these data points come from a Beta distribution.

The following R code constructs probability plots. The following distributions are implemented:

  • Beta
  • Gamma
  • Exponential
  • Normal (=Gaussian)
  • Log-Normal
  • Smallest Extreme Value (=Gumbel)
  • Weibull
  • Largest Extreme Value
  • Fréchet
  • Logistic
  • Log-Logistic

However, it should be easy to extend the R code and implement other distributions as well.

The R code may be used for assessing the fit of right censored, complete (uncensored), interval censored, and arbitrarily censored data. The latter type of censored data consists of some combination of left, right, or interval censoring.

An introduction to probability plotting can be found in Meeker and Escobar’s 1998 book Statistical Methods for Reliability Data.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of Stefan’s
R code blog posts.

R code

Complete (uncensored) and right censored data

library(survival)
library(SPREDA)
library(MASS)

#Function for constructing probability plots for
#complete (uncensored) and right censored data
ProbPlot <- function (data, event=rep(1, length(data)),
                      weight=rep(1, length(data)), dist,
                      shape=NULL, simplify=TRUE){
  #-data is a vector containing the observed data points.
  #-event is a status indicator (with possible values 0 and 1) for specifying
  # whether an observation is right censored (=0) or not (=1). This indicator
  # can be omitted in case of complete (uncensored) data.
  #-weight is an optional vector with the weights for the observations.
  #-dist is the assumed distribution for the data. Distributions "lognormal",
  # "gaussian", "exponential", "weibull", "sev" (=Gumbel), "frechet",
  # "lev" (=largest extreme value), "logistic" ,"loglogistic", "gamma", and
  # "beta" are implemented.
  #-shape is a vector for specifying the shape parameter(s) of distributions
  # such as the beta and gamma distribution (see Examples 2, 3, and 4 below)
  #-simplify: if TRUE then do not return a matrix containing the data points
  # (=data), standardized quantiles (=standardQuantile), and corresponding
  # probabilities.
  
  #plotting positions
  dataSorted <- data[order(data)]
  eventSorted <- event[order(data)]
  datai <- unique(dataSorted[which(eventSorted==1)])
  
  cumSurvRaw <- survfit(Surv(data, event)~ 1, weights=weight)
  cumSurv <- unique(rep(cumSurvRaw$surv, cumSurvRaw$n.event))
  cumFail <- 1-cumSurv
  lagFail <- c(0, cumFail[-length(cumFail)])
  Prob <- .5*(cumFail+lagFail)
  
  #labels for ticks along y-axis
  tick.probs <- c(.001,.005,.01,.02,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.99,.999)
  
  #implement distributions
  if (dist=="lognormal" | dist=="gaussian") {
    yi <- qnorm(Prob) #plotting positions
    tick.pos <- qnorm(tick.probs) #positions for ticks along y-axis
    ylimr <- qnorm(range(tick.probs)) #range along y-axis
  }
  else if (dist=="exponential") {
    yi <- qexp(Prob) #plotting positions
    tick.pos <- qexp(tick.probs) #positions for ticks along y-axis
    ylimr <- qexp(range(tick.probs)) #range along y-axis
  }
  else if (dist=="weibull" | dist=="sev") {
    yi <- qsev(Prob) #plotting positions
    tick.pos <- qsev(tick.probs) #positions for ticks along y-axis
    ylimr <- qsev(range(tick.probs)) #range along y-axis
  }
  else if (dist=="frechet" | dist=="lev") {
    yi <- qlev(Prob) #plotting positions
    tick.pos <- qlev(tick.probs) #positions for ticks along y-axis
    ylimr <- qlev(range(tick.probs)) #range along y-axis
  }
  else if (dist=="loglogistic" | dist=="logistic") {
    yi <- qlogis(Prob) #plotting positions
    tick.pos <- qlogis(tick.probs) #positions for ticks along y-axis
    ylimr <- qlogis(range(tick.probs)) #range along y-axis
  }
  else if (dist=="gamma") {
    yi <- qgamma(Prob, shape=shape) #plotting positions
    tick.pos <- qgamma(tick.probs, shape=shape) #positions for ticks along y-axis
    ylimr <- qgamma(range(tick.probs), shape=shape) #range along y-axis
  }
  else if (dist=="beta") {
    yi <- qbeta(Prob, shape1=shape[1], shape2=shape[2]) #plotting positions
    tick.pos <- qbeta(tick.probs, shape1=shape[1], shape2=shape[2]) #positions for ticks along y-axis
    ylimr <- qbeta(range(tick.probs), shape1=shape[1], shape2=shape[2]) #range along y-axis
  }
  
  #determine range along x-axis
  rangeData <- range(data)
  
  #construct plot
  #distributions that require a log transform of the data scale
  if (dist=="weibull" | dist=="lognormal" | dist=="frechet" |
      dist=="loglogistic") {
    plot(0, type="n", log="x",
         xlim=c(rangeData[1], rangeData[2]),
         ylim=c(ylimr[1], ylimr[2]),
         xlab="Data", ylab="Probability", main=paste(dist, "distribution"),
         axes=FALSE, frame.plot=TRUE)}
  else {
    plot(0, type="n",
         xlim=c(rangeData[1], rangeData[2]),
         ylim=c(ylimr[1], ylimr[2]),
         xlab="Data", ylab="Probability", main=paste(dist, "distribution"),
         axes=FALSE, frame.plot=TRUE)}
  
  axis(2, at=tick.pos, labels=tick.probs)
  axis(1)
  points(datai, yi, col="red")
  #draw raster
  abline(h = tick.pos, lty=3, col="gray")
  abline(v = axTicks(1), lty=3, col="gray")
  
  #return plotting positions
  if (!simplify) cbind(data=datai, standardQuantile=yi, probability=Prob)
}

#Function for predicting quantiles
predictData <- function (p, location=0, scale=1,
                         shape=NULL, rate=NULL, dist) {
  if (dist=="weibull") {exp(qsev(p)*scale + location)}
  else if (dist=="sev") {qsev(p)*scale + location}
  else if (dist=="frechet") {exp(qlev(p)*scale + location)}
  else if (dist=="lev") {qlev(p)*scale + location}
  else if (dist=="lognormal") {exp(qnorm(p)*scale + location)}
  else if (dist=="gaussian") {qnorm(p)*scale + location}
  else if (dist=="loglogistic") {exp(qlogis(p)*scale + location)}
  else if (dist=="logistic") {qlogis(p)*scale + location}
  else if (dist=="exponential") {qexp(p)*scale}
  else if (dist=="gamma") {qgamma(p, shape=shape)*scale}
  else if (dist=="beta") {qbeta(p, shape1=shape[1],
                                shape2=shape[2])*scale + location}
}



##EXAMPLE 1: Exponential distribution

#Generate artificial data
#note: this is complete (uncensored) data
x <- rexp(100, rate=2)

#Fit Exponential distribution to data
mleExponential <- fitdistr(x, densfun="exponential")

#Generate predictions based on the Maximum Likelihood Estimates for the
#parameters of the Exponential distribution
ps <- seq(.001, .999, length.out=200) #probabilities
predExponential <- sapply(ps, predictData, scale=1/mleExponential$estimate,
                          dist="exponential")

#Exponential probability plot of data (red points), with ML estimates (blue line) 
ProbPlot(data=x, dist="exponential")
lines(predExponential, qexp(ps), col="blue")



##EXAMPLE 2: Right censored data

#Fatigue Life data from Meeker & Escobar (1998), Example 6.2, pp. 130-131, and
#Example 6.6, pp. 137-138
FaLi <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/at7987.txt", header=T)
#failure=1, censored=0
FaLi$Cens <- as.numeric(FaLi$Status)-1
head(FaLi)

#Probability plot of data
par(mfrow=c(2, 2))
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="weibull")
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="sev")
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="frechet")
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="lognormal")
par(mfrow=c(1, 1))


#Adding ML estimates to the probability plots

#Obtain MLEs for the location and scale
mleWeibull <- survreg(Surv(Kilocycles, Cens) ~ 1, data=FaLi,
                      weights=Weight, dist="weibull")
mleGumbel <- survreg(Surv(Kilocycles, Cens) ~ 1, data=FaLi,
                     weights=Weight, dist="extreme")
mleFrechet <- Lifedata.MLE(Surv(Kilocycles, Cens) ~ 1, data=FaLi,
                           weights=Weight, dist="frechet")
mleLognormal <- survreg(Surv(Kilocycles, Cens) ~ 1, data=FaLi,
                        weights=Weight, dist="lognormal")

#Generate predictions based on the MLEs for the distributions
ps <- seq(.001, .999, length.out=200)
predWeibull <- sapply(ps, predictData, location=coef(mleWeibull),
                      scale=mleWeibull$scale, dist="weibull")
predGumbel <- sapply(ps, predictData, location=coef(mleGumbel),
                     scale=mleGumbel$scale, dist="sev")
predFrechet <- sapply(ps, predictData, location=coef(mleFrechet)[1],
                      scale=coef(mleFrechet)[2], dist="frechet")
predLognormal <- sapply(ps, predictData, location=coef(mleLognormal),
                        scale=mleLognormal$scale, dist="lognormal")

#Probability plot of data (red points), with ML estimates (blue line)
par(mfrow=c(2, 2))
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens, weight=FaLi$Weight, dist="weibull")
lines(predWeibull, qsev(ps), col="blue")
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens, weight=FaLi$Weight, dist="sev")
lines(predGumbel, qsev(ps), col="blue")
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens, weight=FaLi$Weight, dist="frechet")
lines(predFrechet, qlev(ps), col="blue")
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens, weight=FaLi$Weight, dist="lognormal")
lines(predLognormal, qnorm(ps), col="blue")
par(mfrow=c(1, 2))


#For constructing the probability plot of the Gamma distribution we
#have to specify a value for the shape parameter.
#We are looking for a shape parameter such that the points in our Gamma
#probability plot all will fall on an approximate straight line, and will
#vary the shape parameter accordingly.
par(mfrow=c(2, 2))
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="gamma", shape=.8)
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="gamma", shape=1.2)
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="gamma", shape=2)
ProbPlot(data=FaLi$Kilocycles, event=FaLi$Cens,
         weight=FaLi$Weight, dist="gamma", shape=5)
par(mfrow=c(1, 1))
#The best choice for the shape parameter seems to be 2.



##EXAMPLE 3: Using a Beta probability plot for obtaining ML starting values 

#Generate artificial data
#note: this is complete (uncensored) data
x <- rbeta(100, shape1=2, shape2=5)
hist(x) #for the Beta distribution it holds that 0<x<1

#For constructing the probability plot of the Beta distribution we
#have to specify values for 2 shape parameters.
#We are looking for shape parameters such that the points in our Beta
#probability plot all will fall on an approximate straight line, and will
#vary the shape parameters accordingly.
par(mfrow=c(2, 2))
ProbPlot(data=x, dist="beta", shape=c(1, 1))
ProbPlot(data=x, dist="beta", shape=c(2, 2))
ProbPlot(data=x, dist="beta", shape=c(2, 5))
ProbPlot(data=x, dist="beta", shape=c(5, 3))
par(mfrow=c(1, 1))
#The shape parameters 2 and 5, respectively, will give the best fit.

#Fit a Beta distribution to the data, and use as ML starting values
#those two shape values that yielded the most straight line in the
#the Beta probability plots above.
mleBeta <- fitdistr(x, densfun="beta", start=list(shape1=2, shape2=5))
mleBeta 

#Generate predictions based on the Maximum Likelihood Estimates for the
#parameters of the Exponential distribution
ps <- seq(.001, .999, length.out=200)
predBeta <- sapply(ps, predictData, shape=mleBeta$estimate, dist="beta")

#Beta probability plot of data (red points), with ML estimates (blue line) 
ProbPlot(data=x, dist="beta", shape=mleBeta$estimate)
lines(predBeta, qbeta(ps, shape1=mleBeta$estimate[1], shape2=mleBeta$estimate[2]),
      col="blue")



#EXTRA: generate data that follows a Beta distribution, but for which
#the data is not limited to the range [0, 1].

#Generate artificial data
x <- rbeta(100, shape1=.5, shape2=1.3)*122 + 3.5
hist(x) #note: 0<x<1 does not hold anymore for this data

#For constructing the probability plot of the Beta distribution we
#have to specify values for 2 shape parameters.
#We are looking for shape parameters such that the points in our Beta
#probability plot all will fall on an approximate straight line, and will
#vary the shape parameters accordingly.
par(mfrow=c(2, 2))
ProbPlot(data=x, dist="beta", shape=c(.5, .5))
ProbPlot(data=x, dist="beta", shape=c(.5, 1))
ProbPlot(data=x, dist="beta", shape=c(1, .5))
ProbPlot(data=x, dist="beta", shape=c(2, 2))
par(mfrow=c(1, 1))
#The shape parameters .5 and 1, respectively, will give the best fit.

#Fit a Pearson type I distribution to this data. The Pearson type I
#essentially is a Beta-distribution, but in addition has a
#location and scale parameter.
library(PearsonDS)
mleBeta <- pearsonMSC(x)$FittedDistributions$I
mleBeta

#Generate predictions based on the Maximum Likelihood Estimates for the
#parameters of the Pearson type I distribution.
ps <- seq(.001, .999, length.out=200)
predBeta <- sapply(ps, predictData, shape=mleBeta[c(2, 3)],
                   location=mleBeta[4], scale=mleBeta[5], dist="beta")

#Beta probability plot of data (red points), with ML estimates (blue line) 
ProbPlot(data=x, dist="beta", shape=mleBeta[c(2, 3)])
lines(predBeta, qbeta(ps, shape1=mleBeta[2], shape2=mleBeta[3]), col="blue")



##EXAMPLE 4: Using a Gamma probability plot for obtaining ML estimates

#Ball bearing fatigue data (see: Meeker & Escobar, Example 11.1, pp. 256-257)
BBfd <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/lzbearing.txt", header=T)

#For constructing the probability plot of the Gamma distribution we
#have to specify a value for the shape parameter.
#We are looking for a shape parameter such that the points in our Gamma
#probability plot all will fall on an approximate straight line, and will
#vary the shape parameter accordingly.
par(mfrow=c(2, 2))
ProbPlot(data=BBfd$Megacycles, dist="gamma", shape=.5)
ProbPlot(data=BBfd$Megacycles, dist="gamma", shape=1)
ProbPlot(data=BBfd$Megacycles, dist="gamma", shape=2)
ProbPlot(data=BBfd$Megacycles, dist="gamma", shape=4)
par(mfrow=c(1, 1))
#The best choice for the shape parameter seems to be 4.

#Extract the corresponding plotting positions of the data points (for "shape=4")
pp <- ProbPlot(data=BBfd$Megacycles, dist="gamma", shape=4, simplify=FALSE)
head(pp)
#Obtain starting values for the scale parameter by fitting the following
#linear model
scaleStart <- coef(lm(data ~ standardQuantile, data=data.frame(pp)))[2]
shapeStart <- 4 #start value for the shape parameter

#Use these starting values for computing MLEs for the scale and shape
#parameters of the Gamma distribution

#log-likelihood function for fitting a Gamma distribution
llik <- function (theta, y) {
  sum(log(dgamma(y, shape=exp(theta[1]), scale=exp(theta[2]))))}

#optimization of log-likelihood
MLEs <- optim(c(log(shapeStart), log(scaleStart)), llik,
              method="Nelder-Mead", control=list(fnscale=-1),
              y=BBfd$Megacycles)
#MLEs
MLEparam <- as.vector(exp(MLEs$par))
c(shape=MLEparam[1], scale=MLEparam[2], rate=1/MLEparam[2])

#Compare these MLEs with:
fitdistr(BBfd$Megacycles, "gamma")

Interval and arbitrarily censored data

library(survival)
library(SPREDA)
library(MASS)

#CAUTION: the following function employs the type="interval" coding scheme as used
#in the "Surv" function (from the survival package) for coding interval
#and arbitrarily censored data.
#That is, events should be coded as: 0=right censored, 1=event at time,
#2=left censored, 3=interval censored.
#Have a look at the "Surv" function for more information on how to
#handle data when using the "interval" coding scheme.

#Function for constructing probability plots for
#interval and arbitrarily censored data
ProbPlotInterval <- function (time, time2=rep(NA, length(time)), event,
                              weight=rep(1, length(time)), dist,
                              shape=NULL, simplify=TRUE){
  #-time is the right/left censored value, the exact lifetime observation, or for
  # interval censoring the lower value of the censoring interval.
  #-time2 is the upper value of the censoring interval.
  #-event is the censoring indicator (0=right censored, 1=event at time,
  # 2=left censored, 3=interval censored).
  #-weight is an optional vector the weights for the observations
  #-dist is the assumed distribution for the data. Distributions "lognormal",
  # "gaussian", "exponential", "weibull", "sev" (=Gumbel), "frechet",
  # "lev" (=largest extreme value), "logistic" ,"loglogistic", "gamma", and
  # "beta" are implemented.
  #-shape is a vector for specifying the shape parameter(s) of distributions
  # such as the beta and gamma distribution (see Examples 2, 3, and 4 below).
  #-simplify: if TRUE then do not return a matrix containing the data points
  # (=data), standardized quantiles (=standardQuantile), and corresponding
  # probabilities.
  
  #plotting positions
  cumSurvRaw <- survfit(Surv(time=time, time2=time2,
                             event=event, type="interval") ~ 1,
                        weights=weight)
  
  cumSurv <- cumSurvRaw$surv
  cumFail <- 1-cumSurv
  Prob <- cumFail
  
  datai <- cumSurvRaw$time
  
  #labels for ticks along y-axis
  tick.probs <- c(.001,.005,.01,.02,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.99,.999)
  
  #implement distributions
  if (dist=="lognormal" | dist=="gaussian") {
    yi <- qnorm(Prob) #plotting positions
    tick.pos <- qnorm(tick.probs) #positions for ticks along y-axis
    ylimr <- qnorm(range(tick.probs)) #range along y-axis
  }
  else if (dist=="exponential") {
    yi <- qexp(Prob) #plotting positions
    tick.pos <- qexp(tick.probs) #positions for ticks along y-axis
    ylimr <- qexp(range(tick.probs)) #range along y-axis
  }
  else if (dist=="weibull" | dist=="sev") {
    yi <- qsev(Prob) #plotting positions
    tick.pos <- qsev(tick.probs) #positions for ticks along y-axis
    ylimr <- qsev(range(tick.probs)) #range along y-axis
  }
  else if (dist=="frechet" | dist=="lev") {
    yi <- qlev(Prob) #plotting positions
    tick.pos <- qlev(tick.probs) #positions for ticks along y-axis
    ylimr <- qlev(range(tick.probs)) #range along y-axis
  }
  else if (dist=="loglogistic" | dist=="logistic") {
    yi <- qlogis(Prob) #plotting positions
    tick.pos <- qlogis(tick.probs) #positions for ticks along y-axis
    ylimr <- qlogis(range(tick.probs)) #range along y-axis
  }
  else if (dist=="gamma") {
    yi <- qgamma(Prob, shape=shape) #plotting positions
    tick.pos <- qgamma(tick.probs, shape=shape) #positions for ticks along y-axis
    ylimr <- qgamma(range(tick.probs), shape=shape) #range along y-axis
  }
  else if (dist=="beta") {
    yi <- qbeta(Prob, shape1=shape[1], shape2=shape[2]) #plotting positions
    tick.pos <- qbeta(tick.probs, shape1=shape[1], shape2=shape[2]) #positions for ticks along y-axis
    ylimr <- qbeta(range(tick.probs), shape1=shape[1], shape2=shape[2]) #range along y-axis
  }
  
  #determine range along x-axis
  rangeData <- range(cumSurvRaw$time)
  
  #construct plot
  #distributions that require a log transform of the data scale
  if (dist=="weibull" | dist=="lognormal" | dist=="frechet" |
      dist=="loglogistic") {
    plot(0, type="n", log="x",
         xlim=c(rangeData[1], rangeData[2]),
         ylim=c(ylimr[1], ylimr[2]),
         xlab="Data", ylab="Probability", main=paste(dist, "distribution"),
         axes=FALSE, frame.plot=TRUE)}
  else {
    plot(0, type="n",
         xlim=c(rangeData[1], rangeData[2]),
         ylim=c(ylimr[1], ylimr[2]),
         xlab="Data", ylab="Probability", main=paste(dist, "distribution"),
         axes=FALSE, frame.plot=TRUE)}
  
  axis(2, at=tick.pos, labels=tick.probs)
  axis(1)
  points(datai, yi, col="red")
  #draw raster
  abline(h = tick.pos, lty=3, col="gray")
  abline(v = axTicks(1), lty=3, col="gray")
  
  #return plotting positions
  if (!simplify) cbind(data=datai, standardQuantile=yi, probability=Prob)
}

#Function for predicting quantiles
predictData <- function (p, location=0, scale=1,
                         shape=NULL, rate=NULL, dist) {
  if (dist=="weibull") {exp(qsev(p)*scale + location)}
  else if (dist=="sev") {qsev(p)*scale + location}
  else if (dist=="frechet") {exp(qlev(p)*scale + location)}
  else if (dist=="lev") {qlev(p)*scale + location}
  else if (dist=="lognormal") {exp(qnorm(p)*scale + location)}
  else if (dist=="gaussian") {qnorm(p)*scale + location}
  else if (dist=="loglogistic") {exp(qlogis(p)*scale + location)}
  else if (dist=="logistic") {qlogis(p)*scale + location}
  else if (dist=="exponential") {qexp(p)*scale}
  else if (dist=="gamma") {qgamma(p, shape=shape)*scale}
  else if (dist=="beta") {qbeta(p, shape1=shape[1],
                                shape2=shape[2])*scale + location}
}



##EXAMPLE 5: Interval censored data

#Generate artificial interval censored data that follows a Weibull distribution
x <- exp(8  + rsev(200)*.65)

#Generate intervals for censoring
intCens <- seq(0,5000,100)

#Assign observations to censoring intervals
assignCens <- findInterval(x, intCens)

#Observations with >5000 are right censored and get value NA for time2
intCensRC <- c(intCens, NA)

#Lower bounds of censoring interval
L <- sapply(assignCens, function (k) intCensRC[k])

#Upper bounds of censoring interval
U <- sapply(assignCens, function (k) intCensRC[k+1])

#Indicator for type of censoring
Cens <- ifelse(is.na(U), 0, 3)

#Censoring in the interval [0, 100] is identical to left censoring at t=100
Cens <- ifelse(L==0, 2, Cens) #set censoring indicator to 2 (=left censoring)
#For these left censored observations, set L to 100, and U to NA
L <- ifelse(Cens==2, U, L)
U <- ifelse(Cens==2, NA, U)

#Inspect data
head(cbind(L, U, x, Cens), n=15)

#Censoring distribution
table(Cens)

#Probability plots of data
par(mfrow=c(2, 2))
ProbPlotInterval(time=L, time2=U, event=Cens, dist="weibull")
ProbPlotInterval(time=L, time2=U, event=Cens, dist="lognormal")
ProbPlotInterval(time=L, time2=U, event=Cens, dist="frechet")
ProbPlotInterval(time=L, time2=U, event=Cens, dist="exponential")
par(mfrow=c(1, 1))



##EXAMPLE 6: Arbitrarily censored data

#Turbine wheel data from Meeker & Escobar (1998), Example 6.5, pp. 135-136
FaLi <- read.table("http://www.public.iastate.edu/~wqmeeker/anonymous/Stat533_data/Splida_text_data/turbine.txt", header=T)
#The data contains a mixture of left and right censoring: right=0, left=2
FaLi$Cens <- ifelse(FaLi$Status=="Right", 0, 2)
head(FaLi)

#Probability plot of data
par(mfrow=c(2, 2))
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="weibull")
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="sev")
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="exponential")
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="lognormal")
par(mfrow=c(1, 1))


#Adding ML estimates to the probability plots

#Obtain MLEs for the location and scale
FaLi$Upper <- rep(NA, length(FaLi$HundredHours))
mleWeibull <- survreg(Surv(HundredHours, Upper, Cens, type="interval") ~ 1,
                      data=FaLi, weights=Count, dist="weibull")
mleGumbel <- survreg(Surv(HundredHours, Upper, Cens, type="interval") ~ 1,
                     data=FaLi, weights=Count, dist="extreme")
mleExponential <- survreg(Surv(HundredHours, Upper, Cens, type="interval") ~ 1,
                          data=FaLi, weights=Count, dist="exponential")
mleLognormal <- survreg(Surv(HundredHours, Upper, Cens, type="interval") ~ 1,
                        data=FaLi, weights=Count, dist="lognormal")

#Generate predictions based on the MLEs for the distributions
ps <- seq(.001, .999, length.out=200)
predWeibull <- sapply(ps, predictData, location=coef(mleWeibull),
                      scale=mleWeibull$scale, dist="weibull")
predGumbel <- sapply(ps, predictData, location=coef(mleGumbel),
                     scale=mleGumbel$scale, dist="sev")
predExponential <- sapply(ps, predictData, scale=exp(coef(mleExponential)),
                          dist="exponential")
predLognormal <- sapply(ps, predictData, location=coef(mleLognormal),
                        scale=mleLognormal$scale, dist="lognormal")

#Probability plot of data (red points), with ML estimates (blue line)
par(mfrow=c(2, 2))
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="weibull")
lines(predWeibull, qsev(ps), col="blue")
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="sev")
lines(predGumbel, qsev(ps), col="blue")
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="exponential")
lines(predExponential, qexp(ps), col="blue")
ProbPlotInterval(time=FaLi$HundredHours, event=FaLi$Cens, weight=FaLi$Count,
                 dist="lognormal")
lines(predLognormal, qnorm(ps), col="blue")
par(mfrow=c(1, 2))