Extreme value analysis in engineering (largest extreme values)

Extreme value analysis deals with extreme events. In engineering, extreme value analysis is used to estimate the maximum wind speed (important for determining the maximum load on structures due to wind), the maximum river discharge or wave height (important information for the design height of dikes), maximum earthquake intensity (important input for structural mechanics), the maximum voltage level, et cetera.

The following code may be used for fitting extreme value models in R. Note, however, that the R code is restricted to the analysis of maxima (or largest values). For mimima (or smallest values) see this R code.

The code focuses on an application of extreme value analysis in the field of engineering, namely obtaining the maximum wind speed.

The code shows how to fit a Gumbel and Weibull distribution for largest values to the wind speed data. Subsequently, the same data will be fitted with a Generalized Extreme Value (or GEV) distribution for maxima. The three extreme value distributions for largest values (i.e., Gumbel, Weibull, and Fréchet distribution) are all family members of the GEV distribution. The code will demonstrate that a GEV model for largest values fits either the Gumbel, Weibull, or Fréchet distribution for maxima.

A comprehensive introduction to extreme value analysis is given by Castillo et al. in their 2005 book Extreme value and related models with applications in engineering and science.
Another introduction to extreme value analysis is given by Coles in his 2001 book An introduction to statistical modeling of extreme values.

Suggestions and/or questions? Please contact Stefan Gelissen (email: info at datall-analyse.nl).
See this page for an overview of all of my R code blog posts.

R code

library(survival)
library(SPREDA)
library(extRemes)


#The R functions for performing the extreme value analysis (for maxima)
#can be downloaded from: 
source("http://www.datall-analyse.nl/R/eva_max.R")


##data
#data from Castillo et al., Table 1.1, p. 9-10
#yearly maximum wind speed (in miles/hour)
evobs <- scan("http://personales.unican.es/castie/extremes/Table1-1.txt")

#explore data visually
hist(evobs)



##MLEs of parameters Gumbel and Weibull model (largest values)

#Gumbel
gumbelmod <- fevd(x=evobs, type="Gumbel", method="MLE")
summary(gumbelmod)
#extract MLEs (these are needed for the remaining part of the analysis)
muG <- gumbelmod$results$par[1]
sigmaG <- gumbelmod$results$par[2]

#Fréchet
frechetmod <- Lifedata.MLE(Surv(evobs) ~ 1, dist="frechet")
frechetmod
#extract MLEs (these are needed for the remaining part of the analysis)
muF <- coef(frechetmod)[1]
sigmaF <- coef(frechetmod)[2]


##probability plot for largest values (=maxima)
#probability plot for Gumbel and Fréchet distribution (side by side)
par(mfrow=c(1, 2))
probplot(values=evobs, model=gumbelmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="gumbel")
probplot(values=evobs, model=frechetmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="frechet")
par(mfrow=c(1, 1))



##cumulative probability plot for largest values (including return period)
#note: for the wind data, a return period of 20 means that once every
#20 years the wind speed is (on average) expected to be larger than
#muG+qlev(1-1/20)*sigmaG=49.4 mph (in case of a Gumbel distribution),
#or exp(muF+qlev(1-1/20)*sigmaF)=51.4 mph (in case of a Fréchet distribution)
#(this expected wind speed is also called the return level)

#Gumbel distribution
cprobplot(values=evobs, model=gumbelmod, varname="Wind speed (mph)",
          alpha=1-.95, dist="gumbel")
#Fréchet distribution
cprobplot(values=evobs, model=frechetmod, varname="Wind speed (mph)",
          alpha=1-.95, dist="frechet")



##diagnostics
#q-q and p-p plot for Gumbel and Fréchet distribution
par(mfrow=c(2, 2))
QQplot(values=evobs, mu=muG, sigma=sigmaG, dist="gumbel")
QQplot(values=evobs, mu=muF, sigma=sigmaF, dist="frechet")
PPplot(values=evobs, mu=muG, sigma=sigmaG, dist="gumbel")
PPplot(values=evobs, mu=muF, sigma=sigmaF, dist="frechet")
par(mfrow=c(1, 1))



##fit GEV distribution (largest values) to data
gevmod <- fevd(x=evobs, units="Wind speed (mph)", period.basis="year",
               type="GEV", method="MLE")
gevmod
#extract MLEs (these are needed for the remaining part of the analysis)
muGev <- gevmod$results$par[1]
sigmaGev <- gevmod$results$par[2]
xiGev <- gevmod$results$par[3]



##confidence intervals for MLEs of GEV model
#normal-approximation confidence intervals
ci(gevmod, alpha=.05, type="parameter", method="normal")

#likelihood based confidence intervals
#mu (=location)
ci(gevmod, alpha=.05, which.par=1, type="parameter", method="proflik",
   xrange=c(26,30), nint=50, verbose=TRUE, main="GEV - largest values")
#sigma (=scale)
ci(gevmod, alpha=.05, which.par=2, type="parameter", method="proflik",
   xrange=c(3,7), nint=50, verbose=TRUE, main="GEV - largest values")
#xi (=shape)
ci(gevmod, alpha=.05, which.par=3, type="parameter", method="proflik",
   xrange=c(.1,.8), nint=50, verbose=TRUE, main="GEV - largest values",
   ylim=c(-175,-171)) #change range of the y-axis



##relationship between GEV model and Gumbel/Weibull/Fréchet distribution
#obtain parameters of Gumbel, Weibull, and Fréchet model from GEV model
#see Castillo et al., p. 200

#xi = 0: GEV model fitted a Gumbel distribution
parmsGumbel <- c(muGev, sigmaGev)
names(parmsGumbel) <- c("Mu", "Sigma")
parmsGumbel #GEV model's estimates of the Gumbel distribution
#compare with Gumbel model
fevd(x=evobs, type="Gumbel", method="MLE")$results$par

#xi > 0: GEV model fitted a Fréchet distribution
parmsFrechet <- c(log(sigmaGev/xiGev), xiGev, muGev-sigmaGev/xiGev)
names(parmsFrechet) <- c("Mu", "Sigma", "Gamma")
parmsFrechet #GEV model's estimates of the Fréchet distribution
#compare with Fréchet model
gamma <- parmsFrechet[3] #threshold parameter of Fréchet distribution
Lifedata.MLE(Surv(evobs-gamma) ~ 1, dist="frechet")

#xi < 0: GEV model fitted a Weibull distribution
parmsWeibull <- c(log(sigmaGev/-xiGev), -xiGev, muGev-sigmaGev/xiGev)
names(parmsWeibull) <- c("Mu", "Sigma", "Gamma")
parmsWeibull #GEV model's estimates of the Weibull distribution
#compare with Weibull model
gamma <- parmsWeibull[3] #threshold parameter of Weibull distribution
survreg(Surv(gamma-evobs) ~ 1, dist="weibull")



##diagnostics
#histogram of the data together with the density of the fitted GEV distribution
plot(gevmod, "hist", ylim=c(0,0.1), col="gray", main="GEV - largest values")
#density plot of the data together with the density of the fitted GEV distribution
plot(gevmod, "density", ylim=c(0,0.1), main="GEV - largest values")

#q-q and p-p plot for GEV (here: compare with Gumbel distribution)
par(mfrow=c(2, 2))
QQplot(values=evobs, mu=muGev, sigma=sigmaGev, xi=xiGev, dist="gev")
QQplot(values=evobs, mu=muG, sigma=sigmaG, dist="gumbel")
PPplot(values=evobs, mu=muGev, sigma=sigmaGev, xi=xiGev, dist="gev")
PPplot(values=evobs, mu=muG, sigma=sigmaG, dist="gumbel")
par(mfrow=c(1, 1))



##probability plot for GEV distribution (here: compare with Gumbel distribution)
#note: the GEV model may display concave/convex behavior in the upper tail
#this concave/convex behavior depends on the xi-parameter of the GEV model
#- if this parameter differs from zero, then the GEV model fitted
# either the Weibull distribution (convex upper tail), or the Fréchet
# distribution (concave upper tail)
#- if this xi-parameter is very close to zero, then the GEV model fitted a
# Gumbel distribution, and no concave/convex upper tail will show up
par(mfrow=c(1, 2))
probplot(values=evobs, model=gevmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="gev")
probplot(values=evobs, model=gumbelmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="gumbel")
par(mfrow=c(1, 1))

#compare GEV and Gumbel model with likelihood-ratio test
gummod <- fevd(x=evobs, units="Wind speed (mph)", type="Gumbel", method="MLE")
lr.test(gevmod, gummod)



#for the wind speed data, it is also interesting to compare the
#probability plot of the GEV model with that of the Fréchet model
par(mfrow=c(1, 2))
probplot(values=evobs, model=gevmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="gev")
probplot(values=evobs, model=frechetmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="frechet")
par(mfrow=c(1, 1))

#note that in the Fréchet probability plot on the right, the observed data
#points show signs of concave behavior in the upper tail
#this concave behavior in the upper tail suggests that the fit of the
#Fréchet distribution could be improved by adding a threshold parameter
#to the Fréchet model
#the blue line of fitted GEV model on the left, on the other hand,
#"bends towards" these observations in the upper tail
#the reason for this bending behavior is that the fitted GEV model is
#actually similar to a Fréchet model with a threshold parameter included
#(which is also called a three-parameter Fréchet model)

#obtain the Fréchet threshold parameter (=gamma) from the fitted GEV model
#(see Castillo et al., p. 200; see also Coles, p. 56)
gamma <- muGev-sigmaGev/xiGev

#fit the Fréchet model including the threshold parameter
frechetmodG <- Lifedata.MLE(Surv(evobs-gamma) ~ 1, dist="frechet")

#fitting a Fréchet model including the correct threshold parameter
#should linearize the Fréchet probability plot, which can be seen as follows:

#Fréchet probability plot of model with (left) and without (right) threshold
par(mfrow=c(1, 2))
probplot(values=evobs-gamma, model=frechetmodG,
         varname="Wind speed (mph)", alpha=1-.95, dist="frechet")
probplot(values=evobs, model=frechetmod, varname="Wind speed (mph)",
         alpha=1-.95, dist="frechet")
par(mfrow=c(1, 1))



##cumulative probability plot for GEV model (including return period)
#GEV distribution (largest values)
cprobplot(values=evobs, model=gevmod, varname="Wind speed (mph)",
          alpha=1-.95, dist="gev")
#return period versus return level
plot(gevmod, "rl", main="GEV - largest values")



##likelihood based intervals for return levels (GEV model)
#in case of the GEV model, the normal-approximation confidence intervals for the
#return level can be unrealistically wide, especially in the tails
#(see, for instance, the probability and cumulative probability plot of the GEV model)
#likelihood based intervals, on the other hand, yield more realistic intervals

#specify cumulative probability
cumProb <- .99
returnPeriod <- 1/(1-cumProb) #return period = 1/(1-cumulative probability)

#normal-approximation confidence interval
(ciRetperN <- ci(gevmod, type="return.level", return.period=returnPeriod,
                 method="normal", alpha=.05))

#likelihood based confidence interval
ci(gevmod, type="return.level", return.period=returnPeriod,
   main="GEV - largest values",
   method="proflik", alpha=.05, nint=100,
   xrange=c(ciRetperN[1], ciRetperN[3]), verbose=TRUE,
   ylim=c(-180,-170)) #change range of the y-axis