# Tag Archives: Extreme value theory

While linear correlation represents the appropriate measure of dependence for normally distributed asset returns, capturing co-movements between non-normal financial asset returns requires the theory of copulas, a flexible way of modelling general multivariate dependencies. Let’s download 16 years’ worth of price data (1996–2011) for JPM and MS, set up the data environment, calculate log returns and explore the marginal distribution of returns.

```library(fExtremes)
library(timeSeries)
library(LambertW)
library(quantmod)
library(MSBVAR)
library(spd)
library(PerformanceAnalytics)

# Download daily prices for the two tickers and build a bivariate log-return
# series from column 4 of each downloaded object (the close series in
# quantmod's OHLC layout).
ticker<-c('JPM','MS')
symbols<-unlist(strsplit(ticker,split=","))   # no-op split here: neither ticker contains a comma
getSymbols(symbols,auto.assign=TRUE,from='1996-01-01',to='2011-12-31')
# tail(..., -1) drops the leading NA introduced by diff()
ret.mat<-as.timeSeries(cbind(tail(diff(log(get(symbols[1])[,4])),-1),tail(diff(log(get(symbols[2])[,4])),-1)))
ret<-paste(ticker,"Returns")
ret.unl<-unlist(strsplit(ret,","))
colnames(ret.mat)<-ret.unl

# Ensure return series contains only non zero values
# (drops every row where either asset shows a zero return, e.g. stale prices)
nonz.ind<-(seriesData(ret.mat[,1]) !=0 & seriesData(ret.mat[,2])!=0)
ret.mat.nz<-ret.mat[nonz.ind,]

# ##############################################################################
# Exploring the marginal distribution of returns
# ##############################################################################

# [1]Plotting the time series: one panel per asset, zero line for reference

windows()
layout(c(1,1,2,2))
par(mai=c(0.3,0.75,0,0.1))
plot(ret.mat.nz[,1],cex=0.7,cex.axis=0.9,cex.lab=1,cex.main=0.7,col='steelblue',font.axis=2,xaxt='n',xlab='',bty='n')
abline(h=0)
par(mai=c(0.5,0.75,0.3,0.1))
plot(ret.mat.nz[,2],cex=0.7,cex.axis=0.9,cex.lab=1,cex.main=0.7,col='darkblue',font.axis=2,xaxt='n',xlab='',bty='n')
abline(h=0)

# [2] Normality tests
windows()
# Global min/max over both series; reused later as common axis limits
x_min<-min(ret.mat.nz)
x_max<-max(ret.mat.nz)
layout(c(1,1,2,2,3,3))
# NOTE(review): three consecutive par() calls with no plots between them --
# only the last margin setting takes effect; looks like leftover layout code.
par(mai=c(0.1,0.75,0.3,0.1))
par(mai=c(0.1,0.75,0,0.1))
par(mai=c(0.3,0,0,0.1))

# QQ plots against the normal distribution (PerformanceAnalytics)
windows()
chart.QQPlot(ret.mat.nz[,1],distribution='norm',line="quartiles",element.color="black",cex.lab=0.75,cex.axis=0.75,cex.main=0.75,lwd=1,pch=1,cex=0.5)

windows()
chart.QQPlot(ret.mat.nz[,2],distribution='norm',line="quartiles",element.color="black",cex.lab=0.75,cex.axis=0.75,cex.main=0.75,lwd=1,pch=1,cex=0.5)```

Created by Pretty R at inside-R.org

Visual inspection of the above figures should convince us that neither JPM nor MS returns are normally distributed, invalidating the use of linear correlation as a reliable measure of dependency between these financial assets. In addition to the univariate marginal distribution explored above, let’s look at the joint bivariate distribution of MS and JPM log-returns and compare it to simulated bi-variate normal data.

```# [3] Compare Empirical Scatter with simulated bivariate normal data

# rmultnorm() (MSBVAR) draws from a multivariate normal parameterised by the
# sample covariance matrix and mean vector of the observed returns.
simulated.ret<-rmultnorm(n=nrow(ret.mat.nz),vmat=var(ret.mat.nz),mu=colMeans(ret.mat.nz))
colnames(simulated.ret)<-paste("Simulated Bivariate Normal Return for",colnames(ret.mat.nz))
tot.ret<-cbind(ret.mat.nz,simulated.ret)   # NOTE(review): tot.ret is not used later in this section
emp.corr<-cor(ret.mat.nz[,1],ret.mat.nz[,2])

# Left column: empirical vs simulated scatter on identical axis limits
windows(width=7*2)
layout(matrix(c(1,2,3,4,5,6),nrow=2,ncol=3))
par(mai=c(0.7,0.7,0.3,0.1))
plot(ylim=c(x_min,x_max),xlim=c(x_min,x_max),xlab=ret.unl[1],ylab=ret.unl[2],x=ret.mat.nz[,1],y=ret.mat.nz[,2],pch=1,col="darkblue",cex.main=0.85,cex.lab=0.85,cex=0.85,cex.axis=0.85,main=paste("Empirical data with rho of\n ",emp.corr))
grid()
abline(h=0,v=0)

par(mai=c(0.7,0.7,0.3,0.1))
plot(ylim=c(x_min,x_max),xlim=c(x_min,x_max),xlab=paste("Simulated ",ret.unl[1]),ylab=paste("Simulated ",ret.unl[2]),x=simulated.ret[,1],y=simulated.ret[,2],pch=1,col="darkblue",cex.main=0.85,cex.lab=0.75,cex=0.85,cex.axis=0.85,main="Simulated data from \n Bivariate Normal Distribution")
grid()
abline(h=0,v=0)

# Remaining panels: histograms (PerformanceAnalytics) of empirical vs
# simulated returns for each asset, with density overlays
par(mai=c(0.7,0.7,0.3,0.1))
chart.Histogram(main=paste("Histogram of \n ",colnames(ret.mat.nz)[1]), element.color="black",note.color="darkred",xlim=c(-x_max,x_max),note.lines=x_max,cex.axis=0.85,cex=0.85,cex.lab=0.85,cex.main=0.85,ret.mat.nz[,1], methods = c("add.density", "add.centered"),lwd=1)

par(mai=c(0.7,0.7,0.3,0.1))
chart.Histogram(main=paste("Histogram of \n",colnames(simulated.ret)[1]),element.color="black",note.color="darkred",xlim=c(-x_max,x_max),note.lines=x_max,cex.axis=0.85,cex=0.85,cex.lab=0.85,cex.main=0.85,simulated.ret[,1], methods = c("add.density", "add.centered"),lwd=1)

par(mai=c(0.7,0.7,0.3,0.1))
chart.Histogram(main=paste("Histogram of \n",colnames(ret.mat.nz)[2]),element.color="black",note.color="darkgreen",xlim=c(-x_max,x_max),note.lines=x_max,cex.axis=0.85,cex=0.85,cex.lab=0.85,cex.main=0.85,ret.mat.nz[,2], methods = c("add.density", "add.centered"),lwd=1)

par(mai=c(0.7,0.7,0.3,0.1))
chart.Histogram(main=paste("Histogram of \n",colnames(simulated.ret)[2]),element.color="black",note.color="darkgreen",note.cex=1,xlim=c(-x_max,x_max),note.lines=x_max,cex=0.85,cex.axis=0.85,cex.lab=0.85,cex.main=0.85,simulated.ret[,2], methods = c("add.density", "add.centered"),lwd=1)```

Created by Pretty R at inside-R.org

Visual inspection of the graphs suggests that the simulated bi-variate normal data matches the joint behaviour of returns in the middle of the distribution quite well but does not capture the observed dependence in the tails. When the bi-variate normal distribution does not adequately describe joint returns, Pearson correlation may not be a proper measure of dependence. Tails of asset returns can be modelled using the generalised Pareto distribution (parametric), while the centre of the distribution can be captured using an empirical distribution function (non-parametric). This results in a semi-parametric approach to modelling the bi-variate data. Incidentally, if we had imposed a normality assumption on the JPM and MS data, data points would be clustered around 0 in the expected way (last 3 graphs).

Let’s fit a semi-parametric model to the data and compare the actual returns with simulated normal and semi-parametric returns. The semi-parametric model is a better fit than the normal model.

```# [4] Modelling the Tails of the distribution

# [4.a] The Mean Excess Plot should tell us where to cut the tails from the entire distribution
# mePlot() comes from fExtremes; one panel per asset, stacked vertically.
windows()
layout(matrix(c(1,2),ncol=1,nrow=2))
par(mai=c(1,0.75,0.3,0.3))
mePlot(ret.mat.nz[,1],cex=0.75,cex.axis=0.75,cex.main=0.85,cex.lab=0.75,sub=colnames(ret.mat.nz)[1])
mePlot(ret.mat.nz[,2],cex=0.75,cex.axis=0.75,cex.main=0.85,cex.lab=0.75,sub=colnames(ret.mat.nz)[2])

# [4.b] Perform a semiparametric fit; Normal density for the middle of the distribution, GPD for the tails
# spdfit() (spd package): GPD tails are fitted beyond the 10% / 90% quantiles,
# the interior is handled by a normal kernel estimate.

semiparam.1.fit<-spdfit(ret.mat.nz[,1],upper=0.9,lower=0.1,tailfit='GPD',kernelfit='normal')
semiparam.2.fit<-spdfit(ret.mat.nz[,2],upper=0.9,lower=0.1,tailfit='GPD',kernelfit='normal')

# Plots for the first series
# which=1/2 are the overall fit diagnostics; which=3 shows the upper tail and
# which=4 the lower tail (per the mtext labels used below).
windows()
layout(matrix(c(1,2),nrow=2,ncol=1))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(semiparam.1.fit,which=1)
mtext(paste(ticker[1],"Data"),side=3,col="darkblue",cex=0.75)
plot(semiparam.1.fit,which=2)
mtext(paste(ticker[1],"Data"),side=3,col="darkblue",cex=0.75)

windows()
layout(matrix(c(1,2),nrow=2,ncol=1))
par(mai=c(0.5,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(semiparam.1.fit,which=3)
mtext(paste("Fitting the Upper Tails of the ",ticker[1],"Return Series"),side=1,col="darkblue",cex=1,outer=T,font=4)

windows()
layout(matrix(c(1,2),nrow=2,ncol=1))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=1,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(semiparam.1.fit,which=4)
mtext(paste("Fitting the Lower Tails of the ",ticker[1],"Return Series"),side=1,col="darkblue",cex=1,outer=T,font=4)

# Plots for the second Series
# Mirrors the first-series plots: which=1/2 overall diagnostics, which=3 the
# upper tail, which=4 the lower tail.

windows()
layout(matrix(c(1,2),nrow=2,ncol=1))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(semiparam.2.fit,which=1)
mtext(paste(ticker[2],"Data"),side=3,col="darkred",cex=0.75)
plot(semiparam.2.fit,which=2)
mtext(paste(ticker[2],"Data"),side=3,col="darkred",cex=0.75)

windows()
layout(matrix(c(1,2),nrow=2,ncol=1))
par(mai=c(0.5,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(semiparam.2.fit,which=3)
# FIX: which=3 is the UPPER tail (cf. the first-series plots); the label
# previously said "Lower Tails" -- a copy-paste error.
mtext(paste("Fitting the Upper Tails of the ",ticker[2],"Return Series"),side=1,col="darkred",cex=1,outer=T,font=4)

windows()
layout(matrix(c(1,2),nrow=2,ncol=1))
par(mai=c(0.5,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(semiparam.2.fit,which=4)
mtext(paste("Fitting the Lower Tails of the ",ticker[2],"Return Series"),side=1,col="darkred",cex=1,outer=T,font=4)

#Comparing the scatterplots for
# actual returns, simulated bivariate normal returns, and simulated semiparametric returns

# rspd() (spd package) draws random variates from each fitted semiparametric
# distribution; the two series are simulated independently of each other.
semiparam.1.sim<-rspd(nrow(ret.mat.nz),semiparam.1.fit)
semiparam.2.sim<-rspd(nrow(ret.mat.nz),semiparam.2.fit)
semiparametric.sim.ret<-cbind(semiparam.1.sim,semiparam.2.sim)
colnames(semiparametric.sim.ret)<-paste("Semiparametric simulated returns for",ticker)

# Three side-by-side scatters on identical axis limits: empirical, bivariate
# normal, semiparametric GPD.
windows(width=7*2,height=5)
layout(matrix(c(1,2,3),nrow=1,ncol=3))
plot(ylim=c(x_min,x_max),xlim=c(x_min,x_max),xlab=colnames(ret.mat.nz)[1],ylab=colnames(ret.mat.nz)[2],cex.main=1,cex=1,cex.lab=0.95,cex.axis=0.85,x=ret.mat.nz[,1],y=ret.mat.nz[,2],main=paste("Actual Return Data For \n",ticker[1],"and",ticker[2]),col='blue',pch=10)
abline(h=0,v=0)
grid()

plot(ylim=c(x_min,x_max),xlim=c(x_min,x_max),xlab=colnames(simulated.ret)[1],ylab=colnames(simulated.ret)[2],cex.main=1,cex=1,cex.lab=0.95,cex.axis=0.85,x=simulated.ret[,1],y=simulated.ret[,2],main=paste("Simulated Return Data From \n","BiVariate Normal Distribution"),col='darkblue',pch=10)
abline(h=0,v=0)
grid()

plot(ylim=c(x_min,x_max),xlim=c(x_min,x_max),xlab=colnames(semiparametric.sim.ret)[1],ylab=colnames(semiparametric.sim.ret)[2],cex.main=1,cex=1,cex.lab=0.95,cex.axis=0.85,x=semiparametric.sim.ret[,1],y=semiparametric.sim.ret[,2],main=paste("Simulated Return Data From \n","Semiparametric GPD"),col='darkblue',pch=10)
abline(h=0,v=0)
grid()```

Created by Pretty R at inside-R.org

In the previous post we examined how the maxima model can be applied to the SP500 dataset using quarterly and monthly blocks. A more modern and comprehensive approach involves fitting an appropriate threshold model to the data rather than dividing the entire dataset into quarters or months.

Before we can fit the model, we must determine a suitable threshold to use. This can be assessed by visual inspection of mean excess plots and/or GPD shape plots from the fExtremes library. The mean excess plot becomes linear and upward sloping at around a threshold of u=1, which we can then use as an input to fitting the GPD model. Diagnostic plots suggest that the GPD model is quite suitable for modelling the returns data.

```# Mean excess plot (Lower Tail)
# The ME plot tells us the threshold u. The threshold occurs where the ME plot starts to be linear
# linear and + slope -> fat tails
# linear and - slope -> thin tails
# NOTE(review): ret.mat here is presumably the S&P 500 percentage-return
# series built in the download section of this post series -- confirm it is
# in scope (the JPM/MS section above also defines a ret.mat).

windows()
layout(c(1,1,2,2))
par(mai=c(0.5,0.75,0.2,0.3))
par(cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85)
mePlot(-ret.mat)   # returns are negated so that losses are positive
par(mai=c(0.75,0.75,0.3,0.3))
gpdShapePlot(-ret.mat,plottype='reverse')   # Fat tails if: shape parameter xi increases as thresholds increase

# Fit the GPD to the exceedances above the threshold u = 1 chosen from the ME plot
GPD.fit.low<-gpdFit(-ret.mat,u=1)

# diagnostics: the four standard gpdFit diagnostic plots in one window
windows()
layout(matrix(c(1,2,3,4),nrow=2,ncol=2))
par(mai=c(0.75,0.75,0.75,0.5),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
for(i in 1:4){
plot(GPD.fit.low,which=i)
}```

Created by Pretty R at inside-R.org

hh

ggg

Now that we have fitted a threshold model to the SP500 returns data, we can use this GPD distribution to calculate extreme risk measures such as VaR and Expected shortfall. The following section compares these risk measures under a normality assumption versus the fitted GPD distribution.

```# Risk measures VaR , Eshortfall
# Compare Value-at-Risk and Expected Shortfall computed under a normality
# assumption against the same measures from the fitted GPD tail model.
quantiles<-round(seq(0.95,0.99,length=100),4)

# NOTE(review): Riskmeasures.Normal() is not defined anywhere in this post --
# presumably a user-written helper mirroring gpdRiskMeasures() for the normal
# distribution; confirm it is in scope before running.
Norm.risk<-round(Riskmeasures.Normal(-ret.mat,quantiles),4)
Norm.risk.quantiles<-Norm.risk[,1]
Norm.risk.probs<-(1-Norm.risk[,1])*100   # tail probabilities, in percent
Norm.risk.VaR<-Norm.risk[,2]
Norm.risk.Es<-Norm.risk[,3]

# gpdRiskMeasures() (fExtremes): columns used are quantile, VaR, ES
GPD.risk<-round(gpdRiskMeasures(GPD.fit.low,p=quantiles),4)
GPD.risk.quantiles<-GPD.risk[,1]
GPD.risk.probs<-(1-GPD.risk[,1])*100
GPD.risk.VaR<-GPD.risk[,2]
GPD.risk.Es<-GPD.risk[,3]

windows()
par(mai=c(0.75,0.75,0.75,0.5),cex.main=0.95,cex.lab=0.85,cex=0.75,cex.axis=0.85)
plot(main="Value-at-risk and Expected-Shortfall \n under normality and Generalised Pareto Distribution for the tails",ylab="Risk",xlab="Probabilites",type='l',x=GPD.risk.probs,y=GPD.risk.VaR,ylim=c(min(GPD.risk),max(GPD.risk)),lwd=2,col="steelblue")
lines(x=GPD.risk.probs,y=GPD.risk.Es,type='l',col="darkblue",lwd=2)
lines(x=Norm.risk.probs,y=Norm.risk.VaR,col="green",lwd=2)
lines(x=Norm.risk.probs,y=Norm.risk.Es,col="darkgreen",lwd=2)
# NOTE(review): GPD.risk[max(GPD.risk.probs),2] indexes rows with a
# probability value (e.g. 5) rather than a row number -- verify this picks
# the intended y position for the labels.
text(cex=1,max(GPD.risk.probs)-0.5,y=GPD.risk[max(GPD.risk.probs),2]-0.05,"GPD \n Value At Risk",font=2,col="steelblue")
text(cex=1,x=max(GPD.risk.probs)-0.5,y=GPD.risk[max(GPD.risk.probs),3]+0.16,"GPD \n Expected shortfall",font=2,col="darkblue")
text(cex=1,max(GPD.risk.probs)-0.5,y=GPD.risk[max(GPD.risk.probs),2]+0.16,"Norm \n Value At Risk",font=2,col="green")
text(cex=1,x=max(GPD.risk.probs)-0.5,y=GPD.risk[max(GPD.risk.probs),3]-0.18,"Norm \n Expected shortfall",font=2,col="darkgreen")

# Example interpretation: mark the risk measures at the mid-point probability
mid.prob<-mean(GPD.risk.probs)
mid.quant<-1-(mid.prob/100)
# last() picks the measures at the largest quantile not exceeding mid.quant
mid.VaR<-last(GPD.risk[GPD.risk[,1]<=mid.quant,2])
mid.Es<-last(GPD.risk[GPD.risk[,1]<=mid.quant,3])

abline(v=mid.prob,lwd=1,lty="dashed")
abline(h=mid.VaR,lwd=1,lty="dashed")
abline(h=mid.Es,lwd=1,lty="dashed")

points(mid.prob,mid.VaR,col="darkred",cex=2,pch=20)
points(mid.prob,mid.Es,col="darkred",cex=2,pch=20)

text(x=1.8,y=1.25,labels=paste("[Value at Risk] \n \n With a probability of",mid.prob,"percent"),font=2)
text(x=1.8,y=1.10,labels=paste("The daily return can be as \n low as",-mid.VaR,"percent"),font=2)

text(x=4.05,y=2.5,labels=paste("[Expected Shortfall] \n \n With a return less then",-mid.VaR,"percent"),font=2)
text(x=4,y=2.35,labels=paste("the average daily return is \n ",-mid.Es,"percent"),font=2)
mtext(side=3,"VaR and Es measures under Normality are underestimated relative to the GPD case",col='darkblue',font=2,cex=0.75)

#Results summary
# Build textplot() summary tables for the dataset and the three fitted
# models: quarterly block-maxima GEV, monthly block-maxima GEV and the GPD
# threshold model. Estimates, standard errors and t-values are stacked
# column-wise.
# FIX: all slot accesses previously read `@fit\$...` -- the `\$` is a
# blog-escaping artefact and is a syntax error in R; corrected to `$`.
# NOTE(review): beg, ed, p.n.obs and ret.type are created in the S&P 500
# download section of this post series -- confirm that code has been run.
tab.fill1<-c("",symbols,paste(beg,'-',ed),p.n.obs,ret.type)
tab.ind<-c("","Symbol","Date Range","Observations","Return Type")
dat.tab<-cbind(tab.ind,tab.fill1)
rownames(dat.tab)<-c("","","Dataset\n used","","")

# Quarterly block-maxima GEV fit (parameter order: xi = shape, mu = location,
# beta = scale)
temp<-GEV.Quarters
temp.n<-temp@fit$n
temp.xi<-round(temp@fit$par.ests[1],2)
temp.mu<-round(temp@fit$par.ests[2],2)
temp.beta<-round(temp@fit$par.ests[3],2)
# NOTE(review): the tail index is conventionally alpha = 1/xi, not 1/beta --
# confirm which definition the table is meant to report.
temp.alpha<-round(1/temp.beta,2)
temp.xi.se<-round(temp@fit$par.ses[1],2)
temp.mu.se<-round(temp@fit$par.ses[2],2)
temp.beta.se<-round(temp@fit$par.ses[3],2)
temp.alpha.se<-c('***')   # no standard error available for the derived alpha
temp.prob<-paste(round(Quarters.prob[1],4)*100,"%")
# t-values = estimate / standard error
temp.xi.t<-round(temp.xi/temp@fit$par.ses[1],2)
temp.mu.t<-round(temp.mu/temp@fit$par.ses[2],2)
temp.beta.t<-round(temp.beta/temp@fit$par.ses[3],2)
temp.alpha.t<-c('***')
# temp.interp<-ifelse(temp.xi<0,"Weibull Distribution which is thin tailed",ifelse(temp.xi==0,"Gumbel Distribution","Frechet Distribution which is fat-tailed"))

# Monthly block-maxima GEV fit
temp.m<-GEV.Monthly
temp.n.m<-temp.m@fit$n
temp.xi.m<-round(temp.m@fit$par.ests[1],2)
temp.mu.m<-round(temp.m@fit$par.ests[2],2)
temp.beta.m<-round(temp.m@fit$par.ests[3],2)
temp.alpha.m<-round(1/temp.beta.m,2)
temp.xi.se.m<-round(temp.m@fit$par.ses[1],2)
temp.mu.se.m<-round(temp.m@fit$par.ses[2],2)
temp.beta.se.m<-round(temp.m@fit$par.ses[3],2)
temp.alpha.se.m<-c('***')
temp.prob.m<-paste(round(Months.prob[1],4)*100,"%")
# FIX: these t-values previously divided the QUARTERLY estimates (temp.xi,
# temp.mu, temp.beta) by the monthly standard errors; use the monthly
# estimates instead.
temp.xi.t.m<-round(temp.xi.m/temp.m@fit$par.ses[1],2)
temp.mu.t.m<-round(temp.mu.m/temp.m@fit$par.ses[2],2)
temp.beta.t.m<-round(temp.beta.m/temp.m@fit$par.ses[3],2)
temp.alpha.t.m<-c('***')
# temp.interp.m<-paste("The maximal returns in\nmonthly blocks follow a",ifelse(temp.xi.m<0,"Weibull Distribution which is thin tailed",ifelse(temp.xi.m==0,"Gumbel Distribution","Frechet Distribution which is fat-tailed")))

# GPD threshold-exceedance fit (parameter order: xi = shape, beta = scale)
temp.g<-GPD.fit.low
temp.n.g<-length(temp.g@fit$data)
temp.thr.g<-temp.g@fit$threshold
temp.xi.g<-round(temp.g@fit$par.ests[1],2)
temp.beta.g<-round(temp.g@fit$par.ests[2],2)
temp.xi.se.g<-round(temp.g@fit$par.ses[1],2)
temp.beta.se.g<-round(temp.g@fit$par.ses[2],2)
temp.xi.t.g<-round(temp.xi.g/temp.g@fit$par.ses[1],2)
temp.beta.t.g<-round(temp.beta.g/temp.g@fit$par.ses[2],2)
temp.VaR<-GPD.risk.VaR[1]
temp.Es<-GPD.risk.Es[1]
# temp.interp.g<-paste("The threshold excess returns\nfollow a",ifelse(temp.xi.g<0,"Weibull Distribution which is thin tailed",ifelse(temp.xi.g==0,"Gumbel Distribution","Frechet Distribution which is fat-tailed")))

# Assemble the display table: one row group per model, separated by dashes
gev.fill1<-c("Observations","xi(shape)","mu(location)","beta(scale)","alpha(tailindex)","PROB","----------------","Observations","xi(shape)","mu(location)","beta(scale)","alpha(tailindex)","PROB","----------------","Observations","Threshold","xi(shape)","beta(scale)","VaR","Es")
gev.fill2<-c(temp.n,temp.xi,temp.mu,temp.beta,temp.alpha,temp.prob,"--------",temp.n.m,temp.xi.m,temp.mu.m,temp.beta.m,temp.alpha.m,temp.prob.m,"--------",temp.n.g,temp.thr.g,temp.xi.g,temp.beta.g,temp.VaR,temp.Es)
gev.fill3<-c(" ",temp.xi.se,temp.mu.se,temp.beta.se,temp.alpha.se,' ',"-------"," ",temp.xi.se.m,temp.mu.se.m,temp.beta.se.m,temp.alpha.se.m,' ',"-------"," "," ",temp.xi.se.g,temp.beta.se.g," "," ")
gev.fill4<-c(" ",temp.xi.t,temp.mu.t,temp.beta.t,temp.alpha.t,' ',"-------"," ",temp.xi.t.m,temp.mu.t.m,temp.beta.t.m,temp.alpha.t.m,' ',"-------"," "," ",temp.xi.t.g,temp.beta.t.g,' '," ")
gev.tab<-cbind(gev.fill1,gev.fill2,gev.fill3,gev.fill4)
colnames(gev.tab)<-c("Estimates","Values","Std.Errors","T-value")
rownames(gev.tab)<-c("Quarterly\nBlock Maxima"," "," "," "," ","Risk Measures"," ------------------------------","Monthly\nBlock Maxima"," "," "," "," ","Risk Measures"," ------------------------------","Value over\nThreshold"," "," "," "," ","Risk Measures")

# Render both tables; risk-measure rows are highlighted in dark red
windows()
layout(c(1,2,2,2))
textplot(col.rownames="darkblue",show.colnames=F,dat.tab,cex=1,valign='bottom',halign='center',col.colnames="darkblue")
textplot(col=ifelse((gev.tab=='PROB' | gev.tab=='VaR' | gev.tab=='Es'),'darkred','black'),col.colnames="darkblue",col.rownames=ifelse(rownames(gev.tab)=='Risk Measures','darkred',"darkblue"),gev.tab,cex=1,valign='top',halign='left')```

Created by Pretty R at inside-R.org

hghh

The following plot shows how VaR/ES measures vary across Distributions (Normal vs GPD) along probability levels. The results corroborate the intuition that extreme risk measures are underestimated in the case a normality assumption is imposed as opposed to the case where we use the more accurate GPD distribution to model the tails.

Finally I have summarised the findings in the following table.

Following the simple introduction to EVT in the previous post, let’s proceed to apply the maxima and threshold models to the SP 500 data set. First, let’s download the prices for ^GSPC from 1960 to 1987 and calculate arithmetic returns. Then we shall plot the price and returns data as well as the empirical density along with a simulated normal distribution.

```# Download the data and draw price/return charts
ticker<-c('^GSPC')
symbols<-unlist(strsplit(ticker,split=","))
beg<-'1960-01-05'
ed<-'1987-10-16'
ret.type='arithmetic'

getSymbols(symbols[1],from=as.Date(beg),to=as.Date(ed))
price.mat<-GSPC[,4]   # column 4 of the OHLC object (the close series in quantmod's layout)
p.n.obs<-nrow(price.mat)
# Daily arithmetic returns scaled to percentage points
ret.mat<-as.timeSeries(dailyReturn(price.mat,type=ret.type)*100)
# ret.mat<-as.timeSeries(tail(diff(log(GSPC[,4])),-1)*100)
colnames(ret.mat)<-c("S+P500 Returns")

# Plot Series: price level, returns, and empirical density with a standard
# normal overlay for comparison
windows()
layout(c(1,2,3))
par(mai=c(0.3,0.5,0.2,0.5))
plot(price.mat,cex=0.7,cex.axis=0.9,cex.lab=1,cex.main=0.7,xlab='',main='')
grid()
par(mai=c(0.5,0.5,0.2,0.5))
plot(ret.mat,cex=0.7,cex.axis=0.9,cex.lab=1,cex.main=0.7,ylab='',col='darkblue',font.axis=2,xlab='',main='')
grid()
abline(h=0)
plot(xlab='',main='',density(ret.mat),cex=0.7,cex.axis=0.9,cex.lab=1,cex.main=0.7,ylab='',col="darkblue",lwd=2)
legend(horiz=F,bty='n',"topright",fill=c("darkblue","darkred"),legend=c(symbols[1],"Normal"))
grid()
lines(density(rnorm(300000,mean=0,sd=1)),col='darkred',lwd=1.5)```

Created by Pretty R at inside-R.org

From a visual inspection of the last plot, we can see that the GSPC data is non normally distributed with excess kurtosis and a very mild skewness. We could, if we so wished, conduct statistical tests such as the Kolmogorov-Smirnov test (K-S) and Shapiro-Wilk (S-W) test as well as the Jacque Berra test to determine the extent and significance of non normality in the data set.

Having established the non-normality of the data set, let’s proceed with fitting the maxima model to the data and run some diagnostic tests to ascertain suitability. I have used quarterly and monthly blocks in the fitting process.

```# Block maxima
# Fit GEV distributions to the quarterly and monthly maxima of the negated
# return series (returns are negated so that losses are positive maxima).
# FIX: all slot accesses previously read `@fit\$...` -- the `\$` is a
# blog-escaping artefact and is a syntax error in R; corrected to `$`.
# Quarters

GEV.Quarters<-gevFit(-ret.mat,block='quarterly')
tstamp<-timeSequence(from=as.Date('1960-01-05'),to=as.Date('1987-10-16'),by="quarter")
Quarters.max<-aggregate(-ret.mat,tstamp,max)
Quarters.sort<-sort(seriesData(Quarters.max))

# GEV parameter estimates (order: xi = shape, mu = location, beta = scale).
# FIX: slot name corrected to par.ests (mu.q previously used the
# partially-matched name par.est).
xi.q<-GEV.Quarters@fit$par.ests[1]
mu.q<-GEV.Quarters@fit$par.ests[2]
beta.q<-GEV.Quarters@fit$par.ests[3]

# Simulate from the fitted GEV for the density comparison plot below
Quarters.fitplot<-gevSim(model=list(xi=xi.q,mu=mu.q,beta=beta.q),n=100000)

windows()
layout(matrix(c(1,2,3,4),nrow=2,ncol=2))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(Quarters.max,type='s',main="Quarters Maxima (Losses)")
hist(Quarters.max,main="Histogram of Maximal Losses (Quarters)")
recordsPlot(-ret.mat)
plot(density(Quarters.max),main='Density plot of quarterly maxima and \n Simulated GEV ',type='p',col="darkred",cex=0.2,xlab=paste("Xi=",round(xi.q,2)," Mu=",round(mu.q,2)," Beta=",round(beta.q,2)))
lines(density(Quarters.fitplot),type='l',col="darkblue",lwd=2)
legend(cex=0.85,bty='n',"topright",fill=c("darkred","darkblue"),legend=c("Actual density","Simulated GEV"))

# Diagnostics: the four standard gevFit diagnostic plots
windows()
layout(matrix(c(1,2,3,4),nrow=2,ncol=2))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
for(i in 1:4){
plot(GEV.Quarters,which=i)
}

# Probability of next quarters Maximal loss exceeding all past maxima
Quarters.prob<-1-pgev(max(GEV.Quarters@fit$data),xi=xi.q,mu=mu.q,beta=beta.q)
cat('The probability of next quarters maximal loss exceeding all past maxima is : \n',round(Quarters.prob*100,2)," Percent")

# Monthly: same procedure with monthly blocks
GEV.Monthly<-gevFit(-ret.mat,block='monthly')
tstamp<-timeSequence(from=as.Date('1960-01-05'),to=as.Date('1987-10-16'),by="month")
Months.max<-aggregate(-ret.mat,tstamp,max)
Months.sort<-sort(seriesData(Months.max))

# FIX: par.ests for mu.m too (was the partially-matched par.est)
xi.m<-GEV.Monthly@fit$par.ests[1]
mu.m<-GEV.Monthly@fit$par.ests[2]
beta.m<-GEV.Monthly@fit$par.ests[3]

Months.fitplot<-gevSim(model=list(xi=xi.m,mu=mu.m,beta=beta.m),n=100000)

windows()
layout(matrix(c(1,2,3,4),nrow=2,ncol=2))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
plot(Months.max,type='s',main="Monthly Maxima (Losses)")
hist(Months.max,main="Histogram of Maximal Losses (Months)")
recordsPlot(-ret.mat)
# FIX: title previously said "quaterly maxima" on the MONTHLY plot
plot(density(Months.max),main='Density plot of monthly maxima and \n Simulated GEV ',type='p',col="darkred",cex=0.2,xlab=paste("Xi=",round(xi.m,2)," Mu=",round(mu.m,2)," Beta=",round(beta.m,2)))
lines(density(Months.fitplot),type='l',col="darkblue",lwd=2)
legend(cex=0.85,bty='n',"topright",fill=c("darkred","darkblue"),legend=c("Actual density","Simulated GEV"))

# Diagnostics
windows()
layout(matrix(c(1,2,3,4),nrow=2,ncol=2))
par(mai=c(0.75,0.75,0.7,0.3),cex.main=0.85,cex.lab=0.85,cex=0.75,cex.axis=0.85,bty='n')
for(i in 1:4){
plot(GEV.Monthly,which=i)
}

# Probability of next months Maximal loss exceeding all past maxima
Months.prob<-1-pgev(max(GEV.Monthly@fit$data),xi=xi.m,mu=mu.m,beta=beta.m)
cat('The probability of next months maximal loss exceeding all past maxima is : \n',round(Months.prob*100,2)," Percent")```

Created by Pretty R at inside-R.org

From the diagnostic plots we can see that using monthly rather than quarterly blocks results in a better fit for the maxima model. From the console output, the probability of next quarter’s maximal loss exceeding all past maxima is 0.33 percent, while the probability of next month’s maximal loss exceeding all past maxima is 0.05 percent, attesting to how rare extreme events actually are.

Extreme value theory  is a branch of statistics dealing with the extreme deviations from the median of probability distributions. It seeks to assess, from a given ordered sample of a given random variable, the probability of events that are more extreme than any observed prior. Extreme value analysis is widely used in many disciplines, ranging from structural engineering, finance, earth sciences, traffic prediction, geological engineering, etc. EVT is a tool which attempts to provide us with the best possible estimate of the tail area of the distribution. However, even in the absence of useful historical data, EVT provides guidance on the kind of distribution we should select so that extreme risks can be handled prudently.

Click here for the complete Wikipedia entry

There are two broad methods of modelling extreme values. The oldest group of models are the block maxima models;  these are models for the largest observations collected from large samples of identically distributed observations. A more modern group of models are the peaks-over-threshold (POT) models; these are models for all large observations which exceed a high threshold. The POT models are generally considered to be the most useful for practical purposes.

Block Maxima Model

Distribution: Generalised Extreme Value Distribution (GEV) that encompasses the Gumbel,Frechet and Weibull distributions ( corresponding to ξ = 0,  ξ > 0  and  ξ  < 0 respectively).

Implementation: Data are segmented into blocks of equal length and a series of block maxima/minima are generated to which the GEV is fitted.

Interpretation: Model fit is assessed using graphical analysis

1. Probability plot: the fitted value of the c.d.f. is plotted against the empirical value of the c.d.f. for each data point.
2. Quantile plot: the empirical quantile is plotted against the fitted quantile for each data point.
3. Return level plot: the return level (with error bars) is plotted against the return period.
4. Density plot: the fitted p.d.f. is superimposed on a histogram of the data.

fff

Threshold Exceedence Model

Distribution: Generalised Pareto Distribution (GPD)

Implementation:

1. Choose some threshold u0 so that GPD is a good model for (X − u0|X > u0). There are 2 methods available for setting the threshold level. The exploratory method is carried out prior to model estimation and is based on mean residual life plots (graphs the means of threshold exceedences across various threshold levels). The threshold u0 should be the lowest value of u above which threshold exceedence means are roughly linear. The other method assesses the stability of parameter estimates for models fit across a range of thresholds
2. Fit the GPD to the observed excesses x − u0.
3. Use the fitted GPD, together with some model for the rate of exceedances X > u0, to provide estimates for return levels.
fff

The 2 libraries that deal with Extreme Value Theory are fExtremes and extRemes. I rely on the former package for this code. Other libraries used are

```setwd("C:/University of Warwick/R experiments/Extreme Value Theory")   # NOTE(review): machine-specific path -- adjust before running
# Core libraries: fExtremes (EVT fitting), timeSeries (series container),
# quantmod (price download)
library(fExtremes)
library(timeSeries)
library(quantmod)```

ggg

Let’s simulate from the normal ,GEV and GPD distributions and visualise the differences.

```windows()
# Six panels: four members of the GEV family plus the GPD, each drawn for
# four parameter settings, and a final overlay of one curve from each family.
par(mfrow=c(2,3))

# Panel 1: normal densities with increasing mean and spread
norm.draws<-NULL
norm.draws[[1]]<-rnorm(n=500000,mean=0,sd=1)
plot(cex.main=0.85,xlab='',density(norm.draws[[1]]),main="Normal Distribution",cex=0.75,col="blue",lwd=2)
grid()
for(k in 2:4){
  norm.draws[[k]]<-rnorm(n=500000,mean=k-0.5,sd=k/2+0.05)
  lines(density(norm.draws[[k]]),col=k,lwd=1.5)
}

# Panel 2: Gumbel (GEV with xi = 0)
gumbel.draws<-NULL
gumbel.draws[[1]]<-rgev(n=100000,xi=0,mu=0.5,beta=2)
plot(cex.main=0.85,xlab='',density(gumbel.draws[[1]]),col='blue',main="Gumbel Distribution",lwd=2,cex=0.75)
grid()
for(k in 2:4){
  gumbel.draws[[k]]<-rgev(n=100000,xi=0,mu=k-0.5,beta=k+1)
  lines(density(gumbel.draws[[k]]),col=k,lwd=1.5)
}

# Panel 3: Frechet (GEV with xi > 0)
frechet.draws<-NULL
frechet.draws[[1]]<-rgev(n=100000,xi=0.05,mu=0,beta=0.2)
plot(cex.main=0.85,xlab='',density(frechet.draws[[1]]),col='blue',main="Frechet Distribution",lwd=2,cex=0.75)
grid()
for(k in 2:4){
  frechet.draws[[k]]<-rgev(n=100000,xi=0.05,mu=k-0.5,beta=k/10+0.15)
  lines(density(frechet.draws[[k]]),col=k,lwd=1.5)
}

# Panel 4: Weibull (GEV with xi < 0)
weibull.draws<-NULL
weibull.draws[[1]]<-rgev(n=100000,xi=-0.3,mu=0,beta=2)
plot(xlab='',density(weibull.draws[[1]]),col='blue',main="Weibull Distribution",lwd=2,cex=0.75,cex.main=0.85)
grid()
for(k in 2:4){
  weibull.draws[[k]]<-rgev(n=100000,xi=-0.3,mu=k-0.5,beta=k+1)
  lines(density(weibull.draws[[k]]),col=k,lwd=1.5)
}

# Panel 5: Generalised Pareto
gpd.draws<-NULL
gpd.draws[[1]]<-rgpd(n=100000,xi=0,mu=0.5,beta=1)
plot(xlab='',density(gpd.draws[[1]]),col='blue',main="GPD Distribution",lwd=2,cex=0.75,cex.main=0.85)
grid()
for(k in 2:4){
  gpd.draws[[k]]<-rgpd(n=100000,xi=0,mu=k/2+0.8,beta=k)
  lines(density(gpd.draws[[k]]),col=k,lwd=1.5)
}

# Panel 6: overlay one representative density per family
plot(xlab='',density(norm.draws[[1]]),col='blue',main="All Distributions",lwd=2,cex=0.75,cex.main=0.85,xlim=c(-5,20))
lines(density(gpd.draws[[1]]),col=1,lwd=1.5)
lines(density(weibull.draws[[1]]),col=2,lwd=1.5)
lines(density(frechet.draws[[1]]),col=3,lwd=1.5)
lines(density(gumbel.draws[[1]]),col=4,lwd=1.5)```

Created by Pretty R at inside-R.org