Asset Pricing [10b : Comparing Models]

Continuing with the list of tasks defined in the previous post, let’s load the edhec data set,fit each of the nine models to each of the funds and store results in a nested list object :

[4] Load data and store results

For each model and fund combination, I store the following information :

  1. The Log-Likelihood value
  2. The Degree of freedom
  3. The AIC criterion
  4. The SBC criterion

The code follows :

########################################################################################
# Load dataset
########################################################################################

orig.csv <- read.csv('edhecfinal.csv',header=T,stringsAsFactors=F)
edhec.xts <- xts(order.by=seq(as.Date('1997-01-31'),as.Date('2014-07-31'),by='1 month'),orig.csv[,-1])
edhec.xts <- log(edhec.xts+1)

########################################################################################
# Nested Lists and Regression results
########################################################################################

n.funds <- ncol(edhec.xts)
fund.names <- names(edhec.xts)
n.obs <- nrow(edhec.xts)

n.models <- 9

edhec.ret <- list()
	for(i in 1:n.funds){
		edhec.ret[[i]] <- list()
		names(edhec.ret)[i] <- fund.names[i]
		edhec.ret[[i]]$Ret.xts <- edhec.xts[,i]
    logret <- Return.clean(edhec.ret[[i]]$Ret.xts,method='boudt')

			#Independent Log-normal model
			edhec.ret[[i]]$ILN <- lm(logret~1)
		  edhec.ret[[i]]$ILN$Stat <- StatComp(edhec.ret[[i]]$ILN,n.obs)

		  edhec.ret[[i]]$ILN$LogLik <- edhec.ret[[i]]$ILN$Stat['model.loglik']
		  edhec.ret[[i]]$ILN$df <- edhec.ret[[i]]$ILN$Stat['model.df']
		  edhec.ret[[i]]$ILN$AIC <- edhec.ret[[i]]$ILN$Stat['model.aic']
		  edhec.ret[[i]]$ILN$SBC <- edhec.ret[[i]]$ILN$Stat['model.sbc']

		  #Autoregressive model
		  edhec.ret[[i]]$AR1 <- arima(x=logret,order=c(1,0,0))
		  edhec.ret[[i]]$AR1$Stat <- StatComp(edhec.ret[[i]]$AR1,n.obs)

		  edhec.ret[[i]]$AR1$LogLik <- edhec.ret[[i]]$AR1$Stat['model.loglik']
		  edhec.ret[[i]]$AR1$df <- edhec.ret[[i]]$AR1$Stat['model.df']
		  edhec.ret[[i]]$AR1$AIC <- edhec.ret[[i]]$AR1$Stat['model.aic']
		  edhec.ret[[i]]$AR1$SBC <- edhec.ret[[i]]$AR1$Stat['model.sbc']

		  #ARCH model
			arch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,0)),mean.model = list(armaOrder=c(0,0)))
		  arch.fit<- ugarchfit(spec=arch.spec, data=logret,solver.control=list(trace = 1))

		  edhec.ret[[i]]$ARCH$LogLik <- likelihood(arch.fit)
		  edhec.ret[[i]]$ARCH$df <- 3
		  edhec.ret[[i]]$ARCH$AIC <- edhec.ret[[i]]$ARCH$LogLik-edhec.ret[[i]]$ARCH$df
		  edhec.ret[[i]]$ARCH$SBC <- edhec.ret[[i]]$ARCH$LogLik-0.5*edhec.ret[[i]]$ARCH$df*log(n.obs)

	    #AR-ARCH model
			arch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,0)),mean.model = list(armaOrder=c(1,0)))
		  arch.fit<- ugarchfit(spec=arch.spec, data=logret,solver.control=list(trace = 1))

		  edhec.ret[[i]]$AR.ARCH$LogLik <- likelihood(arch.fit)
		  edhec.ret[[i]]$AR.ARCH$df <- 4
		  edhec.ret[[i]]$AR.ARCH$AIC <- edhec.ret[[i]]$AR.ARCH$LogLik-edhec.ret[[i]]$AR.ARCH$df
		  edhec.ret[[i]]$AR.ARCH$SBC <- edhec.ret[[i]]$AR.ARCH$LogLik-0.5*edhec.ret[[i]]$AR.ARCH$df*log(n.obs)

	    #GARCH model
			garch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),mean.model = list(armaOrder=c(0,0)))
		  garch.fit<- ugarchfit(spec=garch.spec, data=logret,solver.control=list(trace = 1))

		  edhec.ret[[i]]$GARCH$LogLik <- likelihood(garch.fit)
		  edhec.ret[[i]]$GARCH$df <- 4
		  edhec.ret[[i]]$GARCH$AIC <- edhec.ret[[i]]$GARCH$LogLik-edhec.ret[[i]]$GARCH$df
		  edhec.ret[[i]]$GARCH$SBC <- edhec.ret[[i]]$GARCH$LogLik-0.5*edhec.ret[[i]]$GARCH$df*log(n.obs)

		  #AR-GARCH model
			garch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),mean.model = list(armaOrder=c(1,0)))
		  garch.fit<- ugarchfit(spec=garch.spec, data=logret,solver.control=list(trace = 1))

		  edhec.ret[[i]]$AR.GARCH$LogLik <- likelihood(garch.fit)
		  edhec.ret[[i]]$AR.GARCH$df <- 5
		  edhec.ret[[i]]$AR.GARCH$AIC <- edhec.ret[[i]]$AR.GARCH$LogLik-edhec.ret[[i]]$AR.GARCH$df
		  edhec.ret[[i]]$AR.GARCH$SBC <- edhec.ret[[i]]$AR.GARCH$LogLik-0.5*edhec.ret[[i]]$AR.GARCH$df*log(n.obs)

		  #ILN 2 regimes
		  model.spec <- depmix(eval(parse(text=fund.names[i]))~1,nstates=2,data=logret)
	    model.fit <- fit(model.spec)
		  edhec.ret[[i]]$ILN.Reg2$Stat <- StatComp(model.fit,n.obs)

      edhec.ret[[i]]$ILN.Reg2$LogLik <- edhec.ret[[i]]$ILN.Reg2$Stat['model.loglik']
		  edhec.ret[[i]]$ILN.Reg2$df <- edhec.ret[[i]]$ILN.Reg2$Stat['model.df']
		  edhec.ret[[i]]$ILN.Reg2$AIC <- edhec.ret[[i]]$ILN.Reg2$Stat['model.aic']
		  edhec.ret[[i]]$ILN.Reg2$SBC <- edhec.ret[[i]]$ILN.Reg2$Stat['model.sbc']

	    #ILN 3 regimes
		  model.spec <- depmix(eval(parse(text=fund.names[i]))~1,nstates=3,data=logret)
	    model.fit <- fit(model.spec)
	  	edhec.ret[[i]]$ILN.Reg3$Stat <- StatComp(model.fit,n.obs)

      edhec.ret[[i]]$ILN.Reg3$LogLik <- edhec.ret[[i]]$ILN.Reg3$Stat['model.loglik']
		  edhec.ret[[i]]$ILN.Reg3$df <- edhec.ret[[i]]$ILN.Reg3$Stat['model.df']
		  edhec.ret[[i]]$ILN.Reg3$AIC <- edhec.ret[[i]]$ILN.Reg3$Stat['model.aic']
		  edhec.ret[[i]]$ILN.Reg3$SBC <- edhec.ret[[i]]$ILN.Reg3$Stat['model.sbc']

      #AR-Regime switch model
		  temp.df <- data.frame(logret[2:n.obs],logret[2:n.obs-1])
		  names(temp.df) <- c('left','right')

		  model.spec <- depmix(left~right,nstates=2,data=temp.df)
	    model.fit <- fit(model.spec)

			edhec.ret[[i]]$AR.Reg2$Stat <- StatComp(model.fit,n.obs)

      edhec.ret[[i]]$AR.Reg2$LogLik <- edhec.ret[[i]]$AR.Reg2$Stat['model.loglik']
		  edhec.ret[[i]]$AR.Reg2$df <- edhec.ret[[i]]$AR.Reg2$Stat['model.df']
		  edhec.ret[[i]]$AR.Reg2$AIC <- edhec.ret[[i]]$AR.Reg2$Stat['model.aic']
		  edhec.ret[[i]]$AR.Reg2$SBC <- edhec.ret[[i]]$AR.Reg2$Stat['model.sbc']

	}

l
To give an idea of the output,consider the case of CTA.Global fund and the Autoregressive-Regime switching model :

ff

 

The StatComp() function used to calculate these values is simply :

########################################################################################
# Calculating Stats for Comparison across models
########################################################################################

StatComp <- function(fitted.model,n.obs){
	    model.df <- attr(logLik(fitted.model),which='df')
			model.loglik <- as.numeric(logLik(fitted.model))
		  model.aic <- model.loglik-model.df
		  model.sbc <- model.loglik-0.5*model.df*log(n.obs)

	return(c(model.df=model.df,model.loglik=model.loglik,model.aic=model.aic,model.sbc=model.sbc))
}

ll
Once we have fitted each of the nine models to the data for each of the 13 funds in the edhec dataset,let’s summarise this information in a new list object by saving across models.

 

[5] Saving across models and making data ggplot friendly

The purpose of this step in the process is to summarise the data contained in the edhec.ret list object above into something that can be conveniently used in ggplot functions. Problematic funds are subsequently identified and deleted.

# Saving across models & removing problematic funds
model.names <- names(edhec.ret[[1]])[-1]
model.list <- list()
problems <- NULL

for(i in 1:n.models){
	model.list[[i]] <- list()
	names(model.list)[i] <- model.names[i]
	model.list[[i]] <- do.call(rbind,lapply(edhec.ret,function(x) eval(parse(text=paste('x$',model.names[i],sep='')))))[,c('AIC','SBC','df','LogLik')]
  problems <- c(problems,which(is.na(model.list[[i]]==0) | model.list[[i]]<0))
  model.list[[i]] <- cbind(model.list[[i]],Model=rep(model.names[i],n.funds))
}

problem.funds <- unique(problems)[-which(problems>13)][-5]
model.list<-lapply(model.list,function(x) x[-problem.funds,])
subfund.names <- rownames(model.list[[1]])

temp <- apply(do.call(rbind,model.list),2,unlist)

ll
By problematic funds, i mean those for which the following error messages occurred in the fitting process:

gff

 

For me the problematic funds are the ones corresponding to the following indices :

kli

 

Ultimately,our model.list object looks like this :

jjuk

 

This is the first element of the list. It continues this way for all other models listed in the previous post. The last line of code creates a matrix called temp which contains the unlisted version of the data contained in model.list. In the next post we will use this temp object to create three data frames, each of which we will manipulate using the tidyR package to produce the desired data structure for ggplotting purposes.

1 comment

Leave a comment