Asset Pricing [10b : Comparing Models]
Continuing with the list of tasks defined in the previous post, let’s load the edhec data set,fit each of the nine models to each of the funds and store results in a nested list object :
[4] Load data and store results
For each model and fund combination, I store the following information :
- The Log-Likelihood value
- The Degree of freedom
- The AIC criterion
- The SBC criterion
The code follows :
######################################################################################## # Load dataset ######################################################################################## orig.csv <- read.csv('edhecfinal.csv',header=T,stringsAsFactors=F) edhec.xts <- xts(order.by=seq(as.Date('1997-01-31'),as.Date('2014-07-31'),by='1 month'),orig.csv[,-1]) edhec.xts <- log(edhec.xts+1) ######################################################################################## # Nested Lists and Regression results ######################################################################################## n.funds <- ncol(edhec.xts) fund.names <- names(edhec.xts) n.obs <- nrow(edhec.xts) n.models <- 9 edhec.ret <- list() for(i in 1:n.funds){ edhec.ret[[i]] <- list() names(edhec.ret)[i] <- fund.names[i] edhec.ret[[i]]$Ret.xts <- edhec.xts[,i] logret <- Return.clean(edhec.ret[[i]]$Ret.xts,method='boudt') #Independent Log-normal model edhec.ret[[i]]$ILN <- lm(logret~1) edhec.ret[[i]]$ILN$Stat <- StatComp(edhec.ret[[i]]$ILN,n.obs) edhec.ret[[i]]$ILN$LogLik <- edhec.ret[[i]]$ILN$Stat['model.loglik'] edhec.ret[[i]]$ILN$df <- edhec.ret[[i]]$ILN$Stat['model.df'] edhec.ret[[i]]$ILN$AIC <- edhec.ret[[i]]$ILN$Stat['model.aic'] edhec.ret[[i]]$ILN$SBC <- edhec.ret[[i]]$ILN$Stat['model.sbc'] #Autoregressive model edhec.ret[[i]]$AR1 <- arima(x=logret,order=c(1,0,0)) edhec.ret[[i]]$AR1$Stat <- StatComp(edhec.ret[[i]]$AR1,n.obs) edhec.ret[[i]]$AR1$LogLik <- edhec.ret[[i]]$AR1$Stat['model.loglik'] edhec.ret[[i]]$AR1$df <- edhec.ret[[i]]$AR1$Stat['model.df'] edhec.ret[[i]]$AR1$AIC <- edhec.ret[[i]]$AR1$Stat['model.aic'] edhec.ret[[i]]$AR1$SBC <- edhec.ret[[i]]$AR1$Stat['model.sbc'] #ARCH model arch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,0)),mean.model = list(armaOrder=c(0,0))) arch.fit<- ugarchfit(spec=arch.spec, data=logret,solver.control=list(trace = 1)) edhec.ret[[i]]$ARCH$LogLik <- likelihood(arch.fit) edhec.ret[[i]]$ARCH$df <- 3 edhec.ret[[i]]$ARCH$AIC <- edhec.ret[[i]]$ARCH$LogLik-edhec.ret[[i]]$ARCH$df edhec.ret[[i]]$ARCH$SBC <- edhec.ret[[i]]$ARCH$LogLik-0.5*edhec.ret[[i]]$ARCH$df*log(n.obs) #AR-ARCH model arch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,0)),mean.model = list(armaOrder=c(1,0))) arch.fit<- ugarchfit(spec=arch.spec, data=logret,solver.control=list(trace = 1)) edhec.ret[[i]]$AR.ARCH$LogLik <- likelihood(arch.fit) edhec.ret[[i]]$AR.ARCH$df <- 4 edhec.ret[[i]]$AR.ARCH$AIC <- edhec.ret[[i]]$AR.ARCH$LogLik-edhec.ret[[i]]$AR.ARCH$df edhec.ret[[i]]$AR.ARCH$SBC <- edhec.ret[[i]]$AR.ARCH$LogLik-0.5*edhec.ret[[i]]$AR.ARCH$df*log(n.obs) #GARCH model garch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),mean.model = list(armaOrder=c(0,0))) garch.fit<- ugarchfit(spec=garch.spec, data=logret,solver.control=list(trace = 1)) edhec.ret[[i]]$GARCH$LogLik <- likelihood(garch.fit) edhec.ret[[i]]$GARCH$df <- 4 edhec.ret[[i]]$GARCH$AIC <- edhec.ret[[i]]$GARCH$LogLik-edhec.ret[[i]]$GARCH$df edhec.ret[[i]]$GARCH$SBC <- edhec.ret[[i]]$GARCH$LogLik-0.5*edhec.ret[[i]]$GARCH$df*log(n.obs) #AR-GARCH model garch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),mean.model = list(armaOrder=c(1,0))) garch.fit<- ugarchfit(spec=garch.spec, data=logret,solver.control=list(trace = 1)) edhec.ret[[i]]$AR.GARCH$LogLik <- likelihood(garch.fit) edhec.ret[[i]]$AR.GARCH$df <- 5 edhec.ret[[i]]$AR.GARCH$AIC <- edhec.ret[[i]]$AR.GARCH$LogLik-edhec.ret[[i]]$AR.GARCH$df edhec.ret[[i]]$AR.GARCH$SBC <- edhec.ret[[i]]$AR.GARCH$LogLik-0.5*edhec.ret[[i]]$AR.GARCH$df*log(n.obs) #ILN 2 regimes model.spec <- depmix(eval(parse(text=fund.names[i]))~1,nstates=2,data=logret) model.fit <- fit(model.spec) edhec.ret[[i]]$ILN.Reg2$Stat <- StatComp(model.fit,n.obs) edhec.ret[[i]]$ILN.Reg2$LogLik <- edhec.ret[[i]]$ILN.Reg2$Stat['model.loglik'] edhec.ret[[i]]$ILN.Reg2$df <- edhec.ret[[i]]$ILN.Reg2$Stat['model.df'] edhec.ret[[i]]$ILN.Reg2$AIC <- edhec.ret[[i]]$ILN.Reg2$Stat['model.aic'] edhec.ret[[i]]$ILN.Reg2$SBC <- edhec.ret[[i]]$ILN.Reg2$Stat['model.sbc'] #ILN 3 regimes model.spec <- depmix(eval(parse(text=fund.names[i]))~1,nstates=3,data=logret) model.fit <- fit(model.spec) edhec.ret[[i]]$ILN.Reg3$Stat <- StatComp(model.fit,n.obs) edhec.ret[[i]]$ILN.Reg3$LogLik <- edhec.ret[[i]]$ILN.Reg3$Stat['model.loglik'] edhec.ret[[i]]$ILN.Reg3$df <- edhec.ret[[i]]$ILN.Reg3$Stat['model.df'] edhec.ret[[i]]$ILN.Reg3$AIC <- edhec.ret[[i]]$ILN.Reg3$Stat['model.aic'] edhec.ret[[i]]$ILN.Reg3$SBC <- edhec.ret[[i]]$ILN.Reg3$Stat['model.sbc'] #AR-Regime switch model temp.df <- data.frame(logret[2:n.obs],logret[2:n.obs-1]) names(temp.df) <- c('left','right') model.spec <- depmix(left~right,nstates=2,data=temp.df) model.fit <- fit(model.spec) edhec.ret[[i]]$AR.Reg2$Stat <- StatComp(model.fit,n.obs) edhec.ret[[i]]$AR.Reg2$LogLik <- edhec.ret[[i]]$AR.Reg2$Stat['model.loglik'] edhec.ret[[i]]$AR.Reg2$df <- edhec.ret[[i]]$AR.Reg2$Stat['model.df'] edhec.ret[[i]]$AR.Reg2$AIC <- edhec.ret[[i]]$AR.Reg2$Stat['model.aic'] edhec.ret[[i]]$AR.Reg2$SBC <- edhec.ret[[i]]$AR.Reg2$Stat['model.sbc'] }
l
To give an idea of the output,consider the case of CTA.Global fund and the Autoregressive-Regime switching model :
The StatComp() function used to calculate these values is simply :
######################################################################################## # Calculating Stats for Comparison across models ######################################################################################## StatComp <- function(fitted.model,n.obs){ model.df <- attr(logLik(fitted.model),which='df') model.loglik <- as.numeric(logLik(fitted.model)) model.aic <- model.loglik-model.df model.sbc <- model.loglik-0.5*model.df*log(n.obs) return(c(model.df=model.df,model.loglik=model.loglik,model.aic=model.aic,model.sbc=model.sbc)) }
ll
Once we have fitted each of the nine models to the data for each of the 13 funds in the edhec dataset,let’s summarise this information in a new list object by saving across models.
[5] Saving across models and making data ggplot friendly
The purpose of this step in the process is to summarise the data contained in the edhec.ret list object above into something that can be conveniently used in ggplot functions. Problematic funds are subsequently identified and deleted.
# Saving across models & removing problematic funds model.names <- names(edhec.ret[[1]])[-1] model.list <- list() problems <- NULL for(i in 1:n.models){ model.list[[i]] <- list() names(model.list)[i] <- model.names[i] model.list[[i]] <- do.call(rbind,lapply(edhec.ret,function(x) eval(parse(text=paste('x$',model.names[i],sep='')))))[,c('AIC','SBC','df','LogLik')] problems <- c(problems,which(is.na(model.list[[i]]==0) | model.list[[i]]<0)) model.list[[i]] <- cbind(model.list[[i]],Model=rep(model.names[i],n.funds)) } problem.funds <- unique(problems)[-which(problems>13)][-5] model.list<-lapply(model.list,function(x) x[-problem.funds,]) subfund.names <- rownames(model.list[[1]]) temp <- apply(do.call(rbind,model.list),2,unlist)
ll
By problematic funds, i mean those for which the following error messages occurred in the fitting process:
For me the problematic funds are the ones corresponding to the following indices :
Ultimately,our model.list object looks like this :
This is the first element of the list. It continues this way for all other models listed in the previous post. The last line of code creates a matrix called temp which contains the unlisted version of the data contained in model.list. In the next post we will use this temp object to create three data frames, each of which we will manipulate using the tidyR package to produce the desired data structure for ggplotting purposes.
Pingback: The Whole Street’s Daily Wrap for 10/14/2014 | The Whole Street