Tidbits & Reminders [2]

[2] Nested lists and storing regressions results

A way to store regression results for multiple models and/or variables that I have come across involves the use of nested list objects. This allows me to fit a variety of models and store their results across multiple variables using a for loop. The easiest way to visualise nested list objects is to imagine them as folders in windows explorer, with each subfolder (or item contained therein) being separated by the $-sign. A sequence of steps that I have found quite useful is as follows :

  1. Load and clean data.
  2. Fit n models to data and store regression results in a nested list object such that
    • each element of the list represents one variable.
    • each element contains the fitted results of all n models.
  3. Create new list object,this time looping across models such that
    • each element of the list is one of n models.
    • each element contains desired regression results for each model.
    • information is stored in ggplot-friendly format.
  4. Remove problematic variables from each element of that list.
    • This does not work all the time for some strange reason (i.e.sometimes need to manually remove problematic variables)
    • In this step I remove the problematic variable for all models rather than for specific models,something that needs to be improved upon.
  5. Use do.call with rbind across elements of this newly created list to create the base dataframe.
  6. The base dataframe can then be manipulated to create other sub dataframes.
  7. Apply ggplotting functions on sub dataframes.

This procedure has been applied in the previous set of posts. I have doctored up some bogus dataset to provide an additional example so that I don’t forget :


#[1] Simulate data

Feed.list <- c('caviar','cayenne.pepper','celery','cereal','chard','cheddar','cheese','cheesecake','chef','crisp','cellophane','coffee')
sample.df <- data.frame()
for(i in 1:length(Feed.list)){
	sample.df[1:100,i] <- rnorm(100)
colnames(sample.df) <- Feed.list
sample.xts <- xts(order.by=seq(as.Date('1997-01-31'),length=100,by=1),sample.df)

#[2] Nested Lists and regression

n.vars <- ncol(sample.xts)
Feed.list <- names(sample.xts)
n.obs <- nrow(sample.xts)
n.models <- 9

main.list <- list()
	for(i in 1:n.vars){
		main.list[[i]] <- list()
		names(main.list)[i] <- Feed.list[i]
		main.list[[i]]$Sample.xts <- sample.xts[,i]
		temp.xts <- sample.xts[,i]

			#Independent Log-normal model
			main.list[[i]]$ILN <- lm(temp.xts~1)
		  main.list[[i]]$ILN$Stat <- StatComp(main.list[[i]]$ILN,n.obs)

		  main.list[[i]]$ILN$LogLik <- main.list[[i]]$ILN$Stat['model.loglik']
		  main.list[[i]]$ILN$df <- main.list[[i]]$ILN$Stat['model.df']
		  main.list[[i]]$ILN$AIC <- main.list[[i]]$ILN$Stat['model.aic']
		  main.list[[i]]$ILN$SBC <- main.list[[i]]$ILN$Stat['model.sbc']

		  #Autoregressive model
		  main.list[[i]]$AR1 <- arima(x=temp.xts,order=c(1,0,0))
		  main.list[[i]]$AR1$Stat <- StatComp(main.list[[i]]$AR1,n.obs)

		  main.list[[i]]$AR1$LogLik <- main.list[[i]]$AR1$Stat['model.loglik']
		  main.list[[i]]$AR1$df <- main.list[[i]]$AR1$Stat['model.df']
		  main.list[[i]]$AR1$AIC <- main.list[[i]]$AR1$Stat['model.aic']
		  main.list[[i]]$AR1$SBC <- main.list[[i]]$AR1$Stat['model.sbc']

		  #ARCH model
			arch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,0)),mean.model = list(armaOrder=c(0,0)))
		  arch.fit<- ugarchfit(spec=arch.spec, data=temp.xts,solver.control=list(trace = 1))

		  main.list[[i]]$ARCH$LogLik <- likelihood(arch.fit)
		  main.list[[i]]$ARCH$df <- 3
		  main.list[[i]]$ARCH$AIC <- main.list[[i]]$ARCH$LogLik-main.list[[i]]$ARCH$df
		  main.list[[i]]$ARCH$SBC <- main.list[[i]]$ARCH$LogLik-0.5*main.list[[i]]$ARCH$df*log(n.obs)

	    #AR-ARCH model
			arch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,0)),mean.model = list(armaOrder=c(1,0)))
		  arch.fit<- ugarchfit(spec=arch.spec, data=temp.xts,solver.control=list(trace = 1))

		  main.list[[i]]$AR.ARCH$LogLik <- likelihood(arch.fit)
		  main.list[[i]]$AR.ARCH$df <- 4
		  main.list[[i]]$AR.ARCH$AIC <- main.list[[i]]$AR.ARCH$LogLik-main.list[[i]]$AR.ARCH$df
		  main.list[[i]]$AR.ARCH$SBC <- main.list[[i]]$AR.ARCH$LogLik-0.5*main.list[[i]]$AR.ARCH$df*log(n.obs)

	    #GARCH model
			garch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),mean.model = list(armaOrder=c(0,0)))
		  garch.fit<- ugarchfit(spec=garch.spec, data=temp.xts,solver.control=list(trace = 1))

		  main.list[[i]]$GARCH$LogLik <- likelihood(garch.fit)
		  main.list[[i]]$GARCH$df <- 4
		  main.list[[i]]$GARCH$AIC <- main.list[[i]]$GARCH$LogLik-main.list[[i]]$GARCH$df
		  main.list[[i]]$GARCH$SBC <- main.list[[i]]$GARCH$LogLik-0.5*main.list[[i]]$GARCH$df*log(n.obs)

		  #AR-GARCH model
			garch.spec <- ugarchspec(variance.model=list(garchOrder=c(1,1)),mean.model = list(armaOrder=c(1,0)))
		  garch.fit<- ugarchfit(spec=garch.spec, data=temp.xts,solver.control=list(trace = 1))

		  main.list[[i]]$AR.GARCH$LogLik <- likelihood(garch.fit)
		  main.list[[i]]$AR.GARCH$df <- 5
		  main.list[[i]]$AR.GARCH$AIC <- main.list[[i]]$AR.GARCH$LogLik-main.list[[i]]$AR.GARCH$df
		  main.list[[i]]$AR.GARCH$SBC <- main.list[[i]]$AR.GARCH$LogLik-0.5*main.list[[i]]$AR.GARCH$df*log(n.obs)

		  #ILN 2 regimes
		  model.spec <- depmix(eval(parse(text=Feed.list[i]))~1,nstates=2,data=temp.xts)
	    model.fit <- fit(model.spec)
		  main.list[[i]]$ILN.Reg2$Stat <- StatComp(model.fit,n.obs)

      main.list[[i]]$ILN.Reg2$LogLik <- main.list[[i]]$ILN.Reg2$Stat['model.loglik']
		  main.list[[i]]$ILN.Reg2$df <- main.list[[i]]$ILN.Reg2$Stat['model.df']
		  main.list[[i]]$ILN.Reg2$AIC <- main.list[[i]]$ILN.Reg2$Stat['model.aic']
		  main.list[[i]]$ILN.Reg2$SBC <- main.list[[i]]$ILN.Reg2$Stat['model.sbc']

	    #ILN 3 regimes
		  model.spec <- depmix(eval(parse(text=Feed.list[i]))~1,nstates=3,data=temp.xts)
	    model.fit <- fit(model.spec)
	  	main.list[[i]]$ILN.Reg3$Stat <- StatComp(model.fit,n.obs)

      main.list[[i]]$ILN.Reg3$LogLik <- main.list[[i]]$ILN.Reg3$Stat['model.loglik']
		  main.list[[i]]$ILN.Reg3$df <- main.list[[i]]$ILN.Reg3$Stat['model.df']
		  main.list[[i]]$ILN.Reg3$AIC <- main.list[[i]]$ILN.Reg3$Stat['model.aic']
		  main.list[[i]]$ILN.Reg3$SBC <- main.list[[i]]$ILN.Reg3$Stat['model.sbc']

      #AR-Regime switch model
		  temp.df <- data.frame(temp.xts[2:n.obs],temp.xts[2:n.obs-1])
		  names(temp.df) <- c('left','right')

		  model.spec <- depmix(left~right,nstates=2,data=temp.df)
	    model.fit <- fit(model.spec)

			main.list[[i]]$AR.Reg2$Stat <- StatComp(model.fit,n.obs)

      main.list[[i]]$AR.Reg2$LogLik <- main.list[[i]]$AR.Reg2$Stat['model.loglik']
		  main.list[[i]]$AR.Reg2$df <- main.list[[i]]$AR.Reg2$Stat['model.df']
		  main.list[[i]]$AR.Reg2$AIC <- main.list[[i]]$AR.Reg2$Stat['model.aic']
		  main.list[[i]]$AR.Reg2$SBC <- main.list[[i]]$AR.Reg2$Stat['model.sbc']


#[3] Saving across models & removing problematic variables
model.names <- names(main.list[[1]])[-1]
model.list <- list()
problems <- NULL

for(i in 1:n.models){
	model.list[[i]] <- list()
	names(model.list)[i] <- model.names[i]
	model.list[[i]] <- do.call(rbind,lapply(main.list,function(x) eval(parse(text=paste('x$',model.names[i],sep='')))))[,c('AIC','SBC','df','LogLik')]
  problems <- c(problems,which(is.na(model.list[[i]]==0)))
  model.list[[i]] <- cbind(model.list[[i]],Model=rep(model.names[i],n.vars))

problem.vars <- unique(problems)
ifelse(length(problem.vars)==0,print('No issues in variables'),model.list<-lapply(model.list,function(x) x[-problem.vars,]))
subFeed.list <- rownames(model.list[[1]])

original.df <- apply(do.call(rbind,model.list),2,unlist)

The original.df is the base dataframe which is then manipulated to create sub-dataframes, inputs to ggplotting functions. Ultimately this leads to a set of plots like :




Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: