Asset Pricing [10c : Comparing Models]

The previous post ended with the creation of a matrix that contains an unlisted version of the selection criteria for each fund & model combination. From this object let’s create three data frames which we then manipulate and use as data inputs to the ggplot function.

[6] Make a facet plot of Selection Criteria across non-problematic funds

Here we simply make a dodged barchart plot for both AIC and SBC criteria across the 9 remaining non-problematic funds.

#ggplot across all funds
df.1 <- data.frame(Fund=rep(subfund.names,n.models),AIC=as.numeric(temp[,1]),SBC=as.numeric(temp[,2]),Model=temp[,5],stringsAsFactors=T)
df.1<-gather(df.1,Selection.Criteria,values,-c(Fund,Model))
miny <- min(df.1[,'values'])
maxy <- max(df.1[,'values'])

all.plot <- ggplot(data = df.1)+
	 						geom_bar(aes(x=Model,y=values,fill=Selection.Criteria),stat = "identity",position='dodge')+
	 						scale_fill_brewer(palette='Set1')+
	 						theme(axis.text.x = element_text(angle=90, hjust=1),legend.position='bottom',legend.key.size=unit(0.2, "cm"),legend.key.width=unit(2,'cm'),legend.direction='horizontal')+
	 						labs(title='Comparison of Selection Information for Lognormal, Autoregressive, \nand Regime Switching Models across Funds')+
	 						facet_wrap(~Fund,shrink=T)+
	 						coord_cartesian(ylim=c(miny,maxy+1))

l
nnnn

Interpretation of the selection criteria is straightforward : the higher the value the better the model. Although model differences are not very significant within each fund,the values for the selection criteria do show some variation across funds (thankfully). Models seem to do particularly well for the Equity.Market.Neutral and Fixed.Income.Arbitrage cases.

To gain a better view of how individual models performed,let’s take a look at the results for a particular fund & selection criterion combination.

[7] Snap plot function for the chosen fund and selection criterion

The FundSnap() function provides [1] a dodged barchart plot of models ranked according to the selected criterion (AIC,SBC) in descending order as well as [2]  a tabulation of numeric values (Likelihood,degree of freedom,AIC,SBC) with maximum/minimum values highlighted.

########################################################################################
# Fund snap across models
########################################################################################

FundSnap <- function(fund,df.obj,order.by){

	e<-environment()

	tbl.df <- df.obj%>%
		        filter(Fund==fund)%>%
		        select(Model,DF,Likelihood,AIC,SBC)

  temp.sel <- paste('df.obj$',order.by,sep='')

	plot.df <- df.obj%>%
		         arrange(desc(eval(parse(text=temp.sel))))%>%
		         filter(Fund==fund)%>%
		         select(c(Fund,Model,eval(parse(text=order.by))))

#plot
	miny<-min(plot.df[,3])
	maxy <-max(plot.df[,3])

	snap.plot <- ggplot(data = plot.df,environment=e)+theme_tufte()+
	 					 			geom_bar(aes(x=reorder(Model,desc(eval(parse(text=order.by)))),y=eval(parse(text=order.by)),fill=(eval(parse(text=order.by)))),stat = "identity",position='dodge',alpha=0.4)+
	 	              scale_fill_continuous(low="lightgreen", high="darkgreen")+
		              coord_cartesian(ylim=c(miny-1,maxy+1))+
		              theme(legend.position='none',legend.key.size=unit(0.2, "cm"),legend.key.width=unit(2,'cm'),legend.direction='horizontal',axis.text.x = element_text(angle=90, hjust=1),panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
		              labs(title=paste('Snapshot for : ',fund,' & ',order.by,'\nacross models',sep=''))+
		              xlab('Models')+
		              ylab('Values')

#table
	tbl.df$Model <- as.character(tbl.df$Model)
	tbl.df$Likelihood <- round(tbl.df$Likelihood,2)
	tbl.df$AIC <- round(tbl.df$AIC,2)
	tbl.df$SBC <- round(tbl.df$SBC,2)

	parse.str <- paste('tbl.df$',order.by,sep='')
  row.max.idx <- which(eval(parse(text=parse.str))==max(eval(parse(text=parse.str))))
  row.min.idx <- which(eval(parse(text=parse.str))==min(eval(parse(text=parse.str))))

	debug.spec <- ggTableSpec(columns.exist=T,columns.txt=colnames(tbl.df),columns.font=rep('bold',ncol(tbl.df)),columns.col='blue',columns.fill='grey',columns.alpha=0.7,
		                      rows.exist=F,rows.txt=paste('row',1:10,sep=''),rows.font=rep('bold',10),rows.col='green',rows.fill='grey',rows.alpha=0.7,
		                      data.obj=tbl.df,
		                    	hlt.col.exist=T,hl.col.which=c(row.min.idx,row.max.idx),hl.col.fill=c('lightgreen','darkgreen'),hl.col.alpha=c(0.4,0.4),
		                    	hlt.row.exist=F,hl.row.which=row.idx,hl.row.fill='skyblue',hl.row.alpha=0.45
                )
  tbl.plot <- ggTableDrawer(debug.spec)

	layOut(
		     list(snap.plot,1,1),
		     list(tbl.plot,2,1)
	)
}
########################################################################################

ll

Implementation is simple once we have the proper data frame in place:

#ggplot for specific funds
df.2 <- data.frame(Fund=rep(subfund.names,n.models),DF=as.numeric(temp[,3]),Likelihood=as.numeric(temp[,4]),AIC=as.numeric(temp[,1]),SBC=as.numeric(temp[,2]),Model=temp[,5],stringsAsFactors=T)
FundSnap(fund='Emerging.Markets',df.obj=df.2,order.by='SBC')

l
hhhh

Above shows the case of the Emerging.Markets & SBC combination.The AR.GARCH model provides the maximal SBC value while the ILN.Reg3 model is least useful here.

[8] Percentage of funds for which a model has maximal AIC

The final plot is a bar chart plot with text annotations showing how often a particular model is the best performer across the remaining non problematic funds.

#Percentages
df.3 <- data.frame(Fund=rep(subfund.names,n.models),AIC=as.numeric(temp[,1]),Model=temp[,5],stringsAsFactors=T)
df.3 <- spread(df.3,Model,AIC)

col.idx <- apply(df.3,1,function(x) which(x==max(x[-1]),arr.ind=TRUE))
col.names <- colnames(df.3)[col.idx]

df.percentages <- df.3%>%
										mutate(Best.Model=col.names)%>%
                    group_by(Best.Model)%>%
                    summarise(count=n()/length(df.3)*100)

perc.plot <- ggplot(data = df.percentages)+
	 							geom_bar(aes(x=Best.Model,y=count,fill=Best.Model),stat = "identity",position='dodge')+
	 							scale_fill_brewer(palette='Set1')+
	 							theme(axis.text.x = element_blank(),legend.position='bottom',legend.key.size=unit(0.2, "cm"),legend.key.width=unit(2,'cm'),legend.direction='horizontal')+
	 							labs(title='Best performing models')+
                geom_text(aes(x=Best.Model,y=count/2,label=paste(round(count,2),'%',sep=''),fontface=rep('bold',4)),size=rel(4))+
	 							xlab('')+
	 							ylab('Percentage')

ll

gh

It appears that the AR.GARCH and ILN.Reg3 models provide the highest AIC values a third of the time.The simplest model,the independent log normal model fares relatively badly in comparison. Complicated models do seem to work better in our data.

Leave a comment