├── Output
    └── tmp
├── _gitignore.txt
├── MSGARCH_comp.Rproj
├── LICENSE
├── README.md
├── testing_fun.R
├── Main.R
└── helping_fun.R


/Output/tmp:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/_gitignore.txt:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/MSGARCH_comp.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 n4tg
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MSGARCH_comp
 2 | Comparison of Value-at-Risk forecasting performance of Markov-Switching GARCH models, namely symmetric GARCH, Exponential GARCH, and GJR-GARCH, based on stock markets universe. 
 3 | 
 4 | The data considered here are 5,000 daily percentage log returns of each stock indices: **DAX**, **S&P500**, and **Nikkei**.  
 5 | The first 3,000 daily log returns are fit univariately to 2-state-Markov-switching GARCH-type models as mentioned above, each with 2 innovation assumptions: Normal and Student's t distributions.
 6 | 
 7 | Model estimation is done by MCMC, using a robust adaptive random-walk Metropolis algorithm proposed by Vihola (2012).
 8 | The forecasting horizons used here are 1, 3, 10 and 22, and the VaR is calculated at 1% shortfall probability.
 9 | 
10 | For a fixed rolling-window length of 3,000, the experiment is repeated 2,000 times, providing the 2,000 out-of-sample data, which later on used for evaluating the forecasting performance of each model setting.
11 | 
12 | The **goodness of in-sample fit** is evaulated based on:  
13 | 1. The Akaike information criterion (AIC) and  
14 | 2. The Bayesian information criterion (BIC).
15 | 
16 | The **forecasting performance** is evaluated based on  
17 | 1. Backtesting:
18 |     + Unconditional coverage test by Kupiec (1995),  
19 |     + Independence test by Christoffersen (1998),  
20 |     + Conditional coverage test by Christoffersen (1998),  
21 |     + Dynamic quantile test of Engle and Mangenelli (2004), and
22 | 2. Model confidence set by Hansen et al. (2011), with the VaR-based loss function defined by González-Rivera et al (2004).
23 | 
24 | # Require R packages:  
25 | + MSGARCH: https://github.com/keblu/MSGARCH  
26 | + data.table: https://CRAN.R-project.org/package=data.table  
27 | + MCS: https://CRAN.R-project.org/package=MCS  
28 | + MASS: https://CRAN.R-project.org/package=MASS  
29 | + plyr: https://cran.r-project.org/package=plyr  
30 | + expm: https://CRAN.R-project.org/package=expm  
31 | + dplyr: https://cran.r-project.org/package=dplyr  
32 | 


--------------------------------------------------------------------------------
/testing_fun.R:
--------------------------------------------------------------------------------
  1 | # MCS ---------------------------------------------------------------------
  2 | 
  3 | ## Function: test.MCS
  4 | # Description: A wrapper function for doing MCS.
  5 | # Input:  r - A numeric vector of the real data.
  6 | #         VaR - A numeric matrix of the forecasted data.
  7 | #         LossFn - A function indicates loss function used for MCS evaluation. 
  8 | #                  See MCS package manual for possible choices.
  9 | #         save.out - A logical input indicates whether the output MCS is saved.
 10 | #         filename - A string for path and filename to which the output is saved.
 11 | #         ... - Optional argument for the LossFn. See MCS package manual.
 12 | # Output: An SSM object. See MCS package manual.
 13 | test.MCS <- function(r, VaR, LossFn, 
 14 |                      save.out = T, filename = paste0("Output/MCS.txt"),
 15 |                      ...){
 16 |   r = as.numeric(r)
 17 | 
 18 |   Loss = apply(X = VaR, MARGIN = 2, FUN = function(x) LossFn(realized = r, evaluated = x, ...))
 19 |   SSM = MCSprocedure(Loss = Loss, alpha = alpha, verbose = F)
 20 |   if(save.out){
 21 |     sink(filename)
 22 |     print(SSM)
 23 |     sink()
 24 |   }
 25 |   return(SSM)
 26 | }
 27 | 
 28 | # Bactesting --------------------------------------------------------------
 29 | 
 30 | ## Function: Backtesting
 31 | # Description: A wrapper function for backtesting.
 32 | # Input:  alpha - A number indicates shortfall probability.
 33 | #         r - A numeric vector of the real data.
 34 | #         VaR - A numeric matrix of the forecasted data.
 35 | #         lags - A number indicates lags of Hit in DQ test. See Engle and Mangenelli (2004).
 36 | #         save.out - A logical input indicates whether the backtesting output table is saved or not.
 37 | #         filename - A string for path and filename to which the output is saved.
 38 | # Output: A data table consists of model names, %violations, test statistics (UC, Ind, CC, DQ) and their p-values.
 39 | Backtesting <- function(alpha, r, VaR, lags = 4,
 40 |                         save.out = T, filename = paste0("Output/Backtest.csv")){
 41 |   alpha = check.alpha(alpha)
 42 |   r = as.numeric(r)
 43 | 
 44 |   hit = (r < VaR)
 45 |   
 46 |   tests = c("test.UC", "test.Ind", "test.CC", "test.DQ")
 47 |   res_table = data.table(Model = colnames(VaR),
 48 |                          Violation.percentage = 100*colMeans(hit))
 49 |   
 50 |   for(test in tests){
 51 |     if(test == "test.DQ"){
 52 |       res = apply(X = VaR, MARGIN = 2, FUN = test, alpha = alpha, r = r, lags = lags)
 53 |     } else res = apply(X = hit, MARGIN = 2, FUN = test, alpha = alpha)
 54 |     res_table = cbind(res_table, rbindlist(res, use.names = T, fill = T))
 55 |   }
 56 |   
 57 |   if(save.out) write.table(res_table, file = filename, sep = ";", col.names = T, row.names = F)
 58 |   return(res_table)
 59 | }
 60 | 
 61 | ## Function: test.UC
 62 | # Description: Perform unconditional coverage test of Kupiec (1995).
 63 | # Input:  alpha - A number indicates shortfall probability.
 64 | #         hit - A numeric vector of hit sequence.
 65 | # Output: A list consists of likelihood ratio of UC test (LR.UC) and its p-value (pval.UC).
 66 | test.UC <- function(alpha, hit){
 67 |   alpha = check.alpha(alpha)
 68 |   hit = check.hit(hit)
 69 |   
 70 |   n0 = length(which(hit == 0))
 71 |   n1 = length(which(hit == 1))
 72 |   alpha_hat = n1/(n0+n1)
 73 |   
 74 |   LR.UC = -2*log( ((alpha^n1)*(1-alpha)^n0)/((alpha_hat^n1)*(1-alpha_hat)^n0) )
 75 |   pval.UC = 1 - pchisq(LR.UC, df = 1)
 76 |   
 77 |   return(list(LR.UC = LR.UC, pval.UC = pval.UC))
 78 | }
 79 | 
 80 | ## Function: test.Ind
 81 | # Description: Perform independent test as in Christoffersen (1998).
 82 | # Input:  alpha - A number indicates shortfall probability.
 83 | #         hit - A numeric vector of hit sequence.
 84 | # Output: A list consists of likelihood ratio of Ind test (LR.Ind) and its p-value (pval.Ind).
 85 | test.Ind <- function(alpha, hit){
 86 |   alpha = check.alpha(alpha)
 87 |   hit = check.hit(hit)
 88 |   
 89 |   n00 <- n01 <- n10 <- n11 <- 0
 90 |   
 91 |   for(i in 1:(length(hit)-1)){
 92 |     if(hit[i] == 0){
 93 |       if(hit[i+1] == 0){
 94 |         n00 = n00+1
 95 |       } else{
 96 |         n01 = n01+1
 97 |       }
 98 |     } else{
 99 |       if(hit[i+1] == 0){
100 |         n10 = n10+1
101 |       } else{
102 |         n11 = n11+1
103 |       }
104 |     }
105 |   }
106 |   
107 |   p01 = n01/(n00+n01)
108 |   p11 = n11/(n10+n11)
109 |   p1 = (n01+n11)/(n00+n01+n10+n11)
110 |   
111 |   LR.Ind = -2*log( ( (1-p1)^(n00+n10) * p1^(n01+n11) )/( (1-p01)^n00 * p01^n01 * (1-p11)^n10 * p11^n11 ) )
112 |   pval.Ind = 1 - pchisq(LR.Ind, df = 1)
113 |   
114 |   return(list(LR.Ind = LR.Ind, pval.Ind = pval.Ind))
115 | }
116 | 
117 | 
118 | ## Function: test.CC
119 | # Description: Perform conditional coverage test of Christoffersen (1998).
120 | # Input:  alpha - A number indicates shortfall probability.
121 | #         hit - A numeric vector of hit sequence.
122 | # Output: A list consists of likelihood ratio of CC test (LR.CC) and its p-value (pval.CC).
123 | test.CC <- function(alpha, hit){
124 |   alpha = check.alpha(alpha)
125 |   hit = check.hit(hit)
126 |   
127 |   LR.CC = test.UC(alpha, hit)$LR.UC + test.Ind(alpha, hit)$LR.Ind
128 |   pval.CC = 1- pchisq(LR.CC, df = 2)
129 |   
130 |   return(list(LR.CC = LR.CC, pval.CC = pval.CC))
131 | }
132 | 
133 | ## Function: test.DQ
134 | # Description: Perform dynamic quantile test as in Engle and Mangenelli (2004).
135 | # Input:  alpha - A number indicates shortfall probability.
136 | #         r - A numeric vector of the real data.
137 | #         VaR - A numeric matrix of the forecasted data.
138 | #         lags - A number indicates lags of Hit. Default as 4.
139 | # Output: A list consists of DQ test statistics (DQ) and its p-value (pval.DQ).
140 | test.DQ <- function(alpha, r, VaR, lags = 4){
141 |   alpha = check.alpha(alpha)
142 |   r = as.numeric(r)
143 | 
144 |   N.period = length(r)
145 |   H = (r<VaR)-alpha
146 |   Hit = H[(lags+1):N.period]
147 |   VaR_hat = VaR[(lags+1):N.period]
148 |   
149 |   const = rep(1, times = N.period-lags)
150 |   tmp_Mat = matrix(0, nrow = N.period-lags, ncol = lags)
151 |   
152 |   for(i in 1:lags){
153 |     tmp_Mat[,i] = H[i:(N.period-lags+i-1)]
154 |   }
155 |   
156 |   X = cbind(const, VaR_hat, tmp_Mat)
157 |   mat = t(X) %*% X
158 |   mat = check.Mat(mat)
159 |   DQ = (t(Hit) %*% X %*% ginv(mat) %*% t(X) %*% Hit)/(alpha*(1-alpha))
160 |   pval.DQ = 1 - pchisq(DQ, df = ncol(X))
161 |   
162 |   return(list(DQ = DQ, pval.DQ = pval.DQ))
163 | }
164 | 
165 | # Wald test ---------------------------------------------------------------
166 | 
167 | ## Function: test.Wald
168 | # Description: Perform Wald test.
169 | # Input:  theta_hat - A numeric vector of the estimated parameters.
170 | #         Vn - A numeric matrix represents parameters covariance matrix.
171 | #         R - A numeric vector or matrix of hypothesis coefficients.
172 | #         r - A numeric vector or number in accordance to the hypothesis.
173 | # Output: A list consists of Wald test statistics (W), its degree of freedom (df), and its p-value (pval).
174 | test.Wald <- function(theta_hat,
175 |                       Vn,
176 |                       R,
177 |                       r = 0){
178 |   W = t(R %*% theta_hat - r) %*% ginv(R %*% Vn %*% t(R)) %*% (R %*% theta_hat - r)
179 |   df = nrow(R)
180 |   pval = 1-pchisq(as.numeric(W), df)
181 |   return(list(W = W, df = df, p = pval))
182 | }
183 | 


--------------------------------------------------------------------------------
/Main.R:
--------------------------------------------------------------------------------
  1 | ####################################################
  2 | # Start MAIN.R of MSGARCH_comp
  3 | ####################################################
  4 | 
  5 | rm(list = ls()) # clear the environment before the script starts
  6 | 
  7 | # Set up ------------------------------------------------------------------
  8 | source("helping_fun.R") # call functions in the file helping_fun.R
  9 | source("testing_fun.R") # call functions in the file testing_fun.R
 10 | set_library() 
 11 | ############## END OF SECTION "Set up" #############
 12 | 
 13 | # Input -------------------------------------------------------------------
 14 | input = "DAX" # choice of "DAX", "SP500", and "Nikkei"
 15 | roll_win_stat = 250 # window size for computing data statistics
 16 | window_size = 3000 # window length
 17 | interval = 1:2000 # rolling windows index
 18 | 
 19 | # Model specifications
 20 | models <- c("sGARCH", "eGARCH", "gjrGARCH")
 21 | distributions <- c("norm", "std")
 22 | 
 23 | # Bayesian parameters
 24 | N.sim = 50000 # number of draws
 25 | N.thin = 50 # draws are kept at every N.thin'th
 26 | N.burn = 50000 # number of burn-in draws
 27 | 
 28 | # VaR forecasting parameter
 29 | alpha = 0.01 # shortfall probability
 30 | taus <- c(1, 3, 10, 22) # forecasting horizon: VaR_t+taus
 31 | ############## END OF SECTION "Input" #############
 32 | 
 33 | # Preprocessing -----------------------------------------------------------
 34 | input.data <- read.data.log.return(input)
 35 | 
 36 | plot.data(input.data, input, N.trunc = (window_size+max(interval)), save_plot = T)
 37 | summary.data(input.data$LogReturn[(window_size+1):(window_size+max(interval))], input, write_table = T)
 38 | summary.data(input.data$LogReturn[(window_size+1):(window_size+max(interval))], input, stat = 'roll_win', roll_win = roll_win_stat, write_table = T)
 39 | 
 40 | spec <- list() # create an empty list to keep specification objects for each model
 41 | for(model in models){
 42 |   for(distr in distributions){ 
 43 |     
 44 |     # create models specification objects
 45 |     spec[[paste0(model, ".", distr)]] <- MSGARCH::create.spec(model = c(model, model),
 46 |                                                               distribution = c(distr, distr), do.skew = c(F, F), 
 47 |                                                               do.mix = F, do.shape.ind = F)
 48 |   }
 49 | }
 50 | ctr.bayes <- list(N.burn = N.burn, N.mcmc = N.sim, N.thin = N.thin, do.enhance.theta0 = T, adapt = T, acc.rate = 0.4) # a list of control parameters for Bayesian estimation
 51 | specs <- names(spec) # get all specifications name
 52 | bayes <- list() # create an empty list to keep model estimation and forecasting results
 53 | create.outfile(input, specs, taus, interval)
 54 | ############ END OF SECTION "Preprocessing" ###########
 55 | 
 56 | # Fit data and forecasting VaR --------------------------------------------
 57 | for(i in interval){
 58 |   # show the computing status on the console panel
 59 |   print(paste0("start computation for time ", i, " out of ", max(interval))) 
 60 |   
 61 |   # select the input log return data in according to relevant window
 62 |   y <- input.data$LogReturn[i:(window_size+i-1)]   
 63 |   
 64 |   set.seed(123) # set seed to allow reproduction of results
 65 |   # estimated models by Bayesian for each model spec regarding to the input log return data
 66 |   bayes$fit <- lapply(spec, function(s) MSGARCH::fit.bayes(spec = s, y = y, ctr = ctr.bayes))
 67 |   
 68 |   # save the AIC, BIC of each models fitting
 69 |   bayes$ic <- data.frame(i, sapply(bayes$fit, function(f) list(AIC = MSGARCH::AIC(f), BIC = MSGARCH::BIC(f)) ))
 70 |   write.table(as.matrix(bayes$ic), file = paste0("Output/", input, "_AIC_BIC_", min(interval), "_", max(interval), ".csv"), sep = ";", row.names = T, col.names = F, append = T)
 71 |   
 72 |   # show the computing status on the console panel
 73 |   print(paste0("Computing 1-period-ahead VaR at time ", i, " out of ", max(interval)))
 74 |   
 75 |   # compute and save the 1-period-ahead VaR with the alpha shortfall probability
 76 |   bayes$VaR_1 <- sapply(bayes$fit, function(f) MSGARCH::risk(object = f, level = 1-alpha, ES = F, do.its = F)$VaR)
 77 |   write.table(matrix(c(i,bayes$VaR_1), nrow = 1), file = paste0("Output/", input, "_VaR_1_", min(interval), "_", max(interval), ".csv"), sep = ";", row.names = F, col.names = F, append = T)
 78 |   
 79 |   # show the computing status on the console panel
 80 |   print(paste0("Computing multi-period-ahead VaR at time ", i, " out of ", max(interval)))
 81 |   
 82 |   set.seed(123) # set seed to allow reproduction of result
 83 |   # simulate the data for multi-period VaR computation
 84 |   bayes$sim.data <- lapply(bayes$fit, function(f) MSGARCH::simahead(f, n = max(taus)))
 85 |   
 86 |   # compute and save the 3-, 10-, and 22-period-ahead VaR (alpha-quantile)
 87 |   bayes$VaR_multi <- lapply(taus[-1], function(tau) matrix(c(i, tau, sapply(bayes$sim.data, function(sim) quantile(sim$draws[,tau], prob = alpha, na.rm = T))), nrow = 1))
 88 |   l_ply(bayes$VaR_multi, function(v) write.table(matrix(v[-2], nrow = 1), file = paste0("Output/", input, "_VaR_", v[2], "_", min(interval), "_", max(interval), ".csv"), sep = ";", row.names = F, col.names = F, append = T))
 89 | }
 90 | print("END OF LOOP") # show the computing status on the console panel
 91 | ####### END OF SECTION "Fit data and forecasting VaR" ######
 92 | 
 93 | # AIC and BIC -------------------------------------------------------------
 94 | # read in the AIC an BIC results
 95 | filename <- paste0("Output/", input, "_AIC_BIC_", min(interval), "_", max(interval), ".csv")
 96 | df <- read.csv(filename, sep = ";", stringsAsFactors = F)
 97 | # select AIC values only and keep them in a data frame called aic
 98 | aic <- df %>% filter(test == "AIC") %>% select(-c(test,iteration))
 99 | # find the average AIC for each model
100 | colMeans(aic) 
101 | # find the model that has minimum AIC per each rolling window
102 | aic$min <- sapply(interval, function(i) which.min(aic[i,]))
103 | table(aic$min)
104 | 
105 | # select BC values only and keep them in a data frame called aic
106 | bic <- df %>% filter(test == "BIC") %>% select(-c(test,iteration))
107 | # find the average BIC for each model
108 | colMeans(bic)
109 | # find the model that has minimum BIC per each rolling window
110 | bic$min <- sapply(interval, function(i) which.min(bic[i,]))
111 | table(bic$min)
112 | ############ END OF SECTION "AIC and BIC" ###########
113 | 
114 | # Forecasting evaluation --------------------------------------------------
115 | VaR <- list() # create an empty list to keep VaR results
116 | VaR_t <- paste0("VaR_", taus) # a string vector that keeps VaR forecasting names
117 | # create a string vector that keeps the output filenames for each VaR forecasting result
118 | filename <- paste0(input, "_VaR_", taus, "_", min(interval), "_", max(interval), ".csv")
119 | # read the forecasted VaR in as a list of data frame
120 | VaR <- lapply(filename, function(fn) read.csv(paste0("Output/", fn), header = T, stringsAsFactors = F, sep = ";"))
121 | names(VaR) <- VaR_t # change the list name
122 | # check the forecasted VaR at each forecasting horizon and fix it if error e.g. NA, NaN
123 | VaR <- lapply(taus, function(tau) check.VaR(VaR[[paste0("VaR_", tau)]], tau = tau, input = input, interval = interval))
124 | names(VaR) <- VaR_t # change the list name
125 | 
126 | # Backtesting and MCS
127 | l_ply(taus, function(tau){
128 |   
129 |   VaR_tau = VaR[[paste0("VaR_", tau)]] # Select the relevant VaR
130 |   N.period = nrow(VaR_tau) # find the total period considered in VaR forecasting
131 |   
132 |   # Select the relevant log return input data
133 |   r = input.data$LogReturn[(window_size+tau):(window_size+tau+N.period-1)]
134 |   
135 |   # Evaluation the forecasting performances by backtesting and MCS
136 |   Backtesting(alpha = alpha, r = r, VaR = VaR_tau, filename = paste0("Output/", input, "_Backtest_VaR_", tau, ".csv"))
137 |   test.MCS(r = r, VaR = VaR_tau, LossFn = MCS::LossVaR, tau = alpha, type = 'differentiable', filename = paste0("Output/", input, "_MCS_VaR_", tau, ".txt"))})
138 | ####### END OF SECTION "Forecasting evaluation" ######
139 | 
140 | print("END OF PROGRAM") # show the computing status on the console panel
141 | 
142 | ####################################################
143 | # End MAIN.R of MSGARCH_comp
144 | ####################################################
145 | 


--------------------------------------------------------------------------------
/helping_fun.R:
--------------------------------------------------------------------------------
  1 | # Library handle ----------------------------------------------------------
  2 | 
  3 | ## Function: install_lib_local
  4 | # Description: Install R package to the specified folder.
  5 | # Input:  package -  A string represents an R package name.
  6 | #         my.lib - A string for folder path. Default as "Packages".
  7 | # Output: None.
  8 | install_lib_local <- function(package, my.lib = "Packages"){
  9 |   set_lib_path_local(my.lib)
 10 |   install.packages(package, lib = my.lib)
 11 | }
 12 | 
 13 | ## Function: set_library
 14 | # Description: Call require packages for the project.
 15 | # Input:  user_lib - A logical input indicates whether the packages are 
 16 | #                    called from specified folder, default as True. If 
 17 | #                    false, packages are called from default R library.
 18 | #         my.lib - A string for the folder path, not required when user_lib 
 19 | #                  is False. Default as "Packages".
 20 | # Output: None.
 21 | set_library <- function(user_lib = T, my.lib = "Packages"){
 22 |   if(user_lib) .libPaths(c(.libPaths(), my.lib))
 23 |   library(MSGARCH)
 24 |   library(data.table)
 25 |   library(MCS)
 26 |   library(MASS)
 27 |   library(plyr)
 28 |   library(expm)
 29 |   library(dplyr)
 30 | }
 31 | 
 32 | # Raw data processing -----------------------------------------------------
 33 | 
 34 | ## Function: read.data.log.return
 35 | # Description: Read input data from folder "Input" and provide date, adjusted closing prices, 
 36 | #              and log return, on a daily basis, as an output.
 37 | # Input:  input - A string indicates input data, i.e. "DAX", "SP500", or "Nikkei".
 38 | #         start_date - (Optional) A date format of start date for the output data.
 39 | #         end_date - (Optional) A date format of end date for the output data.
 40 | #         length_lr - (Optional) A numeric input specifies the fixed log-return output length.
 41 | # Output: A list of
 42 | #         + Date - A date object of the format: yyyy-mm-dd.
 43 | #         + Adj.Close - A numeric vector of daily adjusted closing price at the specific date.
 44 | #         + LogReturn - A numeric vector of daily log returns of the adjusted closing price.
 45 | read.data.log.return <- function(input, start_date, end_date, length_lr = NULL){
 46 |   data = read.csv(paste0("Input/", input, ".csv"), sep = ",", stringsAsFactors = F)
 47 |   data$Date = as.Date(data$Date)
 48 |   data = data[order(data$Date), ]
 49 |   
 50 |   if(!missing(start_date)) {
 51 |     data = data[which(data$Date >= as.Date(start_date)),]
 52 |     
 53 |     if(!missing(end_date)) {
 54 |       data = data[which(data$Date <= as.Date(end_date)),]
 55 |       if(!is.null(length_lr)) {
 56 |         warning("Parameters: start_date and end_date were given, so neglecting parameter: length_lr")
 57 |         length_lr = NULL
 58 |       }
 59 |     }
 60 |     
 61 |     if(!is.null(length_lr)) {
 62 |       if(nrow(data) <= length_lr) {
 63 |         stop(paste0("Number of input data from time ", start_date, " is not larger than the given length_lr (", length_lr, ")."))
 64 |       } else data = data[1:(length_lr+1),] # nrow(data) = length_lr+1
 65 |     }
 66 |   }
 67 |   
 68 |   if(!missing(end_date)) {
 69 |     data = data[which(data$Date <= as.Date(end_date)),]
 70 |     if(!is.null(length_lr)){
 71 |       if(nrow(data) <= length_lr) {
 72 |         stop(paste0("Number of input data up to time ", end_date, " is not larger than the given length_lr (", length_lr, ")."))
 73 |       } else data = data[(nrow(data) - length_lr):nrow(data),] # nrow(data) = length_lr+1
 74 |     }
 75 |   }
 76 |   
 77 |   return(list(Date = data$Date,
 78 |               Adj.Close = data$Adj.Close,
 79 |               LogReturn = 100*diff(log(data$Adj.Close))))
 80 | }
 81 | 
 82 | 
 83 | ## Function: plot.data
 84 | # Description: Plot raw data and log-return data.
 85 | # Input:  data - A list similar to output from function "read.data.log.return".
 86 | #         input - A string indicates input data, i.e. "DAX", "SP500", or "Nikkei".
 87 | #         save_plot - A logical input states whether the graphical output will be saved or not. Default as False.
 88 | #         filename - (Optional) A path and name to which the graphical output is saved.
 89 | # Output: A figure consists of two plots, raw data and log return data at the top and the low panels, respectively.
 90 | plot.data <- function(data, input, save_plot = F, ...){
 91 |   opt_arg = list(...)
 92 |   
 93 |   data$Date = as.Date(data$Date)
 94 |   
 95 |   if(save_plot){
 96 |     if(!is.null(opt_arg$filename)){
 97 |       filename = opt_arg$filename
 98 |     } else filename = paste0("Output/", input, "_data_plots.jpg")
 99 |     jpeg(filename, width = 800, height = 500, quality = 100)
100 |   }
101 |   
102 |   par(mfrow = c(2,1))
103 |   if(!is.null(opt_arg$N.trunc)){
104 |     N.trunc = opt_arg$N.trunc
105 |     data$Date = data$Date[1:(N.trunc+1)]
106 |     data$Adj.Close = data$Adj.Close[1:(N.trunc+1)]
107 |     data$LogReturn = data$LogReturn[1:N.trunc]
108 |   }
109 |   plot(data$Date, data$Adj.Close, type = "l", xlab = "date", ylab = "Adjusted closing value")
110 |   plot(data$Date[-1], data$LogReturn, type = "l", xlab = "date", ylab = "Log return value")
111 |   title(main = paste0(input, "\n" , min(data$Date), " - ", max(data$Date)), outer = T, line = -3)
112 |   
113 |   if(save_plot) dev.off()
114 | }
115 | 
116 | ## Function: summary.data
117 | # Description: Display data statistics as a table.
118 | # Input:  data - A list similar to output from function "read.data.log.return".
119 | #         input - A string indicates input data, i.e. "DAX", "SP500", or "Nikkei".
120 | #         stat - A string determines whether the data statistics are calculated for the whole period ("all"),
121 | #                or based on rolling windows ("roll_win"). Default as stat = "all".
122 | #         write_table - A logical input states whether the output table will be saved or not. Default as False.
123 | #         ... - optional argument:
124 | #           + filename - A path and name to which the output table is saved.
125 | #           + roll_win - A numeric input indicates window length, required when stat = "roll_win".
126 | # Output: A dataframe consists of input string and its following statistics: mean, variance, length, 
127 | #         minimum, maximum, skewness, and kurtosis.
128 | summary.data <- function(data, input, stat = "all", write_table = F, ...){
129 |   opt_arg = list(...)
130 |   
131 |   if(stat=="all"){
132 |     df = data.frame(Data = input, Mean = mean(data), Variance = var(data), N = length(data), 
133 |                     Min = min(data), Max = max(data), Skewness = skewness(data), Kurtosis = kurtosis(data))
134 |   } else if(stat=="roll_win"){
135 |     if(is.null(opt_arg$roll_win)) stop("Please specify the rolling window length (roll_win) for statistics calculation; otherwise, use stat = 'all'.")
136 |     N.data = length(data)
137 |     roll_win = opt_arg$roll_win
138 |     df = lapply(1:(N.data-roll_win), 
139 |                 function(i){
140 |                   dat = data[i:(i+roll_win-1)]
141 |                   data.frame(Mean = mean(dat), Variance = var(dat), Skewness = skewness(dat), Kurtosis = kurtosis(dat))}
142 |                 )
143 |     df = rbindlist(df, use.names = T, fill = T)
144 |     df = cbind(Data = input, data.frame(t(colMeans(df))))
145 |   }
146 |   
147 |   if(write_table) {
148 |     if(!is.null(opt_arg$filename)){
149 |       filename = opt_arg$filename
150 |     } else filename = paste0("Output/", input, "_stats_", stat, ".csv")
151 |     write.table(df, file = filename, sep = ";", row.names = F)
152 |   }
153 |   return(df)
154 | }
155 | 
156 | 
157 | ## Function: skewness
158 | # Description: Calculate skewness of data.
159 | # Input:  data - A numeric vector from which skewness is calculated.
160 | # Output: A number represents skewness.
161 | skewness <- function(data){
162 |   mean( ( (data-mean(data))/sd(data) )^3 )
163 | }
164 | 
165 | ## Function: kurtosis
166 | # Description: Calculate kurtosis of data.
167 | # Input:  data - A numeric vector from which kurtosis is calculated.
168 | # Output: A number represents kurtosis.
169 | kurtosis <- function(data){
170 |   mean( ( (data-mean(data))/sd(data) )^4 )
171 | }
172 | 
173 | # Utility functions -------------------------------------------------------
174 | 
175 | ## Function: simahead_exclude_inf
176 | # Description: Re-simulate the draws that are non-numerics (NA, NaN) or (-/+)Inf.
177 | # Input:  object, n, m, theta, y as in MSGARCH::simahead. See MSGARCH package manual.
178 | # Output: A list of draws and states, in which all draws are valid numeric.
179 | simahead_exclude_inf <- function(object, n, m, theta, y){
180 |   y = as.matrix(y)
181 |   theta = as.vector(theta)
182 |   draws <- state <- matrix(data = NA, nrow = m, ncol = n)
183 |   
184 |   for(j in 1:n){
185 |     print(paste0("Random draws for time ", j, " out of ", n))
186 |     for(i in 1:m){
187 |       rand = object$rcpp.func$rnd_Rcpp(1, theta, c(y, draws[i, 0:(j-1)]))
188 |       tmp.it = 0
189 |       while(length(which(is.infinite(rand$draws)))+length((which(is.nan(rand$draws)))) > 0) {
190 |         rand = object$rcpp.func$rnd_Rcpp(1, theta, c(y, draws[i, 0:(j-1)]))
191 |         tmp.it = tmp.it + 1
192 |         if(tmp.it > 10000){
193 |           rand$draws = NA
194 |           break
195 |         }
196 |       }
197 |       draws[i,j] = rand$draws
198 |       state[i,j] = rand$state
199 |     }
200 |   }
201 |   
202 |   return(list(draws = draws,
203 |               state = state))
204 | }
205 | 
206 | ## Function: check.VaR
207 | # Description: Check whether the VaR is of the correct form. If not, 
208 | #              e.g. contains NA, NaN, (+/-)Inf, correct it.
209 | # Input:  VaR - A vector or matrix of VaR.
210 | #         tau - A number indicates forecasting steps for VaR prediction.
211 | #         save.out - A logical input indicates whether the corrected VaR is saved. Default as True.
212 | #         input - A string indicates input data, i.e. "DAX", "SP500", or "Nikkei".
213 | #         interval - A numeric vector or number of time index.
214 | # Output: A numeric vector or matrix of corrected VaR.
215 | check.VaR <- function(VaR, tau, save.out = T, input, interval){
216 |   tau = is.numeric(tau)
217 |   
218 |   N.period = nrow(VaR)
219 |   n.fix = find.n.fix(VaR)
220 |   n.err = which(which(abs(VaR) > 100) > N.period)
221 |   n.fix = c(n.fix, n.err)
222 |   
223 |   for(i in n.fix){
224 |     print(paste0("Start fix ", input, ", tau = ", tau, ", n.fix = ", i, " (", which(n.fix == i), " of ", length(n.fix), ")"))
225 |     n.col.fix = find.n.col.fix(i, N.period)
226 |     spec.fix = spec[[colnames(VaR[n.col.fix])]]
227 |     
228 |     n.row.fix = find.n.row.fix(i, N.period)
229 |     
230 |     y = input.data$LogReturn[(n.row.fix-tau+1):(n.row.fix+window_size-tau)]
231 |     
232 |     print(paste0("Fit ", input, ", tau = ", tau, ", n.fix = ", i, " (", which(n.fix == i), " of ", length(n.fix), ")"))
233 |     fit = fit.bayes(spec = spec.fix, y = y, ctr = ctr.bayes)
234 |     
235 |     print(paste0("Start draw ", input, ", tau = ", tau, ", n.fix = ", i, " (", which(n.fix == i), " of ", length(n.fix), ")"))
236 |     draws = simahead_exclude_inf(spec.fix, n = tau, m = N.sim, theta = colMeans(fit$theta), y = y)$draws
237 |     print(paste0("Finish draw ", input, ", tau = ", tau, ", n.fix = ", i, " (", which(n.fix == i), " of ", length(n.fix), ")"))
238 |     VaR[n.row.fix, n.col.fix] = quantile(draws[, tau], na.rm = T, probs = alpha, names = F)
239 |     if(save.out) write.table(VaR, file = paste0("Output/", input, "_VaR_", tau, "_", min(interval), "_", max(interval), ".csv"), sep = ";", row.names = F)
240 |   }
241 |   
242 |   VaR$iteration = NULL
243 |   return(VaR)
244 | }
245 | 
246 | ## Function: find.n.fix
247 | # Description: Find NA, NaN, (+/-)Inf, in the given vector/matrix. 
248 | # Input:  VaR - A vector or matrix of interest.
249 | # Output: A numeric vector or a number indicates the location of NA, NaN, (+/-)Inf.
250 | find.n.fix <- function(VaR){
251 |   n.fix.na = which(is.na(VaR))
252 |   n.fix.inf = which(is.infinite(as.matrix(VaR)))
253 |   n.fix.nan = which(is.nan(as.matrix(VaR)))
254 |   n.fix = c(n.fix.na, n.fix.inf, n.fix.nan)
255 |   return(n.fix)
256 | }
257 | 
258 | ## Function: find.n.col.fix
259 | # Description: Find column of the given location and the total row size.
260 | # Input:  n.fix - A number of location (e.g. output from function "find.n.fix").
261 | #         N.period - A number of total row size of the considered matrix.
262 | # Output: A number indicates the column index, regarding the given location and total row size.
263 | find.n.col.fix <- function(n.fix, N.period){
264 |   n.fix = as.numeric(n.fix)
265 |   N.period = as.numeric(N.period)
266 |   
267 |   return(ceiling(n.fix/N.period))
268 | }
269 | 
270 | ## Function: find.n.row.fix
271 | # Description: Find row of the given location and the total row size.
272 | # Input:  n.fix - A number of location (e.g. output from function "find.n.fix").
273 | #         N.period - A number of total row size of the considered matrix.
274 | # Output: A number indicates the row index, regarding the given location and total row size.
275 | find.n.row.fix <- function(n.fix, N.period){
276 |   n.fix = as.numeric(n.fix)
277 |   N.period = as.numeric(N.period)
278 |   
279 |   n.row.fix = n.fix %% N.period
280 |   if(n.row.fix == 0)  n.row.fix = N.period
281 |   return(n.row.fix)
282 | }
283 | 
284 | ## Function: check.Mat
285 | # Description: Check if the input matrix contains NA, NaN, or (+/-)Inf. 
286 | #              If yes, change NA, NaN to 0 and (+/-)Inf to (+/-)1e6.
287 | # Input:  mat - A matrix.
288 | # Output: A matrix without NA, NaN, (+/-)Inf.
289 | check.Mat <- function(mat){
290 |   n.period = nrow(mat)
291 |   n.fix = find.n.fix(mat)
292 |   if(length(n.fix) > 0){
293 |     n.col.fix = find.n.col.fix(n.fix, n.period)
294 |     n.row.fix = find.n.row.fix(n.fix, n.period)
295 |     
296 |     if(is.na(mat[n.row.fix,n.col.fix]) || is.nan(mat[n.row.fix,n.col.fix])) mat[n.row.fix,n.col.fix] = 0
297 |     if(is.infinite(mat[n.row.fix,n.col.fix])){
298 |       if(mat[n.row.fix,n.col.fix] < 0){mat[n.row.fix,n.col.fix] = -1e6} else{mat[n.row.fix,n.col.fix] = 1e6}
299 |     }
300 |   }
301 |   
302 |   return(mat)
303 | }
304 | 
305 | ## Function: check.alpha
306 | # Description: Check if the input (alpha) is the valid probability: 0<=alpha<=1. 
307 | # Input:  alpha - A number or character indicates (shortfall) probability.
308 | # Output: A number of (shortfall) probability.
309 | check.alpha <- function(alpha){
310 |   alpha = as.numeric(alpha)
311 |   if((alpha < 0) || (alpha > 1)) stop("Input argument (alpha) must be numeric between 0 and 1.")
312 |   return(alpha)
313 | }
314 | 
315 | ## Function: check.hit
316 | # Description: Check if the input vector (hit) contains only 0 or 1 (False or True). 
317 | # Input:  hit - A considered vector.
318 | # Output: A numeric vector consists of 0 or 1.
319 | check.hit <- function(hit){
320 |   hit = as.numeric(hit)
321 |   if(!prod(hit %in% c(0,1))) stop("Input argument (hit) should be a vector consists of only 0 or FALSE or 1 or TRUE.")
322 |   return(hit)
323 | }
324 | 
325 | ## Function: create.outfile
326 | # Description: Create an output file for the criteria tests and VaR forecasting.
327 | # Input:  input - A string indicates input data, i.e. "DAX", "SP500", or "Nikkei".
328 | #         specs - A vector of strings consist of all competitor models.
329 | #         taus - A numeric vector or a number of the forecasting period.
330 | #         interval - A numeric vector or number of time index.
331 | # Output: None.
332 | create.outfile <- function(input, specs, taus, interval){
333 |   taus = as.numeric(taus)
334 |   write.table(matrix(c("test", "iteration", specs), nrow = 1), file = paste0("Output/", input, "_AIC_BIC_", min(interval), "_", max(interval), ".csv"), sep = ";", row.names = F, col.names = F)
335 |   l_ply(taus, function(t) write.table(matrix(c("iteration", specs), nrow = 1), file = paste0("Output/", input, "_VaR_", t, "_", min(interval), "_", max(interval), ".csv"), sep = ";", row.names = F, col.names = F))
336 | }
337 | 


--------------------------------------------------------------------------------