├── img
    ├── images.txt
    ├── RV.PNG
    ├── rt.PNG
    ├── HAR.PNG
    ├── HAR_models.PNG
    ├── modelTest.PNG
    └── inSampleBetas.PNG
├── LICENSE
├── README.md
└── HAR_RV.R


/img/images.txt:
--------------------------------------------------------------------------------
1 | this folder contains the images
2 | 


--------------------------------------------------------------------------------
/img/RV.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/RV.PNG


--------------------------------------------------------------------------------
/img/rt.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/rt.PNG


--------------------------------------------------------------------------------
/img/HAR.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/HAR.PNG


--------------------------------------------------------------------------------
/img/HAR_models.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/HAR_models.PNG


--------------------------------------------------------------------------------
/img/modelTest.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/modelTest.PNG


--------------------------------------------------------------------------------
/img/inSampleBetas.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/inSampleBetas.PNG


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Jacob Hein
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # HAR models forecasting realized volatility in US stocks
  2 | Various heterogeneous autoregressive (HAR) models from Bollerslev et al. (2016), implemented in R to forecast the intraday measure of realized volatility in select US stocks based on high-frequency trading prices.
  3 | 
  4 | ### Data
  5 | Intraday prices of the US stocks Microsoft, McDonald's, JPMorgan Chase and Disney, and of an S&P 500 trust fund (SPY), were extracted at frequencies of 1, 5 and 10 minutes with the Bloomberg Terminal. The time series span from March 1st 2019 to November 1st 2019.
  6 | 
  7 | ### Returns and realized volatility
  8 | Intraday returns are computed as the change in intraday prices of a given financial asset,
  9 | <img src="img/rt.PNG" width="150" />. The sum of the squared intraday returns constitutes the realized variance estimate of a given day (its square root is the realized volatility): 
 10 | 
 11 | <img src="img/RV.PNG" width="200" />
 12 | 
 13 | Related measures like realized quarticity (RQ) and bipower variation (BPV), which are used by some model variants, were also estimated in R:
 14 | 
 15 | ```r
 16 | #RV, RQ, BPV Estimator
 17 | estimator <- function(data) {
 18 |   RV_t_estimates = c()
 19 |   RQ_t_estimates = c()
 20 |   BPV_t_estimates = c()
 21 |   RV_t_plus_estimates = c()
 22 |   RV_t_minus_estimates = c()
 23 |   RV_t_dates = c()
 24 |   M = 0 # Intraday obs used in estimation of RV_t
 25 |   
 26 |   for (t in 1:length(data$Dates)) {
 27 |     # t accounts for the final number of daily RV_t estimates
 28 |     RV_t_i_estimates = c() # M number of r_t,i to be summed up
 29 |     M_Q = M # Counter
 30 |     
 31 |     while (substring(data$Dates[t+M], first = 1, last = 5) == substring(data$Dates[t+M+1], first = 1, last = 5)
 32 |            # The below AND condition breaks while-loop when no more intraday obs available
 33 |            & !is.na(data$Dates[t+M+1])) {
 34 |       
 35 |       # Intraday returns 
 36 |       RV_t_i = (data$Open[t+M+1] - data$Open[t+M])
 37 |       RV_t_i_estimates = c(RV_t_i_estimates, RV_t_i)
 38 |       
 39 |       M = M + 1
 40 |     }
 41 |     
 42 |     if (is.na(data$Dates[t+M])) {
 43 |       break # This if-clause breaks for-loop when the eventual NA intraday obs is reached 
 44 |     }
 45 |     
 46 |     RV_t = sum(RV_t_i_estimates^2) #Realized Variance
 47 |     
 48 |     RQ_t = ((M-M_Q)/3) * sum(RV_t_i_estimates^4)
 49 | 
 50 |     BPV_t_i_estimates = c()
 51 |     # i in 1:len(...)-1 corresponds to summing up to M-1 as in Bollerslev (2016)
 52 |     for (i in 1:(length(RV_t_i_estimates)-1)) {
 53 |       BPV_t_i_estimates = c(BPV_t_i_estimates, abs(RV_t_i_estimates[i] * abs(RV_t_i_estimates[i+1])))
 54 |     }
 55 |     BPV_t = (sqrt(2/pi))^(-2) * sum(BPV_t_i_estimates)
 56 |     
 57 |     # RV Plus and RV Minus for SHAR model spec
 58 |     RV_t_plus = sum(RV_t_i_estimates[RV_t_i_estimates > 0]^2)
 59 |     RV_t_minus = sum(RV_t_i_estimates[RV_t_i_estimates < 0]^2)
 60 |     
 61 |     RV_t_estimates = c(RV_t_estimates, RV_t)
 62 |     RQ_t_estimates = c(RQ_t_estimates, RQ_t)
 63 |     BPV_t_estimates = c(BPV_t_estimates, BPV_t)
 64 |     RV_t_plus_estimates = c(RV_t_plus_estimates, RV_t_plus)
 65 |     RV_t_minus_estimates = c(RV_t_minus_estimates, RV_t_minus)
 66 |     
 67 |     # Dates
 68 |     RV_t_date = as.numeric(substring(data$Dates[t+M], first = 1, last = 5))
 69 |     RV_t_dates = c(RV_t_dates, RV_t_date)
 70 |   }
 71 |   
 72 |   RV_df = as.data.frame(RV_t_dates)
 73 |   RV_df = cbind(RV_df, RV_t_estimates, RQ_t_estimates, BPV_t_estimates, RV_t_plus_estimates, RV_t_minus_estimates)
 74 |   colnames(RV_df) = c("Dates", "RV", "RQ", "BPV", "RV_plus", "RV_minus")
 75 |   RV_df$Dates = as.Date(RV_df$Dates, origin = "1899-12-30")
 76 |   return(RV_df)
 77 | }
 78 | ```
 79 | 
 80 | 
 81 | 
 82 | ### HAR models
 83 | The objective is to fit different HAR model variants to an in-sample portion of the time series and then predict an out-of-sample test portion. The fundamental HAR model is given by: 
 84 | 
 85 | <img src="img/HAR.PNG" width="500" />
 86 | 
 87 | This project includes implementations and tests of model variants that account for measurement error via realized quarticity, for leverage effects, and for jumps in the data.
 88 | 
 89 | The R implementation could have been optimized by vectorizing the computations; it nevertheless runs in a small amount of time:
 90 | 
 91 | ```r
 92 | #Forecast, Errors, Betas & Plots
 93 | HAR <- function(data, out_sample = 96, plot_scalar = 1, extra_plots = FALSE) {
 94 |   
 95 |   RV = data$RV
 96 |   RQ = data$RQ
 97 |   BPV = data$BPV
 98 |   RV_p = data$RV_plus
 99 |   RV_m = data$RV_minus
100 |   
101 |   nobs = length(RV)
102 |   in_sample = nobs - out_sample
103 |   
104 |   outRV = RV[(in_sample+1):(length(RV))] # We +1 to get equal length as out_sample size
105 |   lag = 22 # 22 days lag is equivalent to one month of trading days lag
106 |   
107 |   all_predsA = rep(0, times = out_sample)
108 |   all_preds = rep(0, times = out_sample)
109 |   all_predsQ = rep(0, times = out_sample)
110 |   all_predsF = rep(0, times = out_sample)
111 |   all_predsC = rep(0, times = out_sample)
112 |   all_predsS = rep(0, times = out_sample)
113 |   all_predsJ = rep(0, times = out_sample)
114 |   
115 |   all_betasA = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
116 |   all_betas = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
117 |   all_betasQ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
118 |   all_betasF = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 7)
119 |   all_betasC = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
120 |   all_betasS = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
121 |   all_betasJ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
122 |   
123 |   for (t in 1:(out_sample)) {
124 |     # Estimation
125 |     y = RV[(lag + t + 1):(in_sample + t)]
126 |     XA = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
127 |     X = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
128 |     XQ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
129 |     XF = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 6)
130 |     XC = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
131 |     XS = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
132 |     XJ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
133 |     
134 |     for (i in 0:(in_sample - lag - 1)) {
135 |       
136 |       # AR(3)
137 |       XA[i+1,1] = RV[(-1+i+1+lag + t)]
138 |       XA[i+1,2] = RV[(-2+i+1+lag + t)]
139 |       XA[i+1,3] = RV[(-3+i+1+lag + t)]
140 |       
141 |       # HAR
142 |       X[i+1,1] = RV[(-1+i+1+lag + t)]
143 |       X[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
144 |       X[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
145 |       
146 |       # HARQ
147 |       XQ[i+1,1] = RV[(-1+i+1+lag + t)]
148 |       XQ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
149 |       XQ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
150 |       XQ[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)])
151 |       
152 |       # HARQ-F
153 |       XF[i+1,1] = RV[(-1+i+1+lag + t)]
154 |       XF[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
155 |       XF[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
156 |       XF[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)])
157 |       XF[i+1,5] = (((1/5)*sum(RQ[(-5+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])))
158 |       XF[i+1,6] = (((1/22)*sum(RQ[(-22+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])))
159 |       
160 |       # CHAR
161 |       XC[i+1,1] = BPV[(-1+i+1+lag + t)]
162 |       XC[i+1,2] = (1/5)*sum(BPV[(-5+i+1+lag + t):(i+lag + t)])
163 |       XC[i+1,3] = (1/22)*sum(BPV[(-22+i+1+lag + t):(i+lag + t)])
164 |       
165 |       # SHAR
166 |       XS[i+1,1] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
167 |       XS[i+1,2] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
168 |       XS[i+1,3] = RV_p[(-1+i+1+lag + t)]
169 |       XS[i+1,4] = RV_m[(-1+i+1+lag + t)]
170 |       
171 |       # HAR-J
172 |       XJ[i+1,1] = RV[(-1+i+1+lag + t)]
173 |       XJ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
174 |       XJ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
175 |       XJ[i+1,4] = max((RV[(-1+i+1+lag + t)] - BPV[(-1+i+1+lag + t)]), 0)
176 |       
177 |     }
178 |     
179 |     
180 |     # R Regression at t=1 for Standard Errors before performing any out-of-sample forecasts
181 |     if (t==out_sample) {
182 |       modelA = lm(y ~ XA)
183 |       model = lm(y ~ X)
184 |       modelQ = lm(y ~ XQ)
185 |       modelF = lm(y ~ XF)
186 |       modelC = lm(y ~ XC)
187 |       modelS = lm(y ~ XS)
188 |       # HAR-J if statement:
189 |       if (sum(XJ[,4]) ==0) {
190 |         XJ[1,4] = 0.1 # Ensure invertibility if XJ singular with zero column
191 |       }
192 |       modelJ = lm(y ~ XJ)
193 |       models_at_t_1 = list("modelA" = modelA, "model" = model, 
194 |                            "modelQ" = modelQ, "modelF" = modelF, 
195 |                            "modelC" = modelC, "modelS" = modelS, 
196 |                            "modelJ" = modelJ)
197 |       
198 |       # Below we retrieve R^2 & Adjusted R^2, prior to out-of-sample forecasts
199 |       num_of_models = 7
200 |       r_squareds = matrix(0, nrow = 2, ncol = num_of_models)
201 |       for (val in 1:length(models_at_t_1)) {
202 |         r_squareds[1,val] = summary(models_at_t_1[[val]])$r.squared
203 |         r_squareds[2,val] = summary(models_at_t_1[[val]])$adj.r.squared
204 |       }
205 |       rownames(r_squareds) = c("R-squared", "Adj.R-squared")
206 |       colnames(r_squareds) = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
207 |       
208 |     }
209 |     
210 |     XA = cbind(rep(1, times = nrow(XA)), XA)
211 |     X = cbind(rep(1, times = nrow(X)), X)
212 |     XQ = cbind(rep(1, times = nrow(XQ)), XQ)
213 |     XF = cbind(rep(1, times = nrow(XF)), XF)
214 |     XC = cbind(rep(1, times = nrow(XC)), XC)
215 |     XS = cbind(rep(1, times = nrow(XS)), XS)
216 |     XJ = cbind(rep(1, times = nrow(XJ)), XJ)
217 |     
218 |     # HAR-J if statement:
219 |     if (sum(XJ[,5]) ==0) {
220 |       XJ[1,5] = 0.1 # Ensure invertibility if XJ singular with zero column
221 |     }
222 |     
223 |     # OLS Regression
224 |     betasA = solve(t(XA) %*% XA) %*% t(XA) %*% y
225 |     betas = solve(t(X) %*% X) %*% t(X) %*% y
226 |     betasQ = solve(t(XQ) %*% XQ) %*% t(XQ) %*% y
227 |     betasF = solve(t(XF) %*% XF) %*% t(XF) %*% y
228 |     betasC = solve(t(XC) %*% XC) %*% t(XC) %*% y
229 |     betasS = solve(t(XS) %*% XS) %*% t(XS) %*% y
230 |     betasJ = solve(t(XJ) %*% XJ) %*% t(XJ) %*% y
231 |     
232 |     b0A = betasA[1]
233 |     b1A = betasA[2]
234 |     b2A = betasA[3]
235 |     b3A = betasA[4]
236 |     
237 |     b0 = betas[1]
238 |     b1 = betas[2]
239 |     b2 = betas[3]
240 |     b3 = betas[4]
241 |     
242 |     b0Q = betasQ[1]
243 |     b1Q = betasQ[2]
244 |     b2Q = betasQ[3]
245 |     b3Q = betasQ[4]
246 |     b1Q_Q = betasQ[5]
247 |     
248 |     b0F = betasF[1]
249 |     b1F = betasF[2]
250 |     b2F = betasF[3]
251 |     b3F = betasF[4]
252 |     b1F_Q = betasF[5]
253 |     b2F_Q = betasF[6]
254 |     b3F_Q = betasF[7]
255 |     
256 |     b0C = betasC[1]
257 |     b1C = betasC[2]
258 |     b2C = betasC[3]
259 |     b3C = betasC[4]
260 |     
261 |     b0S = betasS[1]
262 |     b1S_P = betasS[2]
263 |     b2S = betasS[3]
264 |     b3S = betasS[4]
265 |     b1S_M = betasS[5]
266 |     
267 |     b0J = betasJ[1]
268 |     b1J = betasJ[2]
269 |     b2J = betasJ[3]
270 |     b3J = betasJ[4]
271 |     bJ = betasJ[5]
272 |     
273 |     all_betasA[t,1] = b0A
274 |     all_betasA[t,2] = b1A
275 |     all_betasA[t,3] = b2A
276 |     all_betasA[t,4] = b3A
277 |     
278 |     all_betas[t,1] = b0
279 |     all_betas[t,2] = b1
280 |     all_betas[t,3] = b2
281 |     all_betas[t,4] = b3
282 |     
283 |     all_betasQ[t,1] = b0Q
284 |     all_betasQ[t,2] = b1Q
285 |     all_betasQ[t,3] = b2Q
286 |     all_betasQ[t,4] = b3Q
287 |     all_betasQ[t,5] = b1Q_Q
288 |     
289 |     all_betasF[t,1] = b0F
290 |     all_betasF[t,2] = b1F
291 |     all_betasF[t,3] = b2F
292 |     all_betasF[t,4] = b3F
293 |     all_betasF[t,5] = b1F_Q
294 |     all_betasF[t,6] = b2F_Q
295 |     all_betasF[t,7] = b3F_Q
296 |     
297 |     all_betasC[t,1] = b0C
298 |     all_betasC[t,2] = b1C
299 |     all_betasC[t,3] = b2C
300 |     all_betasC[t,4] = b3C
301 |     
302 |     all_betasS[t,1] = b0S
303 |     all_betasS[t,2] = b1S_P
304 |     all_betasS[t,3] = b2S
305 |     all_betasS[t,4] = b3S
306 |     all_betasS[t,5] = b1S_M
307 |     
308 |     all_betasJ[t,1] = b0J
309 |     all_betasJ[t,2] = b1J
310 |     all_betasJ[t,3] = b2J
311 |     all_betasJ[t,4] = b3J
312 |     all_betasJ[t,5] = bJ
313 |     
314 |     
315 |     
316 |     # Prediction at time-step t
317 |     predA = b0A + b1A*XA[nrow(XA),2] + b2A*XA[nrow(XA),3] + b3A*XA[nrow(XA),4]
318 |     pred = b0 + b1*X[nrow(X),2] + b2*X[nrow(X),3] + b3*X[nrow(X),4]
319 |     predQ = b0Q + b1Q*XQ[nrow(XQ),2] + b2Q*XQ[nrow(XQ),3] + b3Q*XQ[nrow(XQ),4] + b1Q_Q*XQ[nrow(XQ),5]
320 |     predF = b0F + b1F*XF[nrow(XF),2] + b2F*XF[nrow(XF),3] + b3F*XF[nrow(XF),4] + b1F_Q*XF[nrow(XF),5] + b2F_Q*XF[nrow(XF),6] + b3F_Q*XF[nrow(XF),7]
321 |     predC = b0C + b1C*XC[nrow(XC),2] + b2C*XC[nrow(XC),3] + b3C*XC[nrow(XC),4]
322 |     predS = b0S + b1S_P*XS[nrow(XS),2] + b2S*XS[nrow(XS),3] + b3S*XS[nrow(XS),4] + b1S_M*XS[nrow(XS),5]
323 |     predJ = b0J + b1J*XJ[nrow(XJ),2] + b2J*XJ[nrow(XJ),3] + b3J*XJ[nrow(XJ),4] + bJ*XJ[nrow(XJ),5]
324 |     
325 |     # Saving time-step t prediction within list for error computations
326 |     all_predsA[t] = predA
327 |     all_preds[t] = pred
328 |     all_predsQ[t] = predQ
329 |     all_predsF[t] = predF
330 |     all_predsC[t] = predC
331 |     all_predsS[t] = predS
332 |     all_predsJ[t] = predJ
333 |     
334 |   }
335 |   
336 |   # Error Computations: Mean Squared Error & Mean Absolute Error
337 |   AR_mse = mean((outRV - all_predsA)^2)
338 |   AR_mae = mean(abs(outRV - all_predsA))
339 |   
340 |   HAR_mse = mean((outRV - all_preds)^2)
341 |   HAR_mae = mean(abs(outRV - all_preds))
342 |   
343 |   HARQ_mse = mean((outRV - all_predsQ)^2)
344 |   HARQ_mae = mean(abs(outRV - all_predsQ))
345 |   
346 |   HARQF_mse = mean((outRV - all_predsF)^2)
347 |   HARQF_mae = mean(abs(outRV - all_predsF))
348 |   
349 |   CHAR_mse = mean((outRV - all_predsC)^2)
350 |   CHAR_mae = mean(abs(outRV - all_predsC))
351 |   
352 |   SHAR_mse = mean((outRV - all_predsS)^2)
353 |   SHAR_mae = mean(abs(outRV - all_predsS))
354 |   
355 |   HARJ_mse = mean((outRV - all_predsJ)^2)
356 |   HARJ_mae = mean(abs(outRV - all_predsJ))
357 |   
358 |   
359 |   errors_list = list("AR_mse" = AR_mse, "AR_mae" = AR_mae, "HAR_mse" = HAR_mse, "HAR_mae" = HAR_mae,
360 |                 "HARQ_mse" = HARQ_mse, "HARQ_mae" = HARQ_mae, "HARQF_mse" = HARQF_mse, "HARQF_mae" = HARQF_mae,
361 |                 "CHAR_mse" = CHAR_mse, "CHAR_mae" = CHAR_mae, "SHAR_mse" = SHAR_mse, "SHAR_mae" = SHAR_mae,
362 |                 "HARJ_mse" = HARJ_mse, "HARJ_mae" = HARJ_mae)
363 |   
364 |   
365 |   # Output formatting: 
366 |   output = matrix(outRV)
367 |   out_sample_dates = data$Date[(in_sample+1):length(data$Date)] 
368 |   output = cbind(out_sample_dates, output, all_predsA, all_preds, all_predsQ, all_predsF, all_predsC, all_predsS, all_predsJ)
369 |   output_df = as.data.frame(output)
370 |   colnames(output_df) = c("Date", "outRV", "all_predsA", "all_preds", "all_predsQ", "all_predsF", "all_predsC", "all_predsS", "all_predsJ")
371 |   betas_list = list("all_betasA" = all_betasA, "all_betas" = all_betas, "all_betasQ" = all_betasQ, 
372 |                     "all_betasF" = all_betasF, "all_betasC" = all_betasC, "all_betasS" = all_betasS, 
373 |                     "all_betasJ" = all_betasJ)
374 |   
375 |   output_df_errors_betas = list(output_df, errors_list, betas_list, models_at_t_1, r_squareds)
376 |   
377 |   return(output_df_errors_betas)
378 | }
379 | ```
380 | 
381 | 
382 | ### Beta coefficients estimated in-sample
383 | 
384 | <img src="img/inSampleBetas.PNG" width="650" />
385 | 
386 | ### Out-of-sample forecasting
387 | 
388 | <img src="img/HAR_models.PNG" width="950" />
389 | 
390 | <img src="img/modelTest.PNG" width="950" />
391 | 
392 | ### Project conclusion
393 | 
394 | HAR-type models fit measures representing intraday price movements well. The out-of-sample forecast results suggest that the HARQ-F model outperforms all other variants along with a simple AR(3) model. 
395 | 
396 | The HARQ-F model includes a realized quarticity measure to minimize error, which may contribute to the superior forecasting ability of this model variant. 
397 | 
398 | Time series extending further back in time would be desirable for both in-sample coefficient estimation and out-of-sample forecasting; however, the limited availability of intraday price data prohibited a longer time horizon in this project. 
399 | 


--------------------------------------------------------------------------------
/HAR_RV.R:
--------------------------------------------------------------------------------
  1 | # install.packages("rstudioapi")
  2 | # install.packages("rJava")
  3 | # install.packages("readxl")
  4 | # install.packages("xts")
  5 | # install.packages("sandwich")
  6 | # install.packages("lmtest")
  7 | # install.packages("xlsx")
  8 | # install.packages("e1071")
  9 | # install.packages("stringr")
 10 | # install.packages("rlang")
 11 | # install.packages("ggplot2")
 12 | 
 13 | setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # Set working directive to current folder
 14 | options(scipen=999) # No scientific floating points notation
 15 | 
 16 | library(xts) # Time-indexed data frames, ideal for plots 
 17 | library(readxl) # Reading data
 18 | library(xlsx) # Exporting results
 19 | library(e1071) # Skew & Kurtosis for Summary Statistics Table
 20 | library(stringr) # String formatting
 21 | library(rlang)
 22 | library(ggplot2) # Plots
 23 | theme_set(theme_minimal()) # Plot Window
 24 | ###############
 25 | # For Robust Standard Errors
 26 | library(lmtest)
 27 | library(sandwich)
 28 | ###############
 29 | 
 30 | #Forecast, Errors, Betas & Plots
 31 | HAR <- function(data, out_sample = 96, plot_scalar = 1, extra_plots = FALSE) {
 32 |   
 33 |   RV = data$RV
 34 |   RQ = data$RQ
 35 |   BPV = data$BPV
 36 |   RV_p = data$RV_plus
 37 |   RV_m = data$RV_minus
 38 |   
 39 |   nobs = length(RV)
 40 |   in_sample = nobs - out_sample
 41 |   
 42 |   outRV = RV[(in_sample+1):(length(RV))] # We +1 to get equal length as out_sample size
 43 |   lag = 22 # 22 days lag is equivalent to one month of trading days lag
 44 |   
 45 |   all_predsA = rep(0, times = out_sample)
 46 |   all_preds = rep(0, times = out_sample)
 47 |   all_predsQ = rep(0, times = out_sample)
 48 |   all_predsF = rep(0, times = out_sample)
 49 |   all_predsC = rep(0, times = out_sample)
 50 |   all_predsS = rep(0, times = out_sample)
 51 |   all_predsJ = rep(0, times = out_sample)
 52 |   
 53 |   all_betasA = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
 54 |   all_betas = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
 55 |   all_betasQ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
 56 |   all_betasF = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 7)
 57 |   all_betasC = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
 58 |   all_betasS = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
 59 |   all_betasJ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
 60 |   
 61 |   for (t in 1:(out_sample)) {
 62 |     # Estimation
 63 |     y = RV[(lag + t + 1):(in_sample + t)]
 64 |     XA = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
 65 |     X = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
 66 |     XQ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
 67 |     XF = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 6)
 68 |     XC = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
 69 |     XS = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
 70 |     XJ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
 71 |     
 72 |     for (i in 0:(in_sample - lag - 1)) {
 73 |       
 74 |       # AR(3)
 75 |       XA[i+1,1] = RV[(-1+i+1+lag + t)]
 76 |       XA[i+1,2] = RV[(-2+i+1+lag + t)]
 77 |       XA[i+1,3] = RV[(-3+i+1+lag + t)]
 78 |       
 79 |       # HAR
 80 |       X[i+1,1] = RV[(-1+i+1+lag + t)]
 81 |       X[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
 82 |       X[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
 83 |       
 84 |       # HARQ
 85 |       XQ[i+1,1] = RV[(-1+i+1+lag + t)]
 86 |       XQ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
 87 |       XQ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
 88 |       XQ[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)])
 89 |       
 90 |       # HARQ-F
 91 |       XF[i+1,1] = RV[(-1+i+1+lag + t)]
 92 |       XF[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
 93 |       XF[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
 94 |       XF[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)])
 95 |       XF[i+1,5] = (((1/5)*sum(RQ[(-5+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])))
 96 |       XF[i+1,6] = (((1/22)*sum(RQ[(-22+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])))
 97 |       
 98 |       # CHAR
 99 |       XC[i+1,1] = BPV[(-1+i+1+lag + t)]
100 |       XC[i+1,2] = (1/5)*sum(BPV[(-5+i+1+lag + t):(i+lag + t)])
101 |       XC[i+1,3] = (1/22)*sum(BPV[(-22+i+1+lag + t):(i+lag + t)])
102 |       
103 |       # SHAR
104 |       XS[i+1,1] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
105 |       XS[i+1,2] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
106 |       XS[i+1,3] = RV_p[(-1+i+1+lag + t)]
107 |       XS[i+1,4] = RV_m[(-1+i+1+lag + t)]
108 |       
109 |       # HAR-J
110 |       XJ[i+1,1] = RV[(-1+i+1+lag + t)]
111 |       XJ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
112 |       XJ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
113 |       XJ[i+1,4] = max((RV[(-1+i+1+lag + t)] - BPV[(-1+i+1+lag + t)]), 0)
114 |       
115 |     }
116 |     
117 |     
118 |     # R Regression at t=1 for Standard Errors before performing any out-of-sample forecasts
119 |     if (t==out_sample) {
120 |       modelA = lm(y ~ XA)
121 |       model = lm(y ~ X)
122 |       modelQ = lm(y ~ XQ)
123 |       modelF = lm(y ~ XF)
124 |       modelC = lm(y ~ XC)
125 |       modelS = lm(y ~ XS)
126 |       # HAR-J if statement:
127 |       if (sum(XJ[,4]) ==0) {
128 |         XJ[1,4] = 0.1 # Ensure invertibility if XJ singular with zero column
129 |       }
130 |       modelJ = lm(y ~ XJ)
131 |       models_at_t_1 = list("modelA" = modelA, "model" = model, 
132 |                            "modelQ" = modelQ, "modelF" = modelF, 
133 |                            "modelC" = modelC, "modelS" = modelS, 
134 |                            "modelJ" = modelJ)
135 |       
136 |       # Below we retrieve R^2 & Adjusted R^2, prior to out-of-sample forecasts
137 |       num_of_models = 7
138 |       r_squareds = matrix(0, nrow = 2, ncol = num_of_models)
139 |       for (val in 1:length(models_at_t_1)) {
140 |         r_squareds[1,val] = summary(models_at_t_1[[val]])$r.squared
141 |         r_squareds[2,val] = summary(models_at_t_1[[val]])$adj.r.squared
142 |       }
143 |       rownames(r_squareds) = c("R-squared", "Adj.R-squared")
144 |       colnames(r_squareds) = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
145 |       
146 |     }
147 |     
148 |     XA = cbind(rep(1, times = nrow(XA)), XA)
149 |     X = cbind(rep(1, times = nrow(X)), X)
150 |     XQ = cbind(rep(1, times = nrow(XQ)), XQ)
151 |     XF = cbind(rep(1, times = nrow(XF)), XF)
152 |     XC = cbind(rep(1, times = nrow(XC)), XC)
153 |     XS = cbind(rep(1, times = nrow(XS)), XS)
154 |     XJ = cbind(rep(1, times = nrow(XJ)), XJ)
155 |     
156 |     # HAR-J if statement:
157 |     if (sum(XJ[,5]) ==0) {
158 |       XJ[1,5] = 0.1 # Ensure invertibility if XJ singular with zero column
159 |     }
160 |     
161 |     # OLS Regression
162 |     betasA = solve(t(XA) %*% XA) %*% t(XA) %*% y
163 |     betas = solve(t(X) %*% X) %*% t(X) %*% y
164 |     betasQ = solve(t(XQ) %*% XQ) %*% t(XQ) %*% y
165 |     betasF = solve(t(XF) %*% XF) %*% t(XF) %*% y
166 |     betasC = solve(t(XC) %*% XC) %*% t(XC) %*% y
167 |     betasS = solve(t(XS) %*% XS) %*% t(XS) %*% y
168 |     betasJ = solve(t(XJ) %*% XJ) %*% t(XJ) %*% y
169 |     
170 |     b0A = betasA[1]
171 |     b1A = betasA[2]
172 |     b2A = betasA[3]
173 |     b3A = betasA[4]
174 |     
175 |     b0 = betas[1]
176 |     b1 = betas[2]
177 |     b2 = betas[3]
178 |     b3 = betas[4]
179 |     
180 |     b0Q = betasQ[1]
181 |     b1Q = betasQ[2]
182 |     b2Q = betasQ[3]
183 |     b3Q = betasQ[4]
184 |     b1Q_Q = betasQ[5]
185 |     
186 |     b0F = betasF[1]
187 |     b1F = betasF[2]
188 |     b2F = betasF[3]
189 |     b3F = betasF[4]
190 |     b1F_Q = betasF[5]
191 |     b2F_Q = betasF[6]
192 |     b3F_Q = betasF[7]
193 |     
194 |     b0C = betasC[1]
195 |     b1C = betasC[2]
196 |     b2C = betasC[3]
197 |     b3C = betasC[4]
198 |     
199 |     b0S = betasS[1]
200 |     b1S_P = betasS[2]
201 |     b2S = betasS[3]
202 |     b3S = betasS[4]
203 |     b1S_M = betasS[5]
204 |     
205 |     b0J = betasJ[1]
206 |     b1J = betasJ[2]
207 |     b2J = betasJ[3]
208 |     b3J = betasJ[4]
209 |     bJ = betasJ[5]
210 |     
211 |     all_betasA[t,1] = b0A
212 |     all_betasA[t,2] = b1A
213 |     all_betasA[t,3] = b2A
214 |     all_betasA[t,4] = b3A
215 |     
216 |     all_betas[t,1] = b0
217 |     all_betas[t,2] = b1
218 |     all_betas[t,3] = b2
219 |     all_betas[t,4] = b3
220 |     
221 |     all_betasQ[t,1] = b0Q
222 |     all_betasQ[t,2] = b1Q
223 |     all_betasQ[t,3] = b2Q
224 |     all_betasQ[t,4] = b3Q
225 |     all_betasQ[t,5] = b1Q_Q
226 |     
227 |     all_betasF[t,1] = b0F
228 |     all_betasF[t,2] = b1F
229 |     all_betasF[t,3] = b2F
230 |     all_betasF[t,4] = b3F
231 |     all_betasF[t,5] = b1F_Q
232 |     all_betasF[t,6] = b2F_Q
233 |     all_betasF[t,7] = b3F_Q
234 |     
235 |     all_betasC[t,1] = b0C
236 |     all_betasC[t,2] = b1C
237 |     all_betasC[t,3] = b2C
238 |     all_betasC[t,4] = b3C
239 |     
240 |     all_betasS[t,1] = b0S
241 |     all_betasS[t,2] = b1S_P
242 |     all_betasS[t,3] = b2S
243 |     all_betasS[t,4] = b3S
244 |     all_betasS[t,5] = b1S_M
245 |     
246 |     all_betasJ[t,1] = b0J
247 |     all_betasJ[t,2] = b1J
248 |     all_betasJ[t,3] = b2J
249 |     all_betasJ[t,4] = b3J
250 |     all_betasJ[t,5] = bJ
251 |     
252 |     
253 |     
254 |     # Prediction at time-step t
255 |     predA = b0A + b1A*XA[nrow(XA),2] + b2A*XA[nrow(XA),3] + b3A*XA[nrow(XA),4]
256 |     pred = b0 + b1*X[nrow(X),2] + b2*X[nrow(X),3] + b3*X[nrow(X),4]
257 |     predQ = b0Q + b1Q*XQ[nrow(XQ),2] + b2Q*XQ[nrow(XQ),3] + b3Q*XQ[nrow(XQ),4] + b1Q_Q*XQ[nrow(XQ),5]
258 |     predF = b0F + b1F*XF[nrow(XF),2] + b2F*XF[nrow(XF),3] + b3F*XF[nrow(XF),4] + b1F_Q*XF[nrow(XF),5] + b2F_Q*XF[nrow(XF),6] + b3F_Q*XF[nrow(XF),7]
259 |     predC = b0C + b1C*XC[nrow(XC),2] + b2C*XC[nrow(XC),3] + b3C*XC[nrow(XC),4]
260 |     predS = b0S + b1S_P*XS[nrow(XS),2] + b2S*XS[nrow(XS),3] + b3S*XS[nrow(XS),4] + b1S_M*XS[nrow(XS),5]
261 |     predJ = b0J + b1J*XJ[nrow(XJ),2] + b2J*XJ[nrow(XJ),3] + b3J*XJ[nrow(XJ),4] + bJ*XJ[nrow(XJ),5]
262 |     
263 |     # Saving time-step t prediction within list for error computations
264 |     all_predsA[t] = predA
265 |     all_preds[t] = pred
266 |     all_predsQ[t] = predQ
267 |     all_predsF[t] = predF
268 |     all_predsC[t] = predC
269 |     all_predsS[t] = predS
270 |     all_predsJ[t] = predJ
271 |     
272 |   }
273 |   
274 |   # Error Computations: Mean Squared Error & Mean Absolute Error
275 |   AR_mse = mean((outRV - all_predsA)^2)
276 |   AR_mae = mean(abs(outRV - all_predsA))
277 |   
278 |   HAR_mse = mean((outRV - all_preds)^2)
279 |   HAR_mae = mean(abs(outRV - all_preds))
280 |   
281 |   HARQ_mse = mean((outRV - all_predsQ)^2)
282 |   HARQ_mae = mean(abs(outRV - all_predsQ))
283 |   
284 |   HARQF_mse = mean((outRV - all_predsF)^2)
285 |   HARQF_mae = mean(abs(outRV - all_predsF))
286 |   
287 |   CHAR_mse = mean((outRV - all_predsC)^2)
288 |   CHAR_mae = mean(abs(outRV - all_predsC))
289 |   
290 |   SHAR_mse = mean((outRV - all_predsS)^2)
291 |   SHAR_mae = mean(abs(outRV - all_predsS))
292 |   
293 |   HARJ_mse = mean((outRV - all_predsJ)^2)
294 |   HARJ_mae = mean(abs(outRV - all_predsJ))
295 |   
296 |   
297 |   errors_list = list("AR_mse" = AR_mse, "AR_mae" = AR_mae, "HAR_mse" = HAR_mse, "HAR_mae" = HAR_mae,
298 |                 "HARQ_mse" = HARQ_mse, "HARQ_mae" = HARQ_mae, "HARQF_mse" = HARQF_mse, "HARQF_mae" = HARQF_mae,
299 |                 "CHAR_mse" = CHAR_mse, "CHAR_mae" = CHAR_mae, "SHAR_mse" = SHAR_mse, "SHAR_mae" = SHAR_mae,
300 |                 "HARJ_mse" = HARJ_mse, "HARJ_mae" = HARJ_mae)
301 |   
302 |   # Plots
303 |   #plot_scalar = 100
304 |   if (plot_scalar == 1) {
305 |     ylab_str = "Realized Variance"
306 |   } else {
307 |     ylab_str = paste("Realized Var. (Axis scaled by a factor of ", plot_scalar, ")", sep="")
308 |   }
309 |   
310 |   plotDates = DataSet$Dates[(length(DataSet$Dates)-out_sample+1):length(DataSet$Dates)]
311 |   
312 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
313 |   lines(plot_scalar*all_predsA ~ plotDates, col="darkred")
314 |   legend("topright", legend=c("Actual RV", "AR(3)"), col=c("blue", "darkred"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
315 |   plot_path = paste(getwd(), "/OutSampPlots/AR(3) R Plot.pdf", sep="")
316 |   dev.copy(pdf, plot_path)
317 |   #plot_path_png = paste(substr(plot_path, 1, nchar(plot_path)-4), ".png", sep="")
318 |   dev.off()
319 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
320 |   lines(plot_scalar*all_preds ~ plotDates, col="red")
321 |   legend("topright", legend=c("Actual RV", "HAR"), col=c("blue", "red"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
322 |   plot_path = paste(getwd(), "/OutSampPlots/HAR R Plot.pdf", sep="")
323 |   dev.copy(pdf, plot_path)
324 |   dev.off()
325 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
326 |   lines(plot_scalar*all_predsQ ~ plotDates, col="green")
327 |   legend("topright", legend=c("Actual RV", "HARQ"), col=c("blue", "green"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
328 |   plot_path = paste(getwd(), "/OutSampPlots/HARQ R Plot.pdf", sep="")
329 |   dev.copy(pdf, plot_path)
330 |   dev.off()
331 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
332 |   lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen")
333 |   legend("topright", legend=c("Actual RV", "HARQ-F"), col=c("blue", "darkgreen"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
334 |   plot_path = paste(getwd(), "/OutSampPlots/HARQ-F R Plot.pdf", sep="")
335 |   dev.copy(pdf, plot_path)
336 |   dev.off()
337 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
338 |   lines(plot_scalar*all_predsC ~ plotDates, col="black")
339 |   legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
340 |   plot_path = paste(getwd(), "/OutSampPlots/CHAR R Plot.pdf", sep="")
341 |   dev.copy(pdf, plot_path)
342 |   dev.off()
343 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
344 |   lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
345 |   legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
346 |   plot_path = paste(getwd(), "/OutSampPlots/SHAR R Plot.pdf", sep="")
347 |   dev.copy(pdf, plot_path)
348 |   dev.off()
349 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
350 |   lines(plot_scalar*all_predsJ ~ plotDates, col="orange")
351 |   legend("topright", legend=c("Actual RV", "HAR-J"), col=c("blue", "orange"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
352 |   plot_path = paste(getwd(), "/OutSampPlots/HAR-J R Plot.pdf", sep="")
353 |   dev.copy(pdf, plot_path)
354 |   dev.off()
355 |   
356 |   if (extra_plots == TRUE) {
357 |     # Plotting All models in one 
358 |     plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="All Models ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
359 |     lines(plot_scalar*all_predsA ~ plotDates, col="darkred")
360 |     #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
361 |     lines(plot_scalar*all_preds ~ plotDates, col="red")
362 |     #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
363 |     lines(plot_scalar*all_predsQ ~ plotDates, col="green")
364 |     #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ-F ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
365 |     lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen")
366 |     #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="CHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
367 |     lines(plot_scalar*all_predsC ~ plotDates, col="orange")
368 |     #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="SHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
369 |     lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
370 |     #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR-J ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
371 |     lines(plot_scalar*all_predsJ ~ plotDates, col="black")
372 |     plot_path = paste(getwd(), "/OutSampPlots/AllModels R Plot.pdf", sep="")
373 |     dev.copy(pdf, plot_path)
374 |     dev.off()
375 |     
376 |   }
377 |   
378 |   par(mfrow=c(2,2))
379 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
380 |   #lines(plot_scalar*all_predsA ~ plotDates, col="darkred")
381 |   #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
382 |   lines(plot_scalar*all_preds ~ plotDates, col="red")
383 |   legend("topright", legend=c("Actual RV", "HAR"), col=c("blue", "red"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
384 |   
385 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
386 |   lines(plot_scalar*all_predsJ ~ plotDates, col="orange")
387 |   legend("topright", legend=c("Actual RV", "HAR-J"), col=c("blue", "orange"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
388 |   
389 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
390 |   #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
391 |   lines(plot_scalar*all_predsQ ~ plotDates, col="green")
392 |   legend("topright", legend=c("Actual RV", "HARQ"), col=c("blue", "green"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
393 |   
394 |  
395 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
396 |   lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen")
397 |   legend("topright", legend=c("Actual RV", "HARQ-F"), col=c("blue", "darkgreen"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
398 |   #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="CHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
399 |   #lines(plot_scalar*all_predsC ~ plotDates, col="orange")
400 |   #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="SHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
401 |   #lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
402 |   #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR-J ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
403 |   plot_path = paste(getwd(), "/OutSampPlots/FOUR_R_Plots.pdf", sep="")
404 |   dev.copy(pdf, plot_path)
405 |   dev.off()
406 |   
407 |   par(mfrow=c(2,2))
408 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
409 |   lines(plot_scalar*all_predsC ~ plotDates, col="black")
410 |   legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
411 |   
412 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
413 |   lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
414 |   legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
415 |   
416 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
417 |   lines(plot_scalar*all_predsC ~ plotDates, col="black")
418 |   legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
419 |   
420 |   plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
421 |   lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
422 |   legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
423 |   
424 |   plot_path = paste(getwd(), "/OutSampPlots/TWO_R_Plots.pdf", sep="")
425 |   dev.copy(pdf, plot_path)
426 |   dev.off() 
427 |   
428 |   
429 |   # Output formatting: 
430 |   output = matrix(outRV)
431 |   out_sample_dates = data$Date[(in_sample+1):length(data$Date)] 
432 |   output = cbind(out_sample_dates, output, all_predsA, all_preds, all_predsQ, all_predsF, all_predsC, all_predsS, all_predsJ)
433 |   output_df = as.data.frame(output)
434 |   colnames(output_df) = c("Date", "outRV", "all_predsA", "all_preds", "all_predsQ", "all_predsF", "all_predsC", "all_predsS", "all_predsJ")
435 |   betas_list = list("all_betasA" = all_betasA, "all_betas" = all_betas, "all_betasQ" = all_betasQ, 
436 |                     "all_betasF" = all_betasF, "all_betasC" = all_betasC, "all_betasS" = all_betasS, 
437 |                     "all_betasJ" = all_betasJ)
438 |   
439 |   output_df_errors_betas = list(output_df, errors_list, betas_list, models_at_t_1, r_squareds)
440 |   
441 |   return(output_df_errors_betas)
442 | }
443 | 
# RV, RQ, BPV Estimator
#
# Aggregates intraday observations into one row of realized measures per
# trading day. Consecutive rows belong to the same day while the first five
# characters of `Dates` are equal; a day's return series is the first
# difference of `Open` within that day (the callers in this file pass log
# prices).
#
# Args:
#   data: data.frame with columns `Dates` and `Open`, ordered in time. The
#         five-character day prefix of `Dates` is converted with as.numeric()
#         and interpreted as a day count from 1899-12-30.
#
# Returns:
#   data.frame with one row per day and columns Dates (Date), RV, RQ, BPV,
#   RV_plus and RV_minus. Processing stops at the first missing timestamp.
#   A day with fewer than two returns gets BPV = 0 (empty sum of adjacent
#   products), and a day with a single observation gets 0 for every measure.
estimator <- function(data) {
  stamps <- data$Dates
  prices <- data$Open
  n_obs <- length(stamps)
  day_key <- substring(stamps, first = 1, last = 5)

  # There is at most one trading day per observation, so size the result
  # vectors once instead of growing them with c() on every day.
  day_serial <- numeric(n_obs)
  rv <- numeric(n_obs)
  rq <- numeric(n_obs)
  bpv <- numeric(n_obs)
  rv_plus <- numeric(n_obs)
  rv_minus <- numeric(n_obs)

  n_days <- 0
  start_idx <- 1
  while (start_idx <= n_obs && !is.na(stamps[start_idx])) {
    # Extend the day while the next observation exists and shares the day key.
    end_idx <- start_idx
    while (end_idx < n_obs && !is.na(stamps[end_idx + 1]) &&
             day_key[end_idx] == day_key[end_idx + 1]) {
      end_idx <- end_idx + 1
    }

    # Intraday returns r_{t,i}; empty when the day has a single observation.
    r <- diff(prices[start_idx:end_idx])
    m <- length(r)

    n_days <- n_days + 1
    day_serial[n_days] <- as.numeric(day_key[end_idx])

    rv[n_days] <- sum(r^2)           # Realized Variance
    rq[n_days] <- (m / 3) * sum(r^4) # Realized Quarticity

    # Bi-Power Variation: sum over i = 1..M-1 of |r_{t,i}| |r_{t,i+1}|.
    # head()/tail() pair each return with its successor and yield empty
    # vectors (sum 0) when fewer than two returns are available.
    bpv[n_days] <- (sqrt(2 / pi))^(-2) * sum(abs(head(r, -1)) * abs(tail(r, -1)))

    # RV Plus and RV Minus for the SHAR model spec
    rv_plus[n_days] <- sum(r[r > 0]^2)
    rv_minus[n_days] <- sum(r[r < 0]^2)

    start_idx <- end_idx + 1
  }

  keep <- seq_len(n_days)
  data.frame(
    Dates = as.Date(day_serial[keep], origin = "1899-12-30"),
    RV = rv[keep],
    RQ = rq[keep],
    BPV = bpv[keep],
    RV_plus = rv_plus[keep],
    RV_minus = rv_minus[keep]
  )
}
518 | 
# Scaling of RV, RQ, BPV, RVplus, RVminus estimates for numerical stability
#
# Args:
#   DataSet: table whose first column is left untouched (the callers in this
#            file pass the output of estimator(), where it holds the dates).
#   scalar:  factor applied to every column from the second one onwards.
#
# Returns:
#   DataSet with columns 2..ncol multiplied by `scalar`; an input with fewer
#   than two columns is returned unchanged.
DataSet_Scalar <- function(DataSet, scalar = 100000) {
  # seq_len(...)[-1] is empty when there is no second column, whereas
  # 2:ncol(DataSet) would count down (2, 1) and index a missing column.
  for (col in seq_len(ncol(DataSet))[-1]) {
    DataSet[, col] <- DataSet[, col] * scalar
  }
  return(DataSet)
}
526 | 
# Constructing Beta Table
#
# Runs coeftest() with an HC1 vcovHC() covariance on each of the seven models
# stored in forecast[[4]], formats columns 1 and 2 of the stacked results
# (estimate and standard error in lmtest's coeftest layout) as
# "estimate (std. error)" strings, one table column per model, appends rows 1
# and 2 of forecast[[5]] labelled R^2 and Adj.R^2, exports the table to
# Results/Betas_w_all_rsquareds.xlsx under the working directory and returns it.
#
# Args:
#   forecast: list whose 4th element holds the fitted models modelA, model,
#             modelQ, modelF, modelC, modelS, modelJ and whose 5th element is
#             a matrix with one column per model.
#
# Returns:
#   Character matrix with 13 rows (10 coefficient rows, one blank row, R^2,
#   Adj.R^2) and 7 columns.
betaTable <- function(forecast) {
  fits <- forecast[[4]]
  dig <- 4 # number of digits the estimates are rounded to

  # Coefficient test of a single fitted model with an HC1 covariance matrix
  robust_test <- function(fit) {
    coeftest(fit, vcov = vcovHC(fit, type = "HC1"))
  }

  Robust_T_test <- list(
    "modelA" = robust_test(fits$modelA),
    "model" = robust_test(fits$model),
    "modelQ" = robust_test(fits$modelQ),
    "modelF" = robust_test(fits$modelF),
    "modelC" = robust_test(fits$modelC),
    "modelS" = robust_test(fits$modelS),
    "modelJ" = robust_test(fits$modelJ)
  )

  # All models stacked on top of each other; keep the first two columns only
  stacked <- do.call("rbind", Robust_T_test)[, 1:2]

  # "estimate (std. error)" strings for the given rows of the stacked matrix
  cell_text <- function(rows) {
    paste(round(stacked[rows, 1], dig), " (", round(stacked[rows, 2], dig), ")", sep = "")
  }

  beta_table <- matrix(
    NA,
    nrow = 10,
    ncol = 7,
    dimnames = list(
      c("b0", "b1", "b2", "b3", "b1Q", "b2Q", "b3Q", "b1+", "b1-", "bJ"),
      c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
    )
  )

  # For every model: `own_*` indexes its coefficients 1..k, `rows_*` is where
  # those coefficients sit in `stacked`. The constant offsets are the ad-hoc
  # values of the original implementation, found by inspecting the stacked
  # matrix.
  rows_AR <- 1:length(fits$modelA$coefficients)
  beta_table[rows_AR, 1] <- cell_text(rows_AR)

  own_HAR <- 1:length(fits$model$coefficients)
  rows_HAR <- own_HAR + length(rows_AR)
  beta_table[own_HAR, 2] <- cell_text(rows_HAR)

  own_HARQ <- 1:length(fits$modelQ$coefficients)
  rows_HARQ <- own_HARQ + length(rows_HAR) + 4
  beta_table[own_HARQ, 3] <- cell_text(rows_HARQ)

  own_HARQF <- 1:length(fits$modelF$coefficients)
  rows_HARQF <- own_HARQF + length(rows_HARQ) + 8
  beta_table[own_HARQF, 4] <- cell_text(rows_HARQF)

  own_CHAR <- 1:length(fits$modelC$coefficients)
  rows_CHAR <- own_CHAR + length(own_HARQF) + 13
  beta_table[own_CHAR, 5] <- cell_text(rows_CHAR)

  # NOTE(review): the SHAR coefficients are written, in model order, to the
  # rows labelled b0, b2, b3, b1+, b1- -- confirm this matches the order of
  # the terms in the SHAR fit.
  own_SHAR <- 1:length(fits$modelS$coefficients)
  rows_SHAR <- own_SHAR + length(own_CHAR) + 20
  beta_table[c(1, 3, 4, 8, 9), 6] <- cell_text(rows_SHAR)

  own_HARJ <- 1:length(fits$modelJ$coefficients)
  rows_HARJ <- own_HARJ + length(own_SHAR) + 24
  beta_table[c(1:4, 10), 7] <- cell_text(rows_HARJ)

  # Appending a blank separator row and the two R^2 rows to the table
  table_w_rsquareds <- rbind(
    beta_table,
    NA * c(1:7),
    round(forecast[[5]][1, ], dig),
    round(forecast[[5]][2, ], dig)
  )
  rownames(table_w_rsquareds)[c(12, 13)] <- c("R^2", "Adj.R^2")

  # Exporting Results
  export_path <- paste0(getwd(), "/Results/Betas_w_all_rsquareds.xlsx")
  write.xlsx(table_w_rsquareds, export_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)

  table_w_rsquareds
}
593 | 
# Summary Stats
#
# Reads one Excel sheet per entry of `stocks` from Data/<freq>.xlsx, turns it
# into daily realized measures with estimator() and DataSet_Scalar(), and
# tabulates descriptive statistics of the RV column per stock. The table is
# exported to Results/SummaryStats_<symbols>_<freq>.xlsx under the working
# directory and returned.
#
# Args:
#   stocks: character vector of sheet names; the symbol shown in the table is
#           the sheet name without its last 10 characters.
#   scalar: factor applied to Min, Mean, Median, Max and Std. Dev. Skewness
#           and Kurtosis are reported unscaled because they do not change
#           when the underlying series is multiplied by a constant.
#   freq:   file name stem of the workbook inside Data/.
#
# Returns:
#   Character matrix with a Symbol column followed by the seven statistics.
summaryStats <- function(stocks, scalar = 1, freq = "5min_extended") {
  # Sheet names minus their trailing 10 characters
  Symbol <- substr(stocks, 1, nchar(stocks) - 10)

  sumStats <- matrix(NA, nrow = length(stocks), ncol = 7)
  colnames(sumStats) <- c("Min", "Mean", "Median", "Max", "Std. Dev.", "Skewness", "Kurtosis")
  rownames(sumStats) <- Symbol

  # Realized Measures of all assets in 'stocks'
  excel_file <- paste0("Data/", freq, ".xlsx")
  DataSets <- vector("list", length(stocks))
  for (stock in seq_along(stocks)) {
    data <- as.data.frame(read_excel(excel_file, sheet = stocks[stock]))
    data <- data[c("BarTp", "Trade")]
    colnames(data) <- c("Dates", "Open")
    data <- data[-c(1:4), ] # the first four rows are dropped before use
    data$Open <- as.numeric(data$Open)
    data_log <- data.frame(Dates = data$Dates, Open = log(data$Open),
                           stringsAsFactors = FALSE)

    DataSet <- estimator(data_log)
    DataSets[[stock]] <- DataSet_Scalar(DataSet) # DataSet Scalar Function!
  }
  names(DataSets) <- stocks

  # Summary Stats to sumStats table
  for (stk in seq_along(stocks)) {
    curRV <- DataSets[[stk]]$RV # RV column of the current stock
    sumStats[stk, 1] <- round(min(curRV) * scalar, 3)
    sumStats[stk, 2] <- round(mean(curRV) * scalar, 3)
    sumStats[stk, 3] <- round(median(curRV) * scalar, 3)
    sumStats[stk, 4] <- round(max(curRV) * scalar, 3)
    sumStats[stk, 5] <- round(sd(curRV) * scalar, 3)
    # Scale-invariant moments: multiplying them by `scalar` would report a
    # number that is not the skewness/kurtosis of any series.
    sumStats[stk, 6] <- round(skewness(curRV), 1)
    sumStats[stk, 7] <- round(kurtosis(curRV), 1)
  }
  sumStats <- cbind(Symbol, sumStats)
  Symbol_str <- str_c(Symbol, collapse = "_") # stringr function dependency
  sumStats_file_path <- paste0(getwd(), "/Results/SummaryStats_", Symbol_str, "_", freq, ".xlsx")

  # Exporting Summary Stats
  write.xlsx(sumStats, sumStats_file_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)
  print(paste("All values in the Summary Statistics except Skewness and Kurtosis are scaled by a factor of: ", scalar, sep = ""))
  return(sumStats)
}
647 | 
# Error Computation & Table Construction
#
# For every sheet in `stocks`: reads the data from Data/<freq>.xlsx, builds
# the realized measures with estimator() and DataSet_Scalar(), runs HAR() with
# its default settings and collects the 14 error values it returns in its 2nd
# element (MSE and MAE for each of the seven models). Two tables are exported
# under Results/ in the working directory: the raw errors and the errors
# divided by the HAR model's error of the same type.
#
# NOTE: `freq` is not a parameter of this function; it is looked up in the
# enclosing (global) environment and must be defined before the call.
#
# Args:
#   stocks:       character vector of sheet names; the row label is the sheet
#                 name without its last 10 characters.
#   dig:          number of digits the raw errors are rounded to.
#   errorsScalar: factor applied to the raw errors before rounding.
#
# Returns:
#   list with the character matrices `errorsMatrix` (raw errors) and
#   `errorsMatrix_BM` (errors relative to HAR). Earlier revisions combined the
#   two with c(), which flattened them into a single unnamed character vector.
errorsTable <- function(stocks, dig = 5, errorsScalar = 1) {
  model_names <- c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
  # Sheet names minus their trailing 10 characters
  Symbol <- substr(stocks, 1, nchar(stocks) - 10)

  # Row 1 is a sub-header (MSE/MAE); each model name spans two columns
  errorsMatrix <- matrix(NA, nrow = length(stocks) + 1, ncol = 14)
  rownames(errorsMatrix) <- c("", Symbol)
  colnames(errorsMatrix) <- rep(model_names, each = 2)
  errorsMatrix[1, ] <- rep(c("MSE", "MAE"), times = 7)

  # Forecasts of all assets in 'stocks'
  excel_file <- paste0("Data/", freq, ".xlsx")
  forecasts <- vector("list", length(stocks))
  for (stock in seq_along(stocks)) {
    data <- as.data.frame(read_excel(excel_file, sheet = stocks[stock]))
    data <- data[c("BarTp", "Trade")]
    colnames(data) <- c("Dates", "Open")
    data <- data[-c(1:4), ] # the first four rows are dropped before use
    data$Open <- as.numeric(data$Open)
    data_log <- data.frame(Dates = data$Dates, Open = log(data$Open),
                           stringsAsFactors = FALSE)

    DataSet <- DataSet_Scalar(estimator(data_log))
    forecasts[[stock]] <- HAR(DataSet)
  }
  names(forecasts) <- stocks

  # Rows of the tables that hold stock results (row 1 is the sub-header)
  stock_rows <- seq_along(stocks) + 1

  for (s in seq_along(stocks)) {
    all_errors <- forecasts[[s]][[2]]
    for (e in seq_along(all_errors)) {
      errorsMatrix[s + 1, e] <- round(all_errors[[e]] * errorsScalar, dig)
    }
  }

  # Benchmarked copy: plain assignment is enough, R copies on modification
  errorsMatrix_BM <- errorsMatrix
  har_cols <- grep("^HAR$", colnames(errorsMatrix))
  HAR_MSE_idx <- har_cols[1]
  HAR_MAE_idx <- har_cols[2]

  for (e in seq_len(ncol(errorsMatrix))) {
    # Odd columns hold MSE, even columns hold MAE
    benchmark_idx <- if (e %% 2 != 0) HAR_MSE_idx else HAR_MAE_idx
    errorsMatrix_BM[stock_rows, e] <-
      as.numeric(errorsMatrix[stock_rows, e]) / as.numeric(errorsMatrix[stock_rows, benchmark_idx])
  }

  # Exporting Errors Tables (unbenchmarked and benchmarked to HAR)
  errorsMatrix_file_path <- paste0(getwd(), "/Results/Errors_UnBenchmarked.xlsx")
  errorsMatrix_BM_file_path <- paste0(getwd(), "/Results/Errors_Benchmarked_to_HAR.xlsx")
  write.xlsx(errorsMatrix, errorsMatrix_file_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)
  write.xlsx(errorsMatrix_BM, errorsMatrix_BM_file_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)

  errorsTables <- list("errorsMatrix" = errorsMatrix, "errorsMatrix_BM" = errorsMatrix_BM)
  return(errorsTables)
}
737 | 
738 | 
######## SETTINGS ########
# Sheet names inside the workbook and the file name stem of the workbook
stocks <- c("SPY US Equity", "MSFT US Equity", "MCD US Equity", "JPM US Equity", "DIS US Equity")
freq <- "5min_extended"
##########################

# Data Prep: load the first stock's sheet from Data/<freq>.xlsx
log_returns_scalar <- 1
excel_file <- paste0("Data/", freq, ".xlsx")
data_name <- stocks[1]
data <- as.data.frame(read_excel(excel_file, sheet = data_name))
# Keep the BarTp and Trade columns under the names the estimator reads
data <- setNames(data[c("BarTp", "Trade")], c("Dates", "Open"))
data <- data[-(1:4), ] # the first four rows are dropped before use
data$Open <- as.numeric(data$Open)
# Same timestamps, log of the Open column times log_returns_scalar
data_log <- setNames(
  cbind(data$Dates, as.data.frame(log(data$Open) * log_returns_scalar)),
  c("Dates", "Open")
)

# Realized measures from the log series (estimator(data) would use the raw
# series instead), scaled with DataSet_Scalar's default factor
DataSet <- DataSet_Scalar(estimator(data_log))
forecast <- HAR(DataSet, out_sample = 96, extra_plots = TRUE)
760 | 
761 | 
762 | #### FINAL SECTION: Generating tables, retrieving out-of-sample trading dates for plot, etc ######
763 | 
764 | ###### This section generates the data exports for the tables in the paper
765 | # beta_table = betaTable(forecast)
766 | #summStats = summaryStats(stocks)
767 | #errorsTables = errorsTable(stocks)
768 | 
769 | # Function for plotDates (out-of-sample forecasting)
770 | # plotDates = substring(data$Dates, first = 1, last = 5)
771 | # plotDates = unique(plotDates)[which(!is.na(unique(plotDates)))]
772 | # plotDates = as.Date(as.numeric(plotDates), origin = "1899-12-30")
773 | # length(plotDates)
774 | 
775 | # Realized Quarticity Plots of the 5 stocks
776 | # for (i in 1:5) {
777 | #   data_name = stocks[i]
778 | #   data = as.data.frame(read_excel(excel_file, sheet = data_name))
779 | #   data = data[c("BarTp", "Trade")]
780 | #   colnames(data) = c("Dates", "Open")
781 | #   data = data[-c(1:4),]
782 | #   data$Open = as.numeric(data$Open)
783 | #   data_log = cbind(data$Dates, as.data.frame(log(data$Open)*log_returns_scalar))
784 | #   colnames(data_log) = c("Dates", "Open")
785 | #   
786 | #   #DataSet = estimator(data)
787 | #   DataSet = estimator(data_log)
788 | #   DataSet = DataSet_Scalar(DataSet)
789 | #   plot(DataSet$RQ, type ="l", main = data_name)
790 | #   print(data_name)
791 | #   print(summary(DataSet$RQ))
792 | # }
793 | 
794 | 


--------------------------------------------------------------------------------