├── img ├── images.txt ├── RV.PNG ├── rt.PNG ├── HAR.PNG ├── HAR_models.PNG ├── modelTest.PNG └── inSampleBetas.PNG ├── LICENSE ├── README.md └── HAR_RV.R /img/images.txt: -------------------------------------------------------------------------------- 1 | this folder contains the images 2 | -------------------------------------------------------------------------------- /img/RV.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/RV.PNG -------------------------------------------------------------------------------- /img/rt.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/rt.PNG -------------------------------------------------------------------------------- /img/HAR.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/HAR.PNG -------------------------------------------------------------------------------- /img/HAR_models.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/HAR_models.PNG -------------------------------------------------------------------------------- /img/modelTest.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/modelTest.PNG -------------------------------------------------------------------------------- /img/inSampleBetas.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/inSampleBetas.PNG -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jacob Hein 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HAR models forecasting realized volatility in US stocks 2 | Various heterogenous autoregressive (HAR) models in Bollerslev et al. (2016) implemented in R to forecast the intraday measure of realized volatilty in select US stocks based on high-frequency trading prices. 3 | 4 | ### Data 5 | Intraday prices of US stocks of Microsoft, McDonalds, J.P. Morgan & Chase, Disney and a S&P500 trust fund (SPY) were extracted at frequencies of 1, 5 and 10 minutes with the Bloomberg Terminal. The time-series span from March 1st 2019 to November 1st 2019, 6 | 7 | ### Returns and realized volatility 8 | Intraday returns are computed as the change in intraday prices of a given financial asset, 9 | , the sum of all these intraday returns constitute the realized volatilty (variance if squared) estimate of a given day: 10 | 11 | 12 | 13 | Related measures like realized quarticity (RQ) and bi-power variance (BPV) that are used by some model variants were also estimated in R 14 | 15 | ```r 16 | #RV, RQ, BPV Estimator 17 | estimator <- function(data) { 18 | RV_t_estimates = c() 19 | RQ_t_estimates = c() 20 | BPV_t_estimates = c() 21 | RV_t_plus_estimates = c() 22 | RV_t_minus_estimates = c() 23 | RV_t_dates = c() 24 | M = 0 # Intraday obs used in estimation of RV_t 25 | 26 | for (t in 1:length(data$Dates)) { 27 | # t accounts for the final number of daily RV_t estimates 28 | RV_t_i_estimates = c() # M number of r_t,i to be summed up 29 | M_Q = M # Counter 30 | 31 | while (substring(data$Dates[t+M], first = 1, last = 5) == substring(data$Dates[t+M+1], first = 1, last = 5) 32 | # The below AND condition breaks while-loop when no more intraday obs available 33 | & !is.na(data$Dates[t+M+1])) { 34 | 35 | # Intraday returns 36 | RV_t_i = (data$Open[t+M+1] - data$Open[t+M]) 37 | RV_t_i_estimates = c(RV_t_i_estimates, RV_t_i) 38 | 39 | M = M + 1 40 | } 41 | 42 | if (is.na(data$Dates[t+M])) { 43 | break # This if-clause breaks for-loop when the eventual NA intraday obs is reached 44 | } 45 | 46 | RV_t = sum(RV_t_i_estimates^2) #Realized Variance 47 | 48 | RQ_t = ((M-M_Q)/3) * sum(RV_t_i_estimates^4) 49 | 50 | BPV_t_i_estimates = c() 51 | # i in 1:len(...)-1 corresponds to summing up to M-1 as in Bollerslev (2016) 52 | for (i in 1:(length(RV_t_i_estimates)-1)) { 53 | BPV_t_i_estimates = c(BPV_t_i_estimates, abs(RV_t_i_estimates[i] * abs(RV_t_i_estimates[i+1]))) 54 | } 55 | BPV_t = (sqrt(2/pi))^(-2) * sum(BPV_t_i_estimates) 56 | 57 | # RV Plus and RV Minus for SHAR model spec 58 | RV_t_plus = sum(RV_t_i_estimates[RV_t_i_estimates > 0]^2) 59 | RV_t_minus = sum(RV_t_i_estimates[RV_t_i_estimates < 0]^2) 60 | 61 | RV_t_estimates = c(RV_t_estimates, RV_t) 62 | RQ_t_estimates = c(RQ_t_estimates, RQ_t) 63 | BPV_t_estimates = c(BPV_t_estimates, BPV_t) 64 | RV_t_plus_estimates = c(RV_t_plus_estimates, RV_t_plus) 65 | RV_t_minus_estimates = c(RV_t_minus_estimates, RV_t_minus) 66 | 67 | # Dates 68 | RV_t_date = as.numeric(substring(data$Dates[t+M], first = 1, last = 5)) 69 | RV_t_dates = c(RV_t_dates, RV_t_date) 70 | } 71 | 72 | RV_df = as.data.frame(RV_t_dates) 73 | RV_df = cbind(RV_df, RV_t_estimates, RQ_t_estimates, BPV_t_estimates, RV_t_plus_estimates, RV_t_minus_estimates) 74 | colnames(RV_df) = c("Dates", "RV", "RQ", "BPV", "RV_plus", "RV_minus") 75 | RV_df$Dates = as.Date(RV_df$Dates, origin = "1899-12-30") 76 | return(RV_df) 77 | } 78 | ``` 79 | 80 | 81 | 82 | ### HAR models 83 | The objective is to fit different HAR model variants to an in-sample portion of the time-series, to then predict an out-of-sample test sample. The fundamental HAR model is given: 84 | 85 | 86 | 87 | This project includes implementation and tests of model variants that account for error minimizing returns quarticity, leverage effects and jumps in the data. 88 | 89 | The R implementation could have been optimized by vectorizing the computations, it nevertheless runs in a small amount of time: 90 | 91 | ```r 92 | #Forecast, Errors, Betas & Plots 93 | HAR <- function(data, out_sample = 96, plot_scalar = 1, extra_plots = FALSE) { 94 | 95 | RV = data$RV 96 | RQ = data$RQ 97 | BPV = data$BPV 98 | RV_p = data$RV_plus 99 | RV_m = data$RV_minus 100 | 101 | nobs = length(RV) 102 | in_sample = nobs - out_sample 103 | 104 | outRV = RV[(in_sample+1):(length(RV))] # We +1 to get equal length as out_sample size 105 | lag = 22 # 22 days lag is equivalent to one month of trading days lag 106 | 107 | all_predsA = rep(0, times = out_sample) 108 | all_preds = rep(0, times = out_sample) 109 | all_predsQ = rep(0, times = out_sample) 110 | all_predsF = rep(0, times = out_sample) 111 | all_predsC = rep(0, times = out_sample) 112 | all_predsS = rep(0, times = out_sample) 113 | all_predsJ = rep(0, times = out_sample) 114 | 115 | all_betasA = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4) 116 | all_betas = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4) 117 | all_betasQ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5) 118 | all_betasF = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 7) 119 | all_betasC = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4) 120 | all_betasS = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5) 121 | all_betasJ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5) 122 | 123 | for (t in 1:(out_sample)) { 124 | # Estimation 125 | y = RV[(lag + t + 1):(in_sample + t)] 126 | XA = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3) 127 | X = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3) 128 | XQ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4) 129 | XF = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 6) 130 | XC = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3) 131 | XS = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4) 132 | XJ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4) 133 | 134 | for (i in 0:(in_sample - lag - 1)) { 135 | 136 | # AR(3) 137 | XA[i+1,1] = RV[(-1+i+1+lag + t)] 138 | XA[i+1,2] = RV[(-2+i+1+lag + t)] 139 | XA[i+1,3] = RV[(-3+i+1+lag + t)] 140 | 141 | # HAR 142 | X[i+1,1] = RV[(-1+i+1+lag + t)] 143 | X[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 144 | X[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 145 | 146 | # HARQ 147 | XQ[i+1,1] = RV[(-1+i+1+lag + t)] 148 | XQ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 149 | XQ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 150 | XQ[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)]) 151 | 152 | # HARQ-F 153 | XF[i+1,1] = RV[(-1+i+1+lag + t)] 154 | XF[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 155 | XF[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 156 | XF[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)]) 157 | XF[i+1,5] = (((1/5)*sum(RQ[(-5+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]))) 158 | XF[i+1,6] = (((1/22)*sum(RQ[(-22+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]))) 159 | 160 | # CHAR 161 | XC[i+1,1] = BPV[(-1+i+1+lag + t)] 162 | XC[i+1,2] = (1/5)*sum(BPV[(-5+i+1+lag + t):(i+lag + t)]) 163 | XC[i+1,3] = (1/22)*sum(BPV[(-22+i+1+lag + t):(i+lag + t)]) 164 | 165 | # SHAR 166 | XS[i+1,1] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 167 | XS[i+1,2] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 168 | XS[i+1,3] = RV_p[(-1+i+1+lag + t)] 169 | XS[i+1,4] = RV_m[(-1+i+1+lag + t)] 170 | 171 | # HAR-J 172 | XJ[i+1,1] = RV[(-1+i+1+lag + t)] 173 | XJ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 174 | XJ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 175 | XJ[i+1,4] = max((RV[(-1+i+1+lag + t)] - BPV[(-1+i+1+lag + t)]), 0) 176 | 177 | } 178 | 179 | 180 | # R Regression at t=1 for Standard Errors before performing any out-of-sample forecasts 181 | if (t==out_sample) { 182 | modelA = lm(y ~ XA) 183 | model = lm(y ~ X) 184 | modelQ = lm(y ~ XQ) 185 | modelF = lm(y ~ XF) 186 | modelC = lm(y ~ XC) 187 | modelS = lm(y ~ XS) 188 | # HAR-J if statement: 189 | if (sum(XJ[,4]) ==0) { 190 | XJ[1,4] = 0.1 # Ensure invertibility if XJ singular with zero column 191 | } 192 | modelJ = lm(y ~ XJ) 193 | models_at_t_1 = list("modelA" = modelA, "model" = model, 194 | "modelQ" = modelQ, "modelF" = modelF, 195 | "modelC" = modelC, "modelS" = modelS, 196 | "modelJ" = modelJ) 197 | 198 | # Below we retrieve R^2 & Adjusted R^2, prior to out-of-sample forecasts 199 | num_of_models = 7 200 | r_squareds = matrix(0, nrow = 2, ncol = num_of_models) 201 | for (val in 1:length(models_at_t_1)) { 202 | r_squareds[1,val] = summary(models_at_t_1[[val]])$r.squared 203 | r_squareds[2,val] = summary(models_at_t_1[[val]])$adj.r.squared 204 | } 205 | rownames(r_squareds) = c("R-squared", "Adj.R-squared") 206 | colnames(r_squareds) = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J") 207 | 208 | } 209 | 210 | XA = cbind(rep(1, times = nrow(XA)), XA) 211 | X = cbind(rep(1, times = nrow(X)), X) 212 | XQ = cbind(rep(1, times = nrow(XQ)), XQ) 213 | XF = cbind(rep(1, times = nrow(XF)), XF) 214 | XC = cbind(rep(1, times = nrow(XC)), XC) 215 | XS = cbind(rep(1, times = nrow(XS)), XS) 216 | XJ = cbind(rep(1, times = nrow(XJ)), XJ) 217 | 218 | # HAR-J if statement: 219 | if (sum(XJ[,5]) ==0) { 220 | XJ[1,5] = 0.1 # Ensure invertibility if XJ singular with zero column 221 | } 222 | 223 | # OLS Regression 224 | betasA = solve(t(XA) %*% XA) %*% t(XA) %*% y 225 | betas = solve(t(X) %*% X) %*% t(X) %*% y 226 | betasQ = solve(t(XQ) %*% XQ) %*% t(XQ) %*% y 227 | betasF = solve(t(XF) %*% XF) %*% t(XF) %*% y 228 | betasC = solve(t(XC) %*% XC) %*% t(XC) %*% y 229 | betasS = solve(t(XS) %*% XS) %*% t(XS) %*% y 230 | betasJ = solve(t(XJ) %*% XJ) %*% t(XJ) %*% y 231 | 232 | b0A = betasA[1] 233 | b1A = betasA[2] 234 | b2A = betasA[3] 235 | b3A = betasA[4] 236 | 237 | b0 = betas[1] 238 | b1 = betas[2] 239 | b2 = betas[3] 240 | b3 = betas[4] 241 | 242 | b0Q = betasQ[1] 243 | b1Q = betasQ[2] 244 | b2Q = betasQ[3] 245 | b3Q = betasQ[4] 246 | b1Q_Q = betasQ[5] 247 | 248 | b0F = betasF[1] 249 | b1F = betasF[2] 250 | b2F = betasF[3] 251 | b3F = betasF[4] 252 | b1F_Q = betasF[5] 253 | b2F_Q = betasF[6] 254 | b3F_Q = betasF[7] 255 | 256 | b0C = betasC[1] 257 | b1C = betasC[2] 258 | b2C = betasC[3] 259 | b3C = betasC[4] 260 | 261 | b0S = betasS[1] 262 | b1S_P = betasS[2] 263 | b2S = betasS[3] 264 | b3S = betasS[4] 265 | b1S_M = betasS[5] 266 | 267 | b0J = betasJ[1] 268 | b1J = betasJ[2] 269 | b2J = betasJ[3] 270 | b3J = betasJ[4] 271 | bJ = betasJ[5] 272 | 273 | all_betasA[t,1] = b0A 274 | all_betasA[t,2] = b1A 275 | all_betasA[t,3] = b2A 276 | all_betasA[t,4] = b3A 277 | 278 | all_betas[t,1] = b0 279 | all_betas[t,2] = b1 280 | all_betas[t,3] = b2 281 | all_betas[t,4] = b3 282 | 283 | all_betasQ[t,1] = b0Q 284 | all_betasQ[t,2] = b1Q 285 | all_betasQ[t,3] = b2Q 286 | all_betasQ[t,4] = b3Q 287 | all_betasQ[t,5] = b1Q_Q 288 | 289 | all_betasF[t,1] = b0F 290 | all_betasF[t,2] = b1F 291 | all_betasF[t,3] = b2F 292 | all_betasF[t,4] = b3F 293 | all_betasF[t,5] = b1F_Q 294 | all_betasF[t,6] = b2F_Q 295 | all_betasF[t,7] = b3F_Q 296 | 297 | all_betasC[t,1] = b0C 298 | all_betasC[t,2] = b1C 299 | all_betasC[t,3] = b2C 300 | all_betasC[t,4] = b3C 301 | 302 | all_betasS[t,1] = b0S 303 | all_betasS[t,2] = b1S_P 304 | all_betasS[t,3] = b2S 305 | all_betasS[t,4] = b3S 306 | all_betasS[t,5] = b1S_M 307 | 308 | all_betasJ[t,1] = b0J 309 | all_betasJ[t,2] = b1J 310 | all_betasJ[t,3] = b2J 311 | all_betasJ[t,4] = b3J 312 | all_betasJ[t,5] = bJ 313 | 314 | 315 | 316 | # Prediction at time-step t 317 | predA = b0A + b1A*XA[nrow(XA),2] + b2A*XA[nrow(XA),3] + b3A*XA[nrow(XA),4] 318 | pred = b0 + b1*X[nrow(X),2] + b2*X[nrow(X),3] + b3*X[nrow(X),4] 319 | predQ = b0Q + b1Q*XQ[nrow(XQ),2] + b2Q*XQ[nrow(XQ),3] + b3Q*XQ[nrow(XQ),4] + b1Q_Q*XQ[nrow(XQ),5] 320 | predF = b0F + b1F*XF[nrow(XF),2] + b2F*XF[nrow(XF),3] + b3F*XF[nrow(XF),4] + b1F_Q*XF[nrow(XF),5] + b2F_Q*XF[nrow(XF),6] + b3F_Q*XF[nrow(XF),7] 321 | predC = b0C + b1C*XC[nrow(XC),2] + b2C*XC[nrow(XC),3] + b3C*XC[nrow(XC),4] 322 | predS = b0S + b1S_P*XS[nrow(XS),2] + b2S*XS[nrow(XS),3] + b3S*XS[nrow(XS),4] + b1S_M*XS[nrow(XS),5] 323 | predJ = b0J + b1J*XJ[nrow(XJ),2] + b2J*XJ[nrow(XJ),3] + b3J*XJ[nrow(XJ),4] + bJ*XJ[nrow(XJ),5] 324 | 325 | # Saving time-step t prediction within list for error computations 326 | all_predsA[t] = predA 327 | all_preds[t] = pred 328 | all_predsQ[t] = predQ 329 | all_predsF[t] = predF 330 | all_predsC[t] = predC 331 | all_predsS[t] = predS 332 | all_predsJ[t] = predJ 333 | 334 | } 335 | 336 | # Error Computations: Mean Squared Error & Mean Absolute Error 337 | AR_mse = mean((outRV - all_predsA)^2) 338 | AR_mae = mean(abs(outRV - all_predsA)) 339 | 340 | HAR_mse = mean((outRV - all_preds)^2) 341 | HAR_mae = mean(abs(outRV - all_preds)) 342 | 343 | HARQ_mse = mean((outRV - all_predsQ)^2) 344 | HARQ_mae = mean(abs(outRV - all_predsQ)) 345 | 346 | HARQF_mse = mean((outRV - all_predsF)^2) 347 | HARQF_mae = mean(abs(outRV - all_predsF)) 348 | 349 | CHAR_mse = mean((outRV - all_predsC)^2) 350 | CHAR_mae = mean(abs(outRV - all_predsC)) 351 | 352 | SHAR_mse = mean((outRV - all_predsS)^2) 353 | SHAR_mae = mean(abs(outRV - all_predsS)) 354 | 355 | HARJ_mse = mean((outRV - all_predsJ)^2) 356 | HARJ_mae = mean(abs(outRV - all_predsJ)) 357 | 358 | 359 | errors_list = list("AR_mse" = AR_mse, "AR_mae" = AR_mae, "HAR_mse" = HAR_mse, "HAR_mae" = HAR_mae, 360 | "HARQ_mse" = HARQ_mse, "HARQ_mae" = HARQ_mae, "HARQF_mse" = HARQF_mse, "HARQF_mae" = HARQF_mae, 361 | "CHAR_mse" = CHAR_mse, "CHAR_mae" = CHAR_mae, "SHAR_mse" = SHAR_mse, "SHAR_mae" = SHAR_mae, 362 | "HARJ_mse" = HARJ_mse, "HARJ_mae" = HARJ_mae) 363 | 364 | 365 | # Output formatting: 366 | output = matrix(outRV) 367 | out_sample_dates = data$Date[(in_sample+1):length(data$Date)] 368 | output = cbind(out_sample_dates, output, all_predsA, all_preds, all_predsQ, all_predsF, all_predsC, all_predsS, all_predsJ) 369 | output_df = as.data.frame(output) 370 | colnames(output_df) = c("Date", "outRV", "all_predsA", "all_preds", "all_predsQ", "all_predsF", "all_predsC", "all_predsS", "all_predsJ") 371 | betas_list = list("all_betasA" = all_betasA, "all_betas" = all_betas, "all_betasQ" = all_betasQ, 372 | "all_betasF" = all_betasF, "all_betasC" = all_betasC, "all_betasS" = all_betasS, 373 | "all_betasJ" = all_betasJ) 374 | 375 | output_df_errors_betas = list(output_df, errors_list, betas_list, models_at_t_1, r_squareds) 376 | 377 | return(output_df_errors_betas) 378 | } 379 | ``` 380 | 381 | 382 | ### Beta coefficients estimated in-sample 383 | 384 | 385 | 386 | ### Out-of-sample forecasting 387 | 388 | 389 | 390 | 391 | 392 | ### Project conclusion 393 | 394 | HAR-type models fits nicely to measures representing intraday price movements. The out-of-sample forecast results suggest that the HARQ-F model outperforms all other variants along with a simple AR(3) model. 395 | 396 | The HARQ-F includes a realized quarticity measure to minimze error, which may contribute to the superior forecasting ability of this model variant. 397 | 398 | Time-series extended further back in time would be desirable for both in-sample coefficient estimations and out-of-sample forecasting, however the limitation of readily available intra-day price data prohibited a longer time horizon in this project. 399 | -------------------------------------------------------------------------------- /HAR_RV.R: -------------------------------------------------------------------------------- 1 | # install.packages("rstudioapi") 2 | # install.packages("rJava") 3 | # install.packages("readxl") 4 | # install.packages("xts") 5 | # install.packages("sandwich") 6 | # install.packages("lmtest") 7 | # install.packages("xlsx") 8 | # install.packages("e1071") 9 | # install.packages("stringr") 10 | # install.packages("rlang") 11 | # install.packages(ggplot2) 12 | 13 | setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # Set working directive to current folder 14 | options(scipen=999) # No scientific floating points notation 15 | 16 | library(xts) # Time-indexed data frames, ideal for plots 17 | library(readxl) # Reading data 18 | library(xlsx) # Exporting results 19 | library(e1071) # Skew & Kurtosis for Summary Statistics Table 20 | library(stringr) # String formatting 21 | library(rlang) 22 | library(ggplot2) # Plots 23 | theme_set(theme_minimal()) # Plot Window 24 | ############### 25 | # For Robust Standard Errors 26 | library(lmtest) 27 | library(sandwich) 28 | ############### 29 | 30 | #Forecast, Errors, Betas & Plots 31 | HAR <- function(data, out_sample = 96, plot_scalar = 1, extra_plots = FALSE) { 32 | 33 | RV = data$RV 34 | RQ = data$RQ 35 | BPV = data$BPV 36 | RV_p = data$RV_plus 37 | RV_m = data$RV_minus 38 | 39 | nobs = length(RV) 40 | in_sample = nobs - out_sample 41 | 42 | outRV = RV[(in_sample+1):(length(RV))] # We +1 to get equal length as out_sample size 43 | lag = 22 # 22 days lag is equivalent to one month of trading days lag 44 | 45 | all_predsA = rep(0, times = out_sample) 46 | all_preds = rep(0, times = out_sample) 47 | all_predsQ = rep(0, times = out_sample) 48 | all_predsF = rep(0, times = out_sample) 49 | all_predsC = rep(0, times = out_sample) 50 | all_predsS = rep(0, times = out_sample) 51 | all_predsJ = rep(0, times = out_sample) 52 | 53 | all_betasA = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4) 54 | all_betas = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4) 55 | all_betasQ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5) 56 | all_betasF = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 7) 57 | all_betasC = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4) 58 | all_betasS = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5) 59 | all_betasJ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5) 60 | 61 | for (t in 1:(out_sample)) { 62 | # Estimation 63 | y = RV[(lag + t + 1):(in_sample + t)] 64 | XA = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3) 65 | X = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3) 66 | XQ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4) 67 | XF = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 6) 68 | XC = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3) 69 | XS = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4) 70 | XJ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4) 71 | 72 | for (i in 0:(in_sample - lag - 1)) { 73 | 74 | # AR(3) 75 | XA[i+1,1] = RV[(-1+i+1+lag + t)] 76 | XA[i+1,2] = RV[(-2+i+1+lag + t)] 77 | XA[i+1,3] = RV[(-3+i+1+lag + t)] 78 | 79 | # HAR 80 | X[i+1,1] = RV[(-1+i+1+lag + t)] 81 | X[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 82 | X[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 83 | 84 | # HARQ 85 | XQ[i+1,1] = RV[(-1+i+1+lag + t)] 86 | XQ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 87 | XQ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 88 | XQ[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)]) 89 | 90 | # HARQ-F 91 | XF[i+1,1] = RV[(-1+i+1+lag + t)] 92 | XF[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 93 | XF[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 94 | XF[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)]) 95 | XF[i+1,5] = (((1/5)*sum(RQ[(-5+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]))) 96 | XF[i+1,6] = (((1/22)*sum(RQ[(-22+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]))) 97 | 98 | # CHAR 99 | XC[i+1,1] = BPV[(-1+i+1+lag + t)] 100 | XC[i+1,2] = (1/5)*sum(BPV[(-5+i+1+lag + t):(i+lag + t)]) 101 | XC[i+1,3] = (1/22)*sum(BPV[(-22+i+1+lag + t):(i+lag + t)]) 102 | 103 | # SHAR 104 | XS[i+1,1] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 105 | XS[i+1,2] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 106 | XS[i+1,3] = RV_p[(-1+i+1+lag + t)] 107 | XS[i+1,4] = RV_m[(-1+i+1+lag + t)] 108 | 109 | # HAR-J 110 | XJ[i+1,1] = RV[(-1+i+1+lag + t)] 111 | XJ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)]) 112 | XJ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)]) 113 | XJ[i+1,4] = max((RV[(-1+i+1+lag + t)] - BPV[(-1+i+1+lag + t)]), 0) 114 | 115 | } 116 | 117 | 118 | # R Regression at t=1 for Standard Errors before performing any out-of-sample forecasts 119 | if (t==out_sample) { 120 | modelA = lm(y ~ XA) 121 | model = lm(y ~ X) 122 | modelQ = lm(y ~ XQ) 123 | modelF = lm(y ~ XF) 124 | modelC = lm(y ~ XC) 125 | modelS = lm(y ~ XS) 126 | # HAR-J if statement: 127 | if (sum(XJ[,4]) ==0) { 128 | XJ[1,4] = 0.1 # Ensure invertibility if XJ singular with zero column 129 | } 130 | modelJ = lm(y ~ XJ) 131 | models_at_t_1 = list("modelA" = modelA, "model" = model, 132 | "modelQ" = modelQ, "modelF" = modelF, 133 | "modelC" = modelC, "modelS" = modelS, 134 | "modelJ" = modelJ) 135 | 136 | # Below we retrieve R^2 & Adjusted R^2, prior to out-of-sample forecasts 137 | num_of_models = 7 138 | r_squareds = matrix(0, nrow = 2, ncol = num_of_models) 139 | for (val in 1:length(models_at_t_1)) { 140 | r_squareds[1,val] = summary(models_at_t_1[[val]])$r.squared 141 | r_squareds[2,val] = summary(models_at_t_1[[val]])$adj.r.squared 142 | } 143 | rownames(r_squareds) = c("R-squared", "Adj.R-squared") 144 | colnames(r_squareds) = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J") 145 | 146 | } 147 | 148 | XA = cbind(rep(1, times = nrow(XA)), XA) 149 | X = cbind(rep(1, times = nrow(X)), X) 150 | XQ = cbind(rep(1, times = nrow(XQ)), XQ) 151 | XF = cbind(rep(1, times = nrow(XF)), XF) 152 | XC = cbind(rep(1, times = nrow(XC)), XC) 153 | XS = cbind(rep(1, times = nrow(XS)), XS) 154 | XJ = cbind(rep(1, times = nrow(XJ)), XJ) 155 | 156 | # HAR-J if statement: 157 | if (sum(XJ[,5]) ==0) { 158 | XJ[1,5] = 0.1 # Ensure invertibility if XJ singular with zero column 159 | } 160 | 161 | # OLS Regression 162 | betasA = solve(t(XA) %*% XA) %*% t(XA) %*% y 163 | betas = solve(t(X) %*% X) %*% t(X) %*% y 164 | betasQ = solve(t(XQ) %*% XQ) %*% t(XQ) %*% y 165 | betasF = solve(t(XF) %*% XF) %*% t(XF) %*% y 166 | betasC = solve(t(XC) %*% XC) %*% t(XC) %*% y 167 | betasS = solve(t(XS) %*% XS) %*% t(XS) %*% y 168 | betasJ = solve(t(XJ) %*% XJ) %*% t(XJ) %*% y 169 | 170 | b0A = betasA[1] 171 | b1A = betasA[2] 172 | b2A = betasA[3] 173 | b3A = betasA[4] 174 | 175 | b0 = betas[1] 176 | b1 = betas[2] 177 | b2 = betas[3] 178 | b3 = betas[4] 179 | 180 | b0Q = betasQ[1] 181 | b1Q = betasQ[2] 182 | b2Q = betasQ[3] 183 | b3Q = betasQ[4] 184 | b1Q_Q = betasQ[5] 185 | 186 | b0F = betasF[1] 187 | b1F = betasF[2] 188 | b2F = betasF[3] 189 | b3F = betasF[4] 190 | b1F_Q = betasF[5] 191 | b2F_Q = betasF[6] 192 | b3F_Q = betasF[7] 193 | 194 | b0C = betasC[1] 195 | b1C = betasC[2] 196 | b2C = betasC[3] 197 | b3C = betasC[4] 198 | 199 | b0S = betasS[1] 200 | b1S_P = betasS[2] 201 | b2S = betasS[3] 202 | b3S = betasS[4] 203 | b1S_M = betasS[5] 204 | 205 | b0J = betasJ[1] 206 | b1J = betasJ[2] 207 | b2J = betasJ[3] 208 | b3J = betasJ[4] 209 | bJ = betasJ[5] 210 | 211 | all_betasA[t,1] = b0A 212 | all_betasA[t,2] = b1A 213 | all_betasA[t,3] = b2A 214 | all_betasA[t,4] = b3A 215 | 216 | all_betas[t,1] = b0 217 | all_betas[t,2] = b1 218 | all_betas[t,3] = b2 219 | all_betas[t,4] = b3 220 | 221 | all_betasQ[t,1] = b0Q 222 | all_betasQ[t,2] = b1Q 223 | all_betasQ[t,3] = b2Q 224 | all_betasQ[t,4] = b3Q 225 | all_betasQ[t,5] = b1Q_Q 226 | 227 | all_betasF[t,1] = b0F 228 | all_betasF[t,2] = b1F 229 | all_betasF[t,3] = b2F 230 | all_betasF[t,4] = b3F 231 | all_betasF[t,5] = b1F_Q 232 | all_betasF[t,6] = b2F_Q 233 | all_betasF[t,7] = b3F_Q 234 | 235 | all_betasC[t,1] = b0C 236 | all_betasC[t,2] = b1C 237 | all_betasC[t,3] = b2C 238 | all_betasC[t,4] = b3C 239 | 240 | all_betasS[t,1] = b0S 241 | all_betasS[t,2] = b1S_P 242 | all_betasS[t,3] = b2S 243 | all_betasS[t,4] = b3S 244 | all_betasS[t,5] = b1S_M 245 | 246 | all_betasJ[t,1] = b0J 247 | all_betasJ[t,2] = b1J 248 | all_betasJ[t,3] = b2J 249 | all_betasJ[t,4] = b3J 250 | all_betasJ[t,5] = bJ 251 | 252 | 253 | 254 | # Prediction at time-step t 255 | predA = b0A + b1A*XA[nrow(XA),2] + b2A*XA[nrow(XA),3] + b3A*XA[nrow(XA),4] 256 | pred = b0 + b1*X[nrow(X),2] + b2*X[nrow(X),3] + b3*X[nrow(X),4] 257 | predQ = b0Q + b1Q*XQ[nrow(XQ),2] + b2Q*XQ[nrow(XQ),3] + b3Q*XQ[nrow(XQ),4] + b1Q_Q*XQ[nrow(XQ),5] 258 | predF = b0F + b1F*XF[nrow(XF),2] + b2F*XF[nrow(XF),3] + b3F*XF[nrow(XF),4] + b1F_Q*XF[nrow(XF),5] + b2F_Q*XF[nrow(XF),6] + b3F_Q*XF[nrow(XF),7] 259 | predC = b0C + b1C*XC[nrow(XC),2] + b2C*XC[nrow(XC),3] + b3C*XC[nrow(XC),4] 260 | predS = b0S + b1S_P*XS[nrow(XS),2] + b2S*XS[nrow(XS),3] + b3S*XS[nrow(XS),4] + b1S_M*XS[nrow(XS),5] 261 | predJ = b0J + b1J*XJ[nrow(XJ),2] + b2J*XJ[nrow(XJ),3] + b3J*XJ[nrow(XJ),4] + bJ*XJ[nrow(XJ),5] 262 | 263 | # Saving time-step t prediction within list for error computations 264 | all_predsA[t] = predA 265 | all_preds[t] = pred 266 | all_predsQ[t] = predQ 267 | all_predsF[t] = predF 268 | all_predsC[t] = predC 269 | all_predsS[t] = predS 270 | all_predsJ[t] = predJ 271 | 272 | } 273 | 274 | # Error Computations: Mean Squared Error & Mean Absolute Error 275 | AR_mse = mean((outRV - all_predsA)^2) 276 | AR_mae = mean(abs(outRV - all_predsA)) 277 | 278 | HAR_mse = mean((outRV - all_preds)^2) 279 | HAR_mae = mean(abs(outRV - all_preds)) 280 | 281 | HARQ_mse = mean((outRV - all_predsQ)^2) 282 | HARQ_mae = mean(abs(outRV - all_predsQ)) 283 | 284 | HARQF_mse = mean((outRV - all_predsF)^2) 285 | HARQF_mae = mean(abs(outRV - all_predsF)) 286 | 287 | CHAR_mse = mean((outRV - all_predsC)^2) 288 | CHAR_mae = mean(abs(outRV - all_predsC)) 289 | 290 | SHAR_mse = mean((outRV - all_predsS)^2) 291 | SHAR_mae = mean(abs(outRV - all_predsS)) 292 | 293 | HARJ_mse = mean((outRV - all_predsJ)^2) 294 | HARJ_mae = mean(abs(outRV - all_predsJ)) 295 | 296 | 297 | errors_list = list("AR_mse" = AR_mse, "AR_mae" = AR_mae, "HAR_mse" = HAR_mse, "HAR_mae" = HAR_mae, 298 | "HARQ_mse" = HARQ_mse, "HARQ_mae" = HARQ_mae, "HARQF_mse" = HARQF_mse, "HARQF_mae" = HARQF_mae, 299 | "CHAR_mse" = CHAR_mse, "CHAR_mae" = CHAR_mae, "SHAR_mse" = SHAR_mse, "SHAR_mae" = SHAR_mae, 300 | "HARJ_mse" = HARJ_mse, "HARJ_mae" = HARJ_mae) 301 | 302 | # Plots 303 | #plot_scalar = 100 304 | if (plot_scalar == 1) { 305 | ylab_str = "Realized Variance" 306 | } else { 307 | ylab_str = paste("Realized Var. (Axis scaled by a factor of ", plot_scalar, ")", sep="") 308 | } 309 | 310 | plotDates = DataSet$Dates[(length(DataSet$Dates)-out_sample+1):length(DataSet$Dates)] 311 | 312 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 313 | lines(plot_scalar*all_predsA ~ plotDates, col="darkred") 314 | legend("topright", legend=c("Actual RV", "AR(3)"), col=c("blue", "darkred"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 315 | plot_path = paste(getwd(), "/OutSampPlots/AR(3) R Plot.pdf", sep="") 316 | dev.copy(pdf, plot_path) 317 | #plot_path_png = paste(substr(plot_path, 1, nchar(plot_path)-4), ".png", sep="") 318 | dev.off() 319 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 320 | lines(plot_scalar*all_preds ~ plotDates, col="red") 321 | legend("topright", legend=c("Actual RV", "HAR"), col=c("blue", "red"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 322 | plot_path = paste(getwd(), "/OutSampPlots/HAR R Plot.pdf", sep="") 323 | dev.copy(pdf, plot_path) 324 | dev.off() 325 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 326 | lines(plot_scalar*all_predsQ ~ plotDates, col="green") 327 | legend("topright", legend=c("Actual RV", "HARQ"), col=c("blue", "green"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 328 | plot_path = paste(getwd(), "/OutSampPlots/HARQ R Plot.pdf", sep="") 329 | dev.copy(pdf, plot_path) 330 | dev.off() 331 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 332 | lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen") 333 | legend("topright", legend=c("Actual RV", "HARQ-F"), col=c("blue", "darkgreen"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 334 | plot_path = paste(getwd(), "/OutSampPlots/HARQ-F R Plot.pdf", sep="") 335 | dev.copy(pdf, plot_path) 336 | dev.off() 337 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 338 | lines(plot_scalar*all_predsC ~ plotDates, col="black") 339 | legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 340 | plot_path = paste(getwd(), "/OutSampPlots/CHAR R Plot.pdf", sep="") 341 | dev.copy(pdf, plot_path) 342 | dev.off() 343 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 344 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon") 345 | legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 346 | plot_path = paste(getwd(), "/OutSampPlots/SHAR R Plot.pdf", sep="") 347 | dev.copy(pdf, plot_path) 348 | dev.off() 349 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 350 | lines(plot_scalar*all_predsJ ~ plotDates, col="orange") 351 | legend("topright", legend=c("Actual RV", "HAR-J"), col=c("blue", "orange"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1) 352 | plot_path = paste(getwd(), "/OutSampPlots/HAR-J R Plot.pdf", sep="") 353 | dev.copy(pdf, plot_path) 354 | dev.off() 355 | 356 | if (extra_plots == TRUE) { 357 | # Plotting All models in one 358 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="All Models ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 359 | lines(plot_scalar*all_predsA ~ plotDates, col="darkred") 360 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 361 | lines(plot_scalar*all_preds ~ plotDates, col="red") 362 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 363 | lines(plot_scalar*all_predsQ ~ plotDates, col="green") 364 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ-F ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 365 | lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen") 366 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="CHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 367 | lines(plot_scalar*all_predsC ~ plotDates, col="orange") 368 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="SHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 369 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon") 370 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR-J ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 371 | lines(plot_scalar*all_predsJ ~ plotDates, col="black") 372 | plot_path = paste(getwd(), "/OutSampPlots/AllModels R Plot.pdf", sep="") 373 | dev.copy(pdf, plot_path) 374 | dev.off() 375 | 376 | } 377 | 378 | par(mfrow=c(2,2)) 379 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 380 | #lines(plot_scalar*all_predsA ~ plotDates, col="darkred") 381 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 382 | lines(plot_scalar*all_preds ~ plotDates, col="red") 383 | legend("topright", legend=c("Actual RV", "HAR"), col=c("blue", "red"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 384 | 385 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 386 | lines(plot_scalar*all_predsJ ~ plotDates, col="orange") 387 | legend("topright", legend=c("Actual RV", "HAR-J"), col=c("blue", "orange"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 388 | 389 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 390 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 391 | lines(plot_scalar*all_predsQ ~ plotDates, col="green") 392 | legend("topright", legend=c("Actual RV", "HARQ"), col=c("blue", "green"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 393 | 394 | 395 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 396 | lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen") 397 | legend("topright", legend=c("Actual RV", "HARQ-F"), col=c("blue", "darkgreen"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 398 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="CHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 399 | #lines(plot_scalar*all_predsC ~ plotDates, col="orange") 400 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="SHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 401 | #lines(plot_scalar*all_predsS ~ plotDates, col="maroon") 402 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR-J ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 403 | plot_path = paste(getwd(), "/OutSampPlots/FOUR_R_Plots.pdf", sep="") 404 | dev.copy(pdf, plot_path) 405 | dev.off() 406 | 407 | par(mfrow=c(2,2)) 408 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 409 | lines(plot_scalar*all_predsC ~ plotDates, col="black") 410 | legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 411 | 412 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 413 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon") 414 | legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 415 | 416 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 417 | lines(plot_scalar*all_predsC ~ plotDates, col="black") 418 | legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 419 | 420 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str) 421 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon") 422 | legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n") 423 | 424 | plot_path = paste(getwd(), "/OutSampPlots/TWO_R_Plots.pdf", sep="") 425 | dev.copy(pdf, plot_path) 426 | dev.off() 427 | 428 | 429 | # Output formatting: 430 | output = matrix(outRV) 431 | out_sample_dates = data$Date[(in_sample+1):length(data$Date)] 432 | output = cbind(out_sample_dates, output, all_predsA, all_preds, all_predsQ, all_predsF, all_predsC, all_predsS, all_predsJ) 433 | output_df = as.data.frame(output) 434 | colnames(output_df) = c("Date", "outRV", "all_predsA", "all_preds", "all_predsQ", "all_predsF", "all_predsC", "all_predsS", "all_predsJ") 435 | betas_list = list("all_betasA" = all_betasA, "all_betas" = all_betas, "all_betasQ" = all_betasQ, 436 | "all_betasF" = all_betasF, "all_betasC" = all_betasC, "all_betasS" = all_betasS, 437 | "all_betasJ" = all_betasJ) 438 | 439 | output_df_errors_betas = list(output_df, errors_list, betas_list, models_at_t_1, r_squareds) 440 | 441 | return(output_df_errors_betas) 442 | } 443 | 444 | #RV, RQ, BPV Estimator 445 | estimator <- function(data) { 446 | RV_t_estimates = c() 447 | RQ_t_estimates = c() 448 | BPV_t_estimates = c() 449 | RV_t_plus_estimates = c() 450 | RV_t_minus_estimates = c() 451 | RV_t_dates = c() 452 | M = 0 # Intraday obs used in estimation of RV_t 453 | 454 | for (t in 1:length(data$Dates)) { 455 | # t accounts for the final number of daily RV_t estimates 456 | RV_t_i_estimates = c() # M number of r_t,i to be summed up 457 | M_Q = M # Counter 458 | 459 | while (substring(data$Dates[t+M], first = 1, last = 5) == substring(data$Dates[t+M+1], first = 1, last = 5) 460 | # The below AND condition breaks while-loop when no more intraday obs available 461 | & !is.na(data$Dates[t+M+1])) { 462 | 463 | # Intraday returns 464 | RV_t_i = (data$Open[t+M+1] - data$Open[t+M]) 465 | RV_t_i_estimates = c(RV_t_i_estimates, RV_t_i) 466 | 467 | M = M + 1 468 | } 469 | 470 | if (is.na(data$Dates[t+M])) { 471 | break # This if-clause breaks for-loop when the eventual NA intraday obs is reached 472 | } 473 | 474 | RV_t = sum(RV_t_i_estimates^2) #Realized Variance 475 | #RV_t = sqrt(RV_t) #Realized Volatilty 476 | 477 | RQ_t = ((M-M_Q)/3) * sum(RV_t_i_estimates^4) 478 | #RQ_t = sqrt(RQ_t) 479 | 480 | # Bi-Power Variance: |r_t,i||r_t,i+1| 481 | 482 | ############### Faulty one line of code below: 483 | # Notice [-length(RV_....)] ensures final element is left out. 484 | # BPV_t = (sqrt(2/pi))^(-2) * (sum(abs(RV_t_i_estimates[-length(RV_t_i_estimates)]))*sum(abs(RV_t_i_estimates))) 485 | ############### Line above discarded as wrong implementation 486 | 487 | BPV_t_i_estimates = c() 488 | # i in 1:len(...)-1 corresponds to summing up to M-1 as in Bollerslev (2016) 489 | for (i in 1:(length(RV_t_i_estimates)-1)) { 490 | BPV_t_i_estimates = c(BPV_t_i_estimates, abs(RV_t_i_estimates[i] * abs(RV_t_i_estimates[i+1]))) 491 | } 492 | BPV_t = (sqrt(2/pi))^(-2) * sum(BPV_t_i_estimates) 493 | #BPV = sqrt(BPV_t) 494 | 495 | # RV Plus and RV Minus for SHAR model spec 496 | RV_t_plus = sum(RV_t_i_estimates[RV_t_i_estimates > 0]^2) 497 | #RV_t_plus = sqrt(RV_t_plus) 498 | RV_t_minus = sum(RV_t_i_estimates[RV_t_i_estimates < 0]^2) 499 | #RV_t_minus = (-1)*sqrt(abs(RV_t_minus)) # Abs value and multiply by -1 to avoid sqrt'ing negative numbers 500 | 501 | RV_t_estimates = c(RV_t_estimates, RV_t) 502 | RQ_t_estimates = c(RQ_t_estimates, RQ_t) 503 | BPV_t_estimates = c(BPV_t_estimates, BPV_t) 504 | RV_t_plus_estimates = c(RV_t_plus_estimates, RV_t_plus) 505 | RV_t_minus_estimates = c(RV_t_minus_estimates, RV_t_minus) 506 | 507 | # Dates 508 | RV_t_date = as.numeric(substring(data$Dates[t+M], first = 1, last = 5)) 509 | RV_t_dates = c(RV_t_dates, RV_t_date) 510 | } 511 | 512 | RV_df = as.data.frame(RV_t_dates) 513 | RV_df = cbind(RV_df, RV_t_estimates, RQ_t_estimates, BPV_t_estimates, RV_t_plus_estimates, RV_t_minus_estimates) 514 | colnames(RV_df) = c("Dates", "RV", "RQ", "BPV", "RV_plus", "RV_minus") 515 | RV_df$Dates = as.Date(RV_df$Dates, origin = "1899-12-30") 516 | return(RV_df) 517 | } 518 | 519 | # Scaling of RV, RQ, BPV, RVplus, RVminus estimates for numerical stability 520 | DataSet_Scalar <- function(DataSet, scalar = 100000){ 521 | for (col in 2:ncol(DataSet)) { 522 | DataSet[,col] = DataSet[,col]*scalar 523 | } 524 | return(DataSet) 525 | } 526 | 527 | # Constructing Beta Table 528 | betaTable <- function(forecast) { 529 | 530 | robustStdErrs_A = coeftest(forecast[[4]]$modelA, vcov = vcovHC(forecast[[4]]$modelA, type="HC1")) 531 | robustStdErrs_ = coeftest(forecast[[4]]$model, vcov = vcovHC(forecast[[4]]$model, type="HC1")) 532 | robustStdErrs_Q = coeftest(forecast[[4]]$modelQ, vcov = vcovHC(forecast[[4]]$modelQ, type="HC1")) 533 | robustStdErrs_F = coeftest(forecast[[4]]$modelF, vcov = vcovHC(forecast[[4]]$modelF, type="HC1")) 534 | robustStdErrs_C = coeftest(forecast[[4]]$modelC, vcov = vcovHC(forecast[[4]]$modelC, type="HC1")) 535 | robustStdErrs_S = coeftest(forecast[[4]]$modelS, vcov = vcovHC(forecast[[4]]$modelS, type="HC1")) 536 | robustStdErrs_J = coeftest(forecast[[4]]$modelJ, vcov = vcovHC(forecast[[4]]$modelJ, type="HC1")) 537 | 538 | 539 | 540 | Robust_T_test = list("modelA" = robustStdErrs_A, "model" = robustStdErrs_, 541 | "modelQ" = robustStdErrs_Q, "modelF" = robustStdErrs_F, 542 | "modelC" = robustStdErrs_C, "modelS" = robustStdErrs_S, 543 | "modelJ" = robustStdErrs_J) 544 | 545 | Robust_T_test_matrix = do.call("rbind", Robust_T_test)[, 1:2] 546 | Robust_T_test_table = matrix(NA, nrow = 10, ncol = 7) 547 | colnames(Robust_T_test_table) = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J") 548 | rownames(Robust_T_test_table) = c("b0", "b1", "b2", "b3", "b1Q", "b2Q", "b3Q","b1+", "b1-", "bJ") 549 | 550 | # c = Counter that counts through the rows of #Robust_T_test_matrix to extract the needed betas 551 | # l = length of beta parameters of given model 552 | # dig = num of digits to round up estimates 553 | dig = 4 554 | c_AR = 1:length(forecast[[4]]$modelA$coefficients) 555 | Robust_T_test_table[c_AR , 1] = paste(round(Robust_T_test_matrix[c_AR, 1], dig), " (",round(Robust_T_test_matrix[c_AR, 2], dig),")", sep="") 556 | 557 | l_HAR = 1:length(forecast[[4]]$model$coefficients) 558 | c_HAR = length(c_AR) + l_HAR 559 | Robust_T_test_table[l_HAR , 2] = paste(round(Robust_T_test_matrix[c_HAR, 1], dig), " (",round(Robust_T_test_matrix[c_HAR, 2], dig),")", sep="") 560 | 561 | l_HARQ = 1:length(forecast[[4]]$modelQ$coefficients) 562 | c_HARQ = length(c_HAR) + l_HARQ + 4 # Ad-Hoc added values found by checking Robust_T_test_matrix[c_HAR, ] throughout 563 | Robust_T_test_table[l_HARQ , 3] = paste(round(Robust_T_test_matrix[c_HARQ, 1], dig), " (",round(Robust_T_test_matrix[c_HARQ, 2], dig),")", sep="") 564 | 565 | l_HARQF = 1:length(forecast[[4]]$modelF$coefficients) 566 | c_HARQF = length(c_HARQ) + l_HARQF + 8 567 | Robust_T_test_table[l_HARQF , 4] = paste(round(Robust_T_test_matrix[c_HARQF, 1], dig), " (",round(Robust_T_test_matrix[c_HARQF, 2], dig),")", sep="") 568 | 569 | l_CHAR = 1:length(forecast[[4]]$modelC$coefficients) 570 | c_CHAR = length(l_HARQF) + l_CHAR + 13 571 | Robust_T_test_table[l_CHAR , 5] = paste(round(Robust_T_test_matrix[c_CHAR, 1], dig), " (",round(Robust_T_test_matrix[c_CHAR, 2], dig),")", sep="") 572 | 573 | l_SHAR = 1:length(forecast[[4]]$modelS$coefficients) 574 | c_SHAR = length(l_CHAR) + l_SHAR + 20 575 | Robust_T_test_table[c(1,3,4,8,9) , 6] = paste(round(Robust_T_test_matrix[c_SHAR, 1], dig), " (",round(Robust_T_test_matrix[c_SHAR, 2], dig),")", sep="") 576 | 577 | l_HARJ = 1:length(forecast[[4]]$modelJ$coefficients) 578 | c_HARJ = length(l_SHAR) + l_HARJ + 24 579 | Robust_T_test_table[c(1:4,10) , 7] = paste(round(Robust_T_test_matrix[c_HARJ, 1], dig), " (",round(Robust_T_test_matrix[c_HARJ, 2], dig),")", sep="") 580 | 581 | #print(Robust_T_test_matrix) 582 | #print(Robust_T_test_table) 583 | 584 | # Appending R^squareds to table 585 | Robust_T_test_table_w_rsquareds = rbind(Robust_T_test_table, NA*c(1:7), round(forecast[[5]][1,], dig), round(forecast[[5]][2,], dig)) 586 | rownames(Robust_T_test_table_w_rsquareds)[c(12,13)] = c("R^2", "Adj.R^2") 587 | # Exporting Results 588 | write.xlsx(Robust_T_test_table_w_rsquareds, paste(getwd(), "/Results/Betas_w_all_rsquareds.xlsx", sep=""), sheetName="Sheet1", 589 | col.names=TRUE, row.names=TRUE, append=FALSE) 590 | 591 | return(Robust_T_test_table_w_rsquareds) 592 | } 593 | 594 | # Summary Stats 595 | summaryStats <- function(stocks, scalar = 1, freq = "5min_extended") { 596 | sumStats = matrix(NA, nrow = length(stocks), ncol = 7) 597 | colnames(sumStats) = c("Min", "Mean", "Median", "Max", "Std. Dev.", "Skewness", "Kurtosis") 598 | Symbol = c() 599 | for (stockname in 1:length(stocks)) { 600 | Symbol = c(Symbol,substr(stocks[stockname],1, (nchar(stocks[stockname])-10))) 601 | } 602 | rownames(sumStats) = Symbol 603 | 604 | # Realized Measures of all in assets in 'stocks' variable 605 | DataSets = list(rep(NA, times = length(stocks))) 606 | for (stock in 1:length(stocks)) { 607 | excel_file = paste("Data/",freq,".xlsx" , sep="") 608 | data_name = stocks[stock] 609 | data = as.data.frame(read_excel(excel_file, sheet = data_name)) 610 | data = data[c("BarTp", "Trade")] 611 | colnames(data) = c("Dates", "Open") 612 | data = data[-c(1:4),] 613 | data$Open = as.numeric(data$Open) 614 | data_log = cbind(data$Dates, as.data.frame(log(data$Open))) 615 | colnames(data_log) = c("Dates", "Open") 616 | 617 | #DataSet = estimator(data) 618 | DataSet = estimator(data_log) 619 | DataSet = DataSet_Scalar(DataSet) # DataSet Scalar Function! 620 | 621 | # Appending all data.frames of RV measures to a list of data.frames 622 | DataSets[stock] = list(DataSet) 623 | } 624 | names(DataSets) = stocks 625 | 626 | # Summary Stats to summStats table. Scaled by factor of variable 'scalar' 627 | for (stk in 1:length(stocks)) { 628 | curRV = DataSets[[stk]]$RV # Realized Vol of current stock in iteration 629 | sumStats[stk, 1] = round(min(curRV)*scalar,3) 630 | sumStats[stk, 2] = round(mean(curRV)*scalar,3) 631 | sumStats[stk, 3] = round(median(curRV)*scalar,3) 632 | sumStats[stk, 4] = round(max(curRV)*scalar ,3) 633 | sumStats[stk, 5] = round(sd(curRV)*scalar,3) 634 | sumStats[stk, 6] = round(skewness(curRV)*scalar,1) 635 | sumStats[stk, 7] = round(kurtosis(curRV)*scalar,1) 636 | } 637 | sumStats = cbind(Symbol, sumStats) 638 | Symbol_str = str_c(Symbol,collapse='_') #stringr function dependency 639 | sumStats_file_path = paste(getwd(), "/Results/SummaryStats_", Symbol_str, "_", freq, ".xlsx", sep="") 640 | 641 | # Exporting Summary Stats 642 | write.xlsx(sumStats, sumStats_file_path, sheetName="Sheet1", 643 | col.names=TRUE, row.names=TRUE, append=FALSE) 644 | print(paste("All values in the Summary Statistics are scaled by a factor of: ", scalar, sep="")) 645 | return(sumStats) 646 | } 647 | 648 | # Error Computation & Table Construction 649 | errorsTable <- function(stocks, dig = 5, errorsScalar = 1) { 650 | errorsMatrix = matrix(NA, nrow = length(stocks)+1, ncol = 14) 651 | Symbol = c() 652 | for (stockname in 1:length(stocks)) { 653 | Symbol = c(Symbol,substr(stocks[stockname],1, (nchar(stocks[stockname])-10))) 654 | } 655 | rownames(errorsMatrix) = c("", Symbol) 656 | errorsMatrix[1, ] = rep(c("MSE", "MAE"), times = 7) 657 | 658 | model_names = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J") 659 | model_names2 = rep(NA, times = ncol(errorsMatrix)) 660 | 661 | #odds = odds[lapply(odds, "%%", 2) != 0] 662 | mod = 0 663 | for (m in 1:length(model_names)) { 664 | model_names2[(mod+m)] = model_names[m] 665 | mod = mod + 1 666 | } 667 | for (m in 1:length(model_names2)) { 668 | if(is.na(model_names2[m])) { 669 | model_names2[m] = model_names2[(m-1)] 670 | } 671 | } 672 | colnames(errorsMatrix) = model_names2 673 | 674 | 675 | # Realized Measures of all in assets in 'stocks' variable 676 | forecasts = list(rep(NA, times = length(stocks))) 677 | for (stock in 1:length(stocks)) { 678 | excel_file = paste("Data/",freq,".xlsx" , sep="") 679 | data_name = stocks[stock] 680 | data = as.data.frame(read_excel(excel_file, sheet = data_name)) 681 | data = data[c("BarTp", "Trade")] 682 | colnames(data) = c("Dates", "Open") 683 | data = data[-c(1:4),] 684 | data$Open = as.numeric(data$Open) 685 | data_log = cbind(data$Dates, as.data.frame(log(data$Open))) 686 | colnames(data_log) = c("Dates", "Open") 687 | 688 | #DataSet = estimator(data) 689 | DataSet = estimator(data_log) 690 | DataSet = DataSet_Scalar(DataSet) 691 | forecast = HAR(DataSet) 692 | 693 | # Appending all data.frames of RV measures to a list of data.frames 694 | forecasts[stock] = list(forecast) 695 | } 696 | names(forecasts) = stocks 697 | 698 | #errorsScalar = 100 699 | #dig = 5 700 | for (s in 1:length(stocks)) { 701 | # Loop over errors and models 702 | all_errors = forecasts[[s]][[2]] 703 | for (e in 1:length(all_errors)) { 704 | errorsMatrix[(s+1), e] = round(all_errors[[e]]*errorsScalar, dig) 705 | } 706 | } 707 | 708 | errorsMatrix_BM = duplicate(errorsMatrix) 709 | HAR_MSE_idx = grep("^HAR$", colnames(errorsMatrix_BM))[1] 710 | HAR_MAE_idx = grep("^HAR$", colnames(errorsMatrix_BM))[2] 711 | 712 | #as.numeric(errorsMatrix_BM[1:length(stocks)+1, HAR_MSE_idx]) 713 | 714 | MSE_cols = (1:ncol(errorsMatrix_BM))[1:ncol(errorsMatrix_BM) %% 2 != 0] 715 | MAE_cols = (1:ncol(errorsMatrix_BM))[1:ncol(errorsMatrix_BM) %% 2 == 0] 716 | 717 | for (e in 1:ncol(errorsMatrix_BM)) { 718 | if (e %in% MSE_cols) { 719 | errorsMatrix_BM[1:length(stocks)+1,e] = as.numeric(errorsMatrix[1:length(stocks)+1,e]) / as.numeric(errorsMatrix[1:length(stocks)+1, HAR_MSE_idx]) # MSE_idx 720 | } else if (e %in% MAE_cols) { 721 | errorsMatrix_BM[1:length(stocks)+1,e] = as.numeric(errorsMatrix[1:length(stocks)+1,e]) / as.numeric(errorsMatrix[1:length(stocks)+1, HAR_MAE_idx]) #MAE_idx 722 | } 723 | } 724 | 725 | #Unbenchmarked Errors 726 | errorsMatrix_file_path = paste(getwd(), "/Results/Errors_UnBenchmarked.xlsx", sep="") 727 | errorsMatrix_BM_file_path = paste(getwd(), "/Results/Errors_Benchmarked_to_HAR.xlsx", sep="") 728 | # Exporting Errors Table 729 | write.xlsx(errorsMatrix, errorsMatrix_file_path, sheetName="Sheet1", 730 | col.names=TRUE, row.names=TRUE, append=FALSE) 731 | write.xlsx(errorsMatrix_BM, errorsMatrix_BM_file_path, sheetName="Sheet1", 732 | col.names=TRUE, row.names=TRUE, append=FALSE) 733 | 734 | errorsTables = c(errorsMatrix, errorsMatrix_BM) 735 | return(errorsTables) 736 | } 737 | 738 | 739 | ######## SETTINGS ######## 740 | stocks = c("SPY US Equity", "MSFT US Equity", "MCD US Equity", "JPM US Equity", "DIS US Equity") 741 | freq = "5min_extended" 742 | ########################## 743 | 744 | # Data Prep 745 | log_returns_scalar = 1 746 | excel_file = paste("Data/",freq,".xlsx" , sep="") 747 | data_name = stocks[1] 748 | data = as.data.frame(read_excel(excel_file, sheet = data_name)) 749 | data = data[c("BarTp", "Trade")] 750 | colnames(data) = c("Dates", "Open") 751 | data = data[-c(1:4),] 752 | data$Open = as.numeric(data$Open) 753 | data_log = cbind(data$Dates, as.data.frame(log(data$Open)*log_returns_scalar)) 754 | colnames(data_log) = c("Dates", "Open") 755 | 756 | #DataSet = estimator(data) 757 | DataSet = estimator(data_log) 758 | DataSet = DataSet_Scalar(DataSet) 759 | forecast = HAR(DataSet, out_sample = 96, extra_plots = TRUE) 760 | 761 | 762 | #### FINAL SECTION: Generating tables, retrieving out-of-sample trading dates for plot, etc ###### 763 | 764 | ###### This section generates the data exports for the tables in the paper 765 | # beta_table = betaTable(forecast) 766 | #summStats = summaryStats(stocks) 767 | #errorsTables = errorsTable(stocks) 768 | 769 | # Function for plotDates (out-of-sample forecasting) 770 | # plotDates = substring(data$Dates, first = 1, last = 5) 771 | # plotDates = unique(plotDates)[which(!is.na(unique(plotDates)))] 772 | # plotDates = as.Date(as.numeric(plotDates), origin = "1899-12-30") 773 | # length(plotDates) 774 | 775 | # Realized Quarticity Plots of the 5 stocks 776 | # for (i in 1:5) { 777 | # data_name = stocks[i] 778 | # data = as.data.frame(read_excel(excel_file, sheet = data_name)) 779 | # data = data[c("BarTp", "Trade")] 780 | # colnames(data) = c("Dates", "Open") 781 | # data = data[-c(1:4),] 782 | # data$Open = as.numeric(data$Open) 783 | # data_log = cbind(data$Dates, as.data.frame(log(data$Open)*log_returns_scalar)) 784 | # colnames(data_log) = c("Dates", "Open") 785 | # 786 | # #DataSet = estimator(data) 787 | # DataSet = estimator(data_log) 788 | # DataSet = DataSet_Scalar(DataSet) 789 | # plot(DataSet$RQ, type ="l", main = data_name) 790 | # print(data_name) 791 | # print(summary(DataSet$RQ)) 792 | # } 793 | 794 | --------------------------------------------------------------------------------