├── img
├── images.txt
├── RV.PNG
├── rt.PNG
├── HAR.PNG
├── HAR_models.PNG
├── modelTest.PNG
└── inSampleBetas.PNG
├── LICENSE
├── README.md
└── HAR_RV.R
/img/images.txt:
--------------------------------------------------------------------------------
1 | this folder contains the images
2 |
--------------------------------------------------------------------------------
/img/RV.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/RV.PNG
--------------------------------------------------------------------------------
/img/rt.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/rt.PNG
--------------------------------------------------------------------------------
/img/HAR.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/HAR.PNG
--------------------------------------------------------------------------------
/img/HAR_models.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/HAR_models.PNG
--------------------------------------------------------------------------------
/img/modelTest.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/modelTest.PNG
--------------------------------------------------------------------------------
/img/inSampleBetas.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacob-hein/HAR-models-forecasting-realized-volatility-in-US-stocks/HEAD/img/inSampleBetas.PNG
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Jacob Hein
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HAR models forecasting realized volatility in US stocks
2 | Various heterogeneous autoregressive (HAR) models from Bollerslev et al. (2016), implemented in R to forecast the intraday measure of realized volatility in select US stocks based on high-frequency trading prices.
3 |
4 | ### Data
5 | Intraday prices of the US stocks Microsoft, McDonald's, JPMorgan Chase and Disney, and of an S&P 500 trust fund (SPY), were extracted at frequencies of 1, 5 and 10 minutes with the Bloomberg Terminal. The time series span from March 1st 2019 to November 1st 2019.
6 |
7 | ### Returns and realized volatility
8 | Intraday returns are computed as the change in intraday prices of a given financial asset,
9 |
, and the sum of these squared intraday returns constitutes the realized variance (RV) estimate of a given day (its square root being the realized volatility):
10 |
11 |
12 |
13 | Related measures like realized quarticity (RQ) and bi-power variance (BPV) that are used by some model variants were also estimated in R
14 |
15 | ```r
16 | #RV, RQ, BPV Estimator
estimator <- function(data) {
  # Daily realized measures from intraday prices.
  #
  # Args:
  #   data: list / data frame with
  #     Dates - intraday timestamps whose first five characters identify the
  #             trading day (e.g. Excel serial date-times such as 43525.40);
  #             the series is read up to the first missing timestamp.
  #     Open  - intraday prices aligned with Dates.
  #
  # Returns:
  #   A data frame with one row per trading day and the columns
  #     Dates    - the day, converted with origin "1899-12-30"
  #     RV       - realized variance, sum of squared intraday returns
  #     RQ       - realized quarticity, (M / 3) * sum of returns^4, where M is
  #                the number of intraday returns of that day
  #     BPV      - bipower variation, mu_1^(-2) * sum |r_i| * |r_(i+1)|
  #     RV_plus  - sum of squared positive returns (SHAR model)
  #     RV_minus - sum of squared negative returns (SHAR model)
  #
  # Intraday returns are simple price differences within one day; the
  # overnight move between two days is never used.
  dates <- data$Dates
  prices <- data$Open
  excel_origin <- "1899-12-30"

  # Everything after the first missing timestamp is ignored.
  missing_at <- which(is.na(dates))
  n_obs <- if (length(missing_at) > 0) missing_at[1] - 1L else length(dates)

  if (n_obs < 1) {
    # No usable observations: return an empty, correctly typed result.
    return(data.frame(
      Dates = as.Date(numeric(0), origin = excel_origin),
      RV = numeric(0),
      RQ = numeric(0),
      BPV = numeric(0),
      RV_plus = numeric(0),
      RV_minus = numeric(0)
    ))
  }

  # A trading day is a run of consecutive observations sharing the same
  # five-character date prefix.
  day_key <- substring(dates[seq_len(n_obs)], first = 1, last = 5)
  day_runs <- rle(day_key)
  n_days <- length(day_runs$lengths)
  last_obs <- cumsum(day_runs$lengths)
  first_obs <- last_obs - day_runs$lengths + 1L

  # Preallocate instead of growing the result vectors day by day.
  RV <- numeric(n_days)
  RQ <- numeric(n_days)
  BPV <- numeric(n_days)
  RV_plus <- numeric(n_days)
  RV_minus <- numeric(n_days)

  mu_1 <- sqrt(2 / pi) # E|Z| for a standard normal Z

  for (d in seq_len(n_days)) {
    r <- diff(prices[first_obs[d]:last_obs[d]]) # intraday returns of day d
    M <- length(r)

    RV[d] <- sum(r^2)
    RQ[d] <- (M / 3) * sum(r^4)
    # Products of adjacent absolute returns; head()/tail() yield empty
    # vectors (sum 0) when a day has fewer than two returns, instead of the
    # NA or error produced by indexing with 1:(M - 1).
    BPV[d] <- mu_1^(-2) * sum(abs(head(r, -1)) * abs(tail(r, -1)))
    RV_plus[d] <- sum(r[r > 0]^2)
    RV_minus[d] <- sum(r[r < 0]^2)
  }

  data.frame(
    Dates = as.Date(as.numeric(day_runs$values), origin = excel_origin),
    RV = RV,
    RQ = RQ,
    BPV = BPV,
    RV_plus = RV_plus,
    RV_minus = RV_minus
  )
}
78 | ```
79 |
80 |
81 |
82 | ### HAR models
83 | The objective is to fit different HAR model variants to an in-sample portion of the time-series, to then predict an out-of-sample test sample. The fundamental HAR model is given:
84 |
85 |
86 |
87 | This project includes implementation and tests of model variants that account for error minimizing returns quarticity, leverage effects and jumps in the data.
88 |
89 | The R implementation could have been optimized by vectorizing the computations; it nevertheless runs in a short amount of time:
90 |
91 | ```r
92 | #Forecast, Errors, Betas & Plots
HAR <- function(data, out_sample = 96, plot_scalar = 1, extra_plots = FALSE) {
  # Rolling-window OLS estimation and one-step predictions for seven models
  # of daily realized variance: AR(3), HAR, HARQ, HARQ-F, CHAR, SHAR, HAR-J.
  #
  # Args:
  #   data:        list / data frame with the numeric columns RV, RQ, BPV,
  #                RV_plus, RV_minus and a date column named Dates (or Date).
  #   out_sample:  number of trailing days that are predicted; the window of
  #                (length(RV) - out_sample - 22) rows is rolled forward by
  #                one day per prediction.
  #   plot_scalar: unused in this function body (kept for compatibility).
  #   extra_plots: unused in this function body (kept for compatibility).
  #
  # Returns:
  #   An unnamed list of five elements:
  #     [[1]] data frame: Date, outRV and the predictions of the seven models
  #     [[2]] named list of MSE / MAE per model
  #     [[3]] named list of coefficient matrices (one row per window)
  #     [[4]] named list of lm() fits on the final window
  #     [[5]] 2 x 7 matrix with R-squared and adjusted R-squared of [[4]]
  #
  # NOTE(review): the regressors used for the prediction of step t are the
  # last row of that step's design matrix, and the matching response
  # RV[in_sample + t] is part of the estimation sample. Each "forecast" is
  # therefore the fitted value of the last in-window observation rather than
  # a strict out-of-sample forecast. Behavior is left unchanged here because
  # reported results depend on it; dropping the last row from the estimation
  # would remove the look-ahead.
  required <- c("RV", "RQ", "BPV", "RV_plus", "RV_minus")
  absent <- setdiff(required, names(data))
  if (length(absent) > 0) {
    stop("HAR(): missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  RV <- data$RV
  RQ <- data$RQ
  BPV <- data$BPV
  RV_p <- data$RV_plus
  RV_m <- data$RV_minus

  # Exact-name lookup of the date column. `data$Date` would rely on partial
  # matching, which yields NULL for tibbles or when several columns start
  # with "Date".
  date_col <- if ("Date" %in% names(data)) data[["Date"]] else data[["Dates"]]
  if (is.null(date_col)) {
    stop("HAR(): data needs a 'Dates' (or 'Date') column.", call. = FALSE)
  }

  nobs <- length(RV)
  in_sample <- nobs - out_sample
  lag <- 22 # 22 trading days, i.e. one month, are needed for the first row
  n_rows <- in_sample - lag
  if (out_sample < 1 || n_rows < 1) {
    stop("HAR(): need out_sample >= 1 and more than ", lag + out_sample,
         " daily observations; got out_sample = ", out_sample, " and ",
         nobs, " observations.", call. = FALSE)
  }

  outRV <- RV[(in_sample + 1):nobs]

  # Mean of the `width` values ending at each position (NA where the window
  # would start before the first observation).
  trailing_mean <- function(x, width) {
    out <- rep(NA_real_, length(x))
    for (j in width:length(x)) {
      out[j] <- (1 / width) * sum(x[(j - width + 1):j])
    }
    out
  }
  # Series shifted back by k days: element j holds x[j - k].
  lagged <- function(x, k) c(rep(NA_real_, k), x[seq_len(length(x) - k)])

  # Regressors are computed once for every day j; row j holds the information
  # available on day j and is paired with the response RV[j + 1].
  rv_w <- trailing_mean(RV, 5)
  rv_m <- trailing_mean(RV, 22)
  rq_w <- trailing_mean(RQ, 5)
  rq_m <- trailing_mean(RQ, 22)
  bpv_w <- trailing_mean(BPV, 5)
  bpv_m <- trailing_mean(BPV, 22)

  features <- list(
    A = cbind(RV, lagged(RV, 1), lagged(RV, 2)),                    # AR(3)
    H = cbind(RV, rv_w, rv_m),                                      # HAR
    Q = cbind(RV, rv_w, rv_m, RQ^(1/2) * RV),                       # HARQ
    F = cbind(RV, rv_w, rv_m, RQ^(1/2) * RV,
              rq_w^(1/2) * rv_w, rq_m^(1/2) * rv_m),                # HARQ-F
    C = cbind(BPV, bpv_w, bpv_m),                                   # CHAR
    S = cbind(rv_w, rv_m, RV_p, RV_m),                              # SHAR
    J = cbind(RV, rv_w, rv_m, pmax(RV - BPV, 0))                    # HAR-J
  )
  # Drop column names so lm() labels coefficients XA1, XA2, ... as before.
  features <- lapply(features, unname)
  model_keys <- names(features)

  preds <- lapply(features, function(f) numeric(out_sample))
  betas <- lapply(features, function(f) {
    matrix(0, nrow = out_sample, ncol = ncol(f) + 1) # +1 for the intercept
  })

  for (step in seq_len(out_sample)) {
    info_days <- (lag + step):(in_sample + step - 1)
    y <- RV[info_days + 1]
    design <- lapply(features, function(f) f[info_days, , drop = FALSE])

    # HAR-J: an all-zero jump column makes X'X singular; perturb one entry.
    if (sum(design[["J"]][, 4]) == 0) {
      design[["J"]][1, 4] <- 0.1
    }

    # lm() fits on the final window (step == out_sample), kept for standard
    # errors and R-squared. NOTE(review): the list is named models_at_t_1,
    # which suggests the first window was intended - confirm.
    if (step == out_sample) {
      XA <- design[["A"]]
      X <- design[["H"]]
      XQ <- design[["Q"]]
      XF <- design[["F"]]
      XC <- design[["C"]]
      XS <- design[["S"]]
      XJ <- design[["J"]]
      models_at_t_1 <- list("modelA" = lm(y ~ XA), "model" = lm(y ~ X),
                            "modelQ" = lm(y ~ XQ), "modelF" = lm(y ~ XF),
                            "modelC" = lm(y ~ XC), "modelS" = lm(y ~ XS),
                            "modelJ" = lm(y ~ XJ))
    }

    for (key in model_keys) {
      Xk <- cbind(1, design[[key]]) # prepend the intercept column
      coefs <- solve(t(Xk) %*% Xk) %*% t(Xk) %*% y
      betas[[key]][step, ] <- coefs
      # Prediction from the last row of the window (see NOTE at the top).
      preds[[key]][step] <- sum(coefs * Xk[nrow(Xk), ])
    }
  }

  # R-squared and adjusted R-squared of the lm() fits
  r_squareds <- rbind(
    vapply(models_at_t_1, function(m) summary(m)$r.squared, numeric(1)),
    vapply(models_at_t_1, function(m) summary(m)$adj.r.squared, numeric(1))
  )
  dimnames(r_squareds) <- list(
    c("R-squared", "Adj.R-squared"),
    c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
  )

  # Error computations: Mean Squared Error & Mean Absolute Error
  error_prefix <- c(A = "AR", H = "HAR", Q = "HARQ", F = "HARQF",
                    C = "CHAR", S = "SHAR", J = "HARJ")
  errors_list <- list()
  for (key in model_keys) {
    miss <- outRV - preds[[key]]
    errors_list[[paste0(error_prefix[[key]], "_mse")]] <- mean(miss^2)
    errors_list[[paste0(error_prefix[[key]], "_mae")]] <- mean(abs(miss))
  }

  # Output formatting (cbind() turns Date values into their numeric form)
  out_sample_dates <- date_col[(in_sample + 1):length(date_col)]
  output <- cbind(out_sample_dates, matrix(outRV),
                  preds[["A"]], preds[["H"]], preds[["Q"]], preds[["F"]],
                  preds[["C"]], preds[["S"]], preds[["J"]])
  output_df <- as.data.frame(output)
  colnames(output_df) <- c("Date", "outRV", "all_predsA", "all_preds",
                           "all_predsQ", "all_predsF", "all_predsC",
                           "all_predsS", "all_predsJ")

  betas_list <- list("all_betasA" = betas[["A"]], "all_betas" = betas[["H"]],
                     "all_betasQ" = betas[["Q"]], "all_betasF" = betas[["F"]],
                     "all_betasC" = betas[["C"]], "all_betasS" = betas[["S"]],
                     "all_betasJ" = betas[["J"]])

  list(output_df, errors_list, betas_list, models_at_t_1, r_squareds)
}
379 | ```
380 |
381 |
382 | ### Beta coefficients estimated in-sample
383 |
384 |
385 |
386 | ### Out-of-sample forecasting
387 |
388 |
389 |
390 |
391 |
392 | ### Project conclusion
393 |
394 | HAR-type models fit measures of intraday price movements well. The out-of-sample forecast results suggest that the HARQ-F model outperforms all other variants as well as a simple AR(3) model.
395 |
396 | The HARQ-F includes a realized quarticity measure to minimize error, which may contribute to the superior forecasting ability of this model variant.
397 |
398 | Time-series extended further back in time would be desirable for both in-sample coefficient estimations and out-of-sample forecasting, however the limitation of readily available intra-day price data prohibited a longer time horizon in this project.
399 |
--------------------------------------------------------------------------------
/HAR_RV.R:
--------------------------------------------------------------------------------
1 | # install.packages("rstudioapi")
2 | # install.packages("rJava")
3 | # install.packages("readxl")
4 | # install.packages("xts")
5 | # install.packages("sandwich")
6 | # install.packages("lmtest")
7 | # install.packages("xlsx")
8 | # install.packages("e1071")
9 | # install.packages("stringr")
10 | # install.packages("rlang")
11 | # install.packages("ggplot2")
12 |
13 | setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # Set working directory to the folder of the active document (uses the rstudioapi package)
14 | options(scipen=999) # No scientific floating points notation
15 |
16 | library(xts) # Time-indexed data frames, ideal for plots
17 | library(readxl) # Reading data
18 | library(xlsx) # Exporting results
19 | library(e1071) # Skew & Kurtosis for Summary Statistics Table
20 | library(stringr) # String formatting
21 | library(rlang)
22 | library(ggplot2) # Plots
23 | theme_set(theme_minimal()) # Use the minimal ggplot2 theme for all plots
24 | ###############
25 | # For Robust Standard Errors
26 | library(lmtest)
27 | library(sandwich)
28 | ###############
30 | #Forecast, Errors, Betas & Plots
31 | HAR <- function(data, out_sample = 96, plot_scalar = 1, extra_plots = FALSE) {
32 |
33 | RV = data$RV
34 | RQ = data$RQ
35 | BPV = data$BPV
36 | RV_p = data$RV_plus
37 | RV_m = data$RV_minus
38 |
39 | nobs = length(RV)
40 | in_sample = nobs - out_sample
41 |
42 | outRV = RV[(in_sample+1):(length(RV))] # We +1 to get equal length as out_sample size
43 | lag = 22 # 22 days lag is equivalent to one month of trading days lag
44 |
45 | all_predsA = rep(0, times = out_sample)
46 | all_preds = rep(0, times = out_sample)
47 | all_predsQ = rep(0, times = out_sample)
48 | all_predsF = rep(0, times = out_sample)
49 | all_predsC = rep(0, times = out_sample)
50 | all_predsS = rep(0, times = out_sample)
51 | all_predsJ = rep(0, times = out_sample)
52 |
53 | all_betasA = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
54 | all_betas = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
55 | all_betasQ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
56 | all_betasF = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 7)
57 | all_betasC = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 4)
58 | all_betasS = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
59 | all_betasJ = matrix(rep(0, times = out_sample), nrow = out_sample, ncol = 5)
60 |
61 | for (t in 1:(out_sample)) {
62 | # Estimation
63 | y = RV[(lag + t + 1):(in_sample + t)]
64 | XA = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
65 | X = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
66 | XQ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
67 | XF = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 6)
68 | XC = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 3)
69 | XS = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
70 | XJ = matrix(rep(0, times = in_sample-lag), nrow = in_sample-lag, ncol = 4)
71 |
72 | for (i in 0:(in_sample - lag - 1)) {
73 |
74 | # AR(3)
75 | XA[i+1,1] = RV[(-1+i+1+lag + t)]
76 | XA[i+1,2] = RV[(-2+i+1+lag + t)]
77 | XA[i+1,3] = RV[(-3+i+1+lag + t)]
78 |
79 | # HAR
80 | X[i+1,1] = RV[(-1+i+1+lag + t)]
81 | X[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
82 | X[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
83 |
84 | # HARQ
85 | XQ[i+1,1] = RV[(-1+i+1+lag + t)]
86 | XQ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
87 | XQ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
88 | XQ[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)])
89 |
90 | # HARQ-F
91 | XF[i+1,1] = RV[(-1+i+1+lag + t)]
92 | XF[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
93 | XF[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
94 | XF[i+1,4] = (RQ[-1+i+1+lag + t]^(1/2) * RV[(-1+i+1+lag + t)])
95 | XF[i+1,5] = (((1/5)*sum(RQ[(-5+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])))
96 | XF[i+1,6] = (((1/22)*sum(RQ[(-22+i+1+lag + t):(i+lag + t)]))^(1/2) * ((1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])))
97 |
98 | # CHAR
99 | XC[i+1,1] = BPV[(-1+i+1+lag + t)]
100 | XC[i+1,2] = (1/5)*sum(BPV[(-5+i+1+lag + t):(i+lag + t)])
101 | XC[i+1,3] = (1/22)*sum(BPV[(-22+i+1+lag + t):(i+lag + t)])
102 |
103 | # SHAR
104 | XS[i+1,1] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
105 | XS[i+1,2] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
106 | XS[i+1,3] = RV_p[(-1+i+1+lag + t)]
107 | XS[i+1,4] = RV_m[(-1+i+1+lag + t)]
108 |
109 | # HAR-J
110 | XJ[i+1,1] = RV[(-1+i+1+lag + t)]
111 | XJ[i+1,2] = (1/5)*sum(RV[(-5+i+1+lag + t):(i+lag + t)])
112 | XJ[i+1,3] = (1/22)*sum(RV[(-22+i+1+lag + t):(i+lag + t)])
113 | XJ[i+1,4] = max((RV[(-1+i+1+lag + t)] - BPV[(-1+i+1+lag + t)]), 0)
114 |
115 | }
116 |
117 |
118 | # R Regression at t=1 for Standard Errors before performing any out-of-sample forecasts
119 | if (t==out_sample) {
120 | modelA = lm(y ~ XA)
121 | model = lm(y ~ X)
122 | modelQ = lm(y ~ XQ)
123 | modelF = lm(y ~ XF)
124 | modelC = lm(y ~ XC)
125 | modelS = lm(y ~ XS)
126 | # HAR-J if statement:
127 | if (sum(XJ[,4]) ==0) {
128 | XJ[1,4] = 0.1 # Ensure invertibility if XJ singular with zero column
129 | }
130 | modelJ = lm(y ~ XJ)
131 | models_at_t_1 = list("modelA" = modelA, "model" = model,
132 | "modelQ" = modelQ, "modelF" = modelF,
133 | "modelC" = modelC, "modelS" = modelS,
134 | "modelJ" = modelJ)
135 |
136 | # Below we retrieve R^2 & Adjusted R^2, prior to out-of-sample forecasts
137 | num_of_models = 7
138 | r_squareds = matrix(0, nrow = 2, ncol = num_of_models)
139 | for (val in 1:length(models_at_t_1)) {
140 | r_squareds[1,val] = summary(models_at_t_1[[val]])$r.squared
141 | r_squareds[2,val] = summary(models_at_t_1[[val]])$adj.r.squared
142 | }
143 | rownames(r_squareds) = c("R-squared", "Adj.R-squared")
144 | colnames(r_squareds) = c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
145 |
146 | }
147 |
148 | XA = cbind(rep(1, times = nrow(XA)), XA)
149 | X = cbind(rep(1, times = nrow(X)), X)
150 | XQ = cbind(rep(1, times = nrow(XQ)), XQ)
151 | XF = cbind(rep(1, times = nrow(XF)), XF)
152 | XC = cbind(rep(1, times = nrow(XC)), XC)
153 | XS = cbind(rep(1, times = nrow(XS)), XS)
154 | XJ = cbind(rep(1, times = nrow(XJ)), XJ)
155 |
156 | # HAR-J if statement:
157 | if (sum(XJ[,5]) ==0) {
158 | XJ[1,5] = 0.1 # Ensure invertibility if XJ singular with zero column
159 | }
160 |
161 | # OLS Regression
162 | betasA = solve(t(XA) %*% XA) %*% t(XA) %*% y
163 | betas = solve(t(X) %*% X) %*% t(X) %*% y
164 | betasQ = solve(t(XQ) %*% XQ) %*% t(XQ) %*% y
165 | betasF = solve(t(XF) %*% XF) %*% t(XF) %*% y
166 | betasC = solve(t(XC) %*% XC) %*% t(XC) %*% y
167 | betasS = solve(t(XS) %*% XS) %*% t(XS) %*% y
168 | betasJ = solve(t(XJ) %*% XJ) %*% t(XJ) %*% y
169 |
170 | b0A = betasA[1]
171 | b1A = betasA[2]
172 | b2A = betasA[3]
173 | b3A = betasA[4]
174 |
175 | b0 = betas[1]
176 | b1 = betas[2]
177 | b2 = betas[3]
178 | b3 = betas[4]
179 |
180 | b0Q = betasQ[1]
181 | b1Q = betasQ[2]
182 | b2Q = betasQ[3]
183 | b3Q = betasQ[4]
184 | b1Q_Q = betasQ[5]
185 |
186 | b0F = betasF[1]
187 | b1F = betasF[2]
188 | b2F = betasF[3]
189 | b3F = betasF[4]
190 | b1F_Q = betasF[5]
191 | b2F_Q = betasF[6]
192 | b3F_Q = betasF[7]
193 |
194 | b0C = betasC[1]
195 | b1C = betasC[2]
196 | b2C = betasC[3]
197 | b3C = betasC[4]
198 |
199 | b0S = betasS[1]
200 | b1S_P = betasS[2]
201 | b2S = betasS[3]
202 | b3S = betasS[4]
203 | b1S_M = betasS[5]
204 |
205 | b0J = betasJ[1]
206 | b1J = betasJ[2]
207 | b2J = betasJ[3]
208 | b3J = betasJ[4]
209 | bJ = betasJ[5]
210 |
211 | all_betasA[t,1] = b0A
212 | all_betasA[t,2] = b1A
213 | all_betasA[t,3] = b2A
214 | all_betasA[t,4] = b3A
215 |
216 | all_betas[t,1] = b0
217 | all_betas[t,2] = b1
218 | all_betas[t,3] = b2
219 | all_betas[t,4] = b3
220 |
221 | all_betasQ[t,1] = b0Q
222 | all_betasQ[t,2] = b1Q
223 | all_betasQ[t,3] = b2Q
224 | all_betasQ[t,4] = b3Q
225 | all_betasQ[t,5] = b1Q_Q
226 |
227 | all_betasF[t,1] = b0F
228 | all_betasF[t,2] = b1F
229 | all_betasF[t,3] = b2F
230 | all_betasF[t,4] = b3F
231 | all_betasF[t,5] = b1F_Q
232 | all_betasF[t,6] = b2F_Q
233 | all_betasF[t,7] = b3F_Q
234 |
235 | all_betasC[t,1] = b0C
236 | all_betasC[t,2] = b1C
237 | all_betasC[t,3] = b2C
238 | all_betasC[t,4] = b3C
239 |
240 | all_betasS[t,1] = b0S
241 | all_betasS[t,2] = b1S_P
242 | all_betasS[t,3] = b2S
243 | all_betasS[t,4] = b3S
244 | all_betasS[t,5] = b1S_M
245 |
246 | all_betasJ[t,1] = b0J
247 | all_betasJ[t,2] = b1J
248 | all_betasJ[t,3] = b2J
249 | all_betasJ[t,4] = b3J
250 | all_betasJ[t,5] = bJ
251 |
252 |
253 |
254 | # Prediction at time-step t
255 | predA = b0A + b1A*XA[nrow(XA),2] + b2A*XA[nrow(XA),3] + b3A*XA[nrow(XA),4]
256 | pred = b0 + b1*X[nrow(X),2] + b2*X[nrow(X),3] + b3*X[nrow(X),4]
257 | predQ = b0Q + b1Q*XQ[nrow(XQ),2] + b2Q*XQ[nrow(XQ),3] + b3Q*XQ[nrow(XQ),4] + b1Q_Q*XQ[nrow(XQ),5]
258 | predF = b0F + b1F*XF[nrow(XF),2] + b2F*XF[nrow(XF),3] + b3F*XF[nrow(XF),4] + b1F_Q*XF[nrow(XF),5] + b2F_Q*XF[nrow(XF),6] + b3F_Q*XF[nrow(XF),7]
259 | predC = b0C + b1C*XC[nrow(XC),2] + b2C*XC[nrow(XC),3] + b3C*XC[nrow(XC),4]
260 | predS = b0S + b1S_P*XS[nrow(XS),2] + b2S*XS[nrow(XS),3] + b3S*XS[nrow(XS),4] + b1S_M*XS[nrow(XS),5]
261 | predJ = b0J + b1J*XJ[nrow(XJ),2] + b2J*XJ[nrow(XJ),3] + b3J*XJ[nrow(XJ),4] + bJ*XJ[nrow(XJ),5]
262 |
263 | # Saving time-step t prediction within list for error computations
264 | all_predsA[t] = predA
265 | all_preds[t] = pred
266 | all_predsQ[t] = predQ
267 | all_predsF[t] = predF
268 | all_predsC[t] = predC
269 | all_predsS[t] = predS
270 | all_predsJ[t] = predJ
271 |
272 | }
273 |
274 | # Error Computations: Mean Squared Error & Mean Absolute Error
275 | AR_mse = mean((outRV - all_predsA)^2)
276 | AR_mae = mean(abs(outRV - all_predsA))
277 |
278 | HAR_mse = mean((outRV - all_preds)^2)
279 | HAR_mae = mean(abs(outRV - all_preds))
280 |
281 | HARQ_mse = mean((outRV - all_predsQ)^2)
282 | HARQ_mae = mean(abs(outRV - all_predsQ))
283 |
284 | HARQF_mse = mean((outRV - all_predsF)^2)
285 | HARQF_mae = mean(abs(outRV - all_predsF))
286 |
287 | CHAR_mse = mean((outRV - all_predsC)^2)
288 | CHAR_mae = mean(abs(outRV - all_predsC))
289 |
290 | SHAR_mse = mean((outRV - all_predsS)^2)
291 | SHAR_mae = mean(abs(outRV - all_predsS))
292 |
293 | HARJ_mse = mean((outRV - all_predsJ)^2)
294 | HARJ_mae = mean(abs(outRV - all_predsJ))
295 |
296 |
297 | errors_list = list("AR_mse" = AR_mse, "AR_mae" = AR_mae, "HAR_mse" = HAR_mse, "HAR_mae" = HAR_mae,
298 | "HARQ_mse" = HARQ_mse, "HARQ_mae" = HARQ_mae, "HARQF_mse" = HARQF_mse, "HARQF_mae" = HARQF_mae,
299 | "CHAR_mse" = CHAR_mse, "CHAR_mae" = CHAR_mae, "SHAR_mse" = SHAR_mse, "SHAR_mae" = SHAR_mae,
300 | "HARJ_mse" = HARJ_mse, "HARJ_mae" = HARJ_mae)
301 |
302 | # Plots
303 | #plot_scalar = 100
304 | if (plot_scalar == 1) {
305 | ylab_str = "Realized Variance"
306 | } else {
307 | ylab_str = paste("Realized Var. (Axis scaled by a factor of ", plot_scalar, ")", sep="")
308 | }
309 |
310 | plotDates = DataSet$Dates[(length(DataSet$Dates)-out_sample+1):length(DataSet$Dates)]
311 |
312 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
313 | lines(plot_scalar*all_predsA ~ plotDates, col="darkred")
314 | legend("topright", legend=c("Actual RV", "AR(3)"), col=c("blue", "darkred"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
315 | plot_path = paste(getwd(), "/OutSampPlots/AR(3) R Plot.pdf", sep="")
316 | dev.copy(pdf, plot_path)
317 | #plot_path_png = paste(substr(plot_path, 1, nchar(plot_path)-4), ".png", sep="")
318 | dev.off()
319 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
320 | lines(plot_scalar*all_preds ~ plotDates, col="red")
321 | legend("topright", legend=c("Actual RV", "HAR"), col=c("blue", "red"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
322 | plot_path = paste(getwd(), "/OutSampPlots/HAR R Plot.pdf", sep="")
323 | dev.copy(pdf, plot_path)
324 | dev.off()
325 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
326 | lines(plot_scalar*all_predsQ ~ plotDates, col="green")
327 | legend("topright", legend=c("Actual RV", "HARQ"), col=c("blue", "green"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
328 | plot_path = paste(getwd(), "/OutSampPlots/HARQ R Plot.pdf", sep="")
329 | dev.copy(pdf, plot_path)
330 | dev.off()
331 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
332 | lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen")
333 | legend("topright", legend=c("Actual RV", "HARQ-F"), col=c("blue", "darkgreen"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
334 | plot_path = paste(getwd(), "/OutSampPlots/HARQ-F R Plot.pdf", sep="")
335 | dev.copy(pdf, plot_path)
336 | dev.off()
337 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
338 | lines(plot_scalar*all_predsC ~ plotDates, col="black")
339 | legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
340 | plot_path = paste(getwd(), "/OutSampPlots/CHAR R Plot.pdf", sep="")
341 | dev.copy(pdf, plot_path)
342 | dev.off()
343 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
344 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
345 | legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
346 | plot_path = paste(getwd(), "/OutSampPlots/SHAR R Plot.pdf", sep="")
347 | dev.copy(pdf, plot_path)
348 | dev.off()
349 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
350 | lines(plot_scalar*all_predsJ ~ plotDates, col="orange")
351 | legend("topright", legend=c("Actual RV", "HAR-J"), col=c("blue", "orange"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1)
352 | plot_path = paste(getwd(), "/OutSampPlots/HAR-J R Plot.pdf", sep="")
353 | dev.copy(pdf, plot_path)
354 | dev.off()
355 |
356 | if (extra_plots == TRUE) {
357 | # Plotting All models in one
358 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="All Models ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
359 | lines(plot_scalar*all_predsA ~ plotDates, col="darkred")
360 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
361 | lines(plot_scalar*all_preds ~ plotDates, col="red")
362 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
363 | lines(plot_scalar*all_predsQ ~ plotDates, col="green")
364 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ-F ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
365 | lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen")
366 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="CHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
367 | lines(plot_scalar*all_predsC ~ plotDates, col="orange")
368 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="SHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
369 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
370 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR-J ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
371 | lines(plot_scalar*all_predsJ ~ plotDates, col="black")
372 | plot_path = paste(getwd(), "/OutSampPlots/AllModels R Plot.pdf", sep="")
373 | dev.copy(pdf, plot_path)
374 | dev.off()
375 |
376 | }
377 |
378 | par(mfrow=c(2,2))
379 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
380 | #lines(plot_scalar*all_predsA ~ plotDates, col="darkred")
381 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
382 | lines(plot_scalar*all_preds ~ plotDates, col="red")
383 | legend("topright", legend=c("Actual RV", "HAR"), col=c("blue", "red"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
384 |
385 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
386 | lines(plot_scalar*all_predsJ ~ plotDates, col="orange")
387 | legend("topright", legend=c("Actual RV", "HAR-J"), col=c("blue", "orange"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
388 |
389 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
390 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HARQ ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
391 | lines(plot_scalar*all_predsQ ~ plotDates, col="green")
392 | legend("topright", legend=c("Actual RV", "HARQ"), col=c("blue", "green"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
393 |
394 |
395 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
396 | lines(plot_scalar*all_predsF ~ plotDates, col="darkgreen")
397 | legend("topright", legend=c("Actual RV", "HARQ-F"), col=c("blue", "darkgreen"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
398 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="CHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
399 | #lines(plot_scalar*all_predsC ~ plotDates, col="orange")
400 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="SHAR ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
401 | #lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
402 | #plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", main="HAR-J ", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
403 | plot_path = paste(getwd(), "/OutSampPlots/FOUR_R_Plots.pdf", sep="")
404 | dev.copy(pdf, plot_path)
405 | dev.off()
406 |
407 | par(mfrow=c(2,2))
408 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
409 | lines(plot_scalar*all_predsC ~ plotDates, col="black")
410 | legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
411 |
412 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
413 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
414 | legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
415 |
416 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
417 | lines(plot_scalar*all_predsC ~ plotDates, col="black")
418 | legend("topright", legend=c("Actual RV", "CHAR"), col=c("blue", "black"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
419 |
420 | plot(plot_scalar*outRV ~ plotDates, type="l", col="blue", xlab = "Out-of-Sample Trading Days", ylab = ylab_str)
421 | lines(plot_scalar*all_predsS ~ plotDates, col="maroon")
422 | legend("topright", legend=c("Actual RV", "SHAR"), col=c("blue", "maroon"), lty=c(1,1), lwd=c(2.5,2.5), box.lty=1, bty = "n")
423 |
424 | plot_path = paste(getwd(), "/OutSampPlots/TWO_R_Plots.pdf", sep="")
425 | dev.copy(pdf, plot_path)
426 | dev.off()
427 |
428 |
429 | # Output formatting:
430 | output = matrix(outRV)
431 | out_sample_dates = data$Date[(in_sample+1):length(data$Date)]
432 | output = cbind(out_sample_dates, output, all_predsA, all_preds, all_predsQ, all_predsF, all_predsC, all_predsS, all_predsJ)
433 | output_df = as.data.frame(output)
434 | colnames(output_df) = c("Date", "outRV", "all_predsA", "all_preds", "all_predsQ", "all_predsF", "all_predsC", "all_predsS", "all_predsJ")
435 | betas_list = list("all_betasA" = all_betasA, "all_betas" = all_betas, "all_betasQ" = all_betasQ,
436 | "all_betasF" = all_betasF, "all_betasC" = all_betasC, "all_betasS" = all_betasS,
437 | "all_betasJ" = all_betasJ)
438 |
439 | output_df_errors_betas = list(output_df, errors_list, betas_list, models_at_t_1, r_squareds)
440 |
441 | return(output_df_errors_betas)
442 | }
443 |
#' Daily realized measures (RV, RQ, BPV, RV+ and RV-) from intraday prices
#'
#' Consecutive observations whose timestamps share the same first five
#' characters are treated as one trading day. For every day the intraday
#' returns are the first differences of `data$Open` (callers in this file pass
#' log prices), from which the following are computed:
#'   * RV       = sum(r^2)
#'   * RQ       = (M / 3) * sum(r^4), with M the number of intraday returns
#'   * BPV      = (sqrt(2 / pi))^(-2) * sum_{i = 1}^{M - 1} |r_i| |r_{i + 1}|
#'   * RV_plus  = sum of r^2 over strictly positive returns
#'   * RV_minus = sum of r^2 over strictly negative returns
#'
#' Processing stops at the first missing timestamp. A day with fewer than two
#' returns has an empty bi-power sum, i.e. BPV = 0 (previously such a day
#' produced NA or an error because `1:(M - 1)` counted backwards).
#'
#' @param data Object with a `Dates` element (timestamps whose first five
#'   characters are a numeric day serial; converted with origin "1899-12-30")
#'   and a numeric `Open` element, both in chronological order.
#' @return A data.frame with one row per day and the columns `Dates` (class
#'   Date), `RV`, `RQ`, `BPV`, `RV_plus` and `RV_minus`.
estimator <- function(data) {
  day_key <- substring(data$Dates, first = 1, last = 5)
  prices <- data$Open

  # Only the leading stretch of non-missing timestamps is used.
  missing_at <- which(is.na(day_key))
  n_obs <- if (length(missing_at) > 0) missing_at[1] - 1L else length(day_key)

  # Each run of identical day keys is one trading day.
  day_runs <- rle(day_key[seq_len(n_obs)])
  day_end <- cumsum(day_runs$lengths)
  day_start <- day_end - day_runs$lengths + 1L
  n_days <- length(day_end)

  RV_t_estimates <- numeric(n_days)
  RQ_t_estimates <- numeric(n_days)
  BPV_t_estimates <- numeric(n_days)
  RV_t_plus_estimates <- numeric(n_days)
  RV_t_minus_estimates <- numeric(n_days)

  for (d in seq_len(n_days)) {
    # Intraday returns of day d; numeric(0) when the day has one observation.
    r <- diff(prices[day_start[d]:day_end[d]])
    M <- length(r)

    RV_t_estimates[d] <- sum(r^2) # Realized Variance
    RQ_t_estimates[d] <- (M / 3) * sum(r^4) # Realized Quarticity

    # Bi-Power Variation: products of adjacent absolute returns, summed up to
    # M - 1 as in Bollerslev (2016). r[-M] and r[-1] are both empty if M < 2.
    BPV_t_estimates[d] <- (sqrt(2 / pi))^(-2) * sum(abs(r[-M]) * abs(r[-1]))

    # Signed semi-variances for the SHAR model specification.
    RV_t_plus_estimates[d] <- sum(r[r > 0]^2)
    RV_t_minus_estimates[d] <- sum(r[r < 0]^2)
  }

  data.frame(
    Dates = as.Date(as.numeric(day_runs$values), origin = "1899-12-30"),
    RV = RV_t_estimates,
    RQ = RQ_t_estimates,
    BPV = BPV_t_estimates,
    RV_plus = RV_t_plus_estimates,
    RV_minus = RV_t_minus_estimates
  )
}
518 |
#' Scale the realized-measure columns for numerical stability
#'
#' Multiplies every column except the first one (the dates column in the data
#' frames produced by `estimator()`) by `scalar`.
#'
#' @param DataSet Data frame (or matrix) whose columns 2..ncol are numeric.
#' @param scalar Multiplicative factor applied to columns 2..ncol.
#' @return `DataSet` with the scaled columns; an input with a single column is
#'   returned unchanged (previously `2:ncol(DataSet)` counted backwards and
#'   failed on the non-existent second column).
DataSet_Scalar <- function(DataSet, scalar = 100000) {
  value_cols <- seq_len(ncol(DataSet))[-1]
  for (col in value_cols) {
    DataSet[, col] <- DataSet[, col] * scalar
  }
  DataSet
}
526 |
#' Build and export the table of coefficient estimates with robust std. errors
#'
#' Runs `coeftest()` with an HC1 `vcovHC()` covariance on the seven fitted
#' models stored in `forecast[[4]]` (`modelA`, `model`, `modelQ`, `modelF`,
#' `modelC`, `modelS`, `modelJ`), formats every coefficient as
#' "estimate (std. error)" rounded to 4 digits and places it in a 10 x 7 table
#' (rows b0, b1, b2, b3, b1Q, b2Q, b3Q, b1+, b1-, bJ; one column per model).
#' A separator row and rows 1 and 2 of `forecast[[5]]` (labelled "R^2" and
#' "Adj.R^2") are appended, and the result is written to
#' "Results/Betas_w_all_rsquareds.xlsx" under the working directory.
#'
#' The position of each model's rows inside the stacked coefficient matrix is
#' derived from the number of rows every `coeftest()` result actually has,
#' instead of hand-tuned constants that only held for one set of model sizes.
#'
#' @param forecast List as returned by `HAR()`; element 4 holds the fitted
#'   models and element 5 a matrix with one column per model.
#' @return The character matrix that is written to the Excel file.
betaTable <- function(forecast) {
  dig <- 4 # number of digits estimates and standard errors are rounded to
  fits <- forecast[[4]]

  # Coefficient test using the HC1 heteroskedasticity-robust covariance.
  robust_test <- function(fit) {
    coeftest(fit, vcov = vcovHC(fit, type = "HC1"))
  }

  Robust_T_test <- list(
    "modelA" = robust_test(fits$modelA),
    "model" = robust_test(fits$model),
    "modelQ" = robust_test(fits$modelQ),
    "modelF" = robust_test(fits$modelF),
    "modelC" = robust_test(fits$modelC),
    "modelS" = robust_test(fits$modelS),
    "modelJ" = robust_test(fits$modelJ)
  )

  # Columns 1:2 of the stacked tests are the estimates and their std. errors.
  Robust_T_test_matrix <- do.call("rbind", Robust_T_test)[, 1:2]

  Robust_T_test_table <- matrix(NA, nrow = 10, ncol = 7)
  colnames(Robust_T_test_table) <- c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
  rownames(Robust_T_test_table) <- c("b0", "b1", "b2", "b3", "b1Q", "b2Q", "b3Q", "b1+", "b1-", "bJ")

  # Rows each model contributes to the stacked matrix, and how many rows
  # precede its block.
  n_coefs <- vapply(Robust_T_test, nrow, integer(1))
  rows_before <- cumsum(n_coefs) - n_coefs

  # Destination rows in the table: the first n rows for most models; SHAR and
  # HAR-J map onto the rows of their model-specific parameters.
  table_rows <- lapply(n_coefs, seq_len)
  table_rows$modelS <- c(1, 3, 4, 8, 9)
  table_rows$modelJ <- c(1:4, 10)

  for (k in seq_along(Robust_T_test)) {
    src <- rows_before[[k]] + seq_len(n_coefs[[k]])
    Robust_T_test_table[table_rows[[k]], k] <- paste0(
      round(Robust_T_test_matrix[src, 1], dig), " (",
      round(Robust_T_test_matrix[src, 2], dig), ")"
    )
  }

  # Appending R^squareds to table (row 11 is an empty separator row)
  Robust_T_test_table_w_rsquareds <- rbind(
    Robust_T_test_table,
    NA * c(1:7),
    round(forecast[[5]][1, ], dig),
    round(forecast[[5]][2, ], dig)
  )
  rownames(Robust_T_test_table_w_rsquareds)[c(12, 13)] <- c("R^2", "Adj.R^2")

  # Exporting Results
  write.xlsx(Robust_T_test_table_w_rsquareds,
             paste0(getwd(), "/Results/Betas_w_all_rsquareds.xlsx"),
             sheetName = "Sheet1", col.names = TRUE, row.names = TRUE, append = FALSE)

  return(Robust_T_test_table_w_rsquareds)
}
593 |
#' Summary statistics of the realized variance of several assets
#'
#' For every sheet name in `stocks` the intraday prices are read from
#' "Data/<freq>.xlsx", converted to log prices and passed through
#' `estimator()` and `DataSet_Scalar()`. Min, mean, median, max, standard
#' deviation, skewness and kurtosis of the resulting `RV` column are collected
#' in one row per asset, multiplied by `scalar`, and exported to
#' "Results/SummaryStats_<symbols>_<freq>.xlsx" under the working directory.
#'
#' @param stocks Character vector of sheet names; the last 10 characters
#'   (e.g. " US Equity") are stripped to obtain the row label.
#' @param scalar Factor every reported statistic is multiplied by.
#' @param freq Name (without extension) of the workbook inside "Data/".
#' @return Character matrix with a `Symbol` column followed by the seven
#'   statistics, one row per asset.
summaryStats <- function(stocks, scalar = 1, freq = "5min_extended") {
  # Row label: the sheet name without its trailing 10 characters.
  Symbol <- substr(stocks, 1, nchar(stocks) - 10)

  sumStats <- matrix(NA, nrow = length(stocks), ncol = 7)
  colnames(sumStats) <- c("Min", "Mean", "Median", "Max", "Std. Dev.", "Skewness", "Kurtosis")
  rownames(sumStats) <- Symbol

  # Realized measures of all assets in 'stocks', one data.frame per asset.
  excel_file <- paste0("Data/", freq, ".xlsx")
  DataSets <- lapply(stocks, function(data_name) {
    raw <- as.data.frame(read_excel(excel_file, sheet = data_name))
    raw <- raw[c("BarTp", "Trade")]
    colnames(raw) <- c("Dates", "Open")
    raw <- raw[-c(1:4), ] # the first four rows are not price observations
    raw$Open <- as.numeric(raw$Open)
    log_prices <- cbind(raw$Dates, as.data.frame(log(raw$Open)))
    colnames(log_prices) <- c("Dates", "Open")

    DataSet_Scalar(estimator(log_prices)) # DataSet Scalar Function!
  })
  names(DataSets) <- stocks

  # Fill the table row by row; every value is scaled by 'scalar'.
  # NOTE(review): skewness and kurtosis are multiplied by `scalar` as well,
  # matching the message printed below, although both are scale-free
  # statistics - confirm that this is intended.
  for (stk in seq_along(stocks)) {
    curRV <- DataSets[[stk]]$RV # realized variance of the current asset
    location_scale <- c(min(curRV), mean(curRV), median(curRV), max(curRV), sd(curRV))
    shape <- c(skewness(curRV), kurtosis(curRV))
    sumStats[stk, ] <- c(round(location_scale * scalar, 3), round(shape * scalar, 1))
  }

  sumStats <- cbind(Symbol, sumStats)
  Symbol_str <- str_c(Symbol, collapse = "_") # stringr function dependency
  sumStats_file_path <- paste0(getwd(), "/Results/SummaryStats_", Symbol_str, "_", freq, ".xlsx")

  # Exporting Summary Stats
  write.xlsx(sumStats, sumStats_file_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)
  print(paste0("All values in the Summary Statistics are scaled by a factor of: ", scalar))
  return(sumStats)
}
647 |
#' Forecast-error tables (raw and benchmarked to HAR) for several assets
#'
#' For every sheet name in `stocks` the intraday prices are read from
#' "Data/<freq>.xlsx", converted to log prices, passed through `estimator()`
#' and `DataSet_Scalar()`, and forecast with `HAR()`. Element 2 of each
#' forecast (the list of errors, filled in column order) is scaled by
#' `errorsScalar`, rounded to `dig` digits and stored in a character matrix
#' with one MSE and one MAE column per model. A second matrix divides every
#' MSE/MAE column by the HAR MSE/MAE column. Both are exported to
#' "Results/Errors_UnBenchmarked.xlsx" and
#' "Results/Errors_Benchmarked_to_HAR.xlsx" under the working directory.
#'
#' @param stocks Character vector of sheet names; the last 10 characters
#'   (e.g. " US Equity") are stripped to obtain the row label.
#' @param dig Number of digits the errors are rounded to.
#' @param errorsScalar Factor the errors are multiplied by before rounding.
#' @param freq Name (without extension) of the workbook inside "Data/". When
#'   NULL (the default) the global variable `freq` is used, as before this
#'   parameter existed; if that is not defined either, "5min_extended".
#' @return A character vector: the raw matrix followed by the benchmarked
#'   matrix, both flattened column by column.
errorsTable <- function(stocks, dig = 5, errorsScalar = 1, freq = NULL) {
  if (is.null(freq)) {
    freq <- get0("freq", envir = globalenv(), mode = "character",
                 ifnotfound = "5min_extended")
  }

  model_names <- c("AR(3)", "HAR", "HARQ", "HARQ-F", "CHAR", "SHAR", "HAR-J")
  Symbol <- substr(stocks, 1, nchar(stocks) - 10)

  # Row 1 labels the error type; every model owns an MSE and an MAE column.
  errorsMatrix <- matrix(NA, nrow = length(stocks) + 1, ncol = 2 * length(model_names))
  rownames(errorsMatrix) <- c("", Symbol)
  errorsMatrix[1, ] <- rep(c("MSE", "MAE"), times = length(model_names))
  colnames(errorsMatrix) <- rep(model_names, each = 2)

  # Forecasts of all assets in 'stocks', one HAR() result per asset.
  excel_file <- paste0("Data/", freq, ".xlsx")
  forecasts <- lapply(stocks, function(data_name) {
    raw <- as.data.frame(read_excel(excel_file, sheet = data_name))
    raw <- raw[c("BarTp", "Trade")]
    colnames(raw) <- c("Dates", "Open")
    raw <- raw[-c(1:4), ] # the first four rows are not price observations
    raw$Open <- as.numeric(raw$Open)
    log_prices <- cbind(raw$Dates, as.data.frame(log(raw$Open)))
    colnames(log_prices) <- c("Dates", "Open")

    HAR(DataSet_Scalar(estimator(log_prices)))
  })
  names(forecasts) <- stocks

  # Rows 2..n+1 hold the assets (row 1 is the MSE/MAE label row).
  stock_rows <- seq_along(stocks) + 1
  for (s in seq_along(stocks)) {
    all_errors <- forecasts[[s]][[2]]
    for (e in seq_along(all_errors)) {
      errorsMatrix[s + 1, e] <- round(all_errors[[e]] * errorsScalar, dig)
    }
  }

  # Benchmarked copy: plain assignment already yields an independent copy.
  errorsMatrix_BM <- errorsMatrix
  har_cols <- grep("^HAR$", colnames(errorsMatrix_BM))
  HAR_MSE_idx <- har_cols[1]
  HAR_MAE_idx <- har_cols[2]

  for (e in seq_len(ncol(errorsMatrix_BM))) {
    # Odd columns are MSEs, even columns are MAEs.
    bench_idx <- if (e %% 2 != 0) HAR_MSE_idx else HAR_MAE_idx
    errorsMatrix_BM[stock_rows, e] <-
      as.numeric(errorsMatrix[stock_rows, e]) / as.numeric(errorsMatrix[stock_rows, bench_idx])
  }

  errorsMatrix_file_path <- paste0(getwd(), "/Results/Errors_UnBenchmarked.xlsx")
  errorsMatrix_BM_file_path <- paste0(getwd(), "/Results/Errors_Benchmarked_to_HAR.xlsx")
  # Exporting Errors Tables
  write.xlsx(errorsMatrix, errorsMatrix_file_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)
  write.xlsx(errorsMatrix_BM, errorsMatrix_BM_file_path, sheetName = "Sheet1",
             col.names = TRUE, row.names = TRUE, append = FALSE)

  # NOTE(review): c() flattens both matrices into one character vector and
  # drops their dimnames; list(errorsMatrix, errorsMatrix_BM) is probably what
  # was meant. Kept as is so the return shape does not change for callers.
  errorsTables <- c(errorsMatrix, errorsMatrix_BM)
  return(errorsTables)
}
737 |
738 |
######## SETTINGS ########
# Sheet names of the assets and the workbook ("Data/<freq>.xlsx") to read.
stocks <- c("SPY US Equity", "MSFT US Equity", "MCD US Equity", "JPM US Equity", "DIS US Equity")
freq <- "5min_extended"
##########################

# Data Prep: load the first asset and build its log-price series
log_returns_scalar <- 1
excel_file <- paste0("Data/", freq, ".xlsx")
data_name <- stocks[1]
data <- as.data.frame(read_excel(excel_file, sheet = data_name))
data <- setNames(data[c("BarTp", "Trade")], c("Dates", "Open"))
data <- data[-(1:4), ] # the first four rows are not price observations
data$Open <- as.numeric(data$Open)
data_log <- cbind(data$Dates, as.data.frame(log(data$Open) * log_returns_scalar))
colnames(data_log) <- c("Dates", "Open")

# Realized measures from the log prices (not the raw prices), scaled, then
# forecast with the last 96 days held out.
DataSet <- DataSet_Scalar(estimator(data_log))
forecast <- HAR(DataSet, out_sample = 96, extra_plots = TRUE)
760 |
761 |
762 | #### FINAL SECTION: Generating tables, retrieving out-of-sample trading dates for plot, etc ######
763 |
764 | ###### This section generates the data exports for the tables in the paper
765 | # beta_table = betaTable(forecast)
766 | #summStats = summaryStats(stocks)
767 | #errorsTables = errorsTable(stocks)
768 |
769 | # Function for plotDates (out-of-sample forecasting)
770 | # plotDates = substring(data$Dates, first = 1, last = 5)
771 | # plotDates = unique(plotDates)[which(!is.na(unique(plotDates)))]
772 | # plotDates = as.Date(as.numeric(plotDates), origin = "1899-12-30")
773 | # length(plotDates)
774 |
775 | # Realized Quarticity Plots of the 5 stocks
776 | # for (i in 1:5) {
777 | # data_name = stocks[i]
778 | # data = as.data.frame(read_excel(excel_file, sheet = data_name))
779 | # data = data[c("BarTp", "Trade")]
780 | # colnames(data) = c("Dates", "Open")
781 | # data = data[-c(1:4),]
782 | # data$Open = as.numeric(data$Open)
783 | # data_log = cbind(data$Dates, as.data.frame(log(data$Open)*log_returns_scalar))
784 | # colnames(data_log) = c("Dates", "Open")
785 | #
786 | # #DataSet = estimator(data)
787 | # DataSet = estimator(data_log)
788 | # DataSet = DataSet_Scalar(DataSet)
789 | # plot(DataSet$RQ, type ="l", main = data_name)
790 | # print(data_name)
791 | # print(summary(DataSet$RQ))
792 | # }
793 |
794 |
--------------------------------------------------------------------------------