├── midasr-user-guide.pdf ├── .gitignore ├── README.md ├── midasr-examples.R ├── guideR.bib └── midasr-user-guide.Rnw /midasr-user-guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpiktas/midasr-user-guide/HEAD/midasr-user-guide.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | *.synctex.gz 3 | .Rhistory 4 | .RData 5 | # Example code in package build process 6 | *-Ex.R 7 | 8 | #Tangled code 9 | midasr-user-guide.R 10 | 11 | *.sty 12 | *.cls 13 | *guide-* 14 | 15 | #Sweave unnecessary files 16 | *.aux 17 | *.log 18 | *.out 19 | *.tex 20 | *.bbl 21 | *.blg 22 | #Emacs and knitr files 23 | *.# 24 | *.Rt 25 | *.swp 26 | cache/ 27 | figure/ 28 | .Rproj.user 29 | *.Rproj 30 | jss_submission 31 | 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | midasr-user-guide 2 | ================= 3 | 4 | This repository contains the user guide for the midasr R package and the accompanying files. 5 | 6 | Here is a brief description of the files: 7 | 8 | - ```midasr-user-guide.pdf``` - the user guide for the midasr R package 9 | - ```midasr-user-guide.Rnw``` - the .Rnw file containing the user guide 10 | - ```midasr-examples.R``` - the file with the examples demonstrating the usage of the midasr package. Most of the examples come from the code in the user guide.
11 | -------------------------------------------------------------------------------- /midasr-examples.R: -------------------------------------------------------------------------------- 1 | # Preliminaries ----------------------------------------------------------- 2 | # rm(list = ls()) # Uncomment to clean up the workspace 3 | library(midasr) # loads the midasr package 4 | 5 | 6 | # Example of simulated MIDAS regression --------------------------------------------------- 7 | 8 | ## Sets a seed for RNG ### 9 | set.seed(1001) # just for comparability of results 10 | ## Number of low-frequency observations 11 | n <- 250 12 | ## Linear trend and higher-frequency explanatory variables (e.g. quarterly and monthly) 13 | trend <- c(1:n) 14 | x <- rnorm(4 * n) 15 | z <- rnorm(12 * n) 16 | ## Exponential Almon polynomial constraint-consistent coefficients 17 | fn_x <- nealmon(p = c(1, -0.5), d = 8) 18 | fn_z <- nealmon(p = c(2, 0.5, -0.1), d = 17) 19 | ## Simulated low-frequency series (e.g. yearly) 20 | y <- 2 + 0.1 * trend + mls(x, 0:7, 4) %*% fn_x + mls(z, 0:16, 12) %*% fn_z + rnorm(n) 21 | ## Figure 1 (coefficients) 22 | plot(fn_z, col = "red") 23 | points(fn_x) 24 | 25 | 26 | # Examples of MIDAS regression specification in midasr -------------------- 27 | 28 | ## OLS using lm (eq_u is then overwritten by the midas_u fit on the next line) 29 | eq_u <- lm(y ~ trend + mls(x, k = 0:7, m = 4) + mls(z, k = 0:16, m = 12)) 30 | eq_u <- midas_u(y ~ trend + mls(x, 0:7, 4) + mls(z, 0:16, 12)) 31 | summary(eq_u) 32 | 33 | ## NLS using midas_r 34 | eq_r <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + mls(z, 0:16, 12, nealmon), start = list(x = c(1, 35 | -0.5), z = c(2, 0.5, -0.1))) 36 | summary(eq_r) 37 | deriv_tests(eq_r, tol = 1e-06) 38 | coef(eq_r) 39 | coef(eq_r, midas = TRUE) 40 | amweights(p = c(1, -0.5), d = 8, m = 4, weight = nealmon, type = "C") 41 | nealmon(p = c(1, -0.5), d = 4) 42 | ## NLS using midas_r with aggregates 43 | eq_r <- midas_r(y ~ trend + mls(x, 0:7, 4, amweights, nealmon, "C") + mls(z, 0:16, 12, nealmon), 44 | start =
list(x = c(1, -0.5), z = c(2, 0.5, -0.1))) 45 | summary(eq_r) 46 | ## Table 3-related NLS variations using midas_r (trend dropped in the table and can be 47 | ## omitted here) 48 | fn <- gompertzp 49 | eq_r1 <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + mls(z, 0:16, 12, fn), start = list(x = c(1, 50 | -0.5), z = c(1, 0.5, 0.1))) 51 | summary(eq_r1) 52 | eq_r2 <- midas_r(y ~ trend + mls(x, 0:7, 4) + mls(z, 0:16, 12, nealmon), start = list(z = c(1, 53 | -0.5))) 54 | summary(eq_r2) 55 | eq_r3 <- midas_r(y ~ trend + mls(y, 1:2, 1) + mls(x, 0:7, 4, nealmon), start = list(x = c(1, 56 | -0.5))) 57 | summary(eq_r3) 58 | eq_r4 <- midas_r(y ~ trend + mls(y, 1:2, 1, "*") + mls(x, 0:7, 4, nealmon), start = list(x = c(1, 59 | -0.5))) 60 | summary(eq_r4) 61 | eq_r5 <- midas_r(y ~ trend + mls(y, 1:4, 1, nealmon) + mls(x, 0:7, 4, nealmon), start = list(y = c(1, 62 | -0.5), x = c(1, -0.5))) 63 | summary(eq_r5) 64 | eq_r6 <- midas_r(y ~ trend + mls(x, 0:7, 4, amweights, nealmon, "A"), start = list(x = c(1, 65 | 1, 1, -0.5))) 66 | summary(eq_r6) 67 | eq_r7 <- midas_r(y ~ trend + mls(x, 0:7, 4, amweights, nealmon, "B"), start = list(x = c(1, 68 | 1, -0.5))) 69 | summary(eq_r7) 70 | eq_r8 <- midas_r(y ~ trend + mls(x, 0:7, 4, amweights, nealmon, "C"), start = list(x = c(1, 71 | -0.5))) 72 | summary(eq_r8) 73 | fn <- function(p, d) { 74 | p[1] * c(1:d)^p[2] 75 | } 76 | eq_r9 <- midas_r(y ~ trend + mls(x, 0:101, 4, fn), start = list(x = rep(0, 2))) 77 | summary(eq_r9) 78 | 79 | 80 | # Testing the adequacy of MIDAS regression -------------------------------- 81 | 82 | ## DGP-consistent specification 83 | eq_r <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + mls(z, 0:16, 12, nealmon), start = list(x = c(1, 84 | -0.5), z = c(2, 0.5, -0.1))) 85 | summary(eq_r) 86 | hAh_test(eq_r) 87 | hAhr_test(eq_r) 88 | ## Mis-specification of constraint on z coefficients 89 | eq_rb <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + mls(z, 0:12, 12, nealmon), start = list(x = c(1, 90 | -0.5), z = c(2,
-0.1))) 91 | hAh_test(eq_rb) 92 | hAhr_test(eq_rb) 93 | summary(eq_rb) 94 | 95 | 96 | # Model selection --------------------------------------------------------- 97 | 98 | ## Potential sets of models 99 | set_x <- expand_weights_lags(weights = c("nealmon", "almonp"), from = 0, to = c(5, 10), m = 1, 100 | start = list(nealmon = rep(1, 2), almonp = rep(1, 3))) 101 | set_z <- expand_weights_lags(c("nealmon", "nealmon"), 0, c(10, 20), 1, start = list(nealmon = rep(1, 102 | 2), nealmon = rep(1, 3))) 103 | expand_weights_lags(weights = c("nealmon", "nbeta"), from = 1, to = c(2, 3), m = 1, start = list(nealmon = rep(0, 104 | 2), nbeta = rep(0.5, 3))) 105 | ## Estimation and selection of models 106 | eqs.ic <- midas_r_ic_table(y ~ trend + mls(x, 0, m = 4) + fmls(z, 0, m = 12), table = list(z = set_z, 107 | x = set_x), start = c(`(Intercept)` = 0, trend = 0)) 108 | mod <- modsel(eqs.ic, IC = "AIC", type = "restricted") 109 | 110 | 111 | # Forecast combinations --------------------------------------------------- 112 | 113 | ## With model selection and horizon 1:3 114 | cbfc <- select_and_forecast(y ~ trend + mls(x, 0, m = 4) + mls(z, 0, m = 12), from = list(x = c(4, 115 | 8, 12), z = c(12, 24, 36)), to = list(x = rbind(c(14, 19), c(18, 23), c(22, 27)), z = rbind(c(22, 116 | 27), c(34, 39), c(46, 51))), insample = 1:200, outsample = 201:250, weights = list(x = c("nealmon", 117 | "almonp"), z = c("nealmon", "almonp")), wstart = list(nealmon = rep(1, 3), almonp = rep(1, 118 | 3)), IC = "AIC", seltype = "restricted", ftype = "fixed", measures = c("MSE", "MAPE", "MASE"), 119 | fweights = c("EW", "BICW", "MSFE", "DMSFE")) 120 | cbfc$accuracy$individual 121 | cbfc$accuracy$average 122 | 123 | ## With a given specification one period ahead 124 | nealmon2 <- nealmon 125 | nealmon3 <- nealmon 126 | cbfc1 <- select_and_forecast(y ~ trend + mls(x, 0, 4) + mls(z, 0, 12), from = list(x = c(4), 127 | z = c(12)), to = list(x = rbind(c(14, 14)), z = rbind(c(22, 22))), insample = 1:200,
outsample = 201:250, 128 | weights = list(x = c("nealmon3"), z = c("nealmon2")), wstart = list(nealmon3 = c(10, 1, 129 | -0.1), nealmon2 = c(2, -0.1)), IC = "AIC", seltype = "restricted", ftype = "fixed", 130 | measures = c("MSE", "MAPE", "MASE"), fweights = c("EW", "BICW", "MSFE", "DMSFE")) 131 | cbfc1$accuracy$individual 132 | cbfc1$accuracy$average 133 | 134 | 135 | # Manual model selection -------------------------------------------------- 136 | 137 | ## First split data into in-sample and out-of-sample data 138 | 139 | datasplit <- split_data(list(y = y, x = x, z = z, trend = trend), insample = 1:200, outsample = 201:250) 140 | 141 | ## Fit two models 142 | 143 | mod1 <- midas_r(y ~ trend + mls(x, 4:14, 4, nealmon3) + mls(z, 12:22, 12, nealmon2), data = datasplit$indata, 144 | start = list(x = c(10, 1, -0.1), z = c(2, -0.1))) 145 | 146 | mod2 <- midas_r(y ~ trend + mls(x, 4:20, 4, nealmon3) + mls(z, 12:25, 12, nealmon2), data = datasplit$indata, 147 | start = list(x = c(10, 1, -0.1), z = c(2, -0.1))) 148 | 149 | ## Calculate average forecasts 150 | 151 | avgf <- average_forecast(list(mod1, mod2), data = list(y = y, x = x, z = z, trend = trend), 152 | insample = 1:200, outsample = 201:250, type = "fixed", measures = c("MSE", "MAPE", "MASE"), 153 | fweights = c("EW", "BICW", "MSFE", "DMSFE")) 154 | 155 | avgf$accuracy 156 | avgf$forecast 157 | avgf$avgforecast 158 | 159 | ## Produce rolling forecasts, where for each forecast models are reestimated using rolling 160 | ## window 161 | avgrollf <- average_forecast(list(mod1, mod2), data = list(y = y, x = x, z = z, trend = trend), 162 | insample = 1:200, outsample = 201:210, type = "rolling", measures = c("MSE", "MAPE", "MASE"), 163 | fweights = c("EW", "BICW", "MSFE", "DMSFE")) 164 | 165 | avgrollf$accuracy 166 | 167 | ## Produce recursive forecasts where for each forecast models are reestimated by recursively 168 | ## increasing estimation sample 169 | avgrecf <- average_forecast(list(mod1, mod2), data = list(y =
y, x = x, z = z, trend = trend), 170 | insample = 1:200, outsample = 201:210, type = "recursive", measures = c("MSE", "MAPE", "MASE"), 171 | fweights = c("EW", "BICW", "MSFE", "DMSFE")) 172 | 173 | avgrecf$accuracy 174 | 175 | 176 | # Inspect objects produced by midasr -------------------------------------- 177 | 178 | objects(eq_r) 179 | objects(cbfc) 180 | ## Accessed e.g. by 181 | eq_r$opt 182 | cbfc$bestlist 183 | ## Info: help on a specific function 184 | `?`(select_and_forecast) 185 | ## On midasr package 186 | `?`(`?`(midasr)) 187 | 188 | 189 | 190 | # MIDAS Matlab toolbox example ---------------------------------------------------------------------- 191 | 192 | ## Get the data 193 | data("USqgdp") 194 | data("USpayems") 195 | 196 | ## Convert to ts with the exact sample used in MIDAS Matlab toolbox 197 | y <- window(USqgdp, end = c(2011, 2)) 198 | x <- window(USpayems, end = c(2011, 7)) 199 | 200 | ## Calculate the log differences 201 | yg <- diff(log(y))*100 202 | xg <- diff(log(x))*100 203 | 204 | ## Align data 205 | nx <- ts(c(NA, xg, NA, NA), start = start(x), frequency = 12) 206 | ny <- ts(c(rep(NA, 33), yg, NA), start = start(x), frequency = 4) 207 | 208 | 209 | ## Replicate the graph of the data 210 | plot.ts(nx, xlab = "Time", ylab = "Percentages", col = 4, ylim = c(-5, 6)) 211 | lines(ny, col = 2) 212 | 213 | 214 | ## Use the same sample as in MIDAS Matlab toolbox 215 | xx <- window(nx, start = c(1985, 1), end = c(2009, 3)) 216 | yy <- window(ny, start = c(1985, 1), end = c(2009, 1)) 217 | 218 | ## Estimate the models 219 | beta0 <- midas_r(yy ~ mls(yy, 1, 1) + mls(xx, 3:11, 3, nbeta), start = list(xx = c(1.7, 1, 5))) 220 | coef(beta0) 221 | 222 | ## Note the nbetaMT, which is different from nbeta 223 | betan <- midas_r(yy ~ mls(yy, 1, 1) + mls(xx, 3:11, 3, nbetaMT), start = list(xx = c(2, 1, 5, 224 | 0))) 225 | coef(betan) 226 | 227 | um <- midas_r(yy ~ mls(yy, 1, 1) + mls(xx, 3:11, 3), start = NULL) 228 | coef(um) 229 | 230 | 231 | ## Split
the data into in-sample and out-of-sample 232 | fulldata <- list(xx = window(nx, start = c(1985, 1), end = c(2011, 6)), yy = window(ny, start = c(1985, 233 | 1), end = c(2011, 2))) 234 | insample <- 1:length(yy) 235 | outsample <- (1:length(fulldata$yy))[-insample] 236 | 237 | ## Calculate the individual forecasts of each of the models and their weighted average 238 | avgf <- average_forecast(list(beta0, betan, um), data = fulldata, insample = insample, outsample = outsample) 239 | sqrt(avgf$accuracy$individual$MSE.out.of.sample) 240 | -------------------------------------------------------------------------------- /guideR.bib: -------------------------------------------------------------------------------- 1 | 2 | 3 | @article{chen:ngo1, 4 | title={{News-Good or Bad-and its Impact on Volatility Forecasts over Multiple Horizons}}, 5 | author={Chen, X. and Ghysels, E.}, 6 | journal={Review of Financial Studies}, 7 | year={2011}, 8 | volume = 24, 9 | pages = {46-81} 10 | } 11 | 12 | 13 | @unpublished{ghysels_etal_midas-02a, 14 | Author="Ghysels, Eric and Santa-Clara, Pedro and Valkanov, Rossen", 15 | Title="{The MIDAS touch: Mixed data Sampling Regression Models}", 16 | Year=2002, 17 | Note={Working paper, UNC and UCLA} 18 | } 19 | 20 | @techreport{alper2008forecasting, 21 | title={{Forecasting Stock Market Volatilities Using MIDAS Regressions: An Application to the Emerging Markets}}, 22 | author={Alper, C.E. and Fendoglu, S. and Saltoglu, B.}, 23 | year={2008}, 24 | institution={MPRA Paper No. 7460} 25 | } 26 | 27 | @article{andreou:rmm, 28 | title={Regression Models With Mixed Sampling Frequencies}, 29 | author={Andreou, Elena and Ghysels, Eric and Kourtellos, Andros}, 30 | journal={Journal of Econometrics}, 31 | volume = {158}, 32 | pages={246-261}, 33 | year={2010} 34 | } 35 | 36 | @article{armesto:mic, 37 | title={Measuring the Information Content of the Beige Book: A Mixed Data Sampling Approach}, 38 | author={Armesto, Michelle T.
and Hernandez-Murillo, Rub{\'e}n and Owyang, Michael and Piger, Jeremy}, 39 | journal={Journal of Money, Credit and Banking}, 40 | volume = {41}, 41 | pages = {35--55}, 42 | year = 2009 43 | } 44 | 45 | 46 | @article{clements2008mfm, 47 | title={{Macroeconomic Forecasting with Mixed Frequency Data: Forecasting US output growth}}, 48 | author={Clements, M. and Galv{\~a}o, A.}, 49 | journal={Journal of Business and Economic Statistics}, 50 | volume={26}, 51 | pages={546--554}, 52 | year={2008} 53 | } 54 | 55 | @article{clements2008fuo, 56 | title={{Forecasting US output growth using Leading Indicators: An appraisal using MIDAS models}}, 57 | author={Clements, M.P. and Galv{\~a}o, A.B.}, 58 | journal={Journal of Applied Econometrics (forthcoming)}, 59 | year={2008} 60 | } 61 | 62 | @unpublished{frale-famidas, 63 | title={{FaMIDAS: A Mixed Frequency Factor Model with MIDAS structure}}, 64 | author={Frale, C. and Monteforte, L.}, 65 | note={Bank of Italy Temi di Discussione - Working Paper No. 788}, 66 | year={2011} 67 | } 68 | 69 | @article{kuzin-midas, 70 | title={{MIDAS versus mixed-frequency VAR: Nowcasting GDP in the Euro Area}}, 71 | author={Kuzin, V. and Marcellino, M. and Schumacher, C.}, 72 | journal={International Journal of Forecasting}, 73 | volume={27}, 74 | pages={529--542}, 75 | year={2011} 76 | } 77 | 78 | @unpublished{monteforte-real, 79 | title={{Real time forecasts of inflation: the role of financial variables}}, 80 | author={Monteforte, L. and Moretti, G.}, 81 | year={2012}, 82 | note = {{\it Journal of Forecasting}, (forthcoming)} 83 | } 84 | 85 | @article{marcellino2010factor, 86 | title={{Factor MIDAS for Nowcasting and Forecasting with Ragged-Edge Data: A Model Comparison for German GDP}}, 87 | author={Marcellino, M. 
and Schumacher, C.}, 88 | journal={Oxford Bulletin of Economics and Statistics}, 89 | volume={72}, 90 | pages={518--550}, 91 | year={2010} 92 | } 93 | 94 | @article{schumacher2008rtf, 95 | title={Real-time Forecasting of German GDP Based on a Large Factor Model with Monthly and Quarterly Data}, 96 | author={Schumacher, Christian and Breitung, Jorg}, 97 | journal={International Journal of Forecasting}, 98 | volume={24}, 99 | pages={386--398}, 100 | year={2008} 101 | } 102 | 103 | @article{AGK_macro, 104 | title={Should macroeconomic forecasters look at daily financial data?}, 105 | author={Andreou, E. and Ghysels, E. and Kourtellos, A.}, 106 | YEAR=2013, 107 | journal={Journal of Business and Economic Statistics}, 108 | volume=31, 109 | pages={240-251} 110 | } 111 | 112 | @article{hamilton2008daily, 113 | title={{Daily monetary policy shocks and new home sales}}, 114 | author={Hamilton, J.D.}, 115 | journal={Journal of Monetary Economics}, 116 | volume={55}, 117 | pages={1171--1190}, 118 | year={2008} 119 | } 120 | 121 | @article{ghysels2007valuation, 122 | title={{Valuation in US commercial real estate}}, 123 | author={Ghysels, E. and Plazzi, A. and Valkanov, R.}, 124 | journal={European Financial Management}, 125 | volume={13}, 126 | pages={472--497}, 127 | year={2007} 128 | } 129 | 130 | @techreport{valkanov-does, 131 | title={{Does the Early Exercise Premium Contain Information about Future Underlying Returns?}}, 132 | author={Valkanov, R. and Yadav, P. and Zhang, Y.}, 133 | institution={Discussion Paper UCSD}, 134 | year = {2009} 135 | } 136 | 137 | @article{colacito:cmd, 138 | title={{A component model for dynamic correlations}}, 139 | author={Colacito, R. and Engle, R.F. and Ghysels, E.}, 140 | year={2011}, 141 | journal={Journal of Econometrics}, 142 | volume= 164, 143 | pages= {45-59} 144 | } 145 | 146 | @article{baele2010determinants, 147 | title={{The determinants of stock and bond return comovements}}, 148 | author={Baele, L. and Bekaert, G. 
and Inghelbrecht, K.}, 149 | journal={Review of Financial Studies}, 150 | year={2010}, 151 | volume = {23}, 152 | pages = {2374--2428} 153 | } 154 | 155 | 156 | @unpublished{engle2006ess, 157 | title={{On the Economic Sources of Stock Market Volatility}}, 158 | author={Engle, R.F. and Ghysels, E. and Sohn, B.}, 159 | note={{\it Review of Economics and Statistics} (forthcoming)}, 160 | year={2012} 161 | } 162 | 163 | 164 | @unpublished{brown_ferreira-03, 165 | Author="Brown, David P. and Ferreira, Miguel A.", 166 | Title="{The Information in the Idiosyncratic Volatility of Small Firms}", 167 | Year=2003, 168 | Note={Working paper, University of Wisconsin and ISCTE} 169 | } 170 | 171 | @techreport{chen2009hybrid, 172 | title={{The HYBRID GARCH class of models}}, 173 | author={Chen, X. and Ghysels, E. and Wang, F.}, 174 | year={2010}, 175 | institution={Working Paper, UNC} 176 | } 177 | 178 | @article{chen2009role, 179 | title={{On the role of Intra-Daily Seasonality in HYBRID GARCH Models}}, 180 | author={Chen, X. and Ghysels, E. and Wang, F.}, 181 | year={2011}, 182 | journal={Journal of Time Series Econometrics}, 183 | volume = 3 184 | } 185 | 186 | @article{clements2008quantile, 187 | title={{Quantile forecasts of daily exchange rate returns from forecasts of realized volatility}}, 188 | author={Clements, M.P. and Galv{\~a}o, A.B.
and Kim, J.H.}, 189 | journal={Journal of Empirical Finance}, 190 | volume={15}, 191 | pages={729--750}, 192 | year={2008} 193 | } 194 | 195 | @article{ForsGhys2003b, 196 | author = {Lars Forsberg and Eric Ghysels}, 197 | year = 2006, 198 | title = {Why do absolute returns predict volatility so well?}, 199 | Journal = {Journal of Financial Econometrics}, 200 | volume = 6, 201 | pages = {31-67} 202 | } 203 | 204 | @article{ghysels_etal_midas-jfe, 205 | Author="Ghysels, Eric and Santa-Clara, Pedro and Valkanov, Ross", 206 | Title="{There is a risk-return tradeoff after all}", 207 | Year=2005, 208 | Journal={Journal of Financial Economics}, 209 | volume = 76, 210 | pages= {509-548} 211 | } 212 | 213 | 214 | @article{ghysels_etal_midas-joe, 215 | Author="Ghysels, E. and Santa-Clara, P. and Valkanov, R.", 216 | Title="{Predicting volatility: getting the most out of return data sampled at different frequencies}", 217 | Year=2006, 218 | Journal={Journal of Econometrics}, 219 | Volume = 131, 220 | Pages = {59-95} 221 | } 222 | 223 | 224 | @article{ghyselsSinko06, 225 | author = {Ghysels, E. and A. Sinko}, 226 | year = 2006, 227 | title = {Comment on Realized variance and market microstructure noise by P. R. Hansen and Asger Lunde}, 228 | journal ={Journal of Business and Economic Statistics}, 229 | volume = {24}, 230 | pages = {192-194} 231 | } 232 | 233 | 234 | 235 | @article{Ghysels_Sinko2005, 236 | author = {E. Ghysels and A. Sinko}, 237 | year = 2011, 238 | title = {Volatility Prediction and Microstructure Noise}, 239 | journal = {Journal of Econometrics}, 240 | volume = 160, 241 | pages = {257-271} 242 | } 243 | 244 | 245 | @unpublished{ghysels-multi, 246 | title={{Multi-Period Forecasts of Variance: Direct, Iterated, and Mixed-Data Approaches}}, 247 | author={Ghysels, E. and Rubia, A. 
and Valkanov, R.}, 248 | year={2008}, 249 | Note={Working paper, Alicante, UCSD and UNC} 250 | } 251 | 252 | @article{leon2007rbr, 253 | title={{The relationship between risk and expected return in Europe}}, 254 | author={Le{\'o}n, {\'A}ngel and Nave, Juan M. and Rubio, Gonzalo}, 255 | journal={Journal of Banking and Finance}, 256 | volume={31}, 257 | pages={495--512}, 258 | year={2007} 259 | } 260 | @inproceedings{andreou2011forecasting, 261 | title={Forecasting with mixed-frequency data}, 262 | author={Andreou, E. and Ghysels, E. and Kourtellos, A.}, 263 | booktitle={Oxford Handbook of Economic Forecasting}, 264 | editor={Clements, M. and Hendry, D.}, 265 | pages={225-245}, 266 | year={2011} 267 | } 268 | 269 | @inproceedings{ghysels_valkanov_chap, 270 | title={{Forecasting volatility with MIDAS}}, 271 | author={Ghysels, E. and Valkanov, R.}, 272 | Booktitle={Handbook of Volatility Models and Their Applications}, 273 | editor={Bauwens, L. and Hafner, C. and Laurent, S.}, 274 | pages={383--401}, 275 | year={2012}, 276 | publisher={{John Wiley \& Sons}} 277 | } 278 | @article{armesto2010forecasting, 279 | title={Forecasting with Mixed Frequencies}, 280 | author={Armesto, M.T. and Engemann, K.M. and Owyang, M.T.}, 281 | journal={Federal Reserve Bank of St. Louis Review}, 282 | volume={92}, 283 | pages={521--536}, 284 | year={2010} 285 | } 286 | 287 | @article{bai:kalman, 288 | title={State Space Models and MIDAS Regressions}, 289 | author={Bai, J. and Ghysels, E. and Wright, J.}, 290 | journal={Econometric Reviews (forthcoming)}, 291 | year={2012} 292 | } 293 | 294 | @techreport{kvedaras-regression, 295 | title={{Regression Models with Variables of Different Frequencies: The Case of a Fixed Frequency Ratio}}, 296 | author={Kvedaras, V.
and Ra{\v{c}}kauskas, A.}, 297 | institution={{\it Oxford Bulletin of Economics and Statistics}, forthcoming.}, 298 | year = {2010} 299 | } 300 | @article{rodriguez2010mixed, 301 | title={{Mixed Frequency Models: Bayesian Approaches to Estimation and Prediction}}, 302 | author={Rodriguez, A. and Puggioni, G.}, 303 | journal={International Journal of Forecasting}, 304 | year={2010}, 305 | volume={26}, 306 | pages={293--311} 307 | } 308 | 309 | @techreport{wohlrabe2009forecasting, 310 | title={{Forecasting with Mixed-frequency Time Series Models}}, 311 | author={Wohlrabe, K.}, 312 | year={2009}, 313 | institution={Ph. D. Dissertation Ludwig-Maximilians-Universitat Munchen} 314 | } 315 | 316 | @BOOK{Dhrymes:71, 317 | author = "Dhrymes, Phoebus", 318 | title = {{Distributed Lags: Problems of Formulation and Estimation}}, 319 | year = 1971, 320 | publisher = {{Holden-Day}} 321 | } 322 | 323 | 324 | @UNPUBLISHED{foroni, 325 | title={{U-MIDAS: MIDAS Regressions with Unrestricted Lag Polynomials}}, 326 | author={Foroni, Claudia and Marcellino, Massimiliano and Schumacher, Christian}, 327 | note = {{\it Journal of the Royal Statistical Society A} (forthcoming)}, 328 | year = 2014 329 | } 330 | 331 | @article{chen2011news, 332 | title={News - Good or Bad - and Its Impact on Volatility Predictions over Multiple Horizons}, 333 | author={Chen, X. and Ghysels, E.}, 334 | journal={Review of Financial Studies}, 335 | volume={24}, 336 | pages={46--81}, 337 | year={2011} 338 | } 339 | 340 | @article{bai2002determining, 341 | title={{Determining the Number of Factors in Approximate Factor Models}}, 342 | author={Bai, J.
and Ng, S.}, 343 | journal={Econometrica}, 344 | pages={191--221}, 345 | year={2002} 346 | } 347 | 348 | @article{bai2003inferential, 349 | title={{Inferential Theory for Factor Models of Large Dimensions}}, 350 | author={Bai, J.}, 351 | journal={Econometrica}, 352 | pages={135--171}, 353 | year={2003} 354 | } 355 | @article{forni2000generalized, 356 | title={{The Generalized Dynamic-factor Model: Identification and Estimation}}, 357 | author={Forni, M. and Hallin, M. and Lippi, M. and Reichlin, L.}, 358 | journal={Review of Economics and Statistics}, 359 | volume={82}, 360 | number={4}, 361 | pages={540--554}, 362 | year={2000} 363 | } 364 | 365 | @article{forni2005generalized, 366 | title={{The Generalized Dynamic Factor Model}}, 367 | author={Forni, M. and Hallin, M. and Lippi, M. and Reichlin, L.}, 368 | journal={Journal of the American Statistical Association}, 369 | volume={100}, 370 | number={471}, 371 | pages={830--840}, 372 | year={2005} 373 | } 374 | @article{stock1989new, 375 | title={{New Indexes of Coincident and Leading Economic Indicators}}, 376 | author={Stock, J.H. and Watson, M.W.}, 377 | journal={NBER macroeconomics annual}, 378 | pages={351--394}, 379 | year={1989} 380 | } 381 | @article{stock2003forecasting, 382 | title={{Forecasting Output and Inflation: the Role of Asset Prices}}, 383 | author={Stock, J.H. and Watson, M.W.}, 384 | journal={Journal of Economic Literature}, 385 | pages={788--829}, 386 | year={2003} 387 | } 388 | 389 | @inproceedings{stock2008fdf, 390 | title={{Forecasting in Dynamic Factor Models Subject to Structural Instability}}, 391 | author={Stock, J.H. and Watson, M.W.}, 392 | editor = {Jennifer Castle and Neil Shephard}, 393 | Booktitle={The Methodology and Practice of Econometrics, A Festschrift in Honour of Professor David F. 
Hendry}, 394 | Publisher = {Oxford University Press}, 395 | year={2008} 396 | } 397 | 398 | @article{nunes2005nowcasting, 399 | title={Nowcasting Quarterly GDP Growth in a Monthly Coincident Indicator Model}, 400 | author={Nunes, L.C.}, 401 | journal={Journal of Forecasting}, 402 | volume={24}, 403 | pages={575--592}, 404 | year={2005} 405 | } 406 | 407 | @INCOLLECTION{Timmermann:forc, 408 | author = {Timmermann, Allan}, 409 | editor = {Elliott, G. and Granger, C. and Timmermann, A.}, 410 | title = {Forecast Combinations}, 411 | booktitle = {Handbook of Economic Forecasting }, 412 | year = {2006}, 413 | volume = {1}, 414 | pages = {136--196}, 415 | publisher = {North Holland}, 416 | address = {Amsterdam} 417 | } 418 | 419 | @article{Stock:comb7, 420 | title={Combination Forecasts Of Output Growth In A Seven-Country Data Set}, 421 | author={Stock, James H. and Watson, Mark W.}, 422 | journal={Journal of Forecasting}, 423 | volume = {23}, 424 | number = {6}, 425 | pages={405--430}, 426 | year={2004} 427 | } 428 | 429 | @unpublished{SW:Phillips, 430 | title={Phillips Curve Inflation Forecasts}, 431 | author={Stock, James H. and Watson, Mark W.}, 432 | note={NBER Working paper}, 433 | year={2008} 434 | } 435 | 436 | 437 | 438 | 439 | @article{engle2008sgm, 440 | title={{The Spline-GARCH Model for Low-Frequency Volatility and Its Global Macroeconomic Causes}}, 441 | author={Engle, R.F. and Rangel, J.G.}, 442 | journal={Review of Financial Studies}, 443 | year={2008}, 444 | volume={21}, 445 | pages={1187--1222} 446 | } 447 | 448 | @article{ghysels_wright, 449 | title={{Forecasting Professional Forecasters}}, 450 | author={Ghysels, Eric and Wright, Jonathan}, 451 | journal={Journal of Business and Economic Statistics}, 452 | volume = {27}, 453 | pages = {504--516}, 454 | year={2009} 455 | } 456 | 457 | @article{ghysels:mrf, 458 | title={{MIDAS Regressions: Further Results and New Directions}}, 459 | author={Ghysels, E. and Sinko, A. 
and Valkanov, R.}, 460 | journal={Econometric Reviews}, 461 | volume={26}, 462 | year={2006}, 463 | pages={53-90} 464 | } 465 | 466 | @article{giannone2008nowcasting, 467 | title={{Nowcasting: The Real-time Informational Content of Macroeconomic Data}}, 468 | author={Giannone, Domenico and Reichlin, Lucrezia and Small, David}, 469 | journal={Journal of Monetary Economics}, 470 | volume={55}, 471 | pages={665--676}, 472 | year={2008} 473 | } 474 | 475 | @unpublished{ghysels_mfvar, 476 | author = {Ghysels, Eric}, 477 | title={Macroeconomics and the Reality of Mixed Frequency Data}, 478 | year = {2012}, 479 | note = {SSRN: \url{http://ssrn.com/abstract=2069998} or \url{http://dx.doi.org/10.2139/ssrn.2069998}} 480 | } 481 | 482 | 483 | @article{harvey_pierse-84, 484 | author = "Harvey, Andrew C. and Pierse, Richard G.", 485 | title = {Estimating Missing Observations in Economic Time Series}, 486 | year = 1984, 487 | Journal = {Journal of the American Statistical Association}, 488 | volume = 79, 489 | pages = {125-131} 490 | } 491 | 492 | 493 | 494 | @BOOK{harveybook, 495 | author = "Harvey, Andrew", 496 | year = 1989, 497 | title = "Forecasting, Structural Time Series Models and the Kalman Filter", 498 | Publisher = {Cambridge University Press, Cambridge} 499 | } 500 | 501 | @ARTICLE{bernankeetal, 502 | author = "Bernanke, Ben and Gertler, Mark and Watson, Mark", 503 | year = 1997, 504 | title = "Systematic Monetary Policy and the Effects of Oil Price Shocks", 505 | journal = {Brookings Papers on Economic Activity}, 506 | volume = 1, 507 | pages = "91--157"} 508 | 509 | 510 | @article{zadrozny1990fug, 511 | title={{Forecasting US GNP at Monthly Intervals With an Estimated Bivariate Time Series Model}}, 512 | author={Zadrozny, Peter A.}, 513 | journal={Federal Reserve Bank of Atlanta Economic Review}, 514 | volume={75}, 515 | pages={2--15}, 516 | year={1990} 517 | } 518 | 519 | 520 | @article{mariano2003nci, 521 | title={{A New Coincident Index of Business Cycles Based on
Monthly and Quarterly Series}}, 522 | author={Mariano, Roberto S. and Murasawa, Yasutomo}, 523 | journal={Journal of Applied Econometrics}, 524 | pages={427--443}, 525 | year={2003}, 526 | volume = 18 527 | } 528 | 529 | @book{mittnik2004forecasting, 530 | title={{Forecasting Quarterly German GDP at Monthly Intervals Using Monthly Ifo Business Conditions Data}}, 531 | author={Mittnik, Stefan and Zadrozny, Peter A.}, 532 | year={2004}, 533 | publisher={Springer} 534 | } 535 | 536 | @article{ads:rtm, 537 | title={Real-time Measurement of Business Conditions}, 538 | author={Aruoba, S. Boragan and Diebold, Frank X. and Scotti, Chiara}, 539 | journal={Journal of Business and Economic Statistics}, 540 | pages={417--427}, 541 | volume = {27}, 542 | year = {2009} 543 | } 544 | 545 | @UNPUBLISHED{kuzin:mvm, 546 | title={{MIDAS Versus Mixed-frequency VAR: Nowcasting GDP in the Euro Area}}, 547 | author={Kuzin, Vladimir and Marcellino, Massimiliano and Schumacher, Christian}, 548 | note = {Discussion Paper 07/2009 Deutsche Bundesbank}, 549 | year = 2009 550 | } 551 | @article{cox1981statistical, 552 | title={Statistical Analysis of Time Series: Some Recent Developments [with Discussion and Reply]}, 553 | author={Cox, D.R.}, 554 | journal={Scandinavian Journal of Statistics}, 555 | pages={93--115}, 556 | year={1981}, 557 | volume = 8 558 | } 559 | @article{tiao1980hidden, 560 | title={Hidden Periodic Autoregressive-moving average Models in Time Series Data}, 561 | author={Tiao, GC and Grupe, MR}, 562 | journal={Biometrika}, 563 | volume={67}, 564 | number={2}, 565 | pages={365}, 566 | year={1980}, 567 | publisher={Biometrika Trust} 568 | } 569 | 570 | @article{gladyshev1961periodically, 571 | title={Periodically Correlated Random Sequences}, 572 | author={Gladyshev, EG}, 573 | journal={Soviet Mathematics}, 574 | volume={2}, 575 | pages={385--388}, 576 | year={1961} 577 | } 578 | 579 | @article{hansen1993seasonality, 580 | title={Seasonality and Approximation Errors in Rational 
Expectations Models}, 581 | author={Hansen, L.P. and Sargent, T.J.}, 582 | journal={Journal of Econometrics}, 583 | volume={55}, 584 | number={1-2}, 585 | pages={21--55}, 586 | year={1993}, 587 | publisher={Elsevier} 588 | } 589 | 590 | @book{franses1996periodicity, 591 | title={Periodicity and Stochastic Trends in Economic Time Series}, 592 | author={Franses, P.H.}, 593 | year={1996}, 594 | publisher={Oxford University Press, USA} 595 | } 596 | 597 | @article{pagano1978periodic, 598 | title={On Periodic and Multiple Autoregressions}, 599 | author={Pagano, M.}, 600 | journal={The Annals of Statistics}, 601 | pages={1310--1317}, 602 | year={1978}, 603 | publisher={JSTOR} 604 | } 605 | @article{ghysels1994periodic, 606 | title={On the Periodic Structure of the Business Cycle}, 607 | author={Ghysels, E.}, 608 | journal={Journal of Business and Economic Statistics}, 609 | pages={289--298}, 610 | year={1994}, 611 | publisher={JSTOR} 612 | } 613 | 614 | @book{hansen1990recursive, 615 | title={Recursive Linear Models of Dynamic Economies}, 616 | author={Hansen, L.P. 
and Sargent, T.J.}, 617 | year={1990}, 618 | publisher={NYU and University of Chicago} 619 | } 620 | 621 | @unpublished{ghysels:2013, 622 | title = {\proglang{MATLAB} Toolbox for Mixed Sampling Frequency Data Analysis using {MIDAS} Regression Models}, 623 | author = {Eric Ghysels}, 624 | month = jan, 625 | year = {2013}, 626 | Note={Available on \proglang{MATLAB} Central at \url{http://www.mathworks.com/matlabcentral/fileexchange/45150-midas-regression}} 627 | } 628 | 629 | 630 | @article{corsi2009simple, 631 | title={A Simple Approximate Long-memory Model of Realized Volatility}, 632 | author={Corsi, Fulvio}, 633 | journal={Journal of Financial Econometrics}, 634 | volume={7}, 635 | pages={174--196}, 636 | year={2009} 637 | } 638 | 639 | 640 | @unpublished{bilinskas:2013, 641 | Author={Bilinskas, Benediktas and Zemlys, Vaidotas}, 642 | Title={Testing the Functional Constraints on Parameters in Regression Models with Cointegrated Variables of Different Frequency}, 643 | Year=2013, 644 | Note={Submitted, available upon a request} 645 | } 646 | @unpublished{kvedaras:2013b, 647 | Author={Bilinskas, Benediktas and Kvedaras, Virmantas and Zemlys, Vaidotas}, 648 | Title={Testing the Functional Constraints on Parameters in Cointegrated MIDAS regressions}, 649 | Year=2013, 650 | Note={Working paper, available upon a request} 651 | } 652 | @unpublished{kvedaras:2013, 653 | Author={Kvedaras, Virmantas and Zemlys, Vaidotas}, 654 | Title={The Statistical Content and Empirical Testing of the MIDAS Restrictions}, 655 | Year=2013, 656 | Note={Submitted, available upon a request} 657 | } 658 | 659 | @article{kvedaras:2012, 660 | title={Testing the Functional Constraints on Parameters in Regressions With Variables of Different Frequency}, 661 | author={Kvedaras, Virmantas and Zemlys, Vaidotas}, 662 | journal={Economics Letters}, 663 | volume={116}, 664 | number={2}, 665 | pages={250--254}, 666 | year={2012}, 667 | publisher={North-Holland} 668 | } 669 | @article{zeileis:2004, 670 | 
author = "Achim Zeileis", 671 | title = "Econometric Computing with HC and HAC Covariance Matrix Estimators", 672 | journal = "Journal of Statistical Software", 673 | volume = "11", 674 | number = "10", 675 | pages = "1--17", 676 | day = "29", 677 | month = "11", 678 | year = "2004", 679 | CODEN = "JSSOBK", 680 | ISSN = "1548-7660", 681 | bibdate = "2004-11-29", 682 | URL = "http://www.jstatsoft.org/v11/i10", 683 | accepted = "2004-11-29", 684 | acknowledgement = "", 685 | keywords = "", 686 | submitted = "2004-11-09", 687 | } 688 | 689 | @article{zeileis:2006, 690 | author = "Achim Zeileis", 691 | title = "Object-Oriented Computation of Sandwich Estimators", 692 | journal = "Journal of Statistical Software", 693 | volume = "16", 694 | number = "9", 695 | pages = "1--16", 696 | year = "2006", 697 | URL = "http://www.jstatsoft.org/v16/i09", 698 | } 699 | 700 | 701 | 702 | @unpublished{breitung:2013, 703 | Author={Breitung, Jorg and Roling, Christoph and Elengikal, Suma}, 704 | Title={Forecasting Inflation Rates Using Daily Data: A Nonparametric MIDAS Approach}, 705 | Year=2013, 706 | Note={working paper}, 707 | url={http://www.ect.uni-bonn.de/mitarbeiter/joerg-breitung/npmidas} 708 | } 709 | 710 | @unpublished{oxmandata, 711 | Author={Heber, Gerd and Lunde, Asger and Shephard, Neil and Sheppard, Kevin}, 712 | Title={Oxford-Man Institute's Realized Library}, 713 | Year=2009, 714 | Note={Oxford-Man Institute, University of Oxford. Library Version 0.2} 715 | } 716 | 717 | @Article{optimx, 718 | title = {Unifying Optimization Algorithms to Aid Software System 719 | Users: {optimx} for {R}}, 720 | author = {John C. Nash and Ravi Varadhan}, 721 | journal = {Journal of Statistical Software}, 722 | year = {2011}, 723 | volume = {43}, 724 | number = {9}, 725 | pages = {1--14}, 726 | url = {http://www.jstatsoft.org/v43/i09/}, 727 | } 728 | 729 | @Article{optimx2, 730 | title = {On Best Practice Optimization Methods in R}, 731 | author = {John C. 
Nash}, 732 | journal = {Journal of Statistical Software}, 733 | year = {2014}, 734 | volume = {60}, 735 | number = {2}, 736 | pages = {1--14}, 737 | url = {http://www.jstatsoft.org/v60/i02/}, 738 | } 739 | 740 | 741 | @Manual{numderiv, 742 | title = {numDeriv: Accurate Numerical Derivatives}, 743 | author = {Paul Gilbert and Ravi Varadhan}, 744 | year = {2015}, 745 | note = {R package version 2014.2-1}, 746 | url = {http://CRAN.R-project.org/package=numDeriv}, 747 | } 748 | 749 | @Manual{midasr, 750 | title = {Mixed Frequency Data Sampling Regression Models: The \proglang{R} Package \pkg{midasr}}, 751 | author = {Eric Ghysels and Virmantas Kvedaras and Vaidotas Zemlys}, 752 | note = {\proglang{R} package version 0.5}, 753 | url = {http://CRAN.R-project.org/package=midasr}, 754 | year = {2015}, 755 | } 756 | 757 | @Manual{Rcore, 758 | title = {R: A Language and Environment for Statistical Computing}, 759 | author = {{R Core Team}}, 760 | organization = {R Foundation for Statistical Computing}, 761 | address = {Vienna, Austria}, 762 | year = {2015}, 763 | url = {http://www.R-project.org/}, 764 | } -------------------------------------------------------------------------------- /midasr-user-guide.Rnw: -------------------------------------------------------------------------------- 1 | \documentclass{jss} 2 | \usepackage{amsmath,amssymb} 3 | \usepackage{bm} 4 | \usepackage{multirow} 5 | \usepackage{dcolumn} 6 | \usepackage{rotating} 7 | \usepackage{natbib} 8 | \newcommand{\citeasnoun}{\cite} 9 | 10 | \newcommand{\alal}{\mbox{\boldmath$\alpha$}} 11 | \newcommand{\bebe}{\mbox{\boldmath$\beta$}} 12 | \newcommand{\dede}{\mbox{\boldmath$\delta$}} 13 | \newcommand{\e}{\varepsilon} 14 | \newcommand{\ff}{\mbox{\boldmath$f$}} 15 | \newcommand{\gaga}{\mbox{\boldmath$\gamma$}} 16 | \newcommand{\lala}{\mbox{\boldmath$\lambda$}} 17 | \newcommand{\phph}{\mbox{\boldmath$\phi$}} 18 | \newcommand{\thth}{\mbox{\boldmath$\theta$}} 19 | \newcommand{\xx}{\mbox{\boldmath$x$}} 20 | 
\newcommand{\XX}{\mbox{\boldmath$X$}} 21 | \newcommand{\yy}{\mbox{\boldmath$y$}} 22 | \newcommand{\eps}{\varepsilon} 23 | \newcommand{\specialcell}[2][c]{% 24 | \begin{tabular}[#1]{@{}c@{}}#2\end{tabular}} 25 | 26 | \DeclareMathOperator{\argmin}{argmin} 27 | 28 | \author{Eric Ghysels \\University of North Carolina \And Virmantas Kvedaras\\Vilnius University \And Vaidotas 29 | Zemlys\\Vilnius University} 30 | \Plainauthor{Eric Ghysels, Virmantas Kvedaras, Vaidotas Zemlys} 31 | 32 | \title{Mixed Frequency Data Sampling Regression Models: The \proglang{R} 33 | Package \pkg{midasr}} 34 | \Plaintitle{Mixed Frequency Data Sampling Regression Models: The R 35 | Package midasr} 36 | \Shorttitle{Mixed Frequency Data Sampling Regression Models: The \proglang{R} 37 | Package \pkg{midasr}} 38 | 39 | %\Plaintitle{Mixed Frequency Data Sampling Regression Models: The R Package midasr} 40 | %\Shorttitle{R Package \pkg{midasr}} 41 | 42 | \Abstract{ 43 | When modeling economic relationships it is increasingly common to encounter data sampled at different frequencies. We introduce the \proglang{R} package \pkg{midasr} which enables estimating regression models with variables sampled at different frequencies within a MIDAS regression framework put forward in work by \citeasnoun{ghysels_etal_midas-02a}. In this article we define a general autoregressive MIDAS regression model with multiple variables of different frequencies and show how it can be specified using the familiar \proglang{R} formula interface and estimated using various optimisation methods chosen by the researcher. We discuss how to check the validity of the estimated model both in terms of numerical convergence and statistical adequacy of a chosen regression specification, how to do the model selection based on information criteria, how to assess forecasting accuracy of the MIDAS regression model and do a forecast aggregation of different MIDAS regression models.
We illustrate capabilities of the package with a simulated MIDAS regression model and give two empirical examples of application of MIDAS regression.} 44 | 45 | %The package provides all the usual R methods for a regression model such as 46 | 47 | 48 | %Regression models involving data sampled at different frequencies are of general interest. We describe how can be used to estimate such models within a MIDAS regression framework with functional constraints on parameters put forward in work by \citeasnoun{ghysels_etal_midas-02a}. We 49 | %} 50 | \Keywords{MIDAS, specification test} 51 | \Plainkeywords{MIDAS, specification test} 52 | \Address{ 53 | Eric Ghysels\\ 54 | Department of Economics\\ 55 | University of North Carolina - Chapel Hill\\ 56 | Gardner Hall, CB 3305 Chapel Hill, NC 27599-3305\\ 57 | E-mail: \email{eghysels@unc.edu}\\ 58 | URL: \url{http://www.unc.edu/~eghysels/}\\ 59 | \\ 60 | Virmantas Kvedaras\\ 61 | Department of Econometric Analysis\\ 62 | Faculty of Mathematics and Informatics\\ 63 | Vilnius University\\ 64 | Naugarduko g. 24, Vilnius, Lithuania\\ 65 | E-mail: \email{virmantas.kvedaras@mif.vu.lt}\\ 66 | URL:\url{http://mif.vu.lt/ututi/teacher/vk}\\ 67 | \\ 68 | Vaidotas Zemlys\\ 69 | Department of Econometric Analysis\\ 70 | Faculty of Mathematics and Informatics\\ 71 | Vilnius University\\ 72 | Naugarduko g. 24, Vilnius, Lithuania\\ 73 | E-mail: \email{vaidotas.zemlys@mif.vu.lt}\\ 74 | URL:\url{http://mif.vu.lt/~zemlys/} 75 | } 76 | 77 | 78 | %\Submitdate{2014-01-28} 79 | %% need no \usepackage{Sweave.sty} 80 | \begin{document} 81 | 82 | \section{Introduction} 83 | 84 | Regression models involving data sampled at different frequencies are of general interest. 
In this document we introduce a new \proglang{R} \citep{Rcore} package \pkg{midasr} \citep{midasr} for the regression modeling with the mixed frequency data based on a framework put forward in recent work by \citeasnoun{ghysels_etal_midas-02a}, \citeasnoun{ghysels_etal_midas-joe} and 85 | \citeasnoun{andreou:rmm} using so called MIDAS, meaning Mi(xed) Da(ta) S(ampling), regressions. 86 | 87 | In a general framework of regressions with functional constraints on parameters, the \pkg{midasr} package not only provides similar functionality within a standard \proglang{R} framework of the model specification comparable to that available in the usual functions \code{lm} or \code{nls}, but also deals with an extended model specification analysis for MIDAS regressions. 88 | 89 | Several recent surveys on the topic of MIDAS are worth mentioning at the outset. They are: \cite{andreou2011forecasting} who review more extensively some of the material summarized in this document, \cite{armesto2010forecasting} who provide a very simple introduction to MIDAS regressions and finally \cite{ghysels_valkanov_chap} who discuss volatility models and mixed data sampling. 90 | 91 | Econometric analysis of MIDAS regressions appears in \cite{ghysels:mrf}, \cite{andreou:rmm}, \cite{bai:kalman}, 92 | \cite{kvedaras-regression}, \cite{rodriguez2010mixed}, \cite{wohlrabe2009forecasting}, among others. 93 | 94 | MIDAS regression can also be viewed as a reduced form representation of the linear projection which emerges from a state space model approach - by reduced form we mean that the MIDAS regression does not require the specification of a full state space system of equations. 95 | \cite{bai:kalman} show that in some cases the MIDAS regression is an exact representation of the Kalman filter, in other cases it involves approximation errors which are typically small. 
The Kalman filter, while clearly optimal as far as linear projections go, has several disadvantages: (1) it is more prone to specification errors as a full system of measurement and state equations is required and as a consequence (2) requires a lot more parameters, which in turn results in (3) computational complexities which often limit the scope of applications. In contrast, MIDAS regressions - combined with forecast combination schemes if large data sets are involved (see \cite{AGK_macro}) - are computationally easy to implement and are less prone to specification errors. 96 | 97 | The key feature of the package is its flexibility in terms of the model formulation and estimation, which allows for the\footnote{ 98 | \cite{ghysels:2013} also developed a package for \proglang{MATLAB} which deals with the estimation and information criteria-based specification of MIDAS regressions as well as forecasting and nowcasting of low frequency series. All of the \pkg{midasr} features replicate or extend features provided by the said package.
The key extensions are: the specification of any user-defined functional constraint, the inclusion of multiple variables of different frequency and different functional constraints, the testing of the adequacy of a chosen model specification, and the option to calculate standard errors of the parameters robust to serial correlation and heteroscedasticity in regression disturbances}: 99 | \begin{itemize} 100 | \item estimation of regression models with its parameters defined (restricted) by a certain functional constraint using familiar \proglang{R} \code{formula} interface allowing any choice of a constraint, which can be selected from a standard list or can be customized using user-defined \proglang{R} functions; 101 | \item parsimonious aggregation-linked restrictions (as e.g., in \citealp{ghysels:2013}) as a special case; 102 | \item estimation of MIDAS models with many variables and (numerous) different frequencies; 103 | \item constrained, partially constrained, or unconstrained estimation of the model; 104 | \item various mixtures of restrictions/weighting schemes and also lag orders as they can be specific to each series; 105 | \item statistical testing for the adequacy of the model specification and the imposed functional constraint; 106 | \item information criteria and testing-based selection of models; 107 | \item forecasting and nowcasting functionality, including various forecast combinations. 108 | \end{itemize} 109 | 110 | 111 | Suppose $\{y_t, t\in \mathbb{Z}\}$ is a univariate process observed at low frequency. Lags of the process are denoted by $By_t$ = $y_{t-1},$ where $B$ is the low frequency lag operator. A MIDAS regression involves linear projections using stochastic processes $\{x_{\tau}^{(i)}, \tau \in \mathbb{Z}\},$ 112 | $i=0,\dots,k,$ observed at a higher frequency, i.e., for each low 113 | frequency period $t=t_0$ we observe the process $x_\tau^{(i)}$ at $m_i\in 114 | \mathbb{N}$ high frequency periods $\tau=(t_0-1)m_i+1,\dots,t_0m_i$.
Throughout the article we represent $i$-th high frequency period $\tau$ in terms of low frequency period $t$ as 115 | $\tau=(t-1)m_i+j,$ $j=1,\dots,m_i$. Note that this notation does not exclude the case $m_i=1$. In that case the high frequency process $x_{\tau}^{(i)}$ is observed at the same frequency as the low frequency process $y_t$. However we require that $m_i\ge 116 | 1,$ such that the process $y_t$ is observed at the lowest frequency. Lags of the processes $x_{\tau}^{(i)}$ are denoted by $Lx^{(i)}_{\tau}=x^{(i)}_{\tau-1},$ where $L$ is the high frequency lag operator, which operates on the lag irrespective of the frequency of the process. 117 | 118 | The package deals with any specification of mixed-frequency 119 | regression model which can be represented as 120 | \begin{align}\label{l:eqintro} 121 | y_t-\alpha_1y_{t-1}-\dots-\alpha_py_{t-p}=\sum_{i=0}^{k}\sum_{j=0}^{l_i}\beta_{j}^{(i)}x_{tm_i-j}^{(i)}+\eps_t, 122 | \end{align} 123 | where we require 124 | \begin{align*} 125 | E(\eps_t|y_{t-1},\dots,y_{t-p},x^{(0)}_{tm_0},\dots,x^{(0)}_{tm_0-l_0},\dots,x^{(k)}_{tm_k},\dots,x^{(k)}_{tm_k-l_k})=0, 126 | \end{align*} so that the equation \eqref{l:eqintro} is identified as a projection equation. 127 | 128 | The model stated in the equation \eqref{l:eqintro} can be estimated in the usual time series regression fashion or using a Bayesian approach. However, the number of parameters in this model $d$ = $p+\sum_{i=0}^kl_i$ can be very large in terms of the number $n$ of available observations of $y_t$\footnote{In the MIDAS literature it is common to have $l_i\ge m_i$ and $m_i$ can be large, for example monthly versus daily data}.
Since the estimation of the model can 129 | easily become infeasible, whenever either larger differences in frequencies or more variables and/or higher lag orders prevail, \cite{ghysels_etal_midas-02a} introduced a sufficiently flexible parametric restriction to be imposed on the original parameters, 130 | \begin{align}\label{l:eqres} 131 | \beta_{j}^{(i)}=f_{i}(\gaga_i,j), \; j=0,\dots,l_i,\; \gaga_i=(\gamma_{1}^{(i)},\dots,\gamma_{q_i}^{(i)}), \; q_i\in\mathbb{N}. 132 | \end{align} 133 | 134 | This approach greatly reduces the number of parameters to be estimated, from $d$ to $q=\sum_{i=0}^{k}q_i,$ which is assumed to be always considerably less than the number of observations available at the lowest frequency. This gain is offset by the fact that \eqref{l:eqintro} becomes a non-linear model; however, if 135 | the parameters of an underlying data generating process did follow a certain functional constraint which is perfectly or well approximated by a constraint function chosen by a researcher, then significant efficiency gains could be achieved from the imposed constraints. Figure \ref{fig:fig1} plots the out-of-sample prediction precision (left figure) and the parameter estimation precision (right figure) in an unconstrained and constrained simple model with correct and approximate restrictions (see Appendix A for details).
136 | 137 | <>= 138 | render_sweave() 139 | @ 140 | 141 | \begin{figure}[htbp] 142 | <>= 143 | opts_chunk$set(tidy=TRUE, 144 | tidy.opts=list(blank=FALSE, width.cutoff = 60, indent = 2), 145 | highlight = FALSE, prompt = TRUE, comment = NA,background="#FFFFFF") 146 | options(prompt="R> ") 147 | suppressMessages(library("tseries")) 148 | suppressMessages(library("midasr")) 149 | suppressMessages(library("ggplot2")) 150 | data("oos_prec") 151 | qplot(x=n,y=value,data=oos_prec,geom="line",colour=Constraint,ylab="")+facet_wrap(~Type,scales="free_y")+xlab("Sample size")+theme_bw() 152 | @ 153 | \caption{A plot depicting efficiency gains when the correct non-linear constraint is imposed. The left panel plots the average out-of-sample prediction accuracy against sample size. The right panel plots out the average euclidean distance of estimated model parameters to their true values.} 154 | \label{fig:fig1} 155 | \end{figure} 156 | 157 | 158 | As can be seen, even an incorrect constraint might be useful whenever the number of degrees of freedom in an unconstrained model is low and, consequently, one cannot rely on the large sample properties of unconstrained estimators. Furthermore, this approach seems to be necessary whenever estimation is simply infeasible because of the lack of degrees of freedom. 159 | 160 | \section{Theory}\label{Theory} 161 | 162 | The model \eqref{l:eqintro} can be rewritten in a more compact form: 163 | \begin{equation}\label{eq1} 164 | \alpha(B)y_{t}= \bebe(L)^\top\xx_{t,0}+\e_t,\\ 165 | \end{equation} 166 | where $\alpha(z)=1-\sum_{j=1}^{p}\alpha_jz^j$ and 167 | \begin{align*} 168 | \xx_{t,0}:&=\left(x_{tm_0}^{(0)},\dots,x_{tm_i}^{(i)},\dots,x_{tm_l}^{(l)}\right)^\top,\\ 169 | \bebe(z)&= \sum_{j=0}^{l}\bebe_jz^j,\ \bebe_j=\left(\beta_j^{(0)},\dots,\beta_j^{(i)},\dots,\beta_j^{(l)}\right)^\top,\\ 170 | L^j\xx_{t,0}:&=\xx_{t,j}=\left(L^jx_{tm_0}^{(0)},\dots,L^jx_{tm_i}^{(i)},\dots,L^jx_{tm_l}^{(l)}\right)^\top. 
171 | \end{align*} 172 | In order to simplify notation, without loss of generality, a single order of the lag polynomials is used with $l$ being the maximum lag order. If the orders of some components of $\bebe(z)$ are smaller, it is easy to set some coefficients of the polynomial equal to zero. 173 | 174 | We require the existence of the continuous second derivative of the functional constraint with respect to its parameters, i.e., $\frac{\partial^2 f_i}{\partial \gaga_i\partial \gaga_i^\top}$. The functional constraints can vary with each variable and/or frequency, and therefore we use $\gaga$ to represent a vector of all the parameters of a restricted model with $q=\dim(\gaga)$ their total number. 175 | 176 | As will be shown in the next section, all variants of the usual linear (in terms of variables) MIDAS regression model are covered by regression \eqref{eq1} via the specification of functional constraints. When each restriction function is an identity mapping, one obtains an unrestricted MIDAS regression model.\footnote{See \cite{foroni}.} 177 | 178 | \subsection{Frequency alignment} 179 | 180 | It is instructive to rewrite the model \eqref{l:eqintro} in a matrix notation. We start with a few examples. Suppose $y_t$ is observed quarterly and we want to explain its variation with the variable $x_\tau$, which is observed monthly. Since each quarter has three months, the frequency $m$ is 3 in this example. Suppose we assume that the monthly data in the current and the previous quarter has explanatory power. This means that for each quarter $t$ we want to model $y_t$ as a linear combination of variables $x_{3t},x_{3t-1},x_{3t-2}$ observed in the quarter $t$ and variables $y_{t-1}$ and $x_{3(t-1)},x_{3(t-1)-1},x_{3(t-1)-2}$ observed in the previous quarter $t-1$.
In matrix notation the MIDAS model \eqref{l:eqintro} for this example is: 181 | \begin{align*} 182 | \begin{bmatrix} 183 | y_2\\ 184 | \vdots\\ 185 | y_n 186 | \end{bmatrix}= 187 | \begin{bmatrix} 188 | y_1\\ 189 | \vdots\\ 190 | y_{n-1} 191 | \end{bmatrix}\alpha_1+ 192 | \begin{bmatrix} 193 | x_6 & \dots &x_1\\ 194 | \vdots & \vdots & \vdots\\ 195 | x_{3n} & \dots & x_{3n-5} 196 | \end{bmatrix} 197 | \begin{bmatrix} 198 | \beta_0\\ 199 | \vdots\\ 200 | \beta_5 201 | \end{bmatrix}+ 202 | \begin{bmatrix} 203 | \eps_2\\ 204 | \vdots\\ 205 | \eps_n 206 | \end{bmatrix} 207 | \end{align*} 208 | By writing the model in the matrix notation we have transformed high-frequency variable $x_\tau$ into a low-frequency vector $(x_{3t},\dots,x_{3t-5})^\top$. We call this transformation the frequency alignment. Note that we require that the number of observations of $x_\tau$ is exactly $3n$. 209 | 210 | Let us examine another example. Suppose we have another variable $z_t$ observed weekly which we want to add to the model. The model \eqref{l:eqintro} does not allow varying frequency ratios, so we need to assume that each month has exactly 4 weeks. If months do not always have four weeks, as they do in practice, one can simply think of this model as taking a fixed set of weekly lags. The frequency $m$ for the variable $z_\tau$ is then 12. We use again the current and previous quarter data for explaining variation in $y_t$. This means that for quarter $t$ we model $y_t$ as a linear combination of variables $x_{3t},x_{3t-1},x_{3t-2}$ and $z_{12t},z_{12t-1},\dots,z_{12t-11}$ observed in the quarter $t$, and variables $y_{t-1}$, $x_{3(t-1)},\dots,x_{3(t-1)-2}$ and $z_{12(t-1)},z_{12(t-1)-1},\dots,z_{12(t-1)-11}$ observed in the quarter $t-1$. 
The model in the matrix form is then: 211 | \begin{align*} 212 | \begin{bmatrix} 213 | y_2\\ 214 | \vdots\\ 215 | y_n 216 | \end{bmatrix}= 217 | \begin{bmatrix} 218 | y_1\\ 219 | \vdots\\ 220 | y_{n-1} 221 | \end{bmatrix}\alpha_1+ 222 | \begin{bmatrix} 223 | x_6 & \dots &x_1\\ 224 | \vdots & \vdots & \vdots\\ 225 | x_{3n} & \dots & x_{3n-5} 226 | \end{bmatrix} 227 | \begin{bmatrix} 228 | \beta_0\\ 229 | \vdots\\ 230 | \beta_5 231 | \end{bmatrix}+ 232 | \begin{bmatrix} 233 | z_{24} & \dots &z_1\\ 234 | \vdots & \vdots & \vdots\\ 235 | z_{12n} & \dots & z_{12n-23} 236 | \end{bmatrix} 237 | \begin{bmatrix} 238 | \gamma_0\\ 239 | \vdots\\ 240 | \gamma_{23} 241 | \end{bmatrix}+ 242 | \begin{bmatrix} 243 | \eps_2\\ 244 | \vdots\\ 245 | \eps_n 246 | \end{bmatrix} 247 | \end{align*} 248 | In this example we have aligned $x_{\tau}$ into a vector $(x_{3t},\dots,x_{3t-5})^\top$ and $z_{\tau}$ into a vector\newline $(z_{12t},\dots,z_{12t-23})^\top.$ Again we require that the numbers of observations of the high frequency variables are multiples of $n$, with the multiplication factors being the corresponding frequencies. This is not a restrictive assumption in practical applications as will be further explained in Section \ref{Implementation}. 249 | 250 | Let us return to the general case of the model \eqref{l:eqintro}. We \textit{align the frequency} of high-frequency variable $x_\tau^{(i)}$ by transforming it to the low-frequency vector $(x_{tm_i}^{(i)},x_{tm_i-1}^{(i)},\dots,x_{tm_i-l}^{(i)})^\top$.
The model \eqref{l:eqintro} is then expressed in the matrix notation as follows: 251 | \begin{align*} 252 | \begin{bmatrix}y_u\\\vdots\\y_n\end{bmatrix}= 253 | \begin{bmatrix} 254 | y_{u-1} &\dots & y_{u-p}\\ 255 | \vdots & \vdots & \vdots\\ 256 | y_{n-1} & \dots & y_{n-p} 257 | \end{bmatrix} 258 | \begin{bmatrix}\alpha_1\\\vdots\\\alpha_p\end{bmatrix}+ 259 | \sum_{i=0}^k\XX^{(i)}\begin{bmatrix}\beta_0^{(i)}\\\vdots\\\beta_{l}^{(i)}\end{bmatrix} 260 | +\begin{bmatrix} 261 | \eps_u\\\vdots\\\eps_n 262 | \end{bmatrix}, 263 | \end{align*} 264 | where 265 | \begin{equation}\label{eq2} 266 | \XX^{(i)}:=\left[ 267 | \begin{matrix} 268 | x_{um_i}^{(i)}&x_{um_i-1}^{(i)}&\dots&x_{um_i-l}^{(i)}\\ 269 | x_{(u+1)m_i}^{(i)}&x_{(u+1)m_i-1}^{(i)}&\dots&x_{(u+1)m_i-l}^{(i)}\\ 270 | \vdots&\vdots&\dots&\vdots\\ 271 | %\hline 272 | x_{tm_i}^{(i)}&x_{tm_i-1}^{(i)}&\dots&x_{tm_i-l}^{(i)}\\ 273 | %\hline 274 | \vdots&\vdots&\dots&\vdots\\ 275 | x_{(n-1)m_i}^{(i)}&x_{(n-1)m_i-1}^{(i)}&\dots&x_{(n-1)m_i-l}^{(i)}\\ 276 | x_{nm_i}^{(i)}&x_{nm_i-1}^{(i)}&\dots&x_{nm_i-l}^{(i)}\\ 277 | \end{matrix}\right], 278 | \end{equation} 279 | and $u$ is the smallest integer such that $um_i-l>0$ and $u>p$. 280 | 281 | The purpose of this subsection was to show how the frequency alignment procedure turns a MIDAS regression into a classical time series regression where all the variables are observed at the same frequency. 282 | 283 | \subsection{Estimation} 284 | 285 | Equation \eqref{eq1} can be estimated directly via ordinary least squares (OLS), without restrictions on the parameters. This is a so-called U-MIDAS regression model, see \cite{foroni}. Furthermore, a consistent non-parametric approach could be used to estimate the underlying parameters of a function as e.g., in \cite{breitung:2013}. Since none of these approaches use a parametric functional constraint, they can be estimated using already available \proglang{R} packages.
The \pkg{midasr} package aims at the estimation of mixed frequency models with some parametric functional constraints. 286 | 287 | While model \eqref{eq1} is a linear model in terms of variables, any non-linear functional constraints will result in non-linearities with respect to the parameters $\gaga.$ Therefore, in the general case, we use in the function \code{midas_r} the non-linear least squares (NLS) estimator of parameters $\gaga$ of a restricted model \eqref{eq1} as defined by 288 | \begin{equation}\label{eqNLS} 289 | \widehat \gaga=\underset{\gaga\in\mathbb{R}^q}{\argmin}\ \sum_{\lceil (l+1)/m\rceil}^{n}\bigg(\alpha(B)y_t- \ff_{\gaga}(L)^\top \xx_{t,0}\bigg)^2, 290 | \end{equation} 291 | where the lag polynomial of constrained parameters is defined by 292 | $$ 293 | \ff_{\gaga}(z)=\sum_{j=0}^l\ff_{\gaga,j}z^j 294 | $$ 295 | with 296 | $$ 297 | \ff_{\gaga,j}=\bigg(f_{0}(\gaga_{0};j),\dots,f_{i}(\gaga_{i};j),\dots,f_{k}(\gaga_{k};j)\bigg)^\top 298 | $$ 299 | for each $(i,j)\in\{0,1,\dots,k\}\times\{0,1,\dots,l\}.$ 300 | A number of numerical algorithms are readily available in \proglang{R}. By default, the \code{optim} optimization function is used with optional choices of optimization algorithms in it. However, a user can also choose within the function \code{midas_r} other procedures available in \proglang{R} such as \code{nls}, customizing the desired algorithm which is suitable for the problem at hand. 301 | 302 | The efficiency of the estimator and consistency of the standard errors depend on whether the errors of the model are spherical. We leave the aspect of efficiency of estimation to be considered by a user, however the implementation of heteroscedasticity and autocorrelation (HAC) robust standard errors is an option available in the package \pkg{sandwich} (see \citealp{zeileis:2004}, \citealp{zeileis:2006}). 
303 | 304 | If all the functional relations $f_i(\cdot)$ were non-constraining identity mappings, then the NLS estimator would be equivalent to the ordinary least squares (OLS) problem in terms of the original parameters. For convenience, such a U-MIDAS version can be dealt with directly using a different function \code{midas_u} of the package (see an illustration in Section \ref{Implementation}) or a standard \code{lm} function, provided the alignment of data frequencies is performed as discussed in the previous section. 305 | 306 | \subsection{Taxonomy of aggregates-based MIDAS regression models} 307 | Based on the parsimony of representation argument, the higher-frequency part of conditional expectation of MIDAS regressions is often formulated in terms of aggregates as follows 308 | \begin{equation} \label{eq3} 309 | \begin{aligned} 310 | \bebe(L)^\top \xx_{t,0}&=\sum_{i=0}^k\sum_{j=0}^l \beta_j^{(i)}x_{tm_i-j}^{(i)}\\ 311 | &=\sum_{i=0}^k\sum_{r=0}^q \lambda_r^{(i)}\tilde x_{t-r}^{(i)},\\ 312 | \end{aligned} 313 | \end{equation} 314 | with some low-frequency number of lags $q\in\mathbb{N}$ and parameter-driven low-frequency aggregates 315 | \begin{equation*} %\label{eq3} 316 | \begin{aligned} 317 | \tilde x_{t-r}^{(i)}&:=x_{t-r}^{(i)}(\dede_{i,r})=\sum_{s=1}^{m_i}w_{r}^{(i)}(\dede_{i,r};s)x_{(t-1-r)m_i+s}^{(i)} 318 | \end{aligned} 319 | \end{equation*} 320 | which depend on a weighting (aggregating within a low-frequency period) function $w_{r}^{(i)}(\dede_{i,r};s)$ with its parameter vector $\dede_{i,r},$ which, in the general case, can vary with each variable/frequency and/or the low-frequency lag order $r\in\mathbb{N}.$ Here the aggregation weights are usually non-negative and, for identification of parameters $\{\lambda_r^{(i)}\}_{i=0,r=0}^{k,q},$ satisfy a normalization constraint such as $\sum_{s=1}^{m_i}w_{r}^{(i)}(\dede_{i,r};s)=1.$ To have the weights add to one, it is convenient to define a weighting function in the following form 321 |
\begin{equation}\label{eq4} 322 | \forall \ i,r\ \ w_{r}^{(i)}(\dede_{i,r};s)=\frac{\psi_{r}^{(i)}(\dede_{i,r};s)}{\sum_{j=1}^{m_i}\psi_{r}^{(i)}(\dede_{i,r};j)},\ s=1,\dots,m_i, 323 | \end{equation} 324 | given some underlying function $\psi_{r}^{(i)}(\cdot).$ Provided that the latter function is non-negatively-valued (and the denominator is positive), the resulting weights in eq. \eqref{eq4} are also non-negative. Table \ref{tab:1} provides a list of some underlying functions producing, within the context of equation \eqref{eq4}, the usual weighting schemes with non-negative weights (whenever the parameter space of underlying functions is appropriately bounded, which in some cases is also needed for identification of parameters). In order to avoid heavy notation, indices $i$ and $r$---which are connected respectively with frequency/variable and the lag order---are dropped in the table. %Furthermore, a variable $x_s\in(0,1)$ is used as defined by $x_s:=\xi+(1-\xi)h(s),\ h(s):=(s-1)/({m}-1)$ for some marginally small quantity $\xi>0.$ %[??Keistoka, kad kai kur yra 1:d/d (nors tikriausiai visur keicia tik paramtro reiksme). Kam tokia dalyba, jei $x\in\mathbb{R}_+$; o jei norima normuoti ant $[0,1],$ kodel ne $x_s$??] 
325 | 326 | \begin{table}[htp] 327 | \begin{center} 328 | \small{ 329 | \begin{tabular}{lcc} 330 | \hline\hline 331 | \parbox[c]{4cm}{ {\bf Resulting (normalized) weighting scheme}}& {\bf $\psi(\dede;s):=\psi_{r}^{(i)}(\dede_{i,r};s)$} & {\bf Related midasr function}\\ 332 | %\shortstack{aa \\ 333 | \hline 334 | \parbox[c]{4cm}{ Exponential Almon lag polynomial} &\parbox[c]{5.5cm}{ $\psi(\dede;s)=\exp\left(\sum_{j=1}^p\delta_{j}s^j\right),\ p\in\mathbb{N},$ \newline where $\dede=\left(\delta_{1},\dots,\delta_{j},\dots,\delta_{p}\right)^\top\in\mathbb{R}^p.$ }&\code{nealmon}\\ 335 | \hline 336 | \parbox[c]{4cm}{Beta (analogue of probability density function)} &\parbox[c]{5.5cm}{ $\psi(\dede;s)=x_s^{\delta_{1}-1}(1-x_s)^{\delta_{2}-1},$ where \newline 337 | $x_s:=\xi+(1-\xi)h(s),\ h(s):=(s-1)/({m}-1),$ with some marginally small quantity $\xi>0,$ and $\dede=\left(\delta_{1},\delta_{2}\right)^\top\in\mathbb{R}_+^2$.}&\code{nbeta}\\ 338 | \hline 339 | \parbox[c]{4cm}{Gompertz (analogue of probability density function)} &\parbox[c]{5.5cm}{ $\psi(\dede;s)=z(s)e^{-\delta_1z(s)},$ where \newline $z(s)=\exp\big(\delta_{2}s\big),$ and $\dede=\left(\delta_{1},\delta_{2}\right)^\top\in\mathbb{R}_+^2$.}&\code{gompertzp}\\ 340 | \hline 341 | \parbox[c]{4cm}{Log-Cauchy (analogue of probability density function)} &\parbox[c]{5.5cm}{ $\psi(\dede;s)=s^{-1}\left(\delta_2^2+(\ln s-\delta_1)^2\right)^{-1},$ where \newline $\dede=\left(\delta_{1},\delta_{2}\right)^\top\in\mathbb{R}\times\mathbb{R}_+.$ }&\code{lcauchyp}\\ 342 | \hline 343 | \parbox[c]{4cm}{Nakagami (analogue of probability density function)} &\parbox[c]{5.5cm}{ $\psi(\dede;s)=s^{2\delta_1-1}\exp(-\delta_1/\delta_2 s^2),$ where \newline $\dede=\left(\delta_{1},\delta_{2}\right)^\top,$ $\delta_{1}\geq0.5, \delta_2\in\mathbb{R}_+$.}& \code{nakagamip}\\ 344 | % \hline 345 | %\parbox[c]{4cm}{Polynomial step function weighting scheme} &\parbox[c]{7cm}{ 
$w_{\cdot,j}^{(i)}(\dede_{i,\cdot})=\frac{\exp\left(\sum_{s=1}^p\delta_{s,\cdot}^{(i)}j^s\right)}{\sum_{j=1}^{k_i}\exp\left(\sum_{s=1}^p\delta_{s,\cdot}^{(i)}j^s\right)},$ \newline where $\dede_{i,\cdot}=\left(\delta_{1,\cdot}^{(i)},\dots,\delta_{j,\cdot}^{(i)},\dots,\delta_{p_i,\cdot}^{(i)}\right)^\top.$ }&wpolystep()\\ 346 | \hline\hline 347 | \end{tabular} 348 | } 349 | \caption{\label{tab:1} A sample of weighting schemes in aggregation-based MIDAS specifications.} 350 | \end{center} 351 | \end{table} 352 | 353 | Some other weighting functions which do not have a representation as in eq. \eqref{eq4} are also available in the package such as (non-normalized) \code{almonp} and the polynomial specification with step functions \code{polystep} (see \cite{ghysels:mrf} for further discussion of step functions). 354 | 355 | %Since it is usual in the aggregate-based MIDAS to present models with a single mixed frequency and the same aggregation weighting scheme, without loss of generality we will adopt this convenience in this section for exposition clarity and simplicity of comparison. 356 | 357 | However, the choice of a particular weighting function in the MIDAS regression with aggregates represents only one restriction imposed on $\bebe(L)$ out of many other choices to be made. 
To see this, let us note that aggregates-based MIDAS regressions can be connected with the following restrictions on the conditional expectation of model \eqref{eq1}: 358 | \begin{equation}\label{eq5} 359 | \begin{aligned} 360 | E\left(\alpha(B)y_t|\yy_{t,1},\{\xx_{t,0}^{(i)}\}_{j=0}^{l}\right) 361 | &=\bebe(L)^\top \xx_{t,0}\\ 362 | %&=\sum_{i=0}^h\sum_{j=0}^k \beta_j^{(i)}x_{tm_i-j}^{(i)}\\ 363 | &=\sum_{i=0}^k\sum_{r=0}^q \lambda_r^{(i)}\tilde x_{t-r}^{(i)},\\ 364 | &=\sum_{i=0}^k\sum_{r=0}^q \lambda_r^{(i)}\sum_{s=1}^{m_i}w_{r}^{(i)}(\dede_{i,r};s)x_{(t-1-r)m_i+s}^{(i)},\\ 365 | \bigg|_{w_{r}^{(i)}(\cdot)=w_{r}(\cdot)}&=\sum_{i=0}^k\sum_{r=0}^q \lambda_r^{(i)}\sum_{s=1}^{m_i}w_{r}(\dede_{i,r};s)x_{(t-1-r)m_i+s}^{(i)},\\ 366 | \bigg|_{w_{r}(\cdot)=w(\cdot)}&=\sum_{i=0}^k\sum_{r=0}^q \lambda_r^{(i)}\sum_{s=1}^{m_i}w(\dede_{i,r};s)x_{(t-1-r)m_i+s}^{(i)},\\ 367 | \bigg|_{\dede_{i,r}=\dede_i}&=\sum_{i=0}^k\sum_{r=0}^q \lambda_r^{(i)}\sum_{s=1}^{m_i}w(\dede_{i};s)x_{(t-1-r)m_i+s}^{(i)},\\ 368 | \bigg|_{\lambda_{r}^{(i)}=\lambda^{(i)}}&=\sum_{i=0}^k\lambda^{(i)}\sum_{r=0}^q \sum_{s=1}^{m_i}w(\dede_{i};s)x_{(t-1-r)m_i+s}^{(i)},\\ 369 | \end{aligned} 370 | \end{equation} 371 | where $\yy_{t,1}=(y_{t-1},...,y_{t-p})^\top$. 372 | 373 | As can be seen---and leaving aside other less intuitive restrictions---depending on the choice of a particular MIDAS specification with aggregates, it can impose restrictions on the equality of 374 | \begin{itemize} 375 | \item the applied weighting scheme/function across variables and/or frequencies ($\forall \ i,\ \ w_{r}^{(i)}(\cdot)=w_{r}(\cdot)$); 376 | \item the applied weighting scheme/function across all low-frequency lags $r=0,1,\dots,q$ of aggregates ($\forall \ r,\ \ w_{r}(\cdot)=w(\cdot)$); 377 | \item parameters of the weighting functions in each lag ($\forall \ r,\ \ \dede_{i,r}=\dede_{i}$); 378 | \item impact of contemporaneous and lagged aggregates for all lags ($\forall \ r,\ \ \lambda_{r}^{(i)}=\lambda^{(i)}$). 
379 | \end{itemize} 380 | Furthermore, let $s_i$ stand for an enumerator of $i^{th}$ higher-frequency periods within a low-frequency period. Then, noting that, given a frequency ratio $m_i,$ there is a one-to-one mapping between higher-frequency index $j\in\mathbb{N}$ and a pair $(r,s_i)\in\mathbb{N}\times\{1,2,\dots,m_i\}$ 381 | $$ 382 | j=rm_i+s_i, 383 | $$ 384 | it holds 385 | \begin{equation}\label{eq6} 386 | f_i(\gaga_{i};rm_i+s_i)=\lambda_r^{(i)}w_r^{(i)}(\dede_{i,r};s_i). 387 | \end{equation} 388 | Hence, it is easy to see that the aggregates-based MIDAS induces a certain periodicity of the functional constraint $f_{i}$ in eq. \eqref{eq1} as illustrated below using a stylized case where all the restrictions are imposed in eq. \eqref{eq5}: 389 | 390 | {\small 391 | \begin{tabular}{llll|llll|l} 392 | %$f_{i}(\cdot,0),$&$f_{i}(\cdot,1),$&$\dots$&$f_{i}(\cdot,m-1)$&$f_{i}(\cdot,m),$&$f_{i}(\cdot,m+1),$&$\dots$&$f_{i}(\cdot,2m-1)$&\dots \\ 393 | %$\lambda_0^{(i)}w_0^{(i)}(\cdot,1),$&$\lambda_0^{(i)}w_0^{(i)}(\cdot,2),$&\dots&$\lambda_0^{(i)}w_0^{(i)}(\cdot,m)$&$\lambda_1^{(i)}w_1^{(i)}(\cdot,1),$&$\lambda_1^{(i)}w_1^{(i)}(\cdot,2),$&\dots&$\lambda_1^{(i)}w_1^{(i)}(\cdot,m)$&\dots 394 | $f_{i}(\cdot,0),$&$f_{i}(\cdot,1),$&$\dots$&$f_{i}(\cdot,m-1)$&$f_{i}(\cdot,m),$&$f_{i}(\cdot,m+1),$&$\dots$&$f_{i}(\cdot,2m-1)$&\dots \\ 395 | $\lambda^{(i)}w(\cdot,1),$&$\lambda^{(i)}w(\cdot,2),$&\dots&$\lambda^{(i)}w(\cdot,m)$&$\lambda^{(i)}w(\cdot,1),$&$\lambda^{(i)}w(\cdot,2),$&\dots&$\lambda^{(i)}w(\cdot,m)$&\dots 396 | \end{tabular},\\ 397 | } 398 | for any $i\in\{0,1,\dots,h\}.$ 399 | From eq. \eqref{eq6} it is clear that any specification of MIDAS regression models which relies on aggregates is a special case of representation \eqref{eq1} with just a specific functional constraint on parameters. On the other hand, not every general constraint $\bebe(L)$ can be represented using periodic aggregates.
For instance, in the above characterized example the correspondence necessarily breaches whenever there exists at least one frequency $i,$ for which none of $q\in\mathbb{N}$ satisfies $l=qm_i-1.$ 400 | 401 | \subsection{Specification selection and adequacy testing} 402 | 403 | Besides the usual considerations about the properties of the error term, there are two main questions about the specification of the MIDAS regression models. First, suitable functional constraints need to be selected, since their choice will affect the precision of the model. 404 | Second, the appropriate maximum lag orders need to be chosen. 405 | 406 | One way to address both issues together is to use some information criterion to select the best model in terms of the parameter restriction and the lag orders using either in- or out-of-sample precision measures. Functions \code{midas_r_ic_table} and \code{amidas\_table} of the package allow the user to make an in-sample choice using some usual information criteria, such as AIC and BIC, and a user-specified list of functional constraints.\footnote{Although aimed at forecasting, the function \code{select\_and\_forecast} can also be used to perform the selection of models relying on their out-of-sample performance.} 407 | 408 | Another way is to test the adequacy of the chosen functional constraints. For instance, whenever the autoregressive terms in model \eqref{eq1} are present ($p>0$), it was pointed out by \cite{ghysels:mrf} that, in the general case, $\phph(L)=\bebe(L)/\alpha(B)$ will have seasonal pattern thus corresponding to some seasonal impact of explanatory variables on the dependent one in a pure distributed lag model (i.e., without autoregressive terms). 
To avoid such an effect whenever it is not (or is believed to be not) relevant, \cite{clements2008mfm} proposed to use a common factor restriction which can be formulated as a common polynomial restriction with a constraint on the polynomial $\bebe(L)$ to satisfy a factorization $\bebe(L)=\alpha(B)\phph(L),$ so that inverting equation \eqref{eq1} in terms of the polynomial $\alpha(B)$ leaves $\phph(L)$ unaffected, i.e., without creating/destroying any (possibly absent) seasonal pattern of the impact of explanatory variables. 409 | However, there is little if any knowledge a priori whether the impact in the distributed lag model should be seasonal or not. Hence, an explicit testing of adequacy of the model and, in particular, of the imposed functional constraint is obviously useful. 410 | 411 | Let $\bebe$ denote a vector of all coefficients of polynomial $\bebe(z)$ defined in eq. \eqref{eq1}, while $\ff_{\gaga}$ stand for the corresponding vector of coefficients restricted by a (possibly incorrect) functional constraint in $\ff_{\gaga}(z).$ Let $\bm{\widehat{\beta}}$ denote the respective OLS estimates of the unconstrained model, i.e., where functional restrictions of parameters are \textit{not} taken into account. Let $\bm{\hat{\ff}_{\gaga}}:=\ff_{\gaga}\big|_{\gaga=\widehat\gaga}$ denote a vector of the corresponding quantities obtained from the restricted model relying on the NLS estimates $\widehat{\gaga}$ as defined in eq. \eqref{eqNLS}.
Denote by $\alal,\ \widehat\alal,$ and $\widehat\alal_{\gamma}$ the corresponding vectors of coefficients of polynomial $\alpha(z),$ its OLS estimates in an unrestricted model, and its NLS estimates in a restricted model.\footnote{Recall that unconstrained $\alal$ elements make a subset of parameter vector $\gaga$ of a constrained model.} 412 | Let $\thth:=(\alal^\top,\bebe^\top)^\top,$ $\widehat\thth:=(\widehat\alal^\top,\widehat\bebe^\top)^\top,$ and $\widetilde\thth:=(\widehat\alal_{\gaga}^\top,\hat\ff_{\gaga}^\top)^\top$ signify the corresponding vectors of all coefficients in eq. \eqref{eq1}. 413 | Then, 414 | under the null hypothesis of $\exists \ \gaga\in\mathbb{R}^q\text{ such that }\ff_{\gaga}=\bebe,$ it holds 415 | \begin{align*} 416 | (\bm{\widehat{\thth}}-{\bm{\widetilde{\thth}}})^\top\bm{A}(\bm{\widehat{\thth}}-\bm{\widetilde{\thth}})\sim \chi^2\big(d-q\big), 417 | \end{align*} 418 | where $\bm{A}$ is a suitable normalization matrix (see \citealp{kvedaras:2012} for a standard and \citealp{kvedaras:2013} for a HAC-robust versions of the test), and $q=\dim(\gaga)$ and $d=\dim(\thth)$ stand for the number of parameters in a restricted and unrestricted models, respectively. Functions \code{hAh_test} and \code{hAhr_test} of the package implement the described testing as will be illustrated later. 419 | 420 | \subsection{Forecasting} 421 | 422 | Let us write model \eqref{eq1} for period $t+1$ as 423 | \begin{equation}\label{eq8a} 424 | y_{t+1}=\alal^\top\yy_{t,0}+\bebe(L)^\top\xx_{t+1,0}+\e_{t+1}, 425 | \end{equation} 426 | where $\yy_{t,0}=(y_{t},\dots,y_{t-p+1})^\top$ and $\alal=(\alpha_{1},\alpha_{2},\dots,\alpha_{p})^\top$ is a vector of parameters of the autoregressive terms. This representation is well suited for (one step ahead) conditional forecasting of $y_{t+1},$ provided that the information on the explanatory variables is available. 
If it were absent, forecasts of $\xx_{t+1,0}$ would be also necessary from a joint process of $\{y_t,\xx_{t,0}\}$ which might be difficult to specify and estimate correctly, especially, bearing in mind the presence of data with mixed frequencies. Instead, a direct approach to forecasting is often applied in the MIDAS framework. Namely, given an information set available up to a moment $t$ defined by $\mathcal{I}_{t,0}=\{\yy_{t,j},\xx_{t,j}\}_{j=0}^{\infty},$ where 427 | \begin{align*} 428 | \yy_{t,j}&=(y_{t-j},...,y_{t-j-p+1})^\top\\ 429 | \xx_{t,j}&=(x^{(0)}_{tm_0},...,x^{(i)}_{tm_i},...,x^{(k)}_{tm_k})^\top, 430 | \end{align*} 431 | an $h$-step ahead direct forecast 432 | \begin{equation}\label{eq8} 433 | \tilde y_{t+h}=E\left(y_{t+h}\big|\mathcal{I}_{t,0}\right)=\alal_h^\top\yy_{t,0}+\bebe_h(L)^\top\xx_{t,0},\ h\in\mathbb{N}, 434 | \end{equation} 435 | can be formed leaning on a model linked to a corresponding conditional expectation 436 | $$ 437 | y_{t+h}=\alal_h^\top\yy_{t,0}+\bebe_h(L)^\top\xx_{t,0}+\e_{h,t},\ E\left(\e_{h,t}\big|\mathcal{I}_{t,0}\right)=0, 438 | $$ 439 | where $\alal_h$ and $\bebe_h(L)$ are the respective horizon $h$-specific parameters. Note that, in principle, these conditional expectations have a form of representation \eqref{eq1} with certain restrictions on the original lag polynomials of coefficients. Hence, in the general case, the suitable restrictions for each $h$ will have a different form. 440 | 441 | Given periods $h=1,2,\dots,$ and a selected model or a list of specifications to be considered, package \pkg{midasr} provides the point forecasts corresponding to the estimated analogue of eq. \eqref{eq8}, evaluates the precision of different specifications, and performs weighted forecasting using the framework defined in \cite{ghysels:2013}. 442 | 443 | \subsection{Alternative representations of MIDAS regressions} 444 | The model \eqref{l:eqintro} represents a very general MIDAS regression representation.
We give below a sample of other popular MIDAS regression specifications from \cite{andreou2011forecasting}. These specifications assume that only one high frequency variable is available. We denote its frequency by $m$. 445 | \begin{enumerate} 446 | \item DL-MIDAS($p_X$): 447 | \begin{align*} 448 | y_{t+1}=\mu + \sum_{r=0}^{p_X}\sum_{j=0}^{m-1}\beta_{rm+j}x_{(t-r)m-j}+\eps_{t+1} 449 | \end{align*} 450 | \item ADL-MIDAS($p_Y$,$p_X$): 451 | \begin{align*} 452 | y_{t+1}=\mu + \sum_{j=0}^{p_Y}\mu_jy_{t-j}+\sum_{r=0}^{p_X}\sum_{j=0}^{m-1}\beta_{rm+j}x_{(t-r)m-j}+\eps_{t+1} 453 | \end{align*} 454 | 455 | \item FADL-MIDAS($p_F$,$p_X$,$p_Y$): 456 | \begin{align*} 457 | y_{t+1}=\mu + \sum_{i=0}^{p_F}\alpha_iF_{t-i}+\sum_{j=0}^{p_Y}\mu_jy_{t-j}+\sum_{r=0}^{p_X}\sum_{j=0}^{m-1}\beta_{rm+j}x_{(t-r)m-j}+\eps_{t+1}, 458 | \end{align*} 459 | where $F_t$ is a factor derived from additional data. 460 | 461 | \item ADL-MIDAS-M($p_X$,$p_Y$), or multiplicative MIDAS: 462 | \begin{align*} 463 | y_{t+1}=\mu + \sum_{j=0}^{p_Y}\mu_jy_{t-j}+\sum_{r=0}^{p_X}\alpha_rX_{t-r}+\eps_{t+1}, 464 | \end{align*} 465 | where $X_{t-r}=\sum_{j=0}^{m-1}\beta_jx_{(t-r)m-j}$. 466 | 467 | \item MIDAS with leads, where it is assumed that we have only $J>= 516 | x<-1:12 517 | @ 518 | we get the following result 519 | <>= 520 | fmls(x,k=2,m=3) 521 | @ 522 | i.e., three variables (a contemporaneous and two lags) with four low-frequency observations ($n=12/m$). 523 | 524 | Function \code{mls} is slightly more flexible as the lags included can start from a given order rather than from zero, whereas the function \code{fmls} uses a full lag structure. \code{dmls} performs in addition a first-order differencing of the data which is convenient when working with integrated series. 525 | 526 | 527 | A couple of issues should be taken into account when working with series of different frequencies.
528 | \begin{itemize} 529 | \item It is assumed that the numbers of observations of different frequencies match exactly through the frequency ratio ($n_i=nm_i$), and the first and last observations of each series of different frequency are correspondingly aligned (possibly using \code{NA} to account for some missing observations for series of higher frequency). 530 | \item Because of different lengths of series of various frequencies, the data for the model cannot be kept in one \code{data.frame}. It is expected that variables for the model are either vectors residing in \proglang{R} global environment, or are passed as elements of a \code{list}. Variables of different frequency can be in the same \code{data.frame}, which in turn should be an element of a list. 531 | 532 | \end{itemize} 533 | 534 | \subsection{An example of simulated MIDAS regression}\label{DGP} 535 | Using the above data handling functions, it is straightforward to simulate a response series from the MIDAS regression as a data generating process (DGP). 
For instance, suppose one is willing to generate a low-frequency response variable $y$ in the MIDAS with two higher-frequency series $x$ and $z$ where the impact parameters satisfy the exponential Almon lag polynomials of different orders as follows: 536 | \begin{equation}\label{eq9} 537 | \begin{aligned} 538 | y_t&=2+0.1t+\sum_{j=0}^7\beta_j^{(1)}x_{4t-j}+\sum_{j=0}^{16}\beta_j^{(2)}z_{12t-j}+\e_t,\\ %\ \e_t\sim n.i.d.(0,1)\\ 539 | x_{\tau_1}&\sim n.i.d.(0,1), \ \ z_{\tau_2}\sim n.i.d.(0,1), \ \ \e_t\sim n.i.d.(0,1),\\ 540 | \end{aligned} 541 | \end{equation} 542 | where $(x_{\tau_1},z_{\tau_2},\e_t)$ are independent for any $({\tau_1},{\tau_2},t)\in\mathbb{Z}^3,$ and 543 | $$ 544 | \beta_j^{(i)}=\gamma_0^{(i)}\frac{\exp\left(\sum_{s=1}^{q_i-1}\gamma_{s}^{(i)}j^s\right)}{\sum_{j=0}^{d_i-1}\exp\left(\sum_{s=1}^{q_i-1}\gamma_{s}^{(i)}j^s\right)},\ i=1,2, 545 | $$ 546 | where $d_1=k_1+1=8$ is a multiple of the frequency ratio $m_1=4,$ whereas $d_2=k_2+1=17$ is not a multiple of $m_2=12.$ Here $q_1=2,$ $q_2=3$ with parameterizations 547 | $$ 548 | \begin{aligned} 549 | \gaga_1&=(1,-0.5)^\top,\\ 550 | \gaga_2&=(2,0.5,-0.1)^\top,\\ 551 | \end{aligned} 552 | $$ 553 | which yield the shapes of functional constraints as plotted in Figure \ref{fig:fig2}. 
554 | 555 | \begin{figure}[t] 556 | <>= 557 | plot(x=0:16,nealmon(p=c(2,0.5,-0.1),d=17),type="l",xlab="High frequency lag",ylab="Weights",col=4) 558 | lines(x=0:7,nealmon(p=c(1,-0.5),d=8),col=2) 559 | @ 560 | \caption{A plot of shapes of functional constraints of $x_\tau$ (red) and $z_\tau$ (blue).} 561 | \label{fig:fig2} 562 | \end{figure} 563 | 564 | The following \proglang{R} code produces a series according to the DGP characterized above: 565 | <>= 566 | set.seed(1001) 567 | n<-250 568 | trend<-c(1:n) 569 | x<-rnorm(4*n) 570 | z<-rnorm(12*n) 571 | fn_x <- nealmon(p=c(1,-0.5),d=8) 572 | fn_z <- nealmon(p=c(2,0.5,-0.1),d=17) 573 | y<-2+0.1*trend+mls(x,0:7,4)%*%fn_x+mls(z,0:16,12)%*%fn_z+rnorm(n) 574 | @ 575 | It is of interest to note that the impact of variable $x$ can be represented using aggregates-based MIDAS, whereas the impact of $z$ cannot. 576 | 577 | \subsection{Some specification examples of MIDAS regressions} 578 | Suppose now that we have (only) observations of $y,$ $x,$ and $z$ which are stored as vectors, matrices, time series, or list objects in \proglang{R}, and our intention is to estimate a MIDAS regression model as in equation \eqref{eq9}: 579 | \begin{itemize} 580 | \item[a)] without restricting the parameters (as in U-MIDAS) and using the OLS; 581 | \item[b)] with the exponential Almon lag polynomial constraint on parameters (as in the function \code{nealmon}) and using the NLS. 582 | \end{itemize} 583 | 584 | The OLS estimation as in case a) is straightforwardly performed using 585 | 586 | <>= 587 | eq_u<-lm(y~trend+mls(x,k=0:7,m=4)+mls(z,k=0:16,m=12)) 588 | @ 589 | or, equivalently 590 | <>= 591 | eq_u<-midas_r(y~trend+mls(x,0:7,4)+mls(z,0:16,12),start=NULL) 592 | @ 593 | Note that in this case, \code{midas_r} picks up the variables from the global \proglang{R} environment.
It is possible to pass the data explicitly: 594 | <>= 595 | eq_u<-midas_r(y~trend + mls(x,0:7,4)+mls(z,0:16,12),start=NULL, 596 | data = list(y=y,trend=trend,x=x,z=z)) 597 | @ 598 | The variables of the same frequency can reside in the same \code{data.frame}: 599 | <>= 600 | eq_u<-midas_r(y~trend + mls(x,0:7,4)+mls(z,0:16,12),start=NULL, 601 | data = list(data.frame(y=y,trend=trend),x=x,z=z)) 602 | @ 603 | In this case, there is no need to name the \code{data.frame} element in the list. 604 | 605 | The following \proglang{R} code estimates the constrained case b) using the function \code{midas_r} and reports the NLS estimates $\widehat{\gaga}$ of parameters with the related summary statistics. 606 | <>= 607 | eq_r<-midas_r(y~trend+mls(x,0:7,4,nealmon)+mls(z,0:16,12,nealmon),start=list(x=c(1,-0.5),z=c(2,0.5,-0.1))) 608 | summary(eq_r) 609 | @ 610 | 611 | As you can see the syntax of the function \code{midas_r} is similar to the standard \proglang{R} function \code{nls}. The model is specified via familiar \code{formula} interface. The lags included and functional restriction used can be individual to each variable and are specified within the respective \code{mls}, \code{fmls}, or \code{dmls} function used with \code{midas_r}. It is necessary to provide a list of starting values for each variable with restricted coefficients, since it implicitly defines the number of parameters of the constraint functions to be used for each series. 612 | 613 | The main difference with the function \code{nls} is that there is a greater choice of numerical optimization algorithms. The function \code{midas_r} is written in a way that in theory it can use any \proglang{R} optimization function. The choice is controlled via \code{Ofunction} argument. Currently it is possible to use functions \code{optim} and \code{nls} which are present in a standard \proglang{R} installation and function \code{optimx} from the package \pkg{optimx} \cite{optimx}, \cite{optimx2}. 
The additional arguments to the aforementioned functions can be specified directly in the call to \code{midas_r}. So for example if we want to use the optimization algorithm of Nelder and Mead, which is the default option in the function \code{optim} we use the following code 614 | <>= 615 | eq_r2 <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + 616 | mls(z, 0:16, 12, nealmon), 617 | start = list(x = c(1, -0.5), z = c(2, 0.5, -0.1)), 618 | Ofunction = "optim", method = "Nelder-Mead") 619 | @ 620 | If we want to use Golub-Pereyra algorithm for partially linear least-squares models implemented in the function \code{nls} we use the following code 621 | <>= 622 | eq_r2 <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + 623 | mls(z, 0:16, 12, nealmon), 624 | start = list(x = c(1,-0.5), z = c(2, 0.5, -0.1)), 625 | Ofunction = "nls", method = "plinear") 626 | @ 627 | It is possible to re-estimate the NLS problem with the different algorithm using as starting values the final solution of the previous algorithm. For example it is known, that the default algorithm in \code{nls} is sensitive to starting values. So first we can use the standard Nelder-Mead algorithm to find ``more feasible'' starting values and then use the \code{nls} to get the final result: 628 | <>= 629 | eq_r2 <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + 630 | mls(z, 0:16, 12, nealmon), 631 | start = list(x = c(1, -0.5), z = c(2, 0.5, -0.1)), 632 | Ofunction = "optim", method = "Nelder-Mead") 633 | eq_r2 <- update(eq_r2, Ofunction = "nls") 634 | @ 635 | 636 | The output of the optimization function used can be found by inspecting the element \code{opt} of \code{midas_r} output. 637 | <>= 638 | eq_r2 <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + 639 | mls(z, 0:16, 12, nealmon), 640 | start = list(x = c(1, -0.5), z = c(2, 0.5, -0.1)), 641 | Ofunction = "optim", method = "Nelder-Mead") 642 | eq_r2$opt 643 | @ 644 | Here we observe that the Nelder-Mead algorithm evaluated the cost function 502 times. 
645 | 646 | The optimization functions in \proglang{R} report the status of the convergence of optimization algorithm by the numeric constant, 0 indicating successful convergence. This code is reported as the element \code{convergence} of the \code{midas_r} output. 647 | <>= 648 | eq_r2$convergence 649 | @ 650 | In this case the convergence was not successful. The help page of the function \code{optim} indicates that convergence code 1 means that the iteration limit was reached. 651 | 652 | In order to improve the convergence it is possible to use user defined gradient functions. To use them it is necessary to define gradient function of the restriction. For example for the \code{nealmon} restriction the gradient function is defined in the following way: 653 | <>= 654 | nealmon_gradient <- function(p, d, m) { 655 | i <- 1:d 656 | pl <- poly(i, degree = length(p) - 1, raw = TRUE) 657 | eplc <- exp(pl %*% p[-1])[, , drop=TRUE] 658 | ds <- apply(pl * eplc, 2, sum) 659 | s <- sum(eplc) 660 | cbind(eplc/s, p[1] * (pl * eplc/s - eplc %*% t(ds)/s^2)) 661 | } 662 | @ 663 | The gradient functions are passed as named list elements via argument \code{weight_gradients}: 664 | <>= 665 | eq_r2 <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + 666 | mls(z, 0:16, 12, nealmon), 667 | start = list(x = c(1, -0.5), z = c(2, 0.5, -0.1)), 668 | weight_gradients=list(nealmon = nealmon_gradient)) 669 | @ 670 | This way \code{midas_r} calculates the exact gradient of the NLS problem~\eqref{eqNLS} using the specified gradient function of the restriction. For all the types of the restrictions referenced in Table \ref{tab:3} their gradient functions are specified in the package \pkg{midasr}. The naming convention for gradient functions is \code{restriction_name_gradient}. It is not necessary to explicitly pass gradient functions named according to this convention.
If \code{weight_gradients} is not \code{NULL} and does not contain the appropriately named element, it is assumed that there exists a gradient function conforming to the gradient naming convention which is then subsequently used. 671 | 672 | The gradient and the hessian of the NLS problem are supplied as the output of \code{midas_r}. The gradient is calculated exactly if appropriate gradients for weight functions are supplied as explained above, otherwise the numerical approximation of the gradient is calculated using the package \pkg{numDeriv} \cite{numderiv}. For the hessian the numerical approximation is always used. Having the gradient and hessian calculated allows one to check whether the necessary and sufficient conditions for the convergence are satisfied. This is performed by the function \code{deriv_tests} which calculates the Euclidean norm of the gradient and the eigenvalues of the hessian. It then tests whether the norm of gradient is close to zero and whether the eigenvalues are positive. 673 | 674 | 675 | <>= 676 | deriv_tests(eq_r, tol = 1e-06) 677 | @ 678 | 679 | To retrieve a vector of constrained estimates $\tilde\theta$ (and, hence, also $\hat\ff=\ff_{\gaga}\big|_{{\gaga}=\widehat{\gaga}}$) which corresponds to the vector ${\thth}$ ({\bebe}, respectively), the function \code{coef} can be used as follows 680 | <>= 681 | coef(eq_r, midas = TRUE) 682 | @ 683 | 684 | In the example provided above, a functional constraint was imposed directly on $\bebe(L)$ terms corresponding to each series without the usage of aggregates. Relying on the relationship \eqref{eq6}, it is always possible to write such an explicit general constraint from an aggregates-based one. For convenience of a user, the function \code{amweights} can be used to form several standard periodic functional constraints with 'typical' restrictions explicated in equation \eqref{eq4}.
For instance, 685 | 686 | <>= 687 | amweights(p=c(1,-0.5),d=8,m=4,weight=nealmon,type="C") 688 | @ 689 | with \code{type="C"} corresponds to a fully restricted version of aggregates-based expression \eqref{eq4} apart from the cross-restriction on the equality of weighting schemes between different \newline variables/frequencies. Note that the code above repeats the result of 690 | <>= 691 | nealmon(p=c(1,-0.5),d=4) 692 | @ 693 | twice ($d/m=2$), as implied by the number of periods at higher-frequency (\code{d=8}) and the frequency ratio (\code{m=4}). In this way, the function \code{amweights} can be used to define explicitly a new functional constraint relying on the relationship \eqref{eq6}. Alternatively, one can indicate directly within the function \code{midas_r} that the aggregates-based restriction must be used as follows 694 | <>= 695 | eq_r2<-midas_r(y~trend+mls(x,0:7,4,amweights,nealmon,"C")+mls(z,0:16,12,nealmon),start=list(x=c(1,-0.5),z=c(2,0.5,-0.1))) 696 | @ 697 | where the first variable follows an aggregates-based MIDAS restriction scheme. Note that the selection of alternative types "A" and "B" is connected with specifications having a larger number of parameters (see Table \ref{tab:3}), hence the list of starting values needs to be adjusted to account for an increase in the number of (potentially unequal) impact parameters. %To simplify the usage, the starting values of a single period are used for all the lags. %[?? O kodel tas pats nepadaryta ir su poveikio parametrais? Manau, reikia naudoti vienodus ir cia??] 698 | 699 | It should be also noted that, whenever the aggregates-connected restrictions are used, the number of periods must be a multiple of the frequency ratio.
For instance, the current lag specification for variable $z$ is not consistent with this requirement and cannot be represented through the (periodic) aggregates, but either \code{mls(z,0:11,12,amweights,nealmon,"C")} or \code{mls(z,0:23,12,amweights,nealmon,"C")} would be valid expressions from the code implementation point of view. 700 | 701 | Table \ref{tab:3} summarizes and provides various other examples of correspondence between \code{midas_r} coding and the analytical specifications of MIDAS regressions. 702 | \begin{sidewaystable}[p] 703 | %\begin{table}[htp] 704 | \begin{center} 705 | \small{ 706 | \begin{tabular}{llll} 707 | \hline\hline 708 | {\bf Description}& {\bf Code example}& {\bf Analytical expression}&{\bf Notes}\\ 709 | %\shortstack{aa \\ 710 | \hline 711 | \parbox[c]{3cm}{Different constraint functions} 712 | &\parbox{7.3cm}{\code{midas\_r(y \~{} mls(x, 0:7, 4, nealmon) + \newline mls(z, 0:16, 12, gompertzp), start = list(x = c(1, -0.5), z = c(1, 0.5, 0.1)))} } 713 | &\parbox[c]{6cm}{$y_t=c+\sum_{j=0}^7\beta_j^{(1)}x_{4t-j}+\newline \sum_{j=0}^{16}\beta_j^{(2)}z_{12t-j}+\e_t$}& \parbox[c]{4.5cm}{Constraints on $\beta_j^{(i)},\ i=1,2$ are given by different functions.}\\ 714 | \hline 715 | \parbox[c]{3cm}{Partial constraint\newline (only on $z$)} 716 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(x, 0:7, 4) + mls(z, 0:16, 12, nealmon), start = list(z = c(1, -0.5)))} } 717 | &\parbox[c]{6cm}{$y_t=c+\sum_{j=0}^7\beta_j^{(1)}x_{4t-j}+\newline \sum_{j=0}^{16}\beta_j^{(2)}z_{12t-j}+\e_t$}& \parbox[c]{4.5cm}{$x$ enters linearly with unconstrained $\beta_j^{(1)}$.}\\ 718 | \hline 719 | \parbox[c]{3cm}{With unrestricted autoregressive terms} 720 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(y, 1:2, 1) + mls(x, 0:7, 4, nealmon), start = list(x = c(1, -0.5)))} } 721 | &\parbox[c]{6cm}{$y_t=c+\sum_{j=1}^{2}\alpha_jy_{t-j}+\newline \sum_{j=0}^7\beta_jx_{4t-j}+\e_t$}& \parbox[c]{4.5cm}{Autoregressive terms enter linearly with unconstrained coefficients.}\\ 722 | \hline 
723 | \parbox[c]{3cm}{With a common factor restriction} 724 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(y, 1:2, 1, "*") + mls(x, 0:7, 4, nealmon), start = list(x = c(1, -0.5)))} } 725 | &\parbox[c]{6cm}{$\alpha(B)y_t=c+\alpha(B)\lambda(L) x_{4t}+\e_t,$}& \parbox[c]{4.5cm}{Here coefficients of $\lambda(z)$ are assumed to satisfy nealmon restriction.}\\ 726 | \hline 727 | \parbox[c]{3cm}{With autoregressive parameters restricted by a function} 728 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(y, 1:6, 1, nealmon) + mls(x, 0:7, 4, nealmon), start = list(y = c(1, -0.5), x = c(1, -0.5)))} } 729 | &\parbox[c]{6cm}{$y_t=c+\sum_{j=1}^{6}\alpha_jy_{t-j}+\newline \sum_{j=0}^7\beta_jx_{4t-j}+\e_t$}& \parbox[c]{4.5cm}{Autoregressive parameters $\alpha_j,\ j=1,\dots,6$ are constrained to satisfy nealmon restriction.}\\ 730 | \hline 731 | \parbox[c]{3cm}{Aggregates-based \newline (Case A)} 732 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(x, 0:7, 4, amweights, nealmon, "A"), start = list(x = c(1, 1, 1, -0.5)))} } 733 | &\parbox[c]{6cm}{$y_t=c+\newline \sum_{r=0}^1\lambda_r\sum_{s=1}^{4}w(\dede_{r};s)x_{4(t-1-r)+s}+\e_t$} 734 | & \parbox[c]{4.5cm}{The same weighting scheme (not parameters) is used in aggregation.}\\ 735 | \hline 736 | \parbox[c]{3cm}{Aggregates-based \newline (Case B)} 737 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(x, 0:7, 4, amweights, nealmon, "B"), start = list(x = c(1, 1, -0.5)))} } 738 | &\parbox[c]{6cm}{$y_t=c+\newline \sum_{r=0}^1 \lambda_r\sum_{s=1}^{4}w(\dede;s)x_{4(t-1-r)+s}+\e_t$} 739 | & \parbox[c]{4.5cm}{The same weights are used in aggregation.}\\ 740 | \hline 741 | \parbox[c]{3cm}{Aggregates-based \newline (Case C)} 742 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(x, 0:7, 4, amweights, nealmon, "C"), start = list(x = c(1, -0.5)))} } 743 | &\parbox[c]{6cm}{$y_t=c+\newline \lambda\sum_{r=0}^1 \sum_{s=1}^{4}w(\dede;s)x_{4(t-1-r)+s}+\e_t$} 744 | & \parbox[c]{4.5cm}{A common impact parameter of lags and the same weights are used in aggregation.}\\ 745 | \hline 746 | 
\parbox[c]{3cm}{With a user-defined constraint} 747 | &\parbox{6cm}{\code{midas\_r(y \~{} mls(x, 0:101, 4, fn), start = list(x = c(0, 0)))} } 748 | &\parbox[c]{6cm}{$y_t=c+\sum_{j=0}^{101}\beta_jx_{4t-j}+\e_t$, \newline $\beta_j=\gamma_1(j+1)^{\gamma_2},\ j=0,1,\dots,101.$}& 749 | \parbox[c]{4.5cm}{User defined function: \code{fn <- function(p, d) p[1] * c(1:d)\^{}p[2]}.}\\ 750 | \hline\hline 751 | \end{tabular} 752 | } 753 | \caption{A non-extensive list of possible specifications of the MIDAS regression in the \pkg{midasr} package.} 754 | \label{tab:3} 755 | \end{center} 756 | %\end{table} 757 | \end{sidewaystable} 758 | 759 | \subsection{Adequacy testing of restrictions} 760 | Given a MIDAS regression model estimated with \code{midas_r}, the empirical adequacy of the functional restrictions can be tested under quite standard assumptions (see \citealp{kvedaras:2012} and \citealp{kvedaras:2013}) using functions \code{hAh_test} and \code{hAhr_test} of the package. In the case of a stationary series $\{y_t\}$ they can be applied directly, whereas whenever $\{y_t\}$ is cointegrated with explanatory variables, a special transformation needs to be applied before the testing (see e.g., \citealp{bilinskas:2013}). The \code{hAh_test} can be used whenever errors of the process are independently and identically distributed, whereas the \code{hAhr_test} uses a HAC-robust version of the test. We should just point out that, whenever no significant HAC in the residuals are observed, we would suggest using \code{hAh_test} which would then have more precise test sizes in small samples. In the case of integrated series $\{y_t\}$ which is co-integrated with explanatory variables, some other alternatives are available (see \citealp{kvedaras:2013b}). 761 | 762 | For illustration, let us use the name \code{eq_r} of an estimated model as in the previous subsections. 
Then the functions produce, respectively, 763 | <>= 764 | hAh_test(eq_r) 765 | hAhr_test(eq_r) 766 | @ 767 | Here the value of a test statistic, the degrees of freedom (the number of binding constraints on parameters in equation \eqref{eq1}), and the empirical significance of the null hypothesis that a functional constraint is adequate are reported. 768 | 769 | As can be seen, such a specification, which in fact corresponds to the underlying DGP, cannot be rejected at the usual significance levels, whereas e.g., reducing the number of parameters of functional constraint of variable $z$ to only two instead of three is quite strongly rejected using either version of the test: 770 | <>= 771 | eq_rb <- midas_r(y ~ trend + mls(x, 0:7, 4, nealmon) + 772 | mls(z, 0:12, 12, nealmon), 773 | start = list(x = c(1, -0.5), z = c(2, -0.1))) 774 | hAh_test(eq_rb) 775 | hAhr_test(eq_rb) 776 | @ 777 | 778 | %[Perleidus MC empirinis reiksmingumas hAhr prie H0 neatrodo gerai lyginant su ant taip pat ivertinto modelio gautais hAh. Kazkur sedi problema.] - viskas ok prie dideliu imciu, pvz., n=2000. 779 | 780 | Whenever the empirical adequacy cannot be rejected at some appropriate level of significance for a couple of models, we could further rely on information criteria to make the selection of the best candidate(s). 781 | 782 | \subsection{Model selection} 783 | Suppose that we want to investigate which out of several functional constraints---for instance, the normalized ("nealmon") or non-normalized ("almonp") exponential Almon lag polynomials, or with polynomial of order 2 or 3, and so on---are better suited in a MIDAS regression model of $y$ on $x$ and $z$ (possibly different for each variable).
Since the best maximum number of lags can differ with a functional constraint and/or variable/frequency, let us first define using the \pkg{midasr} function \code{expand_weights_lags} the sets of potential models corresponding to each explanatory variable as follows % and the autoregressive terms of $y$ ; 784 | <>= 785 | set_x <- expand_weights_lags(weights = c("nealmon", "almonp"), 786 | from = 0, to = c(5, 10), m = 1, 787 | start = list(nealmon = c(1, -1), 788 | almonp = c(1, 0, 0))) 789 | set_z <- expand_weights_lags(c("nealmon", "nealmon"), 790 | 0, c(10, 20), 1, 791 | start = list(nealmon = c(1, -1), 792 | nealmon = c(1, -1, 0))) 793 | @ 794 | Here, for each variable, vector (or list) \code{weights} defines the potential restrictions to be considered and a list \code{start} gives the appropriate starting values defining implicitly the number of parameters per function. 795 | 796 | The potential lag structures are given by the following ranges of high-frequency lags: from [\code{from}; \code{m}$*\min$(\code{to})] to [\code{from}; \code{m}$*\max$(\code{to})]. When aggregates-based modeling is involved using \code{amweights} in \code{midas_r}, \code{m} can be set to the frequency ratio which ensures that the considered models (lag structures) are multiples of it. Otherwise, we would recommend to operate with high-frequency lag structures without changing the default value $m=1.$ 797 | Then, the set of potential models is defined as all possible different combinations of functions and lag structures with a corresponding set of starting values. A simple example below illustrates the result in order to reveal the underlying structure, which, besides the understanding of it, is otherwise not needed for a user.
798 | <>= 799 | expand_weights_lags(weights = c("nealmon", "nbeta"), 800 | from = 1, to = c(2, 3), m = 1, 801 | start = list(nealmon = c(1, -1), 802 | nbeta = rep(0.5, 3))) 803 | @ 804 | Given the sets of potential specifications for each variable as defined above, the estimation of all the models is performed by 805 | 806 | <>= 807 | eqs_ic<-midas_r_ic_table(y~trend+mls(x,0,m=4)+fmls(z,0,m=12),table=list(z=set_z,x=set_x)) 808 | @ 809 | The function \code{midas_r_ic_table} returns 810 | a summary table of all models together with the corresponding values of the usual information criteria and the empirical sizes of adequacy testing of functional restrictions of parameters. The result of derivative tests and the convergence status of the optimization function is also returned. 811 | 812 | The summary table is a \code{data.frame} where each row corresponds to candidate model, so this table can be manipulated in the usual \proglang{R} way. The table can be accessed as \code{table} element of the list returned by \code{midas_r_ic_table}. The list of fitted \code{midas_r} objects of all candidate models can be accessed as \code{candlist} element. It is possible to inspect each candidate model and fine-tune its convergence if necessary. 813 | 814 | <>= 815 | eqs_ic$candlist[[5]] <- update(eqs_ic$candlist[[5]],Ofunction="nls") 816 | @ 817 | 818 | The summary table can be recalculated by using the \code{update} method for \code{midas_r_ic_table}. This function then recalculates all the necessary statistics. 819 | <>= 820 | eqs_ic <- update(eqs_ic) 821 | @ 822 | It should be pointed out that there is no need to provide the weighting function nor a specific lag order in the \code{mls} functions in a call to \code{midas_r_ic_table}, since they are defined by the respective potential sets of models under option \code{table}. Any provided values with \code{mls} (or other similar functions) are over-written by those defined in \code{table}. 
823 | 824 | Finally, the best model in terms of a selected information criterion in a restricted or unrestricted model then is simply obtained by using 825 | <>= 826 | modsel(eqs_ic,IC="AIC",type="restricted") 827 | @ 828 | which also prints the usual summary statistics as well as the testing of adequacy of the applied functional restriction using, by default, the \code{hAh_test}. A word of caution is needed here to remind that, as is typical, the empirical size of a test corresponding to a complex model-selection procedure might not correspond directly to a nominal one of a single-step estimation. 829 | 830 | %For instance, whenever the data are generated by a DGP as was defined in subsection ???, and supposing that the lag orders where specified correctly (were known in advance) while it is only uncertain whether the normalized exponential Almon lag polynomial with two parameters or an unrestricted one with three parameters should be applied, we get the following result 831 | %\begin{verbatim} 832 | %\end{verbatim} 833 | %which chooses the correct specification of the DGP. 834 | 835 | %???ghysels\_table??? 836 | 837 | \subsection{Forecasting} 838 | 839 | Conditional forecasting (with prediction intervals, etc.) using unrestricted U-MIDAS regression models which are estimated using \code{lm} can be performed using standard \proglang{R} functions e.g., \code{predict.lm}. Conditional point prediction given a specific model is also possible relying on a standard \code{predict} function. 840 | 841 | The function \code{predict} works similarly to \code{predict.lm}. It takes the new data, transforms it into an appropriate matrix and multiplies it with the coefficients. Suppose we want to produce the forecast $\hat{y}_{T+1|T}$ for the model \eqref{eq9}. To produce this forecast we need the data $x_{4(T+1)},...,x_{4T-3} 842 | $ and $z_{12(T+1)},...,z_{12T-4}.$ It would be tedious to calculate precisely the required data each time we want to perform a forecasting exercise. 
To alleviate this problem the package \pkg{midasr} provides the function \code{forecast}. This function assumes that the model was estimated with the data up to low frequency index $T.$ It is then assumed that the new data is the data after the low frequency $T$ and then calculates the appropriate forecast. For example suppose that we have new data for one low frequency period for the model \eqref{eq9}. Here is how the forecast for one period would look like: 843 | 844 | <>= 845 | newx<-rnorm(4) 846 | newz<-rnorm(12) 847 | forecast(eq_rb,newdata=list(x=newx,z=newz,trend=251)) 848 | @ 849 | 850 | It is also common to estimate models which do not require new data for forecasting 851 | \begin{align*} 852 | y_{t+\ell}&=2+0.1t+\sum_{j=0}^7\beta_j^{(1)}x_{4t-j}+\sum_{j=0}^{16}\beta_j^{(2)}z_{12t-j}+\e_{t+\ell}, 853 | \end{align*} 854 | where $\ell$ is the desired forecasting horizon. This model can be rewritten as 855 | \begin{align*} 856 | y_{t}&=2+0.1(t-\ell)+\sum_{j=4\ell}^{7+4\ell}\beta_j^{(1)}x_{4t-j}+\sum_{j=12\ell}^{16+12\ell}\beta_j^{(2)}z_{12t-j}+\e_t, 857 | \end{align*} 858 | and can be estimated using \code{midas_r}. For such a model we can get forecasts $\hat{y}_{T+\ell|T},...,\hat{y}_{T+1|T}$ using the explanatory variable data up to low frequency index $T.$ To obtain these forecasts using the function \code{forecast} we need to supply \code{NA} values for explanatory variables. An example for $\ell=1$ is as follows: 859 | <>= 860 | eq_f <- midas_r(y~trend+mls(x,4+0:7,4,nealmon)+mls(z,12+0:16,12,nealmon), 861 | start=list(x=c(1,-0.5),z=c(2,0.5,-0.1))) 862 | forecast(eq_f,newdata=list(x=rep(NA,4),z=rep(NA,12),trend=251)) 863 | @ 864 | Note that we still need to specify a value for the trend. 865 | 866 | In addition, the package \pkg{midasr} provides a general flexible environment for out-of-sample prediction, forecast combination, and evaluation of restricted MIDAS regression models using the function \code{select_and_forecast}. 
If exact models were known for different forecasting horizons, it can also be used just to report various in- and out-of-sample prediction characteristics of the models. In the general case, it also performs an automatic selection of the best models for each forecasting horizon from a set of potential specifications defined by all combinations of functional restrictions and lag orders to be considered, and produces forecast combinations according to a specified forecast weighting scheme. 867 | 868 | 869 | In general, the definition of potential models in the function \code{select_and_forecast} is similar to that one uses in the model selection analysis described in the previous section. However, different best performing specifications are most likely related with each low-frequency forecasting horizon $\ell=0,1,2,\dots.$ Therefore the set of potential models (parameter restriction functions and lag orders) to be considered for each horizon needs to be defined. 870 | 871 | Suppose that, as in the previous examples, we have variables $x$ and $z$ with frequency ratios $m_1=4$ and $m_2=12,$ respectively. Suppose that we intend to consider forecasting of $y$ up to three low-frequency periods $\ell\in\{1,2,3\}$ ahead. It should be noted that, in terms of high-frequency periods, they correspond to $\ell m_1\in\{4,8,12\}$ for variable $x,$ and $\ell m_2\in\{12,24,36\}$ for variable $z.$ Thus these variable-specific vectors define the lowest lags\footnote{Including lags smaller than that would imply that more information on explanatory variables is available and, in fact, $\ell-1$ forecasting horizon is actually under consideration.} of high-frequency period to be considered for each variable in the respective forecasting model (option \code{from} in the function \code{select_and_forecast}). Suppose further that in all the models we want to consider specifications having not less than 10 high-frequency lags and not more than 15 for each variable. 
This defines the maximum high-frequency lag of all potential models considered for each low-frequency horizon period $\ell\in\{1,2,3\}.$ Hence, for each variable, three corresponding pairs $(\ell m_1+10,\ell m_1+15),\ \ell\in\{1,2,3\}$ will define the upper bounds of ranges to be considered (option \code{to} in the function \code{select_and_forecast}). 872 | For instance, for variable $x,$ three pairs $(14,19),(18,23),$ and $(22,27)$ correspond to $\ell=1,2,$ and $3$ and together with that defined in option \code{from} (see \code{x = c(4, 8, 12)}) imply that the following ranges of potential models will be under consideration for variable $x$: 873 | \begin{itemize} 874 | \item[$\ell=1:$] from $[4-14]$ to $[4-19],$ 875 | \item[$\ell=2:$] from $[8-18]$ to $[8-23],$ 876 | \item[$\ell=3:$] from $[12-22]$ to $[12-27].$ 877 | \end{itemize} 878 | 879 | The other options of the function \code{select_and_forecast} are options of functions\newline \code{midas_r_ic_table}, \code{modsel} and \code{average_forecast}. 880 | 881 | <>= 882 | cbfc <- select_and_forecast( 883 | y ~ trend + mls(x, 0, 4) + mls(z, 0, 12), 884 | from = list(x = c(4, 8, 12), z = c(12, 24, 36)), 885 | to = list(x = rbind(c(14, 19), c(18, 23), c(22, 27)), 886 | z = rbind(c(22, 27), c(34, 39), c(46, 51))), 887 | insample = 1:200, outsample = 201:250, 888 | weights = list(x = c("nealmon", "almonp"), z = c("nealmon", "almonp")), 889 | wstart = list(nealmon = rep(1, 3), almonp = rep(1, 3)), 890 | IC = "AIC", seltype = "restricted", ftype = "fixed", 891 | measures = c("MSE", "MAPE", "MASE"), 892 | fweights = c("EW", "BICW", "MSFE", "DMSFE")) 893 | @ 894 | The names of weighting schemes are taken from MIDAS \proglang{MATLAB} toolbox \cite{ghysels:2013}. Similarly, forecasting using rolling and recursive model estimation samples defined therein \cite{ghysels:2013} is supported by setting option \code{ftype = "rolling"} or\newline \code{ftype = "recursive"}.
895 | 896 | Then, among others, 897 | <>= 898 | cbfc$accuracy$individual 899 | cbfc$accuracy$average 900 | @ 901 | report, respectively: 902 | \begin{itemize} 903 | \item the best forecasting equations (in terms of a specified criterion out of the above-defined potential specifications), and their in- and out-of-sample forecasting precision measures for each forecasting horizon; 904 | \item the out-of-sample precision of forecast combinations for each forecasting horizon. 905 | %\item the best models for each forecasting horizon. 906 | \end{itemize} 907 | 908 | The above example illustrated a general usage of the function \code{select_and_forecast} including selection of best models. Now suppose that a user is only interested in evaluating a one step ahead forecasting performance of a given model. Suppose further that he/she a priori knows that the best specification to be used for this forecasting horizon $\ell=1$ is the one with 909 | \begin{itemize} 910 | \item \code{mls(x,4:14,4,nealmon)} with parameters \code{x=c(10,1,-0.1)} (the first one representing an impact parameter and the last two being the parameters of the normalized weighting function), and 911 | \item \code{mls(z,12:22,12,nealmon)} with parameters \code{z=c(2,-0.1)} i.e., with one parameter less in the weighting function. 912 | \end{itemize} 913 | Given already preselected and evaluated models, the user can use the function \code{average_forecast} to evaluate the forecasting performance.
To use this function at first it is necessary to fit the model and then pass it to function \code{average_forecast} specifying the in-sample and out-of-sample data, accuracy measures and weighting scheme in a similar manner to\newline \code{select_and_forecast} 914 | <>= 915 | mod1 <- midas_r(y ~ trend + mls(x, 4:14, 4, nealmon) + 916 | mls(z, 12:22, 12, nealmon), 917 | start = list(x = c(10, 1, -0.1), z = c(2, -0.1))) 918 | avgf <- average_forecast(list(mod1), 919 | data = list(y = y, x = x, z = z, trend = trend), 920 | insample = 1:200, outsample = 201:250, 921 | type = "fixed", 922 | measures = c("MSE", "MAPE", "MASE"), 923 | fweights = c("EW", "BICW", "MSFE", "DMSFE")) 924 | @ 925 | 926 | It should also be pointed out that the forecast combinations in the function \newline\code{select_and_forecast} are obtained only from the forecasts linked to different restriction functions on parameters. The forecasts related to different lag specifications are not combined, but the best lag order is chosen in terms of a given information criterion. 927 | If there is a need to get forecast combinations for a group of models which the user selected using other criteria, the function \code{average_forecast} should be used in a manner outlined in the previous example. 928 | 929 | \section{Empirical illustrations} 930 | 931 | \subsection{Forecasting GDP growth} 932 | We replicate the example provided in \cite{ghysels:2013}. In particular we run MIDAS regression to forecast quarterly GDP growth with monthly non-farms payroll employment growth. The forecasting equation is the following 933 | \begin{align*} 934 | y_{t+1} = \alpha+\rho y_{t}+\sum_{j = 0}^8\theta_jx_{3t-j}+\varepsilon_t, 935 | \end{align*} 936 | where $y_t$ is the log difference of quarterly seasonally adjusted real US GDP and $x_{3t}$ is the log difference of monthly total employment non-farms payroll. The data is taken from St. Louis FRED website. 
937 | 938 | First we load the data and perform necessary transformations. 939 | <>= 940 | data("USqgdp") 941 | data("USpayems") 942 | y <- window(USqgdp, end = c(2011, 2)) 943 | x <- window(USpayems, end = c(2011, 7)) 944 | 945 | yg <- diff(log(y))*100 946 | xg <- diff(log(x))*100 947 | 948 | nx <- ts(c(NA, xg, NA, NA), start = start(x), frequency = 12) 949 | ny <- ts(c(rep(NA, 33), yg, NA), start = start(x), frequency = 4) 950 | @ 951 | The last two lines are needed to equalize the sample sizes, which are different in the original data. We simply add additional \code{NA} values at the beginning and the end of the data. The graphical representation of the data is shown in Figure \ref{fig:ghysels}. 952 | 953 | \begin{figure}[tp] 954 | <>= 955 | plot.ts(nx, xlab = "Time", ylab = "Percentages", col = 4, ylim = c(-5, 6)) 956 | lines(ny, col = 2) 957 | @ 958 | \caption{A plot of time series of quarterly gross domestic product growth rates and monthly non-farm payroll employment growth rates.} 959 | \label{fig:ghysels} 960 | \end{figure} 961 | To specify the model for the \code{midas_r} function we rewrite it in the following equivalent form: 962 | \begin{align*} 963 | y_t = \alpha+\rho y_{t-1}+\sum_{j = 3}^{11}\theta_jx_{3t-j}+\varepsilon_t. 964 | \end{align*} 965 | As in \cite{ghysels:2013} we restrict the estimation sample from the first quarter of 1985 to the first quarter of 2009. We evaluate the models with the Beta polynomial, Beta with non-zero and U-MIDAS weight specifications.
966 | <>= 967 | xx <- window(nx,start=c(1985,1),end=c(2009,3)) 968 | yy <- window(ny,start=c(1985,1),end=c(2009,1)) 969 | 970 | beta0 <- midas_r(yy~mls(yy,1,1)+mls(xx,3:11,3,nbeta), 971 | start=list(xx=c(1.7,1,5))) 972 | 973 | coef(beta0) 974 | 975 | betan <- midas_r(yy~mls(yy,1,1)+mls(xx,3:11,3,nbetaMT), 976 | start=list(xx=c(2,1,5,0))) 977 | coef(betan) 978 | 979 | um <- midas_r(yy~mls(yy,1,1)+mls(xx,3:11,3),start=NULL) 980 | coef(um) 981 | 982 | @ 983 | We can evaluate the forecasting performance of these three models on the out of sample data, containing 9 quarters, from 2009Q2 to 2011Q2 984 | 985 | <>= 986 | fulldata <- list(xx = window(nx, start = c(1985, 1), end = c(2011, 6)), 987 | yy = window(ny, start = c(1985, 1), end = c(2011, 2))) 988 | insample <- 1:length(yy) 989 | outsample <- (1:length(fulldata$yy))[-insample] 990 | 991 | avgf<-average_forecast(list(beta0, betan, um), 992 | data = fulldata, 993 | insample = insample, 994 | outsample = outsample) 995 | sqrt(avgf$accuracy$individual$MSE.out.of.sample) 996 | @ 997 | We see that the unrestricted MIDAS regression model gives the best out-of-sample RMSE. 998 | %Again the reported RMSE correspond to the ones in \cite{ghysels:2013}. 999 | 1000 | \subsection{Forecasting realized volatility} 1001 | 1002 | As another demonstration we use the package \pkg{midasr} to forecast the daily realized volatility. A simple model for forecasting the daily realized volatility was proposed by \cite{corsi2009simple}. 
The heterogeneous 1003 | autoregressive model of realized volatility (HAR-RV) is defined as 1004 | \begin{align*} 1005 | RV_{t+1}^{(d)}=c+\beta^{(d)}RV_t^{(d)}+\beta^{(w)}RV_{t}^{(w)}+\beta^{(m)}RV_t^{(m)}+w_{t+1}, 1006 | \end{align*} 1007 | where $RV_t^{(d)}$ is the daily realized volatility and $RV_{t}^{(w)}$ and $RV_t^{(m)}$ are weekly and monthly averages: 1008 | \begin{align*} 1009 | RV_{t}^{(w)}&=\frac{1}{5}\left(RV_{t}^{(d)}+RV_{t-1}^{(d)}+\dots+RV_{t-4}^{(d)}\right)\\ 1010 | RV_{t}^{(m)}&=\frac{1}{20}\left(RV_{t}^{(d)}+RV_{t-1}^{(d)}+\dots+RV_{t-19}^{(d)}\right), 1011 | \end{align*} 1012 | where we assume that a week has 5 days, and a month has 4 weeks. This model is a special case of a MIDAS regression: 1013 | \begin{align*} 1014 | RV_{t+1}^{(d)}=c+\sum_{j=0}^{19}\beta_jRV_{t-j}^{(d)}+w_{t+1}, 1015 | \end{align*} 1016 | where 1017 | \begin{align*} 1018 | \beta_{j}=\begin{cases} 1019 | \beta^{(d)}+\frac{1}{5}\beta^{(w)}+\frac{1}{20}\beta^{(m)}, \text{ for } j=0,\\ 1020 | \frac{1}{5}\beta^{(w)}+\frac{1}{20}\beta^{(m)}, \text{ for } j=1,\dots,4,\\ 1021 | \frac{1}{20}\beta^{(m)}, \text{ for } j=5,\dots,19. 1022 | \end{cases} 1023 | \end{align*} 1024 | The corresponding \proglang{R} code is the following: 1025 | <>= 1026 | harstep <- function(p,d,m) { 1027 | if(d!=20) stop("HAR(3)-RV process requires 20 lags") 1028 | out <- rep(0,20) 1029 | out[1] <- p[1]+p[2]/5+p[3]/20 1030 | out[2:5] <- p[2]/5+p[3]/20 1031 | out[6:20] <- p[3]/20 1032 | out 1033 | } 1034 | @ 1035 | 1036 | For empirical demonstration we use the realized variance data on stock 1037 | indices provided by \cite{oxmandata}. 1038 | 1039 | We estimate the model for the annualized realized volatility of the S\&P500 index, which is based on 5-minute returns data.
1040 | <>= 1041 | data("rvsp500") 1042 | spx2_rvol <- 100*sqrt(252*as.numeric(rvsp500[,"SPX2.rv"])) 1043 | mh <- midas_r(rv~mls(rv,1:20,1,harstep),data=list(rv=spx2_rvol),start=list(rv=c(1,1,1))) 1044 | summary(mh) 1045 | @ 1046 | For comparison we also estimate the model with the normalized exponential Almon weights 1047 | 1048 | <>= 1049 | mr <- midas_r(rv~mls(rv,1:20,1,nealmon), data=list(rv=spx2_rvol),start=list(rv=c(0,0,0)), 1050 | weight_gradients = list()) 1051 | summary(mr) 1052 | @ 1053 | We can test which of these restrictions is compatible with the data using the heteroscedasticity and autocorrelation robust weight specification 1054 | test \code{hAhr_test}. 1055 | <>= 1056 | hAhr_test(mh) 1057 | hAhr_test(mr) 1058 | @ 1059 | We can see that the null hypothesis pertaining to the HAR-RV-implied constraints in the MIDAS regression model is rejected at the 0.05 significance level, whereas the null hypothesis that the exponential Almon lag restriction is adequate, cannot be rejected. 1060 | 1061 | Figure \ref{fig:figHAR} illustrates the coefficients of the fitted 1062 | MIDAS regressions and the coefficients of U-MIDAS 1063 | regression with their corresponding 95\% confidence bounds. 1064 | 1065 | \begin{figure}[tbp] 1066 | <>= 1067 | plot_midas_coef(mh, title = "") 1068 | lines(0:19,coef(mr, midas = TRUE, term_names = "rv"), col = 3) 1069 | @ 1070 | \caption{Comparison of HAR-RV (blue), Nealmon (green) and U-MIDAS 1071 | (black) models.} 1072 | \label{fig:figHAR} 1073 | \end{figure} 1074 | 1075 | For the exponential Almon lag specification we can choose the number 1076 | of lags via AIC or BIC. 
1077 | 1078 | <>= 1079 | tb <- expand_weights_lags("nealmon", from = 1, to = c(5, 15), 1080 | start = list(nealmon = c(0, 0, 0))) 1081 | mtb <- midas_r_ic_table(rv ~ mls(rv, 1:20, 1, nealmon), 1082 | data = list(rv = spx2_rvol), table = list(rv = tb), 1083 | test = "hAh_test", 1084 | weight_gradients = list(), show_progress = FALSE) 1085 | mtb$candlist <- lapply(mtb$candlist, update, Ofunction = "nls") 1086 | mtb$test <- "hAhr_test" 1087 | mtb <- update(mtb) 1088 | @ 1089 | Here we used two optimization methods to improve the convergence. The function \newline\code{midas_r_ic_table} applies the \code{test} function for each candidate model. The function \newline\code{hAhr_test} takes a lot of computing time, especially for models with a larger number of lags, so we calculate it only in the second, final step, and we restrict the number of lags to choose from. The AIC selects the model with 9 lags: 1090 | 1091 | <>= 1092 | bm <- modsel(mtb) 1093 | @ 1094 | The HAC robust version of \code{hAh_test} again does not reject the null 1095 | hypothesis of the exponential Almon lag specifications. 1096 | 1097 | <>= 1098 | bm <- update(bm,Ofunction="optim",method="BFGS") 1099 | @ 1100 | 1101 | We can look into the forecast performance of both models, using a 1102 | rolling forecast with a 1000-observation window. For comparison we also 1103 | calculate the forecasts of an unrestricted AR(20) model. 1104 | 1105 | <>= 1106 | ar20 <- midas_r(rv ~ mls(rv, 1:20, 1), data = list(rv = spx2_rvol), 1107 | start = NULL) 1108 | forc <- average_forecast(list(ar20, mh, bm), 1109 | data = list(rv = spx2_rvol), 1110 | insample = 1:1000, outsample = 1001:1100, 1111 | type = "rolling", show_progress = FALSE) 1112 | forc$accuracy$individual 1113 | @ 1114 | We see that the exponential Almon lag model slightly outperforms the 1115 | HAR-RV model and both models outperform the AR(20) model.
1116 | 1117 | 1118 | \section{Final remarks} 1119 | 1120 | Only a part of the available functionality of the discussed functions of the package \pkg{midasr} was discussed. As is usual in \proglang{R}, much more information on the resulting objects and all the information on the package-specific functions can be retrieved using the generic functions \code{objects} and \code{?}, respectively. Furthermore, in order to save space, the coding examples provided were almost always presented with minimal accompanying output obtained after running the code. The package page \url{http://mpiktas.github.io/midasr} contains all the codes and complete output together with some additional illustration of the functionality of the package. Other information with a list of the functions and a number of demonstration codes is accessible using the usual \code{??midasr}. 1121 | 1122 | \section{Appendix} 1123 | 1124 | Figure \ref{fig:fig1} was created using Monte-Carlo simulation. The following DGP was used 1125 | \begin{align*} 1126 | y_t&=2+0.1t+\sum_{j = 0}^{16}\beta_jz_{12t-j}+u_t,\text{ } 1127 | z_\tau\sim N(0,\sigma^2),\text{ } u_t\sim N(0,\sigma^2), 1128 | \end{align*} 1129 | where $z_\tau$ and $u_t$ are independent. 1130 | 1131 | The coefficients $\beta_j$ were chosen to come from the normalized exponential Almon polynomial restriction: 1132 | <>= 1133 | nealmon(p=c(2,0.5,-0.1),d=17) 1134 | @ 1135 | The data for this DGP was generated for low frequency sample sizes 50, 100, 200, 300, 500, 750 and 1000. For each sample size an additional out-of-sample data set was generated using a quarter of the size of an in-sample data set. Three MIDAS regression models were estimated using the in-sample data set: an unrestricted MIDAS, a restricted one using the correct constraint from the DGP, and the one with an incorrect restriction (non-exponential Almon polynomial). The forecast was calculated using the out-of-sample data-set.
The Euclidean distance between the model coefficients and the coefficients of the DGP was recorded together with the mean squared error of the forecast. 1136 | 1137 | This process was repeated 1000 times. The points in the figure are the averages of the replications. The code for generating the data can be found in the help page of a dataset \code{oos_prec} in the package \pkg{midasr}. 1138 | 1139 | %\bibliographystyle{jss} 1140 | \bibliography{guideR} 1141 | 1142 | \end{document} 1143 | --------------------------------------------------------------------------------