├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── OOS.Rproj ├── R ├── data_preparation.R ├── external_imports.R ├── forecast_chart.R ├── forecast_combinations.R ├── forecast_metrics.R ├── forecast_multivariate.R └── forecast_univariate.R ├── README.md ├── codecov.yml ├── cran-comments.md ├── man ├── NBest.Rd ├── chart_forecast.Rd ├── chart_forecast_error.Rd ├── data_impute.Rd ├── data_outliers.Rd ├── data_reduction.Rd ├── data_subset.Rd ├── forecast_accuracy.Rd ├── forecast_combine.Rd ├── forecast_comparison.Rd ├── forecast_date.Rd ├── forecast_multivariate.Rd ├── forecast_univariate.Rd ├── instantiate.data_impute.control_panel.Rd ├── instantiate.forecast_combinations.control_panel.Rd ├── instantiate.forecast_multivariate.ml.control_panel.Rd ├── instantiate.forecast_multivariate.var.control_panel.Rd ├── instantiate.forecast_univariate.control_panel.Rd ├── loss_function.Rd ├── n.lag.Rd ├── pipe.Rd ├── standardize.Rd └── winsorize.Rd ├── tests ├── testthat.R └── testthat │ ├── test-forecast_chart.R │ ├── test-forecast_combination.R │ ├── test-forecast_metrics.R │ ├── test-forecast_multivariate.R │ └── test-forecast_univariate.R └── vignettes ├── .gitignore └── basic_introduction.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github 4 | ^articles$ 5 | ^docs$ 6 | ^codecov\.yml$ 7 | ^.travis\.yml$ 8 | ^LICENSE\.md$ 9 | ^cran-comments\.md$ 10 | ^to-do\.md$ 11 | ^CRAN-RELEASE$ 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | *.csv 6 | to-do.md 7 | to-do.html 8 | inst/doc 9 | CRAN-RELEASE* 10 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: r 2 | 3 | sudo: required 4 | 5 | env: _R_CHECK_CRAN_INCOMING_=FALSE 6 | 7 | r_packages: 8 | - covr 9 | - devtools 10 | 11 | r_github_packages: 12 | - tylerJPike/OOS 13 | 14 | after_success: 15 | - Rscript -e 'covr::codecov()'  # token supplied via the secure CODECOV_TOKEN environment variable; never commit it 16 | - Rscript -e 'devtools::check()' -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: OOS 2 | Title: Out-of-Sample Time Series Forecasting 3 | Version: 1.0.0 4 | Authors@R: 5 | person(given = "Tyler J.", 6 | family = "Pike", 7 | role = c("aut", "cre"), 8 | email = "tjpike7@gmail.com") 9 | Description: A comprehensive and cohesive API for the out-of-sample forecasting workflow: 10 | data preparation, forecasting - including both traditional econometric time series models and 11 | modern machine learning techniques - forecast combination, model and error analysis, and 12 | forecast visualization.
13 | License: GPL-3 14 | URL: https://github.com/tylerJPike/OOS, 15 | https://tylerjpike.github.io/OOS/ 16 | BugReports: https://github.com/tylerJPike/OOS/issues 17 | Encoding: UTF-8 18 | LazyData: true 19 | Roxygen: list(markdown = TRUE) 20 | RoxygenNote: 7.1.1 21 | VignetteBuilder: knitr 22 | Depends: 23 | R (>= 4.0.0) 24 | Imports: 25 | caret, 26 | dplyr, 27 | forecast, 28 | furrr, 29 | future, 30 | ggplot2, 31 | glmnet, 32 | imputeTS, 33 | lmtest, 34 | lubridate, 35 | magrittr, 36 | purrr, 37 | sandwich, 38 | stats, 39 | tidyr, 40 | vars, 41 | xts, 42 | zoo 43 | Suggests: 44 | knitr, 45 | testthat, 46 | rmarkdown, 47 | quantmod 48 | 49 | 50 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU General Public License 2 | ========================== 3 | 4 | _Version 3, 29 June 2007_ 5 | _Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_ 6 | 7 | Everyone is permitted to copy and distribute verbatim copies of this license 8 | document, but changing it is not allowed. 9 | 10 | ## Preamble 11 | 12 | The GNU General Public License is a free, copyleft license for software and other 13 | kinds of works. 14 | 15 | The licenses for most software and other practical works are designed to take away 16 | your freedom to share and change the works. By contrast, the GNU General Public 17 | License is intended to guarantee your freedom to share and change all versions of a 18 | program--to make sure it remains free software for all its users. We, the Free 19 | Software Foundation, use the GNU General Public License for most of our software; it 20 | applies also to any other work released this way by its authors. You can apply it to 21 | your programs, too. 22 | 23 | When we speak of free software, we are referring to freedom, not price.
Our General 24 | Public Licenses are designed to make sure that you have the freedom to distribute 25 | copies of free software (and charge for them if you wish), that you receive source 26 | code or can get it if you want it, that you can change the software or use pieces of 27 | it in new free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you these rights or 30 | asking you to surrender the rights. Therefore, you have certain responsibilities if 31 | you distribute copies of the software, or if you modify it: responsibilities to 32 | respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether gratis or for a fee, 35 | you must pass on to the recipients the same freedoms that you received. You must make 36 | sure that they, too, receive or can get the source code. And you must show them these 37 | terms so they know their rights. 38 | 39 | Developers that use the GNU GPL protect your rights with two steps: **(1)** assert 40 | copyright on the software, and **(2)** offer you this License giving you legal permission 41 | to copy, distribute and/or modify it. 42 | 43 | For the developers' and authors' protection, the GPL clearly explains that there is 44 | no warranty for this free software. For both users' and authors' sake, the GPL 45 | requires that modified versions be marked as changed, so that their problems will not 46 | be attributed erroneously to authors of previous versions. 47 | 48 | Some devices are designed to deny users access to install or run modified versions of 49 | the software inside them, although the manufacturer can do so. This is fundamentally 50 | incompatible with the aim of protecting users' freedom to change the software. The 51 | systematic pattern of such abuse occurs in the area of products for individuals to 52 | use, which is precisely where it is most unacceptable. 
Therefore, we have designed 53 | this version of the GPL to prohibit the practice for those products. If such problems 54 | arise substantially in other domains, we stand ready to extend this provision to 55 | those domains in future versions of the GPL, as needed to protect the freedom of 56 | users. 57 | 58 | Finally, every program is threatened constantly by software patents. States should 59 | not allow patents to restrict development and use of software on general-purpose 60 | computers, but in those that do, we wish to avoid the special danger that patents 61 | applied to a free program could make it effectively proprietary. To prevent this, the 62 | GPL assures that patents cannot be used to render the program non-free. 63 | 64 | The precise terms and conditions for copying, distribution and modification follow. 65 | 66 | ## TERMS AND CONDITIONS 67 | 68 | ### 0. Definitions 69 | 70 | “This License” refers to version 3 of the GNU General Public License. 71 | 72 | “Copyright” also means copyright-like laws that apply to other kinds of 73 | works, such as semiconductor masks. 74 | 75 | “The Program” refers to any copyrightable work licensed under this 76 | License. Each licensee is addressed as “you”. “Licensees” and 77 | “recipients” may be individuals or organizations. 78 | 79 | To “modify” a work means to copy from or adapt all or part of the work in 80 | a fashion requiring copyright permission, other than the making of an exact copy. The 81 | resulting work is called a “modified version” of the earlier work or a 82 | work “based on” the earlier work. 83 | 84 | A “covered work” means either the unmodified Program or a work based on 85 | the Program. 86 | 87 | To “propagate” a work means to do anything with it that, without 88 | permission, would make you directly or secondarily liable for infringement under 89 | applicable copyright law, except executing it on a computer or modifying a private 90 | copy. 
Propagation includes copying, distribution (with or without modification), 91 | making available to the public, and in some countries other activities as well. 92 | 93 | To “convey” a work means any kind of propagation that enables other 94 | parties to make or receive copies. Mere interaction with a user through a computer 95 | network, with no transfer of a copy, is not conveying. 96 | 97 | An interactive user interface displays “Appropriate Legal Notices” to the 98 | extent that it includes a convenient and prominently visible feature that **(1)** 99 | displays an appropriate copyright notice, and **(2)** tells the user that there is no 100 | warranty for the work (except to the extent that warranties are provided), that 101 | licensees may convey the work under this License, and how to view a copy of this 102 | License. If the interface presents a list of user commands or options, such as a 103 | menu, a prominent item in the list meets this criterion. 104 | 105 | ### 1. Source Code 106 | 107 | The “source code” for a work means the preferred form of the work for 108 | making modifications to it. “Object code” means any non-source form of a 109 | work. 110 | 111 | A “Standard Interface” means an interface that either is an official 112 | standard defined by a recognized standards body, or, in the case of interfaces 113 | specified for a particular programming language, one that is widely used among 114 | developers working in that language. 115 | 116 | The “System Libraries” of an executable work include anything, other than 117 | the work as a whole, that **(a)** is included in the normal form of packaging a Major 118 | Component, but which is not part of that Major Component, and **(b)** serves only to 119 | enable use of the work with that Major Component, or to implement a Standard 120 | Interface for which an implementation is available to the public in source code form. 
121 | A “Major Component”, in this context, means a major essential component 122 | (kernel, window system, and so on) of the specific operating system (if any) on which 123 | the executable work runs, or a compiler used to produce the work, or an object code 124 | interpreter used to run it. 125 | 126 | The “Corresponding Source” for a work in object code form means all the 127 | source code needed to generate, install, and (for an executable work) run the object 128 | code and to modify the work, including scripts to control those activities. However, 129 | it does not include the work's System Libraries, or general-purpose tools or 130 | generally available free programs which are used unmodified in performing those 131 | activities but which are not part of the work. For example, Corresponding Source 132 | includes interface definition files associated with source files for the work, and 133 | the source code for shared libraries and dynamically linked subprograms that the work 134 | is specifically designed to require, such as by intimate data communication or 135 | control flow between those subprograms and other parts of the work. 136 | 137 | The Corresponding Source need not include anything that users can regenerate 138 | automatically from other parts of the Corresponding Source. 139 | 140 | The Corresponding Source for a work in source code form is that same work. 141 | 142 | ### 2. Basic Permissions 143 | 144 | All rights granted under this License are granted for the term of copyright on the 145 | Program, and are irrevocable provided the stated conditions are met. This License 146 | explicitly affirms your unlimited permission to run the unmodified Program. The 147 | output from running a covered work is covered by this License only if the output, 148 | given its content, constitutes a covered work. This License acknowledges your rights 149 | of fair use or other equivalent, as provided by copyright law. 
150 | 151 | You may make, run and propagate covered works that you do not convey, without 152 | conditions so long as your license otherwise remains in force. You may convey covered 153 | works to others for the sole purpose of having them make modifications exclusively 154 | for you, or provide you with facilities for running those works, provided that you 155 | comply with the terms of this License in conveying all material for which you do not 156 | control copyright. Those thus making or running the covered works for you must do so 157 | exclusively on your behalf, under your direction and control, on terms that prohibit 158 | them from making any copies of your copyrighted material outside their relationship 159 | with you. 160 | 161 | Conveying under any other circumstances is permitted solely under the conditions 162 | stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 163 | 164 | ### 3. Protecting Users' Legal Rights From Anti-Circumvention Law 165 | 166 | No covered work shall be deemed part of an effective technological measure under any 167 | applicable law fulfilling obligations under article 11 of the WIPO copyright treaty 168 | adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention 169 | of such measures. 170 | 171 | When you convey a covered work, you waive any legal power to forbid circumvention of 172 | technological measures to the extent such circumvention is effected by exercising 173 | rights under this License with respect to the covered work, and you disclaim any 174 | intention to limit operation or modification of the work as a means of enforcing, 175 | against the work's users, your or third parties' legal rights to forbid circumvention 176 | of technological measures. 177 | 178 | ### 4. 
Conveying Verbatim Copies 179 | 180 | You may convey verbatim copies of the Program's source code as you receive it, in any 181 | medium, provided that you conspicuously and appropriately publish on each copy an 182 | appropriate copyright notice; keep intact all notices stating that this License and 183 | any non-permissive terms added in accord with section 7 apply to the code; keep 184 | intact all notices of the absence of any warranty; and give all recipients a copy of 185 | this License along with the Program. 186 | 187 | You may charge any price or no price for each copy that you convey, and you may offer 188 | support or warranty protection for a fee. 189 | 190 | ### 5. Conveying Modified Source Versions 191 | 192 | You may convey a work based on the Program, or the modifications to produce it from 193 | the Program, in the form of source code under the terms of section 4, provided that 194 | you also meet all of these conditions: 195 | 196 | * **a)** The work must carry prominent notices stating that you modified it, and giving a 197 | relevant date. 198 | * **b)** The work must carry prominent notices stating that it is released under this 199 | License and any conditions added under section 7. This requirement modifies the 200 | requirement in section 4 to “keep intact all notices”. 201 | * **c)** You must license the entire work, as a whole, under this License to anyone who 202 | comes into possession of a copy. This License will therefore apply, along with any 203 | applicable section 7 additional terms, to the whole of the work, and all its parts, 204 | regardless of how they are packaged. This License gives no permission to license the 205 | work in any other way, but it does not invalidate such permission if you have 206 | separately received it. 
207 | * **d)** If the work has interactive user interfaces, each must display Appropriate Legal 208 | Notices; however, if the Program has interactive interfaces that do not display 209 | Appropriate Legal Notices, your work need not make them do so. 210 | 211 | A compilation of a covered work with other separate and independent works, which are 212 | not by their nature extensions of the covered work, and which are not combined with 213 | it such as to form a larger program, in or on a volume of a storage or distribution 214 | medium, is called an “aggregate” if the compilation and its resulting 215 | copyright are not used to limit the access or legal rights of the compilation's users 216 | beyond what the individual works permit. Inclusion of a covered work in an aggregate 217 | does not cause this License to apply to the other parts of the aggregate. 218 | 219 | ### 6. Conveying Non-Source Forms 220 | 221 | You may convey a covered work in object code form under the terms of sections 4 and 222 | 5, provided that you also convey the machine-readable Corresponding Source under the 223 | terms of this License, in one of these ways: 224 | 225 | * **a)** Convey the object code in, or embodied in, a physical product (including a 226 | physical distribution medium), accompanied by the Corresponding Source fixed on a 227 | durable physical medium customarily used for software interchange. 
228 | * **b)** Convey the object code in, or embodied in, a physical product (including a 229 | physical distribution medium), accompanied by a written offer, valid for at least 230 | three years and valid for as long as you offer spare parts or customer support for 231 | that product model, to give anyone who possesses the object code either **(1)** a copy of 232 | the Corresponding Source for all the software in the product that is covered by this 233 | License, on a durable physical medium customarily used for software interchange, for 234 | a price no more than your reasonable cost of physically performing this conveying of 235 | source, or **(2)** access to copy the Corresponding Source from a network server at no 236 | charge. 237 | * **c)** Convey individual copies of the object code with a copy of the written offer to 238 | provide the Corresponding Source. This alternative is allowed only occasionally and 239 | noncommercially, and only if you received the object code with such an offer, in 240 | accord with subsection 6b. 241 | * **d)** Convey the object code by offering access from a designated place (gratis or for 242 | a charge), and offer equivalent access to the Corresponding Source in the same way 243 | through the same place at no further charge. You need not require recipients to copy 244 | the Corresponding Source along with the object code. If the place to copy the object 245 | code is a network server, the Corresponding Source may be on a different server 246 | (operated by you or a third party) that supports equivalent copying facilities, 247 | provided you maintain clear directions next to the object code saying where to find 248 | the Corresponding Source. Regardless of what server hosts the Corresponding Source, 249 | you remain obligated to ensure that it is available for as long as needed to satisfy 250 | these requirements. 
251 | * **e)** Convey the object code using peer-to-peer transmission, provided you inform 252 | other peers where the object code and Corresponding Source of the work are being 253 | offered to the general public at no charge under subsection 6d. 254 | 255 | A separable portion of the object code, whose source code is excluded from the 256 | Corresponding Source as a System Library, need not be included in conveying the 257 | object code work. 258 | 259 | A “User Product” is either **(1)** a “consumer product”, which 260 | means any tangible personal property which is normally used for personal, family, or 261 | household purposes, or **(2)** anything designed or sold for incorporation into a 262 | dwelling. In determining whether a product is a consumer product, doubtful cases 263 | shall be resolved in favor of coverage. For a particular product received by a 264 | particular user, “normally used” refers to a typical or common use of 265 | that class of product, regardless of the status of the particular user or of the way 266 | in which the particular user actually uses, or expects or is expected to use, the 267 | product. A product is a consumer product regardless of whether the product has 268 | substantial commercial, industrial or non-consumer uses, unless such uses represent 269 | the only significant mode of use of the product. 270 | 271 | “Installation Information” for a User Product means any methods, 272 | procedures, authorization keys, or other information required to install and execute 273 | modified versions of a covered work in that User Product from a modified version of 274 | its Corresponding Source. The information must suffice to ensure that the continued 275 | functioning of the modified object code is in no case prevented or interfered with 276 | solely because modification has been made. 
277 | 278 | If you convey an object code work under this section in, or with, or specifically for 279 | use in, a User Product, and the conveying occurs as part of a transaction in which 280 | the right of possession and use of the User Product is transferred to the recipient 281 | in perpetuity or for a fixed term (regardless of how the transaction is 282 | characterized), the Corresponding Source conveyed under this section must be 283 | accompanied by the Installation Information. But this requirement does not apply if 284 | neither you nor any third party retains the ability to install modified object code 285 | on the User Product (for example, the work has been installed in ROM). 286 | 287 | The requirement to provide Installation Information does not include a requirement to 288 | continue to provide support service, warranty, or updates for a work that has been 289 | modified or installed by the recipient, or for the User Product in which it has been 290 | modified or installed. Access to a network may be denied when the modification itself 291 | materially and adversely affects the operation of the network or violates the rules 292 | and protocols for communication across the network. 293 | 294 | Corresponding Source conveyed, and Installation Information provided, in accord with 295 | this section must be in a format that is publicly documented (and with an 296 | implementation available to the public in source code form), and must require no 297 | special password or key for unpacking, reading or copying. 298 | 299 | ### 7. Additional Terms 300 | 301 | “Additional permissions” are terms that supplement the terms of this 302 | License by making exceptions from one or more of its conditions. Additional 303 | permissions that are applicable to the entire Program shall be treated as though they 304 | were included in this License, to the extent that they are valid under applicable 305 | law. 
If additional permissions apply only to part of the Program, that part may be 306 | used separately under those permissions, but the entire Program remains governed by 307 | this License without regard to the additional permissions. 308 | 309 | When you convey a copy of a covered work, you may at your option remove any 310 | additional permissions from that copy, or from any part of it. (Additional 311 | permissions may be written to require their own removal in certain cases when you 312 | modify the work.) You may place additional permissions on material, added by you to a 313 | covered work, for which you have or can give appropriate copyright permission. 314 | 315 | Notwithstanding any other provision of this License, for material you add to a 316 | covered work, you may (if authorized by the copyright holders of that material) 317 | supplement the terms of this License with terms: 318 | 319 | * **a)** Disclaiming warranty or limiting liability differently from the terms of 320 | sections 15 and 16 of this License; or 321 | * **b)** Requiring preservation of specified reasonable legal notices or author 322 | attributions in that material or in the Appropriate Legal Notices displayed by works 323 | containing it; or 324 | * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that 325 | modified versions of such material be marked in reasonable ways as different from the 326 | original version; or 327 | * **d)** Limiting the use for publicity purposes of names of licensors or authors of the 328 | material; or 329 | * **e)** Declining to grant rights under trademark law for use of some trade names, 330 | trademarks, or service marks; or 331 | * **f)** Requiring indemnification of licensors and authors of that material by anyone 332 | who conveys the material (or modified versions of it) with contractual assumptions of 333 | liability to the recipient, for any liability that these contractual assumptions 334 | directly impose on those 
licensors and authors. 335 | 336 | All other non-permissive additional terms are considered “further 337 | restrictions” within the meaning of section 10. If the Program as you received 338 | it, or any part of it, contains a notice stating that it is governed by this License 339 | along with a term that is a further restriction, you may remove that term. If a 340 | license document contains a further restriction but permits relicensing or conveying 341 | under this License, you may add to a covered work material governed by the terms of 342 | that license document, provided that the further restriction does not survive such 343 | relicensing or conveying. 344 | 345 | If you add terms to a covered work in accord with this section, you must place, in 346 | the relevant source files, a statement of the additional terms that apply to those 347 | files, or a notice indicating where to find the applicable terms. 348 | 349 | Additional terms, permissive or non-permissive, may be stated in the form of a 350 | separately written license, or stated as exceptions; the above requirements apply 351 | either way. 352 | 353 | ### 8. Termination 354 | 355 | You may not propagate or modify a covered work except as expressly provided under 356 | this License. Any attempt otherwise to propagate or modify it is void, and will 357 | automatically terminate your rights under this License (including any patent licenses 358 | granted under the third paragraph of section 11). 359 | 360 | However, if you cease all violation of this License, then your license from a 361 | particular copyright holder is reinstated **(a)** provisionally, unless and until the 362 | copyright holder explicitly and finally terminates your license, and **(b)** permanently, 363 | if the copyright holder fails to notify you of the violation by some reasonable means 364 | prior to 60 days after the cessation. 
365 | 366 | Moreover, your license from a particular copyright holder is reinstated permanently 367 | if the copyright holder notifies you of the violation by some reasonable means, this 368 | is the first time you have received notice of violation of this License (for any 369 | work) from that copyright holder, and you cure the violation prior to 30 days after 370 | your receipt of the notice. 371 | 372 | Termination of your rights under this section does not terminate the licenses of 373 | parties who have received copies or rights from you under this License. If your 374 | rights have been terminated and not permanently reinstated, you do not qualify to 375 | receive new licenses for the same material under section 10. 376 | 377 | ### 9. Acceptance Not Required for Having Copies 378 | 379 | You are not required to accept this License in order to receive or run a copy of the 380 | Program. Ancillary propagation of a covered work occurring solely as a consequence of 381 | using peer-to-peer transmission to receive a copy likewise does not require 382 | acceptance. However, nothing other than this License grants you permission to 383 | propagate or modify any covered work. These actions infringe copyright if you do not 384 | accept this License. Therefore, by modifying or propagating a covered work, you 385 | indicate your acceptance of this License to do so. 386 | 387 | ### 10. Automatic Licensing of Downstream Recipients 388 | 389 | Each time you convey a covered work, the recipient automatically receives a license 390 | from the original licensors, to run, modify and propagate that work, subject to this 391 | License. You are not responsible for enforcing compliance by third parties with this 392 | License. 393 | 394 | An “entity transaction” is a transaction transferring control of an 395 | organization, or substantially all assets of one, or subdividing an organization, or 396 | merging organizations. 
If propagation of a covered work results from an entity 397 | transaction, each party to that transaction who receives a copy of the work also 398 | receives whatever licenses to the work the party's predecessor in interest had or 399 | could give under the previous paragraph, plus a right to possession of the 400 | Corresponding Source of the work from the predecessor in interest, if the predecessor 401 | has it or can get it with reasonable efforts. 402 | 403 | You may not impose any further restrictions on the exercise of the rights granted or 404 | affirmed under this License. For example, you may not impose a license fee, royalty, 405 | or other charge for exercise of rights granted under this License, and you may not 406 | initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging 407 | that any patent claim is infringed by making, using, selling, offering for sale, or 408 | importing the Program or any portion of it. 409 | 410 | ### 11. Patents 411 | 412 | A “contributor” is a copyright holder who authorizes use under this 413 | License of the Program or a work on which the Program is based. The work thus 414 | licensed is called the contributor's “contributor version”. 415 | 416 | A contributor's “essential patent claims” are all patent claims owned or 417 | controlled by the contributor, whether already acquired or hereafter acquired, that 418 | would be infringed by some manner, permitted by this License, of making, using, or 419 | selling its contributor version, but do not include claims that would be infringed 420 | only as a consequence of further modification of the contributor version. For 421 | purposes of this definition, “control” includes the right to grant patent 422 | sublicenses in a manner consistent with the requirements of this License. 
423 | 424 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license 425 | under the contributor's essential patent claims, to make, use, sell, offer for sale, 426 | import and otherwise run, modify and propagate the contents of its contributor 427 | version. 428 | 429 | In the following three paragraphs, a “patent license” is any express 430 | agreement or commitment, however denominated, not to enforce a patent (such as an 431 | express permission to practice a patent or covenant not to sue for patent 432 | infringement). To “grant” such a patent license to a party means to make 433 | such an agreement or commitment not to enforce a patent against the party. 434 | 435 | If you convey a covered work, knowingly relying on a patent license, and the 436 | Corresponding Source of the work is not available for anyone to copy, free of charge 437 | and under the terms of this License, through a publicly available network server or 438 | other readily accessible means, then you must either **(1)** cause the Corresponding 439 | Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the 440 | patent license for this particular work, or **(3)** arrange, in a manner consistent with 441 | the requirements of this License, to extend the patent license to downstream 442 | recipients. “Knowingly relying” means you have actual knowledge that, but 443 | for the patent license, your conveying the covered work in a country, or your 444 | recipient's use of the covered work in a country, would infringe one or more 445 | identifiable patents in that country that you have reason to believe are valid. 
446 | 447 | If, pursuant to or in connection with a single transaction or arrangement, you 448 | convey, or propagate by procuring conveyance of, a covered work, and grant a patent 449 | license to some of the parties receiving the covered work authorizing them to use, 450 | propagate, modify or convey a specific copy of the covered work, then the patent 451 | license you grant is automatically extended to all recipients of the covered work and 452 | works based on it. 453 | 454 | A patent license is “discriminatory” if it does not include within the 455 | scope of its coverage, prohibits the exercise of, or is conditioned on the 456 | non-exercise of one or more of the rights that are specifically granted under this 457 | License. You may not convey a covered work if you are a party to an arrangement with 458 | a third party that is in the business of distributing software, under which you make 459 | payment to the third party based on the extent of your activity of conveying the 460 | work, and under which the third party grants, to any of the parties who would receive 461 | the covered work from you, a discriminatory patent license **(a)** in connection with 462 | copies of the covered work conveyed by you (or copies made from those copies), or **(b)** 463 | primarily for and in connection with specific products or compilations that contain 464 | the covered work, unless you entered into that arrangement, or that patent license 465 | was granted, prior to 28 March 2007. 466 | 467 | Nothing in this License shall be construed as excluding or limiting any implied 468 | license or other defenses to infringement that may otherwise be available to you 469 | under applicable patent law. 470 | 471 | ### 12. No Surrender of Others' Freedom 472 | 473 | If conditions are imposed on you (whether by court order, agreement or otherwise) 474 | that contradict the conditions of this License, they do not excuse you from the 475 | conditions of this License. 
If you cannot convey a covered work so as to satisfy 476 | simultaneously your obligations under this License and any other pertinent 477 | obligations, then as a consequence you may not convey it at all. For example, if you 478 | agree to terms that obligate you to collect a royalty for further conveying from 479 | those to whom you convey the Program, the only way you could satisfy both those terms 480 | and this License would be to refrain entirely from conveying the Program. 481 | 482 | ### 13. Use with the GNU Affero General Public License 483 | 484 | Notwithstanding any other provision of this License, you have permission to link or 485 | combine any covered work with a work licensed under version 3 of the GNU Affero 486 | General Public License into a single combined work, and to convey the resulting work. 487 | The terms of this License will continue to apply to the part which is the covered 488 | work, but the special requirements of the GNU Affero General Public License, section 489 | 13, concerning interaction through a network will apply to the combination as such. 490 | 491 | ### 14. Revised Versions of this License 492 | 493 | The Free Software Foundation may publish revised and/or new versions of the GNU 494 | General Public License from time to time. Such new versions will be similar in spirit 495 | to the present version, but may differ in detail to address new problems or concerns. 496 | 497 | Each version is given a distinguishing version number. If the Program specifies that 498 | a certain numbered version of the GNU General Public License “or any later 499 | version” applies to it, you have the option of following the terms and 500 | conditions either of that numbered version or of any later version published by the 501 | Free Software Foundation. If the Program does not specify a version number of the GNU 502 | General Public License, you may choose any version ever published by the Free 503 | Software Foundation. 
504 | 505 | If the Program specifies that a proxy can decide which future versions of the GNU 506 | General Public License can be used, that proxy's public statement of acceptance of a 507 | version permanently authorizes you to choose that version for the Program. 508 | 509 | Later license versions may give you additional or different permissions. However, no 510 | additional obligations are imposed on any author or copyright holder as a result of 511 | your choosing to follow a later version. 512 | 513 | ### 15. Disclaimer of Warranty 514 | 515 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 516 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 517 | PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER 518 | EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 519 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE 520 | QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE 521 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 522 | 523 | ### 16. Limitation of Liability 524 | 525 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY 526 | COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS 527 | PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, 528 | INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 529 | PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE 530 | OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE 531 | WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 532 | POSSIBILITY OF SUCH DAMAGES. 533 | 534 | ### 17. 
Interpretation of Sections 15 and 16 535 | 536 | If the disclaimer of warranty and limitation of liability provided above cannot be 537 | given local legal effect according to their terms, reviewing courts shall apply local 538 | law that most closely approximates an absolute waiver of all civil liability in 539 | connection with the Program, unless a warranty or assumption of liability accompanies 540 | a copy of the Program in return for a fee. 541 | 542 | _END OF TERMS AND CONDITIONS_ 543 | 544 | ## How to Apply These Terms to Your New Programs 545 | 546 | If you develop a new program, and you want it to be of the greatest possible use to 547 | the public, the best way to achieve this is to make it free software which everyone 548 | can redistribute and change under these terms. 549 | 550 | To do so, attach the following notices to the program. It is safest to attach them 551 | to the start of each source file to most effectively state the exclusion of warranty; 552 | and each file should have at least the “copyright” line and a pointer to 553 | where the full notice is found. 554 | 555 | 556 | Copyright (C) 2021 Tyler J. Pike 557 | 558 | This program is free software: you can redistribute it and/or modify 559 | it under the terms of the GNU General Public License as published by 560 | the Free Software Foundation, either version 3 of the License, or 561 | (at your option) any later version. 562 | 563 | This program is distributed in the hope that it will be useful, 564 | but WITHOUT ANY WARRANTY; without even the implied warranty of 565 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 566 | GNU General Public License for more details. 567 | 568 | You should have received a copy of the GNU General Public License 569 | along with this program. If not, see . 570 | 571 | Also add information on how to contact you by electronic and paper mail. 
572 | 573 | If the program does terminal interaction, make it output a short notice like this 574 | when it starts in an interactive mode: 575 | 576 | OOS Copyright (C) 2021 Tyler J. Pike 577 | This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. 578 | This is free software, and you are welcome to redistribute it 579 | under certain conditions; type 'show c' for details. 580 | 581 | The hypothetical commands `show w` and `show c` should show the appropriate parts of 582 | the General Public License. Of course, your program's commands might be different; 583 | for a GUI interface, you would use an “about box”. 584 | 585 | You should also get your employer (if you work as a programmer) or school, if any, to 586 | sign a “copyright disclaimer” for the program, if necessary. For more 587 | information on this, and how to apply and follow the GNU GPL, see 588 | <https://www.gnu.org/licenses/>. 589 | 590 | The GNU General Public License does not permit incorporating your program into 591 | proprietary programs. If your program is a subroutine library, you may consider it 592 | more useful to permit linking proprietary applications with the library. If this is 593 | what you want to do, use the GNU Lesser General Public License instead of this 594 | License. But first, please read 595 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
596 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(NBest) 5 | export(chart_forecast) 6 | export(chart_forecast_error) 7 | export(data_impute) 8 | export(data_outliers) 9 | export(data_reduction) 10 | export(data_subset) 11 | export(forecast_accuracy) 12 | export(forecast_combine) 13 | export(forecast_comparison) 14 | export(forecast_multivariate) 15 | export(forecast_univariate) 16 | export(instantiate.data_impute.control_panel) 17 | export(instantiate.forecast_combinations.control_panel) 18 | export(instantiate.forecast_multivariate.ml.control_panel) 19 | export(instantiate.forecast_multivariate.var.control_panel) 20 | export(instantiate.forecast_univariate.control_panel) 21 | export(loss_function) 22 | export(n.lag) 23 | export(standardize) 24 | export(winsorize) 25 | import(stats) 26 | importFrom(magrittr,"%>%") 27 | -------------------------------------------------------------------------------- /OOS.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /R/data_preparation.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------- 2 | # data cleaning helper 
# functions
#---------------------------------------------
#' Standardize variables (mean 0, variance 1)
#'
#' @param X numeric: vector to be standardized
#'
#' @return numeric vector of standardized values
#'
#' @export
standardize = function(X){return((X - mean(X, na.rm = TRUE)) / sd(X, na.rm = TRUE))}

#' Winsorize or trim variables
#'
#' @param X numeric: vector to be winsorized or trimmed
#' @param trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
#' @param bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
#'
#' @return numeric vector of winsorized or trimmed values
#'
#' @export
winsorize = function(X, bounds, trim = FALSE){

  # percentile cutoffs taken from the observed (non-missing) distribution
  qq = quantile(X, probs = bounds, na.rm = TRUE)

  if(trim == FALSE){
    # cap values at the percentile bounds
    X[X <= qq[1]] = qq[1]
    X[X >= qq[2]] = qq[2]
  }else{
    # remove (set to NA) values outside the percentile bounds
    X[X <= qq[1]] = NA
    X[X >= qq[2]] = NA
  }

  return(X)
}

#' Create information set
#'
#' A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param forecast.date date: upper bound of information set
#' @param rolling.window int: size of rolling window, NA if expanding window is used
#' @param freq string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors
#'
#' @return data.frame bounded by the given date range
#'
#' @export
data_subset = function(
  Data,
  forecast.date,
  rolling.window,
  freq
){

  # 1. using expanding window
  if(is.na(rolling.window)){
    information.set =
      dplyr::filter(Data, date <= forecast.date)

  # 2. using rolling window
  }else{
    rolling.window.start = forecast.date

    # shift the window start back `rolling.window` periods at the stated frequency
    if(freq == 'day'){
      rolling.window.start = forecast.date - rolling.window
    }else if(freq == 'week'){
      lubridate::week(rolling.window.start) = lubridate::week(forecast.date) - rolling.window
    }else if(freq == 'month'){
      lubridate::month(rolling.window.start) = lubridate::month(forecast.date) - rolling.window
    }else if(freq == 'quarter'){
      lubridate::month(rolling.window.start) = lubridate::month(forecast.date) - rolling.window*3
    }else if(freq == 'year'){
      lubridate::year(rolling.window.start) = lubridate::year(forecast.date) - rolling.window
    }

    information.set =
      dplyr::filter(Data, rolling.window.start <= date & date <= forecast.date )
  }

  return(information.set)
}

#' Set forecasted date
#'
#' A function to shift a forecast origin date forward by `horizon` periods at the stated
#' frequency, producing the date the forecast refers to. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param forecast.date date: date forecast was made
#' @param horizon int: periods ahead of forecast
#' @param freq string: time series frequency; day, week, month, quarter, year
#'
#' @return date vector
#'
#'
forecast_date = function(
  forecast.date,
  horizon,
  freq
){

  date = forecast.date

  # advance the date `horizon` periods at the stated frequency
  if(freq == 'day'){
    date = forecast.date + horizon
  }else if(freq == 'week'){
    lubridate::week(date) = lubridate::week(date) + horizon
  }else if(freq == 'month'){
    lubridate::month(date) = lubridate::month(date) + horizon
  }else if(freq == 'quarter'){
    lubridate::month(date) = lubridate::month(date) + horizon*3
  }else if(freq == 'year'){
    lubridate::year(date) = lubridate::year(date) + horizon
  }

  return(date)
}

#' Create n lags
#'
#' A function to create 1 through n lags of a set of variables. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#' @param Data data.frame: data frame of variables to lag and a 'date' column
#' @param lags int: number of lags to create
#' @param variables string: vector of variable names to lag, default is all non-date variables
#'
#' @return data.frame
#'
#' @export
n.lag = function(
  Data,            # data.frame: data frame of variables to lag and a 'date' column
  lags,            # int: number of lags to create
  variables = NULL # string: vector of variable names to lag, default is all non-date variables
){

  # default to lagging every non-date column
  if(is.null(variables)){
    variables = names(dplyr::select(Data, -date))
  }

  # create lag 0 (the original data) through lag `lags`, then join all copies by date
  Data = c(0:lags) %>%
    purrr::map(
      .f = function(n){

        if(n == 0){return(Data)}

        X = Data %>%
          dplyr::mutate_at(variables, dplyr::lag, n)

        # suffix lagged columns with '.l<n>' so the joins do not collide
        names(X)[names(X) != 'date'] = paste0(names(X)[names(X) != 'date'], '.l', n)

        return(X)
      }
    ) %>%
    purrr::reduce(dplyr::full_join, by = 'date')


  return(Data)
}

#---------------------------------------------
# Clean outliers
#---------------------------------------------
#' Clean outliers
#'
#' A function to clean outliers. Is used as a data preparation helper function and is called internally
#' by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param variables string: vector of variables to clean, default is all numeric columns
#' @param w.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
#' @param trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
#' @param cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
#'
#' @return data.frame with outliers winsorized or trimmed
#'
#' @export
data_outliers = function(
  Data,                     # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  variables = NULL,         # string: vector of variables to clean, default is all numeric columns
  w.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
  trim = FALSE,             # boolean: if TRUE then replace outliers with NA instead of winsorizing bound
  cross_section = FALSE     # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
){


  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # bug fix: was print(errorCondition(...)), which printed a condition object
  # and continued executing; raise a real error instead
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    stop('Variables cleaned for outliers must be numeric.')
  }

  # clean outliers (column wise)
  if(cross_section == FALSE){
    # bug fix: the winsorized result was previously assigned to a discarded
    # temporary (`test`), so the function returned the data unchanged
    Data = Data %>%
      dplyr::mutate_at(dplyr::vars(variables), winsorize, bounds = w.bounds, trim = trim)

  # clean outliers (row wise)
  }else{
    # NOTE(review): rowwise() makes mutate_at() pass each cell as a length-1
    # vector to winsorize(), which may not implement the documented
    # cross-section semantics - confirm intended behavior
    Data = Data %>%
      dplyr::rowwise() %>%
      dplyr::mutate_at(dplyr::vars(variables), winsorize, bounds = w.bounds, trim = trim) %>%
      dplyr::ungroup()
  }

  # return results
  return(Data)
}


#---------------------------------------------
# Impute missing
#---------------------------------------------
#' Create interface to control `data_impute` model estimation
#'
#' A function to create the data imputation method
#' arguments list for user manipulation.
#'
#' @return data_impute.control_panel
#'
#' @export
instantiate.data_impute.control_panel = function(){

  # methods: imputeTS routines referenced by name so the package is only
  # needed when a method is actually dispatched by data_impute()
  methods = list(
    interpolation = 'imputeTS::na_interpolation',
    kalman = 'imputeTS::na_kalman',  # consistency fix: was an unquoted function reference, unlike every other entry
    locf = 'imputeTS::na_locf',
    ma = 'imputeTS::na_ma',
    mean = 'imputeTS::na_mean',
    random = 'imputeTS::na_random',
    remove = 'imputeTS::na_remove',  # bug fix: was 'imputeTS:na_remove' (single colon), an unresolvable name
    replace = 'imputeTS::na_replace',
    seadec = 'imputeTS::na_seadec',
    seasplit = 'imputeTS::na_seasplit'
  )

  # arguments: one (initially empty) argument list per method; data_impute()
  # fills in $x before dispatching
  arguments = list(
    interpolation = NULL,
    kalman = NULL,
    locf = NULL,
    ma = NULL,
    mean = NULL,
    random = NULL,
    remove = NULL,
    replace = NULL,
    seadec = NULL,
    seasplit = NULL
  )

  return(
    list(
      method = methods,
      arguments = arguments
    )
  )

}

#' Impute missing values
#'
#' A function to impute missing values. Is used as a data preparation helper function and is called internally
#' by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
#' @param variables string: vector of variables to impute missing values, default is all numeric columns
#' @param verbose boolean: show start-up status of data_impute.control_panel
#'
#' @return data.frame with missing data imputed
#'
#' @export
data_impute = function(
  Data,              # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  method = 'kalman', # string: select which method to use from the imputeTS package
  variables = NULL,  # string: vector of variables to impute missing values, default is all numeric columns
  verbose = FALSE    # boolean: show start-up status of data_impute.control_panel
){

  # training parameter creation and warnings
  # NOTE(review): exists() searches the calling environment chain, so a
  # user-defined data_impute.control_panel (e.g. in the global environment)
  # takes precedence over the default - confirm this is intended
  if(verbose == TRUE){
    if(exists("data_impute.control_panel")){
      print(warningCondition('data_impute.control_panel exists and will be used to impute missing data in its present state.'))
    }else{
      data_impute.control_panel = instantiate.data_impute.control_panel()
      print(warningCondition('data_impute.control_panel was instantiated and default values will be used for to impute missing data.'))
    }
  }else{
    if(!exists("data_impute.control_panel")){data_impute.control_panel = instantiate.data_impute.control_panel()}
  }

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # bug fix: was print(errorCondition(...)) with a copy-pasted outlier message;
  # raise a real error with an accurate message
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    stop('Variables to impute must be numeric.')
  }

  # impute missing values one variable at a time
  for(v in variables){
    data_impute.control_panel$arguments[[method]]$x = Data[,c(v)]

    # bug fix: do.call() cannot look up a 'pkg::fun' character name directly,
    # so resolve method strings to the exported function first
    impute.fun = data_impute.control_panel$method[[method]]
    if(is.character(impute.fun)){
      fun.parts = strsplit(impute.fun, '::', fixed = TRUE)[[1]]
      impute.fun = getExportedValue(fun.parts[1], fun.parts[2])
    }

    Data[,c(v)] =
      do.call(what = impute.fun,
              args = data_impute.control_panel$arguments[[method]])
  }

  # return results
  return(Data)
}


#---------------------------------------------
# Dimension reduction
#---------------------------------------------
#' Dimension reduction via principal components
#'
#' A function to estimate principal components.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param variables string: vector of variables to include, default is all numeric columns
#' @param ncomp int: number of factors to create
#' @param standardize boolean: normalize variables (mean zero, variance one) before estimating factors
#'
#' @return data.frame with a date column and ncomp principal component columns
#'
#' @export

data_reduction = function(
  Data,               # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  variables = NULL,   # string: vector of variables to include, default is all numeric columns
  ncomp,              # int: number of factors to create
  standardize = TRUE  # boolean: normalize variables (mean zero, variance one) before estimating factors
){

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # bug fix: was print(errorCondition(...)) with a copy-pasted outlier message;
  # raise a real error with an accurate message
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    stop('Variables used for dimension reduction must be numeric.')
  }

  # remove missing observations (princomp cannot handle NA)
  information.set = na.omit(Data)

  # standardize variables
  # bug fix: the `standardize` argument was previously ignored and
  # standardization was always applied
  if(standardize == TRUE){
    information.set = information.set %>%
      dplyr::mutate_at(dplyr::vars(variables), OOS::standardize)
  }

  # estimate factors
  model.pc = stats::princomp(dplyr::select(information.set, -date))

  # project the data onto the first ncomp loadings
  factors = as.matrix(dplyr::select(information.set, -date)) %*% model.pc$loadings[,1:ncomp]

  # label factors
  colnames(factors) = paste0('pc.',c(1:ncomp))

  factors =
    data.frame(factors,
               date = information.set$date)

  # return results
  return(factors)
}
# -------------------------------------------------------------------------------- /R/external_imports.R:
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @return magrittr pipe operator %>%
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL

#' @import stats
NULL
# -------------------------------------------------------------------------------- /R/forecast_chart.R:
#----------------------------------------
### Basic forecast chart
#----------------------------------------
#' Chart forecasts
#'
#' @param Data data.frame: oos.forecast object
#' @param Title string: chart title
#' @param Ylab string: y-axis label
#' @param Freq string: frequency (acts as sub-title)
#' @param zeroline boolean: if TRUE then add a horizontal line at zero
#'
#' @return ggplot2 chart
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'      forecast_univariate(
#'          Data = Data,
#'          forecast.dates = tail(Data$date,10),
#'          method = c('naive','auto.arima', 'ets'),
#'          horizon = 1,
#'          recursive = FALSE,
#'          freq = 'month')
#'
#'  forecasts =
#'      dplyr::left_join(
#'          forecast.uni,
#'          data.frame(date, observed = A),
#'          by = 'date'
#'      )
#'
#'  # chart forecasts
#'  chart.forecast =
#'      chart_forecast(
#'          forecasts,
#'          Title = 'test',
#'          Ylab = 'Index',
#'          Freq = 'Monthly',
#'          zeroline = TRUE)
#'
#' }
#'
#' @export

chart_forecast = function(
  Data,             # data.frame: oos.forecast object
  Title,            # string: chart title
  Ylab,             # string: y-axis label
  Freq,             # string: frequency (acts as sub-title)
  zeroline = FALSE  # boolean: if TRUE then add a horizontal line at zero
){

  # function errors
  # bug fix: errorCondition() only constructs a condition object and discards
  # it; the check was a no-op - raise the error with stop()
  if(!'forecast' %in% colnames(Data)){
    stop('Data must have a column named "forecast" to calculate errors')
  }
  if(!'date' %in% colnames(Data)){
    stop('Data must have a column named "date" to create plot')
  }

  # function variables (declared so non-standard evaluation passes R CMD check)
  model = observed = forecast = forecast.date = se = NA

  # reformat observed values as an extra '*observed' series so they plot as a line
  if('observed' %in% colnames(Data)){
    Data =
      dplyr::bind_rows(
        Data,
        Data %>% dplyr::select(forecast = observed, date) %>%
          dplyr::mutate(model = '*observed') %>%
          dplyr::distinct()
      )
  }

  # set chart
  chart =
    ggplot2::ggplot(Data, ggplot2::aes(x=date, y = forecast, color = model)) +
    # plot line
    ggplot2::geom_line(lwd = 1.25) +
    ggplot2::theme_classic() +
    ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, linetype = 'solid', colour = "grey")) +
    # chart details
    ggplot2::labs(title = Title, subtitle = Freq) +
    ggplot2::xlab("") +
    ggplot2::ylab(Ylab)

  # add zero line
  if(zeroline == TRUE){

    chart = chart +
      ggplot2::geom_hline(yintercept=0, color="black", size=.5)

  }

  return(chart)

}


#----------------------------------------
### Basic error chart
#----------------------------------------
#' Chart forecast errors
#'
#' @param Data data.frame: oos.forecast object
#' @param Title string: chart title
#' @param Ylab string: y-axis label
#' @param Freq string: frequency (acts as sub-title)
#' @param zeroline boolean: if TRUE then add a horizontal line at zero
#'
#' @return ggplot2 chart
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'      forecast_univariate(
#'          Data = Data,
#'          forecast.dates = tail(Data$date,10),
#'          method = c('naive','auto.arima', 'ets'),
#'          horizon = 1,
#'          recursive = FALSE,
#'          freq = 'month')
#'
#'  forecasts =
#'      dplyr::left_join(
#'          forecast.uni,
#'          data.frame(date, observed = A),
#'          by = 'date'
#'      )
#'
#'  # chart forecast errors
#'  chart.errors =
#'      chart_forecast_error(
#'          forecasts,
#'          Title = 'test',
#'          Ylab = 'Index',
#'          Freq = 'Monthly',
#'          zeroline = TRUE)
#'
#' }
#'
#' @export

chart_forecast_error = function(
  Data,             # data.frame: oos.forecast function output
  Title,            # string: chart title
  Ylab,             # string: y-axis label
  Freq,             # string: frequency (acts as sub-title)
  zeroline = FALSE  # boolean: if TRUE then add a horizontal line at zero
){

  # function errors
  # bug fix: errorCondition() only constructs a condition object and discards
  # it; the checks were no-ops - raise the errors with stop()
  if(!'observed' %in% colnames(Data)){
    stop('Data must have a column named "observed" to calculate errors')
  }
  if(!'forecast' %in% colnames(Data)){
    stop('Data must have a column named "forecast" to calculate errors')
  }
  if(!'date' %in% colnames(Data)){
    stop('Data must have a column named "date" to create plot')
  }

  # function variables (declared so non-standard evaluation passes R CMD check)
  model = observed = forecast = forecast.date = se = errors = NA

  # calculate errors
  Data = Data %>%
    dplyr::mutate(errors = forecast - observed) %>%
    dplyr::select(date, errors, model)

  # set chart
  chart =
    ggplot2::ggplot(Data, ggplot2::aes(x=date, y = errors, color = model)) +
    # plot line
    ggplot2::geom_line(lwd = 1.25) +
    ggplot2::theme_classic() +
191 | ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, linetype = 'solid', colour = "grey")) + 192 | # chart details 193 | ggplot2::labs(title = Title, subtitle = Freq) + 194 | ggplot2::xlab("") + 195 | ggplot2::ylab(Ylab) 196 | 197 | # add zero line 198 | if(zeroline == TRUE){ 199 | 200 | chart = chart + 201 | ggplot2::geom_hline(yintercept=0, color="black", size=.5) 202 | 203 | } 204 | 205 | return(chart) 206 | 207 | } 208 | 209 | -------------------------------------------------------------------------------- /R/forecast_combinations.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------- 2 | # Forecast combination helper functions 3 | #--------------------------------------------- 4 | #' Select N-best forecasts 5 | #' 6 | #' A function to subset the n-best forecasts; 7 | #' assumes column named observed. 8 | #' 9 | #' @param forecasts data.frame: a data frame of forecasts to combine, assumes one column named "observed" 10 | #' @param n.max int: maximum number of forecasts to select 11 | #' @param window int: size of rolling window to evaluate forecast error over, use entire period if NA 12 | #' 13 | #' @return data.frame with n columns of the historically best forecasts 14 | #' 15 | #' @export 16 | 17 | NBest = function( 18 | forecasts, # data.frame: a data frame of forecasts to combine, assumes one column named "observed" 19 | n.max, # int: maximum number of forecasts to select 20 | window = NA # int: size of rolling window to evaluate forecast error over, use entire period if NA 21 | ){ 22 | 23 | observed = NA 24 | 25 | # calculate rolling forecast errors 26 | errors = abs(dplyr::select(forecasts, -observed) - forecasts$observed) 27 | rollRMSE = function(X){return(sqrt(mean((X)^2, na.rm = T)))} 28 | rollingErrors = zoo::rollapply(data = errors, width = seq_along(errors[,1]), 29 | FUN = rollRMSE, align = 'right', fill = NA) 30 | 31 | # create rolling N-best 
forecasts 32 | X = dplyr::select(forecasts, -observed) %>% as.matrix() 33 | nBest = matrix(nrow = nrow(X), ncol = n.max) 34 | for(row in 1:nrow(X)){ 35 | for(column in 1:n.max){ 36 | nBest[row,column] = mean(X[row,order(rollingErrors[row,])[1:column]]) 37 | } 38 | } 39 | colnames(nBest) = paste0('N',c(1:n.max)) 40 | 41 | # return results 42 | return(nBest) 43 | } 44 | 45 | #--------------------------------------------- 46 | # Forecast combination method arguments 47 | #---------------------------------------------- 48 | #' Create interface to control `forecast_combine` model estimation 49 | #' 50 | #' A function to create the forecast combination technique arguments list 51 | #' for user manipulation. 52 | #' 53 | #' @param covariates int: the number of features that will go into the model 54 | #' 55 | #' @return forecast_combinations.control_panel 56 | #' 57 | #' @export 58 | 59 | instantiate.forecast_combinations.control_panel = function(covariates = NULL){ 60 | 61 | # caret names 62 | caret.engine = list( 63 | ols = 'lm', 64 | ridge = 'glmnet', 65 | lasso = 'glmnet', 66 | elastic = 'glmnet', 67 | RF = 'rf', 68 | GBM = 'gbm', 69 | NN = 'avNNet', 70 | pls = 'pls', 71 | pcr = 'pcr' 72 | ) 73 | 74 | # tuning grids 75 | tuning.grids = list( 76 | 77 | ols = NULL, 78 | 79 | ridge = expand.grid( 80 | alpha = 0, 81 | lambda = 10^seq(-3, 3, length = 100)), 82 | 83 | lasso = expand.grid( 84 | alpha = 1, 85 | lambda = 10^seq(-3, 3, length = 100)), 86 | 87 | elastic = NULL, 88 | 89 | GBM = 90 | expand.grid( 91 | n.minobsinnode = c(1), 92 | shrinkage = c(.1,.01), 93 | n.trees = c(100, 250, 500), 94 | interaction.depth = c(1,2,5)), 95 | 96 | RF = 97 | expand.grid( 98 | mtry = c(1:4)), 99 | 100 | NN = 101 | expand.grid( 102 | size = seq(2,10,5), 103 | decay = c(.01,.001), 104 | bag = c(100, 250, 500)), 105 | 106 | pls = 107 | expand.grid( 108 | ncomp = c(1:5)), 109 | 110 | pcr = 111 | expand.grid( 112 | ncomp = c(1:5)) 113 | 114 | ) 115 | 116 | # tuning grids if # of features 
is available 117 | if(!is.null(covariates)){ 118 | tuning.grids[['RF']] = 119 | expand.grid( 120 | mtry = covariates/3) 121 | 122 | tuning.grids[['NN']] = 123 | expand.grid( 124 | size = c(covariates, 2*covariates, 3*covariates), 125 | decay = c(.01,.001), 126 | bag = c(20, 100)) 127 | 128 | } 129 | 130 | # hyper-parameter selection routine 131 | control = 132 | caret::trainControl( 133 | method = "cv", 134 | number = 5, 135 | allowParallel = TRUE) 136 | 137 | # accuracy metric used in training 138 | accuracy = 'RMSE' 139 | 140 | # return training information 141 | return( 142 | list( 143 | caret.engine = caret.engine, 144 | tuning.grids = tuning.grids, 145 | control = control, 146 | accuracy = accuracy 147 | ) 148 | ) 149 | } 150 | 151 | #--------------------------------------------- 152 | # Forecast combination methods 153 | #--------------------------------------------- 154 | #' Forecast with forecast combinations 155 | #' 156 | #' A function to combine forecasts out-of-sample. Methods available include: 157 | #' uniform weights, median forecast, trimmed (winsorized) mean, n-best, 158 | #' ridge regression, lasso regression, elastic net, peLASSO, 159 | #' random forest, tree-based gradient boosting machine, and single-layer neural network. 160 | #' See package website for most up-to-date list of available models. 
#'
#' @param Data data.frame: data frame of forecasted values to combine, assumes 'date' and 'observed' columns, but `observed' is not necessary for all methods
#' @param method string: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'
#' @param n.max int: maximum number of forecasts to select in n.best method
#' @param rolling.window int: size of rolling window to evaluate forecast error over, use entire period if NA
#' @param trim numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)
#' @param burn.in int: the number of periods to use in the first model estimation
#' @param parallel.dates int: the number of cores available for parallel estimation
#'
#' @return data.frame with a row for each combination method and forecasted date
#'
#' @examples
#' \donttest{
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  B = c(1:100) + rnorm(100)
#'  C = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A, B, C)
#'
#'  # run forecast_univariate
#'  forecast.multi =
#'      forecast_multivariate(
#'        Data = Data,
#'        target = 'A',
#'        forecast.dates = tail(Data$date,5),
#'        method = c('ols','var'),
#'        horizon = 1,
#'        freq = 'month')
#'  # include observed values
#'  forecasts =
#'    dplyr::left_join(
#'      forecast.multi,
#'      data.frame(date, observed = A),
#'      by = 'date'
#'    )
#'
#'  # combine forecasts
#'  combinations =
#'    forecast_combine(
#'      forecasts,
#'      method = c('uniform','median','trimmed.mean',
#'                 'n.best','lasso','peLasso'),
#'      burn.in = 5,
#'      n.max = 2)
#' }
#'
#'
#' @export

# assumes a column named observed
forecast_combine = function(
  Data,                  # data.frame: data frame of forecasted values to combine, assumes `date` and `observed` columns, but `observed' is not necessary for all methods
  method = 'uniform',    # string or vector: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'
                         # (default fixed: was the typo 'unform', which matched no branch and crashed the final reduce)
  n.max = NULL,          # int: maximum number of forecasts to select
  rolling.window = NA,   # int: size of rolling window to evaluate forecast error over, use entire period if NA
  trim = c(0.5, 0.95),   # numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)
  burn.in = 1,           # int: the number of periods to use in the first model estimation
  parallel.dates = NULL  # int: the number of cores available for parallel estimation
){

  # create parallel back end
  if(!is.null(parallel.dates)){
    future::plan(strategy = 'multisession', workers = parallel.dates)
  }else{
    future::plan(strategy = 'sequential')
  }

  # cast from long to wide: one column per model, one row per date
  forecasts = Data %>%
    dplyr::select(-se, -forecast.date) %>%
    tidyr::pivot_wider(names_from = model, values_from = forecast)

  # function variables (declared NA to satisfy R CMD check notes on NSE columns)
  model = observed = forecast = forecast.date = se = NA
  results.list = list()

  # uniform weights: simple cross-sectional average of the model forecasts
  if('uniform' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1, FUN = mean, na.rm = T)
    results.list[['uniform']] = data.frame(date = forecasts$date, forecast = combination, model = 'uniform')
  }

  # median forecast
  if('median' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1, FUN = median, na.rm = T)
    results.list[['median']] = data.frame(date = forecasts$date, forecast = combination, model = 'median')
  }

  # trimmed (winsorized) mean
  if('trimmed.mean' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    # winsorize each period's cross-section of forecasts, then average it
    # (previously the winsorized values were computed and immediately
    #  overwritten by the raw mean, so `trim` had no effect)
    combination = apply(forecasts.raw, MARGIN = 1,
                        FUN = function(x){mean(winsorize(x, bounds = trim, trim = FALSE), na.rm = T)})
    results.list[['trimmed']] = data.frame(date = forecasts$date, forecast = combination, model = 'trimmed.mean')
  }

  # N-best method
  if('n.best' %in% method){

    # warnings and errors
    # (the n.max guard was inverted, `window` referred to the base R function
    #  rather than any argument, and errorCondition()/warningCondition() only
    #  build condition objects without signaling them)
    if(is.null(n.max)){
      stop('Set n.max before using the n-best combination method')
    }
    if(is.na(rolling.window)){
      warning('The n-best method will default to using the entire forecast history')
    }

    # create n-best forecast combinations
    combination.nbest = NBest(dplyr::select(forecasts, -dplyr::contains('date')), n.max, rolling.window)
    combination.mean = apply(combination.nbest, MARGIN = 1, FUN = mean, na.rm = T)
    combination = data.frame(date = forecasts$date, combination.mean, combination.nbest) %>%
      dplyr::rename(N.best = combination.mean)
    combination = tidyr::pivot_longer(combination,
                                      cols = names(dplyr::select(combination, -date)),
                                      names_to = 'model',
                                      values_to = 'forecast')

    results.list[['nbest']] = combination
  }

  # peLasso
  if('peLasso' %in% method){
    combination =
      forecasts$date[burn.in : nrow(forecasts)] %>%
      furrr::future_map(
        .f = function(forecast.date){

          # set data: train on strictly earlier dates, predict the current date
          information.set = dplyr::filter(forecasts, forecast.date > date)
          current.forecasts = dplyr::filter(forecasts, forecast.date == date)

          # calculate peLasso method
          # stage 1, shrink to 0,
          # y-f -> eLasso to select subset of regressors
          x = as.matrix(dplyr::select(information.set , -observed, -date))
          y = information.set$observed - rowMeans(x)
          model = glmnet::cv.glmnet(x, y, alpha = 1, intercept = F, parallel = T)
          covariates = colnames(x)[which(as.vector(coef(model, s = 'lambda.min')) != 0)-1]

          # stage 2, shrink to 1/k,
          # y-f -> eRidge to shrink subset of regressors to uniform weights
          if(length(covariates) > 1){
            model = glmnet::cv.glmnet(x[,covariates], y, alpha = 0, intercept = F)
          }else{
            # too few selected regressors to rerun ridge; keep the stage-1 model
            covariates = colnames(x)
          }

          # calculate forecast: shrinkage correction plus the equal-weight mean
          peLasso = predict(model, newx = as.matrix(current.forecasts[,covariates]), s = 'lambda.min') +
            rowMeans(dplyr::select(current.forecasts , -observed, -date))
          results = data.frame(date = current.forecasts$date, peLasso, model = 'peLasso')
          colnames(results)[colnames(results) == 'X1'] = 'forecast'
          return(results)

        }
      ) %>%
      purrr::reduce(dplyr::bind_rows)

    results.list[['peLasso']] = combination
  }

  # ML algorithms via caret
  if(length(intersect(c('GBM','RF','NN','ols','lasso','ridge','elastic','pcr','pls'), method)) > 0){

    # training parameter creation and warnings
    if(exists("forecast_combinations.control_panel")){
      message('forecast_combinations.control_panel exists and will be used for ML forecast combination techniques in its present state.')
    }else{

      covariates = length(unique(forecasts$model))

      forecast_combinations.control_panel = instantiate.forecast_combinations.control_panel(covariates = covariates)
      message('forecast_combinations.control_panel was instantiated and default values will be used to train ML forecast combination techniques.')
    }

    # estimate one model per requested engine and per forecast date
    # ('pcr' and 'pls' added: they were accepted by the gate above but missing
    #  here, which produced an empty reduce and a crash)
    combination = intersect(c('GBM','RF','NN','ols','lasso','ridge','elastic','pcr','pls'), method) %>%
      purrr::map(
        .f = function(engine){

          forecasts$date[burn.in : nrow(forecasts)] %>%
            furrr::future_map(
              .f = function(forecast.date){

                # set data: train on strictly earlier dates, predict the current date
                information.set = dplyr::filter(forecasts, forecast.date > date)
                current.forecasts = dplyr::filter(forecasts, forecast.date == date)

                # estimate model
                model =
                  caret::train(observed~.,
                               data = dplyr::select(information.set, -date),
                               method = forecast_combinations.control_panel$caret.engine[[engine]],
                               trControl = forecast_combinations.control_panel$control,
                               tuneGrid = forecast_combinations.control_panel$tuning.grids[[engine]],
                               metric = forecast_combinations.control_panel$accuracy,
                               na.action = na.omit)

                # calculate forecast
                point = predict(model, newdata = current.forecasts)

                # calculate standard error
                # (best effort: only linear-model engines return an interval;
                #  everything else falls through to NA)
                error =
                  try(
                    predict(model$finalModel, current.forecasts, interval = "confidence", level = 0.95) %>%
                      data.frame(),
                    silent = TRUE
                  )

                if('upr' %in% names(error) == TRUE){
                  error = (error$upr - error$fit) / qnorm(0.95)
                  error = as.numeric(error)
                }else{
                  error = NA
                }

                # set dates
                results = data.frame(date = current.forecasts$date,
                                     model = engine, forecast = point, se = error)
              }
            ) %>%
            purrr::reduce(dplyr::bind_rows)
        }
      ) %>%
      purrr::reduce(dplyr::bind_rows)

    results.list[['ML']] = combination
  }

  # return results
  if(length(results.list) == 0){
    stop('method did not match any available combination technique')
  }
  results = purrr::reduce(results.list, dplyr::bind_rows) %>%
    dplyr::mutate(model = paste0(model, '.combo'))
  rownames(results) = c(1:nrow(results))
  return(results)
}

# --------------------------------------------------------------------------------
# /R/forecast_metrics.R
# --------------------------------------------------------------------------------

# dependencies:
#  lmtest
#  sandwich
#  forecast

#-------------------------------------------
# loss functions
#-------------------------------------------
#' Calculate error via loss functions
#'
#' A function to calculate various error loss functions. Options include:
#' MSE, RMSE, MAE, and MAPE. The default is MSE loss.
#'
#' @param forecast numeric: vector of forecasted values
#' @param observed numeric: vector of observed values
#' @param metric string: loss function
#'
#' @return numeric test result
#'
#' @export

loss_function = function(
  forecast,       # numeric: vector of forecasted values
  observed,       # numeric: vector of observed values
  metric = 'MSE'  # string: loss function
){

  if(metric == 'MSE'){
    error = mean((observed - forecast)^2, na.rm = T)
  }else if(metric == 'RMSE'){
    error = sqrt(mean((observed - forecast)^2, na.rm = T))
  }else if(metric == 'MAE'){
    error = mean(abs(observed - forecast), na.rm = T)
  }else if(metric == 'MAPE'){
    error = mean(abs((forecast - observed)/observed), na.rm = T)
  }else{
    # previously an unrecognized metric fell through and raised the cryptic
    # "object 'error' not found"; fail with an informative message instead
    stop('metric must be one of "MSE", "RMSE", "MAE", or "MAPE"')
  }

  return(error)
}

#-------------------------------------------
# forecast accuracy
#-------------------------------------------
#' Calculate forecast accuracy
#'
#' A function to calculate various loss functions, including
#' MSE, RMSE, MAE, and MAPE.
#'
#' @param Data data.frame: data frame of forecasts, model names, and dates
#'
#' @return data.frame of numeric error results
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'     forecast_univariate(
#'       Data = Data,
#'       forecast.dates = tail(Data$date,10),
#'       method = c('naive','auto.arima', 'ets'),
#'       horizon = 1,
#'       recursive = FALSE,
#'       freq = 'month')
#'
#'  forecasts =
#'    dplyr::left_join(
#'      forecast.uni,
#'      data.frame(date, observed = A),
#'      by = 'date'
#'    )
#'
#'  # forecast accuracy
#'  forecast.accuracy = forecast_accuracy(forecasts)
#'
#' }
#'
#' @export

forecast_accuracy = function(
  Data  # data.frame: data frame of forecasts, model names, and dates; must contain 'date', 'model', 'forecast', and 'observed' columns
){

  # input validation
  # (stop() instead of print(errorCondition(...)), which only printed the
  #  condition object and let execution continue; also fixes the
  #  "obsererved" typo in the message)
  if(!'observed' %in% names(Data)){
    stop('There must be a column named "observed" in Data.')
  }
  if(!'date' %in% names(Data)){
    stop('There must be a column named "date" in Data.')
  }

  # function variables (declared NA to satisfy R CMD check notes on NSE columns)
  model = observed = forecast = forecast.date = se = NA

  # set data: align each model's forecasts with the observed values by date
  information.set =
    dplyr::full_join(
      dplyr::select(Data, -observed),
      dplyr::select(Data, date, observed),
      by = 'date')

  # calculate loss functions, one row of metrics per model
  information.set = information.set %>%
    dplyr::group_split(model) %>%
    purrr::map_df(
      .f = function(X){

        Y = X %>%
          dplyr::select(observed, forecast, model) %>%
          na.omit() %>%
          dplyr::summarize(
            model = unique(model),
            MSE = mean((observed - forecast)^2, na.rm = T),
            RMSE = sqrt(mean((observed - forecast)^2, na.rm = T)),
            MAE = mean(abs(observed - forecast), na.rm = T),
            MAPE = mean(abs((forecast - observed)/observed), na.rm = T))

        return(Y)
      }
    )

  return(information.set)
}


#-------------------------------------------
# forecast comparison
#-------------------------------------------
#' Compare forecast accuracy
#'
#' A function to compare forecasts. Options include: simple forecast error ratios,
#' [Diebold-Mariano test](https://www.sas.upenn.edu/~fdiebold/papers/paper68/pa.dm.pdf), and [Clark and West test](https://www.nber.org/papers/t0326) for nested models
#'
#' @param Data data.frame: data frame of forecasts, model names, and dates
#' @param baseline.forecast string: column name of baseline (null hypothesis) forecasts
#' @param test string: which test to use; ER = error ratio, DM = Diebold-Mariano, CW = Clark and West
#' @param loss string: error loss function to use if creating forecast error ratio
#' @param horizon int: horizon of forecasts being compared in DM and CW tests
#'
#' @return numeric test result
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'     forecast_univariate(
#'       Data = Data,
#'       forecast.dates = tail(Data$date,10),
#'       method = c('naive','auto.arima', 'ets'),
#'       horizon = 1,
#'       recursive = FALSE,
#'       freq = 'month')
#'
#'  forecasts =
#'    dplyr::left_join(
#'      forecast.uni,
#'      data.frame(date, observed = A),
#'      by = 'date'
#'    )
#'
#'  # run ER (MSE)
#'  er.ratio.mse =
#'    forecast_comparison(
#'      forecasts,
#'      baseline.forecast = 'naive',
#'      test = 'ER',
#'      loss =
'MSE')
#' }
#'
#' @export

forecast_comparison = function(
  Data,               # data.frame: data frame of forecasts, model names, and dates
  baseline.forecast,  # string: column name of baseline (null hypothesis) forecasts
  test = 'ER',        # string: which test to use; ER = error ratio, DM = Diebold-Mariano, CW = Clark and West
  loss = 'MSE',       # string: error loss function to use if creating forecast error ratio
  horizon = NULL      # int: horizon of forecasts being compared in DM and CW tests
){

  # input validation
  # (stop() instead of print(errorCondition(...)), which only printed the
  #  condition object and let execution continue)
  if(!'observed' %in% names(Data)){
    stop('There must be a column named "observed" in Data.')
  }
  if(!'date' %in% names(Data)){
    stop('There must be a column named "date" in Data.')
  }

  # function variables (declared NA to satisfy R CMD check notes on NSE columns)
  model = observed = forecast = forecast.date = se = NA

  # set data: attach the baseline model's forecasts and observed values to
  # every other model's forecasts, matched by date
  information.set =
    dplyr::full_join(
      dplyr::select(Data, -observed),
      Data %>%
        dplyr::filter(model == baseline.forecast) %>%
        dplyr::select(date, observed, baseline.forecast = forecast),
      by = 'date')


  # calculate loss functions
  if(test == 'ER'){
    # simple ratio of candidate loss to baseline loss (< 1 favors the candidate)
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          error =
            loss_function(X$forecast, X$observed, loss) /
            loss_function(X$baseline.forecast, X$observed, loss)

          return(
            data.frame(
              model = unique(X$model),
              error.ratio = error)
          )
        }
      )

  }else if(test == 'DM'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          # the baseline compared with itself has no test statistic
          if(sum(na.omit(X$baseline.forecast - X$forecast)) == 0){
            return(
              data.frame(
                model = baseline.forecast,
                DM.statistic = NA)
            )
          }

          # pass the documented horizon through to dm.test
          # (previously ignored; h = 1 is dm.test's default, so behavior is
          #  unchanged when horizon is NULL)
          DM.statistic =
            forecast::dm.test(
              e1 = na.omit(X$baseline.forecast - X$observed),
              e2 = na.omit(X$forecast - X$observed),
              h = if(is.null(horizon)){1}else{horizon},
              alternative = 'less')$statistic[1]

          return(
            data.frame(
              model = unique(X$model),
              DM.statistic = DM.statistic)
          )
        }
      )

  }else if(test == 'CW'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          # the baseline compared with itself has no test statistic
          if(sum(na.omit(X$baseline.forecast - X$forecast)) == 0){
            return(
              data.frame(
                model = baseline.forecast,
                CW.statistic = NA)
            )
          }

          # Clark-West adjusted squared-error differential
          fCW12 =
            (X$observed - X$baseline.forecast)^2 -
            (X$observed - X$forecast)^2 -
            (X$baseline.forecast - X$forecast)^2

          # regress on a constant and use a Newey-West HAC t-statistic
          lmCW = lm(as.numeric(fCW12)~1)

          lmCW.summ = summary(lmCW)

          lmCW.NW.summ = lmCW.summ

          lmCW.NW.summ$coefficients =
            unclass(lmtest::coeftest(lmCW, vcov. = sandwich::NeweyWest(lmCW, lag = horizon)))

          CW.statistic = lmCW.NW.summ$coefficients[3]

          return(
            data.frame(
              model = unique(X$model),
              # fixed: was `Cw.statistic`, which mismatched the NA branch above
              # and made map_df emit two half-NA columns
              CW.statistic = CW.statistic)
          )
        }
      )
  }

  rownames(information.set) = c(1:nrow(information.set))
  return(information.set)

}

# --------------------------------------------------------------------------------
# /R/forecast_multivariate.R
# --------------------------------------------------------------------------------

#----------------------------------------------
# multivariate forecasting arguments - ML
#----------------------------------------------
#' Create interface to control `forecast_multivariate` ML estimation
#'
#' A function to create the multivariate forecast methods
#' arguments list for user manipulation.
#'
#' @param covariates int: the number of features that will go into the model
#' @param rolling.window int: size of rolling window, NA if expanding window is used
#' @param horizon int: number of periods into the future to forecast
#'
#' @return forecast_multivariate.ml.control_panel
#'
#' @export

instantiate.forecast_multivariate.ml.control_panel = function(covariates = NULL, rolling.window = NULL, horizon = NULL){

  # caret names: map user-facing method names to caret's model codes
  caret.engine = list(
    ols = 'lm',
    ridge = 'glmnet',
    lasso = 'glmnet',
    elastic = 'glmnet',
    RF = 'rf',
    GBM = 'gbm',
    NN = 'avNNet',
    pls = 'pls',
    pcr = 'pcr'
  )

  # tuning grids (NULL lets caret use its default grid)
  tuning.grids = list(

    ols = NULL,

    ridge = expand.grid(
      alpha = 0,
      lambda = 10^seq(-3, 3, length = 100)),

    lasso = expand.grid(
      alpha = 1,
      lambda = 10^seq(-3, 3, length = 100)),

    elastic = NULL,

    GBM =
      expand.grid(
        n.minobsinnode = c(1),
        shrinkage = c(.1,.01),
        n.trees = c(100, 250, 500),
        interaction.depth = c(1,2,5)),

    RF =
      expand.grid(
        mtry = c(1:4)),

    NN =
      expand.grid(
        size = seq(2,10,5),
        decay = c(.01,.001),
        bag = c(100, 250, 500)),

    pls =
      expand.grid(
        ncomp = c(1:5)),

    pcr =
      expand.grid(
        ncomp = c(1:5))

  )

  # tuning grids if # of features is available: scale RF and NN grids to the data
  if(!is.null(covariates)){
    tuning.grids[['RF']] =
      expand.grid(
        mtry = covariates/3)

    tuning.grids[['NN']] =
      expand.grid(
        size = c(covariates, 2*covariates, 3*covariates),
        decay = c(.01,.001),
        bag = c(20, 100))

  }

  # hyper-parameter selection routine
  # branch 1: an explicit numeric rolling window -> time-slice CV with that window
  if(is.numeric(rolling.window)){
    control =
      caret::trainControl(
        method = "timeslice",
        horizon = horizon,
        initialWindow = rolling.window,
        allowParallel = TRUE)
  }else if(!is.null(rolling.window)){
    # branch 2: reached when rolling.window is NA (is.numeric(NA) is FALSE,
    # !is.null(NA) is TRUE) -> time-slice CV with a hard-coded initial window of 5
    # NOTE(review): forecast_multivariate defaults rolling.window to NA, so this
    # is the default path; confirm the fixed initialWindow = 5 is intended here
    # rather than falling through to plain k-fold CV below.
    control =
      caret::trainControl(
        method = "timeslice",
        horizon = horizon,
        initialWindow = 5,
        allowParallel = TRUE)
  }else{
    # branch 3: rolling.window is NULL -> ordinary 5-fold cross-validation
    control =
      caret::trainControl(
        method = "cv",
        number = 5,
        allowParallel = TRUE)

  }

  # accuracy metric used in training
  accuracy = 'RMSE'

  # return training information
  return(
    list(
      caret.engine = caret.engine,
      tuning.grids = tuning.grids,
      control = control,
      accuracy = accuracy
    )
  )

}

#----------------------------------------------
# multivariate forecasting arguments - VAR
#----------------------------------------------
#' Create interface to control `forecast_multivariate` VAR estimation
#'
#' A function to create the multivariate forecast methods
#' arguments list for user manipulation.
#'
#' @return forecast_multivariate.var.control_panel
#'
#' @export

instantiate.forecast_multivariate.var.control_panel = function(){

  # default VAR estimation arguments; presumably forwarded to the VAR
  # estimation routine (e.g. vars::VAR) by forecast_multivariate — confirm
  # against the caller, which is outside this view
  return(
    list(
      p = 1,
      lag.max = NULL,
      ic = 'AIC',
      type = 'none',
      season = NULL,
      exogen = NULL
    )
  )

}

#---------------------------------------------
# Multivariate Forecast
#---------------------------------------------
#' Forecast with multivariate models
#'
#' A function to estimate multivariate forecasts out-of-sample. Methods available include:
#' vector auto-regression, linear regression, lasso regression, ridge regression, elastic net,
#' random forest, tree-based gradient boosting machine, and single-layer neural network.
#' See package website for most up-to-date list of available models.
163 | #' 164 | #' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a `ts`, `xts`, or `zoo` object to forecast 165 | #' @param forecast.dates date: dates forecasts are created 166 | #' @param target string: column name in Data of variable to forecast 167 | #' @param method string: methods to use 168 | #' @param rolling.window int: size of rolling window, NA if expanding window is used 169 | #' @param freq string: time series frequency; day, week, month, quarter, year 170 | #' @param horizon int: number of periods into the future to forecast 171 | #' @param lag.variables string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables 172 | #' @param lag.n int: number of lags to create 173 | #' @param outlier.clean boolean: if TRUE then clean outliers 174 | #' @param outlier.variables string: vector of variables to purge of outlier, default is all but 'date' column 175 | #' @param outlier.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 176 | #' @param outlier.trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound 177 | #' @param outlier.cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 178 | #' @param impute.missing boolean: if TRUE then impute missing values 179 | #' @param impute.method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 180 | #' @param impute.variables string: vector of variables to impute missing values, default is all numeric columns 181 | #' @param impute.verbose boolean: show start-up status of impute.missing.routine 182 | #' @param reduce.data boolean: if TRUE then reduce dimension 183 | #' @param reduce.variables string: vector of variables to impute 
missing values, default is all numeric columns 184 | #' @param reduce.ncomp int: number of factors to create 185 | #' @param reduce.standardize boolean: normalize variables (mean zero, variance one) before estimating factors 186 | #' @param parallel.dates int: the number of cores available for parallel estimation 187 | #' @param return.models boolean: if TRUE then return list of models estimated each forecast.date 188 | #' @param return.data boolean: if True then return list of information.set for each forecast.date 189 | #' 190 | #' @return data.frame with a row for each forecast by model and forecasted date 191 | #' 192 | #' @examples 193 | #' \donttest{ 194 | #' # simple time series 195 | #' A = c(1:100) + rnorm(100) 196 | #' B = c(1:100) + rnorm(100) 197 | #' C = c(1:100) + rnorm(100) 198 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 199 | #' Data = data.frame(date = date, A, B, C) 200 | #' 201 | #' # run forecast_univariate 202 | #' forecast.multi = 203 | #' forecast_multivariate( 204 | #' Data = Data, 205 | #' target = 'A', 206 | #' forecast.dates = tail(Data$date,5), 207 | #' method = c('ols','var'), 208 | #' horizon = 1, 209 | #' # information set 210 | #' rolling.window = NA, 211 | #' freq = 'month', 212 | #' # data prep 213 | #' lag.n = 4, 214 | #' outlier.clean = TRUE, 215 | #' impute.missing = TRUE) 216 | #' } 217 | #' 218 | #' 219 | #' @export 220 | 221 | forecast_multivariate = function( 222 | Data, # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a `ts`, `xts`, or `zoo` object to forecast 223 | forecast.dates, # date: dates forecasts are created 224 | target, # string: column name in `Data` of variable to forecast 225 | horizon, # int: number of periods into the future to forecast 226 | method, # string or vector: methods to use; 'var', 'ols', 'ridge', 'lasso', 'elastic', 'RF', 'GBM', 'NN' 227 | 228 | # information set 229 | rolling.window = 
NA, # int: size of rolling window, NA if expanding window is used 230 | freq, # string: time series frequency; day, week, month, quarter, year 231 | lag.variables = NULL, # string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables 232 | lag.n = NULL, # int: number of lags to create 233 | 234 | # outlier cleaning 235 | outlier.clean = FALSE, # boolean: if TRUE then clean outliers 236 | outlier.variables = NULL, # string: vector of variables to standardize, default is all but 'date' column 237 | outlier.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 238 | outlier.trim = FALSE, # boolean: if TRUE then replace outliers with NA instead of winsorizing bound 239 | outlier.cross_section = FALSE, # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 240 | 241 | # impute missing 242 | impute.missing = FALSE, # boolean: if TRUE then impute missing values 243 | impute.method = 'kalman', # string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 244 | impute.variables = NULL, # string: vector of variables to impute missing values, default is all numeric columns 245 | impute.verbose = FALSE, # boolean: show start-up status of impute.missing.routine 246 | 247 | # dimension reduction 248 | reduce.data = FALSE, # boolean: if TRUE then reduce dimension 249 | reduce.variables = NULL, # string: vector of variables to impute missing values, default is all numeric columns 250 | reduce.ncomp = NULL, # int: number of factors to create 251 | reduce.standardize = TRUE, # boolean: normalize variables (mean zero, variance one) before estimating factors 252 | 253 | # parallel processing 254 | parallel.dates = NULL, # int: the number of cores available for parallel estimation 255 | 256 | # additional 
objects 257 | return.models = FALSE, # boolean: if TRUE then return list of models estimated each forecast.date 258 | return.data = FALSE # boolean: if True then return list of information.set for each forecast.date 259 | 260 | ){ 261 | 262 | # convert from ts, xts, or zoo object 263 | if(xts::is.xts(Data) | zoo::is.zoo(Data) | stats::is.ts(Data)){ 264 | Data = data.frame(date = zoo::index(Data), Data) 265 | } 266 | 267 | # training parameter creation and warnings 268 | if(exists("forecast_multivariate.ml.control_panel")){ 269 | 270 | message('forecast_multivariate.ml.control_panel exists and will be used for ML model estimation in its present state.') 271 | 272 | }else{ 273 | 274 | covariates = nrow(dplyr::select(Data, -target, -date)) 275 | if(!is.null(lag.n)){covariates = covariates + covariates*lag.n} 276 | 277 | forecast_multivariate.ml.control_panel = instantiate.forecast_multivariate.ml.control_panel(covariates = covariates, rolling.window = rolling.window, horizon = horizon) 278 | message('forecast_multivariate.ml.control_panel was instantiated and default values will be used for ML model estimation.') 279 | 280 | } 281 | 282 | # VAR parameters and warnings 283 | if(exists("forecast_multivariate.var.control_panel")){ 284 | message('forecast.combinations.var.training exists and will be used for VAR model estimation in its present state.') 285 | }else{ 286 | forecast_multivariate.var.control_panel = instantiate.forecast_multivariate.var.control_panel() 287 | message('forecast_multivariate.var.control_panel was instantiated and default values will be used for VAR model estimation.') 288 | } 289 | 290 | # create parallel back end 291 | if(!is.null(parallel.dates)){ 292 | future::plan(strategy = 'multisession', workers = parallel.dates) 293 | }else{ 294 | future::plan(strategy = 'sequential') 295 | } 296 | 297 | # results list 298 | results.list = list() 299 | 300 | # Create forecasts 301 | forecasts = forecast.dates %>% 302 | furrr::future_map( 303 | .f = 
function(forecast.date){ 304 | 305 | # subset data 306 | information.set = 307 | data_subset( 308 | Data = Data, 309 | forecast.date = forecast.date, 310 | rolling.window = rolling.window, 311 | freq = freq 312 | ) 313 | 314 | # clean outliers 315 | if(outlier.clean){ 316 | information.set = 317 | data_outliers( 318 | Data = information.set, 319 | variables = outlier.variables, 320 | w.bounds = outlier.bounds, 321 | trim = outlier.trim, 322 | cross_section = outlier.cross_section 323 | ) 324 | } 325 | 326 | # impute missing values 327 | if(impute.missing){ 328 | information.set = 329 | data_impute( 330 | Data = information.set, 331 | variables = impute.variables, 332 | method = impute.method, 333 | verbose = impute.verbose 334 | ) 335 | } 336 | 337 | # dimension reduction 338 | if(reduce.data){ 339 | information.set.reduce = 340 | data_reduction( 341 | Data = information.set, 342 | variables = reduce.variables, 343 | ncomp = reduce.ncomp, 344 | standardize = reduce.standardize 345 | ) 346 | 347 | information.set = 348 | dplyr::full_join( 349 | dplyr::select(information.set, target, date), 350 | information.set.reduce, 351 | by = 'date') 352 | } 353 | 354 | # create variable lags 355 | if(!is.null(lag.n)){ 356 | information.set = 357 | n.lag( 358 | Data = information.set, 359 | lags = lag.n, 360 | variables = lag.variables) 361 | } 362 | 363 | results = method %>% 364 | purrr::map( 365 | .f = function(engine){ 366 | 367 | # set current data 368 | current.set = dplyr::filter(information.set, forecast.date == date) 369 | 370 | # estimate ML model 371 | if(engine != 'var'){ 372 | 373 | # check for missing covariates in current data 374 | if(is.na(sum(dplyr::select(current.set, -date)))){ 375 | print(warningCondition(paste0('Missing covariate on: ', forecast.date))) 376 | results = data.frame(date = current.set$date, ml = NA) 377 | colnames(results)[colnames(results) == 'ml'] = engine 378 | return(results) 379 | } 380 | 381 | # set target variable 382 | 
names(information.set)[names(information.set) == target] = 'target' 383 | 384 | # set horizon 385 | information.set = 386 | dplyr::mutate(information.set, target = dplyr::lead(target, horizon)) %>% 387 | na.omit() 388 | 389 | # estimate model 390 | model = 391 | caret::train(target~., 392 | data = dplyr::select(information.set, -date), 393 | method = forecast_multivariate.ml.control_panel$caret.engine[[engine]], 394 | trControl = forecast_multivariate.ml.control_panel$control, 395 | tuneGrid = forecast_multivariate.ml.control_panel$tuning.grids[[engine]], 396 | metric = forecast_multivariate.ml.control_panel$accuracy) 397 | 398 | # calculate forecast 399 | point = try(predict(model, newdata = current.set)) 400 | 401 | if(!is.numeric(point)){ 402 | point = NA 403 | } 404 | 405 | # calculate standard error 406 | error = 407 | try( 408 | predict(model$finalModel, current.set, interval = "confidence", level = 0.95) %>% 409 | data.frame(), 410 | silent = TRUE 411 | ) 412 | 413 | error = try((error$upr - error$fit) / qnorm(0.95), 414 | silent = TRUE) 415 | 416 | if(is.numeric(error) != TRUE | length(error) != 1){error = NA} 417 | 418 | # estimate VAR 419 | }else{ 420 | 421 | model = 422 | vars::VAR( 423 | y = na.omit(dplyr::select(information.set, -date)), 424 | p = forecast_multivariate.var.control_panel$p, 425 | lag.max = forecast_multivariate.var.control_panel$max.lag, 426 | ic = forecast_multivariate.var.control_panel$ic, 427 | season = forecast_multivariate.var.control_panel$season, 428 | type = forecast_multivariate.var.control_panel$type 429 | ) 430 | 431 | # calculate forecast and standard error 432 | ml = predict(model, n.ahead = horizon) 433 | ml = ml$fcst[target] %>% data.frame() 434 | point = ml[horizon, 1] 435 | error = (ml[horizon, 3] - ml[horizon, 1]) / qnorm(0.95) 436 | 437 | } 438 | 439 | # set date 440 | date = forecast_date( 441 | forecast.date, 442 | horizon, 443 | freq) 444 | 445 | # set dates 446 | predictions = data.frame( 447 | date = date, 448 | 
forecast.date = forecast.date, 449 | model = engine, forecast = point, se = error) 450 | 451 | 452 | # return results 453 | return( 454 | list( 455 | predictions = predictions, 456 | model = model 457 | ) 458 | ) 459 | 460 | } 461 | ) 462 | 463 | predictions = 464 | purrr::map(results, .f = function(X){return(X$predictions)}) %>% 465 | purrr::reduce(dplyr::bind_rows) 466 | 467 | rownames(predictions) = c(1:nrow(predictions)) 468 | 469 | models = 470 | purrr::map(results, .f = function(X){return(X$model)}) 471 | 472 | # store objects for return 473 | results = 474 | list( 475 | predictions = predictions, 476 | information.set = information.set, 477 | models = models 478 | ) 479 | 480 | # return results 481 | return(results) 482 | 483 | } 484 | ) 485 | 486 | # prepare forecasts 487 | predictions = 488 | purrr::map(forecasts, .f = function(X){return(X$predictions)}) %>% 489 | purrr::reduce(dplyr::bind_rows) 490 | 491 | # add model and information set lists to return object 492 | if(return.data == TRUE | return.models == TRUE){ 493 | information = list(forecasts = predictions) 494 | }else{ 495 | information = predictions 496 | } 497 | 498 | # prepare models 499 | if(return.models == TRUE){ 500 | models = purrr::map(forecasts, .f = function(X){return(X$models)}) 501 | names(models) = forecast.dates 502 | information[['models']] = models 503 | } 504 | 505 | # prepare information set 506 | if(return.data == TRUE){ 507 | information.set = purrr::map(forecasts, .f = function(X){return(X$information.set)}) 508 | names(information.set) = forecast.dates 509 | information[['information.set']] = information.set 510 | } 511 | 512 | # return results 513 | return(information) 514 | } 515 | -------------------------------------------------------------------------------- /R/forecast_univariate.R: -------------------------------------------------------------------------------- 1 | 2 | # dependencies: 3 | # magrittr 4 | # lubridate 5 | # dplry 6 | # purrr 7 | # forecast 8 | 9 | 10 | 
#---------------------------------------------- 11 | # univariate forecasting arguments 12 | #---------------------------------------------- 13 | #' Create interface to control `forecast_univariate` model estimation 14 | #' 15 | #' A function to create the univariate forecast method arguments list 16 | #' for user manipulation. 17 | #' 18 | #' @return forecast_univariate.control_panel 19 | #' 20 | #' @export 21 | 22 | instantiate.forecast_univariate.control_panel = function(){ 23 | 24 | # methods 25 | methods = list( 26 | auto.arima = forecast::auto.arima, 27 | Arima = forecast::Arima, 28 | dshw = forecast::dshw, 29 | holt = forecast::holt, 30 | hw = forecast::hw, 31 | ses = forecast::ses, 32 | ets = forecast::ets, 33 | stlm = forecast::stlm, 34 | bats = forecast::bats, 35 | tbats = forecast::tbats, 36 | thetaf = forecast::thetaf, 37 | nnetar = forecast::nnetar, 38 | meanf = forecast::meanf, 39 | naive = forecast::naive, 40 | snaive = forecast::snaive, 41 | rwf = forecast::rwf, 42 | tslm = forecast::tslm, 43 | splinef = forecast::splinef 44 | ) 45 | 46 | # arguments 47 | arguments = list( 48 | auto.arima = NULL, 49 | Arima = NULL, 50 | dshw = NULL, 51 | holt = NULL, 52 | hw = NULL, 53 | ses = NULL, 54 | ets = NULL, 55 | stlm = NULL, 56 | bats = NULL, 57 | tbats = NULL, 58 | thetaf = NULL, 59 | nnetar = NULL, 60 | meanf = NULL, 61 | naive = NULL, 62 | snaive = NULL, 63 | rwf = NULL, 64 | splinef = NULL, 65 | tslm = NULL 66 | ) 67 | 68 | return( 69 | list( 70 | method = methods, 71 | arguments = arguments 72 | ) 73 | ) 74 | 75 | } 76 | 77 | #---------------------------------------------- 78 | # univariate time series forecasting function 79 | #---------------------------------------------- 80 | #' Forecast with univariate models 81 | #' 82 | #' A function to estimate univariate forecasts out-of-sample. Methods available include all forecast 83 | #' methods from the `forecast` package. See package website for most up-to-date list of available models. 
84 | #' 85 | #' @param Data data.frame: data frame of variable to forecast and a date column; may alternatively be a `ts`, `xts`, or `zoo` object to forecast 86 | #' @param forecast.dates date: dates forecasts are created 87 | #' @param methods string: models to estimate forecasts 88 | #' @param horizon int: number of periods to forecast 89 | #' @param rolling.window int: size of rolling window, NA if expanding window is used 90 | #' @param freq string: time series frequency; day, week, month, quarter, year 91 | #' @param recursive boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE 92 | #' @param outlier.clean boolean: if TRUE then clean outliers 93 | #' @param outlier.variables string: vector of variables to purge of outliers, default is all but 'date' column 94 | #' @param outlier.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 95 | #' @param outlier.trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound 96 | #' @param outlier.cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 97 | #' @param impute.missing boolean: if TRUE then impute missing values 98 | #' @param impute.method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 99 | #' @param impute.variables string: vector of variables to impute missing values, default is all numeric columns 100 | #' @param impute.verbose boolean: show start-up status of impute.missing.routine 101 | #' @param parallel.dates int: the number of cores available for parallel estimation 102 | #' @param return.models boolean: if TRUE then return list of models estimated each forecast.date 103 | #' @param return.data boolean: if True then return list of information.set for each forecast.date 104 | #' 105 | #' @return data.frame with a row 
for each forecast by model and forecasted date 106 | #' 107 | #' @examples 108 | #' \donttest{ 109 | #' # simple time series 110 | #' A = c(1:100) + rnorm(100) 111 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 112 | #' Data = data.frame(date = date, A) 113 | #' 114 | #' # estiamte univariate forecasts 115 | #' forecast.uni = 116 | #' forecast_univariate( 117 | #' Data = Data, 118 | #' forecast.dates = tail(Data$date,5), 119 | #' method = c('naive','auto.arima', 'ets'), 120 | #' horizon = 1, 121 | #' recursive = FALSE, 122 | #' # information set 123 | #' rolling.window = NA, 124 | #' freq = 'month', 125 | #' # data prep 126 | #' outlier.clean = TRUE, 127 | #' impute.missing = TRUE) 128 | #' } 129 | #' 130 | #' @export 131 | 132 | forecast_univariate = function( 133 | Data, # data.frame: data frame of variable to forecast and a date column; may alternatively be a `ts`, `xts`, or `zoo` object to forecast 134 | forecast.dates, # date: dates forecasts are created 135 | methods, # string or vector: models to estimate forecasts with; currently supports all and only functions from the `forecast` package 136 | horizon, # int: number of periods to forecast 137 | recursive = TRUE, # boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE 138 | 139 | # information set 140 | rolling.window = NA, # int: size of rolling window, NA if expanding window is used 141 | freq, # string: time series frequency; day, week, month, quarter, year 142 | 143 | # outlier cleaning 144 | outlier.clean = FALSE, # boolean: if TRUE then clean outliers 145 | outlier.variables = NULL, # string: vector of variables to standardize, default is all but 'date' column 146 | outlier.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 147 | outlier.trim = FALSE, # boolean: if TRUE then replace outliers with NA instead of winsorizing bound 148 | outlier.cross_section = FALSE, # 
boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 149 | 150 | # impute missing 151 | impute.missing = FALSE, # boolean: if TRUE then impute missing values 152 | impute.method = 'kalman', # string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 153 | impute.variables = NULL, # string: vector of variables to impute missing values, default is all numeric columns 154 | impute.verbose = FALSE, # boolean: show start-up status of impute.missing.routine 155 | 156 | # parallel processing 157 | parallel.dates = NULL, # int: the number of cores available for parallel estimation 158 | 159 | # additional objects 160 | return.models = FALSE, # boolean: if TRUE then return list of models estimated each forecast.date 161 | return.data = FALSE # boolean: if True then return list of information.set for each forecast.date 162 | 163 | ){ 164 | 165 | # convert from ts, xts, or zoo object 166 | if(xts::is.xts(Data) | zoo::is.zoo(Data) | stats::is.ts(Data)){ 167 | Data = data.frame(date = zoo::index(Data), Data) 168 | } 169 | 170 | # training parameter creation and warnings 171 | if(exists("forecast_univariate.control_panel")){ 172 | message('forecast_univariate.control_panel exists and will be used for model estimation in its present state.') 173 | }else{ 174 | forecast_univariate.control_panel = instantiate.forecast_univariate.control_panel() 175 | message('forecast_univariate.control_panel was instantiated and default values will be used for model estimation.') 176 | } 177 | 178 | # create parallel back end 179 | if(!is.null(parallel.dates)){ 180 | future::plan(strategy = 'multisession', workers = parallel.dates) 181 | }else{ 182 | future::plan(strategy = 'sequential') 183 | } 184 | 185 | # create lists to store information 186 | list.models = list(); i = 1 187 | list.data = list(); j = 1 188 | 189 | # forecast 
routine 190 | forecasts = forecast.dates %>% 191 | furrr::future_map( 192 | .f = function(forecast.date){ 193 | 194 | #--------------------------- 195 | # Create information set 196 | #--------------------------- 197 | 198 | # subset data 199 | information.set = 200 | data_subset( 201 | Data = Data, 202 | forecast.date = forecast.date, 203 | rolling.window = rolling.window, 204 | freq = freq 205 | ) 206 | 207 | # clean outliers 208 | if(outlier.clean){ 209 | information.set = 210 | data_outliers( 211 | Data = information.set, 212 | variables = outlier.variables, 213 | w.bounds = outlier.bounds, 214 | trim = outlier.trim, 215 | cross_section = outlier.cross_section 216 | ) 217 | } 218 | 219 | # impute missing values 220 | if(impute.missing){ 221 | information.set = 222 | data_impute( 223 | Data = information.set, 224 | variables = impute.variables, 225 | method = impute.method, 226 | verbose = impute.verbose 227 | ) 228 | } 229 | 230 | # set ts object 231 | information.set = information.set %>% 232 | dplyr::select(-date) %>% 233 | as.ts() 234 | 235 | #--------------------------- 236 | # Create forecasts 237 | #--------------------------- 238 | 239 | results = 240 | methods %>% purrr::map( 241 | .f = function(engine){ 242 | 243 | # make predictions 244 | # 1. 
using direct projections 245 | if(recursive == FALSE){ 246 | 247 | # set data 248 | forecast_univariate.control_panel$arguments[[engine]]$y = information.set 249 | 250 | # estimate model 251 | model = do.call(what = forecast_univariate.control_panel$method[[engine]], 252 | args = forecast_univariate.control_panel$arguments[[engine]]) 253 | 254 | # create forecasts 255 | predictions = forecast::forecast(model, h = horizon) 256 | 257 | # create standard errors 258 | calc.error = try(predictions$lower[1]) 259 | 260 | if(is.numeric(calc.error) == TRUE){ 261 | error = (predictions$upper[,1] - predictions$lower[,1]) / 262 | (2 * qnorm(.5 + predictions$level[1] / 200)) 263 | error = as.numeric(error) 264 | }else{ 265 | se = NA 266 | } 267 | 268 | predictions = data.frame(model = engine, forecast = predictions$mean, se = error) 269 | 270 | # 2. using recursive forecasts 271 | }else{ 272 | 273 | predictions = list() 274 | forecast_univariate.control_panel$arguments[[engine]]$y = information.set 275 | 276 | for(i in 1:horizon){ 277 | 278 | # estimate model 279 | model = do.call(what = forecast_univariate.control_panel$method[[engine]], 280 | args = forecast_univariate.control_panel$arguments[[engine]]) 281 | 282 | # create forecast 283 | prediction = forecast::forecast(model, h = 1) 284 | 285 | # create standard errors 286 | calc.error = try(prediction$lower[1]) 287 | 288 | if(is.numeric(calc.error) == TRUE){ 289 | error = (prediction$upper[,1] - prediction$lower[,1]) / 290 | (2 * qnorm(.5 + prediction$level[1] / 200)) 291 | error = as.numeric(error) 292 | }else{ 293 | error = NA 294 | } 295 | 296 | predictions[[i]] = data.frame(model = engine, forecast = prediction$mean, se = error) 297 | 298 | # update information set 299 | information.set = rbind(information.set, prediction$mean[1]) %>% as.ts() 300 | forecast_univariate.control_panel$arguments[[engine]]$y = information.set 301 | 302 | } 303 | 304 | # collapse results 305 | predictions = purrr::reduce(predictions, 
dplyr::bind_rows) %>% data.frame() 306 | 307 | } 308 | 309 | # add forecast dates 310 | predictions$forecast.date = forecast.date 311 | predictions$date = seq.Date(from = forecast.date, by = freq, length.out = horizon+1)[2:(horizon+1)] 312 | 313 | # return results 314 | return( 315 | list( 316 | predictions = predictions, 317 | model = model 318 | ) 319 | ) 320 | } 321 | ) 322 | 323 | predictions = 324 | purrr::map(results, .f = function(X){return(X$predictions)}) %>% 325 | purrr::reduce(dplyr::bind_rows) 326 | 327 | models = 328 | purrr::map(results, .f = function(X){return(X$model)}) 329 | 330 | # store objects for return 331 | results = 332 | list( 333 | predictions = predictions, 334 | information.set = information.set, 335 | models = models 336 | ) 337 | 338 | # return results 339 | return(results) 340 | 341 | } 342 | ) 343 | 344 | # prepare forecasts 345 | predictions = 346 | purrr::map(forecasts, .f = function(X){return(X$predictions)}) %>% 347 | purrr::reduce(dplyr::bind_rows) 348 | 349 | # add model and information set lists to return object 350 | if(return.data == TRUE | return.models == TRUE){ 351 | information = list(forecasts = predictions) 352 | }else{ 353 | information = predictions 354 | } 355 | 356 | # prepare models 357 | if(return.models == TRUE){ 358 | models = purrr::map(forecasts, .f = function(X){return(X$models)}) 359 | names(models) = forecast.dates 360 | information[['models']] = models 361 | } 362 | 363 | # prepare information set 364 | if(return.data == TRUE){ 365 | information.set = purrr::map(forecasts, .f = function(X){return(X$information.set)}) 366 | names(information.set) = forecast.dates 367 | information[['information.set']] = information.set 368 | } 369 | 370 | # return results 371 | return(information) 372 | } 373 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Out-of-sample time series 
forecasting 2 | 3 | 4 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0) 5 | [![CRAN status](https://www.r-pkg.org/badges/version/OOS)](https://CRAN.R-project.org/package=OOS) 6 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) 7 | [![codecov](https://codecov.io/gh/tylerJPike/OOS/branch/main/graph/badge.svg?token=AQ4PFWU3KS)](https://codecov.io/gh/tylerJPike/OOS) 8 | [![Build Status](https://travis-ci.org/tylerJPike/OOS.svg?branch=main)](https://travis-ci.org/tylerJPike/OOS) 9 | 10 | 11 | Out-of-Sample time series forecasting is a common, important, and subtle task. The OOS package introduces a comprehensive and cohesive API for the out-of-sample forecasting workflow: data preparation, forecasting - including both traditional econometric time series models and modern machine learning techniques - forecast combination, model and error analysis, and forecast visualization. 12 | 13 | The key difference between OOS and the other time series forecasting packages is that it operates out-of-sample by construction. That is, it re-cleans data and re-trains models each forecast.date and is careful not to introduce look-ahead bias into its information set via data cleaning or forecasts via model training. Other packages tend to fit the model once, leaving the user to construct the out-of-sample data cleaning and forecast exercise on their own. 14 | 15 | See the OOS package [website](https://tylerjpike.github.io/OOS/) for examples and documentation. 16 | 17 | --- 18 | ## Workflow and available Tools 19 | 20 | ### 1. 
Prepare Data 21 | 22 | | Clean Outliers | Impute Missing Observations (via [imputeTS](https://github.com/SteffenMoritz/imputeTS)) | Dimension Reduction | 23 | |----------------------|------------------------|-----------------------| 24 | | Winsorize | Linear Interpolation | Principal Components | 25 | | Trim | Kalman Filter | | 26 | | | Fill-Forward | | 27 | | | Average | | 28 | | | Moving Average | | 29 | | | Seasonal Decomposition | | 30 | 31 | 32 | ### 2. Forecast 33 | 34 | | Univariate Forecasts (via [forecast](https://github.com/robjhyndman/forecast)) | Multivariate Forecasts (via [caret](https://github.com/topepo/caret)) | Forecast Combinations | 35 | |----------------------|------------------------|-----------------------| 36 | | Random Walk | Vector Autoregression | Mean| 37 | | ARIMA | Linear Regression | Median | 38 | | ETS | LASSO Regression | Trimmed (Winsorized) Mean | 39 | | Spline | Ridge Regression | N-Best | 40 | | Theta Method | Elastic Net | Linear Regression | 41 | | TBATS | Principal Component Regression | LASSO Regression | 42 | | STL | Partial Least Squares Regression | Ridge Regression | 43 | | AR Perceptron | Random Forest | Partial Egalitarian LASSO | 44 | | | Tree-Based Gradient Boosting Machine | Principal Component Regression | 45 | | | Single Layered Neural Network | Partial Least Squares Regression | 46 | | | | Random Forest | 47 | | | | Tree-Based Gradient Boosting Machine | 48 | | | | Single Layered Neural Network | 49 | 50 | 51 | ### 3. 
Analyze 52 | 53 | | Accuracy | Compare | Visualize | 54 | |----------------------|------------------------|-----------------------| 55 | | Mean Square Error (MSE) | Forecast Error Ratios | Forecasts | 56 | | Root Mean Square Error (RMSE) | Diebold-Mariano Test (for unnested models) | Errors | 57 | | Mean Absolute Error (MAE) | Clark and West Test (for nested models) | | 58 | | Mean Absolute Percentage Error (MAPE) | | | 59 | 60 | --- 61 | 62 | ## Model estimation flexibility and accessibility 63 | 64 | Users may edit any model training routine through accessing a list of function arguments. For machine learning techniques, this entails editing [caret](https://github.com/topepo/caret) arguments including: tuning grid, control grid, method, and accuracy metric. For univariate time series forecasting, this entails passing arguments to [forecast](https://github.com/robjhyndman/forecast) package model functions. For imputing missing variables, this entails passing arguments to [imputeTS](https://github.com/SteffenMoritz/imputeTS) package functions. 65 | 66 | A brief example using an `Arima` model to forecast univariate time series: 67 | 68 | # 1. create the central list of univariate model training arguments, univariate.forecast.training 69 | forecast_univariate.control_panel = instantiate.forecast_univariate.control_panel() 70 | 71 | # 2. select an item to edit, for example the Arima order to create an ARMA(1,1) 72 | # view default model arguments (there are none) 73 | forecast_univariate.control_panel$arguments[['Arima']] 74 | # add our own function arguments 75 | forecast_univariate.control_panel$arguments[['Arima']]$order = c(1,0,1) 76 | 77 | A brief example using the `Random Forest` to combine forecasts: 78 | 79 | # 1. create the central list of ML training arguments 80 | forecast_combinations.control_panel = instantiate.forecast_combinations.control_panel() 81 | 82 | # 2. 
select an item to edit, for example the random forest tuning grid 83 | # view default tuning grid 84 | forecast_combinations.control_panel$tuning.grids[['RF']] 85 | # edit tuning grid 86 | forecast_combinations.control_panel$tuning.grids[['RF']] = expand.grid(mtry = c(1:6)) 87 | --- 88 | ## Basic workflow 89 | #---------------------------------------- 90 | ### Forecasting Example 91 | #---------------------------------------- 92 | # pull and prepare data from FRED 93 | quantmod::getSymbols.FRED( 94 | c('UNRATE','INDPRO','GS10'), 95 | env = globalenv()) 96 | Data = cbind(UNRATE, INDPRO, GS10) 97 | Data = data.frame(Data, date = zoo::index(Data)) %>% 98 | dplyr::filter(lubridate::year(date) >= 1990) 99 | 100 | # run univariate forecasts 101 | forecast.uni = 102 | forecast_univariate( 103 | Data = dplyr::select(Data, date, UNRATE), 104 | forecast.dates = tail(Data$date,15), 105 | method = c('naive','auto.arima', 'ets'), 106 | horizon = 1, 107 | recursive = FALSE, 108 | 109 | # information set 110 | rolling.window = NA, 111 | freq = 'month', 112 | 113 | # outlier cleaning 114 | outlier.clean = FALSE, 115 | outlier.variables = NULL, 116 | outlier.bounds = c(0.05, 0.95), 117 | outlier.trim = FALSE, 118 | outlier.cross_section = FALSE, 119 | 120 | # impute missing 121 | impute.missing = FALSE, 122 | impute.method = 'kalman', 123 | impute.variables = NULL, 124 | impute.verbose = FALSE) 125 | 126 | # create multivariate forecasts 127 | forecast.multi = 128 | forecast_multivariate( 129 | Data = Data, 130 | forecast.date = tail(Data$date,15), 131 | target = 'UNRATE', 132 | horizon = 1, 133 | method = c('ols','lasso','ridge','elastic','GBM'), 134 | 135 | # information set 136 | rolling.window = NA, 137 | freq = 'month', 138 | 139 | # outlier cleaning 140 | outlier.clean = FALSE, 141 | outlier.variables = NULL, 142 | outlier.bounds = c(0.05, 0.95), 143 | outlier.trim = FALSE, 144 | outlier.cross_section = FALSE, 145 | 146 | # impute missing 147 | impute.missing = FALSE, 148 | 
impute.method = 'kalman', 149 | impute.variables = NULL, 150 | impute.verbose = FALSE, 151 | 152 | # dimension reduction 153 | reduce.data = FALSE, 154 | reduce.variables = NULL, 155 | reduce.ncomp = NULL, 156 | reduce.standardize = TRUE) 157 | 158 | # combine forecasts and add in observed values 159 | forecasts = 160 | dplyr::bind_rows( 161 | forecast.uni, 162 | forecast.multi) %>% 163 | dplyr::left_join( 164 | dplyr::select(Data, date, observed = UNRATE)) 165 | 166 | # forecast combinations 167 | forecast.combo = 168 | forecast_combine( 169 | forecasts, 170 | method = c('uniform','median','trimmed.mean', 171 | 'n.best','lasso','peLasso','RF'), 172 | burn.in = 5, 173 | n.max = 2) 174 | 175 | # merge forecast combinations back into forecasts 176 | forecasts = 177 | forecasts %>% 178 | dplyr::bind_rows(forecast.combo) 179 | 180 | # calculate forecast errors 181 | forecast.error = forecast_accuracy(forecasts) 182 | 183 | # view forecast errors from least to greatest 184 | # (best forecast to worst forecast method) 185 | forecast.error %>% 186 | dplyr::mutate_at(vars(-model), round, 3) %>% 187 | dplyr::arrange(MSE) 188 | 189 | # compare forecasts to the baseline (a random walk) 190 | forecast_comparison( 191 | forecasts, 192 | baseline.forecast = 'naive', 193 | test = 'ER', 194 | loss = 'MSE') %>% 195 | arrange(error.ratio) 196 | 197 | # chart forecasts 198 | chart = 199 | chart_forecast( 200 | forecasts, 201 | Title = 'US Unemployment Rate', 202 | Ylab = 'Index', 203 | Freq = 'Monthly') 204 | 205 | chart 206 | 207 | --- 208 | ## Contact 209 | If you should have questions, concerns, or wish to collaborate, please contact [Tyler J. 
Pike](https://tylerjpike.github.io/) 210 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Resubmission 2 | This is a resubmission. In this version I have: 3 | * Changed \dontrun to \donttest in function documentation examples 4 | * Added function documentation examples to all primary user-facing functions 5 | * Added \value to pipe.Rd 6 | 7 | ## Test environments 8 | * local Windows install, R 4.0.3 9 | * win-builder (devel and release) 10 | * Ubuntu 16.04.6 (on travis-ci), R 4.0.2 11 | * R-hub Ubuntu Linux 20.04.1 LTS, R-release 12 | * R-hub Fedora Linux, R-devel 13 | 14 | ## R CMD check results 15 | There were no ERRORs or WARNINGs. 16 | 17 | There is one NOTE since this is a new package submission. 18 | 19 | ## Downstream dependencies 20 | There are currently no downstream dependencies for this package. 
-------------------------------------------------------------------------------- /man/NBest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_combinations.R 3 | \name{NBest} 4 | \alias{NBest} 5 | \title{Select N-best forecasts} 6 | \usage{ 7 | NBest(forecasts, n.max, window = NA) 8 | } 9 | \arguments{ 10 | \item{forecasts}{data.frame: a data frame of forecasts to combine, assumes one column named "observed"} 11 | 12 | \item{n.max}{int: maximum number of forecasts to select} 13 | 14 | \item{window}{int: size of rolling window to evaluate forecast error over, use entire period if NA} 15 | } 16 | \value{ 17 | data.frame with n columns of the historically best forecasts 18 | } 19 | \description{ 20 | A function to subset the n-best forecasts; 21 | assumes column named observed. 22 | } 23 | -------------------------------------------------------------------------------- /man/chart_forecast.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_chart.R 3 | \name{chart_forecast} 4 | \alias{chart_forecast} 5 | \title{Chart forecasts} 6 | \usage{ 7 | chart_forecast(Data, Title, Ylab, Freq, zeroline = FALSE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: oos.forecast object} 11 | 12 | \item{Title}{string: chart title} 13 | 14 | \item{Ylab}{string: y-axis label} 15 | 16 | \item{Freq}{string: frequency (acts as sub-title)} 17 | 18 | \item{zeroline}{boolean: if TRUE then add a horizontal line at zero} 19 | } 20 | \value{ 21 | ggplot2 chart 22 | } 23 | \description{ 24 | Chart forecasts 25 | } 26 | \examples{ 27 | \donttest{ 28 | 29 | # simple time series 30 | A = c(1:100) + rnorm(100) 31 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 32 | Data = data.frame(date = date, A) 33 | 34 | # run 
forecast_univariate 35 | forecast.uni = 36 | forecast_univariate( 37 | Data = Data, 38 | forecast.dates = tail(Data$date,10), 39 | method = c('naive','auto.arima', 'ets'), 40 | horizon = 1, 41 | recursive = FALSE, 42 | freq = 'month') 43 | 44 | forecasts = 45 | dplyr::left_join( 46 | forecast.uni, 47 | data.frame(date, observed = A), 48 | by = 'date' 49 | ) 50 | 51 | # chart forecasts 52 | chart.forecast = 53 | chart_forecast( 54 | forecasts, 55 | Title = 'test', 56 | Ylab = 'Index', 57 | Freq = 'Monthly', 58 | zeroline = TRUE) 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /man/chart_forecast_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_chart.R 3 | \name{chart_forecast_error} 4 | \alias{chart_forecast_error} 5 | \title{Chart forecast errors} 6 | \usage{ 7 | chart_forecast_error(Data, Title, Ylab, Freq, zeroline = FALSE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: oos.forecast object} 11 | 12 | \item{Title}{string: chart title} 13 | 14 | \item{Ylab}{string: y-axis label} 15 | 16 | \item{Freq}{string: frequency (acts as sub-title)} 17 | 18 | \item{zeroline}{boolean: if TRUE then add a horizontal line at zero} 19 | } 20 | \value{ 21 | ggplot2 chart 22 | } 23 | \description{ 24 | Chart forecast errors 25 | } 26 | \examples{ 27 | \donttest{ 28 | 29 | # simple time series 30 | A = c(1:100) + rnorm(100) 31 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 32 | Data = data.frame(date = date, A) 33 | 34 | # run forecast_univariate 35 | forecast.uni = 36 | forecast_univariate( 37 | Data = Data, 38 | forecast.dates = tail(Data$date,10), 39 | method = c('naive','auto.arima', 'ets'), 40 | horizon = 1, 41 | recursive = FALSE, 42 | freq = 'month') 43 | 44 | forecasts = 45 | dplyr::left_join( 46 | forecast.uni, 47 | data.frame(date, observed = 
A), 48 | by = 'date' 49 | ) 50 | 51 | # chart forecast errors 52 | chart.errors = 53 | chart_forecast_error( 54 | forecasts, 55 | Title = 'test', 56 | Ylab = 'Index', 57 | Freq = 'Monthly', 58 | zeroline = TRUE) 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /man/data_impute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_impute} 4 | \alias{data_impute} 5 | \title{Impute missing values} 6 | \usage{ 7 | data_impute(Data, method = "kalman", variables = NULL, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 11 | 12 | \item{method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'} 13 | 14 | \item{variables}{string: vector of variables to standardize, default is all but 'date' column} 15 | 16 | \item{verbose}{boolean: show start-up status of impute.missing.routine} 17 | } 18 | \value{ 19 | data.frame with missing data imputed 20 | } 21 | \description{ 22 | A function to impute missing values. Is used as a data preparation helper function and is called internally 23 | by forecast_univariate, forecast_multivariate, and forecast_combine. 
24 | } 25 | -------------------------------------------------------------------------------- /man/data_outliers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_outliers} 4 | \alias{data_outliers} 5 | \title{Clean outliers} 6 | \usage{ 7 | data_outliers( 8 | Data, 9 | variables = NULL, 10 | w.bounds = c(0.05, 0.95), 11 | trim = FALSE, 12 | cross_section = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 17 | 18 | \item{variables}{string: vector of variables to standardize, default is all but 'date' column} 19 | 20 | \item{w.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 21 | 22 | \item{trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 23 | 24 | \item{cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)} 25 | } 26 | \value{ 27 | data.frame with a date column and one column per forecast method selected 28 | } 29 | \description{ 30 | A function to clean outliers. Is used as a data preparation helper function and is called internally 31 | by forecast_univariate, forecast_multivariate, and forecast_combine. 
32 | } 33 | -------------------------------------------------------------------------------- /man/data_reduction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_reduction} 4 | \alias{data_reduction} 5 | \title{Dimension reduction via principal components} 6 | \usage{ 7 | data_reduction(Data, variables = NULL, ncomp, standardize = TRUE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 11 | 12 | \item{variables}{string: vector of variables to standardize, default is all but 'date' column} 13 | 14 | \item{ncomp}{int: number of factors to create} 15 | 16 | \item{standardize}{boolean: normalize variables (mean zero, variance one) before estimating factors} 17 | } 18 | \value{ 19 | data.frame with a date column and one column per forecast method selected 20 | } 21 | \description{ 22 | A function to estimate principal components. 
23 | } 24 | -------------------------------------------------------------------------------- /man/data_subset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_subset} 4 | \alias{data_subset} 5 | \title{Create information set} 6 | \usage{ 7 | data_subset(Data, forecast.date, rolling.window, freq) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 11 | 12 | \item{forecast.date}{date: upper bound of information set} 13 | 14 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 15 | 16 | \item{freq}{string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors} 17 | } 18 | \value{ 19 | data.frame bounded by the given date range 20 | } 21 | \description{ 22 | A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation 23 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine. 24 | } 25 | -------------------------------------------------------------------------------- /man/forecast_accuracy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_metrics.R 3 | \name{forecast_accuracy} 4 | \alias{forecast_accuracy} 5 | \title{Calculate forecast accuracy} 6 | \usage{ 7 | forecast_accuracy(Data) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of forecasts, model names, and dates} 11 | } 12 | \value{ 13 | data.frame of numeric error results 14 | } 15 | \description{ 16 | A function to calculate various loss functions, including 17 | MSE, RMSE, MAE, and MAPE. 
18 | } 19 | \examples{ 20 | \donttest{ 21 | 22 | # simple time series 23 | A = c(1:100) + rnorm(100) 24 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 25 | Data = data.frame(date = date, A) 26 | 27 | # run forecast_univariate 28 | forecast.uni = 29 | forecast_univariate( 30 | Data = Data, 31 | forecast.dates = tail(Data$date,10), 32 | method = c('naive','auto.arima', 'ets'), 33 | horizon = 1, 34 | recursive = FALSE, 35 | freq = 'month') 36 | 37 | forecasts = 38 | dplyr::left_join( 39 | forecast.uni, 40 | data.frame(date, observed = A), 41 | by = 'date' 42 | ) 43 | 44 | # forecast accuracy 45 | forecast.accuracy = forecast_accuracy(forecasts) 46 | 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /man/forecast_combine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_combinations.R 3 | \name{forecast_combine} 4 | \alias{forecast_combine} 5 | \title{Forecast with forecast combinations} 6 | \usage{ 7 | forecast_combine( 8 | Data, 9 | method = "unform", 10 | n.max = NULL, 11 | rolling.window = NA, 12 | trim = c(0.5, 0.95), 13 | burn.in = 1, 14 | parallel.dates = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{Data}{data.frame: data frame of forecasted values to combine, assumes 'date' and 'observed' columns, but `observed' is not necessary for all methods} 19 | 20 | \item{method}{string: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'} 21 | 22 | \item{n.max}{int: maximum number of forecasts to select in n.best method} 23 | 24 | \item{rolling.window}{int: size of rolling window to evaluate forecast error over, use entire period if NA} 25 | 26 | \item{trim}{numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)} 27 | 28 | 
\item{burn.in}{int: the number of periods to use in the first model estimation} 29 | 30 | \item{parallel.dates}{int: the number of cores available for parallel estimation} 31 | } 32 | \value{ 33 | data.frame with a row for each combination method and forecasted date 34 | } 35 | \description{ 36 | A function to combine forecasts out-of-sample. Methods available include: 37 | uniform weights, median forecast, trimmed (winsorized) mean, n-best, 38 | ridge regression, lasso regression, elastic net, peLASSO, 39 | random forest, tree-based gradient boosting machine, and single-layer neural network. 40 | See package website for most up-to-date list of available models. 41 | } 42 | \examples{ 43 | \donttest{ 44 | # simple time series 45 | A = c(1:100) + rnorm(100) 46 | B = c(1:100) + rnorm(100) 47 | C = c(1:100) + rnorm(100) 48 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 49 | Data = data.frame(date = date, A, B, C) 50 | 51 | # run forecast_multivariate 52 | forecast.multi = 53 | forecast_multivariate( 54 | Data = Data, 55 | target = 'A', 56 | forecast.dates = tail(Data$date,5), 57 | method = c('ols','var'), 58 | horizon = 1, 59 | freq = 'month') 60 | # include observed values 61 | forecasts = 62 | dplyr::left_join( 63 | forecast.multi, 64 | data.frame(date, observed = A), 65 | by = 'date' 66 | ) 67 | 68 | # combine forecasts 69 | combinations = 70 | forecast_combine( 71 | forecasts, 72 | method = c('uniform','median','trimmed.mean', 73 | 'n.best','lasso','peLasso'), 74 | burn.in = 5, 75 | n.max = 2) 76 | } 77 | 78 | 79 | } 80 | -------------------------------------------------------------------------------- /man/forecast_comparison.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_metrics.R 3 | \name{forecast_comparison} 4 | \alias{forecast_comparison} 5 | \title{Compare forecast accuracy} 6 | \usage{ 7 | 
forecast_comparison( 8 | Data, 9 | baseline.forecast, 10 | test = "ER", 11 | loss = "MSE", 12 | horizon = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{Data}{data.frame: data frame of forecasts, model names, and dates} 17 | 18 | \item{baseline.forecast}{string: column name of baseline (null hypothesis) forecasts} 19 | 20 | \item{test}{string: which test to use; ER = error ratio, DM = Diebold-Mariano, CM = Clark and West} 21 | 22 | \item{loss}{string: error loss function to use if creating forecast error ratio} 23 | 24 | \item{horizon}{int: horizon of forecasts being compared in DM and CW tests} 25 | } 26 | \value{ 27 | numeric test result 28 | } 29 | \description{ 30 | A function to compare forecasts. Options include: simple forecast error ratios, 31 | \href{https://www.sas.upenn.edu/~fdiebold/papers/paper68/pa.dm.pdf}{Diebold-Mariano test}, and \href{https://www.nber.org/papers/t0326}{Clark and West test} for nested models 32 | } 33 | \examples{ 34 | \donttest{ 35 | 36 | # simple time series 37 | A = c(1:100) + rnorm(100) 38 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 39 | Data = data.frame(date = date, A) 40 | 41 | # run forecast_univariate 42 | forecast.uni = 43 | forecast_univariate( 44 | Data = Data, 45 | forecast.dates = tail(Data$date,10), 46 | method = c('naive','auto.arima', 'ets'), 47 | horizon = 1, 48 | recursive = FALSE, 49 | freq = 'month') 50 | 51 | forecasts = 52 | dplyr::left_join( 53 | forecast.uni, 54 | data.frame(date, observed = A), 55 | by = 'date' 56 | ) 57 | 58 | # run ER (MSE) 59 | er.ratio.mse = 60 | forecast_comparison( 61 | forecasts, 62 | baseline.forecast = 'naive', 63 | test = 'ER', 64 | loss = 'MSE') 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /man/forecast_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/data_preparation.R 3 | \name{forecast_date} 4 | \alias{forecast_date} 5 | \title{Set forecasted date} 6 | \usage{ 7 | forecast_date(forecast.date, horizon, freq) 8 | } 9 | \arguments{ 10 | \item{forecast.date}{date: date forecast was made} 11 | 12 | \item{horizon}{int: periods ahead of forecast} 13 | 14 | \item{freq}{string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors} 15 | } 16 | \value{ 17 | date vector 18 | } 19 | \description{ 20 | A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation 21 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine. 22 | } 23 | -------------------------------------------------------------------------------- /man/forecast_multivariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_multivariate.R 3 | \name{forecast_multivariate} 4 | \alias{forecast_multivariate} 5 | \title{Forecast with multivariate models} 6 | \usage{ 7 | forecast_multivariate( 8 | Data, 9 | forecast.dates, 10 | target, 11 | horizon, 12 | method, 13 | rolling.window = NA, 14 | freq, 15 | lag.variables = NULL, 16 | lag.n = NULL, 17 | outlier.clean = FALSE, 18 | outlier.variables = NULL, 19 | outlier.bounds = c(0.05, 0.95), 20 | outlier.trim = FALSE, 21 | outlier.cross_section = FALSE, 22 | impute.missing = FALSE, 23 | impute.method = "kalman", 24 | impute.variables = NULL, 25 | impute.verbose = FALSE, 26 | reduce.data = FALSE, 27 | reduce.variables = NULL, 28 | reduce.ncomp = NULL, 29 | reduce.standardize = TRUE, 30 | parallel.dates = NULL, 31 | return.models = FALSE, 32 | return.data = FALSE 33 | ) 34 | } 35 | \arguments{ 36 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a 
\code{ts}, \code{xts}, or \code{zoo} object to forecast} 37 | 38 | \item{forecast.dates}{date: dates forecasts are created} 39 | 40 | \item{target}{string: column name in Data of variable to forecast} 41 | 42 | \item{horizon}{int: number of periods into the future to forecast} 43 | 44 | \item{method}{string: methods to use} 45 | 46 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 47 | 48 | \item{freq}{string: time series frequency; day, week, month, quarter, year} 49 | 50 | \item{lag.variables}{string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables} 51 | 52 | \item{lag.n}{int: number of lags to create} 53 | 54 | \item{outlier.clean}{boolean: if TRUE then clean outliers} 55 | 56 | \item{outlier.variables}{string: vector of variables to purge of outlier, default is all but 'date' column} 57 | 58 | \item{outlier.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 59 | 60 | \item{outlier.trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 61 | 62 | \item{outlier.cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)} 63 | 64 | \item{impute.missing}{boolean: if TRUE then impute missing values} 65 | 66 | \item{impute.method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'} 67 | 68 | \item{impute.variables}{string: vector of variables to impute missing values, default is all numeric columns} 69 | 70 | \item{impute.verbose}{boolean: show start-up status of impute.missing.routine} 71 | 72 | \item{reduce.data}{boolean: if TRUE then reduce dimension} 73 | 74 | \item{reduce.variables}{string: vector of variables to impute missing values, default is all numeric columns} 75 | 76 | \item{reduce.ncomp}{int: number of factors 
to create} 77 | 78 | \item{reduce.standardize}{boolean: normalize variables (mean zero, variance one) before estimating factors} 79 | 80 | \item{parallel.dates}{int: the number of cores available for parallel estimation} 81 | 82 | \item{return.models}{boolean: if TRUE then return list of models estimated each forecast.date} 83 | 84 | \item{return.data}{boolean: if True then return list of information.set for each forecast.date} 85 | } 86 | \value{ 87 | data.frame with a row for each forecast by model and forecasted date 88 | } 89 | \description{ 90 | A function to estimate multivariate forecasts out-of-sample. Methods available include: 91 | vector auto-regression, linear regression, lasso regression, ridge regression, elastic net, 92 | random forest, tree-based gradient boosting machine, and single-layer neural network. 93 | See package website for most up-to-date list of available models. 94 | } 95 | \examples{ 96 | \donttest{ 97 | # simple time series 98 | A = c(1:100) + rnorm(100) 99 | B = c(1:100) + rnorm(100) 100 | C = c(1:100) + rnorm(100) 101 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 102 | Data = data.frame(date = date, A, B, C) 103 | 104 | # run forecast_univariate 105 | forecast.multi = 106 | forecast_multivariate( 107 | Data = Data, 108 | target = 'A', 109 | forecast.dates = tail(Data$date,5), 110 | method = c('ols','var'), 111 | horizon = 1, 112 | # information set 113 | rolling.window = NA, 114 | freq = 'month', 115 | # data prep 116 | lag.n = 4, 117 | outlier.clean = TRUE, 118 | impute.missing = TRUE) 119 | } 120 | 121 | 122 | } 123 | -------------------------------------------------------------------------------- /man/forecast_univariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_univariate.R 3 | \name{forecast_univariate} 4 | \alias{forecast_univariate} 5 | \title{Forecast with 
univariate models} 6 | \usage{ 7 | forecast_univariate( 8 | Data, 9 | forecast.dates, 10 | methods, 11 | horizon, 12 | recursive = TRUE, 13 | rolling.window = NA, 14 | freq, 15 | outlier.clean = FALSE, 16 | outlier.variables = NULL, 17 | outlier.bounds = c(0.05, 0.95), 18 | outlier.trim = FALSE, 19 | outlier.cross_section = FALSE, 20 | impute.missing = FALSE, 21 | impute.method = "kalman", 22 | impute.variables = NULL, 23 | impute.verbose = FALSE, 24 | parallel.dates = NULL, 25 | return.models = FALSE, 26 | return.data = FALSE 27 | ) 28 | } 29 | \arguments{ 30 | \item{Data}{data.frame: data frame of variable to forecast and a date column; may alternatively be a \code{ts}, \code{xts}, or \code{zoo} object to forecast} 31 | 32 | \item{forecast.dates}{date: dates forecasts are created} 33 | 34 | \item{methods}{string: models to estimate forecasts} 35 | 36 | \item{horizon}{int: number of periods to forecast} 37 | 38 | \item{recursive}{boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE} 39 | 40 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 41 | 42 | \item{freq}{string: time series frequency; day, week, month, quarter, year} 43 | 44 | \item{outlier.clean}{boolean: if TRUE then clean outliers} 45 | 46 | \item{outlier.variables}{string: vector of variables to purge of outliers, default is all but 'date' column} 47 | 48 | \item{outlier.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 49 | 50 | \item{outlier.trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 51 | 52 | \item{outlier.cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)} 53 | 54 | \item{impute.missing}{boolean: if TRUE then impute missing values} 55 | 56 | \item{impute.method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 
'mean', 'random', 'remove','replace', 'seadec', 'seasplit'} 57 | 58 | \item{impute.variables}{string: vector of variables to impute missing values, default is all numeric columns} 59 | 60 | \item{impute.verbose}{boolean: show start-up status of impute.missing.routine} 61 | 62 | \item{parallel.dates}{int: the number of cores available for parallel estimation} 63 | 64 | \item{return.models}{boolean: if TRUE then return list of models estimated each forecast.date} 65 | 66 | \item{return.data}{boolean: if TRUE then return list of information.set for each forecast.date} 67 | } 68 | \value{ 69 | data.frame with a row for each forecast by model and forecasted date 70 | } 71 | \description{ 72 | A function to estimate univariate forecasts out-of-sample. Methods available include all forecast 73 | methods from the \code{forecast} package. See package website for most up-to-date list of available models. 74 | } 75 | \examples{ 76 | \donttest{ 77 | # simple time series 78 | A = c(1:100) + rnorm(100) 79 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 80 | Data = data.frame(date = date, A) 81 | 82 | # estimate univariate forecasts 83 | forecast.uni = 84 | forecast_univariate( 85 | Data = Data, 86 | forecast.dates = tail(Data$date,5), 87 | method = c('naive','auto.arima', 'ets'), 88 | horizon = 1, 89 | recursive = FALSE, 90 | # information set 91 | rolling.window = NA, 92 | freq = 'month', 93 | # data prep 94 | outlier.clean = TRUE, 95 | impute.missing = TRUE) 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /man/instantiate.data_impute.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{instantiate.data_impute.control_panel} 4 | \alias{instantiate.data_impute.control_panel} 5 | \title{Create interface to control \code{data_impute} 
model estimation} 6 | \usage{ 7 | instantiate.data_impute.control_panel() 8 | } 9 | \value{ 10 | data_impute.control_panel 11 | } 12 | \description{ 13 | A function to create the data imputation method 14 | arguments list for user manipulation. 15 | } 16 | -------------------------------------------------------------------------------- /man/instantiate.forecast_combinations.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_combinations.R 3 | \name{instantiate.forecast_combinations.control_panel} 4 | \alias{instantiate.forecast_combinations.control_panel} 5 | \title{Create interface to control \code{forecast_combine} model estimation} 6 | \usage{ 7 | instantiate.forecast_combinations.control_panel(covariates = NULL) 8 | } 9 | \arguments{ 10 | \item{covariates}{int: the number of features that will go into the model} 11 | } 12 | \value{ 13 | forecast_combinations.control_panel 14 | } 15 | \description{ 16 | A function to create the forecast combination technique arguments list 17 | for user manipulation. 
18 | } 19 | -------------------------------------------------------------------------------- /man/instantiate.forecast_multivariate.ml.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_multivariate.R 3 | \name{instantiate.forecast_multivariate.ml.control_panel} 4 | \alias{instantiate.forecast_multivariate.ml.control_panel} 5 | \title{Create interface to control \code{forecast_multivariate} ML estimation} 6 | \usage{ 7 | instantiate.forecast_multivariate.ml.control_panel( 8 | covariates = NULL, 9 | rolling.window = NULL, 10 | horizon = NULL 11 | ) 12 | } 13 | \arguments{ 14 | \item{covariates}{int: the number of features that will go into the model} 15 | 16 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 17 | 18 | \item{horizon}{int: number of periods into the future to forecast} 19 | } 20 | \value{ 21 | forecast_multivariate.ml.control_panel 22 | } 23 | \description{ 24 | A function to create the multivariate forecast methods 25 | arguments list for user manipulation. 26 | } 27 | -------------------------------------------------------------------------------- /man/instantiate.forecast_multivariate.var.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_multivariate.R 3 | \name{instantiate.forecast_multivariate.var.control_panel} 4 | \alias{instantiate.forecast_multivariate.var.control_panel} 5 | \title{Create interface to control \code{forecast_multivariate} VAR estimation} 6 | \usage{ 7 | instantiate.forecast_multivariate.var.control_panel() 8 | } 9 | \value{ 10 | forecast_multivariate.var.control_panel 11 | } 12 | \description{ 13 | A function to create the multivariate forecast methods 14 | arguments list for user manipulation. 
15 | } 16 | -------------------------------------------------------------------------------- /man/instantiate.forecast_univariate.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_univariate.R 3 | \name{instantiate.forecast_univariate.control_panel} 4 | \alias{instantiate.forecast_univariate.control_panel} 5 | \title{Create interface to control \code{forecast_univariate} model estimation} 6 | \usage{ 7 | instantiate.forecast_univariate.control_panel() 8 | } 9 | \value{ 10 | forecast_univariate.control_panel 11 | } 12 | \description{ 13 | A function to create the univariate forecast method arguments list 14 | for user manipulation. 15 | } 16 | -------------------------------------------------------------------------------- /man/loss_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_metrics.R 3 | \name{loss_function} 4 | \alias{loss_function} 5 | \title{Calculate error via loss functions} 6 | \usage{ 7 | loss_function(forecast, observed, metric = "MSE") 8 | } 9 | \arguments{ 10 | \item{forecast}{numeric: vector of forecasted values} 11 | 12 | \item{observed}{numeric: vector of observed values} 13 | 14 | \item{metric}{string: loss function} 15 | } 16 | \value{ 17 | numeric test result 18 | } 19 | \description{ 20 | A function to calculate various error loss functions. Options include: 21 | MSE, RMSE, MAE, and MAPE. The default is MSE loss. 
22 | } 23 | -------------------------------------------------------------------------------- /man/n.lag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{n.lag} 4 | \alias{n.lag} 5 | \title{Create n lags} 6 | \usage{ 7 | n.lag(Data, lags, variables = NULL) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of variables to lag and a 'date' column} 11 | 12 | \item{lags}{int: number of lags to create} 13 | 14 | \item{variables}{string: vector of variable names to lag, default is all non-date variables} 15 | } 16 | \value{ 17 | data.frame 18 | } 19 | \description{ 20 | A function to create 1 through n lags of a set of variables. Is used as a data preparation 21 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine. 22 | } 23 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external_imports.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \value{ 10 | magrittr pipe operator \%>\% 11 | } 12 | \description{ 13 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/standardize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{standardize} 4 | \alias{standardize} 5 | \title{Standardize variables (mean 0, variance 1)} 6 | \usage{ 7 | standardize(X) 8 | } 9 | \arguments{ 10 | \item{X}{numeric: vector to be standardized} 11 | } 12 | \value{ 13 | numeric vector of standardized values 14 | } 15 | \description{ 16 | Standardize variables (mean 0, variance 1) 17 | } 18 | -------------------------------------------------------------------------------- /man/winsorize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{winsorize} 4 | \alias{winsorize} 5 | \title{Winsorize or trim variables} 6 | \usage{ 7 | winsorize(X, bounds, trim = FALSE) 8 | } 9 | \arguments{ 10 | \item{X}{numeric: vector to be winsorized or trimmed} 11 | 12 | \item{bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 13 | 14 | \item{trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 15 | } 16 | \value{ 17 | numeric vector of winsorized or trimmed values 18 | } 19 | \description{ 20 | Winsorize or trim variables 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(OOS) 3 | 4 | test_check("OOS") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_chart.R: -------------------------------------------------------------------------------- 1 | 
test_that("forecast chart", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 6 | Data = data.frame(date = date, A) 7 | 8 | # run forecast_univariate 9 | forecast.uni = 10 | forecast_univariate( 11 | Data = Data, 12 | forecast.dates = tail(Data$date,10), 13 | method = c('naive','auto.arima', 'ets'), 14 | horizon = 1, 15 | recursive = FALSE, 16 | freq = 'month') 17 | 18 | forecasts = 19 | dplyr::left_join( 20 | forecast.uni, 21 | data.frame(date, observed = A), 22 | by = 'date' 23 | ) 24 | 25 | # chart forecasts 26 | chart.forecast = 27 | chart_forecast( 28 | forecasts, 29 | Title = 'test', 30 | Ylab = 'Index', 31 | Freq = 'Monthly', 32 | zeroline = TRUE) 33 | 34 | expect_true(exists('chart.forecast'), 'Chart is not created.') 35 | 36 | # chart forecast errors 37 | chart.errors = 38 | chart_forecast_error( 39 | forecasts, 40 | Title = 'test', 41 | Ylab = 'Index', 42 | Freq = 'Monthly', 43 | zeroline = TRUE) 44 | 45 | expect_true(exists('chart.errors'), 'Chart is not created.') 46 | 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_combination.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_combine produces standard output", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | B = c(1:100) + rnorm(100) 6 | C = c(1:100) + rnorm(100) 7 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 8 | Data = data.frame(date = date, A, B, C) 9 | 10 | # run forecast_univariate 11 | forecast.multi = 12 | forecast_multivariate( 13 | Data = Data, 14 | target = 'A', 15 | forecast.dates = tail(Data$date,5), 16 | method = c('ols','var'), 17 | horizon = 1, 18 | freq = 'month') 19 | 20 | forecasts = 21 | dplyr::left_join( 22 | forecast.multi, 23 | data.frame(date, observed = A), 24 | by = 'date' 25 | ) 26 | 27 | # combine 
forecasts 28 | combinations = 29 | forecast_combine( 30 | forecasts, 31 | method = c('uniform','median','trimmed.mean', 32 | 'n.best','lasso','peLasso'), 33 | burn.in = 5, 34 | n.max = 2) 35 | 36 | # expect formats 37 | expect_true(is.data.frame(combinations), 'forecast_combine is not a proper data.frame') 38 | 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_metrics.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_comparison", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 6 | Data = data.frame(date = date, A) 7 | 8 | # create forecasts 9 | forecast.uni = 10 | forecast_univariate( 11 | Data = Data, 12 | forecast.dates = tail(Data$date,10), 13 | method = c('naive','auto.arima', 'ets'), 14 | horizon = 1, 15 | recursive = FALSE, 16 | freq = 'month') 17 | 18 | forecasts = 19 | dplyr::left_join( 20 | forecast.uni, 21 | data.frame(date, observed = A), 22 | by = 'date' 23 | ) 24 | 25 | # run ER (MSE) 26 | er.ratio.mse = 27 | forecast_comparison( 28 | forecasts, 29 | baseline.forecast = 'naive', 30 | test = 'ER', 31 | loss = 'MSE') 32 | 33 | expect_true(is.data.frame(er.ratio.mse),'Error ratio (MSE) is not a data.frame') 34 | expect_true(!is.na(mean(er.ratio.mse$error.ratio)) | 35 | !is.nan(mean(er.ratio.mse$error.ratio)),'Error ratio (MSE) is NA or NAN') 36 | 37 | # run ER (RMSE) 38 | er.ratio.rmse = 39 | forecast_comparison( 40 | forecasts, 41 | baseline.forecast = 'naive', 42 | test = 'ER', 43 | loss = 'RMSE') 44 | 45 | expect_true(is.data.frame(er.ratio.rmse),'Error ratio (RMSE) is not a data.frame') 46 | expect_true(!is.na(mean(er.ratio.rmse$error.ratio)) | 47 | !is.nan(mean(er.ratio.rmse$error.ratio)),'Error ratio (RMSE) is NA or NAN') 48 | 49 | # run ER (MAE) 50 | er.ratio.mae = 51 | forecast_comparison( 52 | forecasts, 53 | 
baseline.forecast = 'naive', 54 | test = 'ER', 55 | loss = 'MAE') 56 | 57 | expect_true(is.data.frame(er.ratio.mae),'Error ratio (MAPE) is not a data.frame') 58 | expect_true(!is.na(mean(er.ratio.mae$error.ratio)) | 59 | !is.nan(mean(er.ratio.mae$error.ratio)),'Error ratio (MAPE) is NA or NAN') 60 | 61 | # run ER (MAPE) 62 | er.ratio.mape = 63 | forecast_comparison( 64 | forecasts, 65 | baseline.forecast = 'naive', 66 | test = 'ER', 67 | loss = 'MAPE') 68 | 69 | expect_true(is.data.frame(er.ratio.mape),'Error ratio (MAPE) is not a data.frame') 70 | expect_true(!is.na(mean(er.ratio.mape$error.ratio)) | 71 | !is.nan(mean(er.ratio.mape$error.ratio)),'Error ratio (MAPE) is NA or NAN') 72 | 73 | # run DM test 74 | dm.test = 75 | forecast_comparison( 76 | forecasts, 77 | baseline.forecast = 'naive', 78 | test = 'DM') 79 | 80 | expect_true(is.data.frame(dm.test),'DM test is not a data.frame') 81 | expect_true(!is.na(mean(dm.test$error.ratio)) | 82 | !is.nan(mean(dm.test$error.ratio)),'DM test is NA or NAN') 83 | 84 | # run DM test 85 | cw.test = 86 | forecast_comparison( 87 | forecasts, 88 | baseline.forecast = 'naive', 89 | test = 'CW', 90 | horizon = 1) 91 | 92 | expect_true(is.data.frame(cw.test),'DM test is not a data.frame') 93 | expect_true(!is.na(mean(cw.test$error.ratio)) | 94 | !is.nan(mean(cw.test$error.ratio)),'DM test is NA or NAN') 95 | 96 | }) 97 | 98 | test_that("forecast_accuracy", { 99 | 100 | # simple time series 101 | A = c(1:100) + rnorm(100) 102 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 103 | Data = data.frame(date = date, A) 104 | 105 | # create forecast 106 | forecast.uni = 107 | forecast_univariate( 108 | Data = Data, 109 | forecast.dates = tail(Data$date,10), 110 | method = c('naive','auto.arima', 'ets'), 111 | horizon = 1, 112 | recursive = FALSE, 113 | freq = 'month') 114 | 115 | forecasts = 116 | dplyr::left_join( 117 | forecast.uni, 118 | data.frame(date, observed = A), 119 | by = 'date' 120 | ) 121 | 122 
| # forecast accuracy 123 | forecast.accuracy = forecast_accuracy(forecasts) 124 | 125 | expect_true(is.data.frame(forecast.accuracy),'Accuracy is not a data.frame') 126 | expect_true(!is.na(sum(forecast.accuracy[,2:5])),'Accuracy is NA or NAN') 127 | 128 | 129 | }) 130 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_multivariate.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_multivariate produces standard output", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | B = c(1:100) + rnorm(100) 6 | C = c(1:100) + rnorm(100) 7 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 8 | Data = data.frame(date = date, A, B, C) 9 | 10 | # run forecast_univariate 11 | forecast.multi = 12 | try( 13 | forecast_multivariate( 14 | Data = Data, 15 | target = 'A', 16 | forecast.dates = tail(Data$date,5), 17 | method = c('ols','var'), 18 | horizon = 1, 19 | # information set 20 | rolling.window = NA, 21 | freq = 'month', 22 | # data prep 23 | lag.n = 4, 24 | outlier.clean = TRUE, 25 | impute.missing = TRUE, 26 | # return 27 | return.models = TRUE, 28 | return.data = TRUE) 29 | ) 30 | 31 | # expect formats 32 | expect_true(is.data.frame(forecast.multi$forecasts), 'forecasts is not a proper data.frame') 33 | expect_true(is.list(forecast.multi$models), 'models is not a proper list') 34 | expect_true(is.list(forecast.multi$information.set), 'information set is not a proper list') 35 | 36 | # expect proper names and numbers of outputs 37 | expect_equal(names(forecast.multi$models), as.character(tail(Data$date,5))) 38 | expect_equal(names(forecast.multi$information.set), as.character(tail(Data$date,5))) 39 | 40 | 41 | }) 42 | 43 | test_that("forecast_multivariate produces standard output", { 44 | 45 | # simple time series 46 | A = c(1:100) + rnorm(100) 47 | B = c(1:100) + rnorm(100) 48 | C = c(1:100) + rnorm(100) 49 | date = 
seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 50 | Data = data.frame(date = date, A, B, C) 51 | 52 | # run forecast_univariate 53 | forecast.multi = 54 | try( 55 | forecast_multivariate( 56 | Data = Data, 57 | target = 'A', 58 | forecast.dates = tail(Data$date,5), 59 | method = c('ols','var'), 60 | horizon = 1, 61 | # information set 62 | rolling.window = NA, 63 | freq = 'month', 64 | # data prep 65 | lag.n = 4, 66 | outlier.clean = TRUE, 67 | impute.missing = TRUE, 68 | reduce.data = TRUE, 69 | reduce.ncomp = 1, 70 | return.models = TRUE, 71 | return.data = TRUE, 72 | ) 73 | ) 74 | 75 | # expect formats 76 | expect_true(is.data.frame(forecast.multi$forecasts), 'forecasts is not a proper data.frame') 77 | expect_true(is.list(forecast.multi$models), 'models is not a proper list') 78 | expect_true(is.list(forecast.multi$information.set), 'information set is not a proper list') 79 | 80 | # expect proper names and numbers of outputs 81 | expect_equal(names(forecast.multi$models), as.character(tail(Data$date,5))) 82 | expect_equal(names(forecast.multi$information.set), as.character(tail(Data$date,5))) 83 | 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_univariate.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_univariate (direct projection) produces standard output", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 6 | Data = data.frame(date = date, A) 7 | 8 | # run forecast_univariate 9 | forecast.uni = 10 | try( 11 | forecast_univariate( 12 | Data = Data, 13 | forecast.dates = tail(Data$date,5), 14 | method = c('naive','auto.arima', 'ets'), 15 | horizon = 1, 16 | recursive = FALSE, 17 | # information set 18 | rolling.window = NA, 19 | freq = 'month', 20 | # data prep 21 | outlier.clean = TRUE, 22 | impute.missing = 
TRUE, 23 | # return 24 | return.models = TRUE, 25 | return.data = TRUE) 26 | ) 27 | 28 | # expect formats 29 | expect_true(is.data.frame(forecast.uni$forecasts), 'forecasts is not a proper data.frame') 30 | expect_true(is.list(forecast.uni$models), 'models is not a proper list') 31 | expect_true(is.list(forecast.uni$information.set), 'information set is not a proper list') 32 | 33 | # expect proper names and numbers of outputs 34 | expect_equal(names(forecast.uni$models), as.character(tail(Data$date,5))) 35 | expect_equal(names(forecast.uni$information.set), as.character(tail(Data$date,5))) 36 | 37 | }) 38 | 39 | test_that("forecast_univariate (recursive) produces standard output", { 40 | 41 | # simple time series 42 | A = c(1:100) + rnorm(100) 43 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 44 | Data = data.frame(date = date, A) 45 | 46 | # run forecast_univariate 47 | forecast.uni = 48 | try( 49 | forecast_univariate( 50 | Data = Data, 51 | forecast.dates = tail(Data$date,5), 52 | method = c('naive','auto.arima', 'ets'), 53 | horizon = 1, 54 | recursive = TRUE, 55 | # information set 56 | rolling.window = NA, 57 | freq = 'month', 58 | # data prep 59 | outlier.clean = TRUE, 60 | impute.missing = TRUE, 61 | # return 62 | return.models = TRUE, 63 | return.data = TRUE) 64 | ) 65 | 66 | # expect formats 67 | expect_true(is.data.frame(forecast.uni$forecasts), 'rercursive forecasts is not a proper data.frame') 68 | expect_true(is.list(forecast.uni$models), 'rercursive models is not a proper list') 69 | expect_true(is.list(forecast.uni$information.set), 'rercursive information set is not a proper list') 70 | 71 | # expect proper names and numbers of outputs 72 | expect_equal(names(forecast.uni$models), as.character(tail(Data$date,5))) 73 | expect_equal(names(forecast.uni$information.set), as.character(tail(Data$date,5))) 74 | 75 | }) 76 | -------------------------------------------------------------------------------- 
/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/basic_introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Basic Introduction to OOS" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Basic Introduction to OOS} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\usepackage[utf8]{inputenc} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup, include=FALSE, warning = FALSE, message = FALSE, results = 'hide'} 18 | knitr::opts_chunk$set(echo = TRUE) 19 | ``` 20 | 21 | This introduction demonstrates how one may conduct a basic forecasting exercise with OOS. For more detailed examples and documentation, please see the [OOS](https://tylerjpike.github.io/OOS/) website. 22 | 23 | ## 0. Environment 24 | ```{r} 25 | library(OOS) 26 | ``` 27 | 28 | ## 1. Data 29 | 30 | ```{r} 31 | # pull and prepare data from FRED 32 | quantmod::getSymbols.FRED( 33 | c('UNRATE','INDPRO','GS10'), 34 | env = globalenv()) 35 | Data = cbind(UNRATE, INDPRO, GS10) 36 | Data = data.frame(Data, date = zoo::index(Data)) %>% 37 | dplyr::filter(lubridate::year(date) >= 1990) %>% 38 | na.omit() 39 | 40 | # make industrial production and 10-year Treasury stationary 41 | Data = Data %>% 42 | dplyr::mutate( 43 | GS10 = GS10 - dplyr::lag(GS10), 44 | INDPRO = (INDPRO - lag(INDPRO, 12))/lag(INDPRO, 12)) 45 | 46 | # start data when all three variables are available 47 | # (this is not necessary, but it will suppress warnings for us) 48 | Data = dplyr::filter(Data, date >= as.Date('1954-01-01')) 49 | ``` 50 | 51 | ## 2. 
Forecasting 52 | 53 | ```{r, warning=FALSE} 54 | # run univariate forecasts 55 | forecast.uni = 56 | forecast_univariate( 57 | Data = dplyr::select(Data, date, UNRATE), 58 | forecast.dates = tail(Data$date,5), 59 | method = c('naive'), #,'auto.arima', 'ets'), 60 | horizon = 1, 61 | recursive = FALSE, 62 | rolling.window = NA, 63 | freq = 'month') 64 | ``` 65 | 66 | 67 | ```{r, warning=FALSE} 68 | # create multivariate forecasts 69 | forecast.multi = 70 | forecast_multivariate( 71 | Data = Data, 72 | forecast.date = tail(Data$date,5), 73 | target = 'UNRATE', 74 | horizon = 1, 75 | method = c('lasso'), 76 | rolling.window = NA, 77 | freq = 'month') 78 | ``` 79 | 80 | 81 | ```{r, warning=FALSE} 82 | # combine forecasts and add in observed values 83 | forecasts = 84 | dplyr::bind_rows( 85 | forecast.uni, 86 | forecast.multi) %>% 87 | dplyr::left_join( 88 | dplyr::select(Data, date, observed = UNRATE), 89 | by = 'date') 90 | 91 | # forecast combinations 92 | forecast.combo = 93 | forecast_combine( 94 | forecasts, 95 | method = c('uniform','median','trimmed.mean')) 96 | ``` 97 | 98 | ## 3. Forecast Analysis 99 | ```{r, warning=FALSE} 100 | # merge forecast combinations back into forecasts 101 | forecasts = 102 | forecasts %>% 103 | dplyr::bind_rows(forecast.combo) 104 | 105 | # calculate forecast errors 106 | forecast.error = forecast_accuracy(forecasts) 107 | 108 | # view forecast errors from least to greatest 109 | # (best forecast to worst forecast method) 110 | forecast.error %>% 111 | dplyr::mutate_at(vars(-model), round, 3) %>% 112 | dplyr::arrange(MSE) 113 | 114 | # compare forecasts to the baseline (a random walk) 115 | forecast_comparison( 116 | forecasts, 117 | baseline.forecast = 'naive', 118 | test = 'ER', 119 | loss = 'MSE') %>% 120 | dplyr::arrange(error.ratio) 121 | 122 | # chart forecasts 123 | chart = 124 | chart_forecast( 125 | forecasts, 126 | Title = 'US Unemployment Rate', 127 | Ylab = 'Index', 128 | Freq = 'Monthly') 129 | 130 | ``` 131 | 132 | 
--------------------------------------------------------------------------------