├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── OOS.Rproj ├── R ├── data_preparation.R ├── external_imports.R ├── forecast_chart.R ├── forecast_combinations.R ├── forecast_metrics.R ├── forecast_multivariate.R └── forecast_univariate.R ├── README.md ├── codecov.yml ├── cran-comments.md ├── man ├── NBest.Rd ├── chart_forecast.Rd ├── chart_forecast_error.Rd ├── data_impute.Rd ├── data_outliers.Rd ├── data_reduction.Rd ├── data_subset.Rd ├── forecast_accuracy.Rd ├── forecast_combine.Rd ├── forecast_comparison.Rd ├── forecast_date.Rd ├── forecast_multivariate.Rd ├── forecast_univariate.Rd ├── instantiate.data_impute.control_panel.Rd ├── instantiate.forecast_combinations.control_panel.Rd ├── instantiate.forecast_multivariate.ml.control_panel.Rd ├── instantiate.forecast_multivariate.var.control_panel.Rd ├── instantiate.forecast_univariate.control_panel.Rd ├── loss_function.Rd ├── n.lag.Rd ├── pipe.Rd ├── standardize.Rd └── winsorize.Rd ├── tests ├── testthat.R └── testthat │ ├── test-forecast_chart.R │ ├── test-forecast_combination.R │ ├── test-forecast_metrics.R │ ├── test-forecast_multivariate.R │ └── test-forecast_univariate.R └── vignettes ├── .gitignore └── basic_introduction.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github 4 | ^articles$ 5 | ^docs$ 6 | ^codecov\.yml$ 7 | ^.travis\.yml$ 8 | ^LICENSE\.md$ 9 | ^cran-comments\.md$ 10 | ^to-do\.md$ 11 | ^CRAN-RELEASE$ 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | *.csv 6 | to-do.md 7 | to-do.html 8 | inst/doc 9 | CRAN-RELEASE* 10 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: r 2 | 3 | sudo: required 4 | 5 | env: _R_CHECK_CRAN_INCOMING_=FALSE 6 | 7 | r_packages: 8 | - covr 9 | - devtools 10 | 11 | r_github_packages: 12 | - tylerJPike/OOS 13 | 14 | after_success: 15 | - Rscript -e 'covr::codecov()'  # token supplied via the secure CODECOV_TOKEN environment variable; never commit it 16 | - Rscript -e 'devtools::check()' -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: OOS 2 | Title: Out-of-Sample Time Series Forecasting 3 | Version: 1.0.0 4 | Authors@R: 5 | person(given = "Tyler J.", 6 | family = "Pike", 7 | role = c("aut", "cre"), 8 | email = "tjpike7@gmail.com") 9 | Description: A comprehensive and cohesive API for the out-of-sample forecasting workflow: 10 | data preparation, forecasting - including both traditional econometric time series models and 11 | modern machine learning techniques - forecast combination, model and error analysis, and 12 | forecast visualization.
13 | License: GPL-3 14 | URL: https://github.com/tylerJPike/OOS, 15 | https://tylerjpike.github.io/OOS/ 16 | BugReports: https://github.com/tylerJPike/OOS/issues 17 | Encoding: UTF-8 18 | LazyData: true 19 | Roxygen: list(markdown = TRUE) 20 | RoxygenNote: 7.1.1 21 | VignetteBuilder: knitr 22 | Depends: 23 | R (>= 4.0.0) 24 | Imports: 25 | caret, 26 | dplyr, 27 | forecast, 28 | furrr, 29 | future, 30 | ggplot2, 31 | glmnet, 32 | imputeTS, 33 | lmtest, 34 | lubridate, 35 | magrittr, 36 | purrr, 37 | sandwich, 38 | stats, 39 | tidyr, 40 | vars, 41 | xts, 42 | zoo 43 | Suggests: 44 | knitr, 45 | testthat, 46 | rmarkdown, 47 | quantmod 48 | 49 | 50 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU General Public License 2 | ========================== 3 | 4 | _Version 3, 29 June 2007_ 5 | _Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_ 6 | 7 | Everyone is permitted to copy and distribute verbatim copies of this license 8 | document, but changing it is not allowed. 9 | 10 | ## Preamble 11 | 12 | The GNU General Public License is a free, copyleft license for software and other 13 | kinds of works. 14 | 15 | The licenses for most software and other practical works are designed to take away 16 | your freedom to share and change the works. By contrast, the GNU General Public 17 | License is intended to guarantee your freedom to share and change all versions of a 18 | program--to make sure it remains free software for all its users. We, the Free 19 | Software Foundation, use the GNU General Public License for most of our software; it 20 | applies also to any other work released this way by its authors. You can apply it to 21 | your programs, too. 22 | 23 | When we speak of free software, we are referring to freedom, not price.
Our General 24 | Public Licenses are designed to make sure that you have the freedom to distribute 25 | copies of free software (and charge for them if you wish), that you receive source 26 | code or can get it if you want it, that you can change the software or use pieces of 27 | it in new free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you these rights or 30 | asking you to surrender the rights. Therefore, you have certain responsibilities if 31 | you distribute copies of the software, or if you modify it: responsibilities to 32 | respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether gratis or for a fee, 35 | you must pass on to the recipients the same freedoms that you received. You must make 36 | sure that they, too, receive or can get the source code. And you must show them these 37 | terms so they know their rights. 38 | 39 | Developers that use the GNU GPL protect your rights with two steps: **(1)** assert 40 | copyright on the software, and **(2)** offer you this License giving you legal permission 41 | to copy, distribute and/or modify it. 42 | 43 | For the developers' and authors' protection, the GPL clearly explains that there is 44 | no warranty for this free software. For both users' and authors' sake, the GPL 45 | requires that modified versions be marked as changed, so that their problems will not 46 | be attributed erroneously to authors of previous versions. 47 | 48 | Some devices are designed to deny users access to install or run modified versions of 49 | the software inside them, although the manufacturer can do so. This is fundamentally 50 | incompatible with the aim of protecting users' freedom to change the software. The 51 | systematic pattern of such abuse occurs in the area of products for individuals to 52 | use, which is precisely where it is most unacceptable. 
Therefore, we have designed 53 | this version of the GPL to prohibit the practice for those products. If such problems 54 | arise substantially in other domains, we stand ready to extend this provision to 55 | those domains in future versions of the GPL, as needed to protect the freedom of 56 | users. 57 | 58 | Finally, every program is threatened constantly by software patents. States should 59 | not allow patents to restrict development and use of software on general-purpose 60 | computers, but in those that do, we wish to avoid the special danger that patents 61 | applied to a free program could make it effectively proprietary. To prevent this, the 62 | GPL assures that patents cannot be used to render the program non-free. 63 | 64 | The precise terms and conditions for copying, distribution and modification follow. 65 | 66 | ## TERMS AND CONDITIONS 67 | 68 | ### 0. Definitions 69 | 70 | “This License” refers to version 3 of the GNU General Public License. 71 | 72 | “Copyright” also means copyright-like laws that apply to other kinds of 73 | works, such as semiconductor masks. 74 | 75 | “The Program” refers to any copyrightable work licensed under this 76 | License. Each licensee is addressed as “you”. “Licensees” and 77 | “recipients” may be individuals or organizations. 78 | 79 | To “modify” a work means to copy from or adapt all or part of the work in 80 | a fashion requiring copyright permission, other than the making of an exact copy. The 81 | resulting work is called a “modified version” of the earlier work or a 82 | work “based on” the earlier work. 83 | 84 | A “covered work” means either the unmodified Program or a work based on 85 | the Program. 86 | 87 | To “propagate” a work means to do anything with it that, without 88 | permission, would make you directly or secondarily liable for infringement under 89 | applicable copyright law, except executing it on a computer or modifying a private 90 | copy. 
Propagation includes copying, distribution (with or without modification), 91 | making available to the public, and in some countries other activities as well. 92 | 93 | To “convey” a work means any kind of propagation that enables other 94 | parties to make or receive copies. Mere interaction with a user through a computer 95 | network, with no transfer of a copy, is not conveying. 96 | 97 | An interactive user interface displays “Appropriate Legal Notices” to the 98 | extent that it includes a convenient and prominently visible feature that **(1)** 99 | displays an appropriate copyright notice, and **(2)** tells the user that there is no 100 | warranty for the work (except to the extent that warranties are provided), that 101 | licensees may convey the work under this License, and how to view a copy of this 102 | License. If the interface presents a list of user commands or options, such as a 103 | menu, a prominent item in the list meets this criterion. 104 | 105 | ### 1. Source Code 106 | 107 | The “source code” for a work means the preferred form of the work for 108 | making modifications to it. “Object code” means any non-source form of a 109 | work. 110 | 111 | A “Standard Interface” means an interface that either is an official 112 | standard defined by a recognized standards body, or, in the case of interfaces 113 | specified for a particular programming language, one that is widely used among 114 | developers working in that language. 115 | 116 | The “System Libraries” of an executable work include anything, other than 117 | the work as a whole, that **(a)** is included in the normal form of packaging a Major 118 | Component, but which is not part of that Major Component, and **(b)** serves only to 119 | enable use of the work with that Major Component, or to implement a Standard 120 | Interface for which an implementation is available to the public in source code form. 
121 | A “Major Component”, in this context, means a major essential component 122 | (kernel, window system, and so on) of the specific operating system (if any) on which 123 | the executable work runs, or a compiler used to produce the work, or an object code 124 | interpreter used to run it. 125 | 126 | The “Corresponding Source” for a work in object code form means all the 127 | source code needed to generate, install, and (for an executable work) run the object 128 | code and to modify the work, including scripts to control those activities. However, 129 | it does not include the work's System Libraries, or general-purpose tools or 130 | generally available free programs which are used unmodified in performing those 131 | activities but which are not part of the work. For example, Corresponding Source 132 | includes interface definition files associated with source files for the work, and 133 | the source code for shared libraries and dynamically linked subprograms that the work 134 | is specifically designed to require, such as by intimate data communication or 135 | control flow between those subprograms and other parts of the work. 136 | 137 | The Corresponding Source need not include anything that users can regenerate 138 | automatically from other parts of the Corresponding Source. 139 | 140 | The Corresponding Source for a work in source code form is that same work. 141 | 142 | ### 2. Basic Permissions 143 | 144 | All rights granted under this License are granted for the term of copyright on the 145 | Program, and are irrevocable provided the stated conditions are met. This License 146 | explicitly affirms your unlimited permission to run the unmodified Program. The 147 | output from running a covered work is covered by this License only if the output, 148 | given its content, constitutes a covered work. This License acknowledges your rights 149 | of fair use or other equivalent, as provided by copyright law. 
150 | 151 | You may make, run and propagate covered works that you do not convey, without 152 | conditions so long as your license otherwise remains in force. You may convey covered 153 | works to others for the sole purpose of having them make modifications exclusively 154 | for you, or provide you with facilities for running those works, provided that you 155 | comply with the terms of this License in conveying all material for which you do not 156 | control copyright. Those thus making or running the covered works for you must do so 157 | exclusively on your behalf, under your direction and control, on terms that prohibit 158 | them from making any copies of your copyrighted material outside their relationship 159 | with you. 160 | 161 | Conveying under any other circumstances is permitted solely under the conditions 162 | stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 163 | 164 | ### 3. Protecting Users' Legal Rights From Anti-Circumvention Law 165 | 166 | No covered work shall be deemed part of an effective technological measure under any 167 | applicable law fulfilling obligations under article 11 of the WIPO copyright treaty 168 | adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention 169 | of such measures. 170 | 171 | When you convey a covered work, you waive any legal power to forbid circumvention of 172 | technological measures to the extent such circumvention is effected by exercising 173 | rights under this License with respect to the covered work, and you disclaim any 174 | intention to limit operation or modification of the work as a means of enforcing, 175 | against the work's users, your or third parties' legal rights to forbid circumvention 176 | of technological measures. 177 | 178 | ### 4. 
Conveying Verbatim Copies 179 | 180 | You may convey verbatim copies of the Program's source code as you receive it, in any 181 | medium, provided that you conspicuously and appropriately publish on each copy an 182 | appropriate copyright notice; keep intact all notices stating that this License and 183 | any non-permissive terms added in accord with section 7 apply to the code; keep 184 | intact all notices of the absence of any warranty; and give all recipients a copy of 185 | this License along with the Program. 186 | 187 | You may charge any price or no price for each copy that you convey, and you may offer 188 | support or warranty protection for a fee. 189 | 190 | ### 5. Conveying Modified Source Versions 191 | 192 | You may convey a work based on the Program, or the modifications to produce it from 193 | the Program, in the form of source code under the terms of section 4, provided that 194 | you also meet all of these conditions: 195 | 196 | * **a)** The work must carry prominent notices stating that you modified it, and giving a 197 | relevant date. 198 | * **b)** The work must carry prominent notices stating that it is released under this 199 | License and any conditions added under section 7. This requirement modifies the 200 | requirement in section 4 to “keep intact all notices”. 201 | * **c)** You must license the entire work, as a whole, under this License to anyone who 202 | comes into possession of a copy. This License will therefore apply, along with any 203 | applicable section 7 additional terms, to the whole of the work, and all its parts, 204 | regardless of how they are packaged. This License gives no permission to license the 205 | work in any other way, but it does not invalidate such permission if you have 206 | separately received it. 
207 | * **d)** If the work has interactive user interfaces, each must display Appropriate Legal 208 | Notices; however, if the Program has interactive interfaces that do not display 209 | Appropriate Legal Notices, your work need not make them do so. 210 | 211 | A compilation of a covered work with other separate and independent works, which are 212 | not by their nature extensions of the covered work, and which are not combined with 213 | it such as to form a larger program, in or on a volume of a storage or distribution 214 | medium, is called an “aggregate” if the compilation and its resulting 215 | copyright are not used to limit the access or legal rights of the compilation's users 216 | beyond what the individual works permit. Inclusion of a covered work in an aggregate 217 | does not cause this License to apply to the other parts of the aggregate. 218 | 219 | ### 6. Conveying Non-Source Forms 220 | 221 | You may convey a covered work in object code form under the terms of sections 4 and 222 | 5, provided that you also convey the machine-readable Corresponding Source under the 223 | terms of this License, in one of these ways: 224 | 225 | * **a)** Convey the object code in, or embodied in, a physical product (including a 226 | physical distribution medium), accompanied by the Corresponding Source fixed on a 227 | durable physical medium customarily used for software interchange. 
228 | * **b)** Convey the object code in, or embodied in, a physical product (including a 229 | physical distribution medium), accompanied by a written offer, valid for at least 230 | three years and valid for as long as you offer spare parts or customer support for 231 | that product model, to give anyone who possesses the object code either **(1)** a copy of 232 | the Corresponding Source for all the software in the product that is covered by this 233 | License, on a durable physical medium customarily used for software interchange, for 234 | a price no more than your reasonable cost of physically performing this conveying of 235 | source, or **(2)** access to copy the Corresponding Source from a network server at no 236 | charge. 237 | * **c)** Convey individual copies of the object code with a copy of the written offer to 238 | provide the Corresponding Source. This alternative is allowed only occasionally and 239 | noncommercially, and only if you received the object code with such an offer, in 240 | accord with subsection 6b. 241 | * **d)** Convey the object code by offering access from a designated place (gratis or for 242 | a charge), and offer equivalent access to the Corresponding Source in the same way 243 | through the same place at no further charge. You need not require recipients to copy 244 | the Corresponding Source along with the object code. If the place to copy the object 245 | code is a network server, the Corresponding Source may be on a different server 246 | (operated by you or a third party) that supports equivalent copying facilities, 247 | provided you maintain clear directions next to the object code saying where to find 248 | the Corresponding Source. Regardless of what server hosts the Corresponding Source, 249 | you remain obligated to ensure that it is available for as long as needed to satisfy 250 | these requirements. 
251 | * **e)** Convey the object code using peer-to-peer transmission, provided you inform 252 | other peers where the object code and Corresponding Source of the work are being 253 | offered to the general public at no charge under subsection 6d. 254 | 255 | A separable portion of the object code, whose source code is excluded from the 256 | Corresponding Source as a System Library, need not be included in conveying the 257 | object code work. 258 | 259 | A “User Product” is either **(1)** a “consumer product”, which 260 | means any tangible personal property which is normally used for personal, family, or 261 | household purposes, or **(2)** anything designed or sold for incorporation into a 262 | dwelling. In determining whether a product is a consumer product, doubtful cases 263 | shall be resolved in favor of coverage. For a particular product received by a 264 | particular user, “normally used” refers to a typical or common use of 265 | that class of product, regardless of the status of the particular user or of the way 266 | in which the particular user actually uses, or expects or is expected to use, the 267 | product. A product is a consumer product regardless of whether the product has 268 | substantial commercial, industrial or non-consumer uses, unless such uses represent 269 | the only significant mode of use of the product. 270 | 271 | “Installation Information” for a User Product means any methods, 272 | procedures, authorization keys, or other information required to install and execute 273 | modified versions of a covered work in that User Product from a modified version of 274 | its Corresponding Source. The information must suffice to ensure that the continued 275 | functioning of the modified object code is in no case prevented or interfered with 276 | solely because modification has been made. 
277 | 278 | If you convey an object code work under this section in, or with, or specifically for 279 | use in, a User Product, and the conveying occurs as part of a transaction in which 280 | the right of possession and use of the User Product is transferred to the recipient 281 | in perpetuity or for a fixed term (regardless of how the transaction is 282 | characterized), the Corresponding Source conveyed under this section must be 283 | accompanied by the Installation Information. But this requirement does not apply if 284 | neither you nor any third party retains the ability to install modified object code 285 | on the User Product (for example, the work has been installed in ROM). 286 | 287 | The requirement to provide Installation Information does not include a requirement to 288 | continue to provide support service, warranty, or updates for a work that has been 289 | modified or installed by the recipient, or for the User Product in which it has been 290 | modified or installed. Access to a network may be denied when the modification itself 291 | materially and adversely affects the operation of the network or violates the rules 292 | and protocols for communication across the network. 293 | 294 | Corresponding Source conveyed, and Installation Information provided, in accord with 295 | this section must be in a format that is publicly documented (and with an 296 | implementation available to the public in source code form), and must require no 297 | special password or key for unpacking, reading or copying. 298 | 299 | ### 7. Additional Terms 300 | 301 | “Additional permissions” are terms that supplement the terms of this 302 | License by making exceptions from one or more of its conditions. Additional 303 | permissions that are applicable to the entire Program shall be treated as though they 304 | were included in this License, to the extent that they are valid under applicable 305 | law. 
If additional permissions apply only to part of the Program, that part may be 306 | used separately under those permissions, but the entire Program remains governed by 307 | this License without regard to the additional permissions. 308 | 309 | When you convey a copy of a covered work, you may at your option remove any 310 | additional permissions from that copy, or from any part of it. (Additional 311 | permissions may be written to require their own removal in certain cases when you 312 | modify the work.) You may place additional permissions on material, added by you to a 313 | covered work, for which you have or can give appropriate copyright permission. 314 | 315 | Notwithstanding any other provision of this License, for material you add to a 316 | covered work, you may (if authorized by the copyright holders of that material) 317 | supplement the terms of this License with terms: 318 | 319 | * **a)** Disclaiming warranty or limiting liability differently from the terms of 320 | sections 15 and 16 of this License; or 321 | * **b)** Requiring preservation of specified reasonable legal notices or author 322 | attributions in that material or in the Appropriate Legal Notices displayed by works 323 | containing it; or 324 | * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that 325 | modified versions of such material be marked in reasonable ways as different from the 326 | original version; or 327 | * **d)** Limiting the use for publicity purposes of names of licensors or authors of the 328 | material; or 329 | * **e)** Declining to grant rights under trademark law for use of some trade names, 330 | trademarks, or service marks; or 331 | * **f)** Requiring indemnification of licensors and authors of that material by anyone 332 | who conveys the material (or modified versions of it) with contractual assumptions of 333 | liability to the recipient, for any liability that these contractual assumptions 334 | directly impose on those 
licensors and authors. 335 | 336 | All other non-permissive additional terms are considered “further 337 | restrictions” within the meaning of section 10. If the Program as you received 338 | it, or any part of it, contains a notice stating that it is governed by this License 339 | along with a term that is a further restriction, you may remove that term. If a 340 | license document contains a further restriction but permits relicensing or conveying 341 | under this License, you may add to a covered work material governed by the terms of 342 | that license document, provided that the further restriction does not survive such 343 | relicensing or conveying. 344 | 345 | If you add terms to a covered work in accord with this section, you must place, in 346 | the relevant source files, a statement of the additional terms that apply to those 347 | files, or a notice indicating where to find the applicable terms. 348 | 349 | Additional terms, permissive or non-permissive, may be stated in the form of a 350 | separately written license, or stated as exceptions; the above requirements apply 351 | either way. 352 | 353 | ### 8. Termination 354 | 355 | You may not propagate or modify a covered work except as expressly provided under 356 | this License. Any attempt otherwise to propagate or modify it is void, and will 357 | automatically terminate your rights under this License (including any patent licenses 358 | granted under the third paragraph of section 11). 359 | 360 | However, if you cease all violation of this License, then your license from a 361 | particular copyright holder is reinstated **(a)** provisionally, unless and until the 362 | copyright holder explicitly and finally terminates your license, and **(b)** permanently, 363 | if the copyright holder fails to notify you of the violation by some reasonable means 364 | prior to 60 days after the cessation. 
365 | 366 | Moreover, your license from a particular copyright holder is reinstated permanently 367 | if the copyright holder notifies you of the violation by some reasonable means, this 368 | is the first time you have received notice of violation of this License (for any 369 | work) from that copyright holder, and you cure the violation prior to 30 days after 370 | your receipt of the notice. 371 | 372 | Termination of your rights under this section does not terminate the licenses of 373 | parties who have received copies or rights from you under this License. If your 374 | rights have been terminated and not permanently reinstated, you do not qualify to 375 | receive new licenses for the same material under section 10. 376 | 377 | ### 9. Acceptance Not Required for Having Copies 378 | 379 | You are not required to accept this License in order to receive or run a copy of the 380 | Program. Ancillary propagation of a covered work occurring solely as a consequence of 381 | using peer-to-peer transmission to receive a copy likewise does not require 382 | acceptance. However, nothing other than this License grants you permission to 383 | propagate or modify any covered work. These actions infringe copyright if you do not 384 | accept this License. Therefore, by modifying or propagating a covered work, you 385 | indicate your acceptance of this License to do so. 386 | 387 | ### 10. Automatic Licensing of Downstream Recipients 388 | 389 | Each time you convey a covered work, the recipient automatically receives a license 390 | from the original licensors, to run, modify and propagate that work, subject to this 391 | License. You are not responsible for enforcing compliance by third parties with this 392 | License. 393 | 394 | An “entity transaction” is a transaction transferring control of an 395 | organization, or substantially all assets of one, or subdividing an organization, or 396 | merging organizations. 
If propagation of a covered work results from an entity 397 | transaction, each party to that transaction who receives a copy of the work also 398 | receives whatever licenses to the work the party's predecessor in interest had or 399 | could give under the previous paragraph, plus a right to possession of the 400 | Corresponding Source of the work from the predecessor in interest, if the predecessor 401 | has it or can get it with reasonable efforts. 402 | 403 | You may not impose any further restrictions on the exercise of the rights granted or 404 | affirmed under this License. For example, you may not impose a license fee, royalty, 405 | or other charge for exercise of rights granted under this License, and you may not 406 | initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging 407 | that any patent claim is infringed by making, using, selling, offering for sale, or 408 | importing the Program or any portion of it. 409 | 410 | ### 11. Patents 411 | 412 | A “contributor” is a copyright holder who authorizes use under this 413 | License of the Program or a work on which the Program is based. The work thus 414 | licensed is called the contributor's “contributor version”. 415 | 416 | A contributor's “essential patent claims” are all patent claims owned or 417 | controlled by the contributor, whether already acquired or hereafter acquired, that 418 | would be infringed by some manner, permitted by this License, of making, using, or 419 | selling its contributor version, but do not include claims that would be infringed 420 | only as a consequence of further modification of the contributor version. For 421 | purposes of this definition, “control” includes the right to grant patent 422 | sublicenses in a manner consistent with the requirements of this License. 
423 | 424 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license 425 | under the contributor's essential patent claims, to make, use, sell, offer for sale, 426 | import and otherwise run, modify and propagate the contents of its contributor 427 | version. 428 | 429 | In the following three paragraphs, a “patent license” is any express 430 | agreement or commitment, however denominated, not to enforce a patent (such as an 431 | express permission to practice a patent or covenant not to sue for patent 432 | infringement). To “grant” such a patent license to a party means to make 433 | such an agreement or commitment not to enforce a patent against the party. 434 | 435 | If you convey a covered work, knowingly relying on a patent license, and the 436 | Corresponding Source of the work is not available for anyone to copy, free of charge 437 | and under the terms of this License, through a publicly available network server or 438 | other readily accessible means, then you must either **(1)** cause the Corresponding 439 | Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the 440 | patent license for this particular work, or **(3)** arrange, in a manner consistent with 441 | the requirements of this License, to extend the patent license to downstream 442 | recipients. “Knowingly relying” means you have actual knowledge that, but 443 | for the patent license, your conveying the covered work in a country, or your 444 | recipient's use of the covered work in a country, would infringe one or more 445 | identifiable patents in that country that you have reason to believe are valid. 
446 | 447 | If, pursuant to or in connection with a single transaction or arrangement, you 448 | convey, or propagate by procuring conveyance of, a covered work, and grant a patent 449 | license to some of the parties receiving the covered work authorizing them to use, 450 | propagate, modify or convey a specific copy of the covered work, then the patent 451 | license you grant is automatically extended to all recipients of the covered work and 452 | works based on it. 453 | 454 | A patent license is “discriminatory” if it does not include within the 455 | scope of its coverage, prohibits the exercise of, or is conditioned on the 456 | non-exercise of one or more of the rights that are specifically granted under this 457 | License. You may not convey a covered work if you are a party to an arrangement with 458 | a third party that is in the business of distributing software, under which you make 459 | payment to the third party based on the extent of your activity of conveying the 460 | work, and under which the third party grants, to any of the parties who would receive 461 | the covered work from you, a discriminatory patent license **(a)** in connection with 462 | copies of the covered work conveyed by you (or copies made from those copies), or **(b)** 463 | primarily for and in connection with specific products or compilations that contain 464 | the covered work, unless you entered into that arrangement, or that patent license 465 | was granted, prior to 28 March 2007. 466 | 467 | Nothing in this License shall be construed as excluding or limiting any implied 468 | license or other defenses to infringement that may otherwise be available to you 469 | under applicable patent law. 470 | 471 | ### 12. No Surrender of Others' Freedom 472 | 473 | If conditions are imposed on you (whether by court order, agreement or otherwise) 474 | that contradict the conditions of this License, they do not excuse you from the 475 | conditions of this License. 
If you cannot convey a covered work so as to satisfy 476 | simultaneously your obligations under this License and any other pertinent 477 | obligations, then as a consequence you may not convey it at all. For example, if you 478 | agree to terms that obligate you to collect a royalty for further conveying from 479 | those to whom you convey the Program, the only way you could satisfy both those terms 480 | and this License would be to refrain entirely from conveying the Program. 481 | 482 | ### 13. Use with the GNU Affero General Public License 483 | 484 | Notwithstanding any other provision of this License, you have permission to link or 485 | combine any covered work with a work licensed under version 3 of the GNU Affero 486 | General Public License into a single combined work, and to convey the resulting work. 487 | The terms of this License will continue to apply to the part which is the covered 488 | work, but the special requirements of the GNU Affero General Public License, section 489 | 13, concerning interaction through a network will apply to the combination as such. 490 | 491 | ### 14. Revised Versions of this License 492 | 493 | The Free Software Foundation may publish revised and/or new versions of the GNU 494 | General Public License from time to time. Such new versions will be similar in spirit 495 | to the present version, but may differ in detail to address new problems or concerns. 496 | 497 | Each version is given a distinguishing version number. If the Program specifies that 498 | a certain numbered version of the GNU General Public License “or any later 499 | version” applies to it, you have the option of following the terms and 500 | conditions either of that numbered version or of any later version published by the 501 | Free Software Foundation. If the Program does not specify a version number of the GNU 502 | General Public License, you may choose any version ever published by the Free 503 | Software Foundation. 
504 | 505 | If the Program specifies that a proxy can decide which future versions of the GNU 506 | General Public License can be used, that proxy's public statement of acceptance of a 507 | version permanently authorizes you to choose that version for the Program. 508 | 509 | Later license versions may give you additional or different permissions. However, no 510 | additional obligations are imposed on any author or copyright holder as a result of 511 | your choosing to follow a later version. 512 | 513 | ### 15. Disclaimer of Warranty 514 | 515 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 516 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 517 | PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER 518 | EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 519 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE 520 | QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE 521 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 522 | 523 | ### 16. Limitation of Liability 524 | 525 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY 526 | COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS 527 | PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, 528 | INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 529 | PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE 530 | OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE 531 | WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 532 | POSSIBILITY OF SUCH DAMAGES. 533 | 534 | ### 17. 
Interpretation of Sections 15 and 16 535 | 536 | If the disclaimer of warranty and limitation of liability provided above cannot be 537 | given local legal effect according to their terms, reviewing courts shall apply local 538 | law that most closely approximates an absolute waiver of all civil liability in 539 | connection with the Program, unless a warranty or assumption of liability accompanies 540 | a copy of the Program in return for a fee. 541 | 542 | _END OF TERMS AND CONDITIONS_ 543 | 544 | ## How to Apply These Terms to Your New Programs 545 | 546 | If you develop a new program, and you want it to be of the greatest possible use to 547 | the public, the best way to achieve this is to make it free software which everyone 548 | can redistribute and change under these terms. 549 | 550 | To do so, attach the following notices to the program. It is safest to attach them 551 | to the start of each source file to most effectively state the exclusion of warranty; 552 | and each file should have at least the “copyright” line and a pointer to 553 | where the full notice is found. 554 | 555 | 556 | Copyright (C) 2021 Tyler J. Pike 557 | 558 | This program is free software: you can redistribute it and/or modify 559 | it under the terms of the GNU General Public License as published by 560 | the Free Software Foundation, either version 3 of the License, or 561 | (at your option) any later version. 562 | 563 | This program is distributed in the hope that it will be useful, 564 | but WITHOUT ANY WARRANTY; without even the implied warranty of 565 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 566 | GNU General Public License for more details. 567 | 568 | You should have received a copy of the GNU General Public License 569 | along with this program. If not, see . 570 | 571 | Also add information on how to contact you by electronic and paper mail. 
572 | 573 | If the program does terminal interaction, make it output a short notice like this 574 | when it starts in an interactive mode: 575 | 576 | OOS Copyright (C) 2021 Tyler J. Pike 577 | This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. 578 | This is free software, and you are welcome to redistribute it 579 | under certain conditions; type 'show c' for details. 580 | 581 | The hypothetical commands `show w` and `show c` should show the appropriate parts of 582 | the General Public License. Of course, your program's commands might be different; 583 | for a GUI interface, you would use an “about box”. 584 | 585 | You should also get your employer (if you work as a programmer) or school, if any, to 586 | sign a “copyright disclaimer” for the program, if necessary. For more 587 | information on this, and how to apply and follow the GNU GPL, see 588 | <https://www.gnu.org/licenses/>. 589 | 590 | The GNU General Public License does not permit incorporating your program into 591 | proprietary programs. If your program is a subroutine library, you may consider it 592 | more useful to permit linking proprietary applications with the library. If this is 593 | what you want to do, use the GNU Lesser General Public License instead of this 594 | License. But first, please read 595 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
596 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(NBest) 5 | export(chart_forecast) 6 | export(chart_forecast_error) 7 | export(data_impute) 8 | export(data_outliers) 9 | export(data_reduction) 10 | export(data_subset) 11 | export(forecast_accuracy) 12 | export(forecast_combine) 13 | export(forecast_comparison) 14 | export(forecast_multivariate) 15 | export(forecast_univariate) 16 | export(instantiate.data_impute.control_panel) 17 | export(instantiate.forecast_combinations.control_panel) 18 | export(instantiate.forecast_multivariate.ml.control_panel) 19 | export(instantiate.forecast_multivariate.var.control_panel) 20 | export(instantiate.forecast_univariate.control_panel) 21 | export(loss_function) 22 | export(n.lag) 23 | export(standardize) 24 | export(winsorize) 25 | import(stats) 26 | importFrom(magrittr,"%>%") 27 | -------------------------------------------------------------------------------- /OOS.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /R/data_preparation.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------- 2 | # data cleaning helper 
# functions
#---------------------------------------------
#' Standardize variables (mean 0, variance 1)
#'
#' @param X numeric: vector to be standardized
#'
#' @return numeric vector of standardized values
#'
#' @export
standardize = function(X){return((X - mean(X, na.rm = TRUE)) / sd(X, na.rm = TRUE))}

#' Winsorize or trim variables
#'
#' @param X numeric: vector to be winsorized or trimmed
#' @param trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
#' @param bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
#'
#' @return numeric vector of winsorized or trimmed values
#'
#' @export
winsorize = function(X, bounds, trim = FALSE){

  # percentile cutoffs taken from the observed (non-missing) distribution
  qq = quantile(X, probs = bounds, na.rm = TRUE)

  if(trim == FALSE){
    # cap values at the percentile bounds
    X[X <= qq[1]] = qq[1]
    X[X >= qq[2]] = qq[2]
  }else{
    # remove (set to NA) values outside the percentile bounds
    X[X <= qq[1]] = NA
    X[X >= qq[2]] = NA
  }

  return(X)
}

#' Create information set
#'
#' A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param forecast.date date: upper bound of information set
#' @param rolling.window int: size of rolling window, NA if expanding window is used
#' @param freq string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors
#'
#' @return data.frame bounded by the given date range
#'
#' @export
data_subset = function(
  Data,
  forecast.date,
  rolling.window,
  freq
){

  # 1. using expanding window
  if(is.na(rolling.window)){
    information.set =
      dplyr::filter(Data, date <= forecast.date)

  # 2. using rolling window
  }else{
    rolling.window.start = forecast.date

    # shift the window start back `rolling.window` periods at the stated frequency
    if(freq == 'day'){
      rolling.window.start = forecast.date - rolling.window
    }else if(freq == 'week'){
      lubridate::week(rolling.window.start) = lubridate::week(forecast.date) - rolling.window
    }else if(freq == 'month'){
      lubridate::month(rolling.window.start) = lubridate::month(forecast.date) - rolling.window
    }else if(freq == 'quarter'){
      lubridate::month(rolling.window.start) = lubridate::month(forecast.date) - rolling.window*3
    }else if(freq == 'year'){
      lubridate::year(rolling.window.start) = lubridate::year(forecast.date) - rolling.window
    }

    information.set =
      dplyr::filter(Data, rolling.window.start <= date & date <= forecast.date )
  }

  return(information.set)
}

#' Set forecasted date
#'
#' A function to shift a forecast origin date forward by `horizon` periods at the stated
#' frequency, producing the date the forecast refers to. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param forecast.date date: date forecast was made
#' @param horizon int: periods ahead of forecast
#' @param freq string: time series frequency; day, week, month, quarter, year
#'
#' @return date vector
#'
#'
forecast_date = function(
  forecast.date,
  horizon,
  freq
){

  date = forecast.date

  # advance the date `horizon` periods at the stated frequency
  if(freq == 'day'){
    date = forecast.date + horizon
  }else if(freq == 'week'){
    lubridate::week(date) = lubridate::week(date) + horizon
  }else if(freq == 'month'){
    lubridate::month(date) = lubridate::month(date) + horizon
  }else if(freq == 'quarter'){
    lubridate::month(date) = lubridate::month(date) + horizon*3
  }else if(freq == 'year'){
    lubridate::year(date) = lubridate::year(date) + horizon
  }

  return(date)
}

#' Create n lags
#'
#' A function to create 1 through n lags of a set of variables. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#' @param Data data.frame: data frame of variables to lag and a 'date' column
#' @param lags int: number of lags to create
#' @param variables string: vector of variable names to lag, default is all non-date variables
#'
#' @return data.frame
#'
#' @export
n.lag = function(
  Data,            # data.frame: data frame of variables to lag and a 'date' column
  lags,            # int: number of lags to create
  variables = NULL # string: vector of variable names to lag, default is all non-date variables
){

  # default to lagging every non-date column
  if(is.null(variables)){
    variables = names(dplyr::select(Data, -date))
  }

  # create lag 0 (the original data) through lag `lags`, then join all copies by date
  Data = c(0:lags) %>%
    purrr::map(
      .f = function(n){

        if(n == 0){return(Data)}

        X = Data %>%
          dplyr::mutate_at(variables, dplyr::lag, n)

        # suffix lagged columns with '.l<n>' so the joins do not collide
        names(X)[names(X) != 'date'] = paste0(names(X)[names(X) != 'date'], '.l', n)

        return(X)
      }
    ) %>%
    purrr::reduce(dplyr::full_join, by = 'date')


  return(Data)
}

#---------------------------------------------
# Clean outliers
#---------------------------------------------
#' Clean outliers
#'
#' A function to clean outliers. Is used as a data preparation helper function and is called internally
#' by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param variables string: vector of variables to clean, default is all numeric columns
#' @param w.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
#' @param trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
#' @param cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
#'
#' @return data.frame with outliers winsorized or trimmed
#'
#' @export
data_outliers = function(
  Data,                     # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  variables = NULL,         # string: vector of variables to clean, default is all numeric columns
  w.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
  trim = FALSE,             # boolean: if TRUE then replace outliers with NA instead of winsorizing bound
  cross_section = FALSE     # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
){


  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # bug fix: was print(errorCondition(...)), which printed a condition object
  # and continued executing; raise a real error instead
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    stop('Variables cleaned for outliers must be numeric.')
  }

  # clean outliers (column wise)
  if(cross_section == FALSE){
    # bug fix: the winsorized result was previously assigned to a discarded
    # temporary (`test`), so the function returned the data unchanged
    Data = Data %>%
      dplyr::mutate_at(dplyr::vars(variables), winsorize, bounds = w.bounds, trim = trim)

  # clean outliers (row wise)
  }else{
    # NOTE(review): rowwise() makes mutate_at() pass each cell as a length-1
    # vector to winsorize(), which may not implement the documented
    # cross-section semantics - confirm intended behavior
    Data = Data %>%
      dplyr::rowwise() %>%
      dplyr::mutate_at(dplyr::vars(variables), winsorize, bounds = w.bounds, trim = trim) %>%
      dplyr::ungroup()
  }

  # return results
  return(Data)
}


#---------------------------------------------
# Impute missing
#---------------------------------------------
#' Create interface to control `data_impute` model estimation
#'
#' A function to create the data imputation method
#' arguments list for user manipulation.
#'
#' @return data_impute.control_panel
#'
#' @export
instantiate.data_impute.control_panel = function(){

  # methods: imputeTS routines referenced by name so the package is only
  # needed when a method is actually dispatched by data_impute()
  methods = list(
    interpolation = 'imputeTS::na_interpolation',
    kalman = 'imputeTS::na_kalman',  # consistency fix: was an unquoted function reference, unlike every other entry
    locf = 'imputeTS::na_locf',
    ma = 'imputeTS::na_ma',
    mean = 'imputeTS::na_mean',
    random = 'imputeTS::na_random',
    remove = 'imputeTS::na_remove',  # bug fix: was 'imputeTS:na_remove' (single colon), an unresolvable name
    replace = 'imputeTS::na_replace',
    seadec = 'imputeTS::na_seadec',
    seasplit = 'imputeTS::na_seasplit'
  )

  # arguments: one (initially empty) argument list per method; data_impute()
  # fills in $x before dispatching
  arguments = list(
    interpolation = NULL,
    kalman = NULL,
    locf = NULL,
    ma = NULL,
    mean = NULL,
    random = NULL,
    remove = NULL,
    replace = NULL,
    seadec = NULL,
    seasplit = NULL
  )

  return(
    list(
      method = methods,
      arguments = arguments
    )
  )

}

#' Impute missing values
#'
#' A function to impute missing values. Is used as a data preparation helper function and is called internally
#' by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
#' @param variables string: vector of variables to impute missing values, default is all numeric columns
#' @param verbose boolean: show start-up status of data_impute.control_panel
#'
#' @return data.frame with missing data imputed
#'
#' @export
data_impute = function(
  Data,              # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  method = 'kalman', # string: select which method to use from the imputeTS package
  variables = NULL,  # string: vector of variables to impute missing values, default is all numeric columns
  verbose = FALSE    # boolean: show start-up status of data_impute.control_panel
){

  # training parameter creation and warnings
  # NOTE(review): exists() searches the calling environment chain, so a
  # user-defined data_impute.control_panel (e.g. in the global environment)
  # takes precedence over the default - confirm this is intended
  if(verbose == TRUE){
    if(exists("data_impute.control_panel")){
      print(warningCondition('data_impute.control_panel exists and will be used to impute missing data in its present state.'))
    }else{
      data_impute.control_panel = instantiate.data_impute.control_panel()
      print(warningCondition('data_impute.control_panel was instantiated and default values will be used for to impute missing data.'))
    }
  }else{
    if(!exists("data_impute.control_panel")){data_impute.control_panel = instantiate.data_impute.control_panel()}
  }

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # bug fix: was print(errorCondition(...)) with a copy-pasted outlier message;
  # raise a real error with an accurate message
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    stop('Variables to impute must be numeric.')
  }

  # impute missing values one variable at a time
  for(v in variables){
    data_impute.control_panel$arguments[[method]]$x = Data[,c(v)]

    # bug fix: do.call() cannot look up a 'pkg::fun' character name directly,
    # so resolve method strings to the exported function first
    impute.fun = data_impute.control_panel$method[[method]]
    if(is.character(impute.fun)){
      fun.parts = strsplit(impute.fun, '::', fixed = TRUE)[[1]]
      impute.fun = getExportedValue(fun.parts[1], fun.parts[2])
    }

    Data[,c(v)] =
      do.call(what = impute.fun,
              args = data_impute.control_panel$arguments[[method]])
  }

  # return results
  return(Data)
}


#---------------------------------------------
# Dimension reduction
#---------------------------------------------
#' Dimension reduction via principal components
#'
#' A function to estimate principal components.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param variables string: vector of variables to include, default is all numeric columns
#' @param ncomp int: number of factors to create
#' @param standardize boolean: normalize variables (mean zero, variance one) before estimating factors
#'
#' @return data.frame with a date column and ncomp principal component columns
#'
#' @export

data_reduction = function(
  Data,               # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  variables = NULL,   # string: vector of variables to include, default is all numeric columns
  ncomp,              # int: number of factors to create
  standardize = TRUE  # boolean: normalize variables (mean zero, variance one) before estimating factors
){

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # bug fix: was print(errorCondition(...)) with a copy-pasted outlier message;
  # raise a real error with an accurate message
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    stop('Variables used for dimension reduction must be numeric.')
  }

  # remove missing observations (princomp cannot handle NA)
  information.set = na.omit(Data)

  # standardize variables
  # bug fix: the `standardize` argument was previously ignored and
  # standardization was always applied
  if(standardize == TRUE){
    information.set = information.set %>%
      dplyr::mutate_at(dplyr::vars(variables), OOS::standardize)
  }

  # estimate factors
  model.pc = stats::princomp(dplyr::select(information.set, -date))

  # project the data onto the first ncomp loadings
  factors = as.matrix(dplyr::select(information.set, -date)) %*% model.pc$loadings[,1:ncomp]

  # label factors
  colnames(factors) = paste0('pc.',c(1:ncomp))

  factors =
    data.frame(factors,
               date = information.set$date)

  # return results
  return(factors)
}
# -------------------------------------------------------------------------------- /R/external_imports.R:
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @return magrittr pipe operator %>%
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL

#' @import stats
NULL
# -------------------------------------------------------------------------------- /R/forecast_chart.R:
#----------------------------------------
### Basic forecast chart
#----------------------------------------
#' Chart forecasts
#'
#' @param Data data.frame: oos.forecast object
#' @param Title string: chart title
#' @param Ylab string: y-axis label
#' @param Freq string: frequency (acts as sub-title)
#' @param zeroline boolean: if TRUE then add a horizontal line at zero
#'
#' @return ggplot2 chart
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'      forecast_univariate(
#'          Data = Data,
#'          forecast.dates = tail(Data$date,10),
#'          method = c('naive','auto.arima', 'ets'),
#'          horizon = 1,
#'          recursive = FALSE,
#'          freq = 'month')
#'
#'  forecasts =
#'      dplyr::left_join(
#'          forecast.uni,
#'          data.frame(date, observed = A),
#'          by = 'date'
#'      )
#'
#'  # chart forecasts
#'  chart.forecast =
#'      chart_forecast(
#'          forecasts,
#'          Title = 'test',
#'          Ylab = 'Index',
#'          Freq = 'Monthly',
#'          zeroline = TRUE)
#'
#' }
#'
#' @export

chart_forecast = function(
  Data,             # data.frame: oos.forecast object
  Title,            # string: chart title
  Ylab,             # string: y-axis label
  Freq,             # string: frequency (acts as sub-title)
  zeroline = FALSE  # boolean: if TRUE then add a horizontal line at zero
){

  # function errors
  # bug fix: errorCondition() only constructs a condition object and discards
  # it; the check was a no-op - raise the error with stop()
  if(!'forecast' %in% colnames(Data)){
    stop('Data must have a column named "forecast" to calculate errors')
  }
  if(!'date' %in% colnames(Data)){
    stop('Data must have a column named "date" to create plot')
  }

  # function variables (declared so non-standard evaluation passes R CMD check)
  model = observed = forecast = forecast.date = se = NA

  # reformat observed values as an extra '*observed' series so they plot as a line
  if('observed' %in% colnames(Data)){
    Data =
      dplyr::bind_rows(
        Data,
        Data %>% dplyr::select(forecast = observed, date) %>%
          dplyr::mutate(model = '*observed') %>%
          dplyr::distinct()
      )
  }

  # set chart
  chart =
    ggplot2::ggplot(Data, ggplot2::aes(x=date, y = forecast, color = model)) +
    # plot line
    ggplot2::geom_line(lwd = 1.25) +
    ggplot2::theme_classic() +
    ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, linetype = 'solid', colour = "grey")) +
    # chart details
    ggplot2::labs(title = Title, subtitle = Freq) +
    ggplot2::xlab("") +
    ggplot2::ylab(Ylab)

  # add zero line
  if(zeroline == TRUE){

    chart = chart +
      ggplot2::geom_hline(yintercept=0, color="black", size=.5)

  }

  return(chart)

}


#----------------------------------------
### Basic error chart
#----------------------------------------
#' Chart forecast errors
#'
#' @param Data data.frame: oos.forecast object
#' @param Title string: chart title
#' @param Ylab string: y-axis label
#' @param Freq string: frequency (acts as sub-title)
#' @param zeroline boolean: if TRUE then add a horizontal line at zero
#'
#' @return ggplot2 chart
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'      forecast_univariate(
#'          Data = Data,
#'          forecast.dates = tail(Data$date,10),
#'          method = c('naive','auto.arima', 'ets'),
#'          horizon = 1,
#'          recursive = FALSE,
#'          freq = 'month')
#'
#'  forecasts =
#'      dplyr::left_join(
#'          forecast.uni,
#'          data.frame(date, observed = A),
#'          by = 'date'
#'      )
#'
#'  # chart forecast errors
#'  chart.errors =
#'      chart_forecast_error(
#'          forecasts,
#'          Title = 'test',
#'          Ylab = 'Index',
#'          Freq = 'Monthly',
#'          zeroline = TRUE)
#'
#' }
#'
#' @export

chart_forecast_error = function(
  Data,             # data.frame: oos.forecast function output
  Title,            # string: chart title
  Ylab,             # string: y-axis label
  Freq,             # string: frequency (acts as sub-title)
  zeroline = FALSE  # boolean: if TRUE then add a horizontal line at zero
){

  # function errors
  # bug fix: errorCondition() only constructs a condition object and discards
  # it; the checks were no-ops - raise the errors with stop()
  if(!'observed' %in% colnames(Data)){
    stop('Data must have a column named "observed" to calculate errors')
  }
  if(!'forecast' %in% colnames(Data)){
    stop('Data must have a column named "forecast" to calculate errors')
  }
  if(!'date' %in% colnames(Data)){
    stop('Data must have a column named "date" to create plot')
  }

  # function variables (declared so non-standard evaluation passes R CMD check)
  model = observed = forecast = forecast.date = se = errors = NA

  # calculate errors
  Data = Data %>%
    dplyr::mutate(errors = forecast - observed) %>%
    dplyr::select(date, errors, model)

  # set chart
  chart =
    ggplot2::ggplot(Data, ggplot2::aes(x=date, y = errors, color = model)) +
    # plot line
    ggplot2::geom_line(lwd = 1.25) +
    ggplot2::theme_classic() +
191 | ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, linetype = 'solid', colour = "grey")) + 192 | # chart details 193 | ggplot2::labs(title = Title, subtitle = Freq) + 194 | ggplot2::xlab("") + 195 | ggplot2::ylab(Ylab) 196 | 197 | # add zero line 198 | if(zeroline == TRUE){ 199 | 200 | chart = chart + 201 | ggplot2::geom_hline(yintercept=0, color="black", size=.5) 202 | 203 | } 204 | 205 | return(chart) 206 | 207 | } 208 | 209 | -------------------------------------------------------------------------------- /R/forecast_combinations.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------- 2 | # Forecast combination helper functions 3 | #--------------------------------------------- 4 | #' Select N-best forecasts 5 | #' 6 | #' A function to subset the n-best forecasts; 7 | #' assumes column named observed. 8 | #' 9 | #' @param forecasts data.frame: a data frame of forecasts to combine, assumes one column named "observed" 10 | #' @param n.max int: maximum number of forecasts to select 11 | #' @param window int: size of rolling window to evaluate forecast error over, use entire period if NA 12 | #' 13 | #' @return data.frame with n columns of the historically best forecasts 14 | #' 15 | #' @export 16 | 17 | NBest = function( 18 | forecasts, # data.frame: a data frame of forecasts to combine, assumes one column named "observed" 19 | n.max, # int: maximum number of forecasts to select 20 | window = NA # int: size of rolling window to evaluate forecast error over, use entire period if NA 21 | ){ 22 | 23 | observed = NA 24 | 25 | # calculate rolling forecast errors 26 | errors = abs(dplyr::select(forecasts, -observed) - forecasts$observed) 27 | rollRMSE = function(X){return(sqrt(mean((X)^2, na.rm = T)))} 28 | rollingErrors = zoo::rollapply(data = errors, width = seq_along(errors[,1]), 29 | FUN = rollRMSE, align = 'right', fill = NA) 30 | 31 | # create rolling N-best 
forecasts 32 | X = dplyr::select(forecasts, -observed) %>% as.matrix() 33 | nBest = matrix(nrow = nrow(X), ncol = n.max) 34 | for(row in 1:nrow(X)){ 35 | for(column in 1:n.max){ 36 | nBest[row,column] = mean(X[row,order(rollingErrors[row,])[1:column]]) 37 | } 38 | } 39 | colnames(nBest) = paste0('N',c(1:n.max)) 40 | 41 | # return results 42 | return(nBest) 43 | } 44 | 45 | #--------------------------------------------- 46 | # Forecast combination method arguments 47 | #---------------------------------------------- 48 | #' Create interface to control `forecast_combine` model estimation 49 | #' 50 | #' A function to create the forecast combination technique arguments list 51 | #' for user manipulation. 52 | #' 53 | #' @param covariates int: the number of features that will go into the model 54 | #' 55 | #' @return forecast_combinations.control_panel 56 | #' 57 | #' @export 58 | 59 | instantiate.forecast_combinations.control_panel = function(covariates = NULL){ 60 | 61 | # caret names 62 | caret.engine = list( 63 | ols = 'lm', 64 | ridge = 'glmnet', 65 | lasso = 'glmnet', 66 | elastic = 'glmnet', 67 | RF = 'rf', 68 | GBM = 'gbm', 69 | NN = 'avNNet', 70 | pls = 'pls', 71 | pcr = 'pcr' 72 | ) 73 | 74 | # tuning grids 75 | tuning.grids = list( 76 | 77 | ols = NULL, 78 | 79 | ridge = expand.grid( 80 | alpha = 0, 81 | lambda = 10^seq(-3, 3, length = 100)), 82 | 83 | lasso = expand.grid( 84 | alpha = 1, 85 | lambda = 10^seq(-3, 3, length = 100)), 86 | 87 | elastic = NULL, 88 | 89 | GBM = 90 | expand.grid( 91 | n.minobsinnode = c(1), 92 | shrinkage = c(.1,.01), 93 | n.trees = c(100, 250, 500), 94 | interaction.depth = c(1,2,5)), 95 | 96 | RF = 97 | expand.grid( 98 | mtry = c(1:4)), 99 | 100 | NN = 101 | expand.grid( 102 | size = seq(2,10,5), 103 | decay = c(.01,.001), 104 | bag = c(100, 250, 500)), 105 | 106 | pls = 107 | expand.grid( 108 | ncomp = c(1:5)), 109 | 110 | pcr = 111 | expand.grid( 112 | ncomp = c(1:5)) 113 | 114 | ) 115 | 116 | # tuning grids if # of features 
is available 117 | if(!is.null(covariates)){ 118 | tuning.grids[['RF']] = 119 | expand.grid( 120 | mtry = covariates/3) 121 | 122 | tuning.grids[['NN']] = 123 | expand.grid( 124 | size = c(covariates, 2*covariates, 3*covariates), 125 | decay = c(.01,.001), 126 | bag = c(20, 100)) 127 | 128 | } 129 | 130 | # hyper-parameter selection routine 131 | control = 132 | caret::trainControl( 133 | method = "cv", 134 | number = 5, 135 | allowParallel = TRUE) 136 | 137 | # accuracy metric used in training 138 | accuracy = 'RMSE' 139 | 140 | # return training information 141 | return( 142 | list( 143 | caret.engine = caret.engine, 144 | tuning.grids = tuning.grids, 145 | control = control, 146 | accuracy = accuracy 147 | ) 148 | ) 149 | } 150 | 151 | #--------------------------------------------- 152 | # Forecast combination methods 153 | #--------------------------------------------- 154 | #' Forecast with forecast combinations 155 | #' 156 | #' A function to combine forecasts out-of-sample. Methods available include: 157 | #' uniform weights, median forecast, trimmed (winsorized) mean, n-best, 158 | #' ridge regression, lasso regression, elastic net, peLASSO, 159 | #' random forest, tree-based gradient boosting machine, and single-layer neural network. 160 | #' See package website for most up-to-date list of available models. 
#'
#' @param Data data.frame: data frame of forecasted values to combine, assumes 'date' and 'observed' columns, but `observed' is not necessary for all methods
#' @param method string: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'
#' @param n.max int: maximum number of forecasts to select in n.best method
#' @param rolling.window int: size of rolling window to evaluate forecast error over, use entire period if NA
#' @param trim numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)
#' @param burn.in int: the number of periods to use in the first model estimation
#' @param parallel.dates int: the number of cores available for parallel estimation
#'
#' @return data.frame with a row for each combination method and forecasted date
#'
#' @examples
#' \donttest{
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  B = c(1:100) + rnorm(100)
#'  C = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A, B, C)
#'
#'  # run forecast_univariate
#'  forecast.multi =
#'      forecast_multivariate(
#'        Data = Data,
#'        target = 'A',
#'        forecast.dates = tail(Data$date,5),
#'        method = c('ols','var'),
#'        horizon = 1,
#'        freq = 'month')
#'  # include observed values
#'  forecasts =
#'    dplyr::left_join(
#'      forecast.multi,
#'      data.frame(date, observed = A),
#'      by = 'date'
#'    )
#'
#'  # combine forecasts
#'  combinations =
#'    forecast_combine(
#'      forecasts,
#'      method = c('uniform','median','trimmed.mean',
#'                 'n.best','lasso','peLasso'),
#'      burn.in = 5,
#'      n.max = 2)
#' }
#'
#'
#' @export

# assumes a column named observed
forecast_combine = function(
  Data,                  # data.frame: data frame of forecasted values to combine, assumes `date` and `observed` columns, but `observed' is not necessary for all methods
  method = 'uniform',    # string or vector: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'
                         # (default fixed: was the typo 'unform', which matched no branch and crashed the final reduce)
  n.max = NULL,          # int: maximum number of forecasts to select
  rolling.window = NA,   # int: size of rolling window to evaluate forecast error over, use entire period if NA
  trim = c(0.5, 0.95),   # numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)
  burn.in = 1,           # int: the number of periods to use in the first model estimation
  parallel.dates = NULL  # int: the number of cores available for parallel estimation
){

  # create parallel back end
  if(!is.null(parallel.dates)){
    future::plan(strategy = 'multisession', workers = parallel.dates)
  }else{
    future::plan(strategy = 'sequential')
  }

  # cast from long to wide: one column per model, one row per date
  forecasts = Data %>%
    dplyr::select(-se, -forecast.date) %>%
    tidyr::pivot_wider(names_from = model, values_from = forecast)

  # function variables (declared NA to satisfy R CMD check notes on NSE columns)
  model = observed = forecast = forecast.date = se = NA
  results.list = list()

  # uniform weights: simple cross-sectional average of the model forecasts
  if('uniform' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1, FUN = mean, na.rm = T)
    results.list[['uniform']] = data.frame(date = forecasts$date, forecast = combination, model = 'uniform')
  }

  # median forecast
  if('median' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1, FUN = median, na.rm = T)
    results.list[['median']] = data.frame(date = forecasts$date, forecast = combination, model = 'median')
  }

  # trimmed (winsorized) mean
  if('trimmed.mean' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    # winsorize each period's cross-section of forecasts, then average it
    # (previously the winsorized values were computed and immediately
    #  overwritten by the raw mean, so `trim` had no effect)
    combination = apply(forecasts.raw, MARGIN = 1,
                        FUN = function(x){mean(winsorize(x, bounds = trim, trim = FALSE), na.rm = T)})
    results.list[['trimmed']] = data.frame(date = forecasts$date, forecast = combination, model = 'trimmed.mean')
  }

  # N-best method
  if('n.best' %in% method){

    # warnings and errors
    # (the n.max guard was inverted, `window` referred to the base R function
    #  rather than any argument, and errorCondition()/warningCondition() only
    #  build condition objects without signaling them)
    if(is.null(n.max)){
      stop('Set n.max before using the n-best combination method')
    }
    if(is.na(rolling.window)){
      warning('The n-best method will default to using the entire forecast history')
    }

    # create n-best forecast combinations
    combination.nbest = NBest(dplyr::select(forecasts, -dplyr::contains('date')), n.max, rolling.window)
    combination.mean = apply(combination.nbest, MARGIN = 1, FUN = mean, na.rm = T)
    combination = data.frame(date = forecasts$date, combination.mean, combination.nbest) %>%
      dplyr::rename(N.best = combination.mean)
    combination = tidyr::pivot_longer(combination,
                                      cols = names(dplyr::select(combination, -date)),
                                      names_to = 'model',
                                      values_to = 'forecast')

    results.list[['nbest']] = combination
  }

  # peLasso
  if('peLasso' %in% method){
    combination =
      forecasts$date[burn.in : nrow(forecasts)] %>%
      furrr::future_map(
        .f = function(forecast.date){

          # set data: train on strictly earlier dates, predict the current date
          information.set = dplyr::filter(forecasts, forecast.date > date)
          current.forecasts = dplyr::filter(forecasts, forecast.date == date)

          # calculate peLasso method
          # stage 1, shrink to 0,
          # y-f -> eLasso to select subset of regressors
          x = as.matrix(dplyr::select(information.set , -observed, -date))
          y = information.set$observed - rowMeans(x)
          model = glmnet::cv.glmnet(x, y, alpha = 1, intercept = F, parallel = T)
          covariates = colnames(x)[which(as.vector(coef(model, s = 'lambda.min')) != 0)-1]

          # stage 2, shrink to 1/k,
          # y-f -> eRidge to shrink subset of regressors to uniform weights
          if(length(covariates) > 1){
            model = glmnet::cv.glmnet(x[,covariates], y, alpha = 0, intercept = F)
          }else{
            # too few selected regressors to rerun ridge; keep the stage-1 model
            covariates = colnames(x)
          }

          # calculate forecast: shrinkage correction plus the equal-weight mean
          peLasso = predict(model, newx = as.matrix(current.forecasts[,covariates]), s = 'lambda.min') +
            rowMeans(dplyr::select(current.forecasts , -observed, -date))
          results = data.frame(date = current.forecasts$date, peLasso, model = 'peLasso')
          colnames(results)[colnames(results) == 'X1'] = 'forecast'
          return(results)

        }
      ) %>%
      purrr::reduce(dplyr::bind_rows)

    results.list[['peLasso']] = combination
  }

  # ML algorithms via caret
  if(length(intersect(c('GBM','RF','NN','ols','lasso','ridge','elastic','pcr','pls'), method)) > 0){

    # training parameter creation and warnings
    if(exists("forecast_combinations.control_panel")){
      message('forecast_combinations.control_panel exists and will be used for ML forecast combination techniques in its present state.')
    }else{

      covariates = length(unique(forecasts$model))

      forecast_combinations.control_panel = instantiate.forecast_combinations.control_panel(covariates = covariates)
      message('forecast_combinations.control_panel was instantiated and default values will be used to train ML forecast combination techniques.')
    }

    # estimate one model per requested engine and per forecast date
    # ('pcr' and 'pls' added: they were accepted by the gate above but missing
    #  here, which produced an empty reduce and a crash)
    combination = intersect(c('GBM','RF','NN','ols','lasso','ridge','elastic','pcr','pls'), method) %>%
      purrr::map(
        .f = function(engine){

          forecasts$date[burn.in : nrow(forecasts)] %>%
            furrr::future_map(
              .f = function(forecast.date){

                # set data: train on strictly earlier dates, predict the current date
                information.set = dplyr::filter(forecasts, forecast.date > date)
                current.forecasts = dplyr::filter(forecasts, forecast.date == date)

                # estimate model
                model =
                  caret::train(observed~.,
                               data = dplyr::select(information.set, -date),
                               method = forecast_combinations.control_panel$caret.engine[[engine]],
                               trControl = forecast_combinations.control_panel$control,
                               tuneGrid = forecast_combinations.control_panel$tuning.grids[[engine]],
                               metric = forecast_combinations.control_panel$accuracy,
                               na.action = na.omit)

                # calculate forecast
                point = predict(model, newdata = current.forecasts)

                # calculate standard error
                # (best effort: only linear-model engines return an interval;
                #  everything else falls through to NA)
                error =
                  try(
                    predict(model$finalModel, current.forecasts, interval = "confidence", level = 0.95) %>%
                      data.frame(),
                    silent = TRUE
                  )

                if('upr' %in% names(error) == TRUE){
                  error = (error$upr - error$fit) / qnorm(0.95)
                  error = as.numeric(error)
                }else{
                  error = NA
                }

                # set dates
                results = data.frame(date = current.forecasts$date,
                                     model = engine, forecast = point, se = error)
              }
            ) %>%
            purrr::reduce(dplyr::bind_rows)
        }
      ) %>%
      purrr::reduce(dplyr::bind_rows)

    results.list[['ML']] = combination
  }

  # return results
  if(length(results.list) == 0){
    stop('method did not match any available combination technique')
  }
  results = purrr::reduce(results.list, dplyr::bind_rows) %>%
    dplyr::mutate(model = paste0(model, '.combo'))
  rownames(results) = c(1:nrow(results))
  return(results)
}

# --------------------------------------------------------------------------------
# /R/forecast_metrics.R
# --------------------------------------------------------------------------------

# dependencies:
#  lmtest
#  sandwich
#  forecast

#-------------------------------------------
# loss functions
#-------------------------------------------
#' Calculate error via loss functions
#'
#' A function to calculate various error loss functions. Options include:
#' MSE, RMSE, MAE, and MAPE. The default is MSE loss.
#'
#' @param forecast numeric: vector of forecasted values
#' @param observed numeric: vector of observed values
#' @param metric string: loss function
#'
#' @return numeric test result
#'
#' @export

loss_function = function(
  forecast,       # numeric: vector of forecasted values
  observed,       # numeric: vector of observed values
  metric = 'MSE'  # string: loss function
){

  if(metric == 'MSE'){
    error = mean((observed - forecast)^2, na.rm = T)
  }else if(metric == 'RMSE'){
    error = sqrt(mean((observed - forecast)^2, na.rm = T))
  }else if(metric == 'MAE'){
    error = mean(abs(observed - forecast), na.rm = T)
  }else if(metric == 'MAPE'){
    error = mean(abs((forecast - observed)/observed), na.rm = T)
  }else{
    # previously an unrecognized metric fell through and raised the cryptic
    # "object 'error' not found"; fail with an informative message instead
    stop('metric must be one of "MSE", "RMSE", "MAE", or "MAPE"')
  }

  return(error)
}

#-------------------------------------------
# forecast accuracy
#-------------------------------------------
#' Calculate forecast accuracy
#'
#' A function to calculate various loss functions, including
#' MSE, RMSE, MAE, and MAPE.
#'
#' @param Data data.frame: data frame of forecasts, model names, and dates
#'
#' @return data.frame of numeric error results
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'     forecast_univariate(
#'       Data = Data,
#'       forecast.dates = tail(Data$date,10),
#'       method = c('naive','auto.arima', 'ets'),
#'       horizon = 1,
#'       recursive = FALSE,
#'       freq = 'month')
#'
#'  forecasts =
#'    dplyr::left_join(
#'      forecast.uni,
#'      data.frame(date, observed = A),
#'      by = 'date'
#'    )
#'
#'  # forecast accuracy
#'  forecast.accuracy = forecast_accuracy(forecasts)
#'
#' }
#'
#' @export

forecast_accuracy = function(
  Data  # data.frame: data frame of forecasts, model names, and dates; must contain 'date', 'model', 'forecast', and 'observed' columns
){

  # input validation
  # (stop() instead of print(errorCondition(...)), which only printed the
  #  condition object and let execution continue; also fixes the
  #  "obsererved" typo in the message)
  if(!'observed' %in% names(Data)){
    stop('There must be a column named "observed" in Data.')
  }
  if(!'date' %in% names(Data)){
    stop('There must be a column named "date" in Data.')
  }

  # function variables (declared NA to satisfy R CMD check notes on NSE columns)
  model = observed = forecast = forecast.date = se = NA

  # set data: align each model's forecasts with the observed values by date
  information.set =
    dplyr::full_join(
      dplyr::select(Data, -observed),
      dplyr::select(Data, date, observed),
      by = 'date')

  # calculate loss functions, one row of metrics per model
  information.set = information.set %>%
    dplyr::group_split(model) %>%
    purrr::map_df(
      .f = function(X){

        Y = X %>%
          dplyr::select(observed, forecast, model) %>%
          na.omit() %>%
          dplyr::summarize(
            model = unique(model),
            MSE = mean((observed - forecast)^2, na.rm = T),
            RMSE = sqrt(mean((observed - forecast)^2, na.rm = T)),
            MAE = mean(abs(observed - forecast), na.rm = T),
            MAPE = mean(abs((forecast - observed)/observed), na.rm = T))

        return(Y)
      }
    )

  return(information.set)
}


#-------------------------------------------
# forecast comparison
#-------------------------------------------
#' Compare forecast accuracy
#'
#' A function to compare forecasts. Options include: simple forecast error ratios,
#' [Diebold-Mariano test](https://www.sas.upenn.edu/~fdiebold/papers/paper68/pa.dm.pdf), and [Clark and West test](https://www.nber.org/papers/t0326) for nested models
#'
#' @param Data data.frame: data frame of forecasts, model names, and dates
#' @param baseline.forecast string: column name of baseline (null hypothesis) forecasts
#' @param test string: which test to use; ER = error ratio, DM = Diebold-Mariano, CW = Clark and West
#' @param loss string: error loss function to use if creating forecast error ratio
#' @param horizon int: horizon of forecasts being compared in DM and CW tests
#'
#' @return numeric test result
#'
#' @examples
#' \donttest{
#'
#'  # simple time series
#'  A = c(1:100) + rnorm(100)
#'  date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#'  Data = data.frame(date = date, A)
#'
#'  # run forecast_univariate
#'  forecast.uni =
#'     forecast_univariate(
#'       Data = Data,
#'       forecast.dates = tail(Data$date,10),
#'       method = c('naive','auto.arima', 'ets'),
#'       horizon = 1,
#'       recursive = FALSE,
#'       freq = 'month')
#'
#'  forecasts =
#'    dplyr::left_join(
#'      forecast.uni,
#'      data.frame(date, observed = A),
#'      by = 'date'
#'    )
#'
#'  # run ER (MSE)
#'  er.ratio.mse =
#'    forecast_comparison(
#'      forecasts,
#'      baseline.forecast = 'naive',
#'      test = 'ER',
#'      loss =
'MSE')
#' }
#'
#' @export

forecast_comparison = function(
  Data,               # data.frame: data frame of forecasts, model names, and dates
  baseline.forecast,  # string: column name of baseline (null hypothesis) forecasts
  test = 'ER',        # string: which test to use; ER = error ratio, DM = Diebold-Mariano, CW = Clark and West
  loss = 'MSE',       # string: error loss function to use if creating forecast error ratio
  horizon = NULL      # int: horizon of forecasts being compared in DM and CW tests
){

  # input validation
  # (stop() instead of print(errorCondition(...)), which only printed the
  #  condition object and let execution continue)
  if(!'observed' %in% names(Data)){
    stop('There must be a column named "observed" in Data.')
  }
  if(!'date' %in% names(Data)){
    stop('There must be a column named "date" in Data.')
  }

  # function variables (declared NA to satisfy R CMD check notes on NSE columns)
  model = observed = forecast = forecast.date = se = NA

  # set data: attach the baseline model's forecasts and observed values to
  # every other model's forecasts, matched by date
  information.set =
    dplyr::full_join(
      dplyr::select(Data, -observed),
      Data %>%
        dplyr::filter(model == baseline.forecast) %>%
        dplyr::select(date, observed, baseline.forecast = forecast),
      by = 'date')


  # calculate loss functions
  if(test == 'ER'){
    # simple ratio of candidate loss to baseline loss (< 1 favors the candidate)
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          error =
            loss_function(X$forecast, X$observed, loss) /
            loss_function(X$baseline.forecast, X$observed, loss)

          return(
            data.frame(
              model = unique(X$model),
              error.ratio = error)
          )
        }
      )

  }else if(test == 'DM'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          # the baseline compared with itself has no test statistic
          if(sum(na.omit(X$baseline.forecast - X$forecast)) == 0){
            return(
              data.frame(
                model = baseline.forecast,
                DM.statistic = NA)
            )
          }

          # pass the documented horizon through to dm.test
          # (previously ignored; h = 1 is dm.test's default, so behavior is
          #  unchanged when horizon is NULL)
          DM.statistic =
            forecast::dm.test(
              e1 = na.omit(X$baseline.forecast - X$observed),
              e2 = na.omit(X$forecast - X$observed),
              h = if(is.null(horizon)){1}else{horizon},
              alternative = 'less')$statistic[1]

          return(
            data.frame(
              model = unique(X$model),
              DM.statistic = DM.statistic)
          )
        }
      )

  }else if(test == 'CW'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          # the baseline compared with itself has no test statistic
          if(sum(na.omit(X$baseline.forecast - X$forecast)) == 0){
            return(
              data.frame(
                model = baseline.forecast,
                CW.statistic = NA)
            )
          }

          # Clark-West adjusted squared-error differential
          fCW12 =
            (X$observed - X$baseline.forecast)^2 -
            (X$observed - X$forecast)^2 -
            (X$baseline.forecast - X$forecast)^2

          # regress on a constant and use a Newey-West HAC t-statistic
          lmCW = lm(as.numeric(fCW12)~1)

          lmCW.summ = summary(lmCW)

          lmCW.NW.summ = lmCW.summ

          lmCW.NW.summ$coefficients =
            unclass(lmtest::coeftest(lmCW, vcov. = sandwich::NeweyWest(lmCW, lag = horizon)))

          CW.statistic = lmCW.NW.summ$coefficients[3]

          return(
            data.frame(
              model = unique(X$model),
              # fixed: was `Cw.statistic`, which mismatched the NA branch above
              # and made map_df emit two half-NA columns
              CW.statistic = CW.statistic)
          )
        }
      )
  }

  rownames(information.set) = c(1:nrow(information.set))
  return(information.set)

}

# --------------------------------------------------------------------------------
# /R/forecast_multivariate.R
# --------------------------------------------------------------------------------

#----------------------------------------------
# multivariate forecasting arguments - ML
#----------------------------------------------
#' Create interface to control `forecast_multivariate` ML estimation
#'
#' A function to create the multivariate forecast methods
#' arguments list for user manipulation.
#'
#' @param covariates int: the number of features that will go into the model
#' @param rolling.window int: size of rolling window, NA if expanding window is used
#' @param horizon int: number of periods into the future to forecast
#'
#' @return forecast_multivariate.ml.control_panel
#'
#' @export

instantiate.forecast_multivariate.ml.control_panel = function(covariates = NULL, rolling.window = NULL, horizon = NULL){

  # caret names: map user-facing method names to caret's model codes
  caret.engine = list(
    ols = 'lm',
    ridge = 'glmnet',
    lasso = 'glmnet',
    elastic = 'glmnet',
    RF = 'rf',
    GBM = 'gbm',
    NN = 'avNNet',
    pls = 'pls',
    pcr = 'pcr'
  )

  # tuning grids (NULL lets caret use its default grid)
  tuning.grids = list(

    ols = NULL,

    ridge = expand.grid(
      alpha = 0,
      lambda = 10^seq(-3, 3, length = 100)),

    lasso = expand.grid(
      alpha = 1,
      lambda = 10^seq(-3, 3, length = 100)),

    elastic = NULL,

    GBM =
      expand.grid(
        n.minobsinnode = c(1),
        shrinkage = c(.1,.01),
        n.trees = c(100, 250, 500),
        interaction.depth = c(1,2,5)),

    RF =
      expand.grid(
        mtry = c(1:4)),

    NN =
      expand.grid(
        size = seq(2,10,5),
        decay = c(.01,.001),
        bag = c(100, 250, 500)),

    pls =
      expand.grid(
        ncomp = c(1:5)),

    pcr =
      expand.grid(
        ncomp = c(1:5))

  )

  # tuning grids if # of features is available: scale RF and NN grids to the data
  if(!is.null(covariates)){
    tuning.grids[['RF']] =
      expand.grid(
        mtry = covariates/3)

    tuning.grids[['NN']] =
      expand.grid(
        size = c(covariates, 2*covariates, 3*covariates),
        decay = c(.01,.001),
        bag = c(20, 100))

  }

  # hyper-parameter selection routine
  # branch 1: an explicit numeric rolling window -> time-slice CV with that window
  if(is.numeric(rolling.window)){
    control =
      caret::trainControl(
        method = "timeslice",
        horizon = horizon,
        initialWindow = rolling.window,
        allowParallel = TRUE)
  }else if(!is.null(rolling.window)){
    # branch 2: reached when rolling.window is NA (is.numeric(NA) is FALSE,
    # !is.null(NA) is TRUE) -> time-slice CV with a hard-coded initial window of 5
    # NOTE(review): forecast_multivariate defaults rolling.window to NA, so this
    # is the default path; confirm the fixed initialWindow = 5 is intended here
    # rather than falling through to plain k-fold CV below.
    control =
      caret::trainControl(
        method = "timeslice",
        horizon = horizon,
        initialWindow = 5,
        allowParallel = TRUE)
  }else{
    # branch 3: rolling.window is NULL -> ordinary 5-fold cross-validation
    control =
      caret::trainControl(
        method = "cv",
        number = 5,
        allowParallel = TRUE)

  }

  # accuracy metric used in training
  accuracy = 'RMSE'

  # return training information
  return(
    list(
      caret.engine = caret.engine,
      tuning.grids = tuning.grids,
      control = control,
      accuracy = accuracy
    )
  )

}

#----------------------------------------------
# multivariate forecasting arguments - VAR
#----------------------------------------------
#' Create interface to control `forecast_multivariate` VAR estimation
#'
#' A function to create the multivariate forecast methods
#' arguments list for user manipulation.
#'
#' @return forecast_multivariate.var.control_panel
#'
#' @export

instantiate.forecast_multivariate.var.control_panel = function(){

  # default VAR estimation arguments; presumably forwarded to the VAR
  # estimation routine (e.g. vars::VAR) by forecast_multivariate — confirm
  # against the caller, which is outside this view
  return(
    list(
      p = 1,
      lag.max = NULL,
      ic = 'AIC',
      type = 'none',
      season = NULL,
      exogen = NULL
    )
  )

}

#---------------------------------------------
# Multivariate Forecast
#---------------------------------------------
#' Forecast with multivariate models
#'
#' A function to estimate multivariate forecasts out-of-sample. Methods available include:
#' vector auto-regression, linear regression, lasso regression, ridge regression, elastic net,
#' random forest, tree-based gradient boosting machine, and single-layer neural network.
#' See package website for most up-to-date list of available models.
163 | #' 164 | #' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a `ts`, `xts`, or `zoo` object to forecast 165 | #' @param forecast.dates date: dates forecasts are created 166 | #' @param target string: column name in Data of variable to forecast 167 | #' @param method string: methods to use 168 | #' @param rolling.window int: size of rolling window, NA if expanding window is used 169 | #' @param freq string: time series frequency; day, week, month, quarter, year 170 | #' @param horizon int: number of periods into the future to forecast 171 | #' @param lag.variables string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables 172 | #' @param lag.n int: number of lags to create 173 | #' @param outlier.clean boolean: if TRUE then clean outliers 174 | #' @param outlier.variables string: vector of variables to purge of outlier, default is all but 'date' column 175 | #' @param outlier.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 176 | #' @param outlier.trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound 177 | #' @param outlier.cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 178 | #' @param impute.missing boolean: if TRUE then impute missing values 179 | #' @param impute.method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 180 | #' @param impute.variables string: vector of variables to impute missing values, default is all numeric columns 181 | #' @param impute.verbose boolean: show start-up status of impute.missing.routine 182 | #' @param reduce.data boolean: if TRUE then reduce dimension 183 | #' @param reduce.variables string: vector of variables to impute 
missing values, default is all numeric columns 184 | #' @param reduce.ncomp int: number of factors to create 185 | #' @param reduce.standardize boolean: normalize variables (mean zero, variance one) before estimating factors 186 | #' @param parallel.dates int: the number of cores available for parallel estimation 187 | #' @param return.models boolean: if TRUE then return list of models estimated each forecast.date 188 | #' @param return.data boolean: if True then return list of information.set for each forecast.date 189 | #' 190 | #' @return data.frame with a row for each forecast by model and forecasted date 191 | #' 192 | #' @examples 193 | #' \donttest{ 194 | #' # simple time series 195 | #' A = c(1:100) + rnorm(100) 196 | #' B = c(1:100) + rnorm(100) 197 | #' C = c(1:100) + rnorm(100) 198 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 199 | #' Data = data.frame(date = date, A, B, C) 200 | #' 201 | #' # run forecast_univariate 202 | #' forecast.multi = 203 | #' forecast_multivariate( 204 | #' Data = Data, 205 | #' target = 'A', 206 | #' forecast.dates = tail(Data$date,5), 207 | #' method = c('ols','var'), 208 | #' horizon = 1, 209 | #' # information set 210 | #' rolling.window = NA, 211 | #' freq = 'month', 212 | #' # data prep 213 | #' lag.n = 4, 214 | #' outlier.clean = TRUE, 215 | #' impute.missing = TRUE) 216 | #' } 217 | #' 218 | #' 219 | #' @export 220 | 221 | forecast_multivariate = function( 222 | Data, # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a `ts`, `xts`, or `zoo` object to forecast 223 | forecast.dates, # date: dates forecasts are created 224 | target, # string: column name in `Data` of variable to forecast 225 | horizon, # int: number of periods into the future to forecast 226 | method, # string or vector: methods to use; 'var', 'ols', 'ridge', 'lasso', 'elastic', 'RF', 'GBM', 'NN' 227 | 228 | # information set 229 | rolling.window = 
NA, # int: size of rolling window, NA if expanding window is used 230 | freq, # string: time series frequency; day, week, month, quarter, year 231 | lag.variables = NULL, # string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables 232 | lag.n = NULL, # int: number of lags to create 233 | 234 | # outlier cleaning 235 | outlier.clean = FALSE, # boolean: if TRUE then clean outliers 236 | outlier.variables = NULL, # string: vector of variables to standardize, default is all but 'date' column 237 | outlier.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 238 | outlier.trim = FALSE, # boolean: if TRUE then replace outliers with NA instead of winsorizing bound 239 | outlier.cross_section = FALSE, # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 240 | 241 | # impute missing 242 | impute.missing = FALSE, # boolean: if TRUE then impute missing values 243 | impute.method = 'kalman', # string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 244 | impute.variables = NULL, # string: vector of variables to impute missing values, default is all numeric columns 245 | impute.verbose = FALSE, # boolean: show start-up status of impute.missing.routine 246 | 247 | # dimension reduction 248 | reduce.data = FALSE, # boolean: if TRUE then reduce dimension 249 | reduce.variables = NULL, # string: vector of variables to impute missing values, default is all numeric columns 250 | reduce.ncomp = NULL, # int: number of factors to create 251 | reduce.standardize = TRUE, # boolean: normalize variables (mean zero, variance one) before estimating factors 252 | 253 | # parallel processing 254 | parallel.dates = NULL, # int: the number of cores available for parallel estimation 255 | 256 | # additional 
objects 257 | return.models = FALSE, # boolean: if TRUE then return list of models estimated each forecast.date 258 | return.data = FALSE # boolean: if True then return list of information.set for each forecast.date 259 | 260 | ){ 261 | 262 | # convert from ts, xts, or zoo object 263 | if(xts::is.xts(Data) | zoo::is.zoo(Data) | stats::is.ts(Data)){ 264 | Data = data.frame(date = zoo::index(Data), Data) 265 | } 266 | 267 | # training parameter creation and warnings 268 | if(exists("forecast_multivariate.ml.control_panel")){ 269 | 270 | message('forecast_multivariate.ml.control_panel exists and will be used for ML model estimation in its present state.') 271 | 272 | }else{ 273 | 274 | covariates = nrow(dplyr::select(Data, -target, -date)) 275 | if(!is.null(lag.n)){covariates = covariates + covariates*lag.n} 276 | 277 | forecast_multivariate.ml.control_panel = instantiate.forecast_multivariate.ml.control_panel(covariates = covariates, rolling.window = rolling.window, horizon = horizon) 278 | message('forecast_multivariate.ml.control_panel was instantiated and default values will be used for ML model estimation.') 279 | 280 | } 281 | 282 | # VAR parameters and warnings 283 | if(exists("forecast_multivariate.var.control_panel")){ 284 | message('forecast.combinations.var.training exists and will be used for VAR model estimation in its present state.') 285 | }else{ 286 | forecast_multivariate.var.control_panel = instantiate.forecast_multivariate.var.control_panel() 287 | message('forecast_multivariate.var.control_panel was instantiated and default values will be used for VAR model estimation.') 288 | } 289 | 290 | # create parallel back end 291 | if(!is.null(parallel.dates)){ 292 | future::plan(strategy = 'multisession', workers = parallel.dates) 293 | }else{ 294 | future::plan(strategy = 'sequential') 295 | } 296 | 297 | # results list 298 | results.list = list() 299 | 300 | # Create forecasts 301 | forecasts = forecast.dates %>% 302 | furrr::future_map( 303 | .f = 
function(forecast.date){ 304 | 305 | # subset data 306 | information.set = 307 | data_subset( 308 | Data = Data, 309 | forecast.date = forecast.date, 310 | rolling.window = rolling.window, 311 | freq = freq 312 | ) 313 | 314 | # clean outliers 315 | if(outlier.clean){ 316 | information.set = 317 | data_outliers( 318 | Data = information.set, 319 | variables = outlier.variables, 320 | w.bounds = outlier.bounds, 321 | trim = outlier.trim, 322 | cross_section = outlier.cross_section 323 | ) 324 | } 325 | 326 | # impute missing values 327 | if(impute.missing){ 328 | information.set = 329 | data_impute( 330 | Data = information.set, 331 | variables = impute.variables, 332 | method = impute.method, 333 | verbose = impute.verbose 334 | ) 335 | } 336 | 337 | # dimension reduction 338 | if(reduce.data){ 339 | information.set.reduce = 340 | data_reduction( 341 | Data = information.set, 342 | variables = reduce.variables, 343 | ncomp = reduce.ncomp, 344 | standardize = reduce.standardize 345 | ) 346 | 347 | information.set = 348 | dplyr::full_join( 349 | dplyr::select(information.set, target, date), 350 | information.set.reduce, 351 | by = 'date') 352 | } 353 | 354 | # create variable lags 355 | if(!is.null(lag.n)){ 356 | information.set = 357 | n.lag( 358 | Data = information.set, 359 | lags = lag.n, 360 | variables = lag.variables) 361 | } 362 | 363 | results = method %>% 364 | purrr::map( 365 | .f = function(engine){ 366 | 367 | # set current data 368 | current.set = dplyr::filter(information.set, forecast.date == date) 369 | 370 | # estimate ML model 371 | if(engine != 'var'){ 372 | 373 | # check for missing covariates in current data 374 | if(is.na(sum(dplyr::select(current.set, -date)))){ 375 | print(warningCondition(paste0('Missing covariate on: ', forecast.date))) 376 | results = data.frame(date = current.set$date, ml = NA) 377 | colnames(results)[colnames(results) == 'ml'] = engine 378 | return(results) 379 | } 380 | 381 | # set target variable 382 | 
names(information.set)[names(information.set) == target] = 'target' 383 | 384 | # set horizon 385 | information.set = 386 | dplyr::mutate(information.set, target = dplyr::lead(target, horizon)) %>% 387 | na.omit() 388 | 389 | # estimate model 390 | model = 391 | caret::train(target~., 392 | data = dplyr::select(information.set, -date), 393 | method = forecast_multivariate.ml.control_panel$caret.engine[[engine]], 394 | trControl = forecast_multivariate.ml.control_panel$control, 395 | tuneGrid = forecast_multivariate.ml.control_panel$tuning.grids[[engine]], 396 | metric = forecast_multivariate.ml.control_panel$accuracy) 397 | 398 | # calculate forecast 399 | point = try(predict(model, newdata = current.set)) 400 | 401 | if(!is.numeric(point)){ 402 | point = NA 403 | } 404 | 405 | # calculate standard error 406 | error = 407 | try( 408 | predict(model$finalModel, current.set, interval = "confidence", level = 0.95) %>% 409 | data.frame(), 410 | silent = TRUE 411 | ) 412 | 413 | error = try((error$upr - error$fit) / qnorm(0.95), 414 | silent = TRUE) 415 | 416 | if(is.numeric(error) != TRUE | length(error) != 1){error = NA} 417 | 418 | # estimate VAR 419 | }else{ 420 | 421 | model = 422 | vars::VAR( 423 | y = na.omit(dplyr::select(information.set, -date)), 424 | p = forecast_multivariate.var.control_panel$p, 425 | lag.max = forecast_multivariate.var.control_panel$max.lag, 426 | ic = forecast_multivariate.var.control_panel$ic, 427 | season = forecast_multivariate.var.control_panel$season, 428 | type = forecast_multivariate.var.control_panel$type 429 | ) 430 | 431 | # calculate forecast and standard error 432 | ml = predict(model, n.ahead = horizon) 433 | ml = ml$fcst[target] %>% data.frame() 434 | point = ml[horizon, 1] 435 | error = (ml[horizon, 3] - ml[horizon, 1]) / qnorm(0.95) 436 | 437 | } 438 | 439 | # set date 440 | date = forecast_date( 441 | forecast.date, 442 | horizon, 443 | freq) 444 | 445 | # set dates 446 | predictions = data.frame( 447 | date = date, 448 | 
forecast.date = forecast.date, 449 | model = engine, forecast = point, se = error) 450 | 451 | 452 | # return results 453 | return( 454 | list( 455 | predictions = predictions, 456 | model = model 457 | ) 458 | ) 459 | 460 | } 461 | ) 462 | 463 | predictions = 464 | purrr::map(results, .f = function(X){return(X$predictions)}) %>% 465 | purrr::reduce(dplyr::bind_rows) 466 | 467 | rownames(predictions) = c(1:nrow(predictions)) 468 | 469 | models = 470 | purrr::map(results, .f = function(X){return(X$model)}) 471 | 472 | # store objects for return 473 | results = 474 | list( 475 | predictions = predictions, 476 | information.set = information.set, 477 | models = models 478 | ) 479 | 480 | # return results 481 | return(results) 482 | 483 | } 484 | ) 485 | 486 | # prepare forecasts 487 | predictions = 488 | purrr::map(forecasts, .f = function(X){return(X$predictions)}) %>% 489 | purrr::reduce(dplyr::bind_rows) 490 | 491 | # add model and information set lists to return object 492 | if(return.data == TRUE | return.models == TRUE){ 493 | information = list(forecasts = predictions) 494 | }else{ 495 | information = predictions 496 | } 497 | 498 | # prepare models 499 | if(return.models == TRUE){ 500 | models = purrr::map(forecasts, .f = function(X){return(X$models)}) 501 | names(models) = forecast.dates 502 | information[['models']] = models 503 | } 504 | 505 | # prepare information set 506 | if(return.data == TRUE){ 507 | information.set = purrr::map(forecasts, .f = function(X){return(X$information.set)}) 508 | names(information.set) = forecast.dates 509 | information[['information.set']] = information.set 510 | } 511 | 512 | # return results 513 | return(information) 514 | } 515 | -------------------------------------------------------------------------------- /R/forecast_univariate.R: -------------------------------------------------------------------------------- 1 | 2 | # dependencies: 3 | # magrittr 4 | # lubridate 5 | # dplry 6 | # purrr 7 | # forecast 8 | 9 | 10 | 
#---------------------------------------------- 11 | # univariate forecasting arguments 12 | #---------------------------------------------- 13 | #' Create interface to control `forecast_univariate` model estimation 14 | #' 15 | #' A function to create the univariate forecast method arguments list 16 | #' for user manipulation. 17 | #' 18 | #' @return forecast_univariate.control_panel 19 | #' 20 | #' @export 21 | 22 | instantiate.forecast_univariate.control_panel = function(){ 23 | 24 | # methods 25 | methods = list( 26 | auto.arima = forecast::auto.arima, 27 | Arima = forecast::Arima, 28 | dshw = forecast::dshw, 29 | holt = forecast::holt, 30 | hw = forecast::hw, 31 | ses = forecast::ses, 32 | ets = forecast::ets, 33 | stlm = forecast::stlm, 34 | bats = forecast::bats, 35 | tbats = forecast::tbats, 36 | thetaf = forecast::thetaf, 37 | nnetar = forecast::nnetar, 38 | meanf = forecast::meanf, 39 | naive = forecast::naive, 40 | snaive = forecast::snaive, 41 | rwf = forecast::rwf, 42 | tslm = forecast::tslm, 43 | splinef = forecast::splinef 44 | ) 45 | 46 | # arguments 47 | arguments = list( 48 | auto.arima = NULL, 49 | Arima = NULL, 50 | dshw = NULL, 51 | holt = NULL, 52 | hw = NULL, 53 | ses = NULL, 54 | ets = NULL, 55 | stlm = NULL, 56 | bats = NULL, 57 | tbats = NULL, 58 | thetaf = NULL, 59 | nnetar = NULL, 60 | meanf = NULL, 61 | naive = NULL, 62 | snaive = NULL, 63 | rwf = NULL, 64 | splinef = NULL, 65 | tslm = NULL 66 | ) 67 | 68 | return( 69 | list( 70 | method = methods, 71 | arguments = arguments 72 | ) 73 | ) 74 | 75 | } 76 | 77 | #---------------------------------------------- 78 | # univariate time series forecasting function 79 | #---------------------------------------------- 80 | #' Forecast with univariate models 81 | #' 82 | #' A function to estimate univariate forecasts out-of-sample. Methods available include all forecast 83 | #' methods from the `forecast` package. See package website for most up-to-date list of available models. 
84 | #' 85 | #' @param Data data.frame: data frame of variable to forecast and a date column; may alternatively be a `ts`, `xts`, or `zoo` object to forecast 86 | #' @param forecast.dates date: dates forecasts are created 87 | #' @param methods string: models to estimate forecasts 88 | #' @param horizon int: number of periods to forecast 89 | #' @param rolling.window int: size of rolling window, NA if expanding window is used 90 | #' @param freq string: time series frequency; day, week, month, quarter, year 91 | #' @param recursive boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE 92 | #' @param outlier.clean boolean: if TRUE then clean outliers 93 | #' @param outlier.variables string: vector of variables to purge of outliers, default is all but 'date' column 94 | #' @param outlier.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 95 | #' @param outlier.trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound 96 | #' @param outlier.cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 97 | #' @param impute.missing boolean: if TRUE then impute missing values 98 | #' @param impute.method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 99 | #' @param impute.variables string: vector of variables to impute missing values, default is all numeric columns 100 | #' @param impute.verbose boolean: show start-up status of impute.missing.routine 101 | #' @param parallel.dates int: the number of cores available for parallel estimation 102 | #' @param return.models boolean: if TRUE then return list of models estimated each forecast.date 103 | #' @param return.data boolean: if True then return list of information.set for each forecast.date 104 | #' 105 | #' @return data.frame with a row 
for each forecast by model and forecasted date 106 | #' 107 | #' @examples 108 | #' \donttest{ 109 | #' # simple time series 110 | #' A = c(1:100) + rnorm(100) 111 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 112 | #' Data = data.frame(date = date, A) 113 | #' 114 | #' # estiamte univariate forecasts 115 | #' forecast.uni = 116 | #' forecast_univariate( 117 | #' Data = Data, 118 | #' forecast.dates = tail(Data$date,5), 119 | #' method = c('naive','auto.arima', 'ets'), 120 | #' horizon = 1, 121 | #' recursive = FALSE, 122 | #' # information set 123 | #' rolling.window = NA, 124 | #' freq = 'month', 125 | #' # data prep 126 | #' outlier.clean = TRUE, 127 | #' impute.missing = TRUE) 128 | #' } 129 | #' 130 | #' @export 131 | 132 | forecast_univariate = function( 133 | Data, # data.frame: data frame of variable to forecast and a date column; may alternatively be a `ts`, `xts`, or `zoo` object to forecast 134 | forecast.dates, # date: dates forecasts are created 135 | methods, # string or vector: models to estimate forecasts with; currently supports all and only functions from the `forecast` package 136 | horizon, # int: number of periods to forecast 137 | recursive = TRUE, # boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE 138 | 139 | # information set 140 | rolling.window = NA, # int: size of rolling window, NA if expanding window is used 141 | freq, # string: time series frequency; day, week, month, quarter, year 142 | 143 | # outlier cleaning 144 | outlier.clean = FALSE, # boolean: if TRUE then clean outliers 145 | outlier.variables = NULL, # string: vector of variables to standardize, default is all but 'date' column 146 | outlier.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile) 147 | outlier.trim = FALSE, # boolean: if TRUE then replace outliers with NA instead of winsorizing bound 148 | outlier.cross_section = FALSE, # 
boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise) 149 | 150 | # impute missing 151 | impute.missing = FALSE, # boolean: if TRUE then impute missing values 152 | impute.method = 'kalman', # string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit' 153 | impute.variables = NULL, # string: vector of variables to impute missing values, default is all numeric columns 154 | impute.verbose = FALSE, # boolean: show start-up status of impute.missing.routine 155 | 156 | # parallel processing 157 | parallel.dates = NULL, # int: the number of cores available for parallel estimation 158 | 159 | # additional objects 160 | return.models = FALSE, # boolean: if TRUE then return list of models estimated each forecast.date 161 | return.data = FALSE # boolean: if True then return list of information.set for each forecast.date 162 | 163 | ){ 164 | 165 | # convert from ts, xts, or zoo object 166 | if(xts::is.xts(Data) | zoo::is.zoo(Data) | stats::is.ts(Data)){ 167 | Data = data.frame(date = zoo::index(Data), Data) 168 | } 169 | 170 | # training parameter creation and warnings 171 | if(exists("forecast_univariate.control_panel")){ 172 | message('forecast_univariate.control_panel exists and will be used for model estimation in its present state.') 173 | }else{ 174 | forecast_univariate.control_panel = instantiate.forecast_univariate.control_panel() 175 | message('forecast_univariate.control_panel was instantiated and default values will be used for model estimation.') 176 | } 177 | 178 | # create parallel back end 179 | if(!is.null(parallel.dates)){ 180 | future::plan(strategy = 'multisession', workers = parallel.dates) 181 | }else{ 182 | future::plan(strategy = 'sequential') 183 | } 184 | 185 | # create lists to store information 186 | list.models = list(); i = 1 187 | list.data = list(); j = 1 188 | 189 | # forecast 
routine 190 | forecasts = forecast.dates %>% 191 | furrr::future_map( 192 | .f = function(forecast.date){ 193 | 194 | #--------------------------- 195 | # Create information set 196 | #--------------------------- 197 | 198 | # subset data 199 | information.set = 200 | data_subset( 201 | Data = Data, 202 | forecast.date = forecast.date, 203 | rolling.window = rolling.window, 204 | freq = freq 205 | ) 206 | 207 | # clean outliers 208 | if(outlier.clean){ 209 | information.set = 210 | data_outliers( 211 | Data = information.set, 212 | variables = outlier.variables, 213 | w.bounds = outlier.bounds, 214 | trim = outlier.trim, 215 | cross_section = outlier.cross_section 216 | ) 217 | } 218 | 219 | # impute missing values 220 | if(impute.missing){ 221 | information.set = 222 | data_impute( 223 | Data = information.set, 224 | variables = impute.variables, 225 | method = impute.method, 226 | verbose = impute.verbose 227 | ) 228 | } 229 | 230 | # set ts object 231 | information.set = information.set %>% 232 | dplyr::select(-date) %>% 233 | as.ts() 234 | 235 | #--------------------------- 236 | # Create forecasts 237 | #--------------------------- 238 | 239 | results = 240 | methods %>% purrr::map( 241 | .f = function(engine){ 242 | 243 | # make predictions 244 | # 1. 
using direct projections 245 | if(recursive == FALSE){ 246 | 247 | # set data 248 | forecast_univariate.control_panel$arguments[[engine]]$y = information.set 249 | 250 | # estimate model 251 | model = do.call(what = forecast_univariate.control_panel$method[[engine]], 252 | args = forecast_univariate.control_panel$arguments[[engine]]) 253 | 254 | # create forecasts 255 | predictions = forecast::forecast(model, h = horizon) 256 | 257 | # create standard errors 258 | calc.error = try(predictions$lower[1]) 259 | 260 | if(is.numeric(calc.error) == TRUE){ 261 | error = (predictions$upper[,1] - predictions$lower[,1]) / 262 | (2 * qnorm(.5 + predictions$level[1] / 200)) 263 | error = as.numeric(error) 264 | }else{ 265 | se = NA 266 | } 267 | 268 | predictions = data.frame(model = engine, forecast = predictions$mean, se = error) 269 | 270 | # 2. using recursive forecasts 271 | }else{ 272 | 273 | predictions = list() 274 | forecast_univariate.control_panel$arguments[[engine]]$y = information.set 275 | 276 | for(i in 1:horizon){ 277 | 278 | # estimate model 279 | model = do.call(what = forecast_univariate.control_panel$method[[engine]], 280 | args = forecast_univariate.control_panel$arguments[[engine]]) 281 | 282 | # create forecast 283 | prediction = forecast::forecast(model, h = 1) 284 | 285 | # create standard errors 286 | calc.error = try(prediction$lower[1]) 287 | 288 | if(is.numeric(calc.error) == TRUE){ 289 | error = (prediction$upper[,1] - prediction$lower[,1]) / 290 | (2 * qnorm(.5 + prediction$level[1] / 200)) 291 | error = as.numeric(error) 292 | }else{ 293 | error = NA 294 | } 295 | 296 | predictions[[i]] = data.frame(model = engine, forecast = prediction$mean, se = error) 297 | 298 | # update information set 299 | information.set = rbind(information.set, prediction$mean[1]) %>% as.ts() 300 | forecast_univariate.control_panel$arguments[[engine]]$y = information.set 301 | 302 | } 303 | 304 | # collapse results 305 | predictions = purrr::reduce(predictions, 
dplyr::bind_rows) %>% data.frame() 306 | 307 | } 308 | 309 | # add forecast dates 310 | predictions$forecast.date = forecast.date 311 | predictions$date = seq.Date(from = forecast.date, by = freq, length.out = horizon+1)[2:(horizon+1)] 312 | 313 | # return results 314 | return( 315 | list( 316 | predictions = predictions, 317 | model = model 318 | ) 319 | ) 320 | } 321 | ) 322 | 323 | predictions = 324 | purrr::map(results, .f = function(X){return(X$predictions)}) %>% 325 | purrr::reduce(dplyr::bind_rows) 326 | 327 | models = 328 | purrr::map(results, .f = function(X){return(X$model)}) 329 | 330 | # store objects for return 331 | results = 332 | list( 333 | predictions = predictions, 334 | information.set = information.set, 335 | models = models 336 | ) 337 | 338 | # return results 339 | return(results) 340 | 341 | } 342 | ) 343 | 344 | # prepare forecasts 345 | predictions = 346 | purrr::map(forecasts, .f = function(X){return(X$predictions)}) %>% 347 | purrr::reduce(dplyr::bind_rows) 348 | 349 | # add model and information set lists to return object 350 | if(return.data == TRUE | return.models == TRUE){ 351 | information = list(forecasts = predictions) 352 | }else{ 353 | information = predictions 354 | } 355 | 356 | # prepare models 357 | if(return.models == TRUE){ 358 | models = purrr::map(forecasts, .f = function(X){return(X$models)}) 359 | names(models) = forecast.dates 360 | information[['models']] = models 361 | } 362 | 363 | # prepare information set 364 | if(return.data == TRUE){ 365 | information.set = purrr::map(forecasts, .f = function(X){return(X$information.set)}) 366 | names(information.set) = forecast.dates 367 | information[['information.set']] = information.set 368 | } 369 | 370 | # return results 371 | return(information) 372 | } 373 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Out-of-sample time series 
forecasting 2 | 3 | 4 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0) 5 | [![CRAN status](https://www.r-pkg.org/badges/version/OOS)](https://CRAN.R-project.org/package=OOS) 6 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) 7 | [![codecov](https://codecov.io/gh/tylerJPike/OOS/branch/main/graph/badge.svg?token=AQ4PFWU3KS)](https://codecov.io/gh/tylerJPike/OOS) 8 | [![Build Status](https://travis-ci.org/tylerJPike/OOS.svg?branch=main)](https://travis-ci.org/tylerJPike/OOS) 9 | 10 | 11 | Out-of-Sample time series forecasting is a common, important, and subtle task. The OOS package introduces a comprehensive and cohesive API for the out-of-sample forecasting workflow: data preparation, forecasting - including both traditional econometric time series models and modern machine learning techniques - forecast combination, model and error analysis, and forecast visualization. 12 | 13 | The key difference between OOS and the other time series forecasting packages is that it operates out-of-sample by construction. That is, it re-cleans data and re-trains models each forecast.date and is careful not to introduce look-ahead bias into its information set via data cleaning or forecasts via model training. Other packages tend to fit the model once, leaving the user to construct the out-of-sample data cleaning and forecast exercise on their own. 14 | 15 | See the OOS package [website](https://tylerjpike.github.io/OOS/) for examples and documentation. 16 | 17 | --- 18 | ## Workflow and available Tools 19 | 20 | ### 1. 
Prepare Data 21 | 22 | | Clean Outliers | Impute Missing Observations (via [imputeTS](https://github.com/SteffenMoritz/imputeTS)) | Dimension Reduction | 23 | |----------------------|------------------------|-----------------------| 24 | | Winsorize | Linear Interpolation | Principal Components | 25 | | Trim | Kalman Filter | | 26 | | | Fill-Forward | | 27 | | | Average | | 28 | | | Moving Average | | 29 | | | Seasonal Decomposition | | 30 | 31 | 32 | ### 2. Forecast 33 | 34 | | Univariate Forecasts (via [forecast](https://github.com/robjhyndman/forecast)) | Multivariate Forecasts (via [caret](https://github.com/topepo/caret)) | Forecast Combinations | 35 | |----------------------|------------------------|-----------------------| 36 | | Random Walk | Vector Autoregression | Mean| 37 | | ARIMA | Linear Regression | Median | 38 | | ETS | LASSO Regression | Trimmed (Winsorized) Mean | 39 | | Spline | Ridge Regression | N-Best | 40 | | Theta Method | Elastic Net | Linear Regression | 41 | | TBATS | Principal Component Regression | LASSO Regression | 42 | | STL | Partial Least Squares Regression | Ridge Regression | 43 | | AR Perceptron | Random Forest | Partial Egalitarian LASSO | 44 | | | Tree-Based Gradient Boosting Machine | Principal Component Regression | 45 | | | Single Layered Neural Network | Partial Least Squares Regression | 46 | | | | Random Forest | 47 | | | | Tree-Based Gradient Boosting Machine | 48 | | | | Single Layered Neural Network | 49 | 50 | 51 | ### 3. 
Analyze 52 | 53 | | Accuracy | Compare | Visualize | 54 | |----------------------|------------------------|-----------------------| 55 | | Mean Square Error (MSE) | Forecast Error Ratios | Forecasts | 56 | | Root Mean Square Error (RMSE) | Diebold-Mariano Test (for unnested models) | Errors | 57 | | Mean Absolute Error (MAE) | Clark and West Test (for nested models) | | 58 | | Mean Absolute Percentage Error (MAPE) | | | 59 | 60 | --- 61 | 62 | ## Model estimation flexibility and accessibility 63 | 64 | Users may edit any model training routine through accessing a list of function arguments. For machine learning techniques, this entails editing [caret](https://github.com/topepo/caret) arguments including: tuning grid, control grid, method, and accuracy metric. For univariate time series forecasting, this entails passing arguments to [forecast](https://github.com/robjhyndman/forecast) package model functions. For imputing missing variables, this entails passing arguments to [imputeTS](https://github.com/SteffenMoritz/imputeTS) package functions. 65 | 66 | A brief example using an `Arima` model to forecast univariate time series: 67 | 68 | # 1. create the central list of univariate model training arguments, univariate.forecast.training 69 | forecast_univariate.control_panel = instantiate.forecast_univariate.control_panel() 70 | 71 | # 2. select an item to edit, for example the Arima order to create an ARMA(1,1) 72 | # view default model arguments (there are none) 73 | forecast_univariate.control_panel$arguments[['Arima']] 74 | # add our own function arguments 75 | forecast_univariate.control_panel$arguments[['Arima']]$order = c(1,0,1) 76 | 77 | A brief example using the `Random Forest` to combine forecasts: 78 | 79 | # 1. create the central list of ML training arguments 80 | forecast_combinations.control_panel = instantiate.forecast_combinations.control_panel() 81 | 82 | # 2. 
select an item to edit, for example the random forest tuning grid 83 | # view default tuning grid 84 | forecast_combinations.control_panel$tuning.grids[['RF']] 85 | # edit tuning grid 86 | forecast_combinations.control_panel$tuning.grids[['RF']] = expand.grid(mtry = c(1:6)) 87 | --- 88 | ## Basic workflow 89 | #---------------------------------------- 90 | ### Forecasting Example 91 | #---------------------------------------- 92 | # pull and prepare data from FRED 93 | quantmod::getSymbols.FRED( 94 | c('UNRATE','INDPRO','GS10'), 95 | env = globalenv()) 96 | Data = cbind(UNRATE, INDPRO, GS10) 97 | Data = data.frame(Data, date = zoo::index(Data)) %>% 98 | dplyr::filter(lubridate::year(date) >= 1990) 99 | 100 | # run univariate forecasts 101 | forecast.uni = 102 | forecast_univariate( 103 | Data = dplyr::select(Data, date, UNRATE), 104 | forecast.dates = tail(Data$date,15), 105 | method = c('naive','auto.arima', 'ets'), 106 | horizon = 1, 107 | recursive = FALSE, 108 | 109 | # information set 110 | rolling.window = NA, 111 | freq = 'month', 112 | 113 | # outlier cleaning 114 | outlier.clean = FALSE, 115 | outlier.variables = NULL, 116 | outlier.bounds = c(0.05, 0.95), 117 | outlier.trim = FALSE, 118 | outlier.cross_section = FALSE, 119 | 120 | # impute missing 121 | impute.missing = FALSE, 122 | impute.method = 'kalman', 123 | impute.variables = NULL, 124 | impute.verbose = FALSE) 125 | 126 | # create multivariate forecasts 127 | forecast.multi = 128 | forecast_multivariate( 129 | Data = Data, 130 | forecast.date = tail(Data$date,15), 131 | target = 'UNRATE', 132 | horizon = 1, 133 | method = c('ols','lasso','ridge','elastic','GBM'), 134 | 135 | # information set 136 | rolling.window = NA, 137 | freq = 'month', 138 | 139 | # outlier cleaning 140 | outlier.clean = FALSE, 141 | outlier.variables = NULL, 142 | outlier.bounds = c(0.05, 0.95), 143 | outlier.trim = FALSE, 144 | outlier.cross_section = FALSE, 145 | 146 | # impute missing 147 | impute.missing = FALSE, 148 | 
impute.method = 'kalman', 149 | impute.variables = NULL, 150 | impute.verbose = FALSE, 151 | 152 | # dimension reduction 153 | reduce.data = FALSE, 154 | reduce.variables = NULL, 155 | reduce.ncomp = NULL, 156 | reduce.standardize = TRUE) 157 | 158 | # combine forecasts and add in observed values 159 | forecasts = 160 | dplyr::bind_rows( 161 | forecast.uni, 162 | forecast.multi) %>% 163 | dplyr::left_join( 164 | dplyr::select(Data, date, observed = UNRATE)) 165 | 166 | # forecast combinations 167 | forecast.combo = 168 | forecast_combine( 169 | forecasts, 170 | method = c('uniform','median','trimmed.mean', 171 | 'n.best','lasso','peLasso','RF'), 172 | burn.in = 5, 173 | n.max = 2) 174 | 175 | # merge forecast combinations back into forecasts 176 | forecasts = 177 | forecasts %>% 178 | dplyr::bind_rows(forecast.combo) 179 | 180 | # calculate forecast errors 181 | forecast.error = forecast_accuracy(forecasts) 182 | 183 | # view forecast errors from least to greatest 184 | # (best forecast to worst forecast method) 185 | forecast.error %>% 186 | dplyr::mutate_at(vars(-model), round, 3) %>% 187 | dplyr::arrange(MSE) 188 | 189 | # compare forecasts to the baseline (a random walk) 190 | forecast_comparison( 191 | forecasts, 192 | baseline.forecast = 'naive', 193 | test = 'ER', 194 | loss = 'MSE') %>% 195 | arrange(error.ratio) 196 | 197 | # chart forecasts 198 | chart = 199 | chart_forecast( 200 | forecasts, 201 | Title = 'US Unemployment Rate', 202 | Ylab = 'Index', 203 | Freq = 'Monthly') 204 | 205 | chart 206 | 207 | --- 208 | ## Contact 209 | If you should have questions, concerns, or wish to collaborate, please contact [Tyler J. 
Pike](https://tylerjpike.github.io/) 210 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Resubmission 2 | This is a resubmission. In this version I have: 3 | * Changed \dontrun to \donttest in function documentation examples 4 | * Added function documentation examples to all primary user-facing functions 5 | * Added \value to pipe.Rd 6 | 7 | ## Test environments 8 | * local Windows install, R 4.0.3 9 | * win-builder (devel and release) 10 | * Ubuntu 16.04.6 (on travis-ci), R 4.0.2 11 | * R-hub Ubuntu Linux 20.04.1 LTS, R-release 12 | * R-hub Fedora Linux, R-devel 13 | 14 | ## R CMD check results 15 | There were no ERRORs or WARNINGs. 16 | 17 | There is one NOTE since this is a new package submission. 18 | 19 | ## Downstream dependencies 20 | There are currently no downstream dependencies for this package. 
-------------------------------------------------------------------------------- /man/NBest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_combinations.R 3 | \name{NBest} 4 | \alias{NBest} 5 | \title{Select N-best forecasts} 6 | \usage{ 7 | NBest(forecasts, n.max, window = NA) 8 | } 9 | \arguments{ 10 | \item{forecasts}{data.frame: a data frame of forecasts to combine, assumes one column named "observed"} 11 | 12 | \item{n.max}{int: maximum number of forecasts to select} 13 | 14 | \item{window}{int: size of rolling window to evaluate forecast error over, use entire period if NA} 15 | } 16 | \value{ 17 | data.frame with n columns of the historically best forecasts 18 | } 19 | \description{ 20 | A function to subset the n-best forecasts; 21 | assumes column named observed. 22 | } 23 | -------------------------------------------------------------------------------- /man/chart_forecast.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_chart.R 3 | \name{chart_forecast} 4 | \alias{chart_forecast} 5 | \title{Chart forecasts} 6 | \usage{ 7 | chart_forecast(Data, Title, Ylab, Freq, zeroline = FALSE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: oos.forecast object} 11 | 12 | \item{Title}{string: chart title} 13 | 14 | \item{Ylab}{string: y-axis label} 15 | 16 | \item{Freq}{string: frequency (acts as sub-title)} 17 | 18 | \item{zeroline}{boolean: if TRUE then add a horizontal line at zero} 19 | } 20 | \value{ 21 | ggplot2 chart 22 | } 23 | \description{ 24 | Chart forecasts 25 | } 26 | \examples{ 27 | \donttest{ 28 | 29 | # simple time series 30 | A = c(1:100) + rnorm(100) 31 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 32 | Data = data.frame(date = date, A) 33 | 34 | # run 
forecast_univariate 35 | forecast.uni = 36 | forecast_univariate( 37 | Data = Data, 38 | forecast.dates = tail(Data$date,10), 39 | method = c('naive','auto.arima', 'ets'), 40 | horizon = 1, 41 | recursive = FALSE, 42 | freq = 'month') 43 | 44 | forecasts = 45 | dplyr::left_join( 46 | forecast.uni, 47 | data.frame(date, observed = A), 48 | by = 'date' 49 | ) 50 | 51 | # chart forecasts 52 | chart.forecast = 53 | chart_forecast( 54 | forecasts, 55 | Title = 'test', 56 | Ylab = 'Index', 57 | Freq = 'Monthly', 58 | zeroline = TRUE) 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /man/chart_forecast_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_chart.R 3 | \name{chart_forecast_error} 4 | \alias{chart_forecast_error} 5 | \title{Chart forecast errors} 6 | \usage{ 7 | chart_forecast_error(Data, Title, Ylab, Freq, zeroline = FALSE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: oos.forecast object} 11 | 12 | \item{Title}{string: chart title} 13 | 14 | \item{Ylab}{string: y-axis label} 15 | 16 | \item{Freq}{string: frequency (acts as sub-title)} 17 | 18 | \item{zeroline}{boolean: if TRUE then add a horizontal line at zero} 19 | } 20 | \value{ 21 | ggplot2 chart 22 | } 23 | \description{ 24 | Chart forecast errors 25 | } 26 | \examples{ 27 | \donttest{ 28 | 29 | # simple time series 30 | A = c(1:100) + rnorm(100) 31 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 32 | Data = data.frame(date = date, A) 33 | 34 | # run forecast_univariate 35 | forecast.uni = 36 | forecast_univariate( 37 | Data = Data, 38 | forecast.dates = tail(Data$date,10), 39 | method = c('naive','auto.arima', 'ets'), 40 | horizon = 1, 41 | recursive = FALSE, 42 | freq = 'month') 43 | 44 | forecasts = 45 | dplyr::left_join( 46 | forecast.uni, 47 | data.frame(date, observed = 
A), 48 | by = 'date' 49 | ) 50 | 51 | # chart forecast errors 52 | chart.errors = 53 | chart_forecast_error( 54 | forecasts, 55 | Title = 'test', 56 | Ylab = 'Index', 57 | Freq = 'Monthly', 58 | zeroline = TRUE) 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /man/data_impute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_impute} 4 | \alias{data_impute} 5 | \title{Impute missing values} 6 | \usage{ 7 | data_impute(Data, method = "kalman", variables = NULL, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 11 | 12 | \item{method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'} 13 | 14 | \item{variables}{string: vector of variables to standardize, default is all but 'date' column} 15 | 16 | \item{verbose}{boolean: show start-up status of impute.missing.routine} 17 | } 18 | \value{ 19 | data.frame with missing data imputed 20 | } 21 | \description{ 22 | A function to impute missing values. Is used as a data preparation helper function and is called internally 23 | by forecast_univariate, forecast_multivariate, and forecast_combine. 
24 | } 25 | -------------------------------------------------------------------------------- /man/data_outliers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_outliers} 4 | \alias{data_outliers} 5 | \title{Clean outliers} 6 | \usage{ 7 | data_outliers( 8 | Data, 9 | variables = NULL, 10 | w.bounds = c(0.05, 0.95), 11 | trim = FALSE, 12 | cross_section = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 17 | 18 | \item{variables}{string: vector of variables to standardize, default is all but 'date' column} 19 | 20 | \item{w.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 21 | 22 | \item{trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 23 | 24 | \item{cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)} 25 | } 26 | \value{ 27 | data.frame with a date column and one column per forecast method selected 28 | } 29 | \description{ 30 | A function to clean outliers. Is used as a data preparation helper function and is called internally 31 | by forecast_univariate, forecast_multivariate, and forecast_combine. 
32 | } 33 | -------------------------------------------------------------------------------- /man/data_reduction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_reduction} 4 | \alias{data_reduction} 5 | \title{Dimension reduction via principal components} 6 | \usage{ 7 | data_reduction(Data, variables = NULL, ncomp, standardize = TRUE) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 11 | 12 | \item{variables}{string: vector of variables to standardize, default is all but 'date' column} 13 | 14 | \item{ncomp}{int: number of factors to create} 15 | 16 | \item{standardize}{boolean: normalize variables (mean zero, variance one) before estimating factors} 17 | } 18 | \value{ 19 | data.frame with a date column and one column per forecast method selected 20 | } 21 | \description{ 22 | A function to estimate principal components. 
23 | } 24 | -------------------------------------------------------------------------------- /man/data_subset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{data_subset} 4 | \alias{data_subset} 5 | \title{Create information set} 6 | \usage{ 7 | data_subset(Data, forecast.date, rolling.window, freq) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')} 11 | 12 | \item{forecast.date}{date: upper bound of information set} 13 | 14 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 15 | 16 | \item{freq}{string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors} 17 | } 18 | \value{ 19 | data.frame bounded by the given date range 20 | } 21 | \description{ 22 | A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation 23 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine. 24 | } 25 | -------------------------------------------------------------------------------- /man/forecast_accuracy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_metrics.R 3 | \name{forecast_accuracy} 4 | \alias{forecast_accuracy} 5 | \title{Calculate forecast accuracy} 6 | \usage{ 7 | forecast_accuracy(Data) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of forecasts, model names, and dates} 11 | } 12 | \value{ 13 | data.frame of numeric error results 14 | } 15 | \description{ 16 | A function to calculate various loss functions, including 17 | MSE, RMSE, MAE, and MAPE. 
18 | } 19 | \examples{ 20 | \donttest{ 21 | 22 | # simple time series 23 | A = c(1:100) + rnorm(100) 24 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 25 | Data = data.frame(date = date, A) 26 | 27 | # run forecast_univariate 28 | forecast.uni = 29 | forecast_univariate( 30 | Data = Data, 31 | forecast.dates = tail(Data$date,10), 32 | method = c('naive','auto.arima', 'ets'), 33 | horizon = 1, 34 | recursive = FALSE, 35 | freq = 'month') 36 | 37 | forecasts = 38 | dplyr::left_join( 39 | forecast.uni, 40 | data.frame(date, observed = A), 41 | by = 'date' 42 | ) 43 | 44 | # forecast accuracy 45 | forecast.accuracy = forecast_accuracy(forecasts) 46 | 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /man/forecast_combine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_combinations.R 3 | \name{forecast_combine} 4 | \alias{forecast_combine} 5 | \title{Forecast with forecast combinations} 6 | \usage{ 7 | forecast_combine( 8 | Data, 9 | method = "unform", 10 | n.max = NULL, 11 | rolling.window = NA, 12 | trim = c(0.5, 0.95), 13 | burn.in = 1, 14 | parallel.dates = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{Data}{data.frame: data frame of forecasted values to combine, assumes 'date' and 'observed' columns, but `observed' is not necessary for all methods} 19 | 20 | \item{method}{string: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'} 21 | 22 | \item{n.max}{int: maximum number of forecasts to select in n.best method} 23 | 24 | \item{rolling.window}{int: size of rolling window to evaluate forecast error over, use entire period if NA} 25 | 26 | \item{trim}{numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)} 27 | 28 | 
\item{burn.in}{int: the number of periods to use in the first model estimation} 29 | 30 | \item{parallel.dates}{int: the number of cores available for parallel estimation} 31 | } 32 | \value{ 33 | data.frame with a row for each combination method and forecasted date 34 | } 35 | \description{ 36 | A function to combine forecasts out-of-sample. Methods available include: 37 | uniform weights, median forecast, trimmed (winsorized) mean, n-best, 38 | ridge regression, lasso regression, elastic net, peLASSO, 39 | random forest, tree-based gradient boosting machine, and single-layer neural network. 40 | See package website for most up-to-date list of available models. 41 | } 42 | \examples{ 43 | \donttest{ 44 | # simple time series 45 | A = c(1:100) + rnorm(100) 46 | B = c(1:100) + rnorm(100) 47 | C = c(1:100) + rnorm(100) 48 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 49 | Data = data.frame(date = date, A, B, C) 50 | 51 | # run forecast_multivariate 52 | forecast.multi = 53 | forecast_multivariate( 54 | Data = Data, 55 | target = 'A', 56 | forecast.dates = tail(Data$date,5), 57 | method = c('ols','var'), 58 | horizon = 1, 59 | freq = 'month') 60 | # include observed values 61 | forecasts = 62 | dplyr::left_join( 63 | forecast.multi, 64 | data.frame(date, observed = A), 65 | by = 'date' 66 | ) 67 | 68 | # combine forecasts 69 | combinations = 70 | forecast_combine( 71 | forecasts, 72 | method = c('uniform','median','trimmed.mean', 73 | 'n.best','lasso','peLasso'), 74 | burn.in = 5, 75 | n.max = 2) 76 | } 77 | 78 | 79 | } 80 | -------------------------------------------------------------------------------- /man/forecast_comparison.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_metrics.R 3 | \name{forecast_comparison} 4 | \alias{forecast_comparison} 5 | \title{Compare forecast accuracy} 6 | \usage{ 7 | 
forecast_comparison( 8 | Data, 9 | baseline.forecast, 10 | test = "ER", 11 | loss = "MSE", 12 | horizon = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{Data}{data.frame: data frame of forecasts, model names, and dates} 17 | 18 | \item{baseline.forecast}{string: column name of baseline (null hypothesis) forecasts} 19 | 20 | \item{test}{string: which test to use; ER = error ratio, DM = Diebold-Mariano, CM = Clark and West} 21 | 22 | \item{loss}{string: error loss function to use if creating forecast error ratio} 23 | 24 | \item{horizon}{int: horizon of forecasts being compared in DM and CW tests} 25 | } 26 | \value{ 27 | numeric test result 28 | } 29 | \description{ 30 | A function to compare forecasts. Options include: simple forecast error ratios, 31 | \href{https://www.sas.upenn.edu/~fdiebold/papers/paper68/pa.dm.pdf}{Diebold-Mariano test}, and \href{https://www.nber.org/papers/t0326}{Clark and West test} for nested models 32 | } 33 | \examples{ 34 | \donttest{ 35 | 36 | # simple time series 37 | A = c(1:100) + rnorm(100) 38 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 39 | Data = data.frame(date = date, A) 40 | 41 | # run forecast_univariate 42 | forecast.uni = 43 | forecast_univariate( 44 | Data = Data, 45 | forecast.dates = tail(Data$date,10), 46 | method = c('naive','auto.arima', 'ets'), 47 | horizon = 1, 48 | recursive = FALSE, 49 | freq = 'month') 50 | 51 | forecasts = 52 | dplyr::left_join( 53 | forecast.uni, 54 | data.frame(date, observed = A), 55 | by = 'date' 56 | ) 57 | 58 | # run ER (MSE) 59 | er.ratio.mse = 60 | forecast_comparison( 61 | forecasts, 62 | baseline.forecast = 'naive', 63 | test = 'ER', 64 | loss = 'MSE') 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /man/forecast_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/data_preparation.R 3 | \name{forecast_date} 4 | \alias{forecast_date} 5 | \title{Set forecasted date} 6 | \usage{ 7 | forecast_date(forecast.date, horizon, freq) 8 | } 9 | \arguments{ 10 | \item{forecast.date}{date: date forecast was made} 11 | 12 | \item{horizon}{int: periods ahead of forecast} 13 | 14 | \item{freq}{string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors} 15 | } 16 | \value{ 17 | date vector 18 | } 19 | \description{ 20 | A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation 21 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine. 22 | } 23 | -------------------------------------------------------------------------------- /man/forecast_multivariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_multivariate.R 3 | \name{forecast_multivariate} 4 | \alias{forecast_multivariate} 5 | \title{Forecast with multivariate models} 6 | \usage{ 7 | forecast_multivariate( 8 | Data, 9 | forecast.dates, 10 | target, 11 | horizon, 12 | method, 13 | rolling.window = NA, 14 | freq, 15 | lag.variables = NULL, 16 | lag.n = NULL, 17 | outlier.clean = FALSE, 18 | outlier.variables = NULL, 19 | outlier.bounds = c(0.05, 0.95), 20 | outlier.trim = FALSE, 21 | outlier.cross_section = FALSE, 22 | impute.missing = FALSE, 23 | impute.method = "kalman", 24 | impute.variables = NULL, 25 | impute.verbose = FALSE, 26 | reduce.data = FALSE, 27 | reduce.variables = NULL, 28 | reduce.ncomp = NULL, 29 | reduce.standardize = TRUE, 30 | parallel.dates = NULL, 31 | return.models = FALSE, 32 | return.data = FALSE 33 | ) 34 | } 35 | \arguments{ 36 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a 
\code{ts}, \code{xts}, or \code{zoo} object to forecast} 37 | 38 | \item{forecast.dates}{date: dates forecasts are created} 39 | 40 | \item{target}{string: column name in Data of variable to forecast} 41 | 42 | \item{horizon}{int: number of periods into the future to forecast} 43 | 44 | \item{method}{string: methods to use} 45 | 46 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 47 | 48 | \item{freq}{string: time series frequency; day, week, month, quarter, year} 49 | 50 | \item{lag.variables}{string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables} 51 | 52 | \item{lag.n}{int: number of lags to create} 53 | 54 | \item{outlier.clean}{boolean: if TRUE then clean outliers} 55 | 56 | \item{outlier.variables}{string: vector of variables to purge of outlier, default is all but 'date' column} 57 | 58 | \item{outlier.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 59 | 60 | \item{outlier.trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 61 | 62 | \item{outlier.cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)} 63 | 64 | \item{impute.missing}{boolean: if TRUE then impute missing values} 65 | 66 | \item{impute.method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'} 67 | 68 | \item{impute.variables}{string: vector of variables to impute missing values, default is all numeric columns} 69 | 70 | \item{impute.verbose}{boolean: show start-up status of impute.missing.routine} 71 | 72 | \item{reduce.data}{boolean: if TRUE then reduce dimension} 73 | 74 | \item{reduce.variables}{string: vector of variables to impute missing values, default is all numeric columns} 75 | 76 | \item{reduce.ncomp}{int: number of factors 
to create} 77 | 78 | \item{reduce.standardize}{boolean: normalize variables (mean zero, variance one) before estimating factors} 79 | 80 | \item{parallel.dates}{int: the number of cores available for parallel estimation} 81 | 82 | \item{return.models}{boolean: if TRUE then return list of models estimated each forecast.date} 83 | 84 | \item{return.data}{boolean: if True then return list of information.set for each forecast.date} 85 | } 86 | \value{ 87 | data.frame with a row for each forecast by model and forecasted date 88 | } 89 | \description{ 90 | A function to estimate multivariate forecasts out-of-sample. Methods available include: 91 | vector auto-regression, linear regression, lasso regression, ridge regression, elastic net, 92 | random forest, tree-based gradient boosting machine, and single-layer neural network. 93 | See package website for most up-to-date list of available models. 94 | } 95 | \examples{ 96 | \donttest{ 97 | # simple time series 98 | A = c(1:100) + rnorm(100) 99 | B = c(1:100) + rnorm(100) 100 | C = c(1:100) + rnorm(100) 101 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 102 | Data = data.frame(date = date, A, B, C) 103 | 104 | # run forecast_univariate 105 | forecast.multi = 106 | forecast_multivariate( 107 | Data = Data, 108 | target = 'A', 109 | forecast.dates = tail(Data$date,5), 110 | method = c('ols','var'), 111 | horizon = 1, 112 | # information set 113 | rolling.window = NA, 114 | freq = 'month', 115 | # data prep 116 | lag.n = 4, 117 | outlier.clean = TRUE, 118 | impute.missing = TRUE) 119 | } 120 | 121 | 122 | } 123 | -------------------------------------------------------------------------------- /man/forecast_univariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_univariate.R 3 | \name{forecast_univariate} 4 | \alias{forecast_univariate} 5 | \title{Forecast with 
univariate models} 6 | \usage{ 7 | forecast_univariate( 8 | Data, 9 | forecast.dates, 10 | methods, 11 | horizon, 12 | recursive = TRUE, 13 | rolling.window = NA, 14 | freq, 15 | outlier.clean = FALSE, 16 | outlier.variables = NULL, 17 | outlier.bounds = c(0.05, 0.95), 18 | outlier.trim = FALSE, 19 | outlier.cross_section = FALSE, 20 | impute.missing = FALSE, 21 | impute.method = "kalman", 22 | impute.variables = NULL, 23 | impute.verbose = FALSE, 24 | parallel.dates = NULL, 25 | return.models = FALSE, 26 | return.data = FALSE 27 | ) 28 | } 29 | \arguments{ 30 | \item{Data}{data.frame: data frame of variable to forecast and a date column; may alternatively be a \code{ts}, \code{xts}, or \code{zoo} object to forecast} 31 | 32 | \item{forecast.dates}{date: dates forecasts are created} 33 | 34 | \item{methods}{string: models to estimate forecasts} 35 | 36 | \item{horizon}{int: number of periods to forecast} 37 | 38 | \item{recursive}{boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE} 39 | 40 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 41 | 42 | \item{freq}{string: time series frequency; day, week, month, quarter, year} 43 | 44 | \item{outlier.clean}{boolean: if TRUE then clean outliers} 45 | 46 | \item{outlier.variables}{string: vector of variables to purge of outliers, default is all but 'date' column} 47 | 48 | \item{outlier.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 49 | 50 | \item{outlier.trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 51 | 52 | \item{outlier.cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)} 53 | 54 | \item{impute.missing}{boolean: if TRUE then impute missing values} 55 | 56 | \item{impute.method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 
'mean', 'random', 'remove','replace', 'seadec', 'seasplit'} 57 | 58 | \item{impute.variables}{string: vector of variables to impute missing values, default is all numeric columns} 59 | 60 | \item{impute.verbose}{boolean: show start-up status of impute.missing.routine} 61 | 62 | \item{parallel.dates}{int: the number of cores available for parallel estimation} 63 | 64 | \item{return.models}{boolean: if TRUE then return list of models estimated each forecast.date} 65 | 66 | \item{return.data}{boolean: if TRUE then return list of information.set for each forecast.date} 67 | } 68 | \value{ 69 | data.frame with a row for each forecast by model and forecasted date 70 | } 71 | \description{ 72 | A function to estimate univariate forecasts out-of-sample. Methods available include all forecast 73 | methods from the \code{forecast} package. See package website for most up-to-date list of available models. 74 | } 75 | \examples{ 76 | \donttest{ 77 | # simple time series 78 | A = c(1:100) + rnorm(100) 79 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 80 | Data = data.frame(date = date, A) 81 | 82 | # estimate univariate forecasts 83 | forecast.uni = 84 | forecast_univariate( 85 | Data = Data, 86 | forecast.dates = tail(Data$date,5), 87 | method = c('naive','auto.arima', 'ets'), 88 | horizon = 1, 89 | recursive = FALSE, 90 | # information set 91 | rolling.window = NA, 92 | freq = 'month', 93 | # data prep 94 | outlier.clean = TRUE, 95 | impute.missing = TRUE) 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /man/instantiate.data_impute.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{instantiate.data_impute.control_panel} 4 | \alias{instantiate.data_impute.control_panel} 5 | \title{Create interface to control \code{data_impute} 
model estimation} 6 | \usage{ 7 | instantiate.data_impute.control_panel() 8 | } 9 | \value{ 10 | data_impute.control_panel 11 | } 12 | \description{ 13 | A function to create the data imputation method 14 | arguments list for user manipulation. 15 | } 16 | -------------------------------------------------------------------------------- /man/instantiate.forecast_combinations.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_combinations.R 3 | \name{instantiate.forecast_combinations.control_panel} 4 | \alias{instantiate.forecast_combinations.control_panel} 5 | \title{Create interface to control \code{forecast_combine} model estimation} 6 | \usage{ 7 | instantiate.forecast_combinations.control_panel(covariates = NULL) 8 | } 9 | \arguments{ 10 | \item{covariates}{int: the number of features that will go into the model} 11 | } 12 | \value{ 13 | forecast_combinations.control_panel 14 | } 15 | \description{ 16 | A function to create the forecast combination technique arguments list 17 | for user manipulation. 
18 | } 19 | -------------------------------------------------------------------------------- /man/instantiate.forecast_multivariate.ml.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_multivariate.R 3 | \name{instantiate.forecast_multivariate.ml.control_panel} 4 | \alias{instantiate.forecast_multivariate.ml.control_panel} 5 | \title{Create interface to control \code{forecast_multivariate} ML estimation} 6 | \usage{ 7 | instantiate.forecast_multivariate.ml.control_panel( 8 | covariates = NULL, 9 | rolling.window = NULL, 10 | horizon = NULL 11 | ) 12 | } 13 | \arguments{ 14 | \item{covariates}{int: the number of features that will go into the model} 15 | 16 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used} 17 | 18 | \item{horizon}{int: number of periods into the future to forecast} 19 | } 20 | \value{ 21 | forecast_multivariate.ml.control_panel 22 | } 23 | \description{ 24 | A function to create the multivariate forecast methods 25 | arguments list for user manipulation. 26 | } 27 | -------------------------------------------------------------------------------- /man/instantiate.forecast_multivariate.var.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_multivariate.R 3 | \name{instantiate.forecast_multivariate.var.control_panel} 4 | \alias{instantiate.forecast_multivariate.var.control_panel} 5 | \title{Create interface to control \code{forecast_multivariate} VAR estimation} 6 | \usage{ 7 | instantiate.forecast_multivariate.var.control_panel() 8 | } 9 | \value{ 10 | forecast_multivariate.var.control_panel 11 | } 12 | \description{ 13 | A function to create the multivariate forecast methods 14 | arguments list for user manipulation. 
15 | } 16 | -------------------------------------------------------------------------------- /man/instantiate.forecast_univariate.control_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_univariate.R 3 | \name{instantiate.forecast_univariate.control_panel} 4 | \alias{instantiate.forecast_univariate.control_panel} 5 | \title{Create interface to control \code{forecast_univariate} model estimation} 6 | \usage{ 7 | instantiate.forecast_univariate.control_panel() 8 | } 9 | \value{ 10 | forecast_univariate.control_panel 11 | } 12 | \description{ 13 | A function to create the univariate forecast method arguments list 14 | for user manipulation. 15 | } 16 | -------------------------------------------------------------------------------- /man/loss_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/forecast_metrics.R 3 | \name{loss_function} 4 | \alias{loss_function} 5 | \title{Calculate error via loss functions} 6 | \usage{ 7 | loss_function(forecast, observed, metric = "MSE") 8 | } 9 | \arguments{ 10 | \item{forecast}{numeric: vector of forecasted values} 11 | 12 | \item{observed}{numeric: vector of observed values} 13 | 14 | \item{metric}{string: loss function} 15 | } 16 | \value{ 17 | numeric test result 18 | } 19 | \description{ 20 | A function to calculate various error loss functions. Options include: 21 | MSE, RMSE, MAE, and MAPE. The default is MSE loss. 
22 | } 23 | -------------------------------------------------------------------------------- /man/n.lag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{n.lag} 4 | \alias{n.lag} 5 | \title{Create n lags} 6 | \usage{ 7 | n.lag(Data, lags, variables = NULL) 8 | } 9 | \arguments{ 10 | \item{Data}{data.frame: data frame of variables to lag and a 'date' column} 11 | 12 | \item{lags}{int: number of lags to create} 13 | 14 | \item{variables}{string: vector of variable names to lag, default is all non-date variables} 15 | } 16 | \value{ 17 | data.frame 18 | } 19 | \description{ 20 | A function to create 1 through n lags of a set of variables. Is used as a data preparation 21 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine. 22 | } 23 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external_imports.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \value{ 10 | magrittr pipe operator \%>\% 11 | } 12 | \description{ 13 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/standardize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{standardize} 4 | \alias{standardize} 5 | \title{Standardize variables (mean 0, variance 1)} 6 | \usage{ 7 | standardize(X) 8 | } 9 | \arguments{ 10 | \item{X}{numeric: vector to be standardized} 11 | } 12 | \value{ 13 | numeric vector of standardized values 14 | } 15 | \description{ 16 | Standardize variables (mean 0, variance 1) 17 | } 18 | -------------------------------------------------------------------------------- /man/winsorize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_preparation.R 3 | \name{winsorize} 4 | \alias{winsorize} 5 | \title{Winsorize or trim variables} 6 | \usage{ 7 | winsorize(X, bounds, trim = FALSE) 8 | } 9 | \arguments{ 10 | \item{X}{numeric: vector to be winsorized or trimmed} 11 | 12 | \item{bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)} 13 | 14 | \item{trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound} 15 | } 16 | \value{ 17 | numeric vector of winsorized or trimmed values 18 | } 19 | \description{ 20 | Winsorize or trim variables 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(OOS) 3 | 4 | test_check("OOS") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_chart.R: -------------------------------------------------------------------------------- 1 | 
test_that("forecast chart", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 6 | Data = data.frame(date = date, A) 7 | 8 | # run forecast_univariate 9 | forecast.uni = 10 | forecast_univariate( 11 | Data = Data, 12 | forecast.dates = tail(Data$date,10), 13 | method = c('naive','auto.arima', 'ets'), 14 | horizon = 1, 15 | recursive = FALSE, 16 | freq = 'month') 17 | 18 | forecasts = 19 | dplyr::left_join( 20 | forecast.uni, 21 | data.frame(date, observed = A), 22 | by = 'date' 23 | ) 24 | 25 | # chart forecasts 26 | chart.forecast = 27 | chart_forecast( 28 | forecasts, 29 | Title = 'test', 30 | Ylab = 'Index', 31 | Freq = 'Monthly', 32 | zeroline = TRUE) 33 | 34 | expect_true(exists('chart.forecast'), 'Chart is not created.') 35 | 36 | # chart forecast errors 37 | chart.errors = 38 | chart_forecast_error( 39 | forecasts, 40 | Title = 'test', 41 | Ylab = 'Index', 42 | Freq = 'Monthly', 43 | zeroline = TRUE) 44 | 45 | expect_true(exists('chart.errors'), 'Chart is not created.') 46 | 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_combination.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_combine produces standard output", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | B = c(1:100) + rnorm(100) 6 | C = c(1:100) + rnorm(100) 7 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 8 | Data = data.frame(date = date, A, B, C) 9 | 10 | # run forecast_univariate 11 | forecast.multi = 12 | forecast_multivariate( 13 | Data = Data, 14 | target = 'A', 15 | forecast.dates = tail(Data$date,5), 16 | method = c('ols','var'), 17 | horizon = 1, 18 | freq = 'month') 19 | 20 | forecasts = 21 | dplyr::left_join( 22 | forecast.multi, 23 | data.frame(date, observed = A), 24 | by = 'date' 25 | ) 26 | 27 | # combine 
forecasts 28 | combinations = 29 | forecast_combine( 30 | forecasts, 31 | method = c('uniform','median','trimmed.mean', 32 | 'n.best','lasso','peLasso'), 33 | burn.in = 5, 34 | n.max = 2) 35 | 36 | # expect formats 37 | expect_true(is.data.frame(combinations), 'forecast_combine is not a proper data.frame') 38 | 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_metrics.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_comparison", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 6 | Data = data.frame(date = date, A) 7 | 8 | # create forecasts 9 | forecast.uni = 10 | forecast_univariate( 11 | Data = Data, 12 | forecast.dates = tail(Data$date,10), 13 | method = c('naive','auto.arima', 'ets'), 14 | horizon = 1, 15 | recursive = FALSE, 16 | freq = 'month') 17 | 18 | forecasts = 19 | dplyr::left_join( 20 | forecast.uni, 21 | data.frame(date, observed = A), 22 | by = 'date' 23 | ) 24 | 25 | # run ER (MSE) 26 | er.ratio.mse = 27 | forecast_comparison( 28 | forecasts, 29 | baseline.forecast = 'naive', 30 | test = 'ER', 31 | loss = 'MSE') 32 | 33 | expect_true(is.data.frame(er.ratio.mse),'Error ratio (MSE) is not a data.frame') 34 | expect_true(!is.na(mean(er.ratio.mse$error.ratio)) | 35 | !is.nan(mean(er.ratio.mse$error.ratio)),'Error ratio (MSE) is NA or NAN') 36 | 37 | # run ER (RMSE) 38 | er.ratio.rmse = 39 | forecast_comparison( 40 | forecasts, 41 | baseline.forecast = 'naive', 42 | test = 'ER', 43 | loss = 'RMSE') 44 | 45 | expect_true(is.data.frame(er.ratio.rmse),'Error ratio (RMSE) is not a data.frame') 46 | expect_true(!is.na(mean(er.ratio.rmse$error.ratio)) | 47 | !is.nan(mean(er.ratio.rmse$error.ratio)),'Error ratio (RMSE) is NA or NAN') 48 | 49 | # run ER (MAE) 50 | er.ratio.mae = 51 | forecast_comparison( 52 | forecasts, 53 | 
baseline.forecast = 'naive', 54 | test = 'ER', 55 | loss = 'MAE') 56 | 57 | expect_true(is.data.frame(er.ratio.mae),'Error ratio (MAPE) is not a data.frame') 58 | expect_true(!is.na(mean(er.ratio.mae$error.ratio)) | 59 | !is.nan(mean(er.ratio.mae$error.ratio)),'Error ratio (MAPE) is NA or NAN') 60 | 61 | # run ER (MAPE) 62 | er.ratio.mape = 63 | forecast_comparison( 64 | forecasts, 65 | baseline.forecast = 'naive', 66 | test = 'ER', 67 | loss = 'MAPE') 68 | 69 | expect_true(is.data.frame(er.ratio.mape),'Error ratio (MAPE) is not a data.frame') 70 | expect_true(!is.na(mean(er.ratio.mape$error.ratio)) | 71 | !is.nan(mean(er.ratio.mape$error.ratio)),'Error ratio (MAPE) is NA or NAN') 72 | 73 | # run DM test 74 | dm.test = 75 | forecast_comparison( 76 | forecasts, 77 | baseline.forecast = 'naive', 78 | test = 'DM') 79 | 80 | expect_true(is.data.frame(dm.test),'DM test is not a data.frame') 81 | expect_true(!is.na(mean(dm.test$error.ratio)) | 82 | !is.nan(mean(dm.test$error.ratio)),'DM test is NA or NAN') 83 | 84 | # run DM test 85 | cw.test = 86 | forecast_comparison( 87 | forecasts, 88 | baseline.forecast = 'naive', 89 | test = 'CW', 90 | horizon = 1) 91 | 92 | expect_true(is.data.frame(cw.test),'DM test is not a data.frame') 93 | expect_true(!is.na(mean(cw.test$error.ratio)) | 94 | !is.nan(mean(cw.test$error.ratio)),'DM test is NA or NAN') 95 | 96 | }) 97 | 98 | test_that("forecast_accuracy", { 99 | 100 | # simple time series 101 | A = c(1:100) + rnorm(100) 102 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 103 | Data = data.frame(date = date, A) 104 | 105 | # create forecast 106 | forecast.uni = 107 | forecast_univariate( 108 | Data = Data, 109 | forecast.dates = tail(Data$date,10), 110 | method = c('naive','auto.arima', 'ets'), 111 | horizon = 1, 112 | recursive = FALSE, 113 | freq = 'month') 114 | 115 | forecasts = 116 | dplyr::left_join( 117 | forecast.uni, 118 | data.frame(date, observed = A), 119 | by = 'date' 120 | ) 121 | 122 
| # forecast accuracy 123 | forecast.accuracy = forecast_accuracy(forecasts) 124 | 125 | expect_true(is.data.frame(forecast.accuracy),'Accuracy is not a data.frame') 126 | expect_true(!is.na(sum(forecast.accuracy[,2:5])),'Accuracy is NA or NAN') 127 | 128 | 129 | }) 130 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_multivariate.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_multivariate produces standard output", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | B = c(1:100) + rnorm(100) 6 | C = c(1:100) + rnorm(100) 7 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 8 | Data = data.frame(date = date, A, B, C) 9 | 10 | # run forecast_univariate 11 | forecast.multi = 12 | try( 13 | forecast_multivariate( 14 | Data = Data, 15 | target = 'A', 16 | forecast.dates = tail(Data$date,5), 17 | method = c('ols','var'), 18 | horizon = 1, 19 | # information set 20 | rolling.window = NA, 21 | freq = 'month', 22 | # data prep 23 | lag.n = 4, 24 | outlier.clean = TRUE, 25 | impute.missing = TRUE, 26 | # return 27 | return.models = TRUE, 28 | return.data = TRUE) 29 | ) 30 | 31 | # expect formats 32 | expect_true(is.data.frame(forecast.multi$forecasts), 'forecasts is not a proper data.frame') 33 | expect_true(is.list(forecast.multi$models), 'models is not a proper list') 34 | expect_true(is.list(forecast.multi$information.set), 'information set is not a proper list') 35 | 36 | # expect proper names and numbers of outputs 37 | expect_equal(names(forecast.multi$models), as.character(tail(Data$date,5))) 38 | expect_equal(names(forecast.multi$information.set), as.character(tail(Data$date,5))) 39 | 40 | 41 | }) 42 | 43 | test_that("forecast_multivariate produces standard output", { 44 | 45 | # simple time series 46 | A = c(1:100) + rnorm(100) 47 | B = c(1:100) + rnorm(100) 48 | C = c(1:100) + rnorm(100) 49 | date = 
seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 50 | Data = data.frame(date = date, A, B, C) 51 | 52 | # run forecast_univariate 53 | forecast.multi = 54 | try( 55 | forecast_multivariate( 56 | Data = Data, 57 | target = 'A', 58 | forecast.dates = tail(Data$date,5), 59 | method = c('ols','var'), 60 | horizon = 1, 61 | # information set 62 | rolling.window = NA, 63 | freq = 'month', 64 | # data prep 65 | lag.n = 4, 66 | outlier.clean = TRUE, 67 | impute.missing = TRUE, 68 | reduce.data = TRUE, 69 | reduce.ncomp = 1, 70 | return.models = TRUE, 71 | return.data = TRUE, 72 | ) 73 | ) 74 | 75 | # expect formats 76 | expect_true(is.data.frame(forecast.multi$forecasts), 'forecasts is not a proper data.frame') 77 | expect_true(is.list(forecast.multi$models), 'models is not a proper list') 78 | expect_true(is.list(forecast.multi$information.set), 'information set is not a proper list') 79 | 80 | # expect proper names and numbers of outputs 81 | expect_equal(names(forecast.multi$models), as.character(tail(Data$date,5))) 82 | expect_equal(names(forecast.multi$information.set), as.character(tail(Data$date,5))) 83 | 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-forecast_univariate.R: -------------------------------------------------------------------------------- 1 | test_that("forecast_univariate (direct projection) produces standard output", { 2 | 3 | # simple time series 4 | A = c(1:100) + rnorm(100) 5 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 6 | Data = data.frame(date = date, A) 7 | 8 | # run forecast_univariate 9 | forecast.uni = 10 | try( 11 | forecast_univariate( 12 | Data = Data, 13 | forecast.dates = tail(Data$date,5), 14 | method = c('naive','auto.arima', 'ets'), 15 | horizon = 1, 16 | recursive = FALSE, 17 | # information set 18 | rolling.window = NA, 19 | freq = 'month', 20 | # data prep 21 | outlier.clean = TRUE, 22 | impute.missing = 
TRUE, 23 | # return 24 | return.models = TRUE, 25 | return.data = TRUE) 26 | ) 27 | 28 | # expect formats 29 | expect_true(is.data.frame(forecast.uni$forecasts), 'forecasts is not a proper data.frame') 30 | expect_true(is.list(forecast.uni$models), 'models is not a proper list') 31 | expect_true(is.list(forecast.uni$information.set), 'information set is not a proper list') 32 | 33 | # expect proper names and numbers of outputs 34 | expect_equal(names(forecast.uni$models), as.character(tail(Data$date,5))) 35 | expect_equal(names(forecast.uni$information.set), as.character(tail(Data$date,5))) 36 | 37 | }) 38 | 39 | test_that("forecast_univariate (recursive) produces standard output", { 40 | 41 | # simple time series 42 | A = c(1:100) + rnorm(100) 43 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100) 44 | Data = data.frame(date = date, A) 45 | 46 | # run forecast_univariate 47 | forecast.uni = 48 | try( 49 | forecast_univariate( 50 | Data = Data, 51 | forecast.dates = tail(Data$date,5), 52 | method = c('naive','auto.arima', 'ets'), 53 | horizon = 1, 54 | recursive = TRUE, 55 | # information set 56 | rolling.window = NA, 57 | freq = 'month', 58 | # data prep 59 | outlier.clean = TRUE, 60 | impute.missing = TRUE, 61 | # return 62 | return.models = TRUE, 63 | return.data = TRUE) 64 | ) 65 | 66 | # expect formats 67 | expect_true(is.data.frame(forecast.uni$forecasts), 'rercursive forecasts is not a proper data.frame') 68 | expect_true(is.list(forecast.uni$models), 'rercursive models is not a proper list') 69 | expect_true(is.list(forecast.uni$information.set), 'rercursive information set is not a proper list') 70 | 71 | # expect proper names and numbers of outputs 72 | expect_equal(names(forecast.uni$models), as.character(tail(Data$date,5))) 73 | expect_equal(names(forecast.uni$information.set), as.character(tail(Data$date,5))) 74 | 75 | }) 76 | -------------------------------------------------------------------------------- 
/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/basic_introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Basic Introduction to OOS" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Basic Introduction to OOS} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\usepackage[utf8]{inputenc} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup, include=FALSE, warning = FALSE, message = FALSE, results = 'hide'} 18 | knitr::opts_chunk$set(echo = TRUE) 19 | ``` 20 | 21 | This introduction demonstrates how one may conduct a basic forecasting exercise with OOS. For more detailed examples and documentation, please see the [OOS](https://tylerjpike.github.io/OOS/) website. 22 | 23 | ## 0. Environment 24 | ```{r} 25 | library(OOS) 26 | ``` 27 | 28 | ## 1. Data 29 | 30 | ```{r} 31 | # pull and prepare data from FRED 32 | quantmod::getSymbols.FRED( 33 | c('UNRATE','INDPRO','GS10'), 34 | env = globalenv()) 35 | Data = cbind(UNRATE, INDPRO, GS10) 36 | Data = data.frame(Data, date = zoo::index(Data)) %>% 37 | dplyr::filter(lubridate::year(date) >= 1990) %>% 38 | na.omit() 39 | 40 | # make industrial production and 10-year Treasury stationary 41 | Data = Data %>% 42 | dplyr::mutate( 43 | GS10 = GS10 - dplyr::lag(GS10), 44 | INDPRO = (INDPRO - lag(INDPRO, 12))/lag(INDPRO, 12)) 45 | 46 | # start data when all three variables are available 47 | # (this is not necessary, but it will suppress warnings for us) 48 | Data = dplyr::filter(Data, date >= as.Date('1954-01-01')) 49 | ``` 50 | 51 | ## 2. 
Forecasting 52 | 53 | ```{r, warning=FALSE} 54 | # run univariate forecasts 55 | forecast.uni = 56 | forecast_univariate( 57 | Data = dplyr::select(Data, date, UNRATE), 58 | forecast.dates = tail(Data$date,5), 59 | method = c('naive'), #,'auto.arima', 'ets'), 60 | horizon = 1, 61 | recursive = FALSE, 62 | rolling.window = NA, 63 | freq = 'month') 64 | ``` 65 | 66 | 67 | ```{r, warning=FALSE} 68 | # create multivariate forecasts 69 | forecast.multi = 70 | forecast_multivariate( 71 | Data = Data, 72 | forecast.date = tail(Data$date,5), 73 | target = 'UNRATE', 74 | horizon = 1, 75 | method = c('lasso'), 76 | rolling.window = NA, 77 | freq = 'month') 78 | ``` 79 | 80 | 81 | ```{r, warning=FALSE} 82 | # combine forecasts and add in observed values 83 | forecasts = 84 | dplyr::bind_rows( 85 | forecast.uni, 86 | forecast.multi) %>% 87 | dplyr::left_join( 88 | dplyr::select(Data, date, observed = UNRATE), 89 | by = 'date') 90 | 91 | # forecast combinations 92 | forecast.combo = 93 | forecast_combine( 94 | forecasts, 95 | method = c('uniform','median','trimmed.mean')) 96 | ``` 97 | 98 | ## 3. Forecast Analysis 99 | ```{r, warning=FALSE} 100 | # merge forecast combinations back into forecasts 101 | forecasts = 102 | forecasts %>% 103 | dplyr::bind_rows(forecast.combo) 104 | 105 | # calculate forecast errors 106 | forecast.error = forecast_accuracy(forecasts) 107 | 108 | # view forecast errors from least to greatest 109 | # (best forecast to worst forecast method) 110 | forecast.error %>% 111 | dplyr::mutate_at(vars(-model), round, 3) %>% 112 | dplyr::arrange(MSE) 113 | 114 | # compare forecasts to the baseline (a random walk) 115 | forecast_comparison( 116 | forecasts, 117 | baseline.forecast = 'naive', 118 | test = 'ER', 119 | loss = 'MSE') %>% 120 | dplyr::arrange(error.ratio) 121 | 122 | # chart forecasts 123 | chart = 124 | chart_forecast( 125 | forecasts, 126 | Title = 'US Unemployment Rate', 127 | Ylab = 'Index', 128 | Freq = 'Monthly') 129 | 130 | ``` 131 | 132 | 
--------------------------------------------------------------------------------