├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── OOS.Rproj
├── R
│   ├── data_preparation.R
│   ├── external_imports.R
│   ├── forecast_chart.R
│   ├── forecast_combinations.R
│   ├── forecast_metrics.R
│   ├── forecast_multivariate.R
│   └── forecast_univariate.R
├── README.md
├── codecov.yml
├── cran-comments.md
├── man
│   ├── NBest.Rd
│   ├── chart_forecast.Rd
│   ├── chart_forecast_error.Rd
│   ├── data_impute.Rd
│   ├── data_outliers.Rd
│   ├── data_reduction.Rd
│   ├── data_subset.Rd
│   ├── forecast_accuracy.Rd
│   ├── forecast_combine.Rd
│   ├── forecast_comparison.Rd
│   ├── forecast_date.Rd
│   ├── forecast_multivariate.Rd
│   ├── forecast_univariate.Rd
│   ├── instantiate.data_impute.control_panel.Rd
│   ├── instantiate.forecast_combinations.control_panel.Rd
│   ├── instantiate.forecast_multivariate.ml.control_panel.Rd
│   ├── instantiate.forecast_multivariate.var.control_panel.Rd
│   ├── instantiate.forecast_univariate.control_panel.Rd
│   ├── loss_function.Rd
│   ├── n.lag.Rd
│   ├── pipe.Rd
│   ├── standardize.Rd
│   └── winsorize.Rd
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test-forecast_chart.R
│       ├── test-forecast_combination.R
│       ├── test-forecast_metrics.R
│       ├── test-forecast_multivariate.R
│       └── test-forecast_univariate.R
└── vignettes
    ├── .gitignore
    └── basic_introduction.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.github
4 | ^articles$
5 | ^docs$
6 | ^codecov\.yml$
7 | ^.travis\.yml$
8 | ^LICENSE\.md$
9 | ^cran-comments\.md$
10 | ^to-do\.md$
11 | ^CRAN-RELEASE$
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *.csv
6 | to-do.md
7 | to-do.html
8 | inst/doc
9 | CRAN-RELEASE*
10 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: r
2 |
3 | sudo: required
4 |
5 | env: _R_CHECK_CRAN_INCOMING_=FALSE
6 |
7 | r_packages:
8 | - covr
9 | - devtools
10 |
11 | r_github_packages:
12 | - tylerJPike/OOS
13 |
14 | after_success:
15 | - Rscript -e 'covr::codecov()' # NOTE(review): raw Codecov token was committed here — revoke it and set CODECOV_TOKEN as a secure Travis environment variable instead; covr::codecov() reads it automatically
16 | - Rscript -e 'devtools::check()'
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: OOS
2 | Title: Out-of-Sample Time Series Forecasting
3 | Version: 1.0.0
4 | Authors@R:
5 | person(given = "Tyler J.",
6 | family = "Pike",
7 | role = c("aut", "cre"),
8 | email = "tjpike7@gmail.com")
9 | Description: A comprehensive and cohesive API for the out-of-sample forecasting workflow:
10 | data preparation, forecasting - including both traditional econometric time series models and
11 | modern machine learning techniques - forecast combination, model and error analysis, and
12 | forecast visualization.
13 | License: GPL-3
14 | URL: https://github.com/tylerJPike/OOS,
15 | https://tylerjpike.github.io/OOS/
16 | BugReports: https://github.com/tylerJPike/OOS/issues
17 | Encoding: UTF-8
18 | LazyData: true
19 | Roxygen: list(markdown = TRUE)
20 | RoxygenNote: 7.1.1
21 | VignetteBuilder: knitr
22 | Depends:
23 | R (>= 4.0.0)
24 | Imports:
25 | caret,
26 | dplyr,
27 | forecast,
28 | furrr,
29 | future,
30 | ggplot2,
31 | glmnet,
32 | imputeTS,
33 | lmtest,
34 | lubridate,
35 | magrittr,
36 | purrr,
37 | sandwich,
38 | stats,
39 | tidyr,
40 | vars,
41 | xts,
42 | zoo
43 | Suggests:
44 | knitr,
45 | testthat,
46 | rmarkdown,
47 | quantmod
48 |
49 |
50 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | GNU General Public License
2 | ==========================
3 |
4 | _Version 3, 29 June 2007_
5 | _Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_
6 |
7 | Everyone is permitted to copy and distribute verbatim copies of this license
8 | document, but changing it is not allowed.
9 |
10 | ## Preamble
11 |
12 | The GNU General Public License is a free, copyleft license for software and other
13 | kinds of works.
14 |
15 | The licenses for most software and other practical works are designed to take away
16 | your freedom to share and change the works. By contrast, the GNU General Public
17 | License is intended to guarantee your freedom to share and change all versions of a
18 | program--to make sure it remains free software for all its users. We, the Free
19 | Software Foundation, use the GNU General Public License for most of our software; it
20 | applies also to any other work released this way by its authors. You can apply it to
21 | your programs, too.
22 |
23 | When we speak of free software, we are referring to freedom, not price. Our General
24 | Public Licenses are designed to make sure that you have the freedom to distribute
25 | copies of free software (and charge for them if you wish), that you receive source
26 | code or can get it if you want it, that you can change the software or use pieces of
27 | it in new free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you these rights or
30 | asking you to surrender the rights. Therefore, you have certain responsibilities if
31 | you distribute copies of the software, or if you modify it: responsibilities to
32 | respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether gratis or for a fee,
35 | you must pass on to the recipients the same freedoms that you received. You must make
36 | sure that they, too, receive or can get the source code. And you must show them these
37 | terms so they know their rights.
38 |
39 | Developers that use the GNU GPL protect your rights with two steps: **(1)** assert
40 | copyright on the software, and **(2)** offer you this License giving you legal permission
41 | to copy, distribute and/or modify it.
42 |
43 | For the developers' and authors' protection, the GPL clearly explains that there is
44 | no warranty for this free software. For both users' and authors' sake, the GPL
45 | requires that modified versions be marked as changed, so that their problems will not
46 | be attributed erroneously to authors of previous versions.
47 |
48 | Some devices are designed to deny users access to install or run modified versions of
49 | the software inside them, although the manufacturer can do so. This is fundamentally
50 | incompatible with the aim of protecting users' freedom to change the software. The
51 | systematic pattern of such abuse occurs in the area of products for individuals to
52 | use, which is precisely where it is most unacceptable. Therefore, we have designed
53 | this version of the GPL to prohibit the practice for those products. If such problems
54 | arise substantially in other domains, we stand ready to extend this provision to
55 | those domains in future versions of the GPL, as needed to protect the freedom of
56 | users.
57 |
58 | Finally, every program is threatened constantly by software patents. States should
59 | not allow patents to restrict development and use of software on general-purpose
60 | computers, but in those that do, we wish to avoid the special danger that patents
61 | applied to a free program could make it effectively proprietary. To prevent this, the
62 | GPL assures that patents cannot be used to render the program non-free.
63 |
64 | The precise terms and conditions for copying, distribution and modification follow.
65 |
66 | ## TERMS AND CONDITIONS
67 |
68 | ### 0. Definitions
69 |
70 | “This License” refers to version 3 of the GNU General Public License.
71 |
72 | “Copyright” also means copyright-like laws that apply to other kinds of
73 | works, such as semiconductor masks.
74 |
75 | “The Program” refers to any copyrightable work licensed under this
76 | License. Each licensee is addressed as “you”. “Licensees” and
77 | “recipients” may be individuals or organizations.
78 |
79 | To “modify” a work means to copy from or adapt all or part of the work in
80 | a fashion requiring copyright permission, other than the making of an exact copy. The
81 | resulting work is called a “modified version” of the earlier work or a
82 | work “based on” the earlier work.
83 |
84 | A “covered work” means either the unmodified Program or a work based on
85 | the Program.
86 |
87 | To “propagate” a work means to do anything with it that, without
88 | permission, would make you directly or secondarily liable for infringement under
89 | applicable copyright law, except executing it on a computer or modifying a private
90 | copy. Propagation includes copying, distribution (with or without modification),
91 | making available to the public, and in some countries other activities as well.
92 |
93 | To “convey” a work means any kind of propagation that enables other
94 | parties to make or receive copies. Mere interaction with a user through a computer
95 | network, with no transfer of a copy, is not conveying.
96 |
97 | An interactive user interface displays “Appropriate Legal Notices” to the
98 | extent that it includes a convenient and prominently visible feature that **(1)**
99 | displays an appropriate copyright notice, and **(2)** tells the user that there is no
100 | warranty for the work (except to the extent that warranties are provided), that
101 | licensees may convey the work under this License, and how to view a copy of this
102 | License. If the interface presents a list of user commands or options, such as a
103 | menu, a prominent item in the list meets this criterion.
104 |
105 | ### 1. Source Code
106 |
107 | The “source code” for a work means the preferred form of the work for
108 | making modifications to it. “Object code” means any non-source form of a
109 | work.
110 |
111 | A “Standard Interface” means an interface that either is an official
112 | standard defined by a recognized standards body, or, in the case of interfaces
113 | specified for a particular programming language, one that is widely used among
114 | developers working in that language.
115 |
116 | The “System Libraries” of an executable work include anything, other than
117 | the work as a whole, that **(a)** is included in the normal form of packaging a Major
118 | Component, but which is not part of that Major Component, and **(b)** serves only to
119 | enable use of the work with that Major Component, or to implement a Standard
120 | Interface for which an implementation is available to the public in source code form.
121 | A “Major Component”, in this context, means a major essential component
122 | (kernel, window system, and so on) of the specific operating system (if any) on which
123 | the executable work runs, or a compiler used to produce the work, or an object code
124 | interpreter used to run it.
125 |
126 | The “Corresponding Source” for a work in object code form means all the
127 | source code needed to generate, install, and (for an executable work) run the object
128 | code and to modify the work, including scripts to control those activities. However,
129 | it does not include the work's System Libraries, or general-purpose tools or
130 | generally available free programs which are used unmodified in performing those
131 | activities but which are not part of the work. For example, Corresponding Source
132 | includes interface definition files associated with source files for the work, and
133 | the source code for shared libraries and dynamically linked subprograms that the work
134 | is specifically designed to require, such as by intimate data communication or
135 | control flow between those subprograms and other parts of the work.
136 |
137 | The Corresponding Source need not include anything that users can regenerate
138 | automatically from other parts of the Corresponding Source.
139 |
140 | The Corresponding Source for a work in source code form is that same work.
141 |
142 | ### 2. Basic Permissions
143 |
144 | All rights granted under this License are granted for the term of copyright on the
145 | Program, and are irrevocable provided the stated conditions are met. This License
146 | explicitly affirms your unlimited permission to run the unmodified Program. The
147 | output from running a covered work is covered by this License only if the output,
148 | given its content, constitutes a covered work. This License acknowledges your rights
149 | of fair use or other equivalent, as provided by copyright law.
150 |
151 | You may make, run and propagate covered works that you do not convey, without
152 | conditions so long as your license otherwise remains in force. You may convey covered
153 | works to others for the sole purpose of having them make modifications exclusively
154 | for you, or provide you with facilities for running those works, provided that you
155 | comply with the terms of this License in conveying all material for which you do not
156 | control copyright. Those thus making or running the covered works for you must do so
157 | exclusively on your behalf, under your direction and control, on terms that prohibit
158 | them from making any copies of your copyrighted material outside their relationship
159 | with you.
160 |
161 | Conveying under any other circumstances is permitted solely under the conditions
162 | stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
163 |
164 | ### 3. Protecting Users' Legal Rights From Anti-Circumvention Law
165 |
166 | No covered work shall be deemed part of an effective technological measure under any
167 | applicable law fulfilling obligations under article 11 of the WIPO copyright treaty
168 | adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention
169 | of such measures.
170 |
171 | When you convey a covered work, you waive any legal power to forbid circumvention of
172 | technological measures to the extent such circumvention is effected by exercising
173 | rights under this License with respect to the covered work, and you disclaim any
174 | intention to limit operation or modification of the work as a means of enforcing,
175 | against the work's users, your or third parties' legal rights to forbid circumvention
176 | of technological measures.
177 |
178 | ### 4. Conveying Verbatim Copies
179 |
180 | You may convey verbatim copies of the Program's source code as you receive it, in any
181 | medium, provided that you conspicuously and appropriately publish on each copy an
182 | appropriate copyright notice; keep intact all notices stating that this License and
183 | any non-permissive terms added in accord with section 7 apply to the code; keep
184 | intact all notices of the absence of any warranty; and give all recipients a copy of
185 | this License along with the Program.
186 |
187 | You may charge any price or no price for each copy that you convey, and you may offer
188 | support or warranty protection for a fee.
189 |
190 | ### 5. Conveying Modified Source Versions
191 |
192 | You may convey a work based on the Program, or the modifications to produce it from
193 | the Program, in the form of source code under the terms of section 4, provided that
194 | you also meet all of these conditions:
195 |
196 | * **a)** The work must carry prominent notices stating that you modified it, and giving a
197 | relevant date.
198 | * **b)** The work must carry prominent notices stating that it is released under this
199 | License and any conditions added under section 7. This requirement modifies the
200 | requirement in section 4 to “keep intact all notices”.
201 | * **c)** You must license the entire work, as a whole, under this License to anyone who
202 | comes into possession of a copy. This License will therefore apply, along with any
203 | applicable section 7 additional terms, to the whole of the work, and all its parts,
204 | regardless of how they are packaged. This License gives no permission to license the
205 | work in any other way, but it does not invalidate such permission if you have
206 | separately received it.
207 | * **d)** If the work has interactive user interfaces, each must display Appropriate Legal
208 | Notices; however, if the Program has interactive interfaces that do not display
209 | Appropriate Legal Notices, your work need not make them do so.
210 |
211 | A compilation of a covered work with other separate and independent works, which are
212 | not by their nature extensions of the covered work, and which are not combined with
213 | it such as to form a larger program, in or on a volume of a storage or distribution
214 | medium, is called an “aggregate” if the compilation and its resulting
215 | copyright are not used to limit the access or legal rights of the compilation's users
216 | beyond what the individual works permit. Inclusion of a covered work in an aggregate
217 | does not cause this License to apply to the other parts of the aggregate.
218 |
219 | ### 6. Conveying Non-Source Forms
220 |
221 | You may convey a covered work in object code form under the terms of sections 4 and
222 | 5, provided that you also convey the machine-readable Corresponding Source under the
223 | terms of this License, in one of these ways:
224 |
225 | * **a)** Convey the object code in, or embodied in, a physical product (including a
226 | physical distribution medium), accompanied by the Corresponding Source fixed on a
227 | durable physical medium customarily used for software interchange.
228 | * **b)** Convey the object code in, or embodied in, a physical product (including a
229 | physical distribution medium), accompanied by a written offer, valid for at least
230 | three years and valid for as long as you offer spare parts or customer support for
231 | that product model, to give anyone who possesses the object code either **(1)** a copy of
232 | the Corresponding Source for all the software in the product that is covered by this
233 | License, on a durable physical medium customarily used for software interchange, for
234 | a price no more than your reasonable cost of physically performing this conveying of
235 | source, or **(2)** access to copy the Corresponding Source from a network server at no
236 | charge.
237 | * **c)** Convey individual copies of the object code with a copy of the written offer to
238 | provide the Corresponding Source. This alternative is allowed only occasionally and
239 | noncommercially, and only if you received the object code with such an offer, in
240 | accord with subsection 6b.
241 | * **d)** Convey the object code by offering access from a designated place (gratis or for
242 | a charge), and offer equivalent access to the Corresponding Source in the same way
243 | through the same place at no further charge. You need not require recipients to copy
244 | the Corresponding Source along with the object code. If the place to copy the object
245 | code is a network server, the Corresponding Source may be on a different server
246 | (operated by you or a third party) that supports equivalent copying facilities,
247 | provided you maintain clear directions next to the object code saying where to find
248 | the Corresponding Source. Regardless of what server hosts the Corresponding Source,
249 | you remain obligated to ensure that it is available for as long as needed to satisfy
250 | these requirements.
251 | * **e)** Convey the object code using peer-to-peer transmission, provided you inform
252 | other peers where the object code and Corresponding Source of the work are being
253 | offered to the general public at no charge under subsection 6d.
254 |
255 | A separable portion of the object code, whose source code is excluded from the
256 | Corresponding Source as a System Library, need not be included in conveying the
257 | object code work.
258 |
259 | A “User Product” is either **(1)** a “consumer product”, which
260 | means any tangible personal property which is normally used for personal, family, or
261 | household purposes, or **(2)** anything designed or sold for incorporation into a
262 | dwelling. In determining whether a product is a consumer product, doubtful cases
263 | shall be resolved in favor of coverage. For a particular product received by a
264 | particular user, “normally used” refers to a typical or common use of
265 | that class of product, regardless of the status of the particular user or of the way
266 | in which the particular user actually uses, or expects or is expected to use, the
267 | product. A product is a consumer product regardless of whether the product has
268 | substantial commercial, industrial or non-consumer uses, unless such uses represent
269 | the only significant mode of use of the product.
270 |
271 | “Installation Information” for a User Product means any methods,
272 | procedures, authorization keys, or other information required to install and execute
273 | modified versions of a covered work in that User Product from a modified version of
274 | its Corresponding Source. The information must suffice to ensure that the continued
275 | functioning of the modified object code is in no case prevented or interfered with
276 | solely because modification has been made.
277 |
278 | If you convey an object code work under this section in, or with, or specifically for
279 | use in, a User Product, and the conveying occurs as part of a transaction in which
280 | the right of possession and use of the User Product is transferred to the recipient
281 | in perpetuity or for a fixed term (regardless of how the transaction is
282 | characterized), the Corresponding Source conveyed under this section must be
283 | accompanied by the Installation Information. But this requirement does not apply if
284 | neither you nor any third party retains the ability to install modified object code
285 | on the User Product (for example, the work has been installed in ROM).
286 |
287 | The requirement to provide Installation Information does not include a requirement to
288 | continue to provide support service, warranty, or updates for a work that has been
289 | modified or installed by the recipient, or for the User Product in which it has been
290 | modified or installed. Access to a network may be denied when the modification itself
291 | materially and adversely affects the operation of the network or violates the rules
292 | and protocols for communication across the network.
293 |
294 | Corresponding Source conveyed, and Installation Information provided, in accord with
295 | this section must be in a format that is publicly documented (and with an
296 | implementation available to the public in source code form), and must require no
297 | special password or key for unpacking, reading or copying.
298 |
299 | ### 7. Additional Terms
300 |
301 | “Additional permissions” are terms that supplement the terms of this
302 | License by making exceptions from one or more of its conditions. Additional
303 | permissions that are applicable to the entire Program shall be treated as though they
304 | were included in this License, to the extent that they are valid under applicable
305 | law. If additional permissions apply only to part of the Program, that part may be
306 | used separately under those permissions, but the entire Program remains governed by
307 | this License without regard to the additional permissions.
308 |
309 | When you convey a copy of a covered work, you may at your option remove any
310 | additional permissions from that copy, or from any part of it. (Additional
311 | permissions may be written to require their own removal in certain cases when you
312 | modify the work.) You may place additional permissions on material, added by you to a
313 | covered work, for which you have or can give appropriate copyright permission.
314 |
315 | Notwithstanding any other provision of this License, for material you add to a
316 | covered work, you may (if authorized by the copyright holders of that material)
317 | supplement the terms of this License with terms:
318 |
319 | * **a)** Disclaiming warranty or limiting liability differently from the terms of
320 | sections 15 and 16 of this License; or
321 | * **b)** Requiring preservation of specified reasonable legal notices or author
322 | attributions in that material or in the Appropriate Legal Notices displayed by works
323 | containing it; or
324 | * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that
325 | modified versions of such material be marked in reasonable ways as different from the
326 | original version; or
327 | * **d)** Limiting the use for publicity purposes of names of licensors or authors of the
328 | material; or
329 | * **e)** Declining to grant rights under trademark law for use of some trade names,
330 | trademarks, or service marks; or
331 | * **f)** Requiring indemnification of licensors and authors of that material by anyone
332 | who conveys the material (or modified versions of it) with contractual assumptions of
333 | liability to the recipient, for any liability that these contractual assumptions
334 | directly impose on those licensors and authors.
335 |
336 | All other non-permissive additional terms are considered “further
337 | restrictions” within the meaning of section 10. If the Program as you received
338 | it, or any part of it, contains a notice stating that it is governed by this License
339 | along with a term that is a further restriction, you may remove that term. If a
340 | license document contains a further restriction but permits relicensing or conveying
341 | under this License, you may add to a covered work material governed by the terms of
342 | that license document, provided that the further restriction does not survive such
343 | relicensing or conveying.
344 |
345 | If you add terms to a covered work in accord with this section, you must place, in
346 | the relevant source files, a statement of the additional terms that apply to those
347 | files, or a notice indicating where to find the applicable terms.
348 |
349 | Additional terms, permissive or non-permissive, may be stated in the form of a
350 | separately written license, or stated as exceptions; the above requirements apply
351 | either way.
352 |
353 | ### 8. Termination
354 |
355 | You may not propagate or modify a covered work except as expressly provided under
356 | this License. Any attempt otherwise to propagate or modify it is void, and will
357 | automatically terminate your rights under this License (including any patent licenses
358 | granted under the third paragraph of section 11).
359 |
360 | However, if you cease all violation of this License, then your license from a
361 | particular copyright holder is reinstated **(a)** provisionally, unless and until the
362 | copyright holder explicitly and finally terminates your license, and **(b)** permanently,
363 | if the copyright holder fails to notify you of the violation by some reasonable means
364 | prior to 60 days after the cessation.
365 |
366 | Moreover, your license from a particular copyright holder is reinstated permanently
367 | if the copyright holder notifies you of the violation by some reasonable means, this
368 | is the first time you have received notice of violation of this License (for any
369 | work) from that copyright holder, and you cure the violation prior to 30 days after
370 | your receipt of the notice.
371 |
372 | Termination of your rights under this section does not terminate the licenses of
373 | parties who have received copies or rights from you under this License. If your
374 | rights have been terminated and not permanently reinstated, you do not qualify to
375 | receive new licenses for the same material under section 10.
376 |
377 | ### 9. Acceptance Not Required for Having Copies
378 |
379 | You are not required to accept this License in order to receive or run a copy of the
380 | Program. Ancillary propagation of a covered work occurring solely as a consequence of
381 | using peer-to-peer transmission to receive a copy likewise does not require
382 | acceptance. However, nothing other than this License grants you permission to
383 | propagate or modify any covered work. These actions infringe copyright if you do not
384 | accept this License. Therefore, by modifying or propagating a covered work, you
385 | indicate your acceptance of this License to do so.
386 |
387 | ### 10. Automatic Licensing of Downstream Recipients
388 |
389 | Each time you convey a covered work, the recipient automatically receives a license
390 | from the original licensors, to run, modify and propagate that work, subject to this
391 | License. You are not responsible for enforcing compliance by third parties with this
392 | License.
393 |
394 | An “entity transaction” is a transaction transferring control of an
395 | organization, or substantially all assets of one, or subdividing an organization, or
396 | merging organizations. If propagation of a covered work results from an entity
397 | transaction, each party to that transaction who receives a copy of the work also
398 | receives whatever licenses to the work the party's predecessor in interest had or
399 | could give under the previous paragraph, plus a right to possession of the
400 | Corresponding Source of the work from the predecessor in interest, if the predecessor
401 | has it or can get it with reasonable efforts.
402 |
403 | You may not impose any further restrictions on the exercise of the rights granted or
404 | affirmed under this License. For example, you may not impose a license fee, royalty,
405 | or other charge for exercise of rights granted under this License, and you may not
406 | initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging
407 | that any patent claim is infringed by making, using, selling, offering for sale, or
408 | importing the Program or any portion of it.
409 |
410 | ### 11. Patents
411 |
412 | A “contributor” is a copyright holder who authorizes use under this
413 | License of the Program or a work on which the Program is based. The work thus
414 | licensed is called the contributor's “contributor version”.
415 |
416 | A contributor's “essential patent claims” are all patent claims owned or
417 | controlled by the contributor, whether already acquired or hereafter acquired, that
418 | would be infringed by some manner, permitted by this License, of making, using, or
419 | selling its contributor version, but do not include claims that would be infringed
420 | only as a consequence of further modification of the contributor version. For
421 | purposes of this definition, “control” includes the right to grant patent
422 | sublicenses in a manner consistent with the requirements of this License.
423 |
424 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license
425 | under the contributor's essential patent claims, to make, use, sell, offer for sale,
426 | import and otherwise run, modify and propagate the contents of its contributor
427 | version.
428 |
429 | In the following three paragraphs, a “patent license” is any express
430 | agreement or commitment, however denominated, not to enforce a patent (such as an
431 | express permission to practice a patent or covenant not to sue for patent
432 | infringement). To “grant” such a patent license to a party means to make
433 | such an agreement or commitment not to enforce a patent against the party.
434 |
435 | If you convey a covered work, knowingly relying on a patent license, and the
436 | Corresponding Source of the work is not available for anyone to copy, free of charge
437 | and under the terms of this License, through a publicly available network server or
438 | other readily accessible means, then you must either **(1)** cause the Corresponding
439 | Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the
440 | patent license for this particular work, or **(3)** arrange, in a manner consistent with
441 | the requirements of this License, to extend the patent license to downstream
442 | recipients. “Knowingly relying” means you have actual knowledge that, but
443 | for the patent license, your conveying the covered work in a country, or your
444 | recipient's use of the covered work in a country, would infringe one or more
445 | identifiable patents in that country that you have reason to believe are valid.
446 |
447 | If, pursuant to or in connection with a single transaction or arrangement, you
448 | convey, or propagate by procuring conveyance of, a covered work, and grant a patent
449 | license to some of the parties receiving the covered work authorizing them to use,
450 | propagate, modify or convey a specific copy of the covered work, then the patent
451 | license you grant is automatically extended to all recipients of the covered work and
452 | works based on it.
453 |
454 | A patent license is “discriminatory” if it does not include within the
455 | scope of its coverage, prohibits the exercise of, or is conditioned on the
456 | non-exercise of one or more of the rights that are specifically granted under this
457 | License. You may not convey a covered work if you are a party to an arrangement with
458 | a third party that is in the business of distributing software, under which you make
459 | payment to the third party based on the extent of your activity of conveying the
460 | work, and under which the third party grants, to any of the parties who would receive
461 | the covered work from you, a discriminatory patent license **(a)** in connection with
462 | copies of the covered work conveyed by you (or copies made from those copies), or **(b)**
463 | primarily for and in connection with specific products or compilations that contain
464 | the covered work, unless you entered into that arrangement, or that patent license
465 | was granted, prior to 28 March 2007.
466 |
467 | Nothing in this License shall be construed as excluding or limiting any implied
468 | license or other defenses to infringement that may otherwise be available to you
469 | under applicable patent law.
470 |
471 | ### 12. No Surrender of Others' Freedom
472 |
473 | If conditions are imposed on you (whether by court order, agreement or otherwise)
474 | that contradict the conditions of this License, they do not excuse you from the
475 | conditions of this License. If you cannot convey a covered work so as to satisfy
476 | simultaneously your obligations under this License and any other pertinent
477 | obligations, then as a consequence you may not convey it at all. For example, if you
478 | agree to terms that obligate you to collect a royalty for further conveying from
479 | those to whom you convey the Program, the only way you could satisfy both those terms
480 | and this License would be to refrain entirely from conveying the Program.
481 |
482 | ### 13. Use with the GNU Affero General Public License
483 |
484 | Notwithstanding any other provision of this License, you have permission to link or
485 | combine any covered work with a work licensed under version 3 of the GNU Affero
486 | General Public License into a single combined work, and to convey the resulting work.
487 | The terms of this License will continue to apply to the part which is the covered
488 | work, but the special requirements of the GNU Affero General Public License, section
489 | 13, concerning interaction through a network will apply to the combination as such.
490 |
491 | ### 14. Revised Versions of this License
492 |
493 | The Free Software Foundation may publish revised and/or new versions of the GNU
494 | General Public License from time to time. Such new versions will be similar in spirit
495 | to the present version, but may differ in detail to address new problems or concerns.
496 |
497 | Each version is given a distinguishing version number. If the Program specifies that
498 | a certain numbered version of the GNU General Public License “or any later
499 | version” applies to it, you have the option of following the terms and
500 | conditions either of that numbered version or of any later version published by the
501 | Free Software Foundation. If the Program does not specify a version number of the GNU
502 | General Public License, you may choose any version ever published by the Free
503 | Software Foundation.
504 |
505 | If the Program specifies that a proxy can decide which future versions of the GNU
506 | General Public License can be used, that proxy's public statement of acceptance of a
507 | version permanently authorizes you to choose that version for the Program.
508 |
509 | Later license versions may give you additional or different permissions. However, no
510 | additional obligations are imposed on any author or copyright holder as a result of
511 | your choosing to follow a later version.
512 |
513 | ### 15. Disclaimer of Warranty
514 |
515 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
516 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
517 | PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER
518 | EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
519 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
520 | QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
521 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
522 |
523 | ### 16. Limitation of Liability
524 |
525 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
526 | COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
527 | PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
528 | INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
529 | PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
530 | OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
531 | WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
532 | POSSIBILITY OF SUCH DAMAGES.
533 |
534 | ### 17. Interpretation of Sections 15 and 16
535 |
536 | If the disclaimer of warranty and limitation of liability provided above cannot be
537 | given local legal effect according to their terms, reviewing courts shall apply local
538 | law that most closely approximates an absolute waiver of all civil liability in
539 | connection with the Program, unless a warranty or assumption of liability accompanies
540 | a copy of the Program in return for a fee.
541 |
542 | _END OF TERMS AND CONDITIONS_
543 |
544 | ## How to Apply These Terms to Your New Programs
545 |
546 | If you develop a new program, and you want it to be of the greatest possible use to
547 | the public, the best way to achieve this is to make it free software which everyone
548 | can redistribute and change under these terms.
549 |
550 | To do so, attach the following notices to the program. It is safest to attach them
551 | to the start of each source file to most effectively state the exclusion of warranty;
552 | and each file should have at least the “copyright” line and a pointer to
553 | where the full notice is found.
554 |
555 |
556 | Copyright (C) 2021 Tyler J. Pike
557 |
558 | This program is free software: you can redistribute it and/or modify
559 | it under the terms of the GNU General Public License as published by
560 | the Free Software Foundation, either version 3 of the License, or
561 | (at your option) any later version.
562 |
563 | This program is distributed in the hope that it will be useful,
564 | but WITHOUT ANY WARRANTY; without even the implied warranty of
565 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
566 | GNU General Public License for more details.
567 |
568 | You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
570 |
571 | Also add information on how to contact you by electronic and paper mail.
572 |
573 | If the program does terminal interaction, make it output a short notice like this
574 | when it starts in an interactive mode:
575 |
576 | OOS Copyright (C) 2021 Tyler J. Pike
577 | This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
578 | This is free software, and you are welcome to redistribute it
579 | under certain conditions; type 'show c' for details.
580 |
581 | The hypothetical commands `show w` and `show c` should show the appropriate parts of
582 | the General Public License. Of course, your program's commands might be different;
583 | for a GUI interface, you would use an “about box”.
584 |
585 | You should also get your employer (if you work as a programmer) or school, if any, to
586 | sign a “copyright disclaimer” for the program, if necessary. For more
587 | information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
589 |
590 | The GNU General Public License does not permit incorporating your program into
591 | proprietary programs. If your program is a subroutine library, you may consider it
592 | more useful to permit linking proprietary applications with the library. If this is
593 | what you want to do, use the GNU Lesser General Public License instead of this
594 | License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
596 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export("%>%")
4 | export(NBest)
5 | export(chart_forecast)
6 | export(chart_forecast_error)
7 | export(data_impute)
8 | export(data_outliers)
9 | export(data_reduction)
10 | export(data_subset)
11 | export(forecast_accuracy)
12 | export(forecast_combine)
13 | export(forecast_comparison)
14 | export(forecast_multivariate)
15 | export(forecast_univariate)
16 | export(instantiate.data_impute.control_panel)
17 | export(instantiate.forecast_combinations.control_panel)
18 | export(instantiate.forecast_multivariate.ml.control_panel)
19 | export(instantiate.forecast_multivariate.var.control_panel)
20 | export(instantiate.forecast_univariate.control_panel)
21 | export(loss_function)
22 | export(n.lag)
23 | export(standardize)
24 | export(winsorize)
25 | import(stats)
26 | importFrom(magrittr,"%>%")
27 |
--------------------------------------------------------------------------------
/OOS.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 |
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 |
--------------------------------------------------------------------------------
/R/data_preparation.R:
--------------------------------------------------------------------------------
1 | #---------------------------------------------
2 | # data cleaning helper functions
3 | #---------------------------------------------
#' Standardize variables (mean 0, variance 1)
#'
#' Centers a numeric vector on its mean and scales it by its standard
#' deviation, ignoring missing values.
#'
#' @param X numeric: vector to be standardized
#'
#' @return numeric vector of standardized values
#'
#' @export
standardize = function(X){
  # use TRUE, not the reassignable shorthand T
  return((X - mean(X, na.rm = TRUE)) / sd(X, na.rm = TRUE))
}
12 |
#' Winsorize or trim variables
#'
#' Replaces values at or beyond the given percentile bounds either with the
#' bound values themselves (winsorizing) or with NA (trimming).
#'
#' @param X numeric: vector to be winsorized or trimmed
#' @param trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
#' @param bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
#'
#' @return numeric vector of winsorized or trimmed values
#'
#' @export
winsorize = function(X, bounds, trim = FALSE){

  # empirical cutoffs at the requested percentiles (missing values ignored)
  cutoffs = quantile(X, probs = bounds, na.rm = TRUE)
  lower = cutoffs[1]
  upper = cutoffs[2]

  # replacement value depends on mode: the cutoff itself (winsorize) or NA (trim)
  if(trim == FALSE){
    X[X <= lower] = lower
    X[X >= upper] = upper
  }else{
    X[X <= lower] = NA
    X[X >= upper] = NA
  }

  return(X)
}
36 |
#' Create information set
#'
#' A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param forecast.date date: upper bound of information set
#' @param rolling.window int: size of rolling window, NA if expanding window is used
#' @param freq string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors
#'
#' @return data.frame bounded by the given date range
#'
#' @export
data_subset = function(
  Data,
  forecast.date,
  rolling.window,
  freq
){

  # expanding window: keep every observation up to and including the forecast date
  if(is.na(rolling.window)){
    return(dplyr::filter(Data, date <= forecast.date))
  }

  # rolling window: step back `rolling.window` periods at the stated
  # frequency to find the window's start date
  window.start = forecast.date

  if(freq == 'day'){
    window.start = forecast.date - rolling.window
  }else if(freq == 'week'){
    lubridate::week(window.start) = lubridate::week(forecast.date) - rolling.window
  }else if(freq == 'month'){
    lubridate::month(window.start) = lubridate::month(forecast.date) - rolling.window
  }else if(freq == 'quarter'){
    # a quarter is three months wide
    lubridate::month(window.start) = lubridate::month(forecast.date) - rolling.window*3
  }else if(freq == 'year'){
    lubridate::year(window.start) = lubridate::year(forecast.date) - rolling.window
  }

  # keep observations inside [window.start, forecast.date]
  return(dplyr::filter(Data, window.start <= date & date <= forecast.date))
}
84 |
#' Set forecasted date
#'
#' A function to advance a forecast origin date by `horizon` periods at the
#' given frequency, producing the date the forecast refers to. Is used as a
#' data preparation helper function and is called internally by
#' forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param forecast.date date: date forecast was made
#' @param horizon int: periods ahead of forecast
#' @param freq string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors
#'
#' @return date vector
#'
#'
forecast_date = function(
  forecast.date,
  horizon,
  freq
){

  target = forecast.date

  if(freq == 'day'){
    target = forecast.date + horizon
  }else if(freq == 'week'){
    lubridate::week(target) = lubridate::week(target) + horizon
  }else if(freq %in% c('month', 'quarter')){
    # quarters advance three months per period
    months.ahead = if(freq == 'quarter') horizon*3 else horizon
    lubridate::month(target) = lubridate::month(target) + months.ahead
  }else if(freq == 'year'){
    lubridate::year(target) = lubridate::year(target) + horizon
  }

  return(target)
}
119 |
#' Create n lags
#'
#' A function to create 1 through n lags of a set of variables. Is used as a data preparation
#' helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of variables to lag and a 'date' column
#' @param lags int: number of lags to create
#' @param variables string: vector of variable names to lag, default is all non-date variables
#'
#' @return data.frame
#'
#' @export
n.lag = function(
  Data,
  lags,
  variables = NULL
){

  # default: lag every column except the date index
  if(is.null(variables)){
    variables = names(dplyr::select(Data, -date))
  }

  # build the contemporaneous data (n = 0) plus one lagged copy per lag
  # order, suffixing lagged columns with '.l<n>'
  lagged.sets = lapply(
    0:lags,
    function(n){

      if(n == 0){return(Data)}

      lagged = Data %>%
        dplyr::mutate_at(variables, dplyr::lag, n)

      non.date = names(lagged) != 'date'
      names(lagged)[non.date] = paste0(names(lagged)[non.date], '.l', n)

      return(lagged)
    }
  )

  # join all copies back together on the date index
  Data = Reduce(function(a, b){dplyr::full_join(a, b, by = 'date')}, lagged.sets)

  return(Data)
}
161 |
162 | #---------------------------------------------
163 | # Clean outliers
164 | #---------------------------------------------
#' Clean outliers
#'
#' A function to clean outliers. Is used as a data preparation helper function and is called internally
#' by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param variables string: vector of variables to standardize, default is all but 'date' column
#' @param w.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
#' @param trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
#' @param cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
#'
#' @return data.frame with outliers winsorized or trimmed
#'
#' @export
data_outliers = function(
  Data,                     # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  variables = NULL,         # string: vector of variables to standardize, default is all but 'date' column
  w.bounds = c(0.05, 0.95), # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
  trim = FALSE,             # boolean: if TRUE then replace outliers with NA instead of winsorizing bound
  cross_section = FALSE     # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
){

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    print(errorCondition('Variables cleaned for outliers must be numeric.'))
  }

  # clean outliers (column wise)
  if(cross_section == FALSE){
    # bug fix: result was previously assigned to an unused local `test`,
    # silently discarding the winsorized data and returning raw Data
    Data = Data %>%
      dplyr::mutate_at(dplyr::vars(variables), winsorize, bounds = w.bounds, trim = trim)

  # clean outliers (row wise)
  }else{
    Data = Data %>%
      dplyr::rowwise() %>%
      dplyr::mutate_at(dplyr::vars(variables), winsorize, bounds = w.bounds, trim = trim)
  }

  # return results
  return(Data)
}
213 |
214 |
215 | #---------------------------------------------
216 | # Impute missing
217 | #---------------------------------------------
#' Create interface to control `data_impute` model estimation
#'
#' A function to create the data imputation method
#' arguments list for user manipulation.
#'
#' @return data_impute.control_panel
#'
#' @export
instantiate.data_impute.control_panel = function(){

  # methods: store function objects rather than "pkg::fun" strings --
  # do.call() resolves a character `what` via get(), which cannot parse
  # the "::" qualifier, so string entries would fail at dispatch time.
  # (bug fix: `remove` previously pointed at 'imputeTS:na_remove', a
  # single-colon typo, and only `kalman` was a working function object.)
  methods = list(
    interpolation = imputeTS::na_interpolation,
    kalman = imputeTS::na_kalman,
    locf = imputeTS::na_locf,
    ma = imputeTS::na_ma,
    mean = imputeTS::na_mean,
    random = imputeTS::na_random,
    remove = imputeTS::na_remove,
    replace = imputeTS::na_replace,
    seadec = imputeTS::na_seadec,
    seasplit = imputeTS::na_seasplit
  )

  # per-method argument lists; data_impute fills in $x at call time
  arguments = list(
    interpolation = NULL,
    kalman = NULL,
    locf = NULL,
    ma = NULL,
    mean = NULL,
    random = NULL,
    remove = NULL,
    replace = NULL,
    seadec = NULL,
    seasplit = NULL
  )

  return(
    list(
      method = methods,
      arguments = arguments
    )
  )

}
264 |
#' Impute missing values
#'
#' A function to impute missing values. Is used as a data preparation helper function and is called internally
#' by forecast_univariate, forecast_multivariate, and forecast_combine.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
#' @param variables string: vector of variables to standardize, default is all but 'date' column
#' @param verbose boolean: show start-up status of impute.missing.routine
#'
#' @return data.frame with missing data imputed
#'
#' @export
data_impute = function(
  Data,              # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  method = 'kalman', # string: select which method to use from the imputeTS package
  variables = NULL,  # string: vector of variables to impute missing values, default is all numeric columns
  verbose = FALSE    # boolean: show start-up status of data_impute.control_panel
){

  # training parameter creation and warnings: reuse a user-supplied
  # control panel if one exists in scope, otherwise instantiate defaults
  if(verbose == TRUE){
    if(exists("data_impute.control_panel")){
      print(warningCondition('data_impute.control_panel exists and will be used to impute missing data in its present state.'))
    }else{
      data_impute.control_panel = instantiate.data_impute.control_panel()
      print(warningCondition('data_impute.control_panel was instantiated and default values will be used to impute missing data.'))
    }
  }else{
    if(!exists("data_impute.control_panel")){data_impute.control_panel = instantiate.data_impute.control_panel()}
  }

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # (bug fix: message previously referred to outlier cleaning, a copy-paste from data_outliers)
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    print(errorCondition('Variables to impute must be numeric.'))
  }

  # impute missing values column-by-column via the selected imputeTS routine
  for(v in variables){
    data_impute.control_panel$arguments[[method]]$x = Data[,c(v)]
    Data[,c(v)] =
      do.call(what = data_impute.control_panel$method[[method]],
              args = data_impute.control_panel$arguments[[method]])
  }

  # return results
  return(Data)
}
318 |
319 |
320 | #---------------------------------------------
321 | # Dimension reduction
322 | #---------------------------------------------
#' Dimension reduction via principal components
#'
#' A function to estimate principal components.
#'
#' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
#' @param variables string: vector of variables to standardize, default is all but 'date' column
#' @param ncomp int: number of factors to create
#' @param standardize boolean: normalize variables (mean zero, variance one) before estimating factors
#'
#' @return data.frame with a date column and `ncomp` principal component columns
#'
#' @export
data_reduction = function(
  Data,               # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')
  variables = NULL,   # string: vector of variables to impute missing values, default is all numeric columns
  ncomp,              # int: number of factors to create
  standardize = TRUE  # boolean: normalize variables (mean zero, variance one) before estimating factors
){

  # set variables to all numeric columns if default
  if(is.null(variables) == TRUE){
    variables = names(dplyr::select_if(Data, is.numeric))
  }

  # target variables must be numeric
  # (bug fix: message previously referred to outlier cleaning, a copy-paste from data_outliers)
  if(length(setdiff(variables, names(dplyr::select_if(Data, is.numeric)))) != 0){
    print(errorCondition('Variables for dimension reduction must be numeric.'))
  }

  # remove missing
  information.set = na.omit(Data)

  # standardize variables
  # (bug fix: the `standardize` flag was previously ignored and
  # standardization was applied unconditionally)
  if(standardize == TRUE){
    information.set = information.set %>%
      dplyr::mutate_at(dplyr::vars(variables), OOS::standardize)
  }

  # estimate factors
  # NOTE(review): princomp and the projection below use every non-date
  # column, not just `variables` -- confirm this is intentional
  model.pc = stats::princomp(dplyr::select(information.set, -date))

  # select factors
  factors = as.matrix(dplyr::select(information.set, -date)) %*% model.pc$loadings[,1:ncomp]

  # take most recent factors
  colnames(factors) = paste0('pc.',c(1:ncomp))

  factors =
    data.frame(factors,
               date = information.set$date)

  # return results
  return(factors)
}
376 |
--------------------------------------------------------------------------------
/R/external_imports.R:
--------------------------------------------------------------------------------
1 | #' Pipe operator
2 | #'
3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #'
5 | #' @name %>%
6 | #' @rdname pipe
7 | #' @keywords internal
8 | #' @return magrittr pipe operator %>%
9 | #' @export
10 | #' @importFrom magrittr %>%
11 | #' @usage lhs \%>\% rhs
12 | NULL
13 |
14 | #' @import stats
15 | NULL
--------------------------------------------------------------------------------
/R/forecast_chart.R:
--------------------------------------------------------------------------------
1 | #----------------------------------------
2 | ### Basic forecast chart
3 | #----------------------------------------
#' Chart forecasts
#'
#' @param Data data.frame: oos.forecast object
#' @param Title string: chart title
#' @param Ylab string: y-axis label
#' @param Freq string: frequency (acts as sub-title)
#' @param zeroline boolean: if TRUE then add a horizontal line at zero
#'
#' @return ggplot2 chart
#'
#' @examples
#' \donttest{
#'
#' # simple time series
#' A = c(1:100) + rnorm(100)
#' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#' Data = data.frame(date = date, A)
#'
#' # run forecast_univariate
#' forecast.uni =
#'     forecast_univariate(
#'       Data = Data,
#'       forecast.dates = tail(Data$date,10),
#'       method = c('naive','auto.arima', 'ets'),
#'       horizon = 1,
#'       recursive = FALSE,
#'       freq = 'month')
#'
#' forecasts =
#'   dplyr::left_join(
#'     forecast.uni,
#'     data.frame(date, observed = A),
#'     by = 'date'
#'   )
#'
#' # chart forecasts
#' chart.forecast =
#'   chart_forecast(
#'     forecasts,
#'     Title = 'test',
#'     Ylab = 'Index',
#'     Freq = 'Monthly',
#'     zeroline = TRUE)
#'
#' }
#'
#' @export
chart_forecast = function(
  Data,             # data.frame: oos.forecast object
  Title,            # string: chart title
  Ylab,             # string: y-axis label
  Freq,             # string: frequency (acts as sub-title)
  zeroline = FALSE  # boolean: if TRUE then add a horizontal line at zero
){

  # function errors
  # (bug fix: errorCondition() only constructs a condition object and was
  # discarded, so the checks were no-ops; stop() actually signals the error)
  if(!'forecast' %in% colnames(Data)){
    stop('Data must have a column named "forecast" to create plot')
  }
  if(!'date' %in% colnames(Data)){
    stop('Data must have a column named "date" to create plot')
  }

  # function variables (pre-declared to satisfy R CMD check NSE notes)
  model = observed = forecast = forecast.date = se = NA

  # reformat observed: append the observed series as a pseudo-model
  # ('*observed') so it plots alongside the forecasts
  if('observed' %in% colnames(Data)){
    Data =
      dplyr::bind_rows(
        Data,
        Data %>% dplyr::select(forecast = observed, date) %>%
          dplyr::mutate(model = '*observed') %>%
          dplyr::distinct()
      )
  }

  # set chart
  chart =
    ggplot2::ggplot(Data, ggplot2::aes(x=date, y = forecast, color = model)) +
    # plot line
    ggplot2::geom_line(lwd = 1.25) +
    ggplot2::theme_classic() +
    ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, linetype = 'solid', colour = "grey")) +
    # chart details
    ggplot2::labs(title = Title, subtitle = Freq) +
    ggplot2::xlab("") +
    ggplot2::ylab(Ylab)

  # add zero line
  if(zeroline == TRUE){

    chart = chart +
      ggplot2::geom_hline(yintercept=0, color="black", size=.5)

  }

  return(chart)

}
105 |
106 |
107 | #----------------------------------------
108 | ### Basic error chart
109 | #----------------------------------------
#' Chart forecast errors
#'
#' @param Data data.frame: oos.forecast object
#' @param Title string: chart title
#' @param Ylab string: y-axis label
#' @param Freq string: frequency (acts as sub-title)
#' @param zeroline boolean: if TRUE then add a horizontal line at zero
#'
#' @return ggplot2 chart
#'
#' @examples
#' \donttest{
#'
#' # simple time series
#' A = c(1:100) + rnorm(100)
#' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
#' Data = data.frame(date = date, A)
#'
#' # run forecast_univariate
#' forecast.uni =
#'     forecast_univariate(
#'       Data = Data,
#'       forecast.dates = tail(Data$date,10),
#'       method = c('naive','auto.arima', 'ets'),
#'       horizon = 1,
#'       recursive = FALSE,
#'       freq = 'month')
#'
#' forecasts =
#'   dplyr::left_join(
#'     forecast.uni,
#'     data.frame(date, observed = A),
#'     by = 'date'
#'   )
#'
#' # chart forecast errors
#' chart.errors =
#'   chart_forecast_error(
#'     forecasts,
#'     Title = 'test',
#'     Ylab = 'Index',
#'     Freq = 'Monthly',
#'     zeroline = TRUE)
#'
#' }
#'
#' @export
chart_forecast_error = function(
  Data,             # data.frame: oos.forecast function output
  Title,            # string: chart title
  Ylab,             # string: y-axis label
  Freq,             # string: frequency (acts as sub-title)
  zeroline = FALSE  # boolean: if TRUE then add a horizontal line at zero
){

  # function errors
  # (bug fix: errorCondition() only constructs a condition object and was
  # discarded, so the checks were no-ops; stop() actually signals the error)
  if(!'observed' %in% colnames(Data)){
    stop('Data must have a column named "observed" to calculate errors')
  }
  if(!'forecast' %in% colnames(Data)){
    stop('Data must have a column named "forecast" to calculate errors')
  }
  if(!'date' %in% colnames(Data)){
    stop('Data must have a column named "date" to create plot')
  }

  # function variables (pre-declared to satisfy R CMD check NSE notes)
  model = observed = forecast = forecast.date = se = errors = NA

  # calculate errors (forecast minus observed, per model and date)
  Data = Data %>%
    dplyr::mutate(errors = forecast - observed) %>%
    dplyr::select(date, errors, model)

  # set chart
  chart =
    ggplot2::ggplot(Data, ggplot2::aes(x=date, y = errors, color = model)) +
    # plot line
    ggplot2::geom_line(lwd = 1.25) +
    ggplot2::theme_classic() +
    ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, linetype = 'solid', colour = "grey")) +
    # chart details
    ggplot2::labs(title = Title, subtitle = Freq) +
    ggplot2::xlab("") +
    ggplot2::ylab(Ylab)

  # add zero line
  if(zeroline == TRUE){

    chart = chart +
      ggplot2::geom_hline(yintercept=0, color="black", size=.5)

  }

  return(chart)

}
208 |
209 |
--------------------------------------------------------------------------------
/R/forecast_combinations.R:
--------------------------------------------------------------------------------
1 | #---------------------------------------------
2 | # Forecast combination helper functions
3 | #---------------------------------------------
4 | #' Select N-best forecasts
5 | #'
6 | #' A function to subset the n-best forecasts;
7 | #' assumes column named observed.
8 | #'
9 | #' @param forecasts data.frame: a data frame of forecasts to combine, assumes one column named "observed"
10 | #' @param n.max int: maximum number of forecasts to select
11 | #' @param window int: size of rolling window to evaluate forecast error over, use entire period if NA
12 | #'
13 | #' @return data.frame with n columns of the historically best forecasts
14 | #'
15 | #' @export
16 |
NBest = function(
  forecasts,   # data.frame: a data frame of forecasts to combine, assumes one column named "observed"
  n.max,       # int: maximum number of forecasts to select
  window = NA  # int: size of rolling window to evaluate forecast error over, use entire period if NA
){

  # placeholder to avoid R CMD check NSE notes
  observed = NA

  # calculate rolling RMSE of each candidate forecast
  # (bug fix: the `window` argument was previously ignored and an expanding
  #  window was always used; a fixed rolling window is now honored)
  errors = abs(dplyr::select(forecasts, -observed) - forecasts$observed)
  rollRMSE = function(X){return(sqrt(mean((X)^2, na.rm = TRUE)))}
  widths =
    if(is.na(window)){
      seq_along(errors[,1])   # expanding window over the entire history
    }else{
      window                  # fixed rolling window of `window` periods
    }
  rollingErrors = zoo::rollapply(data = errors, width = widths,
                                 FUN = rollRMSE, align = 'right', fill = NA)

  # create rolling N-best forecasts: for each period average the n
  # historically most accurate forecasts, n = 1, ..., n.max
  X = dplyr::select(forecasts, -observed) %>% as.matrix()
  nBest = matrix(nrow = nrow(X), ncol = n.max)
  for(row in 1:nrow(X)){
    for(column in 1:n.max){
      nBest[row,column] = mean(X[row,order(rollingErrors[row,])[1:column]])
    }
  }
  colnames(nBest) = paste0('N',c(1:n.max))

  # return results
  return(nBest)
}
44 |
45 | #---------------------------------------------
46 | # Forecast combination method arguments
47 | #----------------------------------------------
48 | #' Create interface to control `forecast_combine` model estimation
49 | #'
50 | #' A function to create the forecast combination technique arguments list
51 | #' for user manipulation.
52 | #'
53 | #' @param covariates int: the number of features that will go into the model
54 | #'
55 | #' @return forecast_combinations.control_panel
56 | #'
57 | #' @export
58 |
instantiate.forecast_combinations.control_panel = function(covariates = NULL){

  # penalty grid shared by the glmnet-based engines (ridge and lasso)
  lambda.grid = 10^seq(-3, 3, length = 100)

  # map OOS method names onto the corresponding caret engine names
  caret.engine =
    list(
      ols     = 'lm',
      ridge   = 'glmnet',
      lasso   = 'glmnet',
      elastic = 'glmnet',
      RF      = 'rf',
      GBM     = 'gbm',
      NN      = 'avNNet',
      pls     = 'pls',
      pcr     = 'pcr')

  # default hyper-parameter search grids, one per engine
  # (NULL entries let caret choose its own default grid)
  tuning.grids =
    list(
      ols     = NULL,
      ridge   = expand.grid(alpha = 0, lambda = lambda.grid),
      lasso   = expand.grid(alpha = 1, lambda = lambda.grid),
      elastic = NULL,
      GBM     = expand.grid(n.minobsinnode = c(1),
                            shrinkage = c(.1,.01),
                            n.trees = c(100, 250, 500),
                            interaction.depth = c(1,2,5)),
      RF      = expand.grid(mtry = c(1:4)),
      NN      = expand.grid(size = seq(2,10,5),
                            decay = c(.01,.001),
                            bag = c(100, 250, 500)),
      pls     = expand.grid(ncomp = c(1:5)),
      pcr     = expand.grid(ncomp = c(1:5)))

  # refine the RF and NN grids when the number of features is known
  if(!is.null(covariates)){
    tuning.grids[['RF']] =
      expand.grid(mtry = covariates/3)

    tuning.grids[['NN']] =
      expand.grid(size = c(covariates, 2*covariates, 3*covariates),
                  decay = c(.01,.001),
                  bag = c(20, 100))
  }

  # hyper-parameter selection routine: 5-fold cross-validation
  control =
    caret::trainControl(
      method = "cv",
      number = 5,
      allowParallel = TRUE)

  # accuracy metric used in training
  accuracy = 'RMSE'

  # return training information
  return(
    list(
      caret.engine = caret.engine,
      tuning.grids = tuning.grids,
      control = control,
      accuracy = accuracy))
}
150 |
151 | #---------------------------------------------
152 | # Forecast combination methods
153 | #---------------------------------------------
154 | #' Forecast with forecast combinations
155 | #'
156 | #' A function to combine forecasts out-of-sample. Methods available include:
157 | #' uniform weights, median forecast, trimmed (winsorized) mean, n-best,
158 | #' ridge regression, lasso regression, elastic net, peLASSO,
159 | #' random forest, tree-based gradient boosting machine, and single-layer neural network.
160 | #' See package website for most up-to-date list of available models.
161 | #'
162 | #' @param Data data.frame: data frame of forecasted values to combine, assumes 'date' and 'observed' columns, but `observed' is not necessary for all methods
163 | #' @param method string: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'
164 | #' @param n.max int: maximum number of forecasts to select in n.best method
165 | #' @param rolling.window int: size of rolling window to evaluate forecast error over, use entire period if NA
166 | #' @param trim numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)
167 | #' @param burn.in int: the number of periods to use in the first model estimation
168 | #' @param parallel.dates int: the number of cores available for parallel estimation
169 | #'
170 | #' @return data.frame with a row for each combination method and forecasted date
171 | #'
172 | #' @examples
173 | #' \donttest{
174 | #' # simple time series
175 | #' A = c(1:100) + rnorm(100)
176 | #' B = c(1:100) + rnorm(100)
177 | #' C = c(1:100) + rnorm(100)
178 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
179 | #' Data = data.frame(date = date, A, B, C)
180 | #'
#' # run forecast_multivariate
182 | #' forecast.multi =
183 | #' forecast_multivariate(
184 | #' Data = Data,
185 | #' target = 'A',
186 | #' forecast.dates = tail(Data$date,5),
187 | #' method = c('ols','var'),
188 | #' horizon = 1,
189 | #' freq = 'month')
#' # include observed values
191 | #' forecasts =
192 | #' dplyr::left_join(
193 | #' forecast.multi,
194 | #' data.frame(date, observed = A),
195 | #' by = 'date'
196 | #' )
197 | #'
198 | #' # combine forecasts
199 | #' combinations =
200 | #' forecast_combine(
201 | #' forecasts,
202 | #' method = c('uniform','median','trimmed.mean',
203 | #' 'n.best','lasso','peLasso'),
204 | #' burn.in = 5,
205 | #' n.max = 2)
206 | #' }
207 | #'
208 | #'
209 | #' @export
210 |
211 | # assumes a column named observed
forecast_combine = function(
  Data,                  # data.frame: data frame of forecasted values to combine, assumes `date` and `observed` columns, but `observed` is not necessary for all methods
  method = 'uniform',    # string or vector: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'
  n.max = NULL,          # int: maximum number of forecasts to select
  rolling.window = NA,   # int: size of rolling window to evaluate forecast error over, use entire period if NA
  trim = c(0.5, 0.95),   # numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)
  burn.in = 1,           # int: the number of periods to use in the first model estimation
  parallel.dates = NULL  # int: the number of cores available for parallel estimation
){
  # (bug fix: the default method was the typo 'unform', which matched no
  #  branch and made the default call fail; corrected to 'uniform')

  # create parallel back end
  if(!is.null(parallel.dates)){
    future::plan(strategy = 'multisession', workers = parallel.dates)
  }else{
    future::plan(strategy = 'sequential')
  }

  # function variables (placeholders to avoid R CMD check NSE notes)
  model = observed = forecast = forecast.date = se = NA
  results.list = list()

  # cast from long to wide: one column of forecasts per model
  forecasts = Data %>%
    dplyr::select(-se, -forecast.date) %>%
    tidyr::pivot_wider(names_from = model, values_from = forecast)

  # uniform weights
  if('uniform' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1, FUN = mean, na.rm = TRUE)
    results.list[['uniform']] = data.frame(date = forecasts$date, forecast = combination, model = 'uniform')
  }

  # median forecast
  if('median' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1, FUN = median, na.rm = TRUE)
    results.list[['median']] = data.frame(date = forecasts$date, forecast = combination, model = 'median')
  }

  # trimmed (winsorized) mean
  # (bug fix: the winsorized values were previously discarded and the plain
  #  mean of the raw forecasts was returned instead)
  if('trimmed.mean' %in% method){
    forecasts.raw = dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed'))
    combination = apply(forecasts.raw, MARGIN = 1,
                        FUN = function(x){mean(winsorize(x, bounds = trim, trim = FALSE), na.rm = TRUE)})
    results.list[['trimmed']] = data.frame(date = forecasts$date, forecast = combination, model = 'trimmed.mean')
  }

  # N-best method
  if('n.best' %in% method){

    # errors and warnings
    # (bug fix: the n.max guard was inverted and errorCondition()/
    #  warningCondition() never halted or signaled anything)
    if(is.null(n.max)){
      stop('Set n.max before using the n-best combination method')
    }
    if(is.na(rolling.window)){
      warning('The n-best method will default to using the entire forecast history')
    }

    # create n-best forecast combinations
    # (bug fix: previously the base R window() function was passed instead of
    #  the rolling.window argument)
    combination.nbest = NBest(dplyr::select(forecasts, -dplyr::contains('date')), n.max, rolling.window)
    combination.mean = apply(combination.nbest, MARGIN = 1, FUN = mean, na.rm = TRUE)
    combination = data.frame(date = forecasts$date, combination.mean, combination.nbest) %>%
      dplyr::rename(N.best = combination.mean)
    combination = tidyr::pivot_longer(combination,
                                      cols = names(dplyr::select(combination, -date)),
                                      names_to = 'model',
                                      values_to = 'forecast')

    results.list[['nbest']] = combination
  }

  # peLasso: partially egalitarian lasso, estimated one forecast date at a time
  if('peLasso' %in% method){
    combination =
      forecasts$date[burn.in : nrow(forecasts)] %>%
      furrr::future_map(
        .f = function(forecast.date){

          # set data: train on strictly earlier periods, predict the current one
          information.set = dplyr::filter(forecasts, forecast.date > date)
          current.forecasts = dplyr::filter(forecasts, forecast.date == date)

          # calculate peLasso method
          # stage 1, shrink to 0,
          # y-f -> eLasso to select subset of regressors
          x = as.matrix(dplyr::select(information.set , -observed, -date))
          y = information.set$observed - rowMeans(x)
          model = glmnet::cv.glmnet(x, y, alpha = 1, intercept = FALSE, parallel = TRUE)
          covariates = colnames(x)[which(as.vector(coef(model, s = 'lambda.min')) != 0)-1]

          # stage 2, shrink to 1/k,
          # y-f -> eRidge to shrink subset of regressors to uniform weights
          if(length(covariates) > 1){
            model = glmnet::cv.glmnet(x[,covariates], y, alpha = 0, intercept = FALSE)
          }else{
            covariates = colnames(x)
          }

          # calculate forecast: ridge prediction plus the equal-weight mean
          peLasso = predict(model, newx = as.matrix(current.forecasts[,covariates]), s = 'lambda.min') +
            rowMeans(dplyr::select(current.forecasts , -observed, -date))
          results = data.frame(date = current.forecasts$date, peLasso, model = 'peLasso')
          colnames(results)[colnames(results) == 'X1'] = 'forecast'
          return(results)

        }
      ) %>%
      purrr::reduce(dplyr::bind_rows)

    results.list[['peLasso']] = combination
  }

  # ML algorithms via caret
  if(length(intersect(c('GBM','RF','NN','ols','lasso','ridge','elastic','pcr','pls'), method)) > 0){

    # training parameter creation and warnings
    if(exists("forecast_combinations.control_panel")){
      message('forecast_combinations.control_panel exists and will be used for ML forecast combination techniques in its present state.')
    }else{

      # (bug fix: `forecasts` is wide, so forecasts$model does not exist and
      #  the covariate count was always 0; count the forecast columns instead)
      covariates = ncol(dplyr::select(forecasts, -dplyr::contains('date'), -dplyr::contains('observed')))

      forecast_combinations.control_panel = instantiate.forecast_combinations.control_panel(covariates = covariates)
      message('forecast_combinations.control_panel was instantiated and default values will be used to train ML forecast combination techniques.')
    }

    # (bug fix: 'pcr' and 'pls' previously triggered this branch but were
    #  missing from the inner intersect and so were never estimated)
    combination = intersect(c('GBM','RF','NN','ols','lasso','ridge','elastic','pcr','pls'), method) %>%
      purrr::map(
        .f = function(engine){

          forecasts$date[burn.in : nrow(forecasts)] %>%
            furrr::future_map(
              .f = function(forecast.date){

                # set data: train on strictly earlier periods, predict the current one
                information.set = dplyr::filter(forecasts, forecast.date > date)
                current.forecasts = dplyr::filter(forecasts, forecast.date == date)

                # estimate model
                model =
                  caret::train(observed~.,
                               data = dplyr::select(information.set, -date),
                               method = forecast_combinations.control_panel$caret.engine[[engine]],
                               trControl = forecast_combinations.control_panel$control,
                               tuneGrid = forecast_combinations.control_panel$tuning.grids[[engine]],
                               metric = forecast_combinations.control_panel$accuracy,
                               na.action = na.omit)

                # calculate forecast
                point = predict(model, newdata = current.forecasts)

                # calculate standard error; only engines whose finalModel
                # supports confidence intervals (e.g. lm) return one
                error =
                  try(
                    predict(model$finalModel, current.forecasts, interval = "confidence", level = 0.95) %>%
                      data.frame(),
                    silent = TRUE
                  )

                if('upr' %in% names(error) == TRUE){
                  error = (error$upr - error$fit) / qnorm(0.95)
                  error = as.numeric(error)
                }else{
                  error = NA
                }

                # set dates
                results = data.frame(date = current.forecasts$date,
                                     model = engine, forecast = point, se = error)
              }
            ) %>%
            purrr::reduce(dplyr::bind_rows)
        }
      ) %>%
      purrr::reduce(dplyr::bind_rows)

    results.list[['ML']] = combination
  }

  # return results: stack all requested combinations, tagging each model name
  results = purrr::reduce(results.list, dplyr::bind_rows) %>%
    dplyr::mutate(model = paste0(model, '.combo'))
  rownames(results) = c(1:nrow(results))
  return(results)
}
398 |
--------------------------------------------------------------------------------
/R/forecast_metrics.R:
--------------------------------------------------------------------------------
1 |
2 | # dependencies:
3 | # lmtest
4 | # sandwich
5 | # forecast
6 |
7 | #-------------------------------------------
8 | # loss functions
9 | #-------------------------------------------
10 | #' Calculate error via loss functions
11 | #'
12 | #' A function to calculate various error loss functions. Options include:
13 | #' MSE, RMSE, MAE, and MAPE. The default is MSE loss.
14 | #'
15 | #' @param forecast numeric: vector of forecasted values
16 | #' @param observed numeric: vector of observed values
17 | #' @param metric string: loss function
18 | #'
19 | #' @return numeric test result
20 | #'
21 | #' @export
22 |
loss_function = function(
  forecast,       # numeric: vector of forecasted values
  observed,       # numeric: vector of observed values
  metric = 'MSE'  # string: loss function; one of 'MSE', 'RMSE', 'MAE', 'MAPE'
){

  # validate the requested metric up front
  # (bug fix: an unrecognized metric previously fell through every branch and
  #  raised an obscure "object 'error' not found" error)
  if(!metric %in% c('MSE','RMSE','MAE','MAPE')){
    stop("metric must be one of 'MSE', 'RMSE', 'MAE', 'MAPE'")
  }

  error =
    switch(metric,
           MSE  = mean((observed - forecast)^2, na.rm = TRUE),
           RMSE = sqrt(mean((observed - forecast)^2, na.rm = TRUE)),
           MAE  = mean(abs(observed - forecast), na.rm = TRUE),
           MAPE = mean(abs((forecast - observed)/observed), na.rm = TRUE))

  return(error)
}
41 |
42 | #-------------------------------------------
43 | # forecast accuracy
44 | #-------------------------------------------
45 | #' Calculate forecast accuracy
46 | #'
47 | #' A function to calculate various loss functions, including
48 | #' MSE, RMSE, MAE, and MAPE.
49 | #'
50 | #' @param Data data.frame: data frame of forecasts, model names, and dates
51 | #'
52 | #' @return data.frame of numeric error results
53 | #'
54 | #' @examples
55 | #' \donttest{
56 | #'
57 | #' # simple time series
58 | #' A = c(1:100) + rnorm(100)
59 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
60 | #' Data = data.frame(date = date, A)
61 | #'
62 | #' # run forecast_univariate
63 | #' forecast.uni =
64 | #' forecast_univariate(
65 | #' Data = Data,
66 | #' forecast.dates = tail(Data$date,10),
67 | #' method = c('naive','auto.arima', 'ets'),
68 | #' horizon = 1,
69 | #' recursive = FALSE,
70 | #' freq = 'month')
71 | #'
72 | #' forecasts =
73 | #' dplyr::left_join(
74 | #' forecast.uni,
75 | #' data.frame(date, observed = A),
76 | #' by = 'date'
77 | #' )
78 | #'
79 | #' # forecast accuracy
80 | #' forecast.accuracy = forecast_accuracy(forecasts)
81 | #'
82 | #' }
83 | #'
84 | #' @export
85 |
forecast_accuracy = function(
  Data  # data.frame: data frame of forecasts, model names, and dates
){

  # input validation
  # (bug fix: print(errorCondition(...)) did not halt execution, and the
  #  error message misspelled "observed")
  if(!'observed' %in% names(Data)){
    stop('There must be a column named "observed" in Data.')
  }
  if(!'date' %in% names(Data)){
    stop('There must be a column named "date" in Data.')
  }

  # function variables (placeholders to avoid R CMD check NSE notes)
  model = observed = forecast = forecast.date = se = NA

  # set data: attach the observed series to every model's forecasts by date
  information.set =
    dplyr::full_join(
      dplyr::select(Data, -observed),
      dplyr::select(Data, date, observed),
      by = 'date')

  # calculate MSE, RMSE, MAE, and MAPE per model
  information.set = information.set %>%
    dplyr::group_split(model) %>%
    purrr::map_df(
      .f = function(X){

        Y = X %>%
          dplyr::select(observed, forecast, model) %>%
          na.omit() %>%
          dplyr::summarize(
            model = unique(model),
            MSE = mean((observed - forecast)^2, na.rm = TRUE),
            RMSE = sqrt(mean((observed - forecast)^2, na.rm = TRUE)),
            MAE = mean(abs(observed - forecast), na.rm = TRUE),
            MAPE = mean(abs((forecast - observed)/observed), na.rm = TRUE))

        return(Y)
      }
    )

  return(information.set)
}
130 |
131 |
132 | #-------------------------------------------
133 | # forecast comparison
134 | #-------------------------------------------
135 | #' Compare forecast accuracy
136 | #'
137 | #' A function to compare forecasts. Options include: simple forecast error ratios,
138 | #' [Diebold-Mariano test](https://www.sas.upenn.edu/~fdiebold/papers/paper68/pa.dm.pdf), and [Clark and West test](https://www.nber.org/papers/t0326) for nested models
139 | #'
140 | #' @param Data data.frame: data frame of forecasts, model names, and dates
141 | #' @param baseline.forecast string: column name of baseline (null hypothesis) forecasts
#' @param test string: which test to use; ER = error ratio, DM = Diebold-Mariano, CW = Clark and West
143 | #' @param loss string: error loss function to use if creating forecast error ratio
144 | #' @param horizon int: horizon of forecasts being compared in DM and CW tests
145 | #'
146 | #' @return numeric test result
147 | #'
148 | #' @examples
149 | #' \donttest{
150 | #'
151 | #' # simple time series
152 | #' A = c(1:100) + rnorm(100)
153 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
154 | #' Data = data.frame(date = date, A)
155 | #'
156 | #' # run forecast_univariate
157 | #' forecast.uni =
158 | #' forecast_univariate(
159 | #' Data = Data,
160 | #' forecast.dates = tail(Data$date,10),
161 | #' method = c('naive','auto.arima', 'ets'),
162 | #' horizon = 1,
163 | #' recursive = FALSE,
164 | #' freq = 'month')
165 | #'
166 | #' forecasts =
167 | #' dplyr::left_join(
168 | #' forecast.uni,
169 | #' data.frame(date, observed = A),
170 | #' by = 'date'
171 | #' )
172 | #'
173 | #' # run ER (MSE)
174 | #' er.ratio.mse =
175 | #' forecast_comparison(
176 | #' forecasts,
177 | #' baseline.forecast = 'naive',
178 | #' test = 'ER',
179 | #' loss = 'MSE')
180 | #' }
181 | #'
182 | #' @export
183 |
forecast_comparison = function(
  Data,              # data.frame: data frame of forecasts, model names, and dates
  baseline.forecast, # string: column name of baseline (null hypothesis) forecasts
  test = 'ER',       # string: which test to use; ER = error ratio, DM = Diebold-Mariano, CW = Clark and West
  loss = 'MSE',      # string: error loss function to use if creating forecast error ratio
  horizon = NULL     # int: horizon of forecasts being compared in DM and CW tests
){

  # input validation
  # (bug fix: print(errorCondition(...)) did not halt execution)
  if(!'observed' %in% names(Data)){
    stop('There must be a column named "observed" in Data.')
  }
  if(!'date' %in% names(Data)){
    stop('There must be a column named "date" in Data.')
  }

  # function variables (placeholders to avoid R CMD check NSE notes)
  model = observed = forecast = forecast.date = se = NA

  # set data: pair each model's forecasts with the baseline model's forecasts
  information.set =
    dplyr::full_join(
      dplyr::select(Data, -observed),
      Data %>%
        dplyr::filter(model == baseline.forecast) %>%
        dplyr::select(date, observed, baseline.forecast = forecast),
      by = 'date')


  # simple error ratio: loss(model) / loss(baseline)
  if(test == 'ER'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          error =
            loss_function(X$forecast, X$observed, loss) /
            loss_function(X$baseline.forecast, X$observed, loss)

          return(
            data.frame(
              model = unique(X$model),
              error.ratio = error)
          )
        }
      )

  # Diebold-Mariano test (one-sided, alternative: model beats baseline)
  }else if(test == 'DM'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          # identical forecasts (e.g. the baseline against itself) make the
          # test degenerate; return NA for that row
          if(sum(na.omit(X$baseline.forecast - X$forecast)) == 0){
            return(
              data.frame(
                model = baseline.forecast,
                DM.statistic = NA)
            )
          }

          DM.statistic =
            forecast::dm.test(
              e1 = na.omit(X$baseline.forecast - X$observed),
              e2 = na.omit(X$forecast - X$observed),
              alternative = 'less')$statistic[1]

          return(
            data.frame(
              model = unique(X$model),
              DM.statistic = DM.statistic)
          )
        }
      )

  # Clark-West test for nested models with Newey-West (HAC) standard errors
  }else if(test == 'CW'){
    information.set = information.set %>%
      dplyr::group_split(model) %>%
      purrr::map_df(
        .f = function(X){

          # identical forecasts make the test degenerate; return NA
          if(sum(na.omit(X$baseline.forecast - X$forecast)) == 0){
            return(
              data.frame(
                model = baseline.forecast,
                CW.statistic = NA)
            )
          }

          # adjusted squared-error differential (Clark & West)
          fCW12 =
            (X$observed - X$baseline.forecast)^2 -
            (X$observed - X$forecast)^2 -
            (X$baseline.forecast - X$forecast)^2

          # t-statistic on the mean of fCW12, HAC-corrected
          lmCW = lm(as.numeric(fCW12)~1)

          lmCW.summ = summary(lmCW)

          lmCW.NW.summ = lmCW.summ

          lmCW.NW.summ$coefficients =
            unclass(lmtest::coeftest(lmCW, vcov. = sandwich::NeweyWest(lmCW, lag = horizon)))

          CW.statistic = lmCW.NW.summ$coefficients[3]

          # (bug fix: this column was previously named `Cw.statistic`, which
          #  clashed with the baseline row's `CW.statistic` column and
          #  produced two half-empty columns after row-binding)
          return(
            data.frame(
              model = unique(X$model),
              CW.statistic = CW.statistic)
          )
        }
      )
  }

  rownames(information.set) = c(1:nrow(information.set))
  return(information.set)

}
303 |
--------------------------------------------------------------------------------
/R/forecast_multivariate.R:
--------------------------------------------------------------------------------
1 | #----------------------------------------------
2 | # multivariate forecasting arguments - ML
3 | #----------------------------------------------
4 | #' Create interface to control `forecast_multivariate` ML estimation
5 | #'
6 | #' A function to create the multivariate forecast methods
7 | #' arguments list for user manipulation.
8 | #'
9 | #' @param covariates int: the number of features that will go into the model
10 | #' @param rolling.window int: size of rolling window, NA if expanding window is used
11 | #' @param horizon int: number of periods into the future to forecast
12 | #'
13 | #' @return forecast_multivariate.ml.control_panel
14 | #'
15 | #' @export
16 |
instantiate.forecast_multivariate.ml.control_panel = function(covariates = NULL, rolling.window = NULL, horizon = NULL){

  # penalty grid shared by the glmnet-based engines (ridge and lasso)
  lambda.grid = 10^seq(-3, 3, length = 100)

  # map OOS method names onto the corresponding caret engine names
  caret.engine =
    list(
      ols     = 'lm',
      ridge   = 'glmnet',
      lasso   = 'glmnet',
      elastic = 'glmnet',
      RF      = 'rf',
      GBM     = 'gbm',
      NN      = 'avNNet',
      pls     = 'pls',
      pcr     = 'pcr')

  # default hyper-parameter search grids, one per engine
  # (NULL entries let caret choose its own default grid)
  tuning.grids =
    list(
      ols     = NULL,
      ridge   = expand.grid(alpha = 0, lambda = lambda.grid),
      lasso   = expand.grid(alpha = 1, lambda = lambda.grid),
      elastic = NULL,
      GBM     = expand.grid(n.minobsinnode = c(1),
                            shrinkage = c(.1,.01),
                            n.trees = c(100, 250, 500),
                            interaction.depth = c(1,2,5)),
      RF      = expand.grid(mtry = c(1:4)),
      NN      = expand.grid(size = seq(2,10,5),
                            decay = c(.01,.001),
                            bag = c(100, 250, 500)),
      pls     = expand.grid(ncomp = c(1:5)),
      pcr     = expand.grid(ncomp = c(1:5)))

  # refine the RF and NN grids when the number of features is known
  if(!is.null(covariates)){
    tuning.grids[['RF']] =
      expand.grid(
        mtry = covariates/3)

    tuning.grids[['NN']] =
      expand.grid(
        size = c(covariates, 2*covariates, 3*covariates),
        decay = c(.01,.001),
        bag = c(20, 100))
  }

  # hyper-parameter selection routine:
  # - numeric rolling.window: time-slice CV with that initial window
  # - non-NULL but non-numeric rolling.window (e.g. NA): time-slice CV with a
  #   default initial window of 5
  #   NOTE(review): NA is documented elsewhere as "expanding window", yet it
  #   lands in this middle branch rather than plain CV — confirm intent
  # - NULL rolling.window: standard 5-fold cross-validation
  if(is.numeric(rolling.window)){
    control =
      caret::trainControl(
        method = "timeslice",
        horizon = horizon,
        initialWindow = rolling.window,
        allowParallel = TRUE)
  }else if(!is.null(rolling.window)){
    control =
      caret::trainControl(
        method = "timeslice",
        horizon = horizon,
        initialWindow = 5,
        allowParallel = TRUE)
  }else{
    control =
      caret::trainControl(
        method = "cv",
        number = 5,
        allowParallel = TRUE)
  }

  # accuracy metric used in training
  accuracy = 'RMSE'

  # return training information
  return(
    list(
      caret.engine = caret.engine,
      tuning.grids = tuning.grids,
      control = control,
      accuracy = accuracy))

}
126 |
127 | #----------------------------------------------
128 | # multivariate forecasting arguments - VAR
129 | #----------------------------------------------
130 | #' Create interface to control `forecast_multivariate` VAR estimation
131 | #'
132 | #' A function to create the multivariate forecast methods
133 | #' arguments list for user manipulation.
134 | #'
135 | #' @return forecast_multivariate.var.control_panel
136 | #'
137 | #' @export
138 |
instantiate.forecast_multivariate.var.control_panel = function(){

  # default VAR estimation arguments used by `forecast_multivariate`
  # (presumably forwarded to the VAR estimator — confirm against caller)
  control_panel =
    list(
      p       = 1,      # lag order
      lag.max = NULL,   # no automatic lag-order search by default
      ic      = 'AIC',  # information criterion when lag.max is set
      type    = 'none', # no deterministic regressors
      season  = NULL,   # no seasonal dummies
      exogen  = NULL)   # no exogenous variables

  return(control_panel)

}
153 |
154 | #---------------------------------------------
155 | # Multivariate Forecast
156 | #---------------------------------------------
157 | #' Forecast with multivariate models
158 | #'
159 | #' A function to estimate multivariate forecasts out-of-sample. Methods available include:
160 | #' vector auto-regression, linear regression, lasso regression, ridge regression, elastic net,
161 | #' random forest, tree-based gradient boosting machine, and single-layer neural network.
162 | #' See package website for most up-to-date list of available models.
163 | #'
164 | #' @param Data data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a `ts`, `xts`, or `zoo` object to forecast
165 | #' @param forecast.dates date: dates forecasts are created
166 | #' @param target string: column name in Data of variable to forecast
167 | #' @param method string: methods to use
168 | #' @param rolling.window int: size of rolling window, NA if expanding window is used
169 | #' @param freq string: time series frequency; day, week, month, quarter, year
170 | #' @param horizon int: number of periods into the future to forecast
171 | #' @param lag.variables string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables
172 | #' @param lag.n int: number of lags to create
173 | #' @param outlier.clean boolean: if TRUE then clean outliers
174 | #' @param outlier.variables string: vector of variables to purge of outlier, default is all but 'date' column
175 | #' @param outlier.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
176 | #' @param outlier.trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
177 | #' @param outlier.cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
178 | #' @param impute.missing boolean: if TRUE then impute missing values
179 | #' @param impute.method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
180 | #' @param impute.variables string: vector of variables to impute missing values, default is all numeric columns
181 | #' @param impute.verbose boolean: show start-up status of impute.missing.routine
182 | #' @param reduce.data boolean: if TRUE then reduce dimension
183 | #' @param reduce.variables string: vector of variables to impute missing values, default is all numeric columns
184 | #' @param reduce.ncomp int: number of factors to create
185 | #' @param reduce.standardize boolean: normalize variables (mean zero, variance one) before estimating factors
186 | #' @param parallel.dates int: the number of cores available for parallel estimation
187 | #' @param return.models boolean: if TRUE then return list of models estimated each forecast.date
188 | #' @param return.data boolean: if True then return list of information.set for each forecast.date
189 | #'
190 | #' @return data.frame with a row for each forecast by model and forecasted date
191 | #'
192 | #' @examples
193 | #' \donttest{
194 | #' # simple time series
195 | #' A = c(1:100) + rnorm(100)
196 | #' B = c(1:100) + rnorm(100)
197 | #' C = c(1:100) + rnorm(100)
198 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
199 | #' Data = data.frame(date = date, A, B, C)
200 | #'
#' # run forecast_multivariate
202 | #' forecast.multi =
203 | #' forecast_multivariate(
204 | #' Data = Data,
205 | #' target = 'A',
206 | #' forecast.dates = tail(Data$date,5),
207 | #' method = c('ols','var'),
208 | #' horizon = 1,
209 | #' # information set
210 | #' rolling.window = NA,
211 | #' freq = 'month',
212 | #' # data prep
213 | #' lag.n = 4,
214 | #' outlier.clean = TRUE,
215 | #' impute.missing = TRUE)
216 | #' }
217 | #'
218 | #'
219 | #' @export
220 |
forecast_multivariate = function(
  Data,                            # data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a `ts`, `xts`, or `zoo` object to forecast
  forecast.dates,                  # date: dates forecasts are created
  target,                          # string: column name in `Data` of variable to forecast
  horizon,                         # int: number of periods into the future to forecast
  method,                          # string or vector: methods to use; 'var', 'ols', 'ridge', 'lasso', 'elastic', 'RF', 'GBM', 'NN'

  # information set
  rolling.window = NA,             # int: size of rolling window, NA if expanding window is used
  freq,                            # string: time series frequency; day, week, month, quarter, year
  lag.variables = NULL,            # string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables
  lag.n = NULL,                    # int: number of lags to create

  # outlier cleaning
  outlier.clean = FALSE,           # boolean: if TRUE then clean outliers
  outlier.variables = NULL,        # string: vector of variables to clean of outliers, default is all but 'date' column
  outlier.bounds = c(0.05, 0.95),  # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
  outlier.trim = FALSE,            # boolean: if TRUE then replace outliers with NA instead of winsorizing bound
  outlier.cross_section = FALSE,   # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)

  # impute missing
  impute.missing = FALSE,          # boolean: if TRUE then impute missing values
  impute.method = 'kalman',        # string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
  impute.variables = NULL,         # string: vector of variables to impute missing values, default is all numeric columns
  impute.verbose = FALSE,          # boolean: show start-up status of impute.missing routine

  # dimension reduction
  reduce.data = FALSE,             # boolean: if TRUE then reduce dimension
  reduce.variables = NULL,         # string: vector of variables to use in dimension reduction, default is all numeric columns
  reduce.ncomp = NULL,             # int: number of factors to create
  reduce.standardize = TRUE,       # boolean: normalize variables (mean zero, variance one) before estimating factors

  # parallel processing
  parallel.dates = NULL,           # int: the number of cores available for parallel estimation

  # additional objects
  return.models = FALSE,           # boolean: if TRUE then return list of models estimated each forecast.date
  return.data = FALSE              # boolean: if TRUE then return list of information.set for each forecast.date

){

  # convert from ts, xts, or zoo object to a data.frame with an explicit 'date' column
  if(xts::is.xts(Data) | zoo::is.zoo(Data) | stats::is.ts(Data)){
    Data = data.frame(date = zoo::index(Data), Data)
  }

  # ML training parameters: honor a user-supplied control panel if one exists,
  # otherwise instantiate package defaults
  if(exists("forecast_multivariate.ml.control_panel")){

    message('forecast_multivariate.ml.control_panel exists and will be used for ML model estimation in its present state.')

  }else{

    # number of covariates available to the ML models: count columns (not rows)
    # after dropping the target and date columns, plus one extra covariate per
    # variable per requested lag
    covariates = ncol(dplyr::select(Data, -dplyr::all_of(target), -date))
    if(!is.null(lag.n)){covariates = covariates + covariates*lag.n}

    forecast_multivariate.ml.control_panel = instantiate.forecast_multivariate.ml.control_panel(covariates = covariates, rolling.window = rolling.window, horizon = horizon)
    message('forecast_multivariate.ml.control_panel was instantiated and default values will be used for ML model estimation.')

  }

  # VAR parameters: honor a user-supplied control panel if one exists,
  # otherwise instantiate package defaults
  if(exists("forecast_multivariate.var.control_panel")){
    message('forecast_multivariate.var.control_panel exists and will be used for VAR model estimation in its present state.')
  }else{
    forecast_multivariate.var.control_panel = instantiate.forecast_multivariate.var.control_panel()
    message('forecast_multivariate.var.control_panel was instantiated and default values will be used for VAR model estimation.')
  }

  # create parallel back end (sequential plan when no worker count is supplied)
  if(!is.null(parallel.dates)){
    future::plan(strategy = 'multisession', workers = parallel.dates)
  }else{
    future::plan(strategy = 'sequential')
  }

  # Create forecasts: one freshly cleaned information set per forecast date,
  # so no look-ahead information leaks into data preparation or estimation
  forecasts = forecast.dates %>%
    furrr::future_map(
      .f = function(forecast.date){

        # subset data to observations available at forecast.date
        information.set =
          data_subset(
            Data = Data,
            forecast.date = forecast.date,
            rolling.window = rolling.window,
            freq = freq
          )

        # clean outliers
        if(outlier.clean){
          information.set =
            data_outliers(
              Data = information.set,
              variables = outlier.variables,
              w.bounds = outlier.bounds,
              trim = outlier.trim,
              cross_section = outlier.cross_section
            )
        }

        # impute missing values
        if(impute.missing){
          information.set =
            data_impute(
              Data = information.set,
              variables = impute.variables,
              method = impute.method,
              verbose = impute.verbose
            )
        }

        # dimension reduction: estimate factors, then join them back onto the
        # target and date columns
        if(reduce.data){
          information.set.reduce =
            data_reduction(
              Data = information.set,
              variables = reduce.variables,
              ncomp = reduce.ncomp,
              standardize = reduce.standardize
            )

          information.set =
            dplyr::full_join(
              dplyr::select(information.set, dplyr::all_of(target), date),
              information.set.reduce,
              by = 'date')
        }

        # create variable lags
        if(!is.null(lag.n)){
          information.set =
            n.lag(
              Data = information.set,
              lags = lag.n,
              variables = lag.variables)
        }

        # estimate each requested method on the prepared information set
        results = method %>%
          purrr::map(
            .f = function(engine){

              # most recent observation, used to generate the forecast
              current.set = dplyr::filter(information.set, forecast.date == date)

              # estimate ML model via caret
              if(engine != 'var'){

                # a missing covariate at forecast.date makes prediction impossible:
                # signal a warning and return an NA forecast in the standard
                # list(predictions, model) shape so downstream aggregation keeps it
                if(anyNA(dplyr::select(current.set, -date))){
                  warning(paste0('Missing covariate on: ', forecast.date), call. = FALSE)
                  return(
                    list(
                      predictions = data.frame(
                        date = forecast_date(forecast.date, horizon, freq),
                        forecast.date = forecast.date,
                        model = engine, forecast = NA, se = NA),
                      model = NA
                    )
                  )
                }

                # rename the target column to the generic name used in the formula
                names(information.set)[names(information.set) == target] = 'target'

                # align target with covariates: predict target `horizon` steps ahead
                information.set =
                  dplyr::mutate(information.set, target = dplyr::lead(target, horizon)) %>%
                  na.omit()

                # estimate model with user- or default-supplied caret controls
                model =
                  caret::train(target~.,
                               data = dplyr::select(information.set, -date),
                               method = forecast_multivariate.ml.control_panel$caret.engine[[engine]],
                               trControl = forecast_multivariate.ml.control_panel$control,
                               tuneGrid = forecast_multivariate.ml.control_panel$tuning.grids[[engine]],
                               metric = forecast_multivariate.ml.control_panel$accuracy)

                # calculate point forecast; fall back to NA on prediction failure
                point = try(predict(model, newdata = current.set))

                if(!is.numeric(point)){
                  point = NA
                }

                # back out a standard error from the 95% confidence interval where
                # the underlying model supports interval prediction; otherwise NA
                error =
                  try(
                    predict(model$finalModel, current.set, interval = "confidence", level = 0.95) %>%
                      data.frame(),
                    silent = TRUE
                  )

                error = try((error$upr - error$fit) / qnorm(0.95),
                            silent = TRUE)

                if(!is.numeric(error) || length(error) != 1){error = NA}

              # estimate VAR
              }else{

                model =
                  vars::VAR(
                    y = na.omit(dplyr::select(information.set, -date)),
                    p = forecast_multivariate.var.control_panel$p,
                    lag.max = forecast_multivariate.var.control_panel$max.lag,
                    ic = forecast_multivariate.var.control_panel$ic,
                    season = forecast_multivariate.var.control_panel$season,
                    type = forecast_multivariate.var.control_panel$type
                  )

                # calculate forecast and standard error from the target equation's
                # point forecast and upper interval bound
                ml = predict(model, n.ahead = horizon)
                ml = ml$fcst[target] %>% data.frame()
                point = ml[horizon, 1]
                error = (ml[horizon, 3] - ml[horizon, 1]) / qnorm(0.95)

              }

              # date the forecast refers to (horizon periods after forecast.date)
              date = forecast_date(
                forecast.date,
                horizon,
                freq)

              # assemble prediction record
              predictions = data.frame(
                date = date,
                forecast.date = forecast.date,
                model = engine, forecast = point, se = error)


              # return results
              return(
                list(
                  predictions = predictions,
                  model = model
                )
              )

            }
          )

        # collapse per-engine predictions into one data.frame
        predictions =
          purrr::map(results, .f = function(X){return(X$predictions)}) %>%
          purrr::reduce(dplyr::bind_rows)

        rownames(predictions) = seq_len(nrow(predictions))

        # collect per-engine fitted models
        models =
          purrr::map(results, .f = function(X){return(X$model)})

        # store objects for return
        results =
          list(
            predictions = predictions,
            information.set = information.set,
            models = models
          )

        # return results
        return(results)

      }
    )

  # prepare forecasts across all forecast dates
  predictions =
    purrr::map(forecasts, .f = function(X){return(X$predictions)}) %>%
    purrr::reduce(dplyr::bind_rows)

  # wrap in a list only when model or data lists are also requested
  if(return.data || return.models){
    information = list(forecasts = predictions)
  }else{
    information = predictions
  }

  # prepare models, named by forecast date
  if(return.models == TRUE){
    models = purrr::map(forecasts, .f = function(X){return(X$models)})
    names(models) = forecast.dates
    information[['models']] = models
  }

  # prepare information sets, named by forecast date
  if(return.data == TRUE){
    information.set = purrr::map(forecasts, .f = function(X){return(X$information.set)})
    names(information.set) = forecast.dates
    information[['information.set']] = information.set
  }

  # return results
  return(information)
}
515 |
--------------------------------------------------------------------------------
/R/forecast_univariate.R:
--------------------------------------------------------------------------------
1 |
2 | # dependencies:
3 | # magrittr
4 | # lubridate
5 | # dplry
6 | # purrr
7 | # forecast
8 |
9 |
10 | #----------------------------------------------
11 | # univariate forecasting arguments
12 | #----------------------------------------------
13 | #' Create interface to control `forecast_univariate` model estimation
14 | #'
15 | #' A function to create the univariate forecast method arguments list
16 | #' for user manipulation.
17 | #'
18 | #' @return forecast_univariate.control_panel
19 | #'
20 | #' @export
21 |
instantiate.forecast_univariate.control_panel = function(){

  # `forecast`-package estimation routines supported by forecast_univariate,
  # in the order they are exposed to the user
  method.names = c('auto.arima', 'Arima', 'dshw', 'holt', 'hw', 'ses',
                   'ets', 'stlm', 'bats', 'tbats', 'thetaf', 'nnetar',
                   'meanf', 'naive', 'snaive', 'rwf', 'tslm', 'splinef')

  # look up each estimation function in the forecast package namespace
  # (equivalent to writing forecast::<name> for every entry)
  methods = lapply(method.names, function(fn) getExportedValue('forecast', fn))
  names(methods) = method.names

  # user-editable argument lists, one (initially NULL) slot per method;
  # note the historical ordering places 'splinef' before 'tslm' here
  argument.names = c('auto.arima', 'Arima', 'dshw', 'holt', 'hw', 'ses',
                     'ets', 'stlm', 'bats', 'tbats', 'thetaf', 'nnetar',
                     'meanf', 'naive', 'snaive', 'rwf', 'splinef', 'tslm')
  arguments = stats::setNames(vector('list', length(argument.names)), argument.names)

  # control panel: estimation functions plus their (empty) argument slots
  list(
    method = methods,
    arguments = arguments
  )

}
76 |
77 | #----------------------------------------------
78 | # univariate time series forecasting function
79 | #----------------------------------------------
80 | #' Forecast with univariate models
81 | #'
82 | #' A function to estimate univariate forecasts out-of-sample. Methods available include all forecast
83 | #' methods from the `forecast` package. See package website for most up-to-date list of available models.
84 | #'
85 | #' @param Data data.frame: data frame of variable to forecast and a date column; may alternatively be a `ts`, `xts`, or `zoo` object to forecast
86 | #' @param forecast.dates date: dates forecasts are created
87 | #' @param methods string: models to estimate forecasts
88 | #' @param horizon int: number of periods to forecast
89 | #' @param rolling.window int: size of rolling window, NA if expanding window is used
90 | #' @param freq string: time series frequency; day, week, month, quarter, year
91 | #' @param recursive boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE
92 | #' @param outlier.clean boolean: if TRUE then clean outliers
93 | #' @param outlier.variables string: vector of variables to purge of outliers, default is all but 'date' column
94 | #' @param outlier.bounds double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
95 | #' @param outlier.trim boolean: if TRUE then replace outliers with NA instead of winsorizing bound
96 | #' @param outlier.cross_section boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)
97 | #' @param impute.missing boolean: if TRUE then impute missing values
98 | #' @param impute.method string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
99 | #' @param impute.variables string: vector of variables to impute missing values, default is all numeric columns
100 | #' @param impute.verbose boolean: show start-up status of impute.missing.routine
101 | #' @param parallel.dates int: the number of cores available for parallel estimation
102 | #' @param return.models boolean: if TRUE then return list of models estimated each forecast.date
#' @param return.data boolean: if TRUE then return list of information.set for each forecast.date
104 | #'
105 | #' @return data.frame with a row for each forecast by model and forecasted date
106 | #'
107 | #' @examples
108 | #' \donttest{
109 | #' # simple time series
110 | #' A = c(1:100) + rnorm(100)
111 | #' date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
112 | #' Data = data.frame(date = date, A)
113 | #'
#' # estimate univariate forecasts
115 | #' forecast.uni =
116 | #' forecast_univariate(
117 | #' Data = Data,
118 | #' forecast.dates = tail(Data$date,5),
119 | #' method = c('naive','auto.arima', 'ets'),
120 | #' horizon = 1,
121 | #' recursive = FALSE,
122 | #' # information set
123 | #' rolling.window = NA,
124 | #' freq = 'month',
125 | #' # data prep
126 | #' outlier.clean = TRUE,
127 | #' impute.missing = TRUE)
128 | #' }
129 | #'
130 | #' @export
131 |
forecast_univariate = function(
  Data,                            # data.frame: data frame of variable to forecast and a date column; may alternatively be a `ts`, `xts`, or `zoo` object to forecast
  forecast.dates,                  # date: dates forecasts are created
  methods,                         # string or vector: models to estimate forecasts with; currently supports all and only functions from the `forecast` package
  horizon,                         # int: number of periods to forecast
  recursive = TRUE,                # boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE

  # information set
  rolling.window = NA,             # int: size of rolling window, NA if expanding window is used
  freq,                            # string: time series frequency; day, week, month, quarter, year

  # outlier cleaning
  outlier.clean = FALSE,           # boolean: if TRUE then clean outliers
  outlier.variables = NULL,        # string: vector of variables to clean of outliers, default is all but 'date' column
  outlier.bounds = c(0.05, 0.95),  # double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)
  outlier.trim = FALSE,            # boolean: if TRUE then replace outliers with NA instead of winsorizing bound
  outlier.cross_section = FALSE,   # boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)

  # impute missing
  impute.missing = FALSE,          # boolean: if TRUE then impute missing values
  impute.method = 'kalman',        # string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'
  impute.variables = NULL,         # string: vector of variables to impute missing values, default is all numeric columns
  impute.verbose = FALSE,          # boolean: show start-up status of impute.missing routine

  # parallel processing
  parallel.dates = NULL,           # int: the number of cores available for parallel estimation

  # additional objects
  return.models = FALSE,           # boolean: if TRUE then return list of models estimated each forecast.date
  return.data = FALSE              # boolean: if TRUE then return list of information.set for each forecast.date

){

  # convert from ts, xts, or zoo object to a data.frame with an explicit 'date' column
  if(xts::is.xts(Data) | zoo::is.zoo(Data) | stats::is.ts(Data)){
    Data = data.frame(date = zoo::index(Data), Data)
  }

  # training parameters: honor a user-supplied control panel if one exists,
  # otherwise instantiate package defaults
  if(exists("forecast_univariate.control_panel")){
    message('forecast_univariate.control_panel exists and will be used for model estimation in its present state.')
  }else{
    forecast_univariate.control_panel = instantiate.forecast_univariate.control_panel()
    message('forecast_univariate.control_panel was instantiated and default values will be used for model estimation.')
  }

  # create parallel back end (sequential plan when no worker count is supplied)
  if(!is.null(parallel.dates)){
    future::plan(strategy = 'multisession', workers = parallel.dates)
  }else{
    future::plan(strategy = 'sequential')
  }

  # forecast routine: one freshly cleaned information set per forecast date,
  # so no look-ahead information leaks into data preparation or estimation
  forecasts = forecast.dates %>%
    furrr::future_map(
      .f = function(forecast.date){

        #---------------------------
        # Create information set
        #---------------------------

        # subset data to observations available at forecast.date
        information.set =
          data_subset(
            Data = Data,
            forecast.date = forecast.date,
            rolling.window = rolling.window,
            freq = freq
          )

        # clean outliers
        if(outlier.clean){
          information.set =
            data_outliers(
              Data = information.set,
              variables = outlier.variables,
              w.bounds = outlier.bounds,
              trim = outlier.trim,
              cross_section = outlier.cross_section
            )
        }

        # impute missing values
        if(impute.missing){
          information.set =
            data_impute(
              Data = information.set,
              variables = impute.variables,
              method = impute.method,
              verbose = impute.verbose
            )
        }

        # convert to a ts object, as expected by the `forecast` package
        information.set = information.set %>%
          dplyr::select(-date) %>%
          as.ts()

        #---------------------------
        # Create forecasts
        #---------------------------

        results =
          methods %>% purrr::map(
            .f = function(engine){

              # make predictions
              # 1. using direct projections: fit once, forecast `horizon` steps ahead
              if(recursive == FALSE){

                # point the engine's arguments at the current information set
                forecast_univariate.control_panel$arguments[[engine]]$y = information.set

                # estimate model
                model = do.call(what = forecast_univariate.control_panel$method[[engine]],
                                args = forecast_univariate.control_panel$arguments[[engine]])

                # create forecasts
                predictions = forecast::forecast(model, h = horizon)

                # back out standard errors from the reported prediction interval,
                # assuming normally distributed forecast errors
                calc.error = try(predictions$lower[1])

                if(is.numeric(calc.error)){
                  error = (predictions$upper[,1] - predictions$lower[,1]) /
                    (2 * qnorm(.5 + predictions$level[1] / 200))
                  error = as.numeric(error)
                }else{
                  # fix: previously assigned `se` here, leaving `error` undefined
                  error = NA
                }

                predictions = data.frame(model = engine, forecast = predictions$mean, se = error)

              # 2. using recursive forecasts: iterate one-step-ahead, feeding each
              #    prediction back into the information set before re-estimating
              }else{

                predictions = list()
                forecast_univariate.control_panel$arguments[[engine]]$y = information.set

                for(step in seq_len(horizon)){

                  # estimate model
                  model = do.call(what = forecast_univariate.control_panel$method[[engine]],
                                  args = forecast_univariate.control_panel$arguments[[engine]])

                  # create one-step-ahead forecast
                  prediction = forecast::forecast(model, h = 1)

                  # back out standard errors from the reported prediction interval,
                  # assuming normally distributed forecast errors
                  calc.error = try(prediction$lower[1])

                  if(is.numeric(calc.error)){
                    error = (prediction$upper[,1] - prediction$lower[,1]) /
                      (2 * qnorm(.5 + prediction$level[1] / 200))
                    error = as.numeric(error)
                  }else{
                    error = NA
                  }

                  predictions[[step]] = data.frame(model = engine, forecast = prediction$mean, se = error)

                  # append the forecast to the information set for the next step
                  information.set = rbind(information.set, prediction$mean[1]) %>% as.ts()
                  forecast_univariate.control_panel$arguments[[engine]]$y = information.set

                }

                # collapse per-step results
                predictions = purrr::reduce(predictions, dplyr::bind_rows) %>% data.frame()

              }

              # add forecast dates: the date the forecast was made and the dates forecasted
              predictions$forecast.date = forecast.date
              predictions$date = seq.Date(from = forecast.date, by = freq, length.out = horizon+1)[2:(horizon+1)]

              # return results (model is the last fitted model in the recursive case)
              return(
                list(
                  predictions = predictions,
                  model = model
                )
              )
            }
          )

        # collapse per-engine predictions into one data.frame
        predictions =
          purrr::map(results, .f = function(X){return(X$predictions)}) %>%
          purrr::reduce(dplyr::bind_rows)

        # collect per-engine fitted models
        models =
          purrr::map(results, .f = function(X){return(X$model)})

        # store objects for return
        results =
          list(
            predictions = predictions,
            information.set = information.set,
            models = models
          )

        # return results
        return(results)

      }
    )

  # prepare forecasts across all forecast dates
  predictions =
    purrr::map(forecasts, .f = function(X){return(X$predictions)}) %>%
    purrr::reduce(dplyr::bind_rows)

  # wrap in a list only when model or data lists are also requested
  if(return.data || return.models){
    information = list(forecasts = predictions)
  }else{
    information = predictions
  }

  # prepare models, named by forecast date
  if(return.models == TRUE){
    models = purrr::map(forecasts, .f = function(X){return(X$models)})
    names(models) = forecast.dates
    information[['models']] = models
  }

  # prepare information sets, named by forecast date
  if(return.data == TRUE){
    information.set = purrr::map(forecasts, .f = function(X){return(X$information.set)})
    names(information.set) = forecast.dates
    information[['information.set']] = information.set
  }

  # return results
  return(information)
}
373 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Out-of-sample time series forecasting
2 |
3 |
4 | [](http://www.gnu.org/licenses/gpl-3.0)
5 | [](https://CRAN.R-project.org/package=OOS)
6 | [](https://lifecycle.r-lib.org/articles/stages.html)
7 | [](https://codecov.io/gh/tylerJPike/OOS)
8 | [](https://travis-ci.org/tylerJPike/OOS)
9 |
10 |
11 | Out-of-Sample time series forecasting is a common, important, and subtle task. The OOS package introduces a comprehensive and cohesive API for the out-of-sample forecasting workflow: data preparation, forecasting - including both traditional econometric time series models and modern machine learning techniques - forecast combination, model and error analysis, and forecast visualization.
12 |
13 | The key difference between OOS and the other time series forecasting packages is that it operates out-of-sample by construction. That is, it re-cleans data and re-trains models each forecast.date and is careful not to introduce look-ahead bias into its information set via data cleaning or forecasts via model training. Other packages tend to fit the model once, leaving the user to construct the out-of-sample data cleaning and forecast exercise on their own.
14 |
15 | See the OOS package [website](https://tylerjpike.github.io/OOS/) for examples and documentation.
16 |
17 | ---
18 | ## Workflow and available Tools
19 |
20 | ### 1. Prepare Data
21 |
22 | | Clean Outliers | Impute Missing Observations (via [imputeTS](https://github.com/SteffenMoritz/imputeTS)) | Dimension Reduction |
23 | |----------------------|------------------------|-----------------------|
24 | | Winsorize | Linear Interpolation | Principal Components |
25 | | Trim | Kalman Filter | |
26 | | | Fill-Forward | |
27 | | | Average | |
28 | | | Moving Average | |
29 | | | Seasonal Decomposition | |
30 |
31 |
32 | ### 2. Forecast
33 |
34 | | Univariate Forecasts (via [forecast](https://github.com/robjhyndman/forecast)) | Multivariate Forecasts (via [caret](https://github.com/topepo/caret)) | Forecast Combinations |
35 | |----------------------|------------------------|-----------------------|
36 | | Random Walk | Vector Autoregression | Mean|
37 | | ARIMA | Linear Regression | Median |
38 | | ETS | LASSO Regression | Trimmed (Winsorized) Mean |
39 | | Spline | Ridge Regression | N-Best |
40 | | Theta Method | Elastic Net | Linear Regression |
41 | | TBATS | Principal Component Regression | LASSO Regression |
42 | | STL | Partial Least Squares Regression | Ridge Regression |
43 | | AR Perceptron | Random Forest | Partial Egalitarian LASSO |
44 | | | Tree-Based Gradient Boosting Machine | Principal Component Regression |
45 | | | Single Layered Neural Network | Partial Least Squares Regression |
46 | | | | Random Forest |
47 | | | | Tree-Based Gradient Boosting Machine |
48 | | | | Single Layered Neural Network |
49 |
50 |
51 | ### 3. Analyze
52 |
53 | | Accuracy | Compare | Visualize |
54 | |----------------------|------------------------|-----------------------|
55 | | Mean Square Error (MSE) | Forecast Error Ratios | Forecasts |
56 | | Root Mean Square Error (RMSE) | Diebold-Mariano Test (for unnested models) | Errors |
57 | | Mean Absolute Error (MAE) | Clark and West Test (for nested models) | |
58 | | Mean Absolute Percentage Error (MAPE) | | |
59 |
60 | ---
61 |
62 | ## Model estimation flexibility and accessibility
63 |
64 | Users may edit any model training routine through accessing a list of function arguments. For machine learning techniques, this entails editing [caret](https://github.com/topepo/caret) arguments including: tuning grid, control grid, method, and accuracy metric. For univariate time series forecasting, this entails passing arguments to [forecast](https://github.com/robjhyndman/forecast) package model functions. For imputing missing variables, this entails passing arguments to [imputeTS](https://github.com/SteffenMoritz/imputeTS) package functions.
65 |
66 | A brief example using an `Arima` model to forecast univariate time series:
67 |
68 | # 1. create the central list of univariate model training arguments, univariate.forecast.training
69 | forecast_univariate.control_panel = instantiate.forecast_univariate.control_panel()
70 |
71 | # 2. select an item to edit, for example the Arima order to create an ARMA(1,1)
72 | # view default model arguments (there are none)
73 | forecast_univariate.control_panel$arguments[['Arima']]
74 | # add our own function arguments
75 | forecast_univariate.control_panel$arguments[['Arima']]$order = c(1,0,1)
76 |
77 | A brief example using the `Random Forest` to combine forecasts:
78 |
79 | # 1. create the central list of ML training arguments
80 | forecast_combinations.control_panel = instantiate.forecast_combinations.control_panel()
81 |
82 | # 2. select an item to edit, for example the random forest tuning grid
83 | # view default tuning grid
84 | forecast_combinations.control_panel$tuning.grids[['RF']]
85 | # edit tuning grid
86 | forecast_combinations.control_panel$tuning.grids[['RF']] = expand.grid(mtry = c(1:6))
87 | ---
88 | ## Basic workflow
89 | #----------------------------------------
90 | ### Forecasting Example
91 | #----------------------------------------
92 | # pull and prepare data from FRED
93 | quantmod::getSymbols.FRED(
94 | c('UNRATE','INDPRO','GS10'),
95 | env = globalenv())
96 | Data = cbind(UNRATE, INDPRO, GS10)
97 | Data = data.frame(Data, date = zoo::index(Data)) %>%
98 | dplyr::filter(lubridate::year(date) >= 1990)
99 |
100 | # run univariate forecasts
101 | forecast.uni =
102 | forecast_univariate(
103 | Data = dplyr::select(Data, date, UNRATE),
104 | forecast.dates = tail(Data$date,15),
105 | method = c('naive','auto.arima', 'ets'),
106 | horizon = 1,
107 | recursive = FALSE,
108 |
109 | # information set
110 | rolling.window = NA,
111 | freq = 'month',
112 |
113 | # outlier cleaning
114 | outlier.clean = FALSE,
115 | outlier.variables = NULL,
116 | outlier.bounds = c(0.05, 0.95),
117 | outlier.trim = FALSE,
118 | outlier.cross_section = FALSE,
119 |
120 | # impute missing
121 | impute.missing = FALSE,
122 | impute.method = 'kalman',
123 | impute.variables = NULL,
124 | impute.verbose = FALSE)
125 |
126 | # create multivariate forecasts
127 | forecast.multi =
128 | forecast_multivariate(
129 | Data = Data,
        forecast.dates = tail(Data$date,15),
131 | target = 'UNRATE',
132 | horizon = 1,
133 | method = c('ols','lasso','ridge','elastic','GBM'),
134 |
135 | # information set
136 | rolling.window = NA,
137 | freq = 'month',
138 |
139 | # outlier cleaning
140 | outlier.clean = FALSE,
141 | outlier.variables = NULL,
142 | outlier.bounds = c(0.05, 0.95),
143 | outlier.trim = FALSE,
144 | outlier.cross_section = FALSE,
145 |
146 | # impute missing
147 | impute.missing = FALSE,
148 | impute.method = 'kalman',
149 | impute.variables = NULL,
150 | impute.verbose = FALSE,
151 |
152 | # dimension reduction
153 | reduce.data = FALSE,
154 | reduce.variables = NULL,
155 | reduce.ncomp = NULL,
156 | reduce.standardize = TRUE)
157 |
158 | # combine forecasts and add in observed values
159 | forecasts =
160 | dplyr::bind_rows(
161 | forecast.uni,
162 | forecast.multi) %>%
163 | dplyr::left_join(
164 | dplyr::select(Data, date, observed = UNRATE))
165 |
166 | # forecast combinations
167 | forecast.combo =
168 | forecast_combine(
169 | forecasts,
170 | method = c('uniform','median','trimmed.mean',
171 | 'n.best','lasso','peLasso','RF'),
172 | burn.in = 5,
173 | n.max = 2)
174 |
175 | # merge forecast combinations back into forecasts
176 | forecasts =
177 | forecasts %>%
178 | dplyr::bind_rows(forecast.combo)
179 |
180 | # calculate forecast errors
181 | forecast.error = forecast_accuracy(forecasts)
182 |
183 | # view forecast errors from least to greatest
184 | # (best forecast to worst forecast method)
185 | forecast.error %>%
186 | dplyr::mutate_at(vars(-model), round, 3) %>%
187 | dplyr::arrange(MSE)
188 |
189 | # compare forecasts to the baseline (a random walk)
190 | forecast_comparison(
191 | forecasts,
192 | baseline.forecast = 'naive',
193 | test = 'ER',
194 | loss = 'MSE') %>%
195 | arrange(error.ratio)
196 |
197 | # chart forecasts
198 | chart =
199 | chart_forecast(
200 | forecasts,
201 | Title = 'US Unemployment Rate',
202 | Ylab = 'Index',
203 | Freq = 'Monthly')
204 |
205 | chart
206 |
207 | ---
208 | ## Contact
209 | If you should have questions, concerns, or wish to collaborate, please contact [Tyler J. Pike](https://tylerjpike.github.io/)
210 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## Resubmission
2 | This is a resubmission. In this version I have:
3 | * Changed \dontrun to \donttest in function documentation examples
4 | * Added function documentation examples to all primary user-facing functions
5 | * Added \value to pipe.Rd
6 |
7 | ## Test environments
8 | * local Windows install, R 4.0.3
9 | * win-builder (devel and release)
10 | * Ubuntu 16.04.6 (on travis-ci), R 4.0.2
11 | * R-hub Ubuntu Linux 20.04.1 LTS, R-release
12 | * R-hub Fedora Linux, R-devel
13 |
14 | ## R CMD check results
15 | There were no ERRORs or WARNINGs.
16 |
17 | There is one NOTE since this is a new package submission.
18 |
19 | ## Downstream dependencies
20 | There are currently no downstream dependencies for this package.
--------------------------------------------------------------------------------
/man/NBest.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_combinations.R
3 | \name{NBest}
4 | \alias{NBest}
5 | \title{Select N-best forecasts}
6 | \usage{
7 | NBest(forecasts, n.max, window = NA)
8 | }
9 | \arguments{
10 | \item{forecasts}{data.frame: a data frame of forecasts to combine, assumes one column named "observed"}
11 |
12 | \item{n.max}{int: maximum number of forecasts to select}
13 |
14 | \item{window}{int: size of rolling window to evaluate forecast error over, use entire period if NA}
15 | }
16 | \value{
17 | data.frame with n columns of the historically best forecasts
18 | }
19 | \description{
20 | A function to subset the n-best forecasts;
21 | assumes column named observed.
22 | }
23 |
--------------------------------------------------------------------------------
/man/chart_forecast.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_chart.R
3 | \name{chart_forecast}
4 | \alias{chart_forecast}
5 | \title{Chart forecasts}
6 | \usage{
7 | chart_forecast(Data, Title, Ylab, Freq, zeroline = FALSE)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: oos.forecast object}
11 |
12 | \item{Title}{string: chart title}
13 |
14 | \item{Ylab}{string: y-axis label}
15 |
16 | \item{Freq}{string: frequency (acts as sub-title)}
17 |
18 | \item{zeroline}{boolean: if TRUE then add a horizontal line at zero}
19 | }
20 | \value{
21 | ggplot2 chart
22 | }
23 | \description{
24 | Chart forecasts
25 | }
26 | \examples{
27 | \donttest{
28 |
29 | # simple time series
30 | A = c(1:100) + rnorm(100)
31 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
32 | Data = data.frame(date = date, A)
33 |
34 | # run forecast_univariate
35 | forecast.uni =
36 | forecast_univariate(
37 | Data = Data,
38 | forecast.dates = tail(Data$date,10),
39 | method = c('naive','auto.arima', 'ets'),
40 | horizon = 1,
41 | recursive = FALSE,
42 | freq = 'month')
43 |
44 | forecasts =
45 | dplyr::left_join(
46 | forecast.uni,
47 | data.frame(date, observed = A),
48 | by = 'date'
49 | )
50 |
51 | # chart forecasts
52 | chart.forecast =
53 | chart_forecast(
54 | forecasts,
55 | Title = 'test',
56 | Ylab = 'Index',
57 | Freq = 'Monthly',
58 | zeroline = TRUE)
59 |
60 | }
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/man/chart_forecast_error.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_chart.R
3 | \name{chart_forecast_error}
4 | \alias{chart_forecast_error}
5 | \title{Chart forecast errors}
6 | \usage{
7 | chart_forecast_error(Data, Title, Ylab, Freq, zeroline = FALSE)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: oos.forecast object}
11 |
12 | \item{Title}{string: chart title}
13 |
14 | \item{Ylab}{string: y-axis label}
15 |
16 | \item{Freq}{string: frequency (acts as sub-title)}
17 |
18 | \item{zeroline}{boolean: if TRUE then add a horizontal line at zero}
19 | }
20 | \value{
21 | ggplot2 chart
22 | }
23 | \description{
24 | Chart forecast errors
25 | }
26 | \examples{
27 | \donttest{
28 |
29 | # simple time series
30 | A = c(1:100) + rnorm(100)
31 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
32 | Data = data.frame(date = date, A)
33 |
34 | # run forecast_univariate
35 | forecast.uni =
36 | forecast_univariate(
37 | Data = Data,
38 | forecast.dates = tail(Data$date,10),
39 | method = c('naive','auto.arima', 'ets'),
40 | horizon = 1,
41 | recursive = FALSE,
42 | freq = 'month')
43 |
44 | forecasts =
45 | dplyr::left_join(
46 | forecast.uni,
47 | data.frame(date, observed = A),
48 | by = 'date'
49 | )
50 |
51 | # chart forecast errors
52 | chart.errors =
53 | chart_forecast_error(
54 | forecasts,
55 | Title = 'test',
56 | Ylab = 'Index',
57 | Freq = 'Monthly',
58 | zeroline = TRUE)
59 |
60 | }
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/man/data_impute.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{data_impute}
4 | \alias{data_impute}
5 | \title{Impute missing values}
6 | \usage{
7 | data_impute(Data, method = "kalman", variables = NULL, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')}
11 |
12 | \item{method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'}
13 |
14 | \item{variables}{string: vector of variables to impute, default is all but 'date' column}
15 |
16 | \item{verbose}{boolean: show start-up status of impute.missing.routine}
17 | }
18 | \value{
19 | data.frame with missing data imputed
20 | }
21 | \description{
22 | A function to impute missing values. Is used as a data preparation helper function and is called internally
23 | by forecast_univariate, forecast_multivariate, and forecast_combine.
24 | }
25 |
--------------------------------------------------------------------------------
/man/data_outliers.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{data_outliers}
4 | \alias{data_outliers}
5 | \title{Clean outliers}
6 | \usage{
7 | data_outliers(
8 | Data,
9 | variables = NULL,
10 | w.bounds = c(0.05, 0.95),
11 | trim = FALSE,
12 | cross_section = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')}
17 |
18 | \item{variables}{string: vector of variables to purge of outliers, default is all but 'date' column}
19 |
20 | \item{w.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)}
21 |
22 | \item{trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound}
23 |
24 | \item{cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)}
25 | }
26 | \value{
27 | data.frame with outliers winsorized or trimmed
28 | }
29 | \description{
30 | A function to clean outliers. Is used as a data preparation helper function and is called internally
31 | by forecast_univariate, forecast_multivariate, and forecast_combine.
32 | }
33 |
--------------------------------------------------------------------------------
/man/data_reduction.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{data_reduction}
4 | \alias{data_reduction}
5 | \title{Dimension reduction via principal components}
6 | \usage{
7 | data_reduction(Data, variables = NULL, ncomp, standardize = TRUE)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')}
11 |
12 | \item{variables}{string: vector of variables to reduce to principal components, default is all but 'date' column}
13 |
14 | \item{ncomp}{int: number of factors to create}
15 |
16 | \item{standardize}{boolean: normalize variables (mean zero, variance one) before estimating factors}
17 | }
18 | \value{
19 | data.frame with a date column and ncomp principal component columns
20 | }
21 | \description{
22 | A function to estimate principal components.
23 | }
24 |
--------------------------------------------------------------------------------
/man/data_subset.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{data_subset}
4 | \alias{data_subset}
5 | \title{Create information set}
6 | \usage{
7 | data_subset(Data, forecast.date, rolling.window, freq)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date')}
11 |
12 | \item{forecast.date}{date: upper bound of information set}
13 |
14 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used}
15 |
16 | \item{freq}{string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors}
17 | }
18 | \value{
19 | data.frame bounded by the given date range
20 | }
21 | \description{
22 | A function to subset data recursively or with a rolling window to create a valid information set. Is used as a data preparation
23 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
24 | }
25 |
--------------------------------------------------------------------------------
/man/forecast_accuracy.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_metrics.R
3 | \name{forecast_accuracy}
4 | \alias{forecast_accuracy}
5 | \title{Calculate forecast accuracy}
6 | \usage{
7 | forecast_accuracy(Data)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: data frame of forecasts, model names, and dates}
11 | }
12 | \value{
13 | data.frame of numeric error results
14 | }
15 | \description{
16 | A function to calculate various loss functions, including
17 | MSE, RMSE, MAE, and MAPE.
18 | }
19 | \examples{
20 | \donttest{
21 |
22 | # simple time series
23 | A = c(1:100) + rnorm(100)
24 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
25 | Data = data.frame(date = date, A)
26 |
27 | # run forecast_univariate
28 | forecast.uni =
29 | forecast_univariate(
30 | Data = Data,
31 | forecast.dates = tail(Data$date,10),
32 | method = c('naive','auto.arima', 'ets'),
33 | horizon = 1,
34 | recursive = FALSE,
35 | freq = 'month')
36 |
37 | forecasts =
38 | dplyr::left_join(
39 | forecast.uni,
40 | data.frame(date, observed = A),
41 | by = 'date'
42 | )
43 |
44 | # forecast accuracy
45 | forecast.accuracy = forecast_accuracy(forecasts)
46 |
47 | }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/man/forecast_combine.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_combinations.R
3 | \name{forecast_combine}
4 | \alias{forecast_combine}
5 | \title{Forecast with forecast combinations}
6 | \usage{
7 | forecast_combine(
8 | Data,
9 |   method = "uniform",
10 | n.max = NULL,
11 | rolling.window = NA,
12 | trim = c(0.5, 0.95),
13 | burn.in = 1,
14 | parallel.dates = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{Data}{data.frame: data frame of forecasted values to combine, assumes 'date' and 'observed' columns, but `observed' is not necessary for all methods}
19 |
20 | \item{method}{string: the method to use; 'uniform', 'median', 'trimmed.mean', 'n.best', 'peLasso', 'lasso', 'ridge', 'elastic', 'RF', 'GBM', 'NN'}
21 |
22 | \item{n.max}{int: maximum number of forecasts to select in n.best method}
23 |
24 | \item{rolling.window}{int: size of rolling window to evaluate forecast error over, use entire period if NA}
25 |
26 | \item{trim}{numeric: a two element vector with the winsorizing bounds for the trimmed mean method; c(min, max)}
27 |
28 | \item{burn.in}{int: the number of periods to use in the first model estimation}
29 |
30 | \item{parallel.dates}{int: the number of cores available for parallel estimation}
31 | }
32 | \value{
33 | data.frame with a row for each combination method and forecasted date
34 | }
35 | \description{
36 | A function to combine forecasts out-of-sample. Methods available include:
37 | uniform weights, median forecast, trimmed (winsorized) mean, n-best,
38 | ridge regression, lasso regression, elastic net, peLASSO,
39 | random forest, tree-based gradient boosting machine, and single-layer neural network.
40 | See package website for most up-to-date list of available models.
41 | }
42 | \examples{
43 | \donttest{
44 | # simple time series
45 | A = c(1:100) + rnorm(100)
46 | B = c(1:100) + rnorm(100)
47 | C = c(1:100) + rnorm(100)
48 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
49 | Data = data.frame(date = date, A, B, C)
50 |
51 | # run forecast_univariate
52 | forecast.multi =
53 | forecast_multivariate(
54 | Data = Data,
55 | target = 'A',
56 | forecast.dates = tail(Data$date,5),
57 | method = c('ols','var'),
58 | horizon = 1,
59 | freq = 'month')
60 | # include observed values
61 | forecasts =
62 | dplyr::left_join(
63 | forecast.multi,
64 | data.frame(date, observed = A),
65 | by = 'date'
66 | )
67 |
68 | # combine forecasts
69 | combinations =
70 | forecast_combine(
71 | forecasts,
72 | method = c('uniform','median','trimmed.mean',
73 | 'n.best','lasso','peLasso'),
74 | burn.in = 5,
75 | n.max = 2)
76 | }
77 |
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/man/forecast_comparison.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_metrics.R
3 | \name{forecast_comparison}
4 | \alias{forecast_comparison}
5 | \title{Compare forecast accuracy}
6 | \usage{
7 | forecast_comparison(
8 | Data,
9 | baseline.forecast,
10 | test = "ER",
11 | loss = "MSE",
12 | horizon = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{Data}{data.frame: data frame of forecasts, model names, and dates}
17 |
18 | \item{baseline.forecast}{string: column name of baseline (null hypothesis) forecasts}
19 |
20 | \item{test}{string: which test to use; ER = error ratio, DM = Diebold-Mariano, CM = Clark and West}
21 |
22 | \item{loss}{string: error loss function to use if creating forecast error ratio}
23 |
24 | \item{horizon}{int: horizon of forecasts being compared in DM and CW tests}
25 | }
26 | \value{
27 | numeric test result
28 | }
29 | \description{
30 | A function to compare forecasts. Options include: simple forecast error ratios,
31 | \href{https://www.sas.upenn.edu/~fdiebold/papers/paper68/pa.dm.pdf}{Diebold-Mariano test}, and \href{https://www.nber.org/papers/t0326}{Clark and West test} for nested models
32 | }
33 | \examples{
34 | \donttest{
35 |
36 | # simple time series
37 | A = c(1:100) + rnorm(100)
38 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
39 | Data = data.frame(date = date, A)
40 |
41 | # run forecast_univariate
42 | forecast.uni =
43 | forecast_univariate(
44 | Data = Data,
45 | forecast.dates = tail(Data$date,10),
46 | method = c('naive','auto.arima', 'ets'),
47 | horizon = 1,
48 | recursive = FALSE,
49 | freq = 'month')
50 |
51 | forecasts =
52 | dplyr::left_join(
53 | forecast.uni,
54 | data.frame(date, observed = A),
55 | by = 'date'
56 | )
57 |
58 | # run ER (MSE)
59 | er.ratio.mse =
60 | forecast_comparison(
61 | forecasts,
62 | baseline.forecast = 'naive',
63 | test = 'ER',
64 | loss = 'MSE')
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/man/forecast_date.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{forecast_date}
4 | \alias{forecast_date}
5 | \title{Set forecasted date}
6 | \usage{
7 | forecast_date(forecast.date, horizon, freq)
8 | }
9 | \arguments{
10 | \item{forecast.date}{date: date forecast was made}
11 |
12 | \item{horizon}{int: periods ahead of forecast}
13 |
14 | \item{freq}{string: time series frequency; day, week, month, quarter, year; only needed for rolling window factors}
15 | }
16 | \value{
17 | date vector
18 | }
19 | \description{
20 | A function to set the date a forecast targets, given the date the forecast was made, the forecast horizon, and the data frequency. Is used as a data preparation
21 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
22 | }
23 |
--------------------------------------------------------------------------------
/man/forecast_multivariate.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_multivariate.R
3 | \name{forecast_multivariate}
4 | \alias{forecast_multivariate}
5 | \title{Forecast with multivariate models}
6 | \usage{
7 | forecast_multivariate(
8 | Data,
9 | forecast.dates,
10 | target,
11 | horizon,
12 | method,
13 | rolling.window = NA,
14 | freq,
15 | lag.variables = NULL,
16 | lag.n = NULL,
17 | outlier.clean = FALSE,
18 | outlier.variables = NULL,
19 | outlier.bounds = c(0.05, 0.95),
20 | outlier.trim = FALSE,
21 | outlier.cross_section = FALSE,
22 | impute.missing = FALSE,
23 | impute.method = "kalman",
24 | impute.variables = NULL,
25 | impute.verbose = FALSE,
26 | reduce.data = FALSE,
27 | reduce.variables = NULL,
28 | reduce.ncomp = NULL,
29 | reduce.standardize = TRUE,
30 | parallel.dates = NULL,
31 | return.models = FALSE,
32 | return.data = FALSE
33 | )
34 | }
35 | \arguments{
36 | \item{Data}{data.frame: data frame of target variable, exogenous variables, and observed date (named 'date'); may alternatively be a \code{ts}, \code{xts}, or \code{zoo} object to forecast}
37 |
38 | \item{forecast.dates}{date: dates forecasts are created}
39 |
40 | \item{target}{string: column name in Data of variable to forecast}
41 |
42 | \item{horizon}{int: number of periods into the future to forecast}
43 |
44 | \item{method}{string: methods to use}
45 |
46 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used}
47 |
48 | \item{freq}{string: time series frequency; day, week, month, quarter, year}
49 |
50 | \item{lag.variables}{string: vector of variables to lag each time step, if lag.n is not null then the default is all non-date variables}
51 |
52 | \item{lag.n}{int: number of lags to create}
53 |
54 | \item{outlier.clean}{boolean: if TRUE then clean outliers}
55 |
56 | \item{outlier.variables}{string: vector of variables to purge of outliers, default is all but 'date' column}
57 |
58 | \item{outlier.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)}
59 |
60 | \item{outlier.trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound}
61 |
62 | \item{outlier.cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)}
63 |
64 | \item{impute.missing}{boolean: if TRUE then impute missing values}
65 |
66 | \item{impute.method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'}
67 |
68 | \item{impute.variables}{string: vector of variables to impute missing values, default is all numeric columns}
69 |
70 | \item{impute.verbose}{boolean: show start-up status of impute.missing.routine}
71 |
72 | \item{reduce.data}{boolean: if TRUE then reduce dimension}
73 |
74 | \item{reduce.variables}{string: vector of variables to reduce to principal components, default is all numeric columns}
75 |
76 | \item{reduce.ncomp}{int: number of factors to create}
77 |
78 | \item{reduce.standardize}{boolean: normalize variables (mean zero, variance one) before estimating factors}
79 |
80 | \item{parallel.dates}{int: the number of cores available for parallel estimation}
81 |
82 | \item{return.models}{boolean: if TRUE then return list of models estimated each forecast.date}
83 |
84 | \item{return.data}{boolean: if TRUE then return list of information.set for each forecast.date}
85 | }
86 | \value{
87 | data.frame with a row for each forecast by model and forecasted date
88 | }
89 | \description{
90 | A function to estimate multivariate forecasts out-of-sample. Methods available include:
91 | vector auto-regression, linear regression, lasso regression, ridge regression, elastic net,
92 | random forest, tree-based gradient boosting machine, and single-layer neural network.
93 | See package website for most up-to-date list of available models.
94 | }
95 | \examples{
96 | \donttest{
97 | # simple time series
98 | A = c(1:100) + rnorm(100)
99 | B = c(1:100) + rnorm(100)
100 | C = c(1:100) + rnorm(100)
101 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
102 | Data = data.frame(date = date, A, B, C)
103 |
104 | # run forecast_univariate
105 | forecast.multi =
106 | forecast_multivariate(
107 | Data = Data,
108 | target = 'A',
109 | forecast.dates = tail(Data$date,5),
110 | method = c('ols','var'),
111 | horizon = 1,
112 | # information set
113 | rolling.window = NA,
114 | freq = 'month',
115 | # data prep
116 | lag.n = 4,
117 | outlier.clean = TRUE,
118 | impute.missing = TRUE)
119 | }
120 |
121 |
122 | }
123 |
--------------------------------------------------------------------------------
/man/forecast_univariate.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_univariate.R
3 | \name{forecast_univariate}
4 | \alias{forecast_univariate}
5 | \title{Forecast with univariate models}
6 | \usage{
7 | forecast_univariate(
8 | Data,
9 | forecast.dates,
10 | methods,
11 | horizon,
12 | recursive = TRUE,
13 | rolling.window = NA,
14 | freq,
15 | outlier.clean = FALSE,
16 | outlier.variables = NULL,
17 | outlier.bounds = c(0.05, 0.95),
18 | outlier.trim = FALSE,
19 | outlier.cross_section = FALSE,
20 | impute.missing = FALSE,
21 | impute.method = "kalman",
22 | impute.variables = NULL,
23 | impute.verbose = FALSE,
24 | parallel.dates = NULL,
25 | return.models = FALSE,
26 | return.data = FALSE
27 | )
28 | }
29 | \arguments{
30 | \item{Data}{data.frame: data frame of variable to forecast and a date column; may alternatively be a \code{ts}, \code{xts}, or \code{zoo} object to forecast}
31 |
32 | \item{forecast.dates}{date: dates forecasts are created}
33 |
34 | \item{methods}{string: models to estimate forecasts}
35 |
36 | \item{horizon}{int: number of periods to forecast}
37 |
38 | \item{recursive}{boolean: use sequential one-step-ahead forecast if TRUE, use direct projections if FALSE}
39 |
40 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used}
41 |
42 | \item{freq}{string: time series frequency; day, week, month, quarter, year}
43 |
44 | \item{outlier.clean}{boolean: if TRUE then clean outliers}
45 |
46 | \item{outlier.variables}{string: vector of variables to purge of outliers, default is all but 'date' column}
47 |
48 | \item{outlier.bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)}
49 |
50 | \item{outlier.trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound}
51 |
52 | \item{outlier.cross_section}{boolean: if TRUE then remove outliers based on cross-section (row-wise) instead of historical data (column-wise)}
53 |
54 | \item{impute.missing}{boolean: if TRUE then impute missing values}
55 |
56 | \item{impute.method}{string: select which method to use from the imputeTS package; 'interpolation', 'kalman', 'locf', 'ma', 'mean', 'random', 'remove','replace', 'seadec', 'seasplit'}
57 |
58 | \item{impute.variables}{string: vector of variables to impute missing values, default is all numeric columns}
59 |
60 | \item{impute.verbose}{boolean: show start-up status of impute.missing.routine}
61 |
62 | \item{parallel.dates}{int: the number of cores available for parallel estimation}
63 |
64 | \item{return.models}{boolean: if TRUE then return list of models estimated each forecast.date}
65 |
66 | \item{return.data}{boolean: if TRUE then return list of information.set for each forecast.date}
67 | }
68 | \value{
69 | data.frame with a row for each forecast by model and forecasted date
70 | }
71 | \description{
72 | A function to estimate univariate forecasts out-of-sample. Methods available include all forecast
73 | methods from the \code{forecast} package. See package website for most up-to-date list of available models.
74 | }
75 | \examples{
76 | \donttest{
77 | # simple time series
78 | A = c(1:100) + rnorm(100)
79 | date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
80 | Data = data.frame(date = date, A)
81 |
82 | # estimate univariate forecasts
83 | forecast.uni =
84 | forecast_univariate(
85 | Data = Data,
86 | forecast.dates = tail(Data$date,5),
87 | method = c('naive','auto.arima', 'ets'),
88 | horizon = 1,
89 | recursive = FALSE,
90 | # information set
91 | rolling.window = NA,
92 | freq = 'month',
93 | # data prep
94 | outlier.clean = TRUE,
95 | impute.missing = TRUE)
96 | }
97 |
98 | }
99 |
--------------------------------------------------------------------------------
/man/instantiate.data_impute.control_panel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{instantiate.data_impute.control_panel}
4 | \alias{instantiate.data_impute.control_panel}
5 | \title{Create interface to control \code{data_impute} model estimation}
6 | \usage{
7 | instantiate.data_impute.control_panel()
8 | }
9 | \value{
10 | data_impute.control_panel
11 | }
12 | \description{
13 | A function to create the data imputation method
14 | arguments list for user manipulation.
15 | }
16 |
--------------------------------------------------------------------------------
/man/instantiate.forecast_combinations.control_panel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_combinations.R
3 | \name{instantiate.forecast_combinations.control_panel}
4 | \alias{instantiate.forecast_combinations.control_panel}
5 | \title{Create interface to control \code{forecast_combine} model estimation}
6 | \usage{
7 | instantiate.forecast_combinations.control_panel(covariates = NULL)
8 | }
9 | \arguments{
10 | \item{covariates}{int: the number of features that will go into the model}
11 | }
12 | \value{
13 | forecast_combinations.control_panel
14 | }
15 | \description{
16 | A function to create the forecast combination technique arguments list
17 | for user manipulation.
18 | }
19 |
--------------------------------------------------------------------------------
/man/instantiate.forecast_multivariate.ml.control_panel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_multivariate.R
3 | \name{instantiate.forecast_multivariate.ml.control_panel}
4 | \alias{instantiate.forecast_multivariate.ml.control_panel}
5 | \title{Create interface to control \code{forecast_multivariate} ML estimation}
6 | \usage{
7 | instantiate.forecast_multivariate.ml.control_panel(
8 | covariates = NULL,
9 | rolling.window = NULL,
10 | horizon = NULL
11 | )
12 | }
13 | \arguments{
14 | \item{covariates}{int: the number of features that will go into the model}
15 |
16 | \item{rolling.window}{int: size of rolling window, NA if expanding window is used}
17 |
18 | \item{horizon}{int: number of periods into the future to forecast}
19 | }
20 | \value{
21 | forecast_multivariate.ml.control_panel
22 | }
23 | \description{
24 | A function to create the multivariate forecast methods
25 | arguments list for user manipulation.
26 | }
27 |
--------------------------------------------------------------------------------
/man/instantiate.forecast_multivariate.var.control_panel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_multivariate.R
3 | \name{instantiate.forecast_multivariate.var.control_panel}
4 | \alias{instantiate.forecast_multivariate.var.control_panel}
5 | \title{Create interface to control \code{forecast_multivariate} VAR estimation}
6 | \usage{
7 | instantiate.forecast_multivariate.var.control_panel()
8 | }
9 | \value{
10 | forecast_multivariate.var.control_panel
11 | }
12 | \description{
13 | A function to create the multivariate forecast methods
14 | arguments list for user manipulation.
15 | }
16 |
--------------------------------------------------------------------------------
/man/instantiate.forecast_univariate.control_panel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_univariate.R
3 | \name{instantiate.forecast_univariate.control_panel}
4 | \alias{instantiate.forecast_univariate.control_panel}
5 | \title{Create interface to control \code{forecast_univariate} model estimation}
6 | \usage{
7 | instantiate.forecast_univariate.control_panel()
8 | }
9 | \value{
10 | forecast_univariate.control_panel
11 | }
12 | \description{
13 | A function to create the univariate forecast method arguments list
14 | for user manipulation.
15 | }
16 |
--------------------------------------------------------------------------------
/man/loss_function.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/forecast_metrics.R
3 | \name{loss_function}
4 | \alias{loss_function}
5 | \title{Calculate error via loss functions}
6 | \usage{
7 | loss_function(forecast, observed, metric = "MSE")
8 | }
9 | \arguments{
10 | \item{forecast}{numeric: vector of forecasted values}
11 |
12 | \item{observed}{numeric: vector of observed values}
13 |
14 | \item{metric}{string: loss function}
15 | }
16 | \value{
17 | numeric test result
18 | }
19 | \description{
20 | A function to calculate various error loss functions. Options include:
21 | MSE, RMSE, MAE, and MAPE. The default is MSE loss.
22 | }
23 |
--------------------------------------------------------------------------------
/man/n.lag.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{n.lag}
4 | \alias{n.lag}
5 | \title{Create n lags}
6 | \usage{
7 | n.lag(Data, lags, variables = NULL)
8 | }
9 | \arguments{
10 | \item{Data}{data.frame: data frame of variables to lag and a 'date' column}
11 |
12 | \item{lags}{int: number of lags to create}
13 |
14 | \item{variables}{string: vector of variable names to lag, default is all non-date variables}
15 | }
16 | \value{
17 | data.frame
18 | }
19 | \description{
20 | A function to create 1 through n lags of a set of variables. Is used as a data preparation
21 | helper function and is called internally by forecast_univariate, forecast_multivariate, and forecast_combine.
22 | }
23 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/external_imports.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \value{
10 | magrittr pipe operator \%>\%
11 | }
12 | \description{
13 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
14 | }
15 | \keyword{internal}
16 |
--------------------------------------------------------------------------------
/man/standardize.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{standardize}
4 | \alias{standardize}
5 | \title{Standardize variables (mean 0, variance 1)}
6 | \usage{
7 | standardize(X)
8 | }
9 | \arguments{
10 | \item{X}{numeric: vector to be standardized}
11 | }
12 | \value{
13 | numeric vector of standardized values
14 | }
15 | \description{
16 | Standardize variables (mean 0, variance 1)
17 | }
18 |
--------------------------------------------------------------------------------
/man/winsorize.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data_preparation.R
3 | \name{winsorize}
4 | \alias{winsorize}
5 | \title{Winsorize or trim variables}
6 | \usage{
7 | winsorize(X, bounds, trim = FALSE)
8 | }
9 | \arguments{
10 | \item{X}{numeric: vector to be winsorized or trimmed}
11 |
12 | \item{bounds}{double: vector of winsorizing minimum and maximum bounds, c(min percentile, max percentile)}
13 |
14 | \item{trim}{boolean: if TRUE then replace outliers with NA instead of winsorizing bound}
15 | }
16 | \value{
17 | numeric vector of winsorized or trimmed values
18 | }
19 | \description{
20 | Winsorize or trim variables
21 | }
22 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # testthat entry point: discovers and runs every test in tests/testthat/
2 | library(testthat)
3 | library(OOS)
4 | 
5 | test_check("OOS")
6 | 
--------------------------------------------------------------------------------
/tests/testthat/test-forecast_chart.R:
--------------------------------------------------------------------------------
1 | test_that("forecast chart", {
2 | 
3 |   # simple time series: upward trend plus noise, monthly dates
4 |   A = c(1:100) + rnorm(100)
5 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
6 |   Data = data.frame(date = date, A)
7 | 
8 |   # create univariate forecasts to chart
9 |   forecast.uni =
10 |     forecast_univariate(
11 |       Data = Data,
12 |       forecast.dates = tail(Data$date,10),
13 |       method = c('naive','auto.arima', 'ets'),
14 |       horizon = 1,
15 |       recursive = FALSE,
16 |       freq = 'month')
17 | 
18 |   # merge observed values onto the forecasts
19 |   forecasts =
20 |     dplyr::left_join(
21 |       forecast.uni,
22 |       data.frame(date, observed = A),
23 |       by = 'date'
24 |     )
25 | 
26 |   # chart forecasts
27 |   chart.forecast =
28 |     chart_forecast(
29 |       forecasts,
30 |       Title = 'test',
31 |       Ylab = 'Index',
32 |       Freq = 'Monthly',
33 |       zeroline = TRUE)
34 | 
35 |   # exists() on a just-assigned name is always TRUE, so the previous check
36 |   # could never fail; testing the object itself also catches a NULL return
37 |   expect_false(is.null(chart.forecast), 'Chart is not created.')
38 | 
39 |   # chart forecast errors
40 |   chart.errors =
41 |     chart_forecast_error(
42 |       forecasts,
43 |       Title = 'test',
44 |       Ylab = 'Index',
45 |       Freq = 'Monthly',
46 |       zeroline = TRUE)
47 | 
48 |   expect_false(is.null(chart.errors), 'Chart is not created.')
49 | 
50 | })
51 | 
--------------------------------------------------------------------------------
/tests/testthat/test-forecast_combination.R:
--------------------------------------------------------------------------------
1 | test_that("forecast_combine produces standard output", {
2 | 
3 |   # simple time series
4 |   A = c(1:100) + rnorm(100)
5 |   B = c(1:100) + rnorm(100)
6 |   C = c(1:100) + rnorm(100)
7 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
8 |   Data = data.frame(date = date, A, B, C)
9 | 
10 |   # run forecast_multivariate to generate candidate forecasts to combine
11 |   forecast.multi =
12 |     forecast_multivariate(
13 |       Data = Data,
14 |       target = 'A',
15 |       forecast.dates = tail(Data$date,5),
16 |       method = c('ols','var'),
17 |       horizon = 1,
18 |       freq = 'month')
19 | 
20 |   # merge observed values onto the forecasts
21 |   forecasts =
22 |     dplyr::left_join(
23 |       forecast.multi,
24 |       data.frame(date, observed = A),
25 |       by = 'date'
26 |     )
27 | 
28 |   # combine forecasts
29 |   combinations =
30 |     forecast_combine(
31 |       forecasts,
32 |       method = c('uniform','median','trimmed.mean',
33 |                  'n.best','lasso','peLasso'),
34 |       burn.in = 5,
35 |       n.max = 2)
36 | 
37 |   # expect formats
38 |   expect_true(is.data.frame(combinations), 'forecast_combine is not a proper data.frame')
39 | 
40 | })
41 | 
--------------------------------------------------------------------------------
/tests/testthat/test-forecast_metrics.R:
--------------------------------------------------------------------------------
1 | test_that("forecast_comparison", {
2 | 
3 |   # simple time series
4 |   A = c(1:100) + rnorm(100)
5 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
6 |   Data = data.frame(date = date, A)
7 | 
8 |   # create forecasts
9 |   forecast.uni =
10 |     forecast_univariate(
11 |       Data = Data,
12 |       forecast.dates = tail(Data$date,10),
13 |       method = c('naive','auto.arima', 'ets'),
14 |       horizon = 1,
15 |       recursive = FALSE,
16 |       freq = 'month')
17 | 
18 |   forecasts =
19 |     dplyr::left_join(
20 |       forecast.uni,
21 |       data.frame(date, observed = A),
22 |       by = 'date'
23 |     )
24 | 
25 |   # run ER (MSE)
26 |   er.ratio.mse =
27 |     forecast_comparison(
28 |       forecasts,
29 |       baseline.forecast = 'naive',
30 |       test = 'ER',
31 |       loss = 'MSE')
32 | 
33 |   expect_true(is.data.frame(er.ratio.mse),'Error ratio (MSE) is not a data.frame')
34 |   # is.na() is TRUE for NaN as well; the previous `!is.na(x) | !is.nan(x)`
35 |   # evaluated TRUE when x was NA (is.nan(NA) is FALSE), so NA slipped through
36 |   expect_true(!is.na(mean(er.ratio.mse$error.ratio)),'Error ratio (MSE) is NA or NAN')
37 | 
38 |   # run ER (RMSE)
39 |   er.ratio.rmse =
40 |     forecast_comparison(
41 |       forecasts,
42 |       baseline.forecast = 'naive',
43 |       test = 'ER',
44 |       loss = 'RMSE')
45 | 
46 |   expect_true(is.data.frame(er.ratio.rmse),'Error ratio (RMSE) is not a data.frame')
47 |   expect_true(!is.na(mean(er.ratio.rmse$error.ratio)),'Error ratio (RMSE) is NA or NAN')
48 | 
49 |   # run ER (MAE) -- failure messages previously said MAPE here
50 |   er.ratio.mae =
51 |     forecast_comparison(
52 |       forecasts,
53 |       baseline.forecast = 'naive',
54 |       test = 'ER',
55 |       loss = 'MAE')
56 | 
57 |   expect_true(is.data.frame(er.ratio.mae),'Error ratio (MAE) is not a data.frame')
58 |   expect_true(!is.na(mean(er.ratio.mae$error.ratio)),'Error ratio (MAE) is NA or NAN')
59 | 
60 |   # run ER (MAPE)
61 |   er.ratio.mape =
62 |     forecast_comparison(
63 |       forecasts,
64 |       baseline.forecast = 'naive',
65 |       test = 'ER',
66 |       loss = 'MAPE')
67 | 
68 |   expect_true(is.data.frame(er.ratio.mape),'Error ratio (MAPE) is not a data.frame')
69 |   expect_true(!is.na(mean(er.ratio.mape$error.ratio)),'Error ratio (MAPE) is NA or NAN')
70 | 
71 |   # run DM test
72 |   dm.test =
73 |     forecast_comparison(
74 |       forecasts,
75 |       baseline.forecast = 'naive',
76 |       test = 'DM')
77 | 
78 |   expect_true(is.data.frame(dm.test),'DM test is not a data.frame')
79 |   # NOTE(review): the DM output may not carry an `error.ratio` column; if it
80 |   # does not, mean(NULL) is NA and this check passes vacuously -- confirm the
81 |   # intended column before tightening this to a single !is.na() check
82 |   expect_true(!is.na(mean(dm.test$error.ratio)) |
83 |               !is.nan(mean(dm.test$error.ratio)),'DM test is NA or NAN')
84 | 
85 |   # run CW test -- the comment and messages previously said DM here
86 |   cw.test =
87 |     forecast_comparison(
88 |       forecasts,
89 |       baseline.forecast = 'naive',
90 |       test = 'CW',
91 |       horizon = 1)
92 | 
93 |   expect_true(is.data.frame(cw.test),'CW test is not a data.frame')
94 |   expect_true(!is.na(mean(cw.test$error.ratio)) |
95 |               !is.nan(mean(cw.test$error.ratio)),'CW test is NA or NAN')
96 | 
97 | })
98 | 
99 | test_that("forecast_accuracy", {
100 | 
101 |   # simple time series
102 |   A = c(1:100) + rnorm(100)
103 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
104 |   Data = data.frame(date = date, A)
105 | 
106 |   # create forecast
107 |   forecast.uni =
108 |     forecast_univariate(
109 |       Data = Data,
110 |       forecast.dates = tail(Data$date,10),
111 |       method = c('naive','auto.arima', 'ets'),
112 |       horizon = 1,
113 |       recursive = FALSE,
114 |       freq = 'month')
115 | 
116 |   forecasts =
117 |     dplyr::left_join(
118 |       forecast.uni,
119 |       data.frame(date, observed = A),
120 |       by = 'date'
121 |     )
122 | 
123 |   # forecast accuracy
124 |   forecast.accuracy = forecast_accuracy(forecasts)
125 | 
126 |   expect_true(is.data.frame(forecast.accuracy),'Accuracy is not a data.frame')
127 |   expect_true(!is.na(sum(forecast.accuracy[,2:5])),'Accuracy is NA or NAN')
128 | 
129 | })
130 | 
--------------------------------------------------------------------------------
/tests/testthat/test-forecast_multivariate.R:
--------------------------------------------------------------------------------
1 | test_that("forecast_multivariate produces standard output", {
2 | 
3 |   # simple time series
4 |   A = c(1:100) + rnorm(100)
5 |   B = c(1:100) + rnorm(100)
6 |   C = c(1:100) + rnorm(100)
7 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
8 |   Data = data.frame(date = date, A, B, C)
9 | 
10 |   # run forecast_multivariate (comment previously said forecast_univariate)
11 |   forecast.multi =
12 |     try(
13 |       forecast_multivariate(
14 |         Data = Data,
15 |         target = 'A',
16 |         forecast.dates = tail(Data$date,5),
17 |         method = c('ols','var'),
18 |         horizon = 1,
19 |         # information set
20 |         rolling.window = NA,
21 |         freq = 'month',
22 |         # data prep
23 |         lag.n = 4,
24 |         outlier.clean = TRUE,
25 |         impute.missing = TRUE,
26 |         # return
27 |         return.models = TRUE,
28 |         return.data = TRUE)
29 |     )
30 | 
31 |   # expect formats
32 |   expect_true(is.data.frame(forecast.multi$forecasts), 'forecasts is not a proper data.frame')
33 |   expect_true(is.list(forecast.multi$models), 'models is not a proper list')
34 |   expect_true(is.list(forecast.multi$information.set), 'information set is not a proper list')
35 | 
36 |   # expect proper names and numbers of outputs
37 |   expect_equal(names(forecast.multi$models), as.character(tail(Data$date,5)))
38 |   expect_equal(names(forecast.multi$information.set), as.character(tail(Data$date,5)))
39 | 
40 | })
41 | 
42 | test_that("forecast_multivariate with data reduction produces standard output", {
43 | 
44 |   # simple time series
45 |   A = c(1:100) + rnorm(100)
46 |   B = c(1:100) + rnorm(100)
47 |   C = c(1:100) + rnorm(100)
48 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
49 |   Data = data.frame(date = date, A, B, C)
50 | 
51 |   # run forecast_multivariate with dimension reduction; note the trailing
52 |   # comma after return.data was removed -- it passed an empty argument
53 |   forecast.multi =
54 |     try(
55 |       forecast_multivariate(
56 |         Data = Data,
57 |         target = 'A',
58 |         forecast.dates = tail(Data$date,5),
59 |         method = c('ols','var'),
60 |         horizon = 1,
61 |         # information set
62 |         rolling.window = NA,
63 |         freq = 'month',
64 |         # data prep
65 |         lag.n = 4,
66 |         outlier.clean = TRUE,
67 |         impute.missing = TRUE,
68 |         reduce.data = TRUE,
69 |         reduce.ncomp = 1,
70 |         return.models = TRUE,
71 |         return.data = TRUE)
72 |     )
73 | 
74 |   # expect formats
75 |   expect_true(is.data.frame(forecast.multi$forecasts), 'forecasts is not a proper data.frame')
76 |   expect_true(is.list(forecast.multi$models), 'models is not a proper list')
77 |   expect_true(is.list(forecast.multi$information.set), 'information set is not a proper list')
78 | 
79 |   # expect proper names and numbers of outputs
80 |   expect_equal(names(forecast.multi$models), as.character(tail(Data$date,5)))
81 |   expect_equal(names(forecast.multi$information.set), as.character(tail(Data$date,5)))
82 | 
83 | })
84 | 
--------------------------------------------------------------------------------
/tests/testthat/test-forecast_univariate.R:
--------------------------------------------------------------------------------
1 | test_that("forecast_univariate (direct projection) produces standard output", {
2 | 
3 |   # simple time series
4 |   A = c(1:100) + rnorm(100)
5 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
6 |   Data = data.frame(date = date, A)
7 | 
8 |   # run forecast_univariate
9 |   forecast.uni =
10 |     try(
11 |       forecast_univariate(
12 |         Data = Data,
13 |         forecast.dates = tail(Data$date,5),
14 |         method = c('naive','auto.arima', 'ets'),
15 |         horizon = 1,
16 |         recursive = FALSE,
17 |         # information set
18 |         rolling.window = NA,
19 |         freq = 'month',
20 |         # data prep
21 |         outlier.clean = TRUE,
22 |         impute.missing = TRUE,
23 |         # return
24 |         return.models = TRUE,
25 |         return.data = TRUE)
26 |     )
27 | 
28 |   # expect formats
29 |   expect_true(is.data.frame(forecast.uni$forecasts), 'forecasts is not a proper data.frame')
30 |   expect_true(is.list(forecast.uni$models), 'models is not a proper list')
31 |   expect_true(is.list(forecast.uni$information.set), 'information set is not a proper list')
32 | 
33 |   # expect proper names and numbers of outputs
34 |   expect_equal(names(forecast.uni$models), as.character(tail(Data$date,5)))
35 |   expect_equal(names(forecast.uni$information.set), as.character(tail(Data$date,5)))
36 | 
37 | })
38 | 
39 | test_that("forecast_univariate (recursive) produces standard output", {
40 | 
41 |   # simple time series
42 |   A = c(1:100) + rnorm(100)
43 |   date = seq.Date(from = as.Date('2000-01-01'), by = 'month', length.out = 100)
44 |   Data = data.frame(date = date, A)
45 | 
46 |   # run forecast_univariate
47 |   forecast.uni =
48 |     try(
49 |       forecast_univariate(
50 |         Data = Data,
51 |         forecast.dates = tail(Data$date,5),
52 |         method = c('naive','auto.arima', 'ets'),
53 |         horizon = 1,
54 |         recursive = TRUE,
55 |         # information set
56 |         rolling.window = NA,
57 |         freq = 'month',
58 |         # data prep
59 |         outlier.clean = TRUE,
60 |         impute.missing = TRUE,
61 |         # return
62 |         return.models = TRUE,
63 |         return.data = TRUE)
64 |     )
65 | 
66 |   # expect formats ("rercursive" typo in messages fixed)
67 |   expect_true(is.data.frame(forecast.uni$forecasts), 'recursive forecasts is not a proper data.frame')
68 |   expect_true(is.list(forecast.uni$models), 'recursive models is not a proper list')
69 |   expect_true(is.list(forecast.uni$information.set), 'recursive information set is not a proper list')
70 | 
71 |   # expect proper names and numbers of outputs
72 |   expect_equal(names(forecast.uni$models), as.character(tail(Data$date,5)))
73 |   expect_equal(names(forecast.uni$information.set), as.character(tail(Data$date,5)))
74 | 
75 | })
76 | 
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/basic_introduction.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Basic Introduction to OOS"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 |   %\VignetteIndexEntry{Basic Introduction to OOS}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\usepackage[utf8]{inputenc}
8 | ---
9 |
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 | collapse = TRUE,
13 | comment = "#>"
14 | )
15 | ```
16 |
17 | ```{r setup, include=FALSE, warning = FALSE, message = FALSE, results = 'hide'}
18 | knitr::opts_chunk$set(echo = TRUE)
19 | ```
20 |
21 | This introduction demonstrates how one may conduct a basic forecasting exercise with OOS. For more detailed examples and documentation, please see the [OOS](https://tylerjpike.github.io/OOS/) website.
22 |
23 | ## 0. Environment
24 | ```{r}
25 | library(OOS)
26 | ```
27 |
28 | ## 1. Data
29 |
30 | ```{r}
31 | # pull and prepare data from FRED
32 | quantmod::getSymbols.FRED(
33 |   c('UNRATE','INDPRO','GS10'),
34 |   env = globalenv())
35 | Data = cbind(UNRATE, INDPRO, GS10)
36 | Data = data.frame(Data, date = zoo::index(Data)) %>%
37 |   dplyr::filter(lubridate::year(date) >= 1990) %>%
38 |   na.omit()
39 | 
40 | # make INDPRO and GS10 stationary (dplyr::lag must be qualified: dplyr is not
41 | # attached, and an unqualified lag() resolves to stats::lag, which does not
42 | # shift the values of a plain vector -- the transform would silently be zero)
43 | Data = Data %>%
44 |   dplyr::mutate(
45 |     GS10 = GS10 - dplyr::lag(GS10),
46 |     INDPRO = (INDPRO - dplyr::lag(INDPRO, 12))/dplyr::lag(INDPRO, 12))
47 | 
48 | # NOTE(review): the data already start in 1990 (filtered above), so this
49 | # filter is currently a no-op -- confirm the intended start date
50 | Data = dplyr::filter(Data, date >= as.Date('1954-01-01'))
51 | ```
50 |
51 | ## 2. Forecasting
52 |
53 | ```{r, warning=FALSE}
54 | # run univariate forecasts; only 'naive' is enabled -- the slower methods
55 | # are left commented out below, presumably to keep vignette build time down
56 | forecast.uni =
57 |   forecast_univariate(
58 |     Data = dplyr::select(Data, date, UNRATE),
59 |     forecast.dates = tail(Data$date,5),
60 |     method = c('naive'), #,'auto.arima', 'ets'),
61 |     horizon = 1,
62 |     recursive = FALSE,
63 |     rolling.window = NA,
64 |     freq = 'month')
65 | ```
65 |
66 |
67 | ```{r, warning=FALSE}
68 | # create multivariate forecasts
69 | # (argument spelled out as forecast.dates; the previous forecast.date only
70 | # worked via R's partial argument matching)
71 | forecast.multi =
72 |   forecast_multivariate(
73 |     Data = Data,
74 |     forecast.dates = tail(Data$date,5),
75 |     target = 'UNRATE',
76 |     horizon = 1,
77 |     method = c('lasso'),
78 |     rolling.window = NA,
79 |     freq = 'month')
80 | ```
79 |
80 |
81 | ```{r, warning=FALSE}
82 | # stack univariate and multivariate forecasts and add in observed values
83 | forecasts =
84 |   dplyr::bind_rows(
85 |     forecast.uni,
86 |     forecast.multi) %>%
87 |   dplyr::left_join(
88 |     dplyr::select(Data, date, observed = UNRATE),
89 |     by = 'date')
90 | 
91 | # forecast combinations
92 | forecast.combo =
93 |   forecast_combine(
94 |     forecasts,
95 |     method = c('uniform','median','trimmed.mean'))
96 | ```
97 |
98 | ## Forecast Analysis
99 | ```{r, warning=FALSE}
100 | # merge forecast combinations back into forecasts
101 | forecasts =
102 |   forecasts %>%
103 |   dplyr::bind_rows(forecast.combo)
104 | 
105 | # calculate forecast errors
106 | forecast.error = forecast_accuracy(forecasts)
107 | 
108 | # view forecast errors from least to greatest
109 | # (best forecast to worst forecast method)
110 | forecast.error %>%
111 |   dplyr::mutate_at(dplyr::vars(-model), round, 3) %>%
112 |   dplyr::arrange(MSE)
113 | 
114 | # compare forecasts to the baseline (a random walk)
115 | forecast_comparison(
116 |   forecasts,
117 |   baseline.forecast = 'naive',
118 |   test = 'ER',
119 |   loss = 'MSE') %>%
120 |   dplyr::arrange(error.ratio)
121 | 
122 | # chart forecasts
123 | chart =
124 |   chart_forecast(
125 |     forecasts,
126 |     Title = 'US Unemployment Rate',
127 |     Ylab = 'Index',
128 |     Freq = 'Monthly')
129 | 
130 | ```
131 |
132 |
--------------------------------------------------------------------------------