├── .gitignore ├── LICENSE ├── LICENSE_notes ├── README.md ├── demo └── demo.R ├── epiforecast.cpp14funs ├── DESCRIPTION ├── NAMESPACE ├── R │ ├── RcppExports.R │ ├── checks.R │ ├── curve-fits.R │ ├── eb.R │ ├── eb_dists.R │ └── interface.R ├── man-roxygen │ ├── param_baseline.R │ ├── param_full.dat.R │ ├── param_max.n.sims.R │ └── sim.method_template.R ├── src │ ├── Makevars │ ├── RcppExports.cpp │ ├── eb_rcpp.cpp │ └── epiforecast.cpp14funs_types.hpp └── tests │ └── testthat │ ├── fluview_hhs1_fetch.Rdata.rds │ └── test-eb.sim.R ├── epiforecast.cpp14funs_0.0.1.pdf ├── epiforecast ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R │ ├── RcppExports.R │ ├── backcasters.R │ ├── backcasters_new.R │ ├── br.R │ ├── cv.R │ ├── cv_apply.R │ ├── delphi_epidata.R │ ├── empirical.futures.R │ ├── empirical.trajectories.R │ ├── ensemble.R │ ├── epiproject.R │ ├── forecast_type.R │ ├── get_completed_fluview_state_df.R │ ├── holidays.R │ ├── loaders.R │ ├── map_join.R │ ├── match.R │ ├── namesp.R │ ├── predx_v2_spreadsheets.R │ ├── read.R │ ├── simclass.R │ ├── simplify2arrayp.R │ ├── target_spec.R │ ├── todo-by-file.R │ ├── twkde.R │ ├── utils.R │ └── weeks.R ├── data-raw │ └── fluview.outdated.R ├── data │ ├── fluview.2003on.outdated.dats.rda │ └── fluview.2003on.outdated.new.dats.rda ├── man-roxygen │ ├── param_baseline.R │ ├── param_full.dat.R │ ├── param_invent.scalars.R │ ├── param_max.n.sims.R │ └── sim.method_template.R ├── meta.R ├── src │ ├── RcppExports.cpp │ └── histograms_rcpp.cpp ├── tests │ └── testthat │ │ ├── test-br.sim.R │ │ ├── test-check.file.contents.R │ │ ├── test-read.from.file.R │ │ ├── test-twkde.markovian.sim.R │ │ ├── test-twkde.sim.R │ │ └── test_match.arg.or.default.R └── vignettes │ ├── fetch_data.Rmd │ ├── fetch_data.html │ ├── fetch_data[woven].md │ ├── figure │ ├── unnamed-chunk-1-1.png │ ├── unnamed-chunk-1-2.png │ ├── unnamed-chunk-2-1.png │ ├── unnamed-chunk-2-2.png │ ├── unnamed-chunk-3-1.png │ └── unnamed-chunk-3-2.png │ ├── make-forecasts-br.Rmd │ ├── make-forecasts-br.html │ └── test.R ├── epiforecast_0.0.1.pdf ├── sample ├── census_weights.rds ├── combine-spreadsheets.R ├── covid19ilinet-templates.R ├── gen-prospective-component-forecasts.R ├── gen-prospective-ensemble-forecasts.R ├── gen-retro-component-forecasts.R ├── gen-retro-ensemble-forecasts.R ├── generate-retro-and-prospective-forecasts.R ├── high-state-config.R ├── high-state-forecasts.R ├── hospitalization-forecasts.R ├── low-state-config.R ├── low-state-forecasts.R ├── merge-state-forecasts.R ├── natreg-config.R ├── natreg-for-collaborative-ensemble.R ├── natreg-forecasts.R ├── natreg-with-eb-config.R ├── natreg-with-eb.R ├── prepare-covid19-submissions.R ├── retrospective_forecasts.R └── state-eval-config.R └── scratch └── flusurv_pancast_viz.R /.gitignore: -------------------------------------------------------------------------------- 1 | # begin license for material from https://github.com/github/gitignore/** 2 | 3 | # Copyright (c) 2015 GitHub, Inc. 
4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | # end license for material from https://github.com/github/gitignore/** 24 | 25 | 26 | 27 | # begin material from https://github.com/github/gitignore/blob/master/Global/Emacs.gitignore 28 | 29 | *~ 30 | \#*\# 31 | /.emacs.desktop 32 | /.emacs.desktop.lock 33 | *.elc 34 | auto-save-list 35 | tramp 36 | .\#* 37 | 38 | # Org-mode 39 | .org-id-locations 40 | *_archive 41 | 42 | # flymake-mode 43 | *_flymake.* 44 | 45 | # eshell files 46 | /eshell/history 47 | /eshell/lastdir 48 | 49 | # elpa packages 50 | /elpa/ 51 | 52 | # reftex files 53 | *.rel 54 | 55 | # AUCTeX auto folder 56 | /auto/ 57 | 58 | # cask packages 59 | .cask/ 60 | 61 | # end material from https://github.com/github/gitignore/blob/master/Global/Emacs.gitignore 62 | 63 | 64 | 65 | 66 | 67 | # begin material from https://github.com/github/gitignore/blob/master/Global/Vim.gitignore 68 | 69 | [._]*.s[a-w][a-z] 70 | [._]s[a-w][a-z] 71 | *.un~ 72 | Session.vim 73 | .netrwhist 74 | *~ 75 | 76 | # end material from https://github.com/github/gitignore/blob/master/Global/Vim.gitignore 77 | 78 | 79 | 80 | # begin material from https://github.com/github/gitignore/blob/master/R.gitignore 81 | 82 | # History files 83 | .Rhistory 84 | .Rapp.history 85 | 86 | # Session Data files 87 | .RData 88 | 89 | # Example code in package build process 90 | *-Ex.R 91 | 92 | # RStudio files 93 | .Rproj.user/ 94 | 95 | # produced vignettes 96 | vignettes/*.html 97 | vignettes/*.pdf 98 | 99 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 100 | .httr-oauth 101 | 102 | # end material from https://github.com/github/gitignore/blob/master/R.gitignore 103 | 104 | 105 | 106 | # begin material from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore 107 | 108 | # Compiled Object files 109 | *.slo 110 | *.lo 111 | *.o 112 | *.obj 113 | 114 | # Precompiled Headers 115 | *.gch 116 | *.pch 117 | 118 | # Compiled Dynamic libraries 119 | *.so 120 | *.dylib 121 | *.dll 122 | 123 | # Fortran module files 124 | *.mod 125 | 126 | # Compiled Static libraries 127 | *.lai 128 | *.la 129 | *.a 130 | *.lib 131 | 132 | # Executables 133 | *.exe 134 | *.out 135 | *.app 136 | 137 | # end material from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore 138 | 139 | # Assume contents of man page directories are automatically generated: 140 | man/** 141 | 142 | # Ignore copy of delphi_epidata.R: 143 | # 
/R_pkg/epiforecast/R/delphi_epidata.R 144 | # /R_pkg/epiforecast/R/RcppExports.R 145 | # /R_pkg/epiforecast/src/RcppExports.cpp 146 | 147 | # Ignore epiforecast package data folder (it should be generated by scripts in data-raw): 148 | /R_pkg/epiforecast/data/** 149 | 150 | 151 | # # (For now) offline copies of data that was fetched 152 | # *.csv 153 | # *.Rdata 154 | # --- Disabled to allow "real" .csv's like sample/wILI_Baseline_fixup.csv to be noticed 155 | 156 | # R documentation 157 | *.Rd 158 | 159 | # Personal todo lists 160 | *todo-justin.org -------------------------------------------------------------------------------- /LICENSE_notes: -------------------------------------------------------------------------------- 1 | - All code is offered under GPL, version 2 (included in the file LICENSE). 2 | - The file simplify2arrayp.R is also offered under GPL, version 2+ (see top of that file). 3 | - The file epiforecast/R/delphi_epidata.R is also offered under the MIT License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Warning 2 | 3 | This package is no longer being maintained. 4 | 5 | # Summary 6 | An R package that implements several methods for epidemiological forecasting: empirical Bayes (EB), basis regression (BR), and time-weighted kernel density estimation (twkde). 7 | 8 | # Installation 9 | On the Mac or Linux command line: 10 | ``` 11 | R CMD INSTALL epiforecast 12 | ``` 13 | 14 | or run the following in R: 15 | 16 | ```R 17 | devtools::install_github("cmu-delphi/epiforecast-R", subdir="epiforecast") 18 | ``` 19 | 20 | 21 | # Tests 22 | Run the code in ```demo/demo.R``` 23 | -------------------------------------------------------------------------------- /demo/demo.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 
12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | library(epiforecast) # or devtools::load_all("../epiforecast") to try without installing 23 | library(epiforecast.cpp14funs) # or devtools::load_all("../epiforecast.cpp14funs") to try without installing 24 | 25 | ## Fetch current ILINet data for Health and Human Services (HHS) Region 1: 26 | area.name = "hhs1" 27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch", area.name)) 31 | 32 | ## Try EB with basic options; evaluate line by line to see all plots 33 | mysim = eb.sim(full.dat, max.n.sims=1000) 34 | print(mysim) 35 | plot(mysim) 36 | target.forecast = target_forecast(mysim,'pht') 37 | print(target.forecast) 38 | plot(target.forecast) 39 | 40 | ## Try EB with more simulation settings 41 | control.list = get_eb_control_list(sd.option="prior",max.match.length=5) 42 | mysim = eb.sim(full.dat, max.n.sims=100, control.list=control.list) 43 | plot(mysim) 44 | target.forecast = target_forecast(mysim,'pwk') 45 | 46 | ## Try empirical futures 47 | mysim = empirical.futures.sim(full.dat) 48 | plot(mysim) 49 | 50 | ## Try empirical trajectories 51 | mysim = empirical.trajectories.sim(full.dat) 52 | plot(mysim) 53 | 54 | ## Try BR 55 | mysim = br.sim(full.dat, max.n.sims=100) 56 | plot(mysim) 57 | print(mysim,verbose=TRUE) 58 | target.forecast = target_forecast(mysim, "pht") 59 | 60 | ## Try BR with more simulation settings 61 | control.list = get_br_control_list(df=5, cv.rule="1se") 62 | mysim = br.sim(full.dat, max.n.sims=100, control.list=control.list) 63 | plot(mysim) 64 | print(mysim,verbose=TRUE) 65 | target.forecast = target_forecast(mysim, "pht") 66 | target.forecast = target_forecast(mysim, "pwk") 67 | 68 | ## Try Markovian twkde 69 | mysim = twkde.markovian.sim(full.dat) 70 | plot(mysim) 71 | 72 | ## Try twkde 73 | mysim = twkde.sim(full.dat) 74 | plot(mysim) 75 | 76 | ## plot(mysim, type="hexagonal") 77 | ## matplot(cbind(sample(head(full.dat,-1L),1L)[[1L]][1:53], mysim$ys[,sample.int(ncol(mysim$ys),1L)][1:53]), type="l") 78 | ## matplot(cbind(rowMeans(dat.to.matrix(head(full.dat,-1L),53L)), matrixStats::rowWeightedMeans(mysim$ys, mysim$weights)[1:53]), type="l") 79 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: epiforecast.cpp14funs 2 | Type: Package 3 | Title: Tools for forecasting semi-regular seasonal epidemic curves and similar 4 | time series 5 | Version: 0.0.1 6 | Date: 2015-12-09 7 | Author: Logan C. Brooks, David C. Farrow, Sangwon Hyun, Ryan J. Tibshirani, Roni 8 | Rosenfeld 9 | Maintainer: Logan C. 
Brooks 10 | Recommends: 11 | epiforecast 12 | Imports: 13 | stats, 14 | utils, 15 | lubridate, 16 | splines, 17 | glmgen, 18 | glmnet, 19 | httr, 20 | Rcpp, 21 | rlist, 22 | Hmisc, 23 | weights, 24 | hexbin, 25 | plotrix, 26 | tibble, 27 | dplyr, 28 | pipeR, 29 | matrixStats, 30 | quantreg, 31 | lpSolve, 32 | parallel, 33 | R.utils, 34 | RCurl, 35 | pbmcapply 36 | Description: Tools for forecasting semi-regular seasonal epidemic curves and 37 | similar time series. Includes an empirical Bayes approach that forms a prior 38 | by transforming historical curves, a basis regression approach that balances 39 | matching observations from the current season and matching historical 40 | seasons' measurements for future weeks, and timestep-by-timestep weighted 41 | kernel density estimation on backward differences parameterized by both the 42 | time series measurements and the current time. 43 | License: GPL-2 44 | RoxygenNote: 7.3.2 45 | Collate: 46 | 'RcppExports.R' 47 | 'checks.R' 48 | 'curve-fits.R' 49 | 'eb_dists.R' 50 | 'interface.R' 51 | 'eb.R' 52 | LinkingTo: Rcpp 53 | SystemRequirements: C++14 54 | Suggests: 55 | testthat 56 | Encoding: UTF-8 57 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(eb.createForecasts) 4 | export(eb.fitSmoothCurves) 5 | export(eb.sim) 6 | export(get_eb_control_list) 7 | export(unifChoicePrior) 8 | import(Rcpp) 9 | import(glmgen) 10 | useDynLib(epiforecast.cpp14funs) 11 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | CartesianProductCurves <- function(fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices) { 5 | .Call('_epiforecast_cpp14funs_CartesianProductCurves', PACKAGE = 'epiforecast.cpp14funs', fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices) 6 | } 7 | 8 | CartesianProductLogWeights <- function(new_dat_df, dat_rcpp, observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) { 9 | .Call('_epiforecast_cpp14funs_CartesianProductLogWeights', PACKAGE = 'epiforecast.cpp14funs', new_dat_df, dat_rcpp, observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) 10 | } 11 | 12 | ZipProductCurvesAndLogWeightsp <- function(output_times, new_dat_df, dat_rcpp, 
observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) { 13 | .Call('_epiforecast_cpp14funs_ZipProductCurvesAndLogWeightsp', PACKAGE = 'epiforecast.cpp14funs', output_times, new_dat_df, dat_rcpp, observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) 14 | } 15 | 16 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/checks.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ## Do nothing if \code{trueOrStopMessage} is \code{TRUE}; otherwise, 23 | ## output the error message given by \code{trueOrStopMessage}. Used 24 | ## with \code{is.or.why.not.*} functions. 
25 | ensureTRUE = function(trueOrStopMessage) { 26 | if (!isTRUE(trueOrStopMessage)) 27 | stop(trueOrStopMessage) 28 | } 29 | 30 | ## Do nothing if \code{trueOrStopMessage} is \code{TRUE}; otherwise, 31 | ## output the error message given by \code{trueOrStopMessage} with a 32 | ## tag indicating that it is a post-condition check failure 33 | ## (indicating a bug). Used with \code{is.or.why.not.*} functions. 34 | ensureTRUEpostcondition = function(trueOrStopMessage) { 35 | if (!isTRUE(trueOrStopMessage)) 36 | stop(paste("Post-condition check failed: ",trueOrStopMessage)) 37 | } 38 | 39 | ## Output \code{TRUE} if \code{smooth.dat} is list of numeric vectors 40 | ## with same length as \code{dat}, but each with length 53 and no 41 | ## \code{NA}'s (should be a smoothed version of \code{dat}). 42 | ## Otherwise, output a string to be used in an error message. 43 | is.or.why.not.smooth.dat = function(dat, smooth.dat) { 44 | if (length(dat) != length(smooth.dat)) { 45 | return("length(dat) != length(smooth.dat)") 46 | } 47 | ## if (length(smooth.dat) != 0 && 48 | ## !identical(53L, unique(sapply(smooth.dat, length)))) { 49 | ## return("=smooth.dat= should contain only 53-length vectors") 50 | ## } 51 | if (!identical(unname(lengths(dat)), unname(lengths(smooth.dat)))) 52 | stop("lengths of vectors in =dat= and =smooth.dat= do not match") 53 | ## xxx additional checks for numeric type 54 | return (TRUE) 55 | } 56 | ## xxx remove reference to 53, maybe change around length requirements to match =eb.fitSmoothCurves=. 57 | 58 | ## Output \code{TRUE} if \code{curve.models} is a list of lists; one 59 | ## list per season in \code{dat}, each list containing three elements 60 | ## \code{`f`}, the corresponding smoothed curve, \code{`tau`}, the 61 | ## estimate of the sd under the iid Gaussian noise assumption, and 62 | ## \code{`type`}, a string ("Gaussian") indicating what noise model 63 | ## was used. Otherwise, output a string to be used in an error 64 | ## message. 65 | is.or.why.not.curve.models = function(dat, curve.models) { 66 | if (length(dat) != length(curve.models)) 67 | return ("length(dat) != length(curve.models)") 68 | if (!inherits(curve.models, "list") || 69 | !all(vapply(curve.models, FUN.VALUE = logical(1L), inherits, "list"))) 70 | return ("curve.models must be a list of lists") 71 | if (length(curve.models)>0 && !identical(sapply(curve.models, names), matrix(rep(c("f","tau","type"), length(curve.models)),3))) 72 | return ("list elt's should be f, tau, type") 73 | ## xxx additional checks on f's, tau's, type's 74 | return (TRUE) 75 | } 76 | 77 | ## xxx use the =match= paradigm and forget about / use try-catch to alter post-condition error checks? 
78 | ## xxx use OO system to ensure predicates with types 79 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/curve-fits.R: -------------------------------------------------------------------------------- 1 | 2 | smooth.curves.to.fit = function(smooth.curves, type="Gaussian") { 3 | return (lapply(seq_along(smooth.curves$sigma.hat), function(fit.s.i) { 4 | list(f=smooth.curves$smooth.obj[[fit.s.i]], tau=smooth.curves$sigma.hat[fit.s.i], type=type) 5 | })) 6 | } 7 | 8 | fit.to.oldfit = function(fit) { 9 | f = lapply(fit, `[[`, "f") 10 | f <- sapply(f, `[`, seq_len(min(lengths(f)))) 11 | tau = sapply(fit, `[[`, "tau") 12 | return (list(f=f, tau=tau)) 13 | } 14 | 15 | oldfit.to.fit = function(oldfit, type="Gaussian") { 16 | return (lapply(seq_along(oldfit$tau), function(fit.s.i) { 17 | list(f=oldfit$f[,fit.s.i], tau=oldfit$tau[fit.s.i], type=type) 18 | })) 19 | } 20 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/eb_dists.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks, Ryan J. Tibshirani 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | 23 | ##' Creates a uniform distribution over discrete choices which can be used with \code{\link{get.eb.control.list}}. 24 | ##' 25 | ##' @param choices a vector of (discrete) choices 26 | ##' 27 | ##' @return a uniform discrete distribution over \code{choices}. 
28 | ##' 29 | ##' @examples 30 | ##' 31 | ##' uniform.seq = unifChoicePrior(letters[1:5]) 32 | ##' 33 | ##' ## The distributions used by EB can be broken down into buckets; 34 | ##' ## for the uniform discrete distribution, each bucket corresponds 35 | ##' ## (boringly) to a single choice from =choices=. However, it is 36 | ##' ## important to have a common interface. 37 | ##' random.bucket.indices = sample(seq_len(uniform.seq$n), 10000, replace=TRUE, prob=uniform.seq$probs) 38 | ##' random.elements = uniform.seq$sampler(random.bucket.indices) 39 | ##' random.elements.another.way = uniform.seq$choices[random.bucket.indices] # only works for =unifChoicePrior= 40 | ##' random.elements.a.third.way = letters[random.bucket.indices] # only works for this example 41 | ##' 42 | ##' @export 43 | unifChoicePrior = function(choices) { 44 | probs = rep(1.0/length(choices), length(choices)) # uniform probabilities 45 | sampler = function(inds) choices[inds] 46 | return (list(n = length(choices), choices = choices, probs = probs, sampler = sampler)) 47 | } 48 | ## xxx refactor to discretePrior with probs (default to uniform) and possibly tapply-like groups (determining bucket grouping, default to seq_along(choices)) 49 | 50 | integerTriangleAroundZeroPrior = function(max.val) { 51 | val.ramp = Seq(1,max.val) 52 | choices = c(rev(-val.ramp), 0, val.ramp) 53 | weights = c(val.ramp, max.val+1, rev(val.ramp)) 54 | probs = weights/sum(weights) 55 | sampler = function(inds) choices[inds] 56 | return (list(n = length(choices), choices = choices, probs = probs, sampler = sampler)) 57 | } 58 | 59 | unifDistrUnifGridPrior = function(min.val, max.val, n.choices) { 60 | # We want to discretize the uniform distribution U[min.val, max.val]. 61 | # Divide interval [min.val, max.val] into n.choices uniform-width buckets. 62 | # This code chooses the midpoints of these buckets as the representative elements (choices): 63 | choices = (1:n.choices - 0.5)/n.choices * (max.val-min.val) + min.val 64 | # Uniform probabilities for uniform grid: 65 | probs = rep(1.0/n.choices, n.choices) 66 | # Note for future: for other grids, actually compute the bucket boundaries and endpoints, then take differences of cdf 67 | mins = ((1:n.choices)-1)/n.choices * (max.val-min.val) + min.val 68 | maxes = (1:n.choices)/n.choices * (max.val-min.val) + min.val 69 | sampler = function(inds) stats::runif(length(inds), min=mins[inds], max=maxes[inds]) 70 | return (list(n = n.choices, choices = choices, probs = probs, sampler=sampler)) 71 | } 72 | 73 | logUnifDistrLogUnifGridPrior = function(min.val, max.val, n.choices) { 74 | # We want to discretize the uniform distribution U[min.val, max.val]. 75 | # Divide interval [min.val, max.val] into n.choices uniform-width buckets. 
76 | # This code chooses the midpoints of these buckets as the representative elements (choices): 77 | min.val <- log(min.val) 78 | max.val <- log(max.val) 79 | choices = (1:n.choices - 0.5)/n.choices * (max.val-min.val) + min.val 80 | # Uniform probabilities for uniform grid: 81 | probs = rep(1.0/n.choices, n.choices) 82 | # Note for future: for other grids, actually compute the bucket boundaries and endpoints, then take differences of cdf 83 | mins = ((1:n.choices)-1)/n.choices * (max.val-min.val) + min.val 84 | maxes = (1:n.choices)/n.choices * (max.val-min.val) + min.val 85 | sampler = function(inds) exp(stats::runif(length(inds), min=mins[inds], max=maxes[inds])) 86 | return (list(n = n.choices, choices = exp(choices), probs = probs, sampler=sampler)) 87 | } 88 | 89 | unifDistrGaussianGridPrior = function(min.val, max.val, n.choices, mean=(max.val+min.val)/2, sd=(max.val-min.val)/4) { 90 | # Non-truncated normal CDF at window ends: 91 | left.cd = stats::pnorm(min.val, mean=mean, sd=sd) 92 | right.cd = stats::pnorm(max.val, mean=mean, sd=sd) 93 | 94 | endpoint.cds = (0:n.choices)/n.choices * (right.cd-left.cd) + left.cd 95 | endpoints = stats::qnorm(endpoint.cds,mean=mean,sd=sd) 96 | if(endpoints[1] + 1e-12 < min.val) stop("Bug in tnormcdf reasoning.") 97 | if(endpoints[length(endpoints)] - 1e-12 > max.val) stop("Bug in tnormcdf reasoning.") 98 | # Domain- and mass- midpoints of buckets are chosen as the choices (these are the same since we are dealing with a uniform distribution): 99 | choices = (endpoints[1:n.choices] + endpoints[2:(n.choices+1)])/2 100 | probs = diff(endpoints)/(max.val-min.val) 101 | 102 | # todo sampler 103 | 104 | return (list(n=n.choices,choices=choices,probs=probs)) 105 | } 106 | 107 | unifLocGridPrior = function(fit) { 108 | locs = max.col(t(fit$f), ties.method="last") 109 | #locs = apply(fit$f,2,which.max) 110 | delta = mean(diff(sort(locs)))/2 111 | choices = Seq(max(round(min(locs)-delta),1), 112 | min(round(max(locs)+delta),nrow(fit$f))) 113 | return (unifChoicePrior(choices)) 114 | } 115 | 116 | # Uniform grid spacing, uniform scale (underlying peak height) distribution. 117 | unifScaleUnifGridPrior = function(fit, n.choices) { 118 | peaks = apply(fit$f,2,max) 119 | delta = mean(diff(sort(peaks)))/2 120 | 121 | min.val = min(peaks)-delta 122 | max.val = max(peaks)+delta 123 | 124 | return (unifDistrUnifGridPrior(min.val, max.val, n.choices)) 125 | } 126 | 127 | # Gaussian grid spacing, uniform scale (underlying peak height) distribution. 
128 | unifScaleGaussianGridPrior = function(fit, n.choices) { 129 | peaks = apply(fit$f,2,max) 130 | delta = mean(diff(sort(peaks)))/2 131 | 132 | min.val = min(peaks)-delta 133 | max.val = max(peaks)+delta 134 | 135 | return (unifDistrGaussianGridPrior(min.val, max.val, n.choices)) 136 | } 137 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/param_baseline.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/param_baseline.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/param_full.dat.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/param_full.dat.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/param_max.n.sims.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/param_max.n.sims.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/sim.method_template.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/sim.method_template.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX14 2 | 3 | # Local Variables: 4 | # mode: Makefile 5 | # End: 6 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/src/epiforecast.cpp14funs_types.hpp: -------------------------------------------------------------------------------- 1 | // author_header begin 2 | // Copyright (C) 2017 Logan C. Brooks 3 | // 4 | // This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | // 6 | // Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | // author_header end 8 | // license_header begin 9 | // epiforecast is free software: you can redistribute it and/or modify 10 | // it under the terms of the GNU General Public License as published by 11 | // the Free Software Foundation, version 2 of the License. 
12 | // 13 | // epiforecast is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | // 18 | // You should have received a copy of the GNU General Public License 19 | // along with epiforecast. If not, see . 20 | // license_header end 21 | 22 | // Note: the naming of this file is important in determining whether it is 23 | // properly included by the Rcpp::export attribute. 24 | 25 | #ifndef EPIFORECAST_TYPES_H 26 | #define EPIFORECAST_TYPES_H 27 | 28 | #include 29 | 30 | // xxx global typedefs; perhaps better as template arguments 31 | using Time = double; 32 | using Observation = double; 33 | using Mean = double; 34 | using SD = double; 35 | using LogLikelihood = double; 36 | using Rp = double; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/tests/testthat/fluview_hhs1_fetch.Rdata.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast.cpp14funs/tests/testthat/fluview_hhs1_fetch.Rdata.rds -------------------------------------------------------------------------------- /epiforecast.cpp14funs/tests/testthat/test-eb.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | 23 | context("Testing the eb.sim() function..") 24 | 25 | area.name = "hhs1" 26 | 27 | ## Make a sim object from fluview. 28 | full.dat = epiforecast::fetchEpidataFullDat("fluview", area.name, "wili", 29 | min.points.in.season=52L, 30 | first.week.of.season = 21L, 31 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 32 | mysim = eb.sim(full.dat, max.n.sims=100) 33 | 34 | ## Tests: 35 | 36 | test_that("Returns object of class 'sim'.", { 37 | expect_equal(class(mysim),"sim") 38 | }) 39 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs_0.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast.cpp14funs_0.0.1.pdf -------------------------------------------------------------------------------- /epiforecast/.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | -------------------------------------------------------------------------------- /epiforecast/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: epiforecast 2 | Type: Package 3 | Title: Tools for forecasting semi-regular seasonal epidemic curves and similar 4 | time series 5 | Version: 0.0.1 6 | Date: 2015-12-09 7 | Author: Logan C. Brooks, Sangwon Hyun, David C. Farrow, Aaron Rumack, Ryan J. 8 | Tibshirani, Roni Rosenfeld 9 | Maintainer: Logan C. Brooks 10 | Imports: 11 | stats, 12 | utils, 13 | lubridate, 14 | splines, 15 | glmgen, 16 | glmnet, 17 | httr, 18 | Rcpp, 19 | Matrix, 20 | rlist, 21 | Hmisc, 22 | weights, 23 | hexbin, 24 | plotrix, 25 | tibble, 26 | dplyr, 27 | tidyr, 28 | pipeR, 29 | matrixStats, 30 | quantreg, 31 | lpSolve, 32 | parallel, 33 | R.utils, 34 | RCurl, 35 | pbmcapply 36 | Description: Tools for forecasting semi-regular seasonal epidemic curves and 37 | similar time series. Includes an empirical Bayes approach that forms a prior 38 | by transforming historical curves, a basis regression approach that balances 39 | matching observations from the current season and matching historical 40 | seasons' measurements for future weeks, and timestep-by-timestep weighted 41 | kernel density estimation on backward differences parameterized by both the 42 | time series measurements and the current time. 
43 | License: GPL-2 44 | RoxygenNote: 7.3.2 45 | Collate: 46 | 'RcppExports.R' 47 | 'backcasters.R' 48 | 'backcasters_new.R' 49 | 'match.R' 50 | 'utils.R' 51 | 'br.R' 52 | 'cv.R' 53 | 'cv_apply.R' 54 | 'delphi_epidata.R' 55 | 'empirical.futures.R' 56 | 'empirical.trajectories.R' 57 | 'simclass.R' 58 | 'ensemble.R' 59 | 'namesp.R' 60 | 'map_join.R' 61 | 'epiproject.R' 62 | 'forecast_type.R' 63 | 'get_completed_fluview_state_df.R' 64 | 'holidays.R' 65 | 'weeks.R' 66 | 'loaders.R' 67 | 'predx_v2_spreadsheets.R' 68 | 'read.R' 69 | 'simplify2arrayp.R' 70 | 'target_spec.R' 71 | 'todo-by-file.R' 72 | 'twkde.R' 73 | LinkingTo: Rcpp 74 | SystemRequirements: 75 | Suggests: 76 | testthat 77 | Encoding: UTF-8 78 | 79 | -------------------------------------------------------------------------------- /epiforecast/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("[",array_proxy) 4 | S3method("[<-",array_proxy) 5 | S3method("[[",array_proxy) 6 | S3method("[[<-",array_proxy) 7 | S3method("dim<-",array_proxy) 8 | S3method("dimnames<-",array_proxy) 9 | S3method(aperm,array_proxy) 10 | S3method(as.list,array_proxy) 11 | S3method(as.vector,array_proxy) 12 | S3method(c,sim) 13 | S3method(c,target_forecast) 14 | S3method(c,uniform_forecast) 15 | S3method(dim,array_proxy) 16 | S3method(dimnames,array_proxy) 17 | S3method(length,array_proxy) 18 | S3method(names,array_proxy) 19 | S3method(plot,sim) 20 | S3method(plot,target_forecast) 21 | S3method(plot,target_multicast) 22 | S3method(print,array_proxy) 23 | S3method(print,sim) 24 | S3method(print,target_forecast) 25 | S3method(print,uniform_forecast) 26 | S3method(simplified_simlike,default) 27 | S3method(simplified_simlike,sim) 28 | S3method(target_forecast,sim) 29 | S3method(target_forecast,uniform_forecast) 30 | export("[.array_proxy") 31 | export("[<-.array_proxy") 32 | export("[[.array_proxy") 33 | export("[[<-.array_proxy") 34 | export("dim<-.array_proxy") 35 | export("dimnames<-.array_proxy") 36 | export(DateToYearWeekWdayDF) 37 | export(Date_to_epiweek) 38 | export(DatesOfSeason) 39 | export(Seq) 40 | export(add_epiweek_integer) 41 | export(aperm.array_proxy) 42 | export(array_proxy) 43 | export(as.list.array_proxy) 44 | export(as.vector.array_proxy) 45 | export(augmentWeeklyDF) 46 | export(br.sim) 47 | export(br.smoothedCurve) 48 | export(c.sim) 49 | export(c.target_forecast) 50 | export(c.uniform_forecast) 51 | export(covid19ilinet.202003.202008.target.specs) 52 | export(covid19ilinet.forecast.types) 53 | export(covid19ilinet_target_trajectory_preprocessor) 54 | export(cv_apply) 55 | export(dat.to.matrix) 56 | export(degenerate_em_weights) 57 | export(dim.array_proxy) 58 | export(dimnames.array_proxy) 59 | export(dimnames_or_inds) 60 | export(dimnamesnamesp) 61 | export(dimnamesp) 62 | export(dimnamesp_to_dimindices) 63 | export(dimp) 64 | export(dur) 65 | export(empirical.futures.sim) 66 | export(empirical.trajectories.sim) 67 | export(epi_end) 68 | export(epi_week_to_model_week) 69 | export(epiweek_Seq) 70 | export(epiweek_to_Date) 71 | export(epiweek_to_sunday) 72 | export(fetchEpidataDF) 73 | export(fetchEpidataFullDat) 74 | export(fetchEpidataHistoryDF) 75 | export(fetchUpdatingResource) 76 | export(flusight2016.evaluation.forecast.types) 77 | export(flusight2016.proxy.forecast.types) 78 | export(flusight2016.target.specs) 79 | export(flusight2016_target_trajectory_preprocessor) 80 | export(flusight2016ilinet_target_trajectory_preprocessor) 81 
| export(flusight2017flusurv.target.specs) 82 | export(flusight2017flusurv_target_trajectory_preprocessor) 83 | export(flusight2018.evaluation.forecast.types) 84 | export(flusight2018.proxy.forecast.types) 85 | export(flusight2018flusurv.first.submission.epi.week) 86 | export(flusight2018flusurv.last.submission.epi.week) 87 | export(flusight2018ilinet.first.submission.epi.week) 88 | export(flusight2018ilinet.last.submission.epi.week) 89 | export(flusight2018ilinet_target_trajectory_preprocessor) 90 | export(flusight2018natreg.target.specs) 91 | export(flusight2018state.target.specs) 92 | export(get.latest.time) 93 | export(get_br_control_list) 94 | export(is_christmas) 95 | export(is_newyear) 96 | export(is_thanksgiving) 97 | export(lasso_lad_coef) 98 | export(lastWeekNumber) 99 | export(length.array_proxy) 100 | export(map_join) 101 | export(map_join_) 102 | export(match.arg.else.default) 103 | export(match.dat) 104 | export(match.integer) 105 | export(match.new.dat.sim) 106 | export(match.nonnegative.numeric) 107 | export(match.single.na.or.numeric) 108 | export(match.single.nonna.integer) 109 | export(match.single.nonna.integer.or.null) 110 | export(match.single.nonna.numeric) 111 | export(mimicPastDF) 112 | export(mimicPastEpidataDF) 113 | export(model_week_to_epi_week) 114 | export(model_week_to_time) 115 | export(named_array_to_name_arrayvecs) 116 | export(named_arrayvec_to_name_arrayvec) 117 | export(names.array_proxy) 118 | export(namesp) 119 | export(ndimp) 120 | export(no_join) 121 | export(ons) 122 | export(pht) 123 | export(plot.sim) 124 | export(plot.target_forecast) 125 | export(plot.target_multicast) 126 | export(print.array_proxy) 127 | export(print.sim) 128 | export(print.target_forecast) 129 | export(print.uniform_forecast) 130 | export(pwk) 131 | export(read.from.file) 132 | export(reformat_to_predx_v2_spreadsheet) 133 | export(seasonModelWeekDFToYearWeekDF) 134 | export(seasonModelWeekToYearWeekDF) 135 | export(seasonModelWeekWdayDFToDate) 136 | export(seasonModelWeekWdayToDate) 137 | export(seasonOfDate) 138 | export(seasonOfYearWeek) 139 | export(season_model.week_to_epiweek) 140 | export(season_to_Season) 141 | export(simplex_lad_weights) 142 | export(simplified_simlike) 143 | export(simplified_simlike.sim) 144 | export(subtract_epiweek_epiweek) 145 | export(target_forecast) 146 | export(target_forecast.sim) 147 | export(target_forecast.uniform_forecast) 148 | export(time_to_model_week) 149 | export(trimPartialPastSeasons) 150 | export(twkde.markovian.sim) 151 | export(twkde.sim) 152 | export(unite_arraylike) 153 | export(usa.flu.first.week.of.season) 154 | export(usa_flu_inseason_flags) 155 | export(vector_as_named_array) 156 | export(vector_as_named_array_) 157 | export(weekConventions) 158 | export(weighted.bw.nrd0ish) 159 | export(weighted_tabulate) 160 | export(with_dimnames) 161 | export(with_dimnamesnames) 162 | export(with_no_effective_evaluation_time_mask_window) 163 | export(yearWeekDFToSeasonModelWeekDF) 164 | export(yearWeekToSeasonModelWeekDF) 165 | export(yearWeekWdayDFToDate) 166 | export(yearWeekWdayListsToDate) 167 | export(yearWeekWdayVecsToDate) 168 | import(R.utils) 169 | import(graphics) 170 | import(hexbin) 171 | import(httr) 172 | import(parallel) 173 | import(pipeR) 174 | import(rlist) 175 | import(tibble) 176 | importFrom(Matrix,Diagonal) 177 | importFrom(Matrix,Matrix) 178 | useDynLib(epiforecast) 179 | -------------------------------------------------------------------------------- /epiforecast/R/RcppExports.R: 
-------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Weighted, more \code{nbins}-restrictive version of \code{base::tabulate} 5 | #' 6 | #' @param bin integer-compatible vector; entries must be non-NA and between 1 7 | #' and \code{nbins}; these indices denote entries in the result vector to which 8 | #' the corresponding weights in \code{w} should be added 9 | #' @param nbins single non-NA, non-negative integer; length of the vector to 10 | #' return 11 | #' @param w numeric-compatible vector of the same length as \code{bin}; weights 12 | #' corresponding to the indices in \code{bin} 13 | #' @return numeric vector of length \code{nbins}; the \code{i}th entry is like 14 | #' \code{sum(w[bin==i])}, but with a naive summation algorithm 15 | #' 16 | #' @useDynLib epiforecast 17 | #' @export 18 | weighted_tabulate <- function(bin, nbins, w) { 19 | .Call('_epiforecast_WeightedTabulateRcpp', PACKAGE = 'epiforecast', bin, nbins, w) 20 | } 21 | 22 | -------------------------------------------------------------------------------- /epiforecast/R/cv.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' Compare two cv.sim objects 23 | cv.compare = function(cv.sim1, cv.sim2){ 24 | print("A helpful message like this: setting 1 (wiggle, holiday effect, etc.) 
is better than setting 2( wiggle, etc)") 25 | } 26 | 27 | 28 | 29 | ##' Class for cv objects; contains 30 | ##' (1) control list 31 | ##' (2) for each forecasting time and for each left-out season (fold), what are the densities in the 52 by n.grid block of the 2d plane?\ 32 | ##' (3) several things pre-calculated; the prediction scores (negative log-likelihood) for 4 target forecasts. 33 | ##' The idea is that, instead of 52 by nsim curves, we can store 52 by 34 | ##' n.grid values that store the density estimates, where n.grid can be 35 | ##' hundreds, while n.sim may be 10,000's. 36 | ##' It should return /all/ quantities required for doing 37 | cv.sim = function(){ 38 | print("Not written yet") 39 | return() 40 | } 41 | 42 | ##' Holiday effect: 43 | -------------------------------------------------------------------------------- /epiforecast/R/cv_apply.R: -------------------------------------------------------------------------------- 1 | ##' @import parallel 2 | ##' @import R.utils 3 | NULL 4 | 5 | cv_apply_helper = function(train_data, test_data, indexer_list, fn, parallel_dim_i=0L, ...) { 6 | current_dim_i = length(indexer_list) 7 | stopifnot(dim(train_data)[current_dim_i] == dim(test_data)[current_dim_i]) 8 | if (current_dim_i == 0L) { 9 | result = fn(train_data, test_data, ...) 10 | return (result) 11 | } else { 12 | current_dim_size = dim(train_data)[current_dim_i] 13 | current_dim_seq = seq_len(current_dim_size) 14 | current_dim_inpnames = dimnames(train_data)[[current_dim_i]] 15 | indexer_name = names(indexer_list)[current_dim_i] 16 | indexer_val = indexer_list[[current_dim_i]] 17 | switch(indexer_name, 18 | each={ 19 | train_inds = current_dim_seq 20 | test_inds = train_inds 21 | current_dim_outnames = current_dim_inpnames 22 | }, 23 | all={ 24 | train_inds = list(TRUE) 25 | test_inds = train_inds 26 | current_dim_outnames = "all" 27 | }, 28 | smear={ 29 | train_inds = lapply(current_dim_seq, function(center_ind) { 30 | window_inds = center_ind + indexer_val 31 | window_inds <- window_inds[1L <= window_inds & window_inds <= current_dim_size] 32 | if (length(window_inds)==0L) { 33 | stop ("smear argument led to selection of no data") 34 | } 35 | window_inds 36 | }) 37 | test_inds = current_dim_seq 38 | current_dim_outnames = current_dim_inpnames 39 | }, 40 | ablation={ 41 | train_inds = lapply(current_dim_seq, function(left_out_ind) { 42 | current_dim_seq[-left_out_ind] 43 | }) 44 | test_inds = train_inds 45 | current_dim_outnames = current_dim_inpnames 46 | }, 47 | subsets={ 48 | train_inds = indexer_val 49 | test_inds = train_inds 50 | current_dim_outnames = names(indexer_val) 51 | }, 52 | loo={ 53 | train_inds = lapply(current_dim_seq, function(train_out_ind) { 54 | current_dim_seq[-train_out_ind] 55 | }) 56 | test_inds = current_dim_seq 57 | current_dim_outnames = current_dim_inpnames 58 | }, 59 | oneahead={ 60 | test_start_ind = match.single.nonna.integer(indexer_val) 61 | if (test_start_ind <= 1L || current_dim_size < test_start_ind) { 62 | stop ("oneahead argument outside the index range for the corresponding dimension, or equal to one, resulting in no training data for the first fold") 63 | } 64 | test_inds = test_start_ind-1L + seq_len(current_dim_size-test_start_ind+1L) 65 | train_inds = lapply(test_inds-1L, seq_len) 66 | current_dim_outnames = current_dim_inpnames[test_inds] 67 | }, 68 | loo_oneahead={ 69 | oneahead_start_ind = match.single.nonna.integer(indexer_val) 70 | if (oneahead_start_ind <= 2L || current_dim_size < oneahead_start_ind) { 71 | stop ("loo_oneahead 
argument outside the index range for the corresponding dimension, or equal to one or two, resulting in no training data for the first fold") 72 | } 73 | loo_test_inds = seq_len(oneahead_start_ind-1L) 74 | loo_train_inds = lapply(loo_test_inds, function(train_out_ind) { 75 | loo_test_inds[-train_out_ind] 76 | }) 77 | oneahead_test_inds = oneahead_start_ind-1L + seq_len(current_dim_size-oneahead_start_ind+1L) 78 | oneahead_train_inds = lapply(oneahead_test_inds-1L, seq_len) 79 | train_inds = c(loo_train_inds, oneahead_train_inds) 80 | test_inds = c(loo_test_inds, oneahead_test_inds) 81 | current_dim_outnames = current_dim_inpnames[test_inds] 82 | }, 83 | { 84 | stop("Unrecognized indexer name.") 85 | } 86 | ) 87 | stopifnot(length(train_inds) == length(test_inds)) 88 | current_dim_lapply = if (current_dim_i == parallel_dim_i) { 89 | print("parallel::mclapply") 90 | parallel::mclapply 91 | } else { 92 | lapply 93 | } 94 | subresult.list = 95 | setNames(current_dim_lapply(seq_along(train_inds), function(indset_i) { 96 | train_data_inds = as.list(rep(TRUE, length(dim(train_data)))) 97 | if (length(train_inds[[indset_i]])==0L) { 98 | stop ("Some indexing operation resulted in 0 indices.") 99 | } 100 | train_data_inds[[current_dim_i]] <- train_inds[[indset_i]] 101 | inner_train_data = do.call(`[`, c(list(train_data, drop=FALSE), train_data_inds)) 102 | test_data_inds = as.list(rep(TRUE, length(dim(test_data)))) 103 | test_data_inds[[current_dim_i]] <- test_inds[[indset_i]] 104 | inner_test_data = do.call(`[`, c(list(test_data, drop=FALSE), test_data_inds)) 105 | cv_apply_helper(inner_train_data, inner_test_data, head(indexer_list,-1L), fn, parallel_dim_i=parallel_dim_i, ...) 106 | }), current_dim_outnames) 107 | result = simplify2array(subresult.list) 108 | return (result) 109 | } 110 | } 111 | 112 | ##' \code{apply}-like function applying binary functions on training and test set selections 113 | ##' 114 | ##' @param data an array 115 | ##' @param indexer_list a named list with one entry per dimension of \code{data} 116 | ##' specifying how to select training and test indices for that dimension; for 117 | ##' the \code{i}th entry: 118 | 119 | ##' * \code{each=NULL} slices both training and test data into 120 | ##' \code{dim(data)[[i]]} pieces by selecting each index along the \code{i}th 121 | ##' dimension; the corresponding output dimension width of 122 | ##' \code{dim(data)[[i]]} 123 | 124 | ##' * \code{all=NULL} performs no indexing along the \code{i}th dimension for 125 | ##' either training or test data; the corresponding output dimension has width 126 | ##' 1 and name "all" 127 | 128 | ##' * \code{smear=relative.indices} acts like \code{each=NULL}, but instead of 129 | ##' each slices corresponding to a single index, allows for nearby indices to 130 | ##' be included as well; for the \code{j}th slice, includes data for valid 131 | ##' indices in \code{j+relative.indices} 132 | 133 | ##' * \code{ablation=NULL} acts like \code{each=NULL}, but for the \code{j}th 134 | ##' "slice", excludes, rather than selects, data corresponding to index 135 | ##' \code{j} 136 | 137 | ##' * \code{subsets=subset.list} indexes both training and test data based on 138 | ##' the subsets in \code{subset.list}; each entry in \code{subset.list} should 139 | ##' be a vector of indices (logical, integer, or character) into the 140 | ##' \code{i}th dimension of \code{data}; the output dimension has width 141 | ##' \code{length(subset.list)} and names \code{names(subset.list)} 142 | 143 | ##' * \code{loo=NULL} performs 
leave-one-out cross-validation indexing: the 144 | ##' training set like \code{ablation=NULL}, while the test set is indexed like 145 | ##' \code{each=NULL} 146 | 147 | ##' * \code{oneahead=test_start_ind} slices the test data by taking each index 148 | ##' greater than or equal to the specified single \emph{integer} index 149 | ##' \code{test_start_ind}; the test data corresponding to index i is paired 150 | ##' with training data from indices strictly preceding i 151 | 152 | ##' * \code{loo_oneahead=oneahead_start_ind} slices the test data by each 153 | ##' index (similar to each); if the index i is less than the specified single 154 | ##' \emph{integer} index \code{oneahead_start_ind}, then it is paired with 155 | ##' training data from indices strictly preceding \code{oneahead_start_ind}, 156 | ##' excluding i; if i >= \code{oneahead_start_ind}, then the training data 157 | ##' contains all indices strictly preceding i 158 | 159 | ##' @param fn a \code{function(training.slice, test.slice)} returning a scalar, 160 | ##' vector, matrix, array, or list, with the same class and fixed structure 161 | ##' for all inputs 162 | 163 | ##' @return an array with dimensionality equal to the sum of the 164 | ##' dimensionless of the output of \code{fn} and of \code{data} 165 | 166 | ##' @md 167 | ##' @export 168 | cv_apply = function(data, indexer_list, fn, parallel_dim_i=0L, ...) { 169 | ## If =data= is 1-D, convert it to an array so it will have non-NULL dim: 170 | if (is.null(dim(data))) { 171 | data <- as.array(data) 172 | } 173 | if (length(dim(data)) != length(indexer_list)) { 174 | stop ("Need exactly one indexer_list entry per dimension of data (or exactly 1 entry for vector data).") 175 | } 176 | if (is.null(names(indexer_list))) { 177 | stop ("Indexer types must be specified using names in indexer_list; indexer_list has no names.") 178 | } 179 | ## Use recursive cv_apply_helper to compute the entries of the result: 180 | result = cv_apply_helper(data, data, indexer_list, fn, parallel_dim_i=parallel_dim_i, ...) 181 | ## --- Adjust the class and dimnames: --- 182 | ## Make sure the result is an array: 183 | result <- as.array(result) 184 | ## Make sure the dimnames are a list, not NULL: 185 | if (is.null(dimnames(result))) { 186 | dimnames(result) <- rep(list(NULL), length(dim(result))) 187 | } 188 | ## Make sure the dimnames names are a character vector, no NULL: 189 | if (is.null(names(dimnames(result)))) { 190 | names(dimnames(result)) <- rep("",length(dim(result))) 191 | } 192 | ## If the original input =data= had dimnames names, assign them to the 193 | ## corresponding dimensions in the result: 194 | if (!is.null(names(dimnames(data)))) { 195 | names(dimnames(result))[tail(seq_along(dim(result)), length(dim(data)))] <- names(dimnames(data)) 196 | } 197 | return (result) 198 | } 199 | 200 | ## todo don't subset at each dimension; prepare all indices then subset once? or once @ parallel dimension & once @ last dimension? 201 | ## todo try explicitly constructing cluster and things to export 202 | -------------------------------------------------------------------------------- /epiforecast/R/empirical.futures.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. 
Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' Simulate future in current trajectory with empirical (historical) 23 | ##' distribution 24 | ##' 25 | ##' @template sim.method_template 26 | ##' 27 | ##' @examples 28 | ##' ## National-level ILINet weighted %ILI data for recent seasons, excluding 2009 pandemic: 29 | ##' area.name = "nat" 30 | ##' full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 31 | ##' min.points.in.season = 52L, 32 | ##' first.week.of.season = 31L, 33 | ##' cache.file.prefix=sprintf("fluview_%s_fetch", area.name)) 34 | ##' full.dat <- full.dat[names(full.dat)!="S2009"] 35 | ##' ## Sample from conditional curve distribution estimate using the above data 36 | ##' ## and CDC's 2015 national %wILI onset threshold baseline of 2.1: 37 | ##' sim = empirical.futures.sim(full.dat, baseline=2.1, max.n.sims=100) 38 | ##' print(sim) 39 | ##' plot(sim, type="lineplot") 40 | ##' 41 | ##' @author Logan C. Brooks, David C. Farrow, Sangwon Hyun, Ryan J. 
Tibshirani, Roni Rosenfeld 42 | ##' 43 | ##' @export 44 | empirical.futures.sim = function(full.dat, baseline=NA_real_, max.n.sims=2000L) { 45 | ## extract historical data and future data from full.dat 46 | dat = head(full.dat, -1L) 47 | dat <- match.dat(dat) 48 | new.dat.sim = tail(full.dat, 1L)[[1]] 49 | new.dat.sim <- match.new.dat.sim(new.dat.sim) 50 | old.season.labels = head(names(full.dat), -1L) 51 | new.season.label = tail(names(full.dat), 1L) 52 | baseline <- match.single.na.or.numeric(baseline) # (ignored by twkde though) 53 | max.n.sims <- match.single.nonna.integer.or.null(max.n.sims) 54 | 55 | n.out = nrow(new.dat.sim[["ys"]]) 56 | ## sim object containing specified "pasts": 57 | past.sim = new.dat.sim 58 | ## sim object containing empirical "futures": 59 | future.sim = list( 60 | ys = sapply(dat, function(historical.trajectory) { 61 | approx(historical.trajectory, xout=seq_len(nrow(new.dat.sim[["ys"]])), rule=2L)[["y"]] 62 | }), 63 | weights = rep(1, length(dat)) 64 | ) 65 | ## We want to build a sim object for the random variable dplyr::coalesce(PAST, 66 | ## FUTURE), treating PAST and FUTURE as independent. If the n.sims for the 67 | ## past and future sim objects are small enough, we can calculate the 68 | ## resulting output sim exactly; otherwise, we will need to use sampling. 69 | n.past.sims = length(past.sim[["weights"]]) 70 | n.future.sims = length(future.sim[["weights"]]) 71 | if (n.past.sims * n.future.sims <= max.n.sims) { 72 | ## We can calculate the output sim exactly. (This case will typically be 73 | ## triggered when new.dat.sim represents a single possible trajectory rather 74 | ## than a distribution.) 75 | 76 | ## Pair each new.dat.sim partial trajectory with each dat trajectory: 77 | past.sim.is = rep(seq_len(n.past.sims), each=n.future.sims) 78 | future.sim.is = rep(seq_len(n.future.sims), times=n.past.sims) 79 | ## Construct wider sim objects that together correspond to a sim object over 80 | ## the cartesian product : 81 | product.past.sim = list( 82 | ys=past.sim[["ys"]][,past.sim.is], 83 | weights=past.sim[["weights"]][past.sim.is] 84 | ) 85 | product.future.sim = list( 86 | ys=future.sim[["ys"]][,future.sim.is], 87 | weights=future.sim[["weights"]][future.sim.is] 88 | ) 89 | } else { 90 | ## Construct (potentially) wider sim objects that together correspond to a 91 | ## sim object over the cartesian product : 92 | product.past.sim = upsample_sim(past.sim, max.n.sims, TRUE) 93 | product.future.sim = upsample_sim(future.sim, max.n.sims, TRUE) 94 | } 95 | 96 | ys = dplyr::coalesce(product.past.sim[["ys"]], product.future.sim[["ys"]]) 97 | weights = product.past.sim[["weights"]] * product.future.sim[["weights"]] 98 | 99 | ## Make a dummy control list, containing only model name 100 | control.list = list(model = "empirical.futures") 101 | 102 | ## Return sim object 103 | sim = list(ys=ys, 104 | weights=weights, 105 | control.list=control.list, 106 | old.dat = list(dat), 107 | ## fake a vector new.dat if necessary: 108 | new.dat = rowMeans(new.dat.sim[["ys"]]), 109 | old.season.labels = list(old.season.labels), 110 | new.season.label = list(new.season.label)) 111 | class(sim) <- "sim" 112 | return (sim) 113 | } 114 | -------------------------------------------------------------------------------- /epiforecast/R/empirical.trajectories.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. 
Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' Simulate future in current trajectory with empirical (historical) 23 | ##' distribution 24 | ##' 25 | ##' @template sim.method_template 26 | ##' 27 | ##' @examples 28 | ##' ## National-level ILINet weighted %ILI data for recent seasons, excluding 2009 pandemic: 29 | ##' area.name = "nat" 30 | ##' full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 31 | ##' min.points.in.season = 52L, 32 | ##' first.week.of.season = 31L, 33 | ##' cache.file.prefix=sprintf("fluview_%s_fetch", area.name)) 34 | ##' full.dat <- full.dat[names(full.dat)!="S2009"] 35 | ##' ## Sample from conditional curve distribution estimate using the above data 36 | ##' ## and CDC's 2015 national %wILI onset threshold baseline of 2.1: 37 | ##' sim = empirical.trajectories.sim(full.dat, baseline=2.1, max.n.sims=100) 38 | ##' print(sim) 39 | ##' plot(sim, type="lineplot") 40 | ##' 41 | ##' @author Logan C. Brooks, David C. Farrow, Sangwon Hyun, Ryan J. 
Tibshirani, Roni Rosenfeld 42 | ##' 43 | ##' @export 44 | empirical.trajectories.sim = function(full.dat, baseline=NA_real_, max.n.sims=2000L) { 45 | ## extract historical data and future data from full.dat 46 | dat = head(full.dat, -1L) 47 | dat <- match.dat(dat) 48 | new.dat.sim = tail(full.dat, 1L)[[1]] 49 | new.dat.sim <- match.new.dat.sim(new.dat.sim) 50 | old.season.labels = head(names(full.dat), -1L) 51 | new.season.label = tail(names(full.dat), 1L) 52 | baseline <- match.single.na.or.numeric(baseline) # (ignored by twkde though) 53 | max.n.sims <- match.single.nonna.integer.or.null(max.n.sims) 54 | 55 | n.out = nrow(new.dat.sim[["ys"]]) 56 | empirical.sim = list( 57 | ys = sapply(dat, function(historical.trajectory) { 58 | approx(historical.trajectory, xout=seq_len(nrow(new.dat.sim[["ys"]])), rule=2L)[["y"]] 59 | }), 60 | weights = rep(1, length(dat)) 61 | ) %>>% structure(class="sim") 62 | basic.sim = downsample_sim(empirical.sim, max.n.sims) 63 | 64 | ## Make a dummy control list, containing only model name 65 | control.list = list(model = "empirical.trajectories") 66 | 67 | ## Return sim object 68 | sim = c(basic.sim, list( 69 | control.list=control.list, 70 | old.dat = list(dat), 71 | ## fake a vector new.dat if necessary: 72 | new.dat = rowMeans(new.dat.sim[["ys"]]), 73 | old.season.labels = list(old.season.labels), 74 | new.season.label = list(new.season.label) 75 | )) 76 | return (sim) 77 | } 78 | -------------------------------------------------------------------------------- /epiforecast/R/ensemble.R: -------------------------------------------------------------------------------- 1 | ##' @include simclass.R 2 | NULL 3 | 4 | ##' @export 5 | degenerate_em_weights = function(distr.cond.lkhds, 6 | init.weights=rep(1/dim(distr.cond.lkhds)[[2L]],dim(distr.cond.lkhds)[[2L]]), 7 | instance.weights=rep(1, dim(distr.cond.lkhds)[[1L]]), 8 | stop.eps = sqrt(.Machine[["double.eps"]])) { 9 | if (any(init.weights < 1e-10)) { 10 | stop("All init.weight's must be >=1e-10") 11 | } 12 | if (!isTRUE(all.equal(1, sum(init.weights)))) { 13 | stop("Sum of init.weight's must be all.equal to 1.") 14 | } 15 | 16 | ## Set some constants: 17 | n.obs = dim(distr.cond.lkhds)[[1L]] 18 | n.distr = dim(distr.cond.lkhds)[[2L]] 19 | t.distr.cond.lkhds = t(distr.cond.lkhds) # dim: n.distr x n.obs 20 | 21 | ## Set initial values of variables adjusted each step: 22 | weights = init.weights # length: n.distr 23 | t.lkhds = init.weights*t.distr.cond.lkhds # dim: n.distr x n.obs 24 | marginals = colSums(t.lkhds) # length: n.obs 25 | log.lkhd = weighted.mean(log(marginals), instance.weights) # scalar 26 | ## log.lkhds = list(log.lkhd) 27 | if (log.lkhd == -Inf) { 28 | stop ("All methods assigned a probability of 0 to at least one observed event.") 29 | } else { 30 | repeat { 31 | old.log.lkhd = log.lkhd # scalar 32 | weights <- matrixStats::colWeightedMeans(t(t.lkhds)/marginals, instance.weights) # length: n.distr 33 | t.lkhds <- weights*t.distr.cond.lkhds # dim: n.distr x n.obs 34 | marginals <- colSums(t.lkhds) # length: n.obs 35 | log.lkhd <- weighted.mean(log(marginals), instance.weights) # scalar 36 | ## xxx inefficient 37 | ## log.lkhds <- c(log.lkhds,list(log.lkhd)) 38 | stopifnot (log.lkhd >= old.log.lkhd) 39 | if (log.lkhd-old.log.lkhd <= stop.eps || (log.lkhd-old.log.lkhd)/-log.lkhd <= stop.eps) { 40 | break 41 | } 42 | } 43 | } 44 | return (weights) 45 | } 46 | ## Test for agreement: 47 | ## 48 | ## ## |cvMixtureCoeffLogLkhdTestValues|: choose the best mixing 49 | ## ## coefficient between two 
distributions from a set of test 50 | ## ## coefficients to minimize CV-estimated log-likelihood loss given the 51 | ## ## observed indicator values. 52 | ## ## |indicators|: nxm matrix, observed indicator values 53 | ## ## |distr1|: nxm matrix, first distr's probs/E[indicator values] 54 | ## ## |distr2|: nxm matrix, second distr's probs/E[indicator values] 55 | ## ## n: number of events 56 | ## ## m: number of folds 57 | ## ## |test.coeffs|: different mixing coefficients of distr1 58 | ## ## |safety.lambda|: coefficient with which to pre-mix a uniform distr in with distr1, distr2 to avoid 0 entries 59 | ## ## result: best test coefficient of distr1 60 | ## mixture_coef_loglkhd_test_values = function(indicators, distr1, distr2, 61 | ## test.coeffs=c(0:1000/1000,10^(0:-120/10),1-10^(0:-120/10)), 62 | ## safety.lambda=1e-15) { 63 | ## n = nrow(indicators) # number of events per fold 64 | ## m = ncol(indicators) # number of folds 65 | ## distr1 <- (1-safety.lambda)*distr1 + safety.lambda/n # premix with tiny uniform 66 | ## distr2 <- (1-safety.lambda)*distr2 + safety.lambda/n # premix with tiny uniform 67 | ## elosses = sapply(test.coeffs, function(test.coeff) 68 | ## sum(-indicators*log(test.coeff*distr1+(1-test.coeff)*distr2)) 69 | ## ) # cv avg loss for each test coefficient 70 | ## return (test.coeffs[which.min(elosses)]) 71 | ## } 72 | ## { 73 | ## print(mixture_coef_loglkhd_test_values(matrix(c(1,0,1,0,1,0,1,0),2),matrix(c(0.2,0.8,0.2,0.8,0.2,0.8,0.2,0.8),2),matrix(c(1,0,1,0,1,0,1,0),2))) 74 | ## print(degenerate_em_weights(distrsToLkhds(matrix(c(1,0,1,0,1,0,1,0),2),matrix(c(0.2,0.8,0.2,0.8,0.2,0.8,0.2,0.8),2),matrix(c(1,0,1,0,1,0,1,0),2)))) 75 | ## print(degenerate_em_weights(matrix(c(0.2,0.2,0.2,0.2, 1,1,1,1),4))) 76 | ## } 77 | ## 78 | ## ## Test using instance.weights: 79 | ## degenerate_em_weights(matrix(c(0.5,0.2,0.2,0.2, 0.8,0.4,0.1,0.1),,2L)) 80 | ## degenerate_em_weights(matrix(c(0.5,0.2,0.2, 0.8,0.4,0.1),,2L),instance.weights=c(1,1,2)) 81 | ## ## Scale of instance.weights does not impact the result: 82 | ## degenerate_em_weights(matrix(c(0.5,0.2,0.2, 0.8,0.4,0.1),,2L),instance.weights=c(1,1,2)/5) 83 | 84 | ## Illustrative example/check: 85 | ## { 86 | ## m = 1 87 | ## a = -3 88 | ## b = 4 89 | ## par(ask=TRUE) 90 | ## for (i in 1:10) { 91 | ## hist(rnorm(800),col=rgb(1,0,0,0.5,1),freq=FALSE) 92 | ## hist(rnorm(800,m),col=rgb(0,1,0,0.5,1),freq=FALSE,add=TRUE) 93 | ## hist(runif(800,a,b),col=rgb(0,0,1,0.5,1),freq=FALSE,add=TRUE) 94 | ## } 95 | ## print('distr1 coeff:') 96 | ## f = function(points) empiricalBucketMasses(points, (-5:5)*0.6, tack.neg.inf=TRUE, tack.pos.inf=TRUE) 97 | ## cvMixtureCoeffLogLkhd(replicate(800,f(rnorm(1))),replicate(800,f(rnorm(5000,m))),replicate(800,f(runif(5000,a,b)))) 98 | ## } 99 | 100 | ##' @export 101 | lasso_lad_coef = function(y, X, include.intercept=TRUE) { 102 | y <- as.vector(y) 103 | coef(quantreg::rq(if(include.intercept) obs ~ . else obs ~ . 
+ 0, 104 | method="lasso", 105 | data=cbind(obs=y, as.data.frame(X)), 106 | lambda=mean(abs(y-X)))) 107 | } 108 | 109 | ##' @importFrom Matrix Diagonal Matrix 110 | ##' @export 111 | simplex_lad_weights = function(y, X) { 112 | if (length(y) != nrow(X)) stop("length(y) != nrow(X)") 113 | n = nrow(X) 114 | p = ncol(X) 115 | 116 | ## p beta, n s+, n s- 117 | objective.in = c(rep(0,p),rep(1,n),rep(1,n)) 118 | const.mat = rbind(cbind( X, Matrix::Diagonal(n), -Matrix::Diagonal(n) ), 119 | cbind( Matrix::Matrix(0,n,p), Matrix::Diagonal(n), Matrix::Matrix(0,n,n) ), 120 | cbind( Matrix::Matrix(0,n,p), Matrix::Matrix(0,n,n), Matrix::Diagonal(n) ), 121 | cbind( Matrix::Diagonal(p), Matrix::Matrix(0,p,n), Matrix::Matrix(0,p,n) ), 122 | c( rep(1,p), rep(0,n), rep(0,n) )) 123 | const.dir = c(rep("=" , n), 124 | rep(">=", n), 125 | rep(">=", n), 126 | rep(">=", p), 127 | "=" ) 128 | const.rhs = c( y, 129 | rep(0,n), 130 | rep(0,n), 131 | rep(0,p), 132 | 1) 133 | 134 | structure( 135 | lpSolve::lp("min",objective.in,,const.dir,const.rhs,dense.const=as.matrix(summary(const.mat)))$solution[1:p], 136 | names=colnames(X) 137 | ) 138 | } 139 | 140 | uniform_forecast = function(...) { 141 | return (structure(list(), class="uniform_forecast")) 142 | } 143 | 144 | ##' @method target_forecast uniform_forecast 145 | ##' @export 146 | ##' @export target_forecast.uniform_forecast 147 | target_forecast.uniform_forecast = function(uniform.forecast, ..., target.name, target.spec) { 148 | bin.info = target.spec[["bin_info_for"]](...) 149 | breaks = bin.info[["breaks"]] 150 | non.na.bin.representatives = breaks[-1L] - 0.5*diff(breaks) 151 | target.values = c( 152 | non.na.bin.representatives, 153 | get_na_value_or_empty_for_target(target.spec, ...) 154 | ) 155 | target.weights = rep(1/length(target.values), length(target.values)) 156 | return (structure(list(target.values = stats::setNames(list(target.values), target.name), 157 | target.weights = target.weights, 158 | method.settings = list(uniform.pseudoweight.total=0,smooth.sim.targets=FALSE) 159 | ), 160 | class="target_forecast")) 161 | } 162 | 163 | ##' @method print uniform_forecast 164 | ##' @export 165 | ##' @export print.uniform_forecast 166 | print.uniform_forecast = function(x, ...) { 167 | cat("uniform_forecast: produces uniform-probability forecasts for any target", fill=TRUE) 168 | } 169 | 170 | ##' Add information to a \code{uniform_forecast} object 171 | ##' 172 | ##' Uses \code{recursive=FALSE} and \code{use.names=TRUE} when forwarding to the 173 | ##' list \code{c} method; any attempt to override these values will generate an 174 | ##' error. 175 | ##' 176 | ##' @param uniform.forecast a \code{uniform_forecast} object 177 | ##' @param ... list of components to add to the \code{uniform_forecast} object; must lead to 178 | ##' a resulting \code{uniform_forecast} object with components that are all uniquely, 179 | ##' nontrivially (\code{!=""}) named 180 | ##' @return \code{uniform_forecast} object with the given components appended 181 | ##' 182 | ##' @method c uniform_forecast 183 | ##' 184 | ##' @export 185 | ##' @export c.uniform_forecast 186 | c.uniform_forecast = c_for_named_lists 187 | 188 | -------------------------------------------------------------------------------- /epiforecast/R/get_completed_fluview_state_df.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Aaron Rumack 3 | ## 4 | ## This file is part of epiforecast. 
Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | get_completed_fluview_state_df = function(epigroup, first.week.of.season=31L) { 23 | st = stringi::stri_trans_tolower(epigroup) 24 | st.dat = NULL 25 | if (st != "fl" && st != "la") { 26 | st.dat = fetchEpidataDF("fluview", st, first.week.of.season = first.week.of.season, 27 | cache.file.prefix=sprintf("fluview_%s_fetch", st)) 28 | if (st == "pr") { 29 | ## Estimate seasons 2010-2013 using other epigroups in Region 2 30 | njili = get_completed_fluview_state_df("nj",first.week.of.season = first.week.of.season)$ili 31 | nyili = get_completed_fluview_state_df("ny",first.week.of.season = first.week.of.season)$ili 32 | jfkili = get_completed_fluview_state_df("jfk",first.week.of.season = first.week.of.season)$ili 33 | pr.len = length(st.dat$ili) 34 | pr.fit = lm(st.dat$ili ~ tail(njili,pr.len) + tail(nyili,pr.len) + tail(jfkili,pr.len)) 35 | pr.est = pr.fit$coefficients[1] + pr.fit$coefficients[2]*njili + pr.fit$coefficients[3]*nyili + pr.fit$coefficients[4]*jfkili 36 | pr.dat = get_completed_fluview_state_df("nj") # Placeholder for values such as epiweek, issue, etc. 
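## Note: pr.fit above is trained only on the most recent pr.len weeks (the
## weeks for which PR itself has ILI data; the nj/ny/jfk regressors are
## aligned to them via tail()), while pr.est applies the fitted coefficients
## to the full-length regressor vectors, so it also yields estimates for the
## earlier weeks; those estimates are spliced into pr.dat$ili below for the
## weeks before firstepiweek.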
37 | vars = colnames(pr.dat)[startsWith(colnames(pr.dat),"num_")] # Flush numeric variables in data frame 38 | firstepiweek = min(st.dat$epiweek[!is.na(st.dat$issue)]) 39 | mask = pr.dat$epiweek >= firstepiweek # Have data from here and later 40 | pr.dat[mask,vars] = st.dat[st.dat$epiweek >= firstepiweek,vars] 41 | pr.dat$ili[!mask] = pr.est[!mask] 42 | pr.dat$ili[mask] = st.dat$ili[st.dat$epiweek >= firstepiweek] 43 | pr.dat$wili = pr.dat$ili 44 | pr.dat$region = st 45 | st.dat = pr.dat 46 | } else if (st == "vi") { 47 | ## Estimate seasons 2010-2015 using other epigroups in Region 2 48 | njili = get_completed_fluview_state_df("nj",first.week.of.season = first.week.of.season)$ili 49 | nyili = get_completed_fluview_state_df("ny",first.week.of.season = first.week.of.season)$ili 50 | jfkili = get_completed_fluview_state_df("jfk",first.week.of.season = first.week.of.season)$ili 51 | prili = get_completed_fluview_state_df("pr",first.week.of.season = first.week.of.season)$ili 52 | vi.len = length(st.dat$ili) 53 | vi.fit = lm(st.dat$ili ~ tail(njili,vi.len) + tail(nyili,vi.len) + tail(jfkili,vi.len) + tail(prili,vi.len)) 54 | vi.est = vi.fit$coefficients[1] + vi.fit$coefficients[2]*njili + vi.fit$coefficients[3]*nyili + vi.fit$coefficients[4]*jfkili + vi.fit$coefficients[5]*prili 55 | vi.dat = get_completed_fluview_state_df("nj") # Placeholder for values such as epiweek, issue, etc. 56 | vars = colnames(vi.dat)[startsWith(colnames(vi.dat),"num_")] # Flush numeric variables in data frame 57 | firstepiweek = min(st.dat$epiweek[!is.na(st.dat$issue)]) 58 | mask = vi.dat$epiweek >= firstepiweek # Have data from here and later 59 | vi.dat[mask,vars] = st.dat[st.dat$epiweek >= firstepiweek,vars] 60 | vi.dat$ili[!mask] = vi.est[!mask] 61 | vi.dat$ili[mask] = st.dat$ili[st.dat$epiweek >= firstepiweek] 62 | vi.dat$ili[vi.dat$ili < 0] = 0 63 | vi.dat$wili = vi.dat$ili 64 | vi.dat$region = st 65 | st.dat = vi.dat 66 | } 67 | } else if (st == "la" || st == "fl") { 68 | if (st == "la") { 69 | r = c("ok","ar","tx","nm","la") 70 | rdat = get_completed_fluview_state_df("hhs6",first.week.of.season = first.week.of.season) 71 | } else { 72 | r = c("ga","al","ms","tn","ky","nc","sc","fl") 73 | rdat = get_completed_fluview_state_df("hhs4",first.week.of.season = first.week.of.season) 74 | } 75 | st1 = get_completed_fluview_state_df(r[1],first.week.of.season = first.week.of.season) 76 | st2 = get_completed_fluview_state_df(r[2],first.week.of.season = first.week.of.season) 77 | x = merge(st1,st2,by="epiweek",suffixes=c("","")) 78 | for (s in r[3:length(r)-1]) { 79 | st3 = get_completed_fluview_state_df(s,first.week.of.season = first.week.of.season) 80 | x = merge(x,st3,by="epiweek",suffixes=c("","")) 81 | } 82 | vars = colnames(st1)[startsWith(colnames(st1),"num_")] # List of numeric variables in data frame 83 | firstepiweek = min(st1$epiweek) 84 | st.dat = cbind(rdat[(rdat["epiweek"] >= firstepiweek),]) # Default values are that of the region 85 | st.dat$region = st 86 | for (v in vars) { 87 | st.dat[[v]] = get(v,st.dat) - rowSums(x[colnames(x)==v],na.rm=FALSE) # Subtract the sum of the other states in the region 88 | } 89 | st.dat$wili = st.dat$num_ili * 100 / st.dat$num_patients # Reset %ILI and wILI 90 | st.dat$ili = st.dat$wili 91 | } 92 | return (st.dat) 93 | } 94 | -------------------------------------------------------------------------------- /epiforecast/R/holidays.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. 
Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | ##' Test if Date is Christmas Day (vectorized) 23 | ##' 24 | ##' @param Date \code{Date} vector to test 25 | ##' @return logical vector 26 | ##' 27 | ##' @export 28 | is_christmas = function(Date) { 29 | posixlt = as.POSIXlt(Date) 30 | return (posixlt$mon + 1L == 12L & posixlt$mday == 25L) 31 | } 32 | 33 | ##' Test if Date is (Gregorian) New Year's Day (vectorized) 34 | ##' 35 | ##' @param Date \code{Date} vector to test 36 | ##' @return logical vector 37 | ##' 38 | ##' @export 39 | is_newyear = function(Date) { 40 | posixlt = as.POSIXlt(Date) 41 | return (posixlt$mon + 1L == 1L & posixlt$mday == 1L) 42 | } 43 | 44 | ##' Test if Date is Thanksgiving Day (vectorized) 45 | ##' 46 | ##' @param Date \code{Date} vector to test 47 | ##' @return logical vector 48 | ##' 49 | ##' @export 50 | is_thanksgiving = function(Date) { 51 | posixlt = as.POSIXlt(Date) 52 | return (posixlt$mon + 1L == 11L & (posixlt$mday - 1L) %/% 7L + 1L == 4L & posixlt$wday == 4L) 53 | } 54 | 55 | ## todo: Chinese New Year; cannot use seasonal::cny since it is GPL3.0; 56 | ## investigate seasonal-cited source, 57 | ## http://www.chinesenewyears.info/chinese-new-year-calendar.php (license does 58 | ## not mention reuse) or ConvCalendar package (projectpluto.com calendars) 59 | 60 | ## alternative: timeDate::holiday / chron::is.holiday 61 | -------------------------------------------------------------------------------- /epiforecast/R/predx_v2_spreadsheets.R: -------------------------------------------------------------------------------- 1 | 2 | ##' Reformat a predx v1 spreadsheet to a predx v2 spreadsheet 3 | ##' 4 | ##' @param old.spreadsheet predx v1 spreadsheet 5 | ##' @param old.spreadsheet predx v2 spreadsheet template 6 | ##' @return predx v2 spreadsheet 7 | ##' 8 | ##' @export 9 | reformat_to_predx_v2_spreadsheet = function(old.spreadsheet, new.template) { 10 | new.spreadsheet.minus.ordering = old.spreadsheet %>>% 11 | dplyr::group_by(Location, Target, Type, Unit) %>>% 12 | dplyr::do( 13 | tibble::tibble( 14 | location = .[["Location"]][[1L]], 15 | target = .[["Target"]][[1L]], 16 | type = tolower(.[["Type"]][[1L]]), 17 | bin = 18 | if (.[["Type"]][[1L]] == "point") { 19 | NA_character_ 20 | } else if (.[["Type"]][[1L]] == "bin") { 21 | sprintf("%.1f", covid19ilinet.percentage.bin.info[["breaks"]] %>>% {.[-length(.)]}) 22 | } else stop(sprintf('Unexpected Type encountered: %s.', .[["Type"]][[1L]])), 23 | value = .[["Value"]] 24 | ) 25 | ) %>>% 26 | dplyr::ungroup() %>>% 27 | dplyr::select(-Location, -Target, -Type, -Unit) %>>% 28 | {.} 29 | bad.rows = dplyr::anti_join(new.spreadsheet.minus.ordering, new.template, by=c("location","target", "type", "bin")) 30 | if (nrow(bad.rows)!=0L) { 31 | stop (paste(collapse="\n", capture.output({ 32 | cat('Formed rows with index column entries that are not present in the template:', fill=getOption('width')-nchar('Error: ')) 33 | print(bad.rows) 34 | }))) 35 | ## fixme todo check for duplicates 36 | } 37 | overlarge.groups = new.spreadsheet.minus.ordering %>>% 38 | dplyr::group_by(location, target, type, bin) %>>% 39 | dplyr::summarize(count=dplyr::n()) %>>% 40 | dplyr::ungroup() %>>% 41 | dplyr::filter(count != 1L) %>>% 42 | {.} 43 | if (nrow(overlarge.groups) != 0L) { 44 | stop (paste(collapse="\n", capture.output({ 45 | cat('Formed overlarge groups:', fill=getOption('width')-nchar('Error: ')) 46 | print(overlarge.groups) 47 | }))) 48 | } 49 | ## It's okay to omit rows the other way around when submitting forecasts for only a subset of 
targets. 50 | new.spreadsheet = new.spreadsheet.minus.ordering %>>% 51 | dplyr::mutate( 52 | location=ordered(location, unique(new.template[["location"]])), 53 | target=ordered(target, unique(new.template[["target"]])), 54 | type=ordered(type, unique(new.template[["type"]])) 55 | ) %>>% 56 | dplyr::arrange(location, target, type) %>>% 57 | {.} 58 | return (new.spreadsheet) 59 | } 60 | -------------------------------------------------------------------------------- /epiforecast/R/read.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | ##' Get index (time) of last non-\code{NA} in a vector or other new.dat.sim 23 | ##' @export 24 | get.latest.time = function(new.dat.sim){ 25 | new.dat.sim <- match.new.dat.sim(new.dat.sim) 26 | ys = new.dat.sim[["ys"]] 27 | 28 | is.na.ys = is.na(ys) 29 | 30 | ## if (!any(is.na.ys)) { 31 | ## stop("There are no missing values in new.dat.sim, i.e., the new season has been fully observed!") 32 | ## } 33 | 34 | ## check that NA occurrences match between all columns 35 | if (ncol(is.na.ys) > 1L && # preoptimization...: no need to check if only 1 trajectory 36 | any(xor(is.na.ys[,1], is.na.ys))) { 37 | stop ("All trajectories in new.dat.sim must have NA at exactly the same indices.") 38 | } 39 | 40 | ## Obtain index of non-NA 41 | time.of.forecast = max(0L, which(!is.na.ys[,1])) 42 | 43 | if (!all(time.of.forecast < which(is.na.ys[,1]))) { 44 | stop("new.dat.sim should be formatted so that non-NA's are followed by all NA's") 45 | } 46 | 47 | return(time.of.forecast) 48 | } 49 | 50 | 51 | ##' Function that reads from a csv file 52 | ##' @param filename name of data file with each column equal to each season, 53 | ##' with the first (n-1) columns to be, and the n'th column with the current 54 | ##' season. 55 | ##' @export 56 | read.from.file = function(filename){ 57 | 58 | ## Sanity check of data file formatting 59 | check.file.contents(filename) 60 | 61 | ## Reformat table into list 62 | full.dat = read.csv(filename) 63 | return (table.to.list(full.dat)) 64 | } 65 | 66 | 67 | 68 | ##' Function to check if a data file is properly formatted (i.e. contains the 69 | ##' headers, is filled with numeric values, etc.) Current implementation is very 70 | ##' memory-inefficient. 71 | ##' 72 | ##' @param filename name of data filewith each column equal to each season, with 73 | ##' the first (n-1) columns to be, and the n'th column with the current 74 | ##' season. 75 | check.file.contents = function(filename) { 76 | my.full.dat = read.csv(filename) 77 | check.table.format(my.full.dat) 78 | } 79 | 80 | 81 | 82 | 83 | ##' Performs various checks on full.dat as a table (matrix/data.frame). 84 | check.table.format = function(full.dat){ 85 | 86 | stopifnot(inherits(full.dat, "matrix") || inherits(full.dat, "data.frame")) 87 | 88 | ## Check if any column names are missing or NA 89 | default.colnames = Map(paste0, rep("V",ncol(full.dat)), 1:ncol(full.dat)) 90 | if(any(colnames(full.dat) == default.colnames)) stop("Must supply all column names!") 91 | if(any(is.na(colnames(full.dat)))) stop("Column names contains NA values!") 92 | if(any(colnames(full.dat)=="NA")) stop("Column names contains NA values!") 93 | 94 | ## Check if it contains all numeric values. 
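## Note on the apply() call below: apply() coerces a data.frame to a matrix
## first, so a single non-numeric column makes every column test as
## non-numeric; the stop() below still fires as intended, it just cannot say
## which column is at fault. A column-wise alternative for the data.frame
## case (an illustrative sketch only, not used here) would be:
##   all(vapply(full.dat, is.numeric, logical(1L)))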
95 | all.is.numeric = all(apply(full.dat,2, function(mycol){all(is.numeric(mycol))})) 96 | if(!all.is.numeric){ 97 | stop("Table must contain all numeric values!") 98 | } 99 | 100 | ## Helper function 101 | check.column.to.see.if.partially.observed = function(mycolumn){ 102 | ## Continue scan until you find the first three consecutive missing values 103 | scan.boolean.vector.for.three.consecutive.trues = function(ind, boolean.vector.to.scan){ 104 | if(ind+3 > length(boolean.vector.to.scan)){stop("ind is bigger than n-3")} 105 | return(all(boolean.vector.to.scan[ind:(ind+3)]))} 106 | ## Make function to deal with |is.na(mycolumn)| 107 | myscan = function(ind){scan.boolean.vector.for.three.consecutive.trues(ind, is.na(mycolumn))} 108 | max.ind.three.NA.starts = max(which(!sapply(1:(length(mycolumn)-3), myscan))) 109 | return(max.ind.three.NA.starts) 110 | } 111 | 112 | ## Check if last column is partially observed 113 | last.column = full.dat[,ncol(full.dat)] 114 | max.ind.three.NA.starts = check.column.to.see.if.partially.observed(last.column) 115 | if(max.ind.three.NA.starts >= length(last.column) - 3){ 116 | stop("I roughly scanned the last column of your table, but it doesn't seem to be partially observed!") 117 | } 118 | } 119 | 120 | 121 | 122 | ##' Performs various checks on full.dat as a list of numeric vectors. 123 | check.list.format = function(full.dat){ 124 | ## Check if list is formatted correctly. 125 | if(!inherits(full.dat, "list")) stop("Type of input is not list!") 126 | 127 | ## Check if all numeric values 128 | all.is.numeric = all(sapply(full.dat, function(mycol){all(is.numeric(mycol))})) 129 | if(!all.is.numeric){ 130 | stop("All vectors must only contain numeric values!") 131 | } 132 | 133 | ## Check if any column names are missing or NA 134 | default.colnames = Map(paste0, rep("V",length(full.dat)), 1:length(full.dat)) 135 | if(any(is.na(names(full.dat)))) stop("Column names contains NA values!") 136 | if(any(names(full.dat) == default.colnames)) stop("Must supply all column names!") 137 | if(any(nchar(names(full.dat))==0)) stop("Some column names are missing!") 138 | 139 | ## ## Check partially observed last season 140 | ## if(!any(is.na(full.dat[[length(full.dat)]]))) stop("Last element (vector) of dataframe is not partially observed! (i.e. there should be some NA values!)") 141 | 142 | 143 | ## Check if last column is partially observed. 144 | ## a = (which(!is.na(last.column.weird.full.dat[,ncol(last.column.weird.full.dat)]))) 145 | ## a.diff = c(a[2:length(a)],NA) - c(a[1:(length(a)-1)],NA) 146 | ## if(any(a.diff[!is.na(a.diff)]!=1)) stop("Last column does not seem to be partially observed! i.e. no missing values!") 147 | } 148 | 149 | 150 | 151 | ##' Function to change full.dat from table to list. 152 | table.to.list = function(full.dat){ 153 | mylist = lapply(seq_len(ncol(full.dat)), function(i) full.dat[,i]) 154 | names(mylist) = colnames(full.dat) 155 | return(mylist) 156 | } 157 | 158 | 159 | 160 | 161 | ## ##' Produces a |sim| object. 162 | ## ##' @param full.dat List of numeric vectors, each with proper names. 
163 | ## ## ' @param area.name One of "nat" or "hhs*" where * is 1~10 # 164 | ## ## ' @param full.dat Either a properly formatted list (like the output of 165 | ## ## ' fetchepidataDF(); a list of numeric vectors, with the last vector being 166 | ## ## ' the 'new' season for which forecasts are being made), or a matrix/data 167 | ## ## ' frame with appropriate column names (and of which the last column 168 | ## ## ' represents the 'new' seaosn for which forecasts are being made.) 169 | ## ## ' @param filename The file name of the csv file that contains the table of 170 | ## ## ' data, with the first row being the names of the seasons, and each 171 | ## ## ' subsequent i'th row being the dataset for, say, week i. The last column 172 | ## ## ' should contain the 'new' season. 173 | ## ## ##' @examples 174 | ## ## ##' sim = make.forecast(full.dat = NULL, 175 | ## ## ##' area.name = "hhs1", 176 | ## ## ##' first.model.week = 21L, 177 | ## ## ##' method = "br") 178 | ## ## ##' matplot(sim$ys[,1:100], type = 'l', col = 'cyan', lty=1) 179 | ## make.eb.forecast = function(full.dat, 180 | ## first.model.week, 181 | ## min.points.in.season=52L, 182 | ## n.sim, 183 | ## eb.control.list = function(...){ eb.control.list(n.sim, ... ))){ 184 | 185 | ## ## Split into old dat (list) and new dat (vector) 186 | ## old.dat = head(full.dat, -1L) 187 | ## new.dat = tail(full.dat, 1L)[[1]] 188 | 189 | ## time.of.forecast = get.latest.time(new.dat) 190 | 191 | ## ## Unbundle control list that contains options for simulation 192 | ## ## Then make forecast 193 | 194 | ## if(is.null(eb.option)){ 195 | ## eb.control.list = get.eb.control.list() 196 | ## } else { 197 | ## option.names = names(eb.options) 198 | ## eb.control.list = get.eb.control.list() 199 | ## } 200 | 201 | ## ## Make sim = eb.sim(dat, new.dat, control.list = eb.control.list) 202 | 203 | ## ## Return the simulated curves 204 | ## return(sim) 205 | ## } 206 | 207 | -------------------------------------------------------------------------------- /epiforecast/R/simplify2arrayp.R: -------------------------------------------------------------------------------- 1 | ## These functions are modified R methods from base R, under GPL 2+. Original 2 | ## license information from license() is below. The modification is from Aaron 3 | ## Rumack. 4 | 5 | ## This software is distributed under the terms of the GNU General 6 | ## Public License, either Version 2, June 1991 or Version 3, June 2007. 7 | ## The terms of version 2 of the license are in a file called COPYING 8 | ## which you should have received with 9 | ## this software and which can be displayed by RShowDoc("COPYING"). 10 | ## Version 3 of the license can be displayed by RShowDoc("GPL-3"). 11 | 12 | ## Copies of both versions 2 and 3 of the license can be found 13 | ## at https://www.R-project.org/Licenses/. 14 | 15 | ## A small number of files (the API header files listed in 16 | ## R_DOC_DIR/COPYRIGHTS) are distributed under the 17 | ## LESSER GNU GENERAL PUBLIC LICENSE, version 2.1 or later. 18 | ## This can be displayed by RShowDoc("LGPL-2.1"), 19 | ## or obtained at the URI given. 20 | ## Version 3 of the license can be displayed by RShowDoc("LGPL-3"). 21 | 22 | ## 'Share and Enjoy.' 
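## Editorial note (not in the original file): simplify2arrayp() below appears
## to be a lightly modified copy of base R's simplify2array(), per the license
## note above; the exact modification is not documented here. A rough usage
## sketch, with names chosen purely for illustration:
##   x = list(a = matrix(1:4, 2L), b = matrix(5:8, 2L))
##   y = simplify2arrayp(x)
##   dim(y)            # c(2, 2, 2): equal-shaped elements are stacked
##   dimnames(y)[[3]]  # c("a", "b"): the list names become the last dimnames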
23 | 24 | simplify2arrayp = function (x, higher = TRUE) 25 | { 26 | if (length(common.len <- unique(as.vector(lengths(x)))) > 1L) 27 | return(x) 28 | if (common.len == 1L) 29 | unlist(x, recursive = FALSE) 30 | else if (common.len > 1L) { 31 | n <- length(x) 32 | r <- unlist(x, recursive = FALSE, use.names = FALSE) 33 | if (higher && length(c.dim <- unique(lapply(x, dim))) == 34 | 1 && is.numeric(c.dim <- c.dim[[1L]]) && prod(d <- c(c.dim, 35 | n)) == length(r)) { 36 | iN1 <- is.null(n1 <- dimnames(x[[1L]])) 37 | n2 <- names(x) 38 | dnam <- if (!(iN1 && is.null(n2))) 39 | c(if (iN1) rep.int(list(n1), length(c.dim)) else n1, 40 | list(n2)) 41 | array(r, dim = d, dimnames = dnam) 42 | } 43 | else if (prod(d <- c(common.len, n)) == length(r)) 44 | array(r, dim = d, dimnames = if (!(is.null(n1 <- names(x[[1L]])) & 45 | is.null(n2 <- names(x)))) 46 | list(n1, n2)) 47 | else x 48 | } 49 | else x 50 | } 51 | -------------------------------------------------------------------------------- /epiforecast/R/todo-by-file.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | ###################### 23 | ## interface.R ####### 24 | ###################### 25 | 26 | ## todo length checks --- time.of.forecast not only with dat, but with fit 27 | ## todo remove n.out arg --- determine by min fit length 28 | 29 | ## ## source("../sample/sample_config_flu_1516.R") 30 | ## source("loaders.R") 31 | ## source("fitters.R") 32 | ## source("nidss/nidss_fetch_data.R", chdir=TRUE) 33 | 34 | ## nidss.datfit = fetchNIDSSDatFit("flu", "nationwide") 35 | 36 | ## olddatplus = nidss.datfit$olddatplus 37 | ## oldfit = nidss.datfit$fit 38 | ## include.ss = nidss.datfit$include.ss 39 | ## current.s = nidss.datfit$current.s 40 | ## first.year = nidss.datfit$first.year 41 | ## first.model.week = nidss.datfit$first.model.week 42 | 43 | ## fit.ss = include.ss[include.ss < current.s] 44 | ## exclude.2009.pandemic.season=TRUE 45 | ## if (exclude.2009.pandemic.season) { 46 | ## fit.ss <- fit.ss[-2] 47 | ## oldfit <- list(f=oldfit$f[,-2], tau=oldfit$tau[-2]) 48 | ## } 49 | ## train.ss = fit.ss 50 | ## test.s = max(train.ss)+1 51 | 52 | ## newdat = olddatplus.to.newdat(olddatplus) 53 | ## newdat.attributes = attributes(newdat) 54 | ## newdat <- newdat[match(fit.ss, include.ss)] 55 | ## newdat.attributes$names <- names(newdat) 56 | ## attributes(newdat) <- newdat.attributes 57 | 58 | ## qwer = fit.eb.control.list(oldfit.to.newfit(oldfit), get.eb.control.list()) 59 | ## asdf = eb.createForecasts(newdat, olddatplus$wili[olddatplus$season==test.s], oldfit.to.newfit(oldfit), 0) 60 | ## asdf = eb.createForecasts(newdat, olddatplus$wili[olddatplus$season==test.s], oldfit.to.newfit(oldfit), 1L) 61 | ## source("plotters.R") 62 | ## newfit = smooth.curves.to.newfit(eb.fitSmoothCurves(newdat)) 63 | ## matplot.newdat(newdat) 64 | ## matplot.newfit(newdat, newfit) 65 | ## seriesplot.newfit(newdat, smooth.curves.to.newfit(eb.fitSmoothCurves(newdat))) 66 | 67 | ## xxx instead of n.out, allow NA's in the future trajectories, just fill in all; use !is.na as another ii.match mask? 68 | ## todo explicitly make object that represents a distribution of curves, corresponding fitting functions, then the conditioning method? 69 | ## todo rename forecast time to something with "ind"? 70 | ## todo documentation 71 | ## todo imports 72 | ## todo examples 73 | 74 | ###################### 75 | ## loaders.R ######### 76 | ###################### 77 | 78 | ## xxx consider fetching by issue instead in fetchEpidataHistoryDF; at least for 79 | ## fluview, the set of all issues should be a subset of the set of all epiweeks 80 | ## from the current data frame 81 | 82 | ## todo version of mimicPastEpidataDF that never uses future data, instead 83 | ## taking the seasonally-expected change from the last available data point or 84 | ## stopping if there is no available data point beforehand (will need to handle finalized versions inputted later with lags outside the =lags= range... override their lag with the max lag in =lag= and update =issue= accordingly?) 
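## (Editorial note: the commented-out block below sketches a consistency
## test; both mimicPastEpidataDF variants are applied to the same history.dt
## and epiweek, and the resulting epiweek/issue/forecast.epiweek/wili tables
## are compared with identical().)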
85 | 86 | ## ## todo turn into test 87 | ## history.dt = fetchEpidataHistoryDT("fluview", "hhs1", 0:51, 88 | ## first.week.of.season = 31L, 89 | ## cache.file.prefix="~/.epiforecast-cache/fluview_hhs1") 90 | ## list(mimicPastEpidataDF1, mimicPastEpidataDF2) %>>% 91 | ## lapply(function(mimicPastEpidataDFn) { 92 | ## ## mimicPastEpidataDFn(history.dt, 201540L) %>>% 93 | ## mimicPastEpidataDFn(history.dt, 201040L) %>>% 94 | ## dplyr::arrange(-epiweek) %>>% 95 | ## dplyr::select(epiweek, issue, forecast.epiweek, wili) 96 | ## }) %>>% 97 | ## do.call(what=identical) %>>% 98 | ## {.} 99 | 100 | ## todo conversion operations for epidata df's to full dats & epidata history df's to dt's 101 | ## todo fail gracefully with curl errors in Epidata 102 | ## todo check augmentWeeklyDF input 103 | ## todo trimPartialPastSeasons setting to trim all incomplete (like min # being # of weeks in season) 104 | ## todo check fetching input, add default caching 105 | ## todo baseline as attr to trajectory, first.week.of.season and is.part.of.season.of.length function as attr's to full.dat? 106 | 107 | ####################### 108 | ## simclass.R ######### 109 | ####################### 110 | 111 | ## todo: sim objects (/ something with a new name) should include at least two 112 | ## options: (a) a constant, and (b) a list of ys and weights; this will require 113 | ## some refactoring. The br method should map constants to constants unless 114 | ## bootstrapping. Constants should not be represented as a single column with 115 | ## weight 1, because this cannot be distinguished from a single draw from a 116 | ## distribution with importance weight 1. 117 | 118 | ## todo make sure the weights for all the sim methods can be interpreted as 119 | ## effective number of draws 120 | 121 | ## todo proper metrics for multibin scores (including multi pwk) 122 | 123 | ## todo special treatment of new.dat whenever turned into a new.dat.sim 124 | 125 | ## xxx upsample_sim: concat sample to single copy of sim object, versus 126 | ## repeating the existing sim object as many times as possible and just filling 127 | ## in the remainder with samples 128 | 129 | ###################################### 130 | ## retrospective_forecasts.R ######### 131 | ###################################### 132 | 133 | ## fixme better dataset representation... list of data sources (history df's? ilinet, fluview baselines, metadata?, in.season, ...) and auxiliary information indexed in a uniform way for location and time 134 | ## todo interface for multiresolution (seasonal vs. weekly vs. ..., national vs. regions vs. ...) datasets and metadata, targets 135 | ## todo instead of faking new.dat when given a new.dat.sim, store and use one in new.dat.sim 136 | ## todo effective number of particles impacted by number of seasons used (as well as widths...) 137 | ## todo try the weighted bw function instead of the bw.SJnrd0 --- issue: needs 138 | ## to be called many times (more than the unweighted version --- the weights 139 | ## change) and would be slow (half the time appeared to be spent in bw 140 | ## calculations last check). 
141 | ## todo test some changes on earlier seasons 142 | ## todo kernel HMM approach 143 | ## todo GP approach 144 | ## todo curve decomposition (+ random walk) approach, regression onto #seasons approach 145 | 146 | ##################################### 147 | ## empirical.trajectories.R ######### 148 | ##################################### 149 | 150 | ## todo due to different seasons having different baselines, 151 | ## empirical.trajectories.sim will not produce the historical distribution for 152 | ## onsets (but will for the other flusight targets); optional scaling based on 153 | ## current and historical baselines to give historical onset distribution, to 154 | ## adjust percentages for differences in network composition from season to 155 | ## season (but changing the output from the historical distributions) (perhaps 156 | ## this scaling could happen during a pre-processing step for full.dat) 157 | 158 | ####################### 159 | ## cv_apply.R ######### 160 | ####################### 161 | 162 | ## todo better cv_apply interface 163 | ## todo object-oriented iterator design 164 | ## todo other iterator structures: not one iterator per input dim, but one output dim per iterator (+ the result dimensionality dims) 165 | ## todo non-CV versions 166 | ## todo similar functions on cartesian products 167 | ## xxx option for warm starts? 168 | ## xxx option for parallelism? 169 | ## xxx decide on dropping behavior (especially in some cases, LHS will always have 1 index in a given dimension) 170 | ## todo fix issues with dimension combining and naming behavior when there are single outputs; when to run simplify2array vs. not... 171 | 172 | ## todo better operations on scalar/vector/matrix/arrays: scalars 173 | ## distinguishable from length-1 vectors, drop=FALSE whenever it is an option, 174 | ## uniform interface for length/dim & names/dimnames, dplyr operations if 175 | ## possible. Look into tbl_cube. 176 | 177 | #################### 178 | ## twkde.R ######### 179 | #################### 180 | 181 | ## todo select twkde params 182 | ## todo fully parameterized bandwidth matrix, especially 183 | ## todo try ks package's kcde, reichlab/kcde 184 | ## xxx a single variable to balance between x and diff(x): a convex combination of the two 185 | 186 | ####################### 187 | ## ensemble.R ######### 188 | ####################### 189 | 190 | ## todo calculate degen EM results in log space, in Rcpp 191 | ## todo ridge penalty on point prediction coef's 192 | ## xxx should use =rq= for constrainedLADPtPredFit with the R= r= args if 193 | ## possible; it's probably faster 194 | ## todo weighted versions of ensemble weight fitting algorithms 195 | 196 | ############################### 197 | ## DESCRIPTION/*-forecasts.R ## 198 | ############################### 199 | 200 | ## consider byte-compilation 201 | -------------------------------------------------------------------------------- /epiforecast/R/utils.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 
5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' @import pipeR 23 | ##' @import R.utils 24 | NULL 25 | 26 | ## At least some versions of roxygen generate an error when documenting this package when the pipeR pipe would actually be called when sourcing in R files (rather than just appearing in generated function bodies). Work around this by defining an un-exported copy of the pipeR pipe operator: 27 | `%pipeR>>%` = pipeR::`%>>%` 28 | 29 | ##' A \code{seq} variant that produces a 0-length vector when \code{!(from <= 30 | ##' to)}. 31 | ##' 32 | ##' @param from starting number (or other object compatible with \code{seq}) 33 | ##' @param to ending number (or other object compatible with \code{seq}) 34 | ##' @param ... arguments to forward to \code{seq} 35 | ##' 36 | ##' @export 37 | Seq = function(from, to, ...) { 38 | if (from <= to) { 39 | ## Just delegate in the "nice" case: 40 | return (seq(from, to, ...)) 41 | } else { 42 | ## Return 0 elements of a reasonable type by taking 0 elements from 43 | ## =seq(from,from)=. 44 | return (seq(from, from)[integer(0)]) 45 | ## We could do =seq(from, to, ...)=, but that could potentially hurt 46 | ## performance and generate bugs for certain dispatches. 
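## Illustrative usage (comments only; not part of the original source):
##   Seq(3L, 5L)  # 3 4 5 -- same as seq(3L, 5L)
##   Seq(5L, 3L)  # integer(0), whereas seq(5L, 3L) would count down: 5 4 3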
47 | } 48 | } 49 | 50 | dat.to.matrix1 = function(dat, n) { 51 | dat <- match.dat(dat) 52 | n <- match.single.nonna.integer(n) 53 | sapply(dat, `[`, seq_len(n)) 54 | } 55 | dat.to.matrix2 = function(dat, n) { 56 | dat <- match.dat(dat) 57 | n <- match.single.nonna.integer(n) 58 | result = matrix(NA_real_, n, length(dat)) 59 | for (trajectory.i in seq_along(dat)) { 60 | trajectory = dat[[trajectory.i]] 61 | result[,trajectory.i] <- trajectory[seq_len(n)] 62 | } 63 | dimnames(result)[[2]] <- names(dat) 64 | return (result) 65 | } 66 | dat.to.matrix3 = function(dat, n) { 67 | dat <- match.dat(dat) 68 | n <- match.single.nonna.integer(n) 69 | result = matrix(NA_real_, n, length(dat)) 70 | inds = seq_len(n) 71 | for (trajectory.i in seq_along(dat)) { 72 | trajectory = dat[[trajectory.i]] 73 | result[,trajectory.i] <- trajectory[inds] 74 | } 75 | dimnames(result)[[2]] <- names(dat) 76 | return (result) 77 | } 78 | ## all.equal(dat.to.matrix1(dat, 52), dat.to.matrix2(dat, 52), dat.to.matrix3(dat, 52)) 79 | ## benchmark(dat.to.matrix1(dat,52),dat.to.matrix2(dat,52),dat.to.matrix3(dat,52),replications=100000) 80 | ## 1 dat.to.matrix1(dat, 52) 100000 35.548 4.814 35.564 0.004 81 | ## 2 dat.to.matrix2(dat, 52) 100000 7.948 1.076 7.952 0.000 82 | ## 3 dat.to.matrix3(dat, 52) 100000 7.384 1.000 7.388 0.000 83 | 84 | ##' Numeric matrix of the first \code{n} elements of each numeric vector in 85 | ##' \code{dat}. 86 | ##' 87 | ##' A more efficient implementation of \code{sapply(dat, `[`, seq_len(n))}. Any 88 | ##' vectors in \code{dat} with length less than \code{n} are extended with 89 | ##' \code{NA_real_}'s at the end. 90 | ##' 91 | ##' @param dat a list of numeric vectors 92 | ##' @param n a single integer: the number of elements to take from each vector 93 | ##' @return a \code{n}-by-\code{length(dat)} numeric matrix 94 | ##' 95 | ##' @examples 96 | ##' dat = list(11:15, 21:26) 97 | ##' dat.to.matrix(dat, 5) # (5x2: dat[[2]] is cut off) 98 | ##' dat.to.matrix(dat, 6) # (6x2: dat[[1]] is extended with NA_real_) 99 | ##' n = 3 100 | ##' identical(c(n, length(dat)), dim(dat.to.matrix(dat, n))) 101 | ##' 102 | ##' @export 103 | dat.to.matrix = dat.to.matrix3 104 | 105 | ##' Reshape arraylike into fewer but larger dimensions and/or permute (and optionally rename) dimensions; like tidyr::unite on arrays using R.utils::wrap.array 106 | ##' 107 | ##' Collapses specified array(like) object dimensions together to produce an 108 | ##' array with fewer but larger dimensions and an equal number of entries; 109 | ##' indices of each collapsed dimension refers to a Cartesian product over the 110 | ##' indices of the dimensions it was made from. This operation should be similar 111 | ##' or equivalent to melting the array, calling tidyr::unite, and casting back 112 | ##' into an array. 113 | ##' 114 | ##' Collapsed/united dimensions are ordered after all untouched dimensions. To 115 | ##' tweak the ordering or use this function to simply permute dimensions, 116 | ##' individual dimnames names can be included as their own singleton "sets" in 117 | ##' dnn.sets. 
118 | ##' 119 | ##' @param arraylike the arraylike object to reshape; must have the dimnames 120 | ##' names referred to in \code{{dnn.sets}} 121 | ##' 122 | ##' @param dnn.sets a list of character vectors; each character vector specifies 123 | ##' some dimensions to unite in the array; ordering within each vector 124 | ##' determines the ordering of indices in the resulting collapsed dimension 125 | ##' (as in R.utils::wrap.array); ordering between the vectors determines the 126 | ##' ordering of the collapsed dimensions in the result; names of vector 127 | ##' entries (e.g., \code{names(dnn.sets[[1]])}) are ignored; 128 | ##' \code{names(dnn.sets)}, if specified and nonblank, override automatic 129 | ##' names of the collapsed dimensions 130 | ##' 131 | ##' @param sep length-1 character vector; string used to combine dimnames for 132 | ##' collapsed dimensions, as well as automatic dimnames names for collapsed 133 | ##' dimensions 134 | ##' 135 | ##' @examples 136 | ##' arraylike = array(1:2^5, rep(2,5)) 137 | ##' dimnames(arraylike) <- 138 | ##' list(A=c("a1","a2"), B=c("b1","b2"), C=c("c1","c2"), 139 | ##' D=c("d1","d2"), E=c("e1","e2") 140 | ##' ) 141 | ##' ## Collapsing A&B, C&D: 142 | ##' dimnames(unite_arraylike(arraylike, list(c("A","B"), c("C","D")))) 143 | ##' ## Adjusting `sep` changes resulting dimnames and dimnames names: 144 | ##' dimnames(unite_arraylike(arraylike, list(c("A","B"), c("C","D")), sep="__")) 145 | ##' ## Result dimnames names can be manually specified: 146 | ##' names(dimnames(unite_arraylike(arraylike, list(c("A","B"), DVD=c("C","D")), sep="__"))) 147 | ##' ## Singleton sets can be used to permute dimensions, optionally renaming 148 | ##' ## them (changing the dimnames names): 149 | ##' ## Place dimension A at end: 150 | ##' names(dimnames(unite_arraylike(arraylike, list("A")))) 151 | ##' ## Permute all dims: 152 | ##' names(dimnames(unite_arraylike(arraylike, list("E","D","C","B","A")))) 153 | ##' ## Place some dimensions at end and rename one: 154 | ##' names(dimnames(unite_arraylike(arraylike, list("A","EEE"="E")))) 155 | ##' ## Collapsing and permuting are actually the same operation and can be mixed: 156 | ##' names(dimnames(unite_arraylike(arraylike, list(c("C","D"),"A","EEE"="E")))) 157 | ##' 158 | ##' @export 159 | unite_arraylike = function(arraylike, dnn.sets, sep=".") { 160 | src.dnns = names(dimnames(arraylike)) 161 | if (is.null(src.dnns)) { 162 | stop ('Arraylike must have named dimnames.') 163 | } 164 | dim.i.sets = dnn.sets %>>% 165 | lapply(function(selected.src.dnns) match(selected.src.dnns, src.dnns)) 166 | all.selected.dim.is = Reduce(c, dim.i.sets) 167 | all.selected.dnns = Reduce(c, dnn.sets) 168 | if (any(is.na(all.selected.dim.is))) { 169 | stop (paste0('Dimensions in dnn.sets were not found in arraylike: ', 170 | paste(all.selected.dnns[is.na(all.selected.dim.is)], collapse=", "))) 171 | } 172 | if (anyDuplicated(all.selected.dnns) != 0L) { 173 | stop (paste0('Dimension(s) referred to more than once in dnn.sets. Duplicates: ', 174 | paste(all.selected.dnns[duplicated(all.selected.dnns)], collapse=", "))) 175 | } 176 | untouched.dim.is = setdiff(seq_along(src.dnns), all.selected.dim.is) 177 | untouched.dnns = src.dnns[untouched.dim.is] 178 | if (any(untouched.dnns %in% all.selected.dnns)) { 179 | stop (paste0('Dimensions in dnn.sets could not be uniquely identified. 
Dimnames names duplicates referenced in dnn.sets: ', 180 | paste(untouched.dnns %>>% `[`(.%in%all.selected.dnns), collapse=", "))) 181 | } 182 | arraylike %>>% 183 | R.utils::wrap.array(c(as.list(untouched.dim.is), dim.i.sets), sep=sep) %>>% 184 | { 185 | automatic.union.names = sapply(dnn.sets, paste0, collapse=sep) 186 | names(dim.i.sets) <- namesp(dim.i.sets) %>>% 187 | {.[.==""] <- automatic.union.names[.==""]; .} 188 | names(dimnames(.)) <- c(src.dnns[untouched.dim.is], names(dim.i.sets)) 189 | . 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /epiforecast/data-raw/fluview.outdated.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, either version 2 of the License, or 12 | ## (at your option) any later version. 13 | ## 14 | ## epiforecast is distributed in the hope that it will be useful, 15 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ## GNU General Public License for more details. 18 | ## 19 | ## You should have received a copy of the GNU General Public License 20 | ## along with epiforecast. If not, see . 
21 | ## license_header end 22 | 23 | ## source("R/utils.R", chdir=TRUE) 24 | ## source("R/match.R", chdir=TRUE) 25 | ## source("R/weeks.R", chdir=TRUE) 26 | ## source("R/delphi_epidata.R", chdir=TRUE) 27 | ## source("R/loaders.R", chdir=TRUE) 28 | 29 | ## devtools::load_code() 30 | 31 | devtools::load_all() 32 | 33 | fluview.first.model.week = 21L 34 | fluview.area.names = c(sprintf("hhs%d",1:10),"nat") 35 | ## fluview.2003on.dfs = structure(lapply(fluview.area.names, function(area.name) { 36 | ## trimPartialPastSeasons(fetchEpidataDF("fluview", area.name, 37 | ## first.week.of.season=fluview.first.model.week, 38 | ## cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)), 39 | ## "wili", 52) 40 | ## }), names=fluview.area.names) 41 | ## fluview.2003on.full.dats = lapply(fluview.2003on.dfs, function(df) { 42 | ## full.dat = split(df$wili, df$season) # historical seasons + current season 43 | ## names(full.dat) <- sprintf("S%s", names(full.dat)) 44 | ## full.dat <- full.dat[names(full.dat)!="S2009"] 45 | ## full.dat 46 | ## }) 47 | fluview.2003on.full.dats = setNames(lapply(fluview.area.names, function(area.name) { 48 | fetchEpidataFullDat("fluview", area.name, "wili", 49 | min.points.in.season=52L, first.week.of.season=fluview.first.model.week, 50 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 51 | }), fluview.area.names) 52 | fluview.2003on.dats = lapply(fluview.2003on.full.dats, head, n=-1L) 53 | fluview.2003on.new.dats = lapply(fluview.2003on.full.dats, function(full.dat) tail(full.dat, 1L)[[1]]) 54 | 55 | ## todo document 56 | 57 | fluview.2003on.outdated.dats = fluview.2003on.dats 58 | fluview.2003on.outdated.new.dats = fluview.2003on.new.dats 59 | 60 | devtools::use_data(fluview.2003on.outdated.dats, fluview.2003on.outdated.new.dats, 61 | overwrite = TRUE) 62 | 63 | ## fixme should this just be a method? get.fluview.2003on.dats()? or something 64 | ## formatted like fetchEpidataDF but returning a dat and new.dat, or a full.dat? 65 | -------------------------------------------------------------------------------- /epiforecast/data/fluview.2003on.outdated.dats.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/data/fluview.2003on.outdated.dats.rda -------------------------------------------------------------------------------- /epiforecast/data/fluview.2003on.outdated.new.dats.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/data/fluview.2003on.outdated.new.dats.rda -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_baseline.R: -------------------------------------------------------------------------------- 1 | ##' @param baseline a "baseline level" for this dataset; roughly speaking, data 2 | ##' below this level does not grow like an epidemic; currently ignored by 3 | ##' most or all sim methods, but can be used as the \code{y.scale.baseline} 4 | ##' in \code{eb.sim} in \code{epiforecast.cpp14funs} by passing it through 5 | ##' the \code{control.list} argument. 
6 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_full.dat.R: -------------------------------------------------------------------------------- 1 | ##' @param full.dat list of (a) numeric vectors, one per past season, containing 2 | ##' historical trajectories, followed by (b) a numeric vector (trajectory), 3 | ##' numeric matrix (cbound trajectories), or sim object (list with $ys a 4 | ##' numeric matrix (cbound trajectories) and $weights a numeric vector 5 | ##' (associated weights)), with \code{NA}'s for all future or missing data 6 | ##' points to forecast or infer; currently only supports \code{NA}'s at future 7 | ##' points, not mixed in between non-\code{NA} data 8 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_invent.scalars.R: -------------------------------------------------------------------------------- 1 | ##' @param invent.scalars length-1 non-NA logical; if \code{TRUE}, treat unnamed 2 | ##' length-1 vector \code{x}'s as "scalars" with zero dimensions; if 3 | ##' \code{FALSE}, treat them as arrays with one dimension of size 1. 4 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_max.n.sims.R: -------------------------------------------------------------------------------- 1 | ##' @param max.n.sims single non-\code{NA} integer value or \code{NULL}: the 2 | ##' number of curves to sample from the inferred distribution 3 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/sim.method_template.R: -------------------------------------------------------------------------------- 1 | ##' @template param_full.dat 2 | ##' @template param_baseline 3 | ##' @template param_max.n.sims 4 | ##' @return a sim object --- a list with two components: 5 | ##' 6 | ##' \code{ys}: a numeric matrix, typically with multiple columns; each column is 7 | ##' a different possible trajectory for the current season, with NA's in the 8 | ##' input for the current season filled in with random draws from the forecasted 9 | ##' distribution, and non-\code{NA}'s (observed data) filled in with an imagined 10 | ##' resampling of noise based on the model (for some models, the non-\code{NA} 11 | ##' values will remain unchanged). 12 | ##' 13 | ##' \code{weights}: a numeric vector; assigns a weight to each column of 14 | ##' \code{ys}, which is used by methods relying on importance sampling. 
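##'
##' (Illustrative sketch, not part of the original template: under these
##' conventions, a minimal sim object holding two sampled length-52
##' trajectories with equal weight could be built as
##' \code{list(ys = matrix(rnorm(52 * 2), nrow = 52), weights = c(1, 1))}.)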
15 | -------------------------------------------------------------------------------- /epiforecast/meta.R: -------------------------------------------------------------------------------- 1 | ## Commands to create a vignette 2 | vignettename = "fetch_data" 3 | devtools::use_vignette(vignettename) 4 | 5 | -------------------------------------------------------------------------------- /epiforecast/src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // WeightedTabulateRcpp 14 | Rcpp::NumericVector WeightedTabulateRcpp(Rcpp::IntegerVector bin, Rcpp::IntegerVector nbins, Rcpp::NumericVector w); 15 | RcppExport SEXP _epiforecast_WeightedTabulateRcpp(SEXP binSEXP, SEXP nbinsSEXP, SEXP wSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type bin(binSEXP); 20 | Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nbins(nbinsSEXP); 21 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); 22 | rcpp_result_gen = Rcpp::wrap(WeightedTabulateRcpp(bin, nbins, w)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | 27 | static const R_CallMethodDef CallEntries[] = { 28 | {"_epiforecast_WeightedTabulateRcpp", (DL_FUNC) &_epiforecast_WeightedTabulateRcpp, 3}, 29 | {NULL, NULL, 0} 30 | }; 31 | 32 | RcppExport void R_init_epiforecast(DllInfo *dll) { 33 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 34 | R_useDynamicSymbols(dll, FALSE); 35 | } 36 | -------------------------------------------------------------------------------- /epiforecast/src/histograms_rcpp.cpp: -------------------------------------------------------------------------------- 1 | // author_header begin 2 | // Copyright (C) 2016 Logan C. Brooks 3 | // 4 | // This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | // 6 | // Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 
7 | // author_header end 8 | // license_header begin 9 | // epiforecast is free software: you can redistribute it and/or modify 10 | // it under the terms of the GNU General Public License as published by 11 | // the Free Software Foundation, version 2 of the License. 12 | // 13 | // epiforecast is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | // 18 | // You should have received a copy of the GNU General Public License 19 | // along with epiforecast. If not, see . 20 | // license_header end 21 | 22 | #include 23 | 24 | //' Weighted, more \code{nbins}-restrictive version of \code{base::tabulate} 25 | //' 26 | //' @param bin integer-compatible vector; entries must be non-NA and between 1 27 | //' and \code{nbins}; these indices denote entries in the result vector to which 28 | //' the corresponding weights in \code{w} should be added 29 | //' @param nbins single non-NA, non-negative integer; length of the vector to 30 | //' return 31 | //' @param w numeric-compatible vector of the same length as \code{bin}; weights 32 | //' corresponding to the indices in \code{bin} 33 | //' @return numeric vector of length \code{nbins}; the \code{i}th entry is like 34 | //' \code{sum(w[bin==i])}, but with a naive summation algorithm 35 | //' 36 | //' @useDynLib epiforecast 37 | //' @export 38 | // [[Rcpp::export(name="weighted_tabulate")]] 39 | Rcpp::NumericVector WeightedTabulateRcpp 40 | ( 41 | Rcpp::IntegerVector bin, 42 | Rcpp::IntegerVector nbins, 43 | Rcpp::NumericVector w 44 | ) { 45 | if (bin.size() != w.size()) { 46 | ::Rf_error("Invalid input: length(bin) != length(w)"); 47 | } 48 | if (nbins.size() != 1 || 49 | Rcpp::IntegerVector::is_na(* nbins.begin()) || 50 | * nbins.begin() < 0) { 51 | ::Rf_error("Invalid input: nbins is not a single, non-NA, non-negative integer."); 52 | } 53 | Rcpp::IntegerVector::stored_type nbins0 = * nbins.begin(); 54 | Rcpp::NumericVector result(nbins0); // 0-initialized 55 | Rcpp::IntegerVector::iterator bin_it = bin.begin(); 56 | Rcpp::NumericVector::iterator w_it = w.begin(); 57 | for(; bin_it != bin.end(); ++bin_it, ++w_it) { 58 | if (Rcpp::IntegerVector::is_na(*bin_it)) { 59 | ::Rf_error("Invalid input: any(is.na(bin))"); 60 | } 61 | Rcpp::IntegerVector::stored_type index = *bin_it - 1; 62 | if (index < 0 || index >= nbins0) { 63 | ::Rf_error("Invalid input: !all(1L <= bin & bin <= nbins)"); 64 | } 65 | result[index] += *w_it; 66 | } 67 | return result; 68 | } 69 | 70 | // clang-format off 71 | // todo system includes 72 | /* Local Variables: */ 73 | /* clang-format-style: "Google" */ 74 | /* flycheck-clang-language-standard: "c++14" */ 75 | /* flycheck-gcc-language-standard: "c++14" */ 76 | /* flycheck-clang-include-path: ("/usr/local/lib/R/site-library/Rcpp/include/" "/usr/share/R/include/") */ 77 | /* flycheck-gcc-include-path: ("/usr/local/lib/R/site-library/Rcpp/include/" "/usr/share/R/include/") */ 78 | /* End: */ 79 | // clang-format on 80 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-br.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. 
Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the br.sim() function..") 23 | 24 | area.name = "hhs1" 25 | 26 | ## Make a sim object from fluview. 27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 31 | mysim = br.sim(full.dat, max.n.sims=2L) 32 | 33 | ## Tests: 34 | 35 | test_that("Returns object of class 'sim'.", { 36 | expect_equal(class(mysim),"sim") 37 | }) 38 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-check.file.contents.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. 
Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the check.file.contents() function..") 23 | 24 | ## Setup 25 | dummytable = matrix(c(runif(n=9,1,3),NA),ncol=5) 26 | dummytable.full = matrix(runif(n=10,1,3),ncol=5) 27 | colnames(dummytable.full) = c(1997:2001) 28 | 29 | ## Tests: 30 | 31 | test_that("Missing column names returns error.", { 32 | wrong.names = c("a","b","c","","d") 33 | colnames(dummytable) = wrong.names 34 | write.table(dummytable, 35 | file = "a.csv", 36 | col.names = T, 37 | row.names=F, 38 | sep=",") 39 | expect_error(check.file.contents("a.csv")) 40 | file.remove("a.csv") 41 | }) 42 | 43 | 44 | test_that("Column names containing NA returns error.", { 45 | wrong.names = c(NA,1:4) 46 | colnames(dummytable) = wrong.names 47 | write.table(dummytable, 48 | file = "a.csv", 49 | col.names = T, 50 | row.names= F, 51 | sep=",") 52 | expect_error(check.file.contents("a.csv")) 53 | file.remove("a.csv") 54 | }) 55 | 56 | 57 | 58 | test_that("Last column being full returns error.", { 59 | write.table(dummytable.full, 60 | file = "a.csv", 61 | col.names = T, 62 | row.names=F, 63 | sep=",") 64 | expect_error(check.file.contents("a.csv")) 65 | file.remove("a.csv") 66 | }) 67 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-read.from.file.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 
7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the read.from.file() function..") 23 | 24 | ## ## Setup 25 | ## dummytable = matrix(c(runif(n=9,1,3),NA),ncol=5) 26 | ## colnames(dummytable) = c(1997:2001) 27 | 28 | 29 | ## ## Tests: 30 | ## test_that("Return type is list of vector", { 31 | ## write.table(dummytable, 32 | ## file = "a.csv", 33 | ## col.names = T, 34 | ## row.names=F, 35 | ## sep=",") 36 | ## mydat = read.from.file("a.csv") 37 | ## mydat 38 | ## expect_equal("list", typeof(mydat)) 39 | ## file.remove("a.csv") 40 | ## }) 41 | 42 | 43 | ## Fetch some data 44 | area.name = "hhs1" 45 | hhs1.dat = fetchEpidataFullDat("fluview", area.name, "wili", 46 | min.points.in.season=52L, 47 | first.week.of.season = 21L, 48 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 49 | alt.names1=paste("season",1:length(names(hhs1.dat))) 50 | alt.names2 = alt.names1; alt.names2[5] = "" 51 | alt.names3 = rep("", length(names(hhs1.dat))) 52 | 53 | list_dat_to_matrix <- function(vec_list) { 54 | stopifnot(length(vec_list) > 0L) 55 | length_range <- range(vapply(vec_list, length, integer(1L))) 56 | if (diff(length_range) > 1L) { 57 | stop ('Vectors in the list cannot vary in length by more than 1; please make sure they are repeated or filled with NAs appropriately.') 58 | } 59 | if (length_range[[1L]] == 0L) { 60 | stop ('Vectors in the list must all be of positive length.') 61 | } 62 | max_length <- length_range[[2L]] 63 | vec_list <- lapply(vec_list, function(vec) { 64 | # copy the last elt to fill the gap 65 | c(vec, rep(vec[[length(vec)]], max_length - length(vec))) 66 | }) 67 | do.call(cbind, vec_list) 68 | } 69 | 70 | ## When one label is missing 71 | 72 | 73 | ## Create csv with /no/ names 74 | test_that("When alls labels are missing, error is thrown", { 75 | filename="./all.names.missing.file.csv" 76 | all.names.missing.hhs1.dat = hhs1.dat 77 | names(all.names.missing.hhs1.dat) = alt.names3 78 | all.names.missing.hhs1.dat = list_dat_to_matrix(all.names.missing.hhs1.dat) 79 | all.names.missing.hhs1.dat = all.names.missing.hhs1.dat[-53,] 80 | write.csv(all.names.missing.hhs1.dat, file = filename, row.names=FALSE) 81 | expect_error(full.dat = read.from.file(filename)) 82 | }) 83 | 84 | ## Create csv with one name missing 85 | test_that("When one label is missing, error is thrown", { 86 | filename="./one.name.missing.file.csv" 87 | one.name.missing.hhs1.dat = hhs1.dat 88 | names(one.name.missing.hhs1.dat) = alt.names2 89 | one.name.missing.hhs1.dat = list_dat_to_matrix(one.name.missing.hhs1.dat) 90 | one.name.missing.hhs1.dat = one.name.missing.hhs1.dat[-53,] 91 | write.csv(one.name.missing.hhs1.dat, file = filename, row.names=FALSE) 92 | expect_error(full.dat = read.from.file(filename)) 93 | }) 94 | 95 | ## Last season is not partially observed 96 | test_that("When last season is not partially observed, error is thrown", 
{ 97 | filename="./last.column.weird.file.csv" 98 | last.column.weird.hhs1.dat = list_dat_to_matrix(hhs1.dat) 99 | write.csv(last.column.weird.hhs1.dat, file = filename,row.names=FALSE) 100 | expect_error(full.dat = read.from.file(filename)) 101 | }) 102 | 103 | ## CSV is correct, so everything should be fine here. 104 | test_that("When last season is not partially observed, error is thrown", { 105 | filename="./correct.csv" 106 | correct.hhs1.dat = list_dat_to_matrix(hhs1.dat) 107 | correct.hhs1.dat = correct.hhs1.dat[-53,] 108 | write.csv(correct.hhs1.dat, file = filename,row.names=FALSE) 109 | full.dat = read.from.file(filename) 110 | expect_equal(class(full.dat), "list") 111 | }) 112 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-twkde.markovian.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the twkde.markovian.sim() function..") 23 | 24 | area.name = "hhs1" 25 | 26 | ## Make a sim object from fluview. 
27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 31 | mysim = twkde.markovian.sim(full.dat, max.n.sims=2L) 32 | 33 | ## Tests: 34 | test_that("Returns object of class 'sim'.", { 35 | expect_equal(class(mysim),"sim") 36 | }) 37 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-twkde.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the twkde.sim() function..") 23 | 24 | area.name = "hhs1" 25 | 26 | ## Make a sim object from fluview. 27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 31 | mysim = twkde.sim(full.dat, max.n.sims=2L) 32 | 33 | ## Tests: 34 | test_that("Returns object of class 'sim'.", { 35 | expect_equal(class(mysim),"sim") 36 | }) 37 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test_match.arg.or.default.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. 
Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, either version 2 of the License, or 12 | ## (at your option) any later version. 13 | ## 14 | ## epiforecast is distributed in the hope that it will be useful, 15 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ## GNU General Public License for more details. 18 | ## 19 | ## You should have received a copy of the GNU General Public License 20 | ## along with epiforecast. If not, see . 21 | ## license_header end 22 | 23 | parent_function = function(ch1=letters[1:5], ch2=c("AAA","AAB","BBB"), 24 | num1=1:5, int1=6L:10L, 25 | list1=list(1:2,3:4), list2=list(NULL,"a",c(two=2),1:5)) { 26 | return (list( 27 | ch1=match.arg.else.default(ch1), 28 | ch2=match.arg.else.default(ch2), 29 | num1=match.arg.else.default(num1), 30 | int1=match.arg.else.default(int1), 31 | list1=match.arg.else.default(list1), 32 | list2=match.arg.else.default(list2) 33 | )) 34 | } 35 | 36 | ## Return default on missing: 37 | expect_equal(parent_function(), 38 | list(ch1="a", ch2="AAA", num1=1, int1=6L, list1=1:2, list2=NULL)) 39 | 40 | ## Return default on NULL: 41 | expect_equal(parent_function(NULL, NULL, NULL, NULL, NULL, NULL), 42 | list(ch1="a", ch2="AAA", num1=1, int1=6L, list1=1:2, list2=NULL)) 43 | 44 | ## Allow partial matches, all.equal ignoring attributes: 45 | expect_equal(parent_function("b", c(extraneous.name="B"), 3L, 8.00000000001, c(p=3,q=4), 2), 46 | list(ch1="b", ch2="BBB", num1=3.0, int1=8L, list1=3:4, list2=c(two=2))) 47 | 48 | ## Return default with warning on mismatched inputs: 49 | expect_equal(suppressWarnings(parent_function("q", "A", "nonnumeric", 11L, 1:4, c("A","B","C"))), 50 | list(ch1="a", ch2="AAA", num1=1, int1=6L, list1=1:2, list2=NULL)) 51 | expect_warning(parent_function("q")) 52 | expect_warning(parent_function(,"A")) 53 | expect_warning(parent_function(,,"nonnumeric")) 54 | expect_warning(parent_function(,,,11L)) 55 | expect_warning(parent_function(,,,,1:4)) 56 | expect_warning(parent_function(,,,,,c("A","B","C"))) 57 | 58 | ## Produce error on inappropriate inputs: 59 | expect_error(parent_function(3), "length-1 character") 60 | expect_error(parent_function(letters[1:2]), "length-1 character") 61 | 62 | ## todo produce error on inappropriate =choices= 63 | 
-------------------------------------------------------------------------------- /epiforecast/vignettes/fetch_data.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fetching / Loading Data" 3 | author: "Justin" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Fetch data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | First, set up by loading the package and setting the working directory: 13 | ```{r, eval=TRUE, results='asis'} 14 | library(devtools) 15 | outputdir = "." 16 | path_to_Rpkg_directory = "." 17 | setwd(path_to_Rpkg_directory) 18 | load_all() 19 | ``` 20 | 21 | Here is an example. Let us try to fetch national influenza intensity measurements (called "weighted ILI", or "wILI" for short) from the `fluview` data source of the Delphi Epidata API using the function `fetchEpidataDF()`: 22 | ```{r, eval=TRUE, results='markup'} 23 | fluview.nat.all.df = fetchEpidataDF(source = "fluview", 24 | area = "nat", 25 | first.week.of.season=21L) 26 | ## See first few lines 27 | head(fluview.nat.all.df,3) 28 | tail(fluview.nat.all.df,3) 29 | ``` 30 | 31 | What if we wanted to fetch Google Flu Trends (GFT) measurements using the same function, with the same date range and settings? 32 | ```{r, eval=TRUE, results='asis'} 33 | gft.nat.all.df = fetchEpidataDF(source = "gft", 34 | area = "nat", 35 | first.week.of.season=21L) 36 | ``` 37 | Note that GFT measurements are on 1/100th of the scale of wILI measurements. 38 | 39 | There are also 10 sub-regions (HHS regions) of the United States whose influenza intensity measurements are available; they can be queried as follows: 40 | ```{r, eval=TRUE, results='asis'} 41 | gft.reg.dat = fetchEpidataDF(source = "gft", 42 | area ="hhs1") 43 | fluview.reg.dat = fetchEpidataDF(source = "fluview", 44 | area ="hhs1") 45 | ``` 46 | 47 | How would you query data over the same date range, but from two or more sources? Use the wrapper `fetchEpidataDF.multiple.sources()`: 48 | ```{r, eval=TRUE} 49 | all.reg.dat = fetchEpidataDF.multiple.sources(source = c("gft", "fluview"), area = "hhs1") 50 | ``` 51 | 52 | -------------------------------------------------------------------------------- /epiforecast/vignettes/fetch_data[woven].md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fetching / Loading Data" 3 | author: "Justin" 4 | date: "2016-07-17" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Fetch data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | First, set up by loading the package and setting the working directory: 13 | 14 | 15 | ``` 16 | ## Loading epiforecast 17 | ``` 18 | 19 | Here is an example. 
20 | 21 | 22 | ```r 23 | ## helper function to merge gft with flu 24 | mymerge = function(df1, df2, merge.names = "", merge.by = ""){#colnames.to.merge 25 | merge.dat = merge(df1,df2,by=c("year","week","date"),all=T) 26 | merge.dat[,"week"] = as.numeric(merge.dat[,"week"]) 27 | merge.dat[,"year"] = as.numeric(merge.dat[,"year"]) 28 | ord.by.dat = order(merge.dat[,c("year")],merge.dat[,c("week")]) 29 | merge.dat = merge.dat[ord.by.dat,] 30 | merge.dat = merge.dat[,c("season.y","year","week","date","gft.num","wili")] 31 | colnames(merge.dat)[colnames(merge.dat)=="season.y"] = "season" 32 | return(merge.dat) 33 | } 34 | 35 | 36 | ## National 37 | filename = file.path(outputdir, "fluview_nat_df.csv") 38 | fluview.nat.all.df = # If I need just 2003 and on, I do #fluview.nat.recent.df() 39 | trimPartialPastSeasons(fetchEpidataDF("fluview", "nat", 40 | first.week.of.season=21L, 41 | cache.file.prefix="fluview_nat_allfetch.Rdata"), 42 | "wili", 43 | min.points.in.season=33L) 44 | 45 | fluview.nat.all.df = fluview.nat.all.df[,c("region","wili","year","week","date","season","model.week")] 46 | gft.nat.df = fetchEpidataDF("gft","nat") 47 | gft.nat.df = gft.nat.df[,c("location","year","week","model.week","date","season","num")] 48 | colnames(gft.nat.df)[which(colnames(gft.nat.df) == "num")] = "gft.num" 49 | nat.dat = mymerge(df1=gft.nat.df, df2=fluview.nat.all.df) 50 | cat("writing national data",fill=T) 51 | write.csv(nat.dat, file=filename,row.names=F) 52 | 53 | 54 | ## By region 55 | regs = paste0("reg",1:10) 56 | hhss = paste0("hhs",1:10) 57 | for(jj in 1:10){ 58 | cat("writing region",jj, fill=T) 59 | myreg = regs[jj] 60 | myhhs = hhss[jj] 61 | cachename = paste0("fluview_",myhhs,"_fetch.Rdata") 62 | filename = file.path(outputdir, paste0("fluview_",myhhs,"_df.csv")) 63 | regdat = trimPartialPastSeasons(fetchEpidataDF("fluview", myhhs, 64 | first.week.of.season=21L), 65 | ## cache.file.prefix=cachename), 66 | "wili", min.points.in.season=33L) 67 | regdat2 = regdat[,c("region","wili","year","week","date","season","model.week")] 68 | gft.reg.dat = fetchEpidataDF("gft","hhs1") 69 | colnames(gft.reg.dat)[which(colnames(gft.reg.dat) == "num")] = "gft.num" 70 | reg.dat = mymerge( df1=gft.reg.dat, df2=regdat2) 71 | 72 | # write the file 73 | write.csv(reg.dat, file = filename, row.names=F) 74 | } 75 | ``` 76 | -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- 
/epiforecast/vignettes/figure/unnamed-chunk-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-2-2.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /epiforecast/vignettes/make-forecasts-br.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Making forecasts with basis regression" 3 | author: "Justin" 4 | date: "`r Sys.Date()`" 5 | header-includes: 6 | - \usepackage{bbm} 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{Making forecasts with BR} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | 13 | --- 14 | First, set up by loading the package and setting the working directory: 15 | ```{r, eval=TRUE, results='asis'} 16 | library(devtools) 17 | outputdir = "." 18 | path_to_Rpkg_directory = "." 19 | setwd(path_to_Rpkg_directory) 20 | load_all() 21 | ``` 22 | 23 | Let us first fetch national influenza intensity measurements (called "weighted ILI", or "wILI" for short) from the `fluview` data source of the Delphi Epidata API using the function `fetchEpidataDF()`: 24 | ```{r, eval=TRUE, results='markup'} 25 | ## Bare minimum 26 | fluview.nat.df = fetchEpidataDF(source = "fluview", 27 | area = "nat", 28 | first.week.of.season=21L) 29 | 30 | ## And with some more options 31 | area.name = "nat" 32 | fluview.nat.df = fetchEpidataFullDat(source = "fluview", area = "nat", 33 | signal.ind = "wili", 34 | min.points.in.season = 52L, 35 | first.week.of.season = 21L, 36 | cache.file.prefix = sprintf("fluview_%s_fetch.Rdata", area.name)) 37 | ``` 38 | A short explanation: use `wili` as the `signal.ind` for the `source=fluview` option and `gft` for the `source=gft` option. `min.points.in.season` should be 52 if you are at forecasting time and would like to use only the fully observed historical seasons. The seemingly peculiar choice of `first.week.of.season=21` is a detail in the modeling and implementation of BR (and other methods, too!) that comes from the fact that the 21st _calendar_ week of any given year is a convenient, customary cutoff around which the trajectories of flu intensity measurements, when plotted, appear to be centered (on average). 39 | 40 | See the [fetch_data](fetch_data.Rmd) vignette for more details about fetching data. 
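As a rough, illustrative check (not part of the original vignette, and written under the assumption that `fluview.nat.df` above is a `full.dat`-style list with one trajectory per season), you can inspect how much data each season contributes and how much of the current season remains to be forecast:
```{r, eval=FALSE}
## Number of stored observations per season:
sapply(fluview.nat.df, length)
## NA's mark the not-yet-observed weeks of the partially observed current season:
sum(is.na(fluview.nat.df[[length(fluview.nat.df)]]))
```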
41 | 42 | Let's focus our attention on the 'hhs1' region: 43 | ```{r, eval=TRUE, results='markup'} 44 | area.name = "hhs1" 45 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 46 | min.points.in.season=52L, 47 | first.week.of.season = 21L, 48 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 49 | ``` 50 | 51 | The default settings of BR is straightforward to use 52 | ```{r, eval=TRUE, results='markup'} 53 | # Try BR with basic options 54 | mysim = br.sim(full.dat, max.n.sims=1000) 55 | plot(mysim) 56 | print(mysim) 57 | targets = target_forecast(mysim, "pht") 58 | print(targets) 59 | ``` 60 | -------------------------------------------------------------------------------- /epiforecast/vignettes/test.R: -------------------------------------------------------------------------------- 1 | library(devtools) 2 | outputdir = "." 3 | path_to_Rpkg_directory = "." 4 | setwd(path_to_Rpkg_directory) 5 | load_all() 6 | build() 7 | document() 8 | check() 9 | 10 | -------------------------------------------------------------------------------- /epiforecast_0.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast_0.0.1.pdf -------------------------------------------------------------------------------- /sample/census_weights.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/sample/census_weights.rds -------------------------------------------------------------------------------- /sample/combine-spreadsheets.R: -------------------------------------------------------------------------------- 1 | library("pipeR") 2 | 3 | natreg.stat.spreadsheet.dir = "~/files/nosync/epiforecast-epiproject/flusight-natreg-run/stat-spreadsheets" 4 | state.stat.spreadsheet.dir = "~/files/nosync/epiforecast-epiproject/flusight-state-run/stat-spreadsheets" 5 | combined.stat.spreadsheet.dir = "~/files/nosync/epiforecast-epiproject/flusight-combined-run/stat-spreadsheets" 6 | if (!dir.exists(combined.stat.spreadsheet.dir)) { 7 | dir.create(combined.stat.spreadsheet.dir, recursive=TRUE) 8 | } 9 | 10 | spreadsheet_name_to_epi_week = function(spreadsheet.name) { 11 | as.integer(substring(spreadsheet.name, 3L,4L)) 12 | } 13 | 14 | spreadsheet_info_tbl = function(spreadsheet.names, tag) { 15 | duplicate.info.tbl = 16 | tibble::tibble(spreadsheet.name=spreadsheet.names) %>>% 17 | dplyr::mutate(epi.week=spreadsheet_name_to_epi_week(spreadsheet.name)) %>>% 18 | dplyr::group_by(epi.week) %>>% 19 | dplyr::mutate(epi.week.is.duplicated=n()>1L) %>>% 20 | dplyr::ungroup() 21 | duplicated.epi.weeks = duplicate.info.tbl %>>% 22 | dplyr::filter(epi.week.is.duplicated) %>>% 23 | dplyr::distinct(epi.week) %>>% 24 | (epi.week) 25 | print(paste0("Don't know what to do in case of multiple spreadsheets for the same EW in the same project ",tag,"; skipping spreadsheets for the following EW's (if any): ", paste(duplicated.epi.weeks, collapse=", "))) 26 | return ( 27 | duplicate.info.tbl %>>% 28 | dplyr::filter(!epi.week.is.duplicated) %>>% 29 | dplyr::select(-epi.week.is.duplicated) %>>% 30 | stats::setNames(c(paste0(tag,".spreadsheet.name"), names(.)[-1L])) %>>% 31 | {.} 32 | ) 33 | } 34 | 35 | natreg.stat.spreadsheet.info.tbl = 36 | spreadsheet_info_tbl(list.files(natreg.stat.spreadsheet.dir), "natreg.stat") 37 | state.stat.spreadsheet.info.tbl = 
38 | spreadsheet_info_tbl(list.files(state.stat.spreadsheet.dir), "state.stat") 39 | 40 | print("Skipping EW in natreg but not state (if any):") 41 | print(setdiff(natreg.stat.spreadsheet.info.tbl[["epi.week"]], 42 | state.stat.spreadsheet.info.tbl[["epi.week"]])) 43 | print("Skipping EW in state but not natreg (if any):") 44 | print(setdiff(state.stat.spreadsheet.info.tbl[["epi.week"]], 45 | natreg.stat.spreadsheet.info.tbl[["epi.week"]])) 46 | 47 | combined.stat.spreadsheet.info.tbl = 48 | dplyr::inner_join(natreg.stat.spreadsheet.info.tbl, state.stat.spreadsheet.info.tbl, by="epi.week") 49 | 50 | combined.stat.spreadsheet.info.tbl %>>% 51 | dplyr::rowwise() %>>% 52 | dplyr::mutate(success={ 53 | tryCatch({ 54 | natreg.spreadsheet = readr::read_csv(file.path(natreg.stat.spreadsheet.dir, 55 | natreg.stat.spreadsheet.name), 56 | col_types=readr::cols()) 57 | state.spreadsheet = readr::read_csv(file.path(state.stat.spreadsheet.dir, 58 | state.stat.spreadsheet.name), 59 | col_types=attr(natreg.spreadsheet,"spec")) 60 | combined.spreadsheet = dplyr::bind_rows(natreg.spreadsheet, state.spreadsheet) 61 | natreg.date = as.Date(stringr::str_sub(natreg.stat.spreadsheet.name, -14L, -5L)) 62 | state.date = as.Date(stringr::str_sub(natreg.stat.spreadsheet.name, -14L, -5L)) 63 | combined.date = max(natreg.date, state.date) 64 | combined.stat.spreadsheet.name = 65 | natreg.stat.spreadsheet.name %>>% 66 | stringr::str_replace_all("Delphi-Stat", "Delphi-Stat-combined") %>>% 67 | stringr::str_replace_all(as.character(natreg.date), as.character(combined.date)) %>>% 68 | {.} 69 | readr::write_csv(combined.spreadsheet, 70 | file.path(combined.stat.spreadsheet.dir, combined.stat.spreadsheet.name)) 71 | TRUE 72 | }, 73 | error=function(e) { 74 | FALSE 75 | }) 76 | }) %>>% 77 | dplyr::select(success, dplyr::everything()) 78 | -------------------------------------------------------------------------------- /sample/covid19ilinet-templates.R: -------------------------------------------------------------------------------- 1 | 2 | covid19ilinet.natreg.spreadsheet.template = 3 | fetchUpdatingResource( 4 | function() { 5 | read.csv(textConnection(RCurl::getURL("https://raw.githubusercontent.com/cdcepi/COVID-19-ILI-forecasting/master/templates-and-data/covid19-ili-forecast-national-regional-template.csv")), check.names=FALSE, stringsAsFactors=FALSE) 6 | }, 7 | function(fetch.response) { 8 | return () 9 | }, 10 | cache.file.prefix=file.path(epidata.cache.dir,"covid19ilinet_natreg_spreadsheet_template"), 11 | cache.invalidation.period=as.difftime(8L, units="hours"), 12 | force.cache.invalidation=FALSE 13 | ) %>>% 14 | tibble::as_tibble() 15 | 16 | covid19ilinet.state.spreadsheet.template = 17 | fetchUpdatingResource( 18 | function() { 19 | read.csv(textConnection(RCurl::getURL("https://raw.githubusercontent.com/cdcepi/COVID-19-ILI-forecasting/master/templates-and-data/covid19-ili-forecast-state-template.csv")), check.names=FALSE, stringsAsFactors=FALSE) 20 | }, 21 | function(fetch.response) { 22 | return () 23 | }, 24 | cache.file.prefix=file.path(epidata.cache.dir,"covid19ilinet_state_spreadsheet_template"), 25 | cache.invalidation.period=as.difftime(8L, units="hours"), 26 | force.cache.invalidation=FALSE 27 | ) %>>% 28 | tibble::as_tibble() 29 | -------------------------------------------------------------------------------- /sample/gen-prospective-component-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | gc() 3 | s.prospective.seasons = 
current.issue.sw[["season"]] %>>% 4 | stats::setNames(paste0(.,"/",.+1L)) %>>% 5 | with_dimnamesnames("Season") 6 | w.prospective.model.weeks = current.issue.sw[["model.week"]] %>>% 7 | stats::setNames(paste0("MW",.)) %>>% 8 | with_dimnamesnames("Model Week") 9 | 10 | print("Current season: select available data") 11 | swg.prospective.voxel.data = map_join( 12 | get_voxel_data, 13 | s.prospective.seasons, w.prospective.model.weeks, g.epigroups, 14 | last.losocv.issue) 15 | 16 | ## Version of the above grouped by season and model week (an array of arrays of objects): 17 | sw.g.prospective.voxel.data = map_join( 18 | function(swg.array, s,w) { 19 | swg.array[s,w,,drop=FALSE] %>>% 20 | select_dims(1:2, "drop") # drop s & w dimensions, but keep g dimension even if size 1 21 | }, 22 | no_join(swg.prospective.voxel.data), 23 | named_arrayvec_to_name_arrayvec(s.prospective.seasons), 24 | named_arrayvec_to_name_arrayvec(w.prospective.model.weeks), 25 | lapply_variant=lapply, show.progress=FALSE 26 | ) 27 | 28 | print("Current season: generate backcasts") 29 | swgb.prospective.full.dats = map_join( 30 | get_backcast, 31 | swg.prospective.voxel.data, sw.g.prospective.voxel.data, source.name, signal.name, b.backcasters, 32 | epidata_df_to_chopped_trajectory_df=epidata_df_to_chopped_trajectory_df 33 | , shuffle=FALSE 34 | , cache.prefix=file.path(epiproject.cache.dir,"swgb.prospective.full.dats") 35 | ) 36 | 37 | print("Current season: generate component forecasts") 38 | swgbf.prospective.component.target.multicasts = map_join( 39 | target_multicast, 40 | swg.prospective.voxel.data, swgb.prospective.full.dats, f.forecasters, 41 | target_trajectory_preprocessor, 42 | no_join(t.target.specs), 43 | no_join(m.forecast.types), 44 | full_dat_fixup=full_dat_fixup 45 | , cache.prefix=file.path(epiproject.cache.dir,"swgbf.prospective.component.target.multicasts") 46 | ) 47 | 48 | swgtmbf.prospective.component.forecast.values = 49 | swgbf.prospective.component.target.multicasts %>>% 50 | ## first, get forecast.value's in swgbf.tm format: 51 | map_join(f=`[[`, "forecast.values") %>>% 52 | ## un-nest lists to get swgbftm format: 53 | map_join(f=`[[`, 54 | named_arrayvec_to_name_arrayvec(t.target.specs), 55 | named_arrayvec_to_name_arrayvec(m.forecast.types) 56 | ) %>>% 57 | ## permute dimension order to get desired swgtmbf format: 58 | aperm(c(1:3,6:7,4:5)) 59 | -------------------------------------------------------------------------------- /sample/gen-prospective-ensemble-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | ## Tack on additional indexer_list's for prospective forecasting: 3 | e.prospective.ensemble.weighting.scheme.swgtmbf.indexer.lists = 4 | map_join(function(wgt.indexer.list) { 5 | c(list(all=NULL), # use all past seasons 6 | wgt.indexer.list, # ensemble choice for weeks, epigroups, targets 7 | list(each=NULL), # separately for each forecast type 8 | list(all=NULL, all=NULL) # always group together all backcasters, forecasters 9 | ) 10 | }, e.ensemble.partial.weighting.scheme.wgt.indexer.lists) 11 | 12 | print("Current season: fit ensemble weightsets") 13 | e.prospective.ensemble.weightsets = map_join( 14 | function(weighting.scheme.indexer.list) { 15 | get_ensemble_weightset(swgtmbf.retro.component.forecast.values, 16 | swgtm.retro.observed.values, 17 | m.forecast.types, 18 | weighting.scheme.indexer.list) 19 | }, 20 | e.prospective.ensemble.weighting.scheme.swgtmbf.indexer.lists 21 | , 
cache.prefix=file.path(epiproject.cache.dir,"e.prospective.ensemble.weightsets") 22 | ) 23 | 24 | ## e.prospective.ensemble.weightsets = map_join( 25 | ## function(weighting.scheme.indexer.list) { 26 | ## stop ("Result was not computed yet.") 27 | ## }, 28 | ## e.prospective.ensemble.weighting.scheme.swgtmbf.indexer.lists 29 | ## , cache.prefix=file.path(epiproject.cache.dir,"e.prospective.ensemble.weightsets") 30 | ## ) 31 | 32 | print("Current season: generate ensemble forecasts") 33 | swgtme.prospective.ensemble.forecast.values = lapply( 34 | e.prospective.ensemble.weightsets, 35 | function(weightset) { 36 | map_join( 37 | ## `*`, # bad if only non-NA's are 0-weighted 38 | function(forecast.value, weight) { 39 | if (weight == 0) { 40 | weight <- NA 41 | } 42 | weight * forecast.value 43 | }, 44 | swgtmbf.prospective.component.forecast.values, weightset, 45 | eltname.mismatch.behavior="intersect" 46 | ) %>>% apply(1:5, Reduce, f=function(x,y) { 47 | dplyr::coalesce(x+y, x, y) 48 | }) 49 | }) %>>% 50 | simplify2arrayp() %>>% 51 | {names(dimnames(.))[[6L]] <- dimnamesnamesp(e.prospective.ensemble.weightsets); .} 52 | ## todo make this work for subsets and other indexers --- get index sets from 53 | ## indexers 54 | 55 | ## Calculate CV ensemble forecasts as target.multicasts 56 | swge.prospective.ensemble.target.multicasts = 57 | apply(swgtme.prospective.ensemble.forecast.values, c(1:3,6L), 58 | function(tm.forecast.values) { 59 | list(forecast.values=tm.forecast.values) 60 | }) 61 | -------------------------------------------------------------------------------- /sample/gen-retro-component-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | ## CV input data 3 | print("CV: select available input data") 4 | gc() 5 | swg.retro.voxel.data = 6 | tryCatch({ 7 | map_join( 8 | get_voxel_data, 9 | s.retro.seasons, w.retro.model.weeks, g.epigroups, 10 | last.losocv.issue, 11 | cache.prefix=file.path(epiproject.cache.dir,"swg.retro.voxel.data"), 12 | use.proxy=TRUE 13 | ) 14 | }, 15 | ## issues with parallel package returning long vector results from large runs... 16 | error=function(e) { 17 | print ("Encountered error preparing voxel data in parallel. 
Attempting to read cache files sequentially with no progress bar --- this make take a while.") 18 | map_join( 19 | get_voxel_data, 20 | s.retro.seasons, w.retro.model.weeks, g.epigroups, 21 | last.losocv.issue, 22 | lapply_variant=lapply, 23 | cache.prefix=file.path(epiproject.cache.dir,"swg.retro.voxel.data"), 24 | use.proxy=TRUE 25 | ) 26 | }) 27 | 28 | ## Version of the above grouped by season and model week (an array of arrays of objects): 29 | sw.g.retro.voxel.data = map_join( 30 | function(swg.array, s,w) { 31 | swg.array[s,w,,drop=FALSE] %>>% 32 | select_dims(1:2, "drop") # drop s & w dimensions, but keep g dimension even if size 1 33 | }, 34 | no_join(swg.retro.voxel.data), 35 | named_arrayvec_to_name_arrayvec(s.retro.seasons), 36 | named_arrayvec_to_name_arrayvec(w.retro.model.weeks), 37 | lapply_variant=lapply, show.progress=FALSE 38 | ) 39 | 40 | ## CV backcasts 41 | print("CV: generate backcasts") 42 | swgb.retro.full.dats = map_join( 43 | get_backcast, 44 | swg.retro.voxel.data, sw.g.retro.voxel.data, source.name, signal.name, b.backcasters, 45 | epidata_df_to_chopped_trajectory_df=epidata_df_to_chopped_trajectory_df, 46 | use.proxy=TRUE, 47 | cache.prefix=file.path(epiproject.cache.dir,"swgb.retro.full.dats") 48 | ) 49 | 50 | ## CV forecasts as target_multicast objects: 51 | print("CV: generate component forecasts") 52 | gc() 53 | swgbf.retro.component.target.multicasts = map_join( 54 | target_multicast, 55 | swg.retro.voxel.data, swgb.retro.full.dats, f.forecasters, 56 | target_trajectory_preprocessor, 57 | no_join(t.target.specs), 58 | no_join(m.forecast.types), 59 | full_dat_fixup=full_dat_fixup, 60 | ## lapply_variant = lapply, 61 | cache.prefix=file.path(epiproject.cache.dir,"swgbf.retro.component.target.multicasts"), 62 | shuffle=FALSE, 63 | use.proxy=TRUE 64 | ) 65 | ## xxx loading from many cache files is slow; reduce # of cache files? 66 | 67 | ## CV forecasts as forecast.value's: 68 | gc() 69 | swgtmbf.retro.component.forecast.values = 70 | map_join(swgbf.retro.component.target.multicasts, 71 | f=`[[`, "forecast.values", 72 | shuffle=FALSE#, lapply_variant=lapply 73 | ) %>>% 74 | simplify2arrayp() %>>% 75 | { 76 | original = . 77 | dim(.) <- c(dim(original)[1:2], dim(swgbf.retro.component.target.multicasts)) 78 | dimnames(.) <- c(dimnames(original)[1:2], dimnames(swgbf.retro.component.target.multicasts)) 79 | rm(original) # somehow this reaches the global environment and takes up memory 80 | gc() 81 | . 
82 | } %>>% 83 | aperm(c(3:5,1:2,6:7)) 84 | 85 | print('Analysis: calculate CV component evaluations') 86 | 87 | gc() 88 | sg.retro.observed.trajectories = map_join( 89 | get_observed_trajectory, 90 | s.retro.seasons, g.epigroups, 91 | cache.prefix=file.path(epiproject.cache.dir,"sg.retro.observed.trajectories") 92 | ) 93 | 94 | gc() 95 | swgt.retro.observed.multivals = map_join( 96 | observed_multival2, 97 | swg.retro.voxel.data, 98 | target_trajectory_preprocessor, 99 | t.target.specs, 100 | sg.retro.observed.trajectories, 101 | shuffle=FALSE, 102 | cache.prefix=file.path(epiproject.cache.dir,"swgt.retro.observed.multivals") 103 | ) 104 | 105 | gc() 106 | swgtm.retro.observed.values = map_join( 107 | observed_value2, 108 | swg.retro.voxel.data, t.target.specs, m.forecast.types, 109 | swgt.retro.observed.multivals, 110 | shuffle=FALSE, lapply_variant=lapply, 111 | cache.prefix=file.path(epiproject.cache.dir,"swgtm.retro.observed.values") 112 | ) 113 | 114 | old.mc.cores = getOption("mc.cores") 115 | options("mc.cores"=min(2L,old.mc.cores)) 116 | gc() 117 | swgtmbf.retro.component.evaluations = map_join( 118 | get_evaluation, 119 | swgtmbf.retro.component.forecast.values, swgtm.retro.observed.values, m.forecast.types 120 | ) 121 | mode(swgtmbf.retro.component.evaluations) <- "numeric" 122 | saveRDS(swgtmbf.retro.component.evaluations, file.path(epiproject.cache.dir,"swgtmbf.retro.component.evaluations.rds")) 123 | options("mc.cores"=old.mc.cores) 124 | ## fixme sometimes this results in errors due to NULL's appearing in the 125 | ## evaluations, but re-running the evaluations seems to work... memory issues? gc beforehand? 126 | 127 | 128 | ## swgt.retro.observed.multibin.values = map_join( 129 | ## observed_value2, 130 | ## swg.retro.voxel.data, t.target.specs, no_join(multibin.logscore.forecast.type), 131 | ## swgt.retro.observed.multivals 132 | ## ) 133 | 134 | ## swgtmbf.retro.component.multibin.scores = map_join( 135 | ## get_evaluation, 136 | ## swgtmbf.retro.component.forecast.values[,,,,"Bin",,,drop=FALSE], 137 | ## swgt.retro.observed.multibin.values, 138 | ## no_join(multibin.logscore.forecast.type) 139 | ## ) 140 | ## mode(swgtmbf.retro.component.multibin.scores) <- "numeric" 141 | 142 | ## saveRDS(swgtmbf.retro.component.multibin.scores, file.path(epiproject.cache.dir,"swgtmbf.retro.component.multibin.scores.rds")) 143 | 144 | 145 | 146 | 147 | 148 | apply(swgtmbf.retro.component.multibin.scores, c(7L,6L), mean, na.rm=TRUE) 149 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE], c(7L,6L), mean, na.rm=TRUE) 150 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 151 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(6L,7L), mean, na.rm=TRUE) 152 | apply(swgtmbf.retro.component.evaluations[names(s.retro.seasons)[s.retro.seasons>=2010L],,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 153 | apply(swgtmbf.retro.component.evaluations, c(7L,5L), mean, na.rm=TRUE) 154 | -------------------------------------------------------------------------------- /sample/gen-retro-ensemble-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | ## Observations we use for CV evaluation: 3 | print("CV: find observed trajectories, target multivals, target values") 4 | 5 | ## Tack on additional indexer_list's for CV: 6 | e.retro.ensemble.weighting.scheme.swgtmbf.indexer.lists = 7 | map_join(function(wgt.indexer.list) { 8 | 
c(retro.season.indexer, # retro study on seasons 9 | wgt.indexer.list, # ensemble choice for weeks, epigroups, targets 10 | list(each=NULL), # separately for each forecast type 11 | list(all=NULL, all=NULL) # always group together all backcasters, forecasters 12 | ) 13 | }, e.ensemble.partial.weighting.scheme.wgt.indexer.lists, 14 | lapply_variant=lapply, shuffle=FALSE, show.progress=FALSE 15 | ) 16 | 17 | ## Calculate CV ensemble weights: 18 | old.mc.cores = getOption("mc.cores") 19 | options("mc.cores"=min(2L,old.mc.cores)) 20 | print("CV: fit weightsets") 21 | e.retro.ensemble.weightsets = map_join( 22 | function(weighting.scheme.indexer.list) { 23 | get_ensemble_weightset(swgtmbf.retro.component.forecast.values, 24 | swgtm.retro.observed.values, 25 | m.forecast.types, 26 | weighting.scheme.indexer.list) 27 | }, 28 | e.retro.ensemble.weighting.scheme.swgtmbf.indexer.lists, 29 | lapply_variant=lapply, 30 | cache.prefix=file.path(epiproject.cache.dir,"e.retro.ensemble.weightsets") 31 | ) 32 | options("mc.cores"=old.mc.cores) 33 | 34 | ## Calculate CV ensemble forecasts as forecast.value's 35 | print("CV: generate ensemble forecasts") 36 | gc() 37 | swgtme.retro.ensemble.forecast.values.file = file.path(file.path(epiproject.cache.dir,"swgtme.retro.ensemble.forecast.values.rds")) 38 | swgtme.retro.ensemble.forecast.values = 39 | if (file.exists(swgtme.retro.ensemble.forecast.values.file)) { 40 | readRDS(swgtme.retro.ensemble.forecast.values.file) 41 | } else { 42 | lapply( 43 | e.retro.ensemble.weightsets, 44 | function(weightset) { 45 | map_join( 46 | ## `*`, # bad if only non-NA's are 0-weighted 47 | function(forecast.value, weight) { 48 | if (weight == 0) { 49 | weight <- NA 50 | } 51 | weight * forecast.value 52 | }, 53 | swgtmbf.retro.component.forecast.values, weightset, 54 | lapply_variant=lapply, show.progress=FALSE 55 | ) %>>% apply(1:5, Reduce, f=function(x,y) { 56 | dplyr::coalesce(x+y, x, y) 57 | }) 58 | }) %>>% 59 | simplify2arrayp() %>>% 60 | {names(dimnames(.))[[6L]] <- dimnamesnamesp(e.retro.ensemble.weightsets); .} 61 | } 62 | if (!file.exists(swgtme.retro.ensemble.forecast.values.file)) { 63 | saveRDS(swgtme.retro.ensemble.forecast.values, swgtme.retro.ensemble.forecast.values.file) 64 | } 65 | ## todo in weightset application, try removing NA values rather than 0 weights? is there a way to use row/col weighted means to do the above? 66 | ## todo speed up weightset fitting&application with Rcpp? 67 | ## todo use mclapply in map_join above instead of mclapply over ensembles... 
68 | ## but broke before; need to avoid memory duplication issues 69 | 70 | ## Calculate CV ensemble forecasts as target.multicasts 71 | gc() 72 | swge.retro.ensemble.target.multicasts.file = file.path(epiproject.cache.dir,"swge.retro.ensemble.target.multicasts.rds") 73 | swge.retro.ensemble.target.multicasts = 74 | if (file.exists(swge.retro.ensemble.target.multicasts.file)) { 75 | readRDS(swge.retro.ensemble.target.multicasts.file) 76 | } else { 77 | apply(swgtme.retro.ensemble.forecast.values, c(1:3,6L), 78 | function(tm.forecast.values) { 79 | list(forecast.values=tm.forecast.values) 80 | }) 81 | } 82 | if (!file.exists(swge.retro.ensemble.target.multicasts.file)) { 83 | saveRDS(swge.retro.ensemble.target.multicasts, swge.retro.ensemble.target.multicasts.file) 84 | } 85 | 86 | print('Analysis: calculate CV ensemble evaluations') 87 | 88 | ## swgtme.retro.ensemble.multibin.scores = map_join( 89 | ## get_evaluation, 90 | ## swgtme.retro.ensemble.forecast.values[,,,,"Bin",,drop=FALSE], 91 | ## swgt.retro.observed.multibin.values, 92 | ## no_join(multibin.logscore.forecast.type) 93 | ## ) 94 | ## mode(swgtme.retro.ensemble.multibin.scores) <- "numeric" 95 | ## saveRDS(swgtme.retro.ensemble.multibin.scores, file.path(epiproject.cache.dir,"swgtme.retro.ensemble.multibin.scores.rds")) 96 | 97 | swgtme.retro.ensemble.evaluations = map_join( 98 | get_evaluation, 99 | swgtme.retro.ensemble.forecast.values, swgtm.retro.observed.values, m.forecast.types 100 | ) 101 | mode(swgtme.retro.ensemble.evaluations) <- "numeric" 102 | saveRDS(swgtme.retro.ensemble.evaluations, file.path(epiproject.cache.dir,"swgtme.retro.ensemble.evaluations.rds")) 103 | 104 | 105 | 106 | 107 | 108 | ## apply(swgtmbf.retro.component.multibin.scores, c(7L,6L), mean, na.rm=TRUE) 109 | ## apply(swgtme.retro.ensemble.multibin.scores, c(6L), mean, na.rm=TRUE) 110 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE], c(7L,6L), mean, na.rm=TRUE) 111 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 112 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(6L,7L), mean, na.rm=TRUE) 113 | apply(swgtmbf.retro.component.evaluations[names(s.retro.seasons)[s.retro.seasons>=2010L],,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 114 | apply(swgtmbf.retro.component.evaluations, c(7L,5L), mean, na.rm=TRUE) 115 | apply(swgtme.retro.ensemble.evaluations, 6:5, mean, na.rm=TRUE) 116 | -------------------------------------------------------------------------------- /sample/generate-retro-and-prospective-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | source("gen-retro-component-forecasts.R") 3 | source("gen-retro-ensemble-forecasts.R") 4 | 5 | 6 | 7 | ## apply(swgtmbf.retro.component.evaluations[(2003:2009)%>>%paste0("/",.+1L),,,,,,,drop=FALSE], 8 | ## c(5L,7L), mean, na.rm=TRUE) 9 | ## apply(swgtme.retro.ensemble.evaluations[(2003:2009)%>>%paste0("/",.+1L),,,,,,drop=FALSE], 10 | ## 5:6, mean, na.rm=TRUE) 11 | 12 | ## apply(swgtmbf.retro.component.evaluations, c(7L,4:5), mean, na.rm=TRUE) 13 | ## apply(swgtme.retro.ensemble.evaluations, c(6L,4:5), mean, na.rm=TRUE) 14 | 15 | ## apply(swgtme.retro.ensemble.evaluations, c(1,4:5), mean, na.rm=TRUE) 16 | 17 | ## todo another level of stacking? ensemble of ensembles? 
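## Illustrative aside (not part of the original scripts; the toy component names below are hypothetical): the weightset application in gen-retro-ensemble-forecasts.R and gen-prospective-ensemble-forecasts.R above multiplies each component forecast by its weight, turns exact-zero weights into NA so a zero-weighted component drops out instead of contributing zeros, and then folds components together with dplyr::coalesce(x + y, x, y), which keeps whichever side is non-NA. A minimal sketch of that reduction on toy probability vectors:
toy.forecasts = list(br=c(0.2, 0.3, 0.5), twkde=c(0.1, 0.6, 0.3), eb=c(0.3, 0.3, 0.4))
toy.weights = c(br=0.6, twkde=0.4, eb=0)
toy.weighted = Map(function(forecast.value, weight) {
  if (weight == 0) {
    weight <- NA  # same trick as above: exclude the component rather than zero it out
  }
  weight * forecast.value
}, toy.forecasts, as.list(toy.weights))
toy.ensemble = Reduce(function(x, y) dplyr::coalesce(x + y, x, y), toy.weighted)
## toy.ensemble is c(0.16, 0.42, 0.42); the zero-weighted "eb" component is
## ignored rather than pulling the mixture toward zero.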
18 | 19 | ## swbf.retro.forecast.spreadsheets = map_join( 20 | ## target_multicast_epigroup_forecast_table, 21 | ## swgbf.retro.component.target.multicasts, 22 | ## swg.retro.voxel.data, 23 | ## no_join(t.target.specs), no_join(m.forecast.types), 24 | ## epigroup.colname 25 | ## ) %>>% 26 | ## apply(c(1:2,4:5), dplyr::bind_rows) 27 | 28 | ## spreadsheet.template = readr::read_csv("~/long_flu_submission_template_1718.csv", col_types=readr::cols()) 29 | ## attr(spreadsheet.template, "spec") <- NULL 30 | ## spreadsheet.to.check = swbf.retro.forecast.spreadsheets[["2010/2011",1L,1L,1L]] 31 | 32 | ## spreadsheet.to.check %>>% dplyr::mutate(A="a") %>>% dplyr::full_join(qwer %>>% dplyr::mutate(B="b"), by=c("Location","Target","Type","Unit","Bin_start_incl","Bin_end_notincl")) %>>% dplyr::filter(is.na(A) | is.na(B)) 33 | ## class(spreadsheet.to.check[["Value"]])==class(spreadsheet.template[["Value"]]) 34 | ## identical(sapply(spreadsheet.to.check, class), sapply(spreadsheet.template, class)) 35 | ## identical(sapply(spreadsheet.to.check, typeof), sapply(spreadsheet.template, typeof)) 36 | ## identical(sapply(spreadsheet.to.check, mode), sapply(spreadsheet.template, mode)) 37 | ## identical(sapply(spreadsheet.to.check, storage.mode), sapply(spreadsheet.template, storage.mode)) 38 | ## Map(all.equal, 39 | ## spreadsheet.to.check %>>% dplyr::select(-Value), 40 | ## spreadsheet.template %>>% dplyr::select(-Value)) 41 | ## Map(identical, 42 | ## spreadsheet.to.check %>>% dplyr::select(-Value), 43 | ## spreadsheet.template %>>% dplyr::select(-Value)) 44 | ## identical(class(spreadsheet.to.check), class(spreadsheet.template)) 45 | ## identical(typeof(spreadsheet.to.check), typeof(spreadsheet.template)) 46 | ## identical(mode(spreadsheet.to.check), mode(spreadsheet.template)) 47 | ## identical(storage.mode(spreadsheet.to.check), storage.mode(spreadsheet.template)) 48 | ## ## all.equal overload does not check attributes; if "spec" attribute set by 49 | ## ## readr is included, the all.equal check passes but the identical check fails; 50 | ## ## removing this attribute makes the identical check pass 51 | ## all.equal(spreadsheet.to.check %>>% dplyr::select(-Value), 52 | ## spreadsheet.template %>>% dplyr::select(-Value)) 53 | ## identical(spreadsheet.to.check %>>% dplyr::select(-Value), 54 | ## spreadsheet.template %>>% dplyr::select(-Value)) 55 | 56 | source("gen-prospective-component-forecasts.R") 57 | source("gen-prospective-ensemble-forecasts.R") 58 | 59 | ## todo automatic appropriate parallel_dim_i setting 60 | ## fixme finish documenting map_join 61 | ## todo use simplified sim's to strip out unnecessary elements 62 | ## todo sw.epidata --- if will be borrowing across epigroups... 63 | ## todo add prospective forecasts... 64 | ## todo allow for subsets in ensemble... need to use cv_apply to get the forecasts again 65 | ## xxx target.settings -> voxel.settings? 66 | ## todo also need to strip out information from target forecasts if remains after using simplified sim's 67 | ## todo dots to lists in target spec functions, maybe elsewhere as well 68 | ## todo could also have save season-evaluation epidata to hopefully speed up calculating observed values 69 | ## todo reshape so that can include labels without exploding in size 70 | ## todo don't store sim's 71 | ## reverse order on the indexing? 72 | ## fixme no uniform fallback for Point; maybe make fallback the empirical distribution since it is smoothed now and has uniform pseudoweight? or just remove since everything has uniform pseudoweight? 
73 | ## fixme no Bin smoothing by default? warn about plotting target forecasts? 74 | ## todo joint epidata? voxel settings? 75 | ## joint backcast? joint forecast? 76 | ## fixme Virgin Islands --- little history 77 | ## todo document no_join 78 | ## fixme check on Season onset Point predictions of "none" vs. NA vs. ... 79 | ## fixme EB weight sum is too large 80 | ## fixme smooth sim targets with a Laplace kernel? or a spike + slab type --- can inflate bw to make up for mass on spike? 81 | ## fixme should adjust dimension ordering... for col major feel 82 | ## todo deal with parallel-related memory issues --- duplicate swg.voxel.data... 83 | ## todo work on speed getting voxel data --- possible to avoid storage? 84 | ## xxx consider just basing everything on filesystem contracts... no need to hold everything in memory 85 | ## fixme try to solve memory issues with mclapply env's? require interaction with disk? 86 | ## todo weighted cv_apply smearing schemes (boxcar kernel -> other kernels) 87 | ## todo tiny subset run to do some testing and development on 88 | ## fixme table verification 89 | ## fixme empirical distribution is using target's baseline with empirical curves again instead of pairing baselines and curves 90 | ## todo better fill-in in 2010 for states 91 | ## todo remove Season onset target for states 92 | ## fixme modularize into functions, document, ... 93 | -------------------------------------------------------------------------------- /sample/high-state-forecasts.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Aaron Rumack 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | source("high-state-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | save_spreadsheets( 27 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 28 | swg.prospective.voxel.data, 29 | t.target.specs, m.forecast.types, 30 | epigroup.colname, 31 | file.path(epiproject.cache.dir,"stat-spreadsheets") 32 | ) 33 | 34 | save_spreadsheets( 35 | swgbf.prospective.component.target.multicasts, 36 | swg.prospective.voxel.data, 37 | t.target.specs, m.forecast.types, 38 | epigroup.colname, 39 | file.path(epiproject.cache.dir,"spreadsheets") 40 | ) 41 | 42 | save_linlog_plots( 43 | target_multicast_week_plot, 44 | swgbf.prospective.component.target.multicasts, 45 | swg.prospective.voxel.data, 46 | t.target.specs, m.forecast.types, 47 | file.path(epiproject.cache.dir,"linlog.plots-week") 48 | ) 49 | 50 | save_linlog_plots( 51 | target_multicast_percent_plot, 52 | swgbf.prospective.component.target.multicasts, 53 | swg.prospective.voxel.data, 54 | t.target.specs, m.forecast.types, 55 | file.path(epiproject.cache.dir,"linlog.plots-percent") 56 | ) 57 | 58 | save_spreadsheets( 59 | swge.prospective.ensemble.target.multicasts, 60 | swg.prospective.voxel.data, 61 | t.target.specs, m.forecast.types, 62 | epigroup.colname, 63 | file.path(epiproject.cache.dir,"spreadsheets") 64 | ) 65 | 66 | save_linlog_plots( 67 | target_multicast_week_plot, 68 | swge.prospective.ensemble.target.multicasts, 69 | swg.prospective.voxel.data, 70 | t.target.specs, m.forecast.types, 71 | file.path(epiproject.cache.dir,"linlog.plots-week") 72 | ) 73 | 74 | save_linlog_plots( 75 | target_multicast_percent_plot, 76 | swge.prospective.ensemble.target.multicasts, 77 | swg.prospective.voxel.data, 78 | t.target.specs, m.forecast.types, 79 | file.path(epiproject.cache.dir,"linlog.plots-percent") 80 | ) 81 | 82 | save_linlog_plots( 83 | target_multicast_week_plot, 84 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 85 | swg.prospective.voxel.data, 86 | t.target.specs, m.forecast.types, 87 | file.path(epiproject.cache.dir,"stat-linlog.plots-week") 88 | ) 89 | 90 | save_linlog_plots( 91 | target_multicast_percent_plot, 92 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 93 | swg.prospective.voxel.data, 94 | t.target.specs, m.forecast.types, 95 | file.path(epiproject.cache.dir,"stat-linlog.plots-percent") 96 | ) 97 | 98 | save_weighting_linlog_plots( 99 | e.prospective.ensemble.weightsets[["target-9time-based"]], 100 | swgbf.prospective.component.target.multicasts, 101 | swg.prospective.voxel.data, 102 | t.target.specs, m.forecast.types, 103 | file.path(epiproject.cache.dir,"stat-weighting-plots") 104 | ) 105 | -------------------------------------------------------------------------------- /sample/low-state-forecasts.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Aaron Rumack 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. 
The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | source("low-state-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | save_spreadsheets( 27 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 28 | swg.prospective.voxel.data, 29 | t.target.specs, m.forecast.types, 30 | epigroup.colname, 31 | file.path(epiproject.cache.dir,"stat-spreadsheets") 32 | ) 33 | 34 | save_spreadsheets( 35 | swgbf.prospective.component.target.multicasts, 36 | swg.prospective.voxel.data, 37 | t.target.specs, m.forecast.types, 38 | epigroup.colname, 39 | file.path(epiproject.cache.dir,"spreadsheets") 40 | ) 41 | 42 | save_linlog_plots( 43 | target_multicast_week_plot, 44 | swgbf.prospective.component.target.multicasts, 45 | swg.prospective.voxel.data, 46 | t.target.specs, m.forecast.types, 47 | file.path(epiproject.cache.dir,"linlog.plots-week") 48 | ) 49 | 50 | save_linlog_plots( 51 | target_multicast_percent_plot, 52 | swgbf.prospective.component.target.multicasts, 53 | swg.prospective.voxel.data, 54 | t.target.specs, m.forecast.types, 55 | file.path(epiproject.cache.dir,"linlog.plots-percent") 56 | ) 57 | 58 | save_spreadsheets( 59 | swge.prospective.ensemble.target.multicasts, 60 | swg.prospective.voxel.data, 61 | t.target.specs, m.forecast.types, 62 | epigroup.colname, 63 | file.path(epiproject.cache.dir,"spreadsheets") 64 | ) 65 | 66 | save_linlog_plots( 67 | target_multicast_week_plot, 68 | swge.prospective.ensemble.target.multicasts, 69 | swg.prospective.voxel.data, 70 | t.target.specs, m.forecast.types, 71 | file.path(epiproject.cache.dir,"linlog.plots-week") 72 | ) 73 | 74 | save_linlog_plots( 75 | target_multicast_percent_plot, 76 | swge.prospective.ensemble.target.multicasts, 77 | swg.prospective.voxel.data, 78 | t.target.specs, m.forecast.types, 79 | "../../../epiforecast-epiproject/flusight-low-state-run/linlog.plots-percent" 80 | ) 81 | 82 | save_linlog_plots( 83 | target_multicast_week_plot, 84 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 85 | swg.prospective.voxel.data, 86 | t.target.specs, m.forecast.types, 87 | "../../../epiforecast-epiproject/flusight-low-state-run/stat-linlog.plots-week" 
88 | ) 89 | 90 | save_linlog_plots( 91 | target_multicast_percent_plot, 92 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 93 | swg.prospective.voxel.data, 94 | t.target.specs, m.forecast.types, 95 | "../../../epiforecast-epiproject/flusight-low-state-run/stat-linlog.plots-percent" 96 | ) 97 | 98 | save_weighting_linlog_plots( 99 | e.prospective.ensemble.weightsets[["target-9time-based"]], 100 | swgbf.prospective.component.target.multicasts, 101 | swg.prospective.voxel.data, 102 | t.target.specs, m.forecast.types, 103 | "../../../epiforecast-epiproject/flusight-low-state-run/stat-weighting-plots" 104 | ) 105 | -------------------------------------------------------------------------------- /sample/natreg-for-collaborative-ensemble.R: -------------------------------------------------------------------------------- 1 | ## See the can-use-for-2018/2019-natreg-collab-ensemble branch for scripts to generate component forecasts to send in to be incorporated into the FluSight-Network ensemble. 2 | 3 | ## But note that the models on that branch are now outdated; e.g., EB has a critical bug that has been fixed on main but not on that branch yet, and there may be other missing fixes as well. 4 | -------------------------------------------------------------------------------- /sample/natreg-forecasts.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | source("natreg-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | ## Output prospective forecast spreadsheets, plots: 27 | collab.ensemble.retro.dir = "../../../collaborative-ensemble-potential-submission-4" 28 | if (!dir.exists(collab.ensemble.retro.dir)) { 29 | dir.create(collab.ensemble.retro.dir) 30 | } 31 | save_spreadsheets(swgbf.retro.component.target.multicasts[,,,"quantile_arx_backnowcast",,drop=FALSE], 32 | swg.retro.voxel.data, 33 | t.target.specs, m.forecast.types, 34 | epigroup.colname, 35 | collab.ensemble.retro.dir, 36 | function(swg.voxel.data,s,w,...) { 37 | season = swg.voxel.data[[s,w,1L]][["season"]] 38 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 39 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 40 | if (season >= 2010L && !dplyr::between(week,21L,39L)) { 41 | sprintf("%s/EW%02d-%d-%s.csv", ..2, week, year, ..2) 42 | } 43 | }) 44 | save_spreadsheets(swge.retro.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 45 | swg.retro.voxel.data, 46 | t.target.specs, m.forecast.types, 47 | epigroup.colname, 48 | collab.ensemble.retro.dir, 49 | function(swg.voxel.data,s,w,...) { 50 | season = swg.voxel.data[[s,w,1L]][["season"]] 51 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 52 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 53 | if (season >= 2010L && !dplyr::between(week,21L,39L)) { 54 | sprintf("%s/EW%02d-%d-%s.csv", "Delphi_Stat_FewerComponentsNoBackcastNoNowcast", week, year, "Delphi_Stat_FewerComponentsNoBackcastNoNowcast") 55 | } 56 | }) 57 | save_spreadsheets(swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 58 | swg.prospective.voxel.data, 59 | t.target.specs, m.forecast.types, 60 | epigroup.colname, 61 | collab.ensemble.prospective.dir, 62 | function(swg.voxel.data,s,w,...) { 63 | season = swg.voxel.data[[s,w,1L]][["season"]] 64 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 65 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 66 | if (season >= 2010L && !dplyr::between(week,21L,39L)) { 67 | sprintf("%s/EW%02d-%d-%s.csv", "Delphi_Stat_FewerComponentsNoBackcastNoNowcast", week, year, "Delphi_Stat_FewerComponentsNoBackcastNoNowcast") 68 | } 69 | }) 70 | 71 | 72 | save_spreadsheets( 73 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 74 | swg.prospective.voxel.data, 75 | t.target.specs, m.forecast.types, 76 | epigroup.colname, 77 | file.path(epiproject.cache.dir,"stat-spreadsheets"), 78 | function(swg.voxel.data,s,w,...) 
{ 79 | season = swg.voxel.data[[s,w,1L]][["season"]] 80 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 81 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 82 | sprintf("EW%02d-%s-%s.csv", week, "Delphi-Stat", Sys.Date()) 83 | } 84 | ) 85 | 86 | save_spreadsheets( 87 | swge.prospective.ensemble.target.multicasts, 88 | swg.prospective.voxel.data, 89 | t.target.specs, m.forecast.types, 90 | epigroup.colname, 91 | file.path(epiproject.cache.dir,"spreadsheets") 92 | ) 93 | 94 | save_linlog_plots( 95 | target_multicast_week_plot, 96 | swgbf.prospective.component.target.multicasts, 97 | swg.prospective.voxel.data, 98 | t.target.specs, m.forecast.types, 99 | file.path(epiproject.cache.dir,"linlog.plots") 100 | ) 101 | 102 | save_linlog_plots( 103 | target_multicast_percent_plot, 104 | swgbf.prospective.component.target.multicasts, 105 | swg.prospective.voxel.data, 106 | t.target.specs, m.forecast.types, 107 | file.path(epiproject.cache.dir,"linlog.plots") 108 | ) 109 | 110 | save_linlog_plots( 111 | target_multicast_week_plot, 112 | swge.prospective.ensemble.target.multicasts, 113 | swg.prospective.voxel.data, 114 | t.target.specs, m.forecast.types, 115 | file.path(epiproject.cache.dir,"linlog.plots-week") 116 | ) 117 | 118 | save_linlog_plots( 119 | target_multicast_percent_plot, 120 | swge.prospective.ensemble.target.multicasts, 121 | swg.prospective.voxel.data, 122 | t.target.specs, m.forecast.types, 123 | file.path(epiproject.cache.dir,"linlog.plots-percent") 124 | ) 125 | 126 | save_linlog_plots( 127 | target_multicast_week_plot, 128 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 129 | swg.prospective.voxel.data, 130 | t.target.specs, m.forecast.types, 131 | file.path(epiproject.cache.dir,"stat-linlog.plots-week") 132 | ) 133 | 134 | save_linlog_plots( 135 | target_multicast_percent_plot, 136 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 137 | swg.prospective.voxel.data, 138 | t.target.specs, m.forecast.types, 139 | file.path(epiproject.cache.dir,"stat-linlog.plots-percent") 140 | ) 141 | 142 | save_weighting_linlog_plots( 143 | e.prospective.ensemble.weightsets[["target-9time-based"]], 144 | swgbf.prospective.component.target.multicasts, 145 | swg.prospective.voxel.data, 146 | t.target.specs, m.forecast.types, 147 | file.path(epiproject.cache.dir,"stat-weighting-plots") 148 | ) 149 | -------------------------------------------------------------------------------- /sample/natreg-with-eb.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. 
Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | source("natreg-with-eb-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | save_spreadsheets( 27 | swge.prospective.ensemble.target.multicasts, 28 | swg.prospective.voxel.data, 29 | t.target.specs, m.forecast.types, 30 | epigroup.colname, 31 | file.path(epiproject.cache.dir,"spreadsheets") 32 | ) 33 | 34 | ## fixme how is EB treating back/now/pancasts fed into it? 35 | 36 | ## fixme todo add backcast targets... 37 | -------------------------------------------------------------------------------- /sample/prepare-covid19-submissions.R: -------------------------------------------------------------------------------- 1 | 2 | library("pipeR") 3 | source("covidplot.R") 4 | 5 | ## year = 2020L 6 | ## epi.week = 10L 7 | ## crowdcast.tgz.dirpath = "~/Dropbox/private/crowdcast_spreadsheets_to_filter" 8 | ## submissions.root.dirpath = "~/Dropbox/private/test_submissions" 9 | ## do.plot = TRUE 10 | ## plots.root.dirpath = "~/Dropbox/private/test_plots" 11 | ## create.destination.dirs = FALSE 12 | 13 | ## --- Provide usage message / Read in command-line arguments: --- 14 | command.args = commandArgs(trailingOnly=TRUE) 15 | if (length(command.args) != 7L) { 16 | print(command.args) 17 | stop (paste(collapse="\n",capture.output({ 18 | cat(fill=TRUE, 19 | ' 20 | Usage: Rscript prepare-covid19-submissions.R ; where: 21 | - year is probably 2020 22 | - epi.week is a two-digit epi-week-of-year number 23 | - crowdcast.tgz.dirpath is the path of the directory containing dfarrow epicast tarballs 24 | - submissions.root.dirpath is the draft submission directory or the submission repository directory 25 | - do.plot is a single Boolean (TRUE/FALSE) indicating whether or not to prepare plots 26 | - plots.root.dirpath is a directory indicating where to place plots if requested (and can be an arbitrary string otherwise) 27 | - create.destination.dirs is a single Boolean (TRUE/FALSE) indicating whether or not to create spreadsheet and plot destination directories --- use FALSE to double-check that pre-existing directories have the right structure. 
28 | ') 29 | }))) 30 | } 31 | year = as.integer(command.args[[1L]]) 32 | epi.week = as.integer(command.args[[2L]]) 33 | crowdcast.tgz.dirpath = as.character(command.args[[3L]]) 34 | submissions.root.dirpath = as.character(command.args[[4L]]) 35 | do.plot = as.logical(command.args[[5L]]) 36 | plots.root.dirpath = as.character(command.args[[6L]]) 37 | create.destination.dirs = as.logical(command.args[[7L]]) 38 | 39 | 40 | ## --- Utils: --- 41 | ensure_dirpath = function(dirpath) { 42 | if (length(dirpath) != 1L) { 43 | stop ('Only tested for one dirpath at a time.') 44 | } 45 | if (!dir.exists(dirpath)) { 46 | if (create.destination.dirs) { 47 | dir.create(dirpath) 48 | } else { 49 | stop (sprintf('Destination directory "%s" does not already exist. Maybe one or both of {submissions.root.dirpath, spreadsheets.root.dirpath} have not been used before and needs a single-time set-up by using create.destination.dirs=TRUE, or one or both of these dirpaths were misspecified.')) 50 | } 51 | } 52 | } 53 | 54 | 55 | 56 | ## --- Set up destinations: --- 57 | cmu.delphi.entry.names = c("CMU_Delphi-Crowdcast","CMU_Delphi-Crowdcast_MTurk","CMU_Delphi-Stat_Nowcast") 58 | ensure_dirpath(submissions.root.dirpath) 59 | natreg.submissions.dirpath = file.path(submissions.root.dirpath, "nation-region-forecast-data") 60 | state.submissions.dirpath = file.path(submissions.root.dirpath, "state-forecast-data") 61 | for (geography.dirpath in c(natreg.submissions.dirpath, state.submissions.dirpath)) { 62 | ensure_dirpath(geography.dirpath) 63 | for (cmu.delphi.entry.name in cmu.delphi.entry.names) { 64 | ensure_dirpath(file.path(geography.dirpath, cmu.delphi.entry.name)) 65 | } 66 | } 67 | if (do.plot) { 68 | ensure_dirpath(plots.root.dirpath) 69 | natreg.plots.dirpath = file.path(plots.root.dirpath, "nation-region-forecast-plots") 70 | state.plots.dirpath = file.path(plots.root.dirpath, "state-forecast-plots") 71 | for (geography.dirpath in c(natreg.plots.dirpath, state.plots.dirpath)) { 72 | ensure_dirpath(geography.dirpath) 73 | for (cmu.delphi.entry.name in cmu.delphi.entry.names) { 74 | ensure_dirpath(file.path(geography.dirpath, cmu.delphi.entry.name)) 75 | } 76 | } 77 | } 78 | 79 | 80 | 81 | ## --- Prepare Crowdcast submission: --- 82 | ## Select a tarball: 83 | crowdcast.tgz.pattern = sprintf("^dfarrow_covid_epicast_%d%02d_([0-9]+).tgz$", year, epi.week) 84 | candidate.crowdcast.tgz.filenames = list.files(crowdcast.tgz.dirpath) %>>% 85 | {.[grep(crowdcast.tgz.pattern, .)]} 86 | selected.crowdcast.tgz.filename = 87 | if (length(candidate.crowdcast.tgz.filenames) == 0L) { 88 | stop ('Cannot find tgz for this week.') 89 | } else if (length(candidate.crowdcast.tgz.filenames) == 1L) { 90 | candidate.crowdcast.tgz.filenames[[1L]] 91 | } else { 92 | version.numbers = as.integer(sub(crowdcast.tgz.pattern, "\\1", candidate.crowdcast.tgz.filenames)) 93 | selected.filename = candidate.crowdcast.tgz.filenames[[which.max(version.numbers)]] 94 | warning(sprintf('There appeared to be multiple tarballs for the given week. 
Selected "%s"', selected.filename)) 95 | selected.filename 96 | } 97 | selected.crowdcast.tgz.filepath = file.path(crowdcast.tgz.dirpath, selected.crowdcast.tgz.filename) 98 | ## Extract tarball to temporary directory: 99 | extraction.dirpath = tempdir() 100 | untar.code = untar(selected.crowdcast.tgz.filepath, c("epicast-regional.csv","epicast-state.csv"), 101 | exdir=extraction.dirpath, 102 | compressed="gzip") 103 | if (untar.code != 0L) { 104 | stop (sprintf('Error extracting tarball; error code: %d',untar.code)) 105 | } 106 | cat('Extracted Crowdcast tarball.', fill=TRUE) 107 | ## Filter targets within the csv's, write to csv's in destinations: 108 | crowdcast.natreg.spreadsheet.filepath = file.path(natreg.submissions.dirpath, "CMU_Delphi-Crowdcast", sprintf("%d-ew%02d-CMU_Delphi-Crowdcast.csv", year, epi.week)) 109 | readr::read_csv(file.path(extraction.dirpath, "epicast-regional.csv"), 110 | col_types="ccccc") %>>% 111 | dplyr::filter(target %in% c("1 wk ahead","2 wk ahead")) %>>% 112 | readr::write_csv(crowdcast.natreg.spreadsheet.filepath) 113 | crowdcast.state.spreadsheet.filepath = file.path(state.submissions.dirpath, "CMU_Delphi-Crowdcast", sprintf("%d-ew%02d-CMU_Delphi-Crowdcast.csv", year, epi.week)) 114 | readr::read_csv(file.path(extraction.dirpath, "epicast-state.csv"), 115 | col_types="ccccc") %>>% 116 | dplyr::filter(target %in% c("1 wk ahead","2 wk ahead")) %>>% 117 | readr::write_csv(crowdcast.state.spreadsheet.filepath) 118 | cat('Wrote out spreadsheets.', fill=TRUE) 119 | ## Prepare plots: 120 | if (do.plot) { 121 | plot.covid.forecast(crowdcast.natreg.spreadsheet.filepath, file.path(natreg.plots.dirpath, "CMU_Delphi-Crowdcast")) 122 | plot.covid.forecast(crowdcast.state.spreadsheet.filepath, file.path(state.plots.dirpath, "CMU_Delphi-Crowdcast")) 123 | } 124 | cat('Wrote out plots.', fill=TRUE) 125 | ## Clean up temporary directory immediately: 126 | file.remove(file.path(extraction.dirpath, "epicast-regional.csv")) 127 | file.remove(file.path(extraction.dirpath, "epicast-state.csv")) 128 | unlink(extraction.dirpath) 129 | cat('Cleaned up temporary directory.', fill=TRUE) 130 | --------------------------------------------------------------------------------