├── .gitignore ├── LICENSE ├── LICENSE_notes ├── README.md ├── demo └── demo.R ├── epiforecast.cpp14funs ├── DESCRIPTION ├── NAMESPACE ├── R │ ├── RcppExports.R │ ├── checks.R │ ├── curve-fits.R │ ├── eb.R │ ├── eb_dists.R │ └── interface.R ├── man-roxygen │ ├── param_baseline.R │ ├── param_full.dat.R │ ├── param_max.n.sims.R │ └── sim.method_template.R ├── src │ ├── Makevars │ ├── RcppExports.cpp │ ├── eb_rcpp.cpp │ └── epiforecast.cpp14funs_types.hpp └── tests │ └── testthat │ ├── fluview_hhs1_fetch.Rdata.rds │ └── test-eb.sim.R ├── epiforecast.cpp14funs_0.0.1.pdf ├── epiforecast ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R │ ├── RcppExports.R │ ├── backcasters.R │ ├── backcasters_new.R │ ├── br.R │ ├── cv.R │ ├── cv_apply.R │ ├── delphi_epidata.R │ ├── empirical.futures.R │ ├── empirical.trajectories.R │ ├── ensemble.R │ ├── epiproject.R │ ├── forecast_type.R │ ├── get_completed_fluview_state_df.R │ ├── holidays.R │ ├── loaders.R │ ├── map_join.R │ ├── match.R │ ├── namesp.R │ ├── predx_v2_spreadsheets.R │ ├── read.R │ ├── simclass.R │ ├── simplify2arrayp.R │ ├── target_spec.R │ ├── todo-by-file.R │ ├── twkde.R │ ├── utils.R │ └── weeks.R ├── data-raw │ └── fluview.outdated.R ├── data │ ├── fluview.2003on.outdated.dats.rda │ └── fluview.2003on.outdated.new.dats.rda ├── man-roxygen │ ├── param_baseline.R │ ├── param_full.dat.R │ ├── param_invent.scalars.R │ ├── param_max.n.sims.R │ └── sim.method_template.R ├── meta.R ├── src │ ├── RcppExports.cpp │ └── histograms_rcpp.cpp ├── tests │ └── testthat │ │ ├── test-br.sim.R │ │ ├── test-check.file.contents.R │ │ ├── test-read.from.file.R │ │ ├── test-twkde.markovian.sim.R │ │ ├── test-twkde.sim.R │ │ └── test_match.arg.or.default.R └── vignettes │ ├── fetch_data.Rmd │ ├── fetch_data.html │ ├── fetch_data[woven].md │ ├── figure │ ├── unnamed-chunk-1-1.png │ ├── unnamed-chunk-1-2.png │ ├── unnamed-chunk-2-1.png │ ├── unnamed-chunk-2-2.png │ ├── unnamed-chunk-3-1.png │ └── unnamed-chunk-3-2.png │ ├── make-forecasts-br.Rmd │ ├── make-forecasts-br.html │ └── test.R ├── epiforecast_0.0.1.pdf ├── sample ├── census_weights.rds ├── combine-spreadsheets.R ├── covid19ilinet-templates.R ├── gen-prospective-component-forecasts.R ├── gen-prospective-ensemble-forecasts.R ├── gen-retro-component-forecasts.R ├── gen-retro-ensemble-forecasts.R ├── generate-retro-and-prospective-forecasts.R ├── high-state-config.R ├── high-state-forecasts.R ├── hospitalization-forecasts.R ├── low-state-config.R ├── low-state-forecasts.R ├── merge-state-forecasts.R ├── natreg-config.R ├── natreg-for-collaborative-ensemble.R ├── natreg-forecasts.R ├── natreg-with-eb-config.R ├── natreg-with-eb.R ├── prepare-covid19-submissions.R ├── retrospective_forecasts.R └── state-eval-config.R └── scratch └── flusurv_pancast_viz.R /.gitignore: -------------------------------------------------------------------------------- 1 | # begin license for material from https://github.com/github/gitignore/** 2 | 3 | # Copyright (c) 2015 GitHub, Inc. 
4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | # end license for material from https://github.com/github/gitignore/** 24 | 25 | 26 | 27 | # begin material from https://github.com/github/gitignore/blob/master/Global/Emacs.gitignore 28 | 29 | *~ 30 | \#*\# 31 | /.emacs.desktop 32 | /.emacs.desktop.lock 33 | *.elc 34 | auto-save-list 35 | tramp 36 | .\#* 37 | 38 | # Org-mode 39 | .org-id-locations 40 | *_archive 41 | 42 | # flymake-mode 43 | *_flymake.* 44 | 45 | # eshell files 46 | /eshell/history 47 | /eshell/lastdir 48 | 49 | # elpa packages 50 | /elpa/ 51 | 52 | # reftex files 53 | *.rel 54 | 55 | # AUCTeX auto folder 56 | /auto/ 57 | 58 | # cask packages 59 | .cask/ 60 | 61 | # end material from https://github.com/github/gitignore/blob/master/Global/Emacs.gitignore 62 | 63 | 64 | 65 | 66 | 67 | # begin material from https://github.com/github/gitignore/blob/master/Global/Vim.gitignore 68 | 69 | [._]*.s[a-w][a-z] 70 | [._]s[a-w][a-z] 71 | *.un~ 72 | Session.vim 73 | .netrwhist 74 | *~ 75 | 76 | # end material from https://github.com/github/gitignore/blob/master/Global/Vim.gitignore 77 | 78 | 79 | 80 | # begin material from https://github.com/github/gitignore/blob/master/R.gitignore 81 | 82 | # History files 83 | .Rhistory 84 | .Rapp.history 85 | 86 | # Session Data files 87 | .RData 88 | 89 | # Example code in package build process 90 | *-Ex.R 91 | 92 | # RStudio files 93 | .Rproj.user/ 94 | 95 | # produced vignettes 96 | vignettes/*.html 97 | vignettes/*.pdf 98 | 99 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 100 | .httr-oauth 101 | 102 | # end material from https://github.com/github/gitignore/blob/master/R.gitignore 103 | 104 | 105 | 106 | # begin material from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore 107 | 108 | # Compiled Object files 109 | *.slo 110 | *.lo 111 | *.o 112 | *.obj 113 | 114 | # Precompiled Headers 115 | *.gch 116 | *.pch 117 | 118 | # Compiled Dynamic libraries 119 | *.so 120 | *.dylib 121 | *.dll 122 | 123 | # Fortran module files 124 | *.mod 125 | 126 | # Compiled Static libraries 127 | *.lai 128 | *.la 129 | *.a 130 | *.lib 131 | 132 | # Executables 133 | *.exe 134 | *.out 135 | *.app 136 | 137 | # end material from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore 138 | 139 | # Assume contents of man page directories are automatically generated: 140 | man/** 141 | 142 | # Ignore copy of delphi_epidata.R: 143 | # 
/R_pkg/epiforecast/R/delphi_epidata.R 144 | # /R_pkg/epiforecast/R/RcppExports.R 145 | # /R_pkg/epiforecast/src/RcppExports.cpp 146 | 147 | # Ignore epiforecast package data folder (it should be generated by scripts in data-raw): 148 | /R_pkg/epiforecast/data/** 149 | 150 | 151 | # # (For now) offline copies of data that was fetched 152 | # *.csv 153 | # *.Rdata 154 | # --- Disabled to allow "real" .csv's like sample/wILI_Baseline_fixup.csv to be noticed 155 | 156 | # R documentation 157 | *.Rd 158 | 159 | # Personal todo lists 160 | *todo-justin.org -------------------------------------------------------------------------------- /LICENSE_notes: -------------------------------------------------------------------------------- 1 | - All code is offered under GPL, version 2 (included in the file LICENSE). 2 | - The file simplify2arrayp.R is also offered under GPL, version 2+ (see top of that file). 3 | - The file epiforecast/R/delphi_epidata.R is also offered under the MIT License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Warning 2 | 3 | This package is no longer being maintained. 4 | 5 | # Summary 6 | An R package that implements several methods for epidemiological forecasting: empirical Bayes (EB), basis regression (BR), and time-weighted kernel density estimation (twkde). 7 | 8 | # Installation 9 | On the Mac or Linux command line: 10 | ``` 11 | R CMD INSTALL epiforecast 12 | ``` 13 | 14 | or run the following in R: 15 | 16 | ```R 17 | devtools::install_github("cmu-delphi/epiforecast-R", subdir="epiforecast") 18 | ``` 19 | 20 | 21 | # Tests 22 | Run the code in ```demo/demo.R``` 23 | -------------------------------------------------------------------------------- /demo/demo.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 
12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | library(epiforecast) # or devtools::load_all("../epiforecast") to try without installing 23 | library(epiforecast.cpp14funs) # or devtools::load_all("../epiforecast.cpp14funs") to try without installing 24 | 25 | ## Fetch current ILINet data for Health and Human Services (HHS) Region 1: 26 | area.name = "hhs1" 27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch", area.name)) 31 | 32 | ## Try EB with basic options; evaluate line by line to see all plots 33 | mysim = eb.sim(full.dat, max.n.sims=1000) 34 | print(mysim) 35 | plot(mysim) 36 | target.forecast = target_forecast(mysim,'pht') 37 | print(target.forecast) 38 | plot(target.forecast) 39 | 40 | ## Try EB with more simulation settings 41 | control.list = get_eb_control_list(sd.option="prior",max.match.length=5) 42 | mysim = eb.sim(full.dat, max.n.sims=100, control.list=control.list) 43 | plot(mysim) 44 | target.forecast = target_forecast(mysim,'pwk') 45 | 46 | ## Try empirical futures 47 | mysim = empirical.futures.sim(full.dat) 48 | plot(mysim) 49 | 50 | ## Try empirical trajectories 51 | mysim = empirical.trajectories.sim(full.dat) 52 | plot(mysim) 53 | 54 | ## Try BR 55 | mysim = br.sim(full.dat, max.n.sims=100) 56 | plot(mysim) 57 | print(mysim,verbose=TRUE) 58 | target.forecast = target_forecast(mysim, "pht") 59 | 60 | ## Try BR with more simulation settings 61 | control.list = get_br_control_list(df=5, cv.rule="1se") 62 | mysim = br.sim(full.dat, max.n.sims=100, control.list=control.list) 63 | plot(mysim) 64 | print(mysim,verbose=TRUE) 65 | target.forecast = target_forecast(mysim, "pht") 66 | target.forecast = target_forecast(mysim, "pwk") 67 | 68 | ## Try Markovian twkde 69 | mysim = twkde.markovian.sim(full.dat) 70 | plot(mysim) 71 | 72 | ## Try twkde 73 | mysim = twkde.sim(full.dat) 74 | plot(mysim) 75 | 76 | ## plot(mysim, type="hexagonal") 77 | ## matplot(cbind(sample(head(full.dat,-1L),1L)[[1L]][1:53], mysim$ys[,sample.int(ncol(mysim$ys),1L)][1:53]), type="l") 78 | ## matplot(cbind(rowMeans(dat.to.matrix(head(full.dat,-1L),53L)), matrixStats::rowWeightedMeans(mysim$ys, mysim$weights)[1:53]), type="l") 79 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: epiforecast.cpp14funs 2 | Type: Package 3 | Title: Tools for forecasting semi-regular seasonal epidemic curves and similar 4 | time series 5 | Version: 0.0.1 6 | Date: 2015-12-09 7 | Author: Logan C. Brooks, David C. Farrow, Sangwon Hyun, Ryan J. Tibshirani, Roni 8 | Rosenfeld 9 | Maintainer: Logan C. 
Brooks 10 | Recommends: 11 | epiforecast 12 | Imports: 13 | stats, 14 | utils, 15 | lubridate, 16 | splines, 17 | glmgen, 18 | glmnet, 19 | httr, 20 | Rcpp, 21 | rlist, 22 | Hmisc, 23 | weights, 24 | hexbin, 25 | plotrix, 26 | tibble, 27 | dplyr, 28 | pipeR, 29 | matrixStats, 30 | quantreg, 31 | lpSolve, 32 | parallel, 33 | R.utils, 34 | RCurl, 35 | pbmcapply 36 | Description: Tools for forecasting semi-regular seasonal epidemic curves and 37 | similar time series. Includes an empirical Bayes approach that forms a prior 38 | by transforming historical curves, a basis regression approach that balances 39 | matching observations from the current season and matching historical 40 | seasons' measurements for future weeks, and timestep-by-timestep weighted 41 | kernel density estimation on backward differences parameterized by both the 42 | time series measurements and the current time. 43 | License: GPL-2 44 | RoxygenNote: 7.3.2 45 | Collate: 46 | 'RcppExports.R' 47 | 'checks.R' 48 | 'curve-fits.R' 49 | 'eb_dists.R' 50 | 'interface.R' 51 | 'eb.R' 52 | LinkingTo: Rcpp 53 | SystemRequirements: C++14 54 | Suggests: 55 | testthat 56 | Encoding: UTF-8 57 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(eb.createForecasts) 4 | export(eb.fitSmoothCurves) 5 | export(eb.sim) 6 | export(get_eb_control_list) 7 | export(unifChoicePrior) 8 | import(Rcpp) 9 | import(glmgen) 10 | useDynLib(epiforecast.cpp14funs) 11 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | CartesianProductCurves <- function(fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices) { 5 | .Call('_epiforecast_cpp14funs_CartesianProductCurves', PACKAGE = 'epiforecast.cpp14funs', fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices) 6 | } 7 | 8 | CartesianProductLogWeights <- function(new_dat_df, dat_rcpp, observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) { 9 | .Call('_epiforecast_cpp14funs_CartesianProductLogWeights', PACKAGE = 'epiforecast.cpp14funs', new_dat_df, dat_rcpp, observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) 10 | } 11 | 12 | ZipProductCurvesAndLogWeightsp <- function(output_times, new_dat_df, dat_rcpp, 
observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) { 13 | .Call('_epiforecast_cpp14funs_ZipProductCurvesAndLogWeightsp', PACKAGE = 'epiforecast.cpp14funs', output_times, new_dat_df, dat_rcpp, observed_past_shrinkage_map_df, reasonable_future_shrinkage_map_df, n_future_neighbors, fit_obj_rcpp, y_scale_baseline, curve_index_choices, peak_time_choices, x_shift_choices, x_scale_choices, sd_choices, sd_scale_choices, peak_height_choices, y_scale_choices, bias_peaktime_mean, bias_peaktime_sd, bias_peaktime_shrinkage, bias_peakheight_mean, bias_peakheight_sd, bias_peakheight_shrinkage) 14 | } 15 | 16 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/checks.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ## Do nothing if \code{trueOrStopMessage} is \code{TRUE}; otherwise, 23 | ## output the error message given by \code{trueOrStopMessage}. Used 24 | ## with \code{is.or.why.not.*} functions. 
25 | ensureTRUE = function(trueOrStopMessage) { 26 | if (!isTRUE(trueOrStopMessage)) 27 | stop(trueOrStopMessage) 28 | } 29 | 30 | ## Do nothing if \code{trueOrStopMessage} is \code{TRUE}; otherwise, 31 | ## output the error message given by \code{trueOrStopMessage} with a 32 | ## tag indicating that it is a post-condition check failure 33 | ## (indicating a bug). Used with \code{is.or.why.not.*} functions. 34 | ensureTRUEpostcondition = function(trueOrStopMessage) { 35 | if (!isTRUE(trueOrStopMessage)) 36 | stop(paste("Post-condition check failed: ",trueOrStopMessage)) 37 | } 38 | 39 | ## Output \code{TRUE} if \code{smooth.dat} is list of numeric vectors 40 | ## with same length as \code{dat}, but each with length 53 and no 41 | ## \code{NA}'s (should be a smoothed version of \code{dat}). 42 | ## Otherwise, output a string to be used in an error message. 43 | is.or.why.not.smooth.dat = function(dat, smooth.dat) { 44 | if (length(dat) != length(smooth.dat)) { 45 | return("length(dat) != length(smooth.dat)") 46 | } 47 | ## if (length(smooth.dat) != 0 && 48 | ## !identical(53L, unique(sapply(smooth.dat, length)))) { 49 | ## return("=smooth.dat= should contain only 53-length vectors") 50 | ## } 51 | if (!identical(unname(lengths(dat)), unname(lengths(smooth.dat)))) 52 | stop("lengths of vectors in =dat= and =smooth.dat= do not match") 53 | ## xxx additional checks for numeric type 54 | return (TRUE) 55 | } 56 | ## xxx remove reference to 53, maybe change around length requirements to match =eb.fitSmoothCurves=. 57 | 58 | ## Output \code{TRUE} if \code{curve.models} is a list of lists; one 59 | ## list per season in \code{dat}, each list containing three elements 60 | ## \code{`f`}, the corresponding smoothed curve, \code{`tau`}, the 61 | ## estimate of the sd under the iid Gaussian noise assumption, and 62 | ## \code{`type`}, a string ("Gaussian") indicating what noise model 63 | ## was used. Otherwise, output a string to be used in an error 64 | ## message. 65 | is.or.why.not.curve.models = function(dat, curve.models) { 66 | if (length(dat) != length(curve.models)) 67 | return ("length(dat) != length(curve.models)") 68 | if (!inherits(curve.models, "list") || 69 | !all(vapply(curve.models, FUN.VALUE = logical(1L), inherits, "list"))) 70 | return ("curve.models must be a list of lists") 71 | if (length(curve.models)>0 && !identical(sapply(curve.models, names), matrix(rep(c("f","tau","type"), length(curve.models)),3))) 72 | return ("list elt's should be f, tau, type") 73 | ## xxx additional checks on f's, tau's, type's 74 | return (TRUE) 75 | } 76 | 77 | ## xxx use the =match= paradigm and forget about / use try-catch to alter post-condition error checks? 
78 | ## xxx use OO system to ensure predicates with types 79 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/curve-fits.R: -------------------------------------------------------------------------------- 1 | 2 | smooth.curves.to.fit = function(smooth.curves, type="Gaussian") { 3 | return (lapply(seq_along(smooth.curves$sigma.hat), function(fit.s.i) { 4 | list(f=smooth.curves$smooth.obj[[fit.s.i]], tau=smooth.curves$sigma.hat[fit.s.i], type=type) 5 | })) 6 | } 7 | 8 | fit.to.oldfit = function(fit) { 9 | f = lapply(fit, `[[`, "f") 10 | f <- sapply(f, `[`, seq_len(min(lengths(f)))) 11 | tau = sapply(fit, `[[`, "tau") 12 | return (list(f=f, tau=tau)) 13 | } 14 | 15 | oldfit.to.fit = function(oldfit, type="Gaussian") { 16 | return (lapply(seq_along(oldfit$tau), function(fit.s.i) { 17 | list(f=oldfit$f[,fit.s.i], tau=oldfit$tau[fit.s.i], type=type) 18 | })) 19 | } 20 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/R/eb_dists.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks, Ryan J. Tibshirani 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | 23 | ##' Creates a uniform distribution over discrete choices which can be used with \code{\link{get.eb.control.list}}. 24 | ##' 25 | ##' @param choices a vector of (discrete) choices 26 | ##' 27 | ##' @return a uniform discrete distribution over \code{choices}. 
28 | ##' 29 | ##' @examples 30 | ##' 31 | ##' uniform.seq = unifChoicePrior(letters[1:5]) 32 | ##' 33 | ##' ## The distributions used by EB can be broken down into buckets; 34 | ##' ## for the uniform discrete distribution, each bucket corresponds 35 | ##' ## (boringly) to a single choice from =choices=. However, it is 36 | ##' ## important to have a common interface. 37 | ##' random.bucket.indices = sample(seq_len(uniform.seq$n), 10000, replace=TRUE, prob=uniform.seq$probs) 38 | ##' random.elements = uniform.seq$sampler(random.bucket.indices) 39 | ##' random.elements.another.way = uniform.seq$choices[random.bucket.indices] # only works for =unifChoicePrior= 40 | ##' random.elements.a.third.way = letters[random.bucket.indices] # only works for this example 41 | ##' 42 | ##' @export 43 | unifChoicePrior = function(choices) { 44 | probs = rep(1.0/length(choices), length(choices)) # uniform probabilities 45 | sampler = function(inds) choices[inds] 46 | return (list(n = length(choices), choices = choices, probs = probs, sampler = sampler)) 47 | } 48 | ## xxx refactor to discretePrior with probs (default to uniform) and possibly tapply-like groups (determining bucket grouping, default to seq_along(choices)) 49 | 50 | integerTriangleAroundZeroPrior = function(max.val) { 51 | val.ramp = Seq(1,max.val) 52 | choices = c(rev(-val.ramp), 0, val.ramp) 53 | weights = c(val.ramp, max.val+1, rev(val.ramp)) 54 | probs = weights/sum(weights) 55 | sampler = function(inds) choices[inds] 56 | return (list(n = length(choices), choices = choices, probs = probs, sampler = sampler)) 57 | } 58 | 59 | unifDistrUnifGridPrior = function(min.val, max.val, n.choices) { 60 | # We want to discretize the uniform distribution U[min.val, max.val]. 61 | # Divide interval [min.val, max.val] into n.choices uniform-width buckets. 62 | # This code chooses the midpoints of these buckets as the representative elements (choices): 63 | choices = (1:n.choices - 0.5)/n.choices * (max.val-min.val) + min.val 64 | # Uniform probabilities for uniform grid: 65 | probs = rep(1.0/n.choices, n.choices) 66 | # Note for future: for other grids, actually compute the bucket boundaries and endpoints, then take differences of cdf 67 | mins = ((1:n.choices)-1)/n.choices * (max.val-min.val) + min.val 68 | maxes = (1:n.choices)/n.choices * (max.val-min.val) + min.val 69 | sampler = function(inds) stats::runif(length(inds), min=mins[inds], max=maxes[inds]) 70 | return (list(n = n.choices, choices = choices, probs = probs, sampler=sampler)) 71 | } 72 | 73 | logUnifDistrLogUnifGridPrior = function(min.val, max.val, n.choices) { 74 | # We want to discretize the uniform distribution U[min.val, max.val]. 75 | # Divide interval [min.val, max.val] into n.choices uniform-width buckets. 
76 | # This code chooses the midpoints of these buckets as the representative elements (choices): 77 | min.val <- log(min.val) 78 | max.val <- log(max.val) 79 | choices = (1:n.choices - 0.5)/n.choices * (max.val-min.val) + min.val 80 | # Uniform probabilities for uniform grid: 81 | probs = rep(1.0/n.choices, n.choices) 82 | # Note for future: for other grids, actually compute the bucket boundaries and endpoints, then take differences of cdf 83 | mins = ((1:n.choices)-1)/n.choices * (max.val-min.val) + min.val 84 | maxes = (1:n.choices)/n.choices * (max.val-min.val) + min.val 85 | sampler = function(inds) exp(stats::runif(length(inds), min=mins[inds], max=maxes[inds])) 86 | return (list(n = n.choices, choices = exp(choices), probs = probs, sampler=sampler)) 87 | } 88 | 89 | unifDistrGaussianGridPrior = function(min.val, max.val, n.choices, mean=(max.val+min.val)/2, sd=(max.val-min.val)/4) { 90 | # Non-truncated normal CDF at window ends: 91 | left.cd = stats::pnorm(min.val, mean=mean, sd=sd) 92 | right.cd = stats::pnorm(max.val, mean=mean, sd=sd) 93 | 94 | endpoint.cds = (0:n.choices)/n.choices * (right.cd-left.cd) + left.cd 95 | endpoints = stats::qnorm(endpoint.cds,mean=mean,sd=sd) 96 | if(endpoints[1] + 1e-12 < min.val) stop("Bug in tnormcdf reasoning.") 97 | if(endpoints[length(endpoints)] - 1e-12 > max.val) stop("Bug in tnormcdf reasoning.") 98 | # Domain- and mass- midpoints of buckets are chosen as the choices (these are the same since we are dealing with a uniform distribution): 99 | choices = (endpoints[1:n.choices] + endpoints[2:(n.choices+1)])/2 100 | probs = diff(endpoints)/(max.val-min.val) 101 | 102 | # todo sampler 103 | 104 | return (list(n=n.choices,choices=choices,probs=probs)) 105 | } 106 | 107 | unifLocGridPrior = function(fit) { 108 | locs = max.col(t(fit$f), ties.method="last") 109 | #locs = apply(fit$f,2,which.max) 110 | delta = mean(diff(sort(locs)))/2 111 | choices = Seq(max(round(min(locs)-delta),1), 112 | min(round(max(locs)+delta),nrow(fit$f))) 113 | return (unifChoicePrior(choices)) 114 | } 115 | 116 | # Uniform grid spacing, uniform scale (underlying peak height) distribution. 117 | unifScaleUnifGridPrior = function(fit, n.choices) { 118 | peaks = apply(fit$f,2,max) 119 | delta = mean(diff(sort(peaks)))/2 120 | 121 | min.val = min(peaks)-delta 122 | max.val = max(peaks)+delta 123 | 124 | return (unifDistrUnifGridPrior(min.val, max.val, n.choices)) 125 | } 126 | 127 | # Gaussian grid spacing, uniform scale (underlying peak height) distribution. 
128 | unifScaleGaussianGridPrior = function(fit, n.choices) { 129 | peaks = apply(fit$f,2,max) 130 | delta = mean(diff(sort(peaks)))/2 131 | 132 | min.val = min(peaks)-delta 133 | max.val = max(peaks)+delta 134 | 135 | return (unifDistrGaussianGridPrior(min.val, max.val, n.choices)) 136 | } 137 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/param_baseline.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/param_baseline.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/param_full.dat.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/param_full.dat.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/param_max.n.sims.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/param_max.n.sims.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/man-roxygen/sim.method_template.R: -------------------------------------------------------------------------------- 1 | ../../epiforecast/man-roxygen/sim.method_template.R -------------------------------------------------------------------------------- /epiforecast.cpp14funs/src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX14 2 | 3 | # Local Variables: 4 | # mode: Makefile 5 | # End: 6 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/src/epiforecast.cpp14funs_types.hpp: -------------------------------------------------------------------------------- 1 | // author_header begin 2 | // Copyright (C) 2017 Logan C. Brooks 3 | // 4 | // This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | // 6 | // Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | // author_header end 8 | // license_header begin 9 | // epiforecast is free software: you can redistribute it and/or modify 10 | // it under the terms of the GNU General Public License as published by 11 | // the Free Software Foundation, version 2 of the License. 
12 | // 13 | // epiforecast is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | // 18 | // You should have received a copy of the GNU General Public License 19 | // along with epiforecast. If not, see . 20 | // license_header end 21 | 22 | // Note: the naming of this file is important in determining whether it is 23 | // properly included by the Rcpp::export attribute. 24 | 25 | #ifndef EPIFORECAST_TYPES_H 26 | #define EPIFORECAST_TYPES_H 27 | 28 | #include 29 | 30 | // xxx global typedefs; perhaps better as template arguments 31 | using Time = double; 32 | using Observation = double; 33 | using Mean = double; 34 | using SD = double; 35 | using LogLikelihood = double; 36 | using Rp = double; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs/tests/testthat/fluview_hhs1_fetch.Rdata.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast.cpp14funs/tests/testthat/fluview_hhs1_fetch.Rdata.rds -------------------------------------------------------------------------------- /epiforecast.cpp14funs/tests/testthat/test-eb.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | 23 | context("Testing the eb.sim() function..") 24 | 25 | area.name = "hhs1" 26 | 27 | ## Make a sim object from fluview. 28 | full.dat = epiforecast::fetchEpidataFullDat("fluview", area.name, "wili", 29 | min.points.in.season=52L, 30 | first.week.of.season = 21L, 31 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 32 | mysim = eb.sim(full.dat, max.n.sims=100) 33 | 34 | ## Tests: 35 | 36 | test_that("Returns object of class 'sim'.", { 37 | expect_equal(class(mysim),"sim") 38 | }) 39 | -------------------------------------------------------------------------------- /epiforecast.cpp14funs_0.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast.cpp14funs_0.0.1.pdf -------------------------------------------------------------------------------- /epiforecast/.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | -------------------------------------------------------------------------------- /epiforecast/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: epiforecast 2 | Type: Package 3 | Title: Tools for forecasting semi-regular seasonal epidemic curves and similar 4 | time series 5 | Version: 0.0.1 6 | Date: 2015-12-09 7 | Author: Logan C. Brooks, Sangwon Hyun, David C. Farrow, Aaron Rumack, Ryan J. 8 | Tibshirani, Roni Rosenfeld 9 | Maintainer: Logan C. Brooks 10 | Imports: 11 | stats, 12 | utils, 13 | lubridate, 14 | splines, 15 | glmgen, 16 | glmnet, 17 | httr, 18 | Rcpp, 19 | Matrix, 20 | rlist, 21 | Hmisc, 22 | weights, 23 | hexbin, 24 | plotrix, 25 | tibble, 26 | dplyr, 27 | tidyr, 28 | pipeR, 29 | matrixStats, 30 | quantreg, 31 | lpSolve, 32 | parallel, 33 | R.utils, 34 | RCurl, 35 | pbmcapply 36 | Description: Tools for forecasting semi-regular seasonal epidemic curves and 37 | similar time series. Includes an empirical Bayes approach that forms a prior 38 | by transforming historical curves, a basis regression approach that balances 39 | matching observations from the current season and matching historical 40 | seasons' measurements for future weeks, and timestep-by-timestep weighted 41 | kernel density estimation on backward differences parameterized by both the 42 | time series measurements and the current time. 
43 | License: GPL-2 44 | RoxygenNote: 7.3.2 45 | Collate: 46 | 'RcppExports.R' 47 | 'backcasters.R' 48 | 'backcasters_new.R' 49 | 'match.R' 50 | 'utils.R' 51 | 'br.R' 52 | 'cv.R' 53 | 'cv_apply.R' 54 | 'delphi_epidata.R' 55 | 'empirical.futures.R' 56 | 'empirical.trajectories.R' 57 | 'simclass.R' 58 | 'ensemble.R' 59 | 'namesp.R' 60 | 'map_join.R' 61 | 'epiproject.R' 62 | 'forecast_type.R' 63 | 'get_completed_fluview_state_df.R' 64 | 'holidays.R' 65 | 'weeks.R' 66 | 'loaders.R' 67 | 'predx_v2_spreadsheets.R' 68 | 'read.R' 69 | 'simplify2arrayp.R' 70 | 'target_spec.R' 71 | 'todo-by-file.R' 72 | 'twkde.R' 73 | LinkingTo: Rcpp 74 | SystemRequirements: 75 | Suggests: 76 | testthat 77 | Encoding: UTF-8 78 | 79 | -------------------------------------------------------------------------------- /epiforecast/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("[",array_proxy) 4 | S3method("[<-",array_proxy) 5 | S3method("[[",array_proxy) 6 | S3method("[[<-",array_proxy) 7 | S3method("dim<-",array_proxy) 8 | S3method("dimnames<-",array_proxy) 9 | S3method(aperm,array_proxy) 10 | S3method(as.list,array_proxy) 11 | S3method(as.vector,array_proxy) 12 | S3method(c,sim) 13 | S3method(c,target_forecast) 14 | S3method(c,uniform_forecast) 15 | S3method(dim,array_proxy) 16 | S3method(dimnames,array_proxy) 17 | S3method(length,array_proxy) 18 | S3method(names,array_proxy) 19 | S3method(plot,sim) 20 | S3method(plot,target_forecast) 21 | S3method(plot,target_multicast) 22 | S3method(print,array_proxy) 23 | S3method(print,sim) 24 | S3method(print,target_forecast) 25 | S3method(print,uniform_forecast) 26 | S3method(simplified_simlike,default) 27 | S3method(simplified_simlike,sim) 28 | S3method(target_forecast,sim) 29 | S3method(target_forecast,uniform_forecast) 30 | export("[.array_proxy") 31 | export("[<-.array_proxy") 32 | export("[[.array_proxy") 33 | export("[[<-.array_proxy") 34 | export("dim<-.array_proxy") 35 | export("dimnames<-.array_proxy") 36 | export(DateToYearWeekWdayDF) 37 | export(Date_to_epiweek) 38 | export(DatesOfSeason) 39 | export(Seq) 40 | export(add_epiweek_integer) 41 | export(aperm.array_proxy) 42 | export(array_proxy) 43 | export(as.list.array_proxy) 44 | export(as.vector.array_proxy) 45 | export(augmentWeeklyDF) 46 | export(br.sim) 47 | export(br.smoothedCurve) 48 | export(c.sim) 49 | export(c.target_forecast) 50 | export(c.uniform_forecast) 51 | export(covid19ilinet.202003.202008.target.specs) 52 | export(covid19ilinet.forecast.types) 53 | export(covid19ilinet_target_trajectory_preprocessor) 54 | export(cv_apply) 55 | export(dat.to.matrix) 56 | export(degenerate_em_weights) 57 | export(dim.array_proxy) 58 | export(dimnames.array_proxy) 59 | export(dimnames_or_inds) 60 | export(dimnamesnamesp) 61 | export(dimnamesp) 62 | export(dimnamesp_to_dimindices) 63 | export(dimp) 64 | export(dur) 65 | export(empirical.futures.sim) 66 | export(empirical.trajectories.sim) 67 | export(epi_end) 68 | export(epi_week_to_model_week) 69 | export(epiweek_Seq) 70 | export(epiweek_to_Date) 71 | export(epiweek_to_sunday) 72 | export(fetchEpidataDF) 73 | export(fetchEpidataFullDat) 74 | export(fetchEpidataHistoryDF) 75 | export(fetchUpdatingResource) 76 | export(flusight2016.evaluation.forecast.types) 77 | export(flusight2016.proxy.forecast.types) 78 | export(flusight2016.target.specs) 79 | export(flusight2016_target_trajectory_preprocessor) 80 | export(flusight2016ilinet_target_trajectory_preprocessor) 81 
| export(flusight2017flusurv.target.specs) 82 | export(flusight2017flusurv_target_trajectory_preprocessor) 83 | export(flusight2018.evaluation.forecast.types) 84 | export(flusight2018.proxy.forecast.types) 85 | export(flusight2018flusurv.first.submission.epi.week) 86 | export(flusight2018flusurv.last.submission.epi.week) 87 | export(flusight2018ilinet.first.submission.epi.week) 88 | export(flusight2018ilinet.last.submission.epi.week) 89 | export(flusight2018ilinet_target_trajectory_preprocessor) 90 | export(flusight2018natreg.target.specs) 91 | export(flusight2018state.target.specs) 92 | export(get.latest.time) 93 | export(get_br_control_list) 94 | export(is_christmas) 95 | export(is_newyear) 96 | export(is_thanksgiving) 97 | export(lasso_lad_coef) 98 | export(lastWeekNumber) 99 | export(length.array_proxy) 100 | export(map_join) 101 | export(map_join_) 102 | export(match.arg.else.default) 103 | export(match.dat) 104 | export(match.integer) 105 | export(match.new.dat.sim) 106 | export(match.nonnegative.numeric) 107 | export(match.single.na.or.numeric) 108 | export(match.single.nonna.integer) 109 | export(match.single.nonna.integer.or.null) 110 | export(match.single.nonna.numeric) 111 | export(mimicPastDF) 112 | export(mimicPastEpidataDF) 113 | export(model_week_to_epi_week) 114 | export(model_week_to_time) 115 | export(named_array_to_name_arrayvecs) 116 | export(named_arrayvec_to_name_arrayvec) 117 | export(names.array_proxy) 118 | export(namesp) 119 | export(ndimp) 120 | export(no_join) 121 | export(ons) 122 | export(pht) 123 | export(plot.sim) 124 | export(plot.target_forecast) 125 | export(plot.target_multicast) 126 | export(print.array_proxy) 127 | export(print.sim) 128 | export(print.target_forecast) 129 | export(print.uniform_forecast) 130 | export(pwk) 131 | export(read.from.file) 132 | export(reformat_to_predx_v2_spreadsheet) 133 | export(seasonModelWeekDFToYearWeekDF) 134 | export(seasonModelWeekToYearWeekDF) 135 | export(seasonModelWeekWdayDFToDate) 136 | export(seasonModelWeekWdayToDate) 137 | export(seasonOfDate) 138 | export(seasonOfYearWeek) 139 | export(season_model.week_to_epiweek) 140 | export(season_to_Season) 141 | export(simplex_lad_weights) 142 | export(simplified_simlike) 143 | export(simplified_simlike.sim) 144 | export(subtract_epiweek_epiweek) 145 | export(target_forecast) 146 | export(target_forecast.sim) 147 | export(target_forecast.uniform_forecast) 148 | export(time_to_model_week) 149 | export(trimPartialPastSeasons) 150 | export(twkde.markovian.sim) 151 | export(twkde.sim) 152 | export(unite_arraylike) 153 | export(usa.flu.first.week.of.season) 154 | export(usa_flu_inseason_flags) 155 | export(vector_as_named_array) 156 | export(vector_as_named_array_) 157 | export(weekConventions) 158 | export(weighted.bw.nrd0ish) 159 | export(weighted_tabulate) 160 | export(with_dimnames) 161 | export(with_dimnamesnames) 162 | export(with_no_effective_evaluation_time_mask_window) 163 | export(yearWeekDFToSeasonModelWeekDF) 164 | export(yearWeekToSeasonModelWeekDF) 165 | export(yearWeekWdayDFToDate) 166 | export(yearWeekWdayListsToDate) 167 | export(yearWeekWdayVecsToDate) 168 | import(R.utils) 169 | import(graphics) 170 | import(hexbin) 171 | import(httr) 172 | import(parallel) 173 | import(pipeR) 174 | import(rlist) 175 | import(tibble) 176 | importFrom(Matrix,Diagonal) 177 | importFrom(Matrix,Matrix) 178 | useDynLib(epiforecast) 179 | -------------------------------------------------------------------------------- /epiforecast/R/RcppExports.R: 
-------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Weighted, more \code{nbins}-restrictive version of \code{base::tabulate} 5 | #' 6 | #' @param bin integer-compatible vector; entries must be non-NA and between 1 7 | #' and \code{nbins}; these indices denote entries in the result vector to which 8 | #' the corresponding weights in \code{w} should be added 9 | #' @param nbins single non-NA, non-negative integer; length of the vector to 10 | #' return 11 | #' @param w numeric-compatible vector of the same length as \code{bin}; weights 12 | #' corresponding to the indices in \code{bin} 13 | #' @return numeric vector of length \code{nbins}; the \code{i}th entry is like 14 | #' \code{sum(w[bin==i])}, but with a naive summation algorithm 15 | #' 16 | #' @useDynLib epiforecast 17 | #' @export 18 | weighted_tabulate <- function(bin, nbins, w) { 19 | .Call('_epiforecast_WeightedTabulateRcpp', PACKAGE = 'epiforecast', bin, nbins, w) 20 | } 21 | 22 | -------------------------------------------------------------------------------- /epiforecast/R/cv.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' Compare two cv.sim objects 23 | cv.compare = function(cv.sim1, cv.sim2){ 24 | print("A helpful message like this: setting 1 (wiggle, holiday effect, etc.) 
is better than setting 2( wiggle, etc)") 25 | } 26 | 27 | 28 | 29 | ##' Class for cv objects; contains 30 | ##' (1) control list 31 | ##' (2) for each forecasting time and for each left-out season (fold), what are the densities in the 52 by n.grid block of the 2d plane?\ 32 | ##' (3) several things pre-calculated; the prediction scores (negative log-likelihood) for 4 target forecasts. 33 | ##' The idea is that, instead of 52 by nsim curves, we can store 52 by 34 | ##' n.grid values that store the density estimates, where n.grid can be 35 | ##' hundreds, while n.sim may be 10,000's. 36 | ##' It should return /all/ quantities required for doing 37 | cv.sim = function(){ 38 | print("Not written yet") 39 | return() 40 | } 41 | 42 | ##' Holiday effect: 43 | -------------------------------------------------------------------------------- /epiforecast/R/cv_apply.R: -------------------------------------------------------------------------------- 1 | ##' @import parallel 2 | ##' @import R.utils 3 | NULL 4 | 5 | cv_apply_helper = function(train_data, test_data, indexer_list, fn, parallel_dim_i=0L, ...) { 6 | current_dim_i = length(indexer_list) 7 | stopifnot(dim(train_data)[current_dim_i] == dim(test_data)[current_dim_i]) 8 | if (current_dim_i == 0L) { 9 | result = fn(train_data, test_data, ...) 10 | return (result) 11 | } else { 12 | current_dim_size = dim(train_data)[current_dim_i] 13 | current_dim_seq = seq_len(current_dim_size) 14 | current_dim_inpnames = dimnames(train_data)[[current_dim_i]] 15 | indexer_name = names(indexer_list)[current_dim_i] 16 | indexer_val = indexer_list[[current_dim_i]] 17 | switch(indexer_name, 18 | each={ 19 | train_inds = current_dim_seq 20 | test_inds = train_inds 21 | current_dim_outnames = current_dim_inpnames 22 | }, 23 | all={ 24 | train_inds = list(TRUE) 25 | test_inds = train_inds 26 | current_dim_outnames = "all" 27 | }, 28 | smear={ 29 | train_inds = lapply(current_dim_seq, function(center_ind) { 30 | window_inds = center_ind + indexer_val 31 | window_inds <- window_inds[1L <= window_inds & window_inds <= current_dim_size] 32 | if (length(window_inds)==0L) { 33 | stop ("smear argument led to selection of no data") 34 | } 35 | window_inds 36 | }) 37 | test_inds = current_dim_seq 38 | current_dim_outnames = current_dim_inpnames 39 | }, 40 | ablation={ 41 | train_inds = lapply(current_dim_seq, function(left_out_ind) { 42 | current_dim_seq[-left_out_ind] 43 | }) 44 | test_inds = train_inds 45 | current_dim_outnames = current_dim_inpnames 46 | }, 47 | subsets={ 48 | train_inds = indexer_val 49 | test_inds = train_inds 50 | current_dim_outnames = names(indexer_val) 51 | }, 52 | loo={ 53 | train_inds = lapply(current_dim_seq, function(train_out_ind) { 54 | current_dim_seq[-train_out_ind] 55 | }) 56 | test_inds = current_dim_seq 57 | current_dim_outnames = current_dim_inpnames 58 | }, 59 | oneahead={ 60 | test_start_ind = match.single.nonna.integer(indexer_val) 61 | if (test_start_ind <= 1L || current_dim_size < test_start_ind) { 62 | stop ("oneahead argument outside the index range for the corresponding dimension, or equal to one, resulting in no training data for the first fold") 63 | } 64 | test_inds = test_start_ind-1L + seq_len(current_dim_size-test_start_ind+1L) 65 | train_inds = lapply(test_inds-1L, seq_len) 66 | current_dim_outnames = current_dim_inpnames[test_inds] 67 | }, 68 | loo_oneahead={ 69 | oneahead_start_ind = match.single.nonna.integer(indexer_val) 70 | if (oneahead_start_ind <= 2L || current_dim_size < oneahead_start_ind) { 71 | stop ("loo_oneahead 
argument outside the index range for the corresponding dimension, or equal to one or two, resulting in no training data for the first fold") 72 | } 73 | loo_test_inds = seq_len(oneahead_start_ind-1L) 74 | loo_train_inds = lapply(loo_test_inds, function(train_out_ind) { 75 | loo_test_inds[-train_out_ind] 76 | }) 77 | oneahead_test_inds = oneahead_start_ind-1L + seq_len(current_dim_size-oneahead_start_ind+1L) 78 | oneahead_train_inds = lapply(oneahead_test_inds-1L, seq_len) 79 | train_inds = c(loo_train_inds, oneahead_train_inds) 80 | test_inds = c(loo_test_inds, oneahead_test_inds) 81 | current_dim_outnames = current_dim_inpnames[test_inds] 82 | }, 83 | { 84 | stop("Unrecognized indexer name.") 85 | } 86 | ) 87 | stopifnot(length(train_inds) == length(test_inds)) 88 | current_dim_lapply = if (current_dim_i == parallel_dim_i) { 89 | print("parallel::mclapply") 90 | parallel::mclapply 91 | } else { 92 | lapply 93 | } 94 | subresult.list = 95 | setNames(current_dim_lapply(seq_along(train_inds), function(indset_i) { 96 | train_data_inds = as.list(rep(TRUE, length(dim(train_data)))) 97 | if (length(train_inds[[indset_i]])==0L) { 98 | stop ("Some indexing operation resulted in 0 indices.") 99 | } 100 | train_data_inds[[current_dim_i]] <- train_inds[[indset_i]] 101 | inner_train_data = do.call(`[`, c(list(train_data, drop=FALSE), train_data_inds)) 102 | test_data_inds = as.list(rep(TRUE, length(dim(test_data)))) 103 | test_data_inds[[current_dim_i]] <- test_inds[[indset_i]] 104 | inner_test_data = do.call(`[`, c(list(test_data, drop=FALSE), test_data_inds)) 105 | cv_apply_helper(inner_train_data, inner_test_data, head(indexer_list,-1L), fn, parallel_dim_i=parallel_dim_i, ...) 106 | }), current_dim_outnames) 107 | result = simplify2array(subresult.list) 108 | return (result) 109 | } 110 | } 111 | 112 | ##' \code{apply}-like function applying binary functions on training and test set selections 113 | ##' 114 | ##' @param data an array 115 | ##' @param indexer_list a named list with one entry per dimension of \code{data} 116 | ##' specifying how to select training and test indices for that dimension; for 117 | ##' the \code{i}th entry: 118 | 119 | ##' * \code{each=NULL} slices both training and test data into 120 | ##' \code{dim(data)[[i]]} pieces by selecting each index along the \code{i}th 121 | ##' dimension; the corresponding output dimension width of 122 | ##' \code{dim(data)[[i]]} 123 | 124 | ##' * \code{all=NULL} performs no indexing along the \code{i}th dimension for 125 | ##' either training or test data; the corresponding output dimension has width 126 | ##' 1 and name "all" 127 | 128 | ##' * \code{smear=relative.indices} acts like \code{each=NULL}, but instead of 129 | ##' each slices corresponding to a single index, allows for nearby indices to 130 | ##' be included as well; for the \code{j}th slice, includes data for valid 131 | ##' indices in \code{j+relative.indices} 132 | 133 | ##' * \code{ablation=NULL} acts like \code{each=NULL}, but for the \code{j}th 134 | ##' "slice", excludes, rather than selects, data corresponding to index 135 | ##' \code{j} 136 | 137 | ##' * \code{subsets=subset.list} indexes both training and test data based on 138 | ##' the subsets in \code{subset.list}; each entry in \code{subset.list} should 139 | ##' be a vector of indices (logical, integer, or character) into the 140 | ##' \code{i}th dimension of \code{data}; the output dimension has width 141 | ##' \code{length(subset.list)} and names \code{names(subset.list)} 142 | 143 | ##' * \code{loo=NULL} performs 
leave-one-out cross-validation indexing: the 144 | ##' training set like \code{ablation=NULL}, while the test set is indexed like 145 | ##' \code{each=NULL} 146 | 147 | ##' * \code{oneahead=test_start_ind} slices the test data by taking each index 148 | ##' greater than or equal to the specified single \emph{integer} index 149 | ##' \code{test_start_ind}; the test data corresponding to index i is paired 150 | ##' with training data from indices strictly preceding i 151 | 152 | ##' * \code{loo_oneahead=oneahead_start_ind} slices the test data by each 153 | ##' index (similar to each); if the index i is less than the specified single 154 | ##' \emph{integer} index \code{oneahead_start_ind}, then it is paired with 155 | ##' training data from indices strictly preceding \code{oneahead_start_ind}, 156 | ##' excluding i; if i >= \code{oneahead_start_ind}, then the training data 157 | ##' contains all indices strictly preceding i 158 | 159 | ##' @param fn a \code{function(training.slice, test.slice)} returning a scalar, 160 | ##' vector, matrix, array, or list, with the same class and fixed structure 161 | ##' for all inputs 162 | 163 | ##' @return an array with dimensionality equal to the sum of the 164 | ##' dimensionless of the output of \code{fn} and of \code{data} 165 | 166 | ##' @md 167 | ##' @export 168 | cv_apply = function(data, indexer_list, fn, parallel_dim_i=0L, ...) { 169 | ## If =data= is 1-D, convert it to an array so it will have non-NULL dim: 170 | if (is.null(dim(data))) { 171 | data <- as.array(data) 172 | } 173 | if (length(dim(data)) != length(indexer_list)) { 174 | stop ("Need exactly one indexer_list entry per dimension of data (or exactly 1 entry for vector data).") 175 | } 176 | if (is.null(names(indexer_list))) { 177 | stop ("Indexer types must be specified using names in indexer_list; indexer_list has no names.") 178 | } 179 | ## Use recursive cv_apply_helper to compute the entries of the result: 180 | result = cv_apply_helper(data, data, indexer_list, fn, parallel_dim_i=parallel_dim_i, ...) 181 | ## --- Adjust the class and dimnames: --- 182 | ## Make sure the result is an array: 183 | result <- as.array(result) 184 | ## Make sure the dimnames are a list, not NULL: 185 | if (is.null(dimnames(result))) { 186 | dimnames(result) <- rep(list(NULL), length(dim(result))) 187 | } 188 | ## Make sure the dimnames names are a character vector, no NULL: 189 | if (is.null(names(dimnames(result)))) { 190 | names(dimnames(result)) <- rep("",length(dim(result))) 191 | } 192 | ## If the original input =data= had dimnames names, assign them to the 193 | ## corresponding dimensions in the result: 194 | if (!is.null(names(dimnames(data)))) { 195 | names(dimnames(result))[tail(seq_along(dim(result)), length(dim(data)))] <- names(dimnames(data)) 196 | } 197 | return (result) 198 | } 199 | 200 | ## todo don't subset at each dimension; prepare all indices then subset once? or once @ parallel dimension & once @ last dimension? 201 | ## todo try explicitly constructing cluster and things to export 202 | -------------------------------------------------------------------------------- /epiforecast/R/empirical.futures.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. 
Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' Simulate future in current trajectory with empirical (historical) 23 | ##' distribution 24 | ##' 25 | ##' @template sim.method_template 26 | ##' 27 | ##' @examples 28 | ##' ## National-level ILINet weighted %ILI data for recent seasons, excluding 2009 pandemic: 29 | ##' area.name = "nat" 30 | ##' full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 31 | ##' min.points.in.season = 52L, 32 | ##' first.week.of.season = 31L, 33 | ##' cache.file.prefix=sprintf("fluview_%s_fetch", area.name)) 34 | ##' full.dat <- full.dat[names(full.dat)!="S2009"] 35 | ##' ## Sample from conditional curve distribution estimate using the above data 36 | ##' ## and CDC's 2015 national %wILI onset threshold baseline of 2.1: 37 | ##' sim = empirical.futures.sim(full.dat, baseline=2.1, max.n.sims=100) 38 | ##' print(sim) 39 | ##' plot(sim, type="lineplot") 40 | ##' 41 | ##' @author Logan C. Brooks, David C. Farrow, Sangwon Hyun, Ryan J. 
Tibshirani, Roni Rosenfeld 42 | ##' 43 | ##' @export 44 | empirical.futures.sim = function(full.dat, baseline=NA_real_, max.n.sims=2000L) { 45 | ## extract historical data and future data from full.dat 46 | dat = head(full.dat, -1L) 47 | dat <- match.dat(dat) 48 | new.dat.sim = tail(full.dat, 1L)[[1]] 49 | new.dat.sim <- match.new.dat.sim(new.dat.sim) 50 | old.season.labels = head(names(full.dat), -1L) 51 | new.season.label = tail(names(full.dat), 1L) 52 | baseline <- match.single.na.or.numeric(baseline) # (ignored by twkde though) 53 | max.n.sims <- match.single.nonna.integer.or.null(max.n.sims) 54 | 55 | n.out = nrow(new.dat.sim[["ys"]]) 56 | ## sim object containing specified "pasts": 57 | past.sim = new.dat.sim 58 | ## sim object containing empirical "futures": 59 | future.sim = list( 60 | ys = sapply(dat, function(historical.trajectory) { 61 | approx(historical.trajectory, xout=seq_len(nrow(new.dat.sim[["ys"]])), rule=2L)[["y"]] 62 | }), 63 | weights = rep(1, length(dat)) 64 | ) 65 | ## We want to build a sim object for the random variable dplyr::coalesce(PAST, 66 | ## FUTURE), treating PAST and FUTURE as independent. If the n.sims for the 67 | ## past and future sim objects are small enough, we can calculate the 68 | ## resulting output sim exactly; otherwise, we will need to use sampling. 69 | n.past.sims = length(past.sim[["weights"]]) 70 | n.future.sims = length(future.sim[["weights"]]) 71 | if (n.past.sims * n.future.sims <= max.n.sims) { 72 | ## We can calculate the output sim exactly. (This case will typically be 73 | ## triggered when new.dat.sim represents a single possible trajectory rather 74 | ## than a distribution.) 75 | 76 | ## Pair each new.dat.sim partial trajectory with each dat trajectory: 77 | past.sim.is = rep(seq_len(n.past.sims), each=n.future.sims) 78 | future.sim.is = rep(seq_len(n.future.sims), times=n.past.sims) 79 | ## Construct wider sim objects that together correspond to a sim object over 80 | ## the cartesian product : 81 | product.past.sim = list( 82 | ys=past.sim[["ys"]][,past.sim.is], 83 | weights=past.sim[["weights"]][past.sim.is] 84 | ) 85 | product.future.sim = list( 86 | ys=future.sim[["ys"]][,future.sim.is], 87 | weights=future.sim[["weights"]][future.sim.is] 88 | ) 89 | } else { 90 | ## Construct (potentially) wider sim objects that together correspond to a 91 | ## sim object over the cartesian product : 92 | product.past.sim = upsample_sim(past.sim, max.n.sims, TRUE) 93 | product.future.sim = upsample_sim(future.sim, max.n.sims, TRUE) 94 | } 95 | 96 | ys = dplyr::coalesce(product.past.sim[["ys"]], product.future.sim[["ys"]]) 97 | weights = product.past.sim[["weights"]] * product.future.sim[["weights"]] 98 | 99 | ## Make a dummy control list, containing only model name 100 | control.list = list(model = "empirical.futures") 101 | 102 | ## Return sim object 103 | sim = list(ys=ys, 104 | weights=weights, 105 | control.list=control.list, 106 | old.dat = list(dat), 107 | ## fake a vector new.dat if necessary: 108 | new.dat = rowMeans(new.dat.sim[["ys"]]), 109 | old.season.labels = list(old.season.labels), 110 | new.season.label = list(new.season.label)) 111 | class(sim) <- "sim" 112 | return (sim) 113 | } 114 | -------------------------------------------------------------------------------- /epiforecast/R/empirical.trajectories.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. 
Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' Simulate future in current trajectory with empirical (historical) 23 | ##' distribution 24 | ##' 25 | ##' @template sim.method_template 26 | ##' 27 | ##' @examples 28 | ##' ## National-level ILINet weighted %ILI data for recent seasons, excluding 2009 pandemic: 29 | ##' area.name = "nat" 30 | ##' full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 31 | ##' min.points.in.season = 52L, 32 | ##' first.week.of.season = 31L, 33 | ##' cache.file.prefix=sprintf("fluview_%s_fetch", area.name)) 34 | ##' full.dat <- full.dat[names(full.dat)!="S2009"] 35 | ##' ## Sample from conditional curve distribution estimate using the above data 36 | ##' ## and CDC's 2015 national %wILI onset threshold baseline of 2.1: 37 | ##' sim = empirical.trajectories.sim(full.dat, baseline=2.1, max.n.sims=100) 38 | ##' print(sim) 39 | ##' plot(sim, type="lineplot") 40 | ##' 41 | ##' @author Logan C. Brooks, David C. Farrow, Sangwon Hyun, Ryan J. 
Tibshirani, Roni Rosenfeld 42 | ##' 43 | ##' @export 44 | empirical.trajectories.sim = function(full.dat, baseline=NA_real_, max.n.sims=2000L) { 45 | ## extract historical data and future data from full.dat 46 | dat = head(full.dat, -1L) 47 | dat <- match.dat(dat) 48 | new.dat.sim = tail(full.dat, 1L)[[1]] 49 | new.dat.sim <- match.new.dat.sim(new.dat.sim) 50 | old.season.labels = head(names(full.dat), -1L) 51 | new.season.label = tail(names(full.dat), 1L) 52 | baseline <- match.single.na.or.numeric(baseline) # (ignored by twkde though) 53 | max.n.sims <- match.single.nonna.integer.or.null(max.n.sims) 54 | 55 | n.out = nrow(new.dat.sim[["ys"]]) 56 | empirical.sim = list( 57 | ys = sapply(dat, function(historical.trajectory) { 58 | approx(historical.trajectory, xout=seq_len(nrow(new.dat.sim[["ys"]])), rule=2L)[["y"]] 59 | }), 60 | weights = rep(1, length(dat)) 61 | ) %>>% structure(class="sim") 62 | basic.sim = downsample_sim(empirical.sim, max.n.sims) 63 | 64 | ## Make a dummy control list, containing only model name 65 | control.list = list(model = "empirical.trajectories") 66 | 67 | ## Return sim object 68 | sim = c(basic.sim, list( 69 | control.list=control.list, 70 | old.dat = list(dat), 71 | ## fake a vector new.dat if necessary: 72 | new.dat = rowMeans(new.dat.sim[["ys"]]), 73 | old.season.labels = list(old.season.labels), 74 | new.season.label = list(new.season.label) 75 | )) 76 | return (sim) 77 | } 78 | -------------------------------------------------------------------------------- /epiforecast/R/ensemble.R: -------------------------------------------------------------------------------- 1 | ##' @include simclass.R 2 | NULL 3 | 4 | ##' @export 5 | degenerate_em_weights = function(distr.cond.lkhds, 6 | init.weights=rep(1/dim(distr.cond.lkhds)[[2L]],dim(distr.cond.lkhds)[[2L]]), 7 | instance.weights=rep(1, dim(distr.cond.lkhds)[[1L]]), 8 | stop.eps = sqrt(.Machine[["double.eps"]])) { 9 | if (any(init.weights < 1e-10)) { 10 | stop("All init.weight's must be >=1e-10") 11 | } 12 | if (!isTRUE(all.equal(1, sum(init.weights)))) { 13 | stop("Sum of init.weight's must be all.equal to 1.") 14 | } 15 | 16 | ## Set some constants: 17 | n.obs = dim(distr.cond.lkhds)[[1L]] 18 | n.distr = dim(distr.cond.lkhds)[[2L]] 19 | t.distr.cond.lkhds = t(distr.cond.lkhds) # dim: n.distr x n.obs 20 | 21 | ## Set initial values of variables adjusted each step: 22 | weights = init.weights # length: n.distr 23 | t.lkhds = init.weights*t.distr.cond.lkhds # dim: n.distr x n.obs 24 | marginals = colSums(t.lkhds) # length: n.obs 25 | log.lkhd = weighted.mean(log(marginals), instance.weights) # scalar 26 | ## log.lkhds = list(log.lkhd) 27 | if (log.lkhd == -Inf) { 28 | stop ("All methods assigned a probability of 0 to at least one observed event.") 29 | } else { 30 | repeat { 31 | old.log.lkhd = log.lkhd # scalar 32 | weights <- matrixStats::colWeightedMeans(t(t.lkhds)/marginals, instance.weights) # length: n.distr 33 | t.lkhds <- weights*t.distr.cond.lkhds # dim: n.distr x n.obs 34 | marginals <- colSums(t.lkhds) # length: n.obs 35 | log.lkhd <- weighted.mean(log(marginals), instance.weights) # scalar 36 | ## xxx inefficient 37 | ## log.lkhds <- c(log.lkhds,list(log.lkhd)) 38 | stopifnot (log.lkhd >= old.log.lkhd) 39 | if (log.lkhd-old.log.lkhd <= stop.eps || (log.lkhd-old.log.lkhd)/-log.lkhd <= stop.eps) { 40 | break 41 | } 42 | } 43 | } 44 | return (weights) 45 | } 46 | ## Test for agreement: 47 | ## 48 | ## ## |cvMixtureCoeffLogLkhdTestValues|: choose the best mixing 49 | ## ## coefficient between two 
distributions from a set of test 50 | ## ## coefficients to minimize CV-estimated log-likelihood loss given the 51 | ## ## observed indicator values. 52 | ## ## |indicators|: nxm matrix, observed indicator values 53 | ## ## |distr1|: nxm matrix, first distr's probs/E[indicator values] 54 | ## ## |distr2|: nxm matrix, second distr's probs/E[indicator values] 55 | ## ## n: number of events 56 | ## ## m: number of folds 57 | ## ## |test.coeffs|: different mixing coefficients of distr1 58 | ## ## |safety.lambda|: coefficient with which to pre-mix a uniform distr in with distr1, distr2 to avoid 0 entries 59 | ## ## result: best test coefficient of distr1 60 | ## mixture_coef_loglkhd_test_values = function(indicators, distr1, distr2, 61 | ## test.coeffs=c(0:1000/1000,10^(0:-120/10),1-10^(0:-120/10)), 62 | ## safety.lambda=1e-15) { 63 | ## n = nrow(indicators) # number of events per fold 64 | ## m = ncol(indicators) # number of folds 65 | ## distr1 <- (1-safety.lambda)*distr1 + safety.lambda/n # premix with tiny uniform 66 | ## distr2 <- (1-safety.lambda)*distr2 + safety.lambda/n # premix with tiny uniform 67 | ## elosses = sapply(test.coeffs, function(test.coeff) 68 | ## sum(-indicators*log(test.coeff*distr1+(1-test.coeff)*distr2)) 69 | ## ) # cv avg loss for each test coefficient 70 | ## return (test.coeffs[which.min(elosses)]) 71 | ## } 72 | ## { 73 | ## print(mixture_coef_loglkhd_test_values(matrix(c(1,0,1,0,1,0,1,0),2),matrix(c(0.2,0.8,0.2,0.8,0.2,0.8,0.2,0.8),2),matrix(c(1,0,1,0,1,0,1,0),2))) 74 | ## print(degenerate_em_weights(distrsToLkhds(matrix(c(1,0,1,0,1,0,1,0),2),matrix(c(0.2,0.8,0.2,0.8,0.2,0.8,0.2,0.8),2),matrix(c(1,0,1,0,1,0,1,0),2)))) 75 | ## print(degenerate_em_weights(matrix(c(0.2,0.2,0.2,0.2, 1,1,1,1),4))) 76 | ## } 77 | ## 78 | ## ## Test using instance.weights: 79 | ## degenerate_em_weights(matrix(c(0.5,0.2,0.2,0.2, 0.8,0.4,0.1,0.1),,2L)) 80 | ## degenerate_em_weights(matrix(c(0.5,0.2,0.2, 0.8,0.4,0.1),,2L),instance.weights=c(1,1,2)) 81 | ## ## Scale of instance.weights does not impact the result: 82 | ## degenerate_em_weights(matrix(c(0.5,0.2,0.2, 0.8,0.4,0.1),,2L),instance.weights=c(1,1,2)/5) 83 | 84 | ## Illustrative example/check: 85 | ## { 86 | ## m = 1 87 | ## a = -3 88 | ## b = 4 89 | ## par(ask=TRUE) 90 | ## for (i in 1:10) { 91 | ## hist(rnorm(800),col=rgb(1,0,0,0.5,1),freq=FALSE) 92 | ## hist(rnorm(800,m),col=rgb(0,1,0,0.5,1),freq=FALSE,add=TRUE) 93 | ## hist(runif(800,a,b),col=rgb(0,0,1,0.5,1),freq=FALSE,add=TRUE) 94 | ## } 95 | ## print('distr1 coeff:') 96 | ## f = function(points) empiricalBucketMasses(points, (-5:5)*0.6, tack.neg.inf=TRUE, tack.pos.inf=TRUE) 97 | ## cvMixtureCoeffLogLkhd(replicate(800,f(rnorm(1))),replicate(800,f(rnorm(5000,m))),replicate(800,f(runif(5000,a,b)))) 98 | ## } 99 | 100 | ##' @export 101 | lasso_lad_coef = function(y, X, include.intercept=TRUE) { 102 | y <- as.vector(y) 103 | coef(quantreg::rq(if(include.intercept) obs ~ . else obs ~ . 
+ 0, 104 | method="lasso", 105 | data=cbind(obs=y, as.data.frame(X)), 106 | lambda=mean(abs(y-X)))) 107 | } 108 | 109 | ##' @importFrom Matrix Diagonal Matrix 110 | ##' @export 111 | simplex_lad_weights = function(y, X) { 112 | if (length(y) != nrow(X)) stop("length(y) != nrow(X)") 113 | n = nrow(X) 114 | p = ncol(X) 115 | 116 | ## p beta, n s+, n s- 117 | objective.in = c(rep(0,p),rep(1,n),rep(1,n)) 118 | const.mat = rbind(cbind( X, Matrix::Diagonal(n), -Matrix::Diagonal(n) ), 119 | cbind( Matrix::Matrix(0,n,p), Matrix::Diagonal(n), Matrix::Matrix(0,n,n) ), 120 | cbind( Matrix::Matrix(0,n,p), Matrix::Matrix(0,n,n), Matrix::Diagonal(n) ), 121 | cbind( Matrix::Diagonal(p), Matrix::Matrix(0,p,n), Matrix::Matrix(0,p,n) ), 122 | c( rep(1,p), rep(0,n), rep(0,n) )) 123 | const.dir = c(rep("=" , n), 124 | rep(">=", n), 125 | rep(">=", n), 126 | rep(">=", p), 127 | "=" ) 128 | const.rhs = c( y, 129 | rep(0,n), 130 | rep(0,n), 131 | rep(0,p), 132 | 1) 133 | 134 | structure( 135 | lpSolve::lp("min",objective.in,,const.dir,const.rhs,dense.const=as.matrix(summary(const.mat)))$solution[1:p], 136 | names=colnames(X) 137 | ) 138 | } 139 | 140 | uniform_forecast = function(...) { 141 | return (structure(list(), class="uniform_forecast")) 142 | } 143 | 144 | ##' @method target_forecast uniform_forecast 145 | ##' @export 146 | ##' @export target_forecast.uniform_forecast 147 | target_forecast.uniform_forecast = function(uniform.forecast, ..., target.name, target.spec) { 148 | bin.info = target.spec[["bin_info_for"]](...) 149 | breaks = bin.info[["breaks"]] 150 | non.na.bin.representatives = breaks[-1L] - 0.5*diff(breaks) 151 | target.values = c( 152 | non.na.bin.representatives, 153 | get_na_value_or_empty_for_target(target.spec, ...) 154 | ) 155 | target.weights = rep(1/length(target.values), length(target.values)) 156 | return (structure(list(target.values = stats::setNames(list(target.values), target.name), 157 | target.weights = target.weights, 158 | method.settings = list(uniform.pseudoweight.total=0,smooth.sim.targets=FALSE) 159 | ), 160 | class="target_forecast")) 161 | } 162 | 163 | ##' @method print uniform_forecast 164 | ##' @export 165 | ##' @export print.uniform_forecast 166 | print.uniform_forecast = function(x, ...) { 167 | cat("uniform_forecast: produces uniform-probability forecasts for any target", fill=TRUE) 168 | } 169 | 170 | ##' Add information to a \code{uniform_forecast} object 171 | ##' 172 | ##' Uses \code{recursive=FALSE} and \code{use.names=TRUE} when forwarding to the 173 | ##' list \code{c} method; any attempt to override these values will generate an 174 | ##' error. 175 | ##' 176 | ##' @param uniform.forecast a \code{uniform_forecast} object 177 | ##' @param ... list of components to add to the \code{uniform_forecast} object; must lead to 178 | ##' a resulting \code{uniform_forecast} object with components that are all uniquely, 179 | ##' nontrivially (\code{!=""}) named 180 | ##' @return \code{uniform_forecast} object with the given components appended 181 | ##' 182 | ##' @method c uniform_forecast 183 | ##' 184 | ##' @export 185 | ##' @export c.uniform_forecast 186 | c.uniform_forecast = c_for_named_lists 187 | 188 | -------------------------------------------------------------------------------- /epiforecast/R/get_completed_fluview_state_df.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Aaron Rumack 3 | ## 4 | ## This file is part of epiforecast. 
Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | get_completed_fluview_state_df = function(epigroup, first.week.of.season=31L) { 23 | st = stringi::stri_trans_tolower(epigroup) 24 | st.dat = NULL 25 | if (st != "fl" && st != "la") { 26 | st.dat = fetchEpidataDF("fluview", st, first.week.of.season = first.week.of.season, 27 | cache.file.prefix=sprintf("fluview_%s_fetch", st)) 28 | if (st == "pr") { 29 | ## Estimate seasons 2010-2013 using other epigroups in Region 2 30 | njili = get_completed_fluview_state_df("nj",first.week.of.season = first.week.of.season)$ili 31 | nyili = get_completed_fluview_state_df("ny",first.week.of.season = first.week.of.season)$ili 32 | jfkili = get_completed_fluview_state_df("jfk",first.week.of.season = first.week.of.season)$ili 33 | pr.len = length(st.dat$ili) 34 | pr.fit = lm(st.dat$ili ~ tail(njili,pr.len) + tail(nyili,pr.len) + tail(jfkili,pr.len)) 35 | pr.est = pr.fit$coefficients[1] + pr.fit$coefficients[2]*njili + pr.fit$coefficients[3]*nyili + pr.fit$coefficients[4]*jfkili 36 | pr.dat = get_completed_fluview_state_df("nj") # Placeholder for values such as epiweek, issue, etc. 
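## Note: pr.fit above is trained only on the most recent pr.len weeks (the
## weeks for which PR itself has ILI data; the nj/ny/jfk regressors are
## aligned to them via tail()), while pr.est applies the fitted coefficients
## to the full-length regressor vectors, so it also yields estimates for the
## earlier weeks; those estimates are spliced into pr.dat$ili below for the
## weeks before firstepiweek.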
37 | vars = colnames(pr.dat)[startsWith(colnames(pr.dat),"num_")] # Flush numeric variables in data frame 38 | firstepiweek = min(st.dat$epiweek[!is.na(st.dat$issue)]) 39 | mask = pr.dat$epiweek >= firstepiweek # Have data from here and later 40 | pr.dat[mask,vars] = st.dat[st.dat$epiweek >= firstepiweek,vars] 41 | pr.dat$ili[!mask] = pr.est[!mask] 42 | pr.dat$ili[mask] = st.dat$ili[st.dat$epiweek >= firstepiweek] 43 | pr.dat$wili = pr.dat$ili 44 | pr.dat$region = st 45 | st.dat = pr.dat 46 | } else if (st == "vi") { 47 | ## Estimate seasons 2010-2015 using other epigroups in Region 2 48 | njili = get_completed_fluview_state_df("nj",first.week.of.season = first.week.of.season)$ili 49 | nyili = get_completed_fluview_state_df("ny",first.week.of.season = first.week.of.season)$ili 50 | jfkili = get_completed_fluview_state_df("jfk",first.week.of.season = first.week.of.season)$ili 51 | prili = get_completed_fluview_state_df("pr",first.week.of.season = first.week.of.season)$ili 52 | vi.len = length(st.dat$ili) 53 | vi.fit = lm(st.dat$ili ~ tail(njili,vi.len) + tail(nyili,vi.len) + tail(jfkili,vi.len) + tail(prili,vi.len)) 54 | vi.est = vi.fit$coefficients[1] + vi.fit$coefficients[2]*njili + vi.fit$coefficients[3]*nyili + vi.fit$coefficients[4]*jfkili + vi.fit$coefficients[5]*prili 55 | vi.dat = get_completed_fluview_state_df("nj") # Placeholder for values such as epiweek, issue, etc. 56 | vars = colnames(vi.dat)[startsWith(colnames(vi.dat),"num_")] # Flush numeric variables in data frame 57 | firstepiweek = min(st.dat$epiweek[!is.na(st.dat$issue)]) 58 | mask = vi.dat$epiweek >= firstepiweek # Have data from here and later 59 | vi.dat[mask,vars] = st.dat[st.dat$epiweek >= firstepiweek,vars] 60 | vi.dat$ili[!mask] = vi.est[!mask] 61 | vi.dat$ili[mask] = st.dat$ili[st.dat$epiweek >= firstepiweek] 62 | vi.dat$ili[vi.dat$ili < 0] = 0 63 | vi.dat$wili = vi.dat$ili 64 | vi.dat$region = st 65 | st.dat = vi.dat 66 | } 67 | } else if (st == "la" || st == "fl") { 68 | if (st == "la") { 69 | r = c("ok","ar","tx","nm","la") 70 | rdat = get_completed_fluview_state_df("hhs6",first.week.of.season = first.week.of.season) 71 | } else { 72 | r = c("ga","al","ms","tn","ky","nc","sc","fl") 73 | rdat = get_completed_fluview_state_df("hhs4",first.week.of.season = first.week.of.season) 74 | } 75 | st1 = get_completed_fluview_state_df(r[1],first.week.of.season = first.week.of.season) 76 | st2 = get_completed_fluview_state_df(r[2],first.week.of.season = first.week.of.season) 77 | x = merge(st1,st2,by="epiweek",suffixes=c("","")) 78 | for (s in r[3:length(r)-1]) { 79 | st3 = get_completed_fluview_state_df(s,first.week.of.season = first.week.of.season) 80 | x = merge(x,st3,by="epiweek",suffixes=c("","")) 81 | } 82 | vars = colnames(st1)[startsWith(colnames(st1),"num_")] # List of numeric variables in data frame 83 | firstepiweek = min(st1$epiweek) 84 | st.dat = cbind(rdat[(rdat["epiweek"] >= firstepiweek),]) # Default values are that of the region 85 | st.dat$region = st 86 | for (v in vars) { 87 | st.dat[[v]] = get(v,st.dat) - rowSums(x[colnames(x)==v],na.rm=FALSE) # Subtract the sum of the other states in the region 88 | } 89 | st.dat$wili = st.dat$num_ili * 100 / st.dat$num_patients # Reset %ILI and wILI 90 | st.dat$ili = st.dat$wili 91 | } 92 | return (st.dat) 93 | } 94 | -------------------------------------------------------------------------------- /epiforecast/R/holidays.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. 
Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | ##' Test if Date is Christmas Day (vectorized) 23 | ##' 24 | ##' @param Date \code{Date} vector to test 25 | ##' @return logical vector 26 | ##' 27 | ##' @export 28 | is_christmas = function(Date) { 29 | posixlt = as.POSIXlt(Date) 30 | return (posixlt$mon + 1L == 12L & posixlt$mday == 25L) 31 | } 32 | 33 | ##' Test if Date is (Gregorian) New Year's Day (vectorized) 34 | ##' 35 | ##' @param Date \code{Date} vector to test 36 | ##' @return logical vector 37 | ##' 38 | ##' @export 39 | is_newyear = function(Date) { 40 | posixlt = as.POSIXlt(Date) 41 | return (posixlt$mon + 1L == 1L & posixlt$mday == 1L) 42 | } 43 | 44 | ##' Test if Date is Thanksgiving Day (vectorized) 45 | ##' 46 | ##' @param Date \code{Date} vector to test 47 | ##' @return logical vector 48 | ##' 49 | ##' @export 50 | is_thanksgiving = function(Date) { 51 | posixlt = as.POSIXlt(Date) 52 | return (posixlt$mon + 1L == 11L & (posixlt$mday - 1L) %/% 7L + 1L == 4L & posixlt$wday == 4L) 53 | } 54 | 55 | ## todo: Chinese New Year; cannot use seasonal::cny since it is GPL3.0; 56 | ## investigate seasonal-cited source, 57 | ## http://www.chinesenewyears.info/chinese-new-year-calendar.php (license does 58 | ## not mention reuse) or ConvCalendar package (projectpluto.com calendars) 59 | 60 | ## alternative: timeDate::holiday / chron::is.holiday 61 | -------------------------------------------------------------------------------- /epiforecast/R/predx_v2_spreadsheets.R: -------------------------------------------------------------------------------- 1 | 2 | ##' Reformat a predx v1 spreadsheet to a predx v2 spreadsheet 3 | ##' 4 | ##' @param old.spreadsheet predx v1 spreadsheet 5 | ##' @param old.spreadsheet predx v2 spreadsheet template 6 | ##' @return predx v2 spreadsheet 7 | ##' 8 | ##' @export 9 | reformat_to_predx_v2_spreadsheet = function(old.spreadsheet, new.template) { 10 | new.spreadsheet.minus.ordering = old.spreadsheet %>>% 11 | dplyr::group_by(Location, Target, Type, Unit) %>>% 12 | dplyr::do( 13 | tibble::tibble( 14 | location = .[["Location"]][[1L]], 15 | target = .[["Target"]][[1L]], 16 | type = tolower(.[["Type"]][[1L]]), 17 | bin = 18 | if (.[["Type"]][[1L]] == "point") { 19 | NA_character_ 20 | } else if (.[["Type"]][[1L]] == "bin") { 21 | sprintf("%.1f", covid19ilinet.percentage.bin.info[["breaks"]] %>>% {.[-length(.)]}) 22 | } else stop(sprintf('Unexpected Type encountered: %s.', .[["Type"]][[1L]])), 23 | value = .[["Value"]] 24 | ) 25 | ) %>>% 26 | dplyr::ungroup() %>>% 27 | dplyr::select(-Location, -Target, -Type, -Unit) %>>% 28 | {.} 29 | bad.rows = dplyr::anti_join(new.spreadsheet.minus.ordering, new.template, by=c("location","target", "type", "bin")) 30 | if (nrow(bad.rows)!=0L) { 31 | stop (paste(collapse="\n", capture.output({ 32 | cat('Formed rows with index column entries that are not present in the template:', fill=getOption('width')-nchar('Error: ')) 33 | print(bad.rows) 34 | }))) 35 | ## fixme todo check for duplicates 36 | } 37 | overlarge.groups = new.spreadsheet.minus.ordering %>>% 38 | dplyr::group_by(location, target, type, bin) %>>% 39 | dplyr::summarize(count=dplyr::n()) %>>% 40 | dplyr::ungroup() %>>% 41 | dplyr::filter(count != 1L) %>>% 42 | {.} 43 | if (nrow(overlarge.groups) != 0L) { 44 | stop (paste(collapse="\n", capture.output({ 45 | cat('Formed overlarge groups:', fill=getOption('width')-nchar('Error: ')) 46 | print(overlarge.groups) 47 | }))) 48 | } 49 | ## It's okay to omit rows the other way around when submitting forecasts for only a subset of 
targets. 50 | new.spreadsheet = new.spreadsheet.minus.ordering %>>% 51 | dplyr::mutate( 52 | location=ordered(location, unique(new.template[["location"]])), 53 | target=ordered(target, unique(new.template[["target"]])), 54 | type=ordered(type, unique(new.template[["type"]])) 55 | ) %>>% 56 | dplyr::arrange(location, target, type) %>>% 57 | {.} 58 | return (new.spreadsheet) 59 | } 60 | -------------------------------------------------------------------------------- /epiforecast/R/read.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | ##' Get index (time) of last non-\code{NA} in a vector or other new.dat.sim 23 | ##' @export 24 | get.latest.time = function(new.dat.sim){ 25 | new.dat.sim <- match.new.dat.sim(new.dat.sim) 26 | ys = new.dat.sim[["ys"]] 27 | 28 | is.na.ys = is.na(ys) 29 | 30 | ## if (!any(is.na.ys)) { 31 | ## stop("There are no missing values in new.dat.sim, i.e., the new season has been fully observed!") 32 | ## } 33 | 34 | ## check that NA occurrences match between all columns 35 | if (ncol(is.na.ys) > 1L && # preoptimization...: no need to check if only 1 trajectory 36 | any(xor(is.na.ys[,1], is.na.ys))) { 37 | stop ("All trajectories in new.dat.sim must have NA at exactly the same indices.") 38 | } 39 | 40 | ## Obtain index of non-NA 41 | time.of.forecast = max(0L, which(!is.na.ys[,1])) 42 | 43 | if (!all(time.of.forecast < which(is.na.ys[,1]))) { 44 | stop("new.dat.sim should be formatted so that non-NA's are followed by all NA's") 45 | } 46 | 47 | return(time.of.forecast) 48 | } 49 | 50 | 51 | ##' Function that reads from a csv file 52 | ##' @param filename name of data file with each column equal to each season, 53 | ##' with the first (n-1) columns to be, and the n'th column with the current 54 | ##' season. 55 | ##' @export 56 | read.from.file = function(filename){ 57 | 58 | ## Sanity check of data file formatting 59 | check.file.contents(filename) 60 | 61 | ## Reformat table into list 62 | full.dat = read.csv(filename) 63 | return (table.to.list(full.dat)) 64 | } 65 | 66 | 67 | 68 | ##' Function to check if a data file is properly formatted (i.e. contains the 69 | ##' headers, is filled with numeric values, etc.) Current implementation is very 70 | ##' memory-inefficient. 71 | ##' 72 | ##' @param filename name of data filewith each column equal to each season, with 73 | ##' the first (n-1) columns to be, and the n'th column with the current 74 | ##' season. 75 | check.file.contents = function(filename) { 76 | my.full.dat = read.csv(filename) 77 | check.table.format(my.full.dat) 78 | } 79 | 80 | 81 | 82 | 83 | ##' Performs various checks on full.dat as a table (matrix/data.frame). 84 | check.table.format = function(full.dat){ 85 | 86 | stopifnot(inherits(full.dat, "matrix") || inherits(full.dat, "data.frame")) 87 | 88 | ## Check if any column names are missing or NA 89 | default.colnames = Map(paste0, rep("V",ncol(full.dat)), 1:ncol(full.dat)) 90 | if(any(colnames(full.dat) == default.colnames)) stop("Must supply all column names!") 91 | if(any(is.na(colnames(full.dat)))) stop("Column names contains NA values!") 92 | if(any(colnames(full.dat)=="NA")) stop("Column names contains NA values!") 93 | 94 | ## Check if it contains all numeric values. 
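## Note on the apply() call below: apply() coerces a data.frame to a matrix
## first, so a single non-numeric column makes every column test as
## non-numeric; the stop() below still fires as intended, it just cannot say
## which column is at fault. A column-wise alternative for the data.frame
## case (an illustrative sketch only, not used here) would be:
##   all(vapply(full.dat, is.numeric, logical(1L)))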
95 | all.is.numeric = all(apply(full.dat,2, function(mycol){all(is.numeric(mycol))})) 96 | if(!all.is.numeric){ 97 | stop("Table must contain all numeric values!") 98 | } 99 | 100 | ## Helper function 101 | check.column.to.see.if.partially.observed = function(mycolumn){ 102 | ## Continue scan until you find the first three consecutive missing values 103 | scan.boolean.vector.for.three.consecutive.trues = function(ind, boolean.vector.to.scan){ 104 | if(ind+3 > length(boolean.vector.to.scan)){stop("ind is bigger than n-3")} 105 | return(all(boolean.vector.to.scan[ind:(ind+3)]))} 106 | ## Make function to deal with |is.na(mycolumn)| 107 | myscan = function(ind){scan.boolean.vector.for.three.consecutive.trues(ind, is.na(mycolumn))} 108 | max.ind.three.NA.starts = max(which(!sapply(1:(length(mycolumn)-3), myscan))) 109 | return(max.ind.three.NA.starts) 110 | } 111 | 112 | ## Check if last column is partially observed 113 | last.column = full.dat[,ncol(full.dat)] 114 | max.ind.three.NA.starts = check.column.to.see.if.partially.observed(last.column) 115 | if(max.ind.three.NA.starts >= length(last.column) - 3){ 116 | stop("I roughly scanned the last column of your table, but it doesn't seem to be partially observed!") 117 | } 118 | } 119 | 120 | 121 | 122 | ##' Performs various checks on full.dat as a list of numeric vectors. 123 | check.list.format = function(full.dat){ 124 | ## Check if list is formatted correctly. 125 | if(!inherits(full.dat, "list")) stop("Type of input is not list!") 126 | 127 | ## Check if all numeric values 128 | all.is.numeric = all(sapply(full.dat, function(mycol){all(is.numeric(mycol))})) 129 | if(!all.is.numeric){ 130 | stop("All vectors must only contain numeric values!") 131 | } 132 | 133 | ## Check if any column names are missing or NA 134 | default.colnames = Map(paste0, rep("V",length(full.dat)), 1:length(full.dat)) 135 | if(any(is.na(names(full.dat)))) stop("Column names contains NA values!") 136 | if(any(names(full.dat) == default.colnames)) stop("Must supply all column names!") 137 | if(any(nchar(names(full.dat))==0)) stop("Some column names are missing!") 138 | 139 | ## ## Check partially observed last season 140 | ## if(!any(is.na(full.dat[[length(full.dat)]]))) stop("Last element (vector) of dataframe is not partially observed! (i.e. there should be some NA values!)") 141 | 142 | 143 | ## Check if last column is partially observed. 144 | ## a = (which(!is.na(last.column.weird.full.dat[,ncol(last.column.weird.full.dat)]))) 145 | ## a.diff = c(a[2:length(a)],NA) - c(a[1:(length(a)-1)],NA) 146 | ## if(any(a.diff[!is.na(a.diff)]!=1)) stop("Last column does not seem to be partially observed! i.e. no missing values!") 147 | } 148 | 149 | 150 | 151 | ##' Function to change full.dat from table to list. 152 | table.to.list = function(full.dat){ 153 | mylist = lapply(seq_len(ncol(full.dat)), function(i) full.dat[,i]) 154 | names(mylist) = colnames(full.dat) 155 | return(mylist) 156 | } 157 | 158 | 159 | 160 | 161 | ## ##' Produces a |sim| object. 162 | ## ##' @param full.dat List of numeric vectors, each with proper names. 
163 | ## ## ' @param area.name One of "nat" or "hhs*" where * is 1~10 # 164 | ## ## ' @param full.dat Either a properly formatted list (like the output of 165 | ## ## ' fetchepidataDF(); a list of numeric vectors, with the last vector being 166 | ## ## ' the 'new' season for which forecasts are being made), or a matrix/data 167 | ## ## ' frame with appropriate column names (and of which the last column 168 | ## ## ' represents the 'new' seaosn for which forecasts are being made.) 169 | ## ## ' @param filename The file name of the csv file that contains the table of 170 | ## ## ' data, with the first row being the names of the seasons, and each 171 | ## ## ' subsequent i'th row being the dataset for, say, week i. The last column 172 | ## ## ' should contain the 'new' season. 173 | ## ## ##' @examples 174 | ## ## ##' sim = make.forecast(full.dat = NULL, 175 | ## ## ##' area.name = "hhs1", 176 | ## ## ##' first.model.week = 21L, 177 | ## ## ##' method = "br") 178 | ## ## ##' matplot(sim$ys[,1:100], type = 'l', col = 'cyan', lty=1) 179 | ## make.eb.forecast = function(full.dat, 180 | ## first.model.week, 181 | ## min.points.in.season=52L, 182 | ## n.sim, 183 | ## eb.control.list = function(...){ eb.control.list(n.sim, ... ))){ 184 | 185 | ## ## Split into old dat (list) and new dat (vector) 186 | ## old.dat = head(full.dat, -1L) 187 | ## new.dat = tail(full.dat, 1L)[[1]] 188 | 189 | ## time.of.forecast = get.latest.time(new.dat) 190 | 191 | ## ## Unbundle control list that contains options for simulation 192 | ## ## Then make forecast 193 | 194 | ## if(is.null(eb.option)){ 195 | ## eb.control.list = get.eb.control.list() 196 | ## } else { 197 | ## option.names = names(eb.options) 198 | ## eb.control.list = get.eb.control.list() 199 | ## } 200 | 201 | ## ## Make sim = eb.sim(dat, new.dat, control.list = eb.control.list) 202 | 203 | ## ## Return the simulated curves 204 | ## return(sim) 205 | ## } 206 | 207 | -------------------------------------------------------------------------------- /epiforecast/R/simplify2arrayp.R: -------------------------------------------------------------------------------- 1 | ## These functions are modified R methods from base R, under GPL 2+. Original 2 | ## license information from license() is below. The modification is from Aaron 3 | ## Rumack. 4 | 5 | ## This software is distributed under the terms of the GNU General 6 | ## Public License, either Version 2, June 1991 or Version 3, June 2007. 7 | ## The terms of version 2 of the license are in a file called COPYING 8 | ## which you should have received with 9 | ## this software and which can be displayed by RShowDoc("COPYING"). 10 | ## Version 3 of the license can be displayed by RShowDoc("GPL-3"). 11 | 12 | ## Copies of both versions 2 and 3 of the license can be found 13 | ## at https://www.R-project.org/Licenses/. 14 | 15 | ## A small number of files (the API header files listed in 16 | ## R_DOC_DIR/COPYRIGHTS) are distributed under the 17 | ## LESSER GNU GENERAL PUBLIC LICENSE, version 2.1 or later. 18 | ## This can be displayed by RShowDoc("LGPL-2.1"), 19 | ## or obtained at the URI given. 20 | ## Version 3 of the license can be displayed by RShowDoc("LGPL-3"). 21 | 22 | ## 'Share and Enjoy.' 
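## Editorial note (not in the original file): simplify2arrayp() below appears
## to be a lightly modified copy of base R's simplify2array(), per the license
## note above; the exact modification is not documented here. A rough usage
## sketch, with names chosen purely for illustration:
##   x = list(a = matrix(1:4, 2L), b = matrix(5:8, 2L))
##   y = simplify2arrayp(x)
##   dim(y)            # c(2, 2, 2): equal-shaped elements are stacked
##   dimnames(y)[[3]]  # c("a", "b"): the list names become the last dimnames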
23 | 24 | simplify2arrayp = function (x, higher = TRUE) 25 | { 26 | if (length(common.len <- unique(as.vector(lengths(x)))) > 1L) 27 | return(x) 28 | if (common.len == 1L) 29 | unlist(x, recursive = FALSE) 30 | else if (common.len > 1L) { 31 | n <- length(x) 32 | r <- unlist(x, recursive = FALSE, use.names = FALSE) 33 | if (higher && length(c.dim <- unique(lapply(x, dim))) == 34 | 1 && is.numeric(c.dim <- c.dim[[1L]]) && prod(d <- c(c.dim, 35 | n)) == length(r)) { 36 | iN1 <- is.null(n1 <- dimnames(x[[1L]])) 37 | n2 <- names(x) 38 | dnam <- if (!(iN1 && is.null(n2))) 39 | c(if (iN1) rep.int(list(n1), length(c.dim)) else n1, 40 | list(n2)) 41 | array(r, dim = d, dimnames = dnam) 42 | } 43 | else if (prod(d <- c(common.len, n)) == length(r)) 44 | array(r, dim = d, dimnames = if (!(is.null(n1 <- names(x[[1L]])) & 45 | is.null(n2 <- names(x)))) 46 | list(n1, n2)) 47 | else x 48 | } 49 | else x 50 | } 51 | -------------------------------------------------------------------------------- /epiforecast/R/todo-by-file.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | ###################### 23 | ## interface.R ####### 24 | ###################### 25 | 26 | ## todo length checks --- time.of.forecast not only with dat, but with fit 27 | ## todo remove n.out arg --- determine by min fit length 28 | 29 | ## ## source("../sample/sample_config_flu_1516.R") 30 | ## source("loaders.R") 31 | ## source("fitters.R") 32 | ## source("nidss/nidss_fetch_data.R", chdir=TRUE) 33 | 34 | ## nidss.datfit = fetchNIDSSDatFit("flu", "nationwide") 35 | 36 | ## olddatplus = nidss.datfit$olddatplus 37 | ## oldfit = nidss.datfit$fit 38 | ## include.ss = nidss.datfit$include.ss 39 | ## current.s = nidss.datfit$current.s 40 | ## first.year = nidss.datfit$first.year 41 | ## first.model.week = nidss.datfit$first.model.week 42 | 43 | ## fit.ss = include.ss[include.ss < current.s] 44 | ## exclude.2009.pandemic.season=TRUE 45 | ## if (exclude.2009.pandemic.season) { 46 | ## fit.ss <- fit.ss[-2] 47 | ## oldfit <- list(f=oldfit$f[,-2], tau=oldfit$tau[-2]) 48 | ## } 49 | ## train.ss = fit.ss 50 | ## test.s = max(train.ss)+1 51 | 52 | ## newdat = olddatplus.to.newdat(olddatplus) 53 | ## newdat.attributes = attributes(newdat) 54 | ## newdat <- newdat[match(fit.ss, include.ss)] 55 | ## newdat.attributes$names <- names(newdat) 56 | ## attributes(newdat) <- newdat.attributes 57 | 58 | ## qwer = fit.eb.control.list(oldfit.to.newfit(oldfit), get.eb.control.list()) 59 | ## asdf = eb.createForecasts(newdat, olddatplus$wili[olddatplus$season==test.s], oldfit.to.newfit(oldfit), 0) 60 | ## asdf = eb.createForecasts(newdat, olddatplus$wili[olddatplus$season==test.s], oldfit.to.newfit(oldfit), 1L) 61 | ## source("plotters.R") 62 | ## newfit = smooth.curves.to.newfit(eb.fitSmoothCurves(newdat)) 63 | ## matplot.newdat(newdat) 64 | ## matplot.newfit(newdat, newfit) 65 | ## seriesplot.newfit(newdat, smooth.curves.to.newfit(eb.fitSmoothCurves(newdat))) 66 | 67 | ## xxx instead of n.out, allow NA's in the future trajectories, just fill in all; use !is.na as another ii.match mask? 68 | ## todo explicitly make object that represents a distribution of curves, corresponding fitting functions, then the conditioning method? 69 | ## todo rename forecast time to something with "ind"? 70 | ## todo documentation 71 | ## todo imports 72 | ## todo examples 73 | 74 | ###################### 75 | ## loaders.R ######### 76 | ###################### 77 | 78 | ## xxx consider fetching by issue instead in fetchEpidataHistoryDF; at least for 79 | ## fluview, the set of all issues should be a subset of the set of all epiweeks 80 | ## from the current data frame 81 | 82 | ## todo version of mimicPastEpidataDF that never uses future data, instead 83 | ## taking the seasonally-expected change from the last available data point or 84 | ## stopping if there is no available data point beforehand (will need to handle finalized versions inputted later with lags outside the =lags= range... override their lag with the max lag in =lag= and update =issue= accordingly?) 
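## (Editorial note: the commented-out block below sketches a consistency
## test; both mimicPastEpidataDF variants are applied to the same history.dt
## and epiweek, and the resulting epiweek/issue/forecast.epiweek/wili tables
## are compared with identical().)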
85 | 86 | ## ## todo turn into test 87 | ## history.dt = fetchEpidataHistoryDT("fluview", "hhs1", 0:51, 88 | ## first.week.of.season = 31L, 89 | ## cache.file.prefix="~/.epiforecast-cache/fluview_hhs1") 90 | ## list(mimicPastEpidataDF1, mimicPastEpidataDF2) %>>% 91 | ## lapply(function(mimicPastEpidataDFn) { 92 | ## ## mimicPastEpidataDFn(history.dt, 201540L) %>>% 93 | ## mimicPastEpidataDFn(history.dt, 201040L) %>>% 94 | ## dplyr::arrange(-epiweek) %>>% 95 | ## dplyr::select(epiweek, issue, forecast.epiweek, wili) 96 | ## }) %>>% 97 | ## do.call(what=identical) %>>% 98 | ## {.} 99 | 100 | ## todo conversion operations for epidata df's to full dats & epidata history df's to dt's 101 | ## todo fail gracefully with curl errors in Epidata 102 | ## todo check augmentWeeklyDF input 103 | ## todo trimPartialPastSeasons setting to trim all incomplete (like min # being # of weeks in season) 104 | ## todo check fetching input, add default caching 105 | ## todo baseline as attr to trajectory, first.week.of.season and is.part.of.season.of.length function as attr's to full.dat? 106 | 107 | ####################### 108 | ## simclass.R ######### 109 | ####################### 110 | 111 | ## todo: sim objects (/ something with a new name) should include at least two 112 | ## options: (a) a constant, and (b) a list of ys and weights; this will require 113 | ## some refactoring. The br method should map constants to constants unless 114 | ## bootstrapping. Constants should not be represented as a single column with 115 | ## weight 1, because this cannot be distinguished from a single draw from a 116 | ## distribution with importance weight 1. 117 | 118 | ## todo make sure the weights for all the sim methods can be interpreted as 119 | ## effective number of draws 120 | 121 | ## todo proper metrics for multibin scores (including multi pwk) 122 | 123 | ## todo special treatment of new.dat whenever turned into a new.dat.sim 124 | 125 | ## xxx upsample_sim: concat sample to single copy of sim object, versus 126 | ## repeating the existing sim object as many times as possible and just filling 127 | ## in the remainder with samples 128 | 129 | ###################################### 130 | ## retrospective_forecasts.R ######### 131 | ###################################### 132 | 133 | ## fixme better dataset representation... list of data sources (history df's? ilinet, fluview baselines, metadata?, in.season, ...) and auxiliary information indexed in a uniform way for location and time 134 | ## todo interface for multiresolution (seasonal vs. weekly vs. ..., national vs. regions vs. ...) datasets and metadata, targets 135 | ## todo instead of faking new.dat when given a new.dat.sim, store and use one in new.dat.sim 136 | ## todo effective number of particles impacted by number of seasons used (as well as widths...) 137 | ## todo try the weighted bw function instead of the bw.SJnrd0 --- issue: needs 138 | ## to be called many times (more than the unweighted version --- the weights 139 | ## change) and would be slow (half the time appeared to be spent in bw 140 | ## calculations last check). 
141 | ## todo test some changes on earlier seasons 142 | ## todo kernel HMM approach 143 | ## todo GP approach 144 | ## todo curve decomposition (+ random walk) approach, regression onto #seasons approach 145 | 146 | ##################################### 147 | ## empirical.trajectories.R ######### 148 | ##################################### 149 | 150 | ## todo due to different seasons having different baselines, 151 | ## empirical.trajectories.sim will not produce the historical distribution for 152 | ## onsets (but will for the other flusight targets); optional scaling based on 153 | ## current and historical baselines to give historical onset distribution, to 154 | ## adjust percentages for differences in network composition from season to 155 | ## season (but changing the output from the historical distributions) (perhaps 156 | ## this scaling could happen during a pre-processing step for full.dat) 157 | 158 | ####################### 159 | ## cv_apply.R ######### 160 | ####################### 161 | 162 | ## todo better cv_apply interface 163 | ## todo object-oriented iterator design 164 | ## todo other iterator structures: not one iterator per input dim, but one output dim per iterator (+ the result dimensionality dims) 165 | ## todo non-CV versions 166 | ## todo similar functions on cartesian products 167 | ## xxx option for warm starts? 168 | ## xxx option for parallelism? 169 | ## xxx decide on dropping behavior (especially in some cases, LHS will always have 1 index in a given dimension) 170 | ## todo fix issues with dimension combining and naming behavior when there are single outputs; when to run simplify2array vs. not... 171 | 172 | ## todo better operations on scalar/vector/matrix/arrays: scalars 173 | ## distinguishable from length-1 vectors, drop=FALSE whenever it is an option, 174 | ## uniform interface for length/dim & names/dimnames, dplyr operations if 175 | ## possible. Look into tbl_cube. 176 | 177 | #################### 178 | ## twkde.R ######### 179 | #################### 180 | 181 | ## todo select twkde params 182 | ## todo fully parameterized bandwidth matrix, especially 183 | ## todo try ks package's kcde, reichlab/kcde 184 | ## xxx a single variable to balance between x and diff(x): a convex combination of the two 185 | 186 | ####################### 187 | ## ensemble.R ######### 188 | ####################### 189 | 190 | ## todo calculate degen EM results in log space, in Rcpp 191 | ## todo ridge penalty on point prediction coef's 192 | ## xxx should use =rq= for constrainedLADPtPredFit with the R= r= args if 193 | ## possible; it's probably faster 194 | ## todo weighted versions of ensemble weight fitting algorithms 195 | 196 | ############################### 197 | ## DESCRIPTION/*-forecasts.R ## 198 | ############################### 199 | 200 | ## consider byte-compilation 201 | -------------------------------------------------------------------------------- /epiforecast/R/utils.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 
5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | ##' @import pipeR 23 | ##' @import R.utils 24 | NULL 25 | 26 | ## At least some versions of roxygen generate an error when documenting this package when the pipeR pipe would actually be called when sourcing in R files (rather than just appearing in generated function bodies). Work around this by defining an un-exported copy of the pipeR pipe operator: 27 | `%pipeR>>%` = pipeR::`%>>%` 28 | 29 | ##' A \code{seq} variant that produces a 0-length vector when \code{!(from <= 30 | ##' to)}. 31 | ##' 32 | ##' @param from starting number (or other object compatible with \code{seq}) 33 | ##' @param to ending number (or other object compatible with \code{seq}) 34 | ##' @param ... arguments to forward to \code{seq} 35 | ##' 36 | ##' @export 37 | Seq = function(from, to, ...) { 38 | if (from <= to) { 39 | ## Just delegate in the "nice" case: 40 | return (seq(from, to, ...)) 41 | } else { 42 | ## Return 0 elements of a reasonable type by taking 0 elements from 43 | ## =seq(from,from)=. 44 | return (seq(from, from)[integer(0)]) 45 | ## We could do =seq(from, to, ...)=, but that could potentially hurt 46 | ## performance and generate bugs for certain dispatches. 
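## Illustrative usage (comments only; not part of the original source):
##   Seq(3L, 5L)  # 3 4 5 -- same as seq(3L, 5L)
##   Seq(5L, 3L)  # integer(0), whereas seq(5L, 3L) would count down: 5 4 3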
47 | } 48 | } 49 | 50 | dat.to.matrix1 = function(dat, n) { 51 | dat <- match.dat(dat) 52 | n <- match.single.nonna.integer(n) 53 | sapply(dat, `[`, seq_len(n)) 54 | } 55 | dat.to.matrix2 = function(dat, n) { 56 | dat <- match.dat(dat) 57 | n <- match.single.nonna.integer(n) 58 | result = matrix(NA_real_, n, length(dat)) 59 | for (trajectory.i in seq_along(dat)) { 60 | trajectory = dat[[trajectory.i]] 61 | result[,trajectory.i] <- trajectory[seq_len(n)] 62 | } 63 | dimnames(result)[[2]] <- names(dat) 64 | return (result) 65 | } 66 | dat.to.matrix3 = function(dat, n) { 67 | dat <- match.dat(dat) 68 | n <- match.single.nonna.integer(n) 69 | result = matrix(NA_real_, n, length(dat)) 70 | inds = seq_len(n) 71 | for (trajectory.i in seq_along(dat)) { 72 | trajectory = dat[[trajectory.i]] 73 | result[,trajectory.i] <- trajectory[inds] 74 | } 75 | dimnames(result)[[2]] <- names(dat) 76 | return (result) 77 | } 78 | ## all.equal(dat.to.matrix1(dat, 52), dat.to.matrix2(dat, 52), dat.to.matrix3(dat, 52)) 79 | ## benchmark(dat.to.matrix1(dat,52),dat.to.matrix2(dat,52),dat.to.matrix3(dat,52),replications=100000) 80 | ## 1 dat.to.matrix1(dat, 52) 100000 35.548 4.814 35.564 0.004 81 | ## 2 dat.to.matrix2(dat, 52) 100000 7.948 1.076 7.952 0.000 82 | ## 3 dat.to.matrix3(dat, 52) 100000 7.384 1.000 7.388 0.000 83 | 84 | ##' Numeric matrix of the first \code{n} elements of each numeric vector in 85 | ##' \code{dat}. 86 | ##' 87 | ##' A more efficient implementation of \code{sapply(dat, `[`, seq_len(n))}. Any 88 | ##' vectors in \code{dat} with length less than \code{n} are extended with 89 | ##' \code{NA_real_}'s at the end. 90 | ##' 91 | ##' @param dat a list of numeric vectors 92 | ##' @param n a single integer: the number of elements to take from each vector 93 | ##' @return a \code{n}-by-\code{length(dat)} numeric matrix 94 | ##' 95 | ##' @examples 96 | ##' dat = list(11:15, 21:26) 97 | ##' dat.to.matrix(dat, 5) # (5x2: dat[[2]] is cut off) 98 | ##' dat.to.matrix(dat, 6) # (6x2: dat[[1]] is extended with NA_real_) 99 | ##' n = 3 100 | ##' identical(c(n, length(dat)), dim(dat.to.matrix(dat, n))) 101 | ##' 102 | ##' @export 103 | dat.to.matrix = dat.to.matrix3 104 | 105 | ##' Reshape arraylike into fewer but larger dimensions and/or permute (and optionally rename) dimensions; like tidyr::unite on arrays using R.utils::wrap.array 106 | ##' 107 | ##' Collapses specified array(like) object dimensions together to produce an 108 | ##' array with fewer but larger dimensions and an equal number of entries; 109 | ##' indices of each collapsed dimension refers to a Cartesian product over the 110 | ##' indices of the dimensions it was made from. This operation should be similar 111 | ##' or equivalent to melting the array, calling tidyr::unite, and casting back 112 | ##' into an array. 113 | ##' 114 | ##' Collapsed/united dimensions are ordered after all untouched dimensions. To 115 | ##' tweak the ordering or use this function to simply permute dimensions, 116 | ##' individual dimnames names can be included as their own singleton "sets" in 117 | ##' dnn.sets. 
118 | ##' 119 | ##' @param arraylike the arraylike object to reshape; must have the dimnames 120 | ##' names referred to in \code{{dnn.sets}} 121 | ##' 122 | ##' @param dnn.sets a list of character vectors; each character vector specifies 123 | ##' some dimensions to unite in the array; ordering within each vector 124 | ##' determines the ordering of indices in the resulting collapsed dimension 125 | ##' (as in R.utils::wrap.array); ordering between the vectors determines the 126 | ##' ordering of the collapsed dimensions in the result; names of vector 127 | ##' entries (e.g., \code{names(dnn.sets[[1]])}) are ignored; 128 | ##' \code{names(dnn.sets)}, if specified and nonblank, override automatic 129 | ##' names of the collapsed dimensions 130 | ##' 131 | ##' @param sep length-1 character vector; string used to combine dimnames for 132 | ##' collapsed dimensions, as well as automatic dimnames names for collapsed 133 | ##' dimensions 134 | ##' 135 | ##' @examples 136 | ##' arraylike = array(1:2^5, rep(2,5)) 137 | ##' dimnames(arraylike) <- 138 | ##' list(A=c("a1","a2"), B=c("b1","b2"), C=c("c1","c2"), 139 | ##' D=c("d1","d2"), E=c("e1","e2") 140 | ##' ) 141 | ##' ## Collapsing A&B, C&D: 142 | ##' dimnames(unite_arraylike(arraylike, list(c("A","B"), c("C","D")))) 143 | ##' ## Adjusting `sep` changes resulting dimnames and dimnames names: 144 | ##' dimnames(unite_arraylike(arraylike, list(c("A","B"), c("C","D")), sep="__")) 145 | ##' ## Result dimnames names can be manually specified: 146 | ##' names(dimnames(unite_arraylike(arraylike, list(c("A","B"), DVD=c("C","D")), sep="__"))) 147 | ##' ## Singleton sets can be used to permute dimensions, optionally renaming 148 | ##' ## them (changing the dimnames names): 149 | ##' ## Place dimension A at end: 150 | ##' names(dimnames(unite_arraylike(arraylike, list("A")))) 151 | ##' ## Permute all dims: 152 | ##' names(dimnames(unite_arraylike(arraylike, list("E","D","C","B","A")))) 153 | ##' ## Place some dimensions at end and rename one: 154 | ##' names(dimnames(unite_arraylike(arraylike, list("A","EEE"="E")))) 155 | ##' ## Collapsing and permuting are actually the same operation and can be mixed: 156 | ##' names(dimnames(unite_arraylike(arraylike, list(c("C","D"),"A","EEE"="E")))) 157 | ##' 158 | ##' @export 159 | unite_arraylike = function(arraylike, dnn.sets, sep=".") { 160 | src.dnns = names(dimnames(arraylike)) 161 | if (is.null(src.dnns)) { 162 | stop ('Arraylike must have named dimnames.') 163 | } 164 | dim.i.sets = dnn.sets %>>% 165 | lapply(function(selected.src.dnns) match(selected.src.dnns, src.dnns)) 166 | all.selected.dim.is = Reduce(c, dim.i.sets) 167 | all.selected.dnns = Reduce(c, dnn.sets) 168 | if (any(is.na(all.selected.dim.is))) { 169 | stop (paste0('Dimensions in dnn.sets were not found in arraylike: ', 170 | paste(all.selected.dnns[is.na(all.selected.dim.is)], collapse=", "))) 171 | } 172 | if (anyDuplicated(all.selected.dnns) != 0L) { 173 | stop (paste0('Dimension(s) referred to more than once in dnn.sets. Duplicates: ', 174 | paste(all.selected.dnns[duplicated(all.selected.dnns)], collapse=", "))) 175 | } 176 | untouched.dim.is = setdiff(seq_along(src.dnns), all.selected.dim.is) 177 | untouched.dnns = src.dnns[untouched.dim.is] 178 | if (any(untouched.dnns %in% all.selected.dnns)) { 179 | stop (paste0('Dimensions in dnn.sets could not be uniquely identified. 
Dimnames names duplicates referenced in dnn.sets: ', 180 | paste(untouched.dnns %>>% `[`(.%in%all.selected.dnns), collapse=", "))) 181 | } 182 | arraylike %>>% 183 | R.utils::wrap.array(c(as.list(untouched.dim.is), dim.i.sets), sep=sep) %>>% 184 | { 185 | automatic.union.names = sapply(dnn.sets, paste0, collapse=sep) 186 | names(dim.i.sets) <- namesp(dim.i.sets) %>>% 187 | {.[.==""] <- automatic.union.names[.==""]; .} 188 | names(dimnames(.)) <- c(src.dnns[untouched.dim.is], names(dim.i.sets)) 189 | . 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /epiforecast/data-raw/fluview.outdated.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, either version 2 of the License, or 12 | ## (at your option) any later version. 13 | ## 14 | ## epiforecast is distributed in the hope that it will be useful, 15 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ## GNU General Public License for more details. 18 | ## 19 | ## You should have received a copy of the GNU General Public License 20 | ## along with epiforecast. If not, see . 
21 | ## license_header end 22 | 23 | ## source("R/utils.R", chdir=TRUE) 24 | ## source("R/match.R", chdir=TRUE) 25 | ## source("R/weeks.R", chdir=TRUE) 26 | ## source("R/delphi_epidata.R", chdir=TRUE) 27 | ## source("R/loaders.R", chdir=TRUE) 28 | 29 | ## devtools::load_code() 30 | 31 | devtools::load_all() 32 | 33 | fluview.first.model.week = 21L 34 | fluview.area.names = c(sprintf("hhs%d",1:10),"nat") 35 | ## fluview.2003on.dfs = structure(lapply(fluview.area.names, function(area.name) { 36 | ## trimPartialPastSeasons(fetchEpidataDF("fluview", area.name, 37 | ## first.week.of.season=fluview.first.model.week, 38 | ## cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)), 39 | ## "wili", 52) 40 | ## }), names=fluview.area.names) 41 | ## fluview.2003on.full.dats = lapply(fluview.2003on.dfs, function(df) { 42 | ## full.dat = split(df$wili, df$season) # historical seasons + current season 43 | ## names(full.dat) <- sprintf("S%s", names(full.dat)) 44 | ## full.dat <- full.dat[names(full.dat)!="S2009"] 45 | ## full.dat 46 | ## }) 47 | fluview.2003on.full.dats = setNames(lapply(fluview.area.names, function(area.name) { 48 | fetchEpidataFullDat("fluview", area.name, "wili", 49 | min.points.in.season=52L, first.week.of.season=fluview.first.model.week, 50 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 51 | }), fluview.area.names) 52 | fluview.2003on.dats = lapply(fluview.2003on.full.dats, head, n=-1L) 53 | fluview.2003on.new.dats = lapply(fluview.2003on.full.dats, function(full.dat) tail(full.dat, 1L)[[1]]) 54 | 55 | ## todo document 56 | 57 | fluview.2003on.outdated.dats = fluview.2003on.dats 58 | fluview.2003on.outdated.new.dats = fluview.2003on.new.dats 59 | 60 | devtools::use_data(fluview.2003on.outdated.dats, fluview.2003on.outdated.new.dats, 61 | overwrite = TRUE) 62 | 63 | ## fixme should this just be a method? get.fluview.2003on.dats()? or something 64 | ## formatted like fetchEpidataDF but returning a dat and new.dat, or a full.dat? 65 | -------------------------------------------------------------------------------- /epiforecast/data/fluview.2003on.outdated.dats.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/data/fluview.2003on.outdated.dats.rda -------------------------------------------------------------------------------- /epiforecast/data/fluview.2003on.outdated.new.dats.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/data/fluview.2003on.outdated.new.dats.rda -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_baseline.R: -------------------------------------------------------------------------------- 1 | ##' @param baseline a "baseline level" for this dataset; roughly speaking, data 2 | ##' below this level does not grow like an epidemic; currently ignored by 3 | ##' most or all sim methods, but can be used as the \code{y.scale.baseline} 4 | ##' in \code{eb.sim} in \code{epiforecast.cpp14funs} by passing it through 5 | ##' the \code{control.list} argument. 
6 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_full.dat.R: -------------------------------------------------------------------------------- 1 | ##' @param full.dat list of (a) numeric vectors, one per past season, containing 2 | ##' historical trajectories, followed by (b) a numeric vector (trajectory), 3 | ##' numeric matrix (cbound trajectories), or sim object (list with $ys a 4 | ##' numeric matrix (cbound trajectories) and $weights a numeric vector 5 | ##' (associated weights)), with \code{NA}'s for all future or missing data 6 | ##' points to forecast or infer; currently only supports \code{NA}'s at future 7 | ##' points, not mixed in between non-\code{NA} data 8 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_invent.scalars.R: -------------------------------------------------------------------------------- 1 | ##' @param invent.scalars length-1 non-NA logical; if \code{TRUE}, treat unnamed 2 | ##' length-1 vector \code{x}'s as "scalars" with zero dimensions; if 3 | ##' \code{FALSE}, treat them as arrays with one dimension of size 1. 4 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/param_max.n.sims.R: -------------------------------------------------------------------------------- 1 | ##' @param max.n.sims single non-\code{NA} integer value or \code{NULL}: the 2 | ##' number of curves to sample from the inferred distribution 3 | -------------------------------------------------------------------------------- /epiforecast/man-roxygen/sim.method_template.R: -------------------------------------------------------------------------------- 1 | ##' @template param_full.dat 2 | ##' @template param_baseline 3 | ##' @template param_max.n.sims 4 | ##' @return a sim object --- a list with two components: 5 | ##' 6 | ##' \code{ys}: a numeric matrix, typically with multiple columns; each column is 7 | ##' a different possible trajectory for the current season, with NA's in the 8 | ##' input for the current season filled in with random draws from the forecasted 9 | ##' distribution, and non-\code{NA}'s (observed data) filled in with an imagined 10 | ##' resampling of noise based on the model (for some models, the non-\code{NA} 11 | ##' values will remain unchanged). 12 | ##' 13 | ##' \code{weights}: a numeric vector; assigns a weight to each column of 14 | ##' \code{ys}, which is used by methods relying on importance sampling. 
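##'
##' (Illustrative sketch, not part of the original template: under these
##' conventions, a minimal sim object holding two sampled length-52
##' trajectories with equal weight could be built as
##' \code{list(ys = matrix(rnorm(52 * 2), nrow = 52), weights = c(1, 1))}.)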
15 | -------------------------------------------------------------------------------- /epiforecast/meta.R: -------------------------------------------------------------------------------- 1 | ## Commands to create a vignette 2 | vignettename = "fetch_data" 3 | devtools::use_vignette(vignettename) 4 | 5 | -------------------------------------------------------------------------------- /epiforecast/src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // WeightedTabulateRcpp 14 | Rcpp::NumericVector WeightedTabulateRcpp(Rcpp::IntegerVector bin, Rcpp::IntegerVector nbins, Rcpp::NumericVector w); 15 | RcppExport SEXP _epiforecast_WeightedTabulateRcpp(SEXP binSEXP, SEXP nbinsSEXP, SEXP wSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type bin(binSEXP); 20 | Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nbins(nbinsSEXP); 21 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); 22 | rcpp_result_gen = Rcpp::wrap(WeightedTabulateRcpp(bin, nbins, w)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | 27 | static const R_CallMethodDef CallEntries[] = { 28 | {"_epiforecast_WeightedTabulateRcpp", (DL_FUNC) &_epiforecast_WeightedTabulateRcpp, 3}, 29 | {NULL, NULL, 0} 30 | }; 31 | 32 | RcppExport void R_init_epiforecast(DllInfo *dll) { 33 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 34 | R_useDynamicSymbols(dll, FALSE); 35 | } 36 | -------------------------------------------------------------------------------- /epiforecast/src/histograms_rcpp.cpp: -------------------------------------------------------------------------------- 1 | // author_header begin 2 | // Copyright (C) 2016 Logan C. Brooks 3 | // 4 | // This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | // 6 | // Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 
7 | // author_header end 8 | // license_header begin 9 | // epiforecast is free software: you can redistribute it and/or modify 10 | // it under the terms of the GNU General Public License as published by 11 | // the Free Software Foundation, version 2 of the License. 12 | // 13 | // epiforecast is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | // 18 | // You should have received a copy of the GNU General Public License 19 | // along with epiforecast. If not, see . 20 | // license_header end 21 | 22 | #include 23 | 24 | //' Weighted, more \code{nbins}-restrictive version of \code{base::tabulate} 25 | //' 26 | //' @param bin integer-compatible vector; entries must be non-NA and between 1 27 | //' and \code{nbins}; these indices denote entries in the result vector to which 28 | //' the corresponding weights in \code{w} should be added 29 | //' @param nbins single non-NA, non-negative integer; length of the vector to 30 | //' return 31 | //' @param w numeric-compatible vector of the same length as \code{bin}; weights 32 | //' corresponding to the indices in \code{bin} 33 | //' @return numeric vector of length \code{nbins}; the \code{i}th entry is like 34 | //' \code{sum(w[bin==i])}, but with a naive summation algorithm 35 | //' 36 | //' @useDynLib epiforecast 37 | //' @export 38 | // [[Rcpp::export(name="weighted_tabulate")]] 39 | Rcpp::NumericVector WeightedTabulateRcpp 40 | ( 41 | Rcpp::IntegerVector bin, 42 | Rcpp::IntegerVector nbins, 43 | Rcpp::NumericVector w 44 | ) { 45 | if (bin.size() != w.size()) { 46 | ::Rf_error("Invalid input: length(bin) != length(w)"); 47 | } 48 | if (nbins.size() != 1 || 49 | Rcpp::IntegerVector::is_na(* nbins.begin()) || 50 | * nbins.begin() < 0) { 51 | ::Rf_error("Invalid input: nbins is not a single, non-NA, non-negative integer."); 52 | } 53 | Rcpp::IntegerVector::stored_type nbins0 = * nbins.begin(); 54 | Rcpp::NumericVector result(nbins0); // 0-initialized 55 | Rcpp::IntegerVector::iterator bin_it = bin.begin(); 56 | Rcpp::NumericVector::iterator w_it = w.begin(); 57 | for(; bin_it != bin.end(); ++bin_it, ++w_it) { 58 | if (Rcpp::IntegerVector::is_na(*bin_it)) { 59 | ::Rf_error("Invalid input: any(is.na(bin))"); 60 | } 61 | Rcpp::IntegerVector::stored_type index = *bin_it - 1; 62 | if (index < 0 || index >= nbins0) { 63 | ::Rf_error("Invalid input: !all(1L <= bin & bin <= nbins)"); 64 | } 65 | result[index] += *w_it; 66 | } 67 | return result; 68 | } 69 | 70 | // clang-format off 71 | // todo system includes 72 | /* Local Variables: */ 73 | /* clang-format-style: "Google" */ 74 | /* flycheck-clang-language-standard: "c++14" */ 75 | /* flycheck-gcc-language-standard: "c++14" */ 76 | /* flycheck-clang-include-path: ("/usr/local/lib/R/site-library/Rcpp/include/" "/usr/share/R/include/") */ 77 | /* flycheck-gcc-include-path: ("/usr/local/lib/R/site-library/Rcpp/include/" "/usr/share/R/include/") */ 78 | /* End: */ 79 | // clang-format on 80 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-br.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. 
Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the br.sim() function..") 23 | 24 | area.name = "hhs1" 25 | 26 | ## Make a sim object from fluview. 27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 31 | mysim = br.sim(full.dat, max.n.sims=2L) 32 | 33 | ## Tests: 34 | 35 | test_that("Returns object of class 'sim'.", { 36 | expect_equal(class(mysim),"sim") 37 | }) 38 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-check.file.contents.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. 
Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the check.file.contents() function..") 23 | 24 | ## Setup 25 | dummytable = matrix(c(runif(n=9,1,3),NA),ncol=5) 26 | dummytable.full = matrix(runif(n=10,1,3),ncol=5) 27 | colnames(dummytable.full) = c(1997:2001) 28 | 29 | ## Tests: 30 | 31 | test_that("Missing column names returns error.", { 32 | wrong.names = c("a","b","c","","d") 33 | colnames(dummytable) = wrong.names 34 | write.table(dummytable, 35 | file = "a.csv", 36 | col.names = T, 37 | row.names=F, 38 | sep=",") 39 | expect_error(check.file.contents("a.csv")) 40 | file.remove("a.csv") 41 | }) 42 | 43 | 44 | test_that("Column names containing NA returns error.", { 45 | wrong.names = c(NA,1:4) 46 | colnames(dummytable) = wrong.names 47 | write.table(dummytable, 48 | file = "a.csv", 49 | col.names = T, 50 | row.names= F, 51 | sep=",") 52 | expect_error(check.file.contents("a.csv")) 53 | file.remove("a.csv") 54 | }) 55 | 56 | 57 | 58 | test_that("Last column being full returns error.", { 59 | write.table(dummytable.full, 60 | file = "a.csv", 61 | col.names = T, 62 | row.names=F, 63 | sep=",") 64 | expect_error(check.file.contents("a.csv")) 65 | file.remove("a.csv") 66 | }) 67 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-read.from.file.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 
7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the read.from.file() function..") 23 | 24 | ## ## Setup 25 | ## dummytable = matrix(c(runif(n=9,1,3),NA),ncol=5) 26 | ## colnames(dummytable) = c(1997:2001) 27 | 28 | 29 | ## ## Tests: 30 | ## test_that("Return type is list of vector", { 31 | ## write.table(dummytable, 32 | ## file = "a.csv", 33 | ## col.names = T, 34 | ## row.names=F, 35 | ## sep=",") 36 | ## mydat = read.from.file("a.csv") 37 | ## mydat 38 | ## expect_equal("list", typeof(mydat)) 39 | ## file.remove("a.csv") 40 | ## }) 41 | 42 | 43 | ## Fetch some data 44 | area.name = "hhs1" 45 | hhs1.dat = fetchEpidataFullDat("fluview", area.name, "wili", 46 | min.points.in.season=52L, 47 | first.week.of.season = 21L, 48 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 49 | alt.names1=paste("season",1:length(names(hhs1.dat))) 50 | alt.names2 = alt.names1; alt.names2[5] = "" 51 | alt.names3 = rep("", length(names(hhs1.dat))) 52 | 53 | list_dat_to_matrix <- function(vec_list) { 54 | stopifnot(length(vec_list) > 0L) 55 | length_range <- range(vapply(vec_list, length, integer(1L))) 56 | if (diff(length_range) > 1L) { 57 | stop ('Vectors in the list cannot vary in length by more than 1; please make sure they are repeated or filled with NAs appropriately.') 58 | } 59 | if (length_range[[1L]] == 0L) { 60 | stop ('Vectors in the list must all be of positive length.') 61 | } 62 | max_length <- length_range[[2L]] 63 | vec_list <- lapply(vec_list, function(vec) { 64 | # copy the last elt to fill the gap 65 | c(vec, rep(vec[[length(vec)]], max_length - length(vec))) 66 | }) 67 | do.call(cbind, vec_list) 68 | } 69 | 70 | ## When one label is missing 71 | 72 | 73 | ## Create csv with /no/ names 74 | test_that("When alls labels are missing, error is thrown", { 75 | filename="./all.names.missing.file.csv" 76 | all.names.missing.hhs1.dat = hhs1.dat 77 | names(all.names.missing.hhs1.dat) = alt.names3 78 | all.names.missing.hhs1.dat = list_dat_to_matrix(all.names.missing.hhs1.dat) 79 | all.names.missing.hhs1.dat = all.names.missing.hhs1.dat[-53,] 80 | write.csv(all.names.missing.hhs1.dat, file = filename, row.names=FALSE) 81 | expect_error(full.dat = read.from.file(filename)) 82 | }) 83 | 84 | ## Create csv with one name missing 85 | test_that("When one label is missing, error is thrown", { 86 | filename="./one.name.missing.file.csv" 87 | one.name.missing.hhs1.dat = hhs1.dat 88 | names(one.name.missing.hhs1.dat) = alt.names2 89 | one.name.missing.hhs1.dat = list_dat_to_matrix(one.name.missing.hhs1.dat) 90 | one.name.missing.hhs1.dat = one.name.missing.hhs1.dat[-53,] 91 | write.csv(one.name.missing.hhs1.dat, file = filename, row.names=FALSE) 92 | expect_error(full.dat = read.from.file(filename)) 93 | }) 94 | 95 | ## Last season is not partially observed 96 | test_that("When last season is not partially observed, error is thrown", 
{ 97 | filename="./last.column.weird.file.csv" 98 | last.column.weird.hhs1.dat = list_dat_to_matrix(hhs1.dat) 99 | write.csv(last.column.weird.hhs1.dat, file = filename,row.names=FALSE) 100 | expect_error(full.dat = read.from.file(filename)) 101 | }) 102 | 103 | ## CSV is correct, so everything should be fine here. 104 | test_that("When last season is not partially observed, error is thrown", { 105 | filename="./correct.csv" 106 | correct.hhs1.dat = list_dat_to_matrix(hhs1.dat) 107 | correct.hhs1.dat = correct.hhs1.dat[-53,] 108 | write.csv(correct.hhs1.dat, file = filename,row.names=FALSE) 109 | full.dat = read.from.file(filename) 110 | expect_equal(class(full.dat), "list") 111 | }) 112 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-twkde.markovian.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the twkde.markovian.sim() function..") 23 | 24 | area.name = "hhs1" 25 | 26 | ## Make a sim object from fluview. 
27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 31 | mysim = twkde.markovian.sim(full.dat, max.n.sims=2L) 32 | 33 | ## Tests: 34 | test_that("Returns object of class 'sim'.", { 35 | expect_equal(class(mysim),"sim") 36 | }) 37 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test-twkde.sim.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Sangwon Hyun 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | context("Testing the twkde.sim() function..") 23 | 24 | area.name = "hhs1" 25 | 26 | ## Make a sim object from fluview. 27 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 28 | min.points.in.season=52L, 29 | first.week.of.season = 21L, 30 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 31 | mysim = twkde.sim(full.dat, max.n.sims=2L) 32 | 33 | ## Tests: 34 | test_that("Returns object of class 'sim'.", { 35 | expect_equal(class(mysim),"sim") 36 | }) 37 | -------------------------------------------------------------------------------- /epiforecast/tests/testthat/test_match.arg.or.default.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2016 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. 
Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, either version 2 of the License, or 12 | ## (at your option) any later version. 13 | ## 14 | ## epiforecast is distributed in the hope that it will be useful, 15 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ## GNU General Public License for more details. 18 | ## 19 | ## You should have received a copy of the GNU General Public License 20 | ## along with epiforecast. If not, see . 21 | ## license_header end 22 | 23 | parent_function = function(ch1=letters[1:5], ch2=c("AAA","AAB","BBB"), 24 | num1=1:5, int1=6L:10L, 25 | list1=list(1:2,3:4), list2=list(NULL,"a",c(two=2),1:5)) { 26 | return (list( 27 | ch1=match.arg.else.default(ch1), 28 | ch2=match.arg.else.default(ch2), 29 | num1=match.arg.else.default(num1), 30 | int1=match.arg.else.default(int1), 31 | list1=match.arg.else.default(list1), 32 | list2=match.arg.else.default(list2) 33 | )) 34 | } 35 | 36 | ## Return default on missing: 37 | expect_equal(parent_function(), 38 | list(ch1="a", ch2="AAA", num1=1, int1=6L, list1=1:2, list2=NULL)) 39 | 40 | ## Return default on NULL: 41 | expect_equal(parent_function(NULL, NULL, NULL, NULL, NULL, NULL), 42 | list(ch1="a", ch2="AAA", num1=1, int1=6L, list1=1:2, list2=NULL)) 43 | 44 | ## Allow partial matches, all.equal ignoring attributes: 45 | expect_equal(parent_function("b", c(extraneous.name="B"), 3L, 8.00000000001, c(p=3,q=4), 2), 46 | list(ch1="b", ch2="BBB", num1=3.0, int1=8L, list1=3:4, list2=c(two=2))) 47 | 48 | ## Return default with warning on mismatched inputs: 49 | expect_equal(suppressWarnings(parent_function("q", "A", "nonnumeric", 11L, 1:4, c("A","B","C"))), 50 | list(ch1="a", ch2="AAA", num1=1, int1=6L, list1=1:2, list2=NULL)) 51 | expect_warning(parent_function("q")) 52 | expect_warning(parent_function(,"A")) 53 | expect_warning(parent_function(,,"nonnumeric")) 54 | expect_warning(parent_function(,,,11L)) 55 | expect_warning(parent_function(,,,,1:4)) 56 | expect_warning(parent_function(,,,,,c("A","B","C"))) 57 | 58 | ## Produce error on inappropriate inputs: 59 | expect_error(parent_function(3), "length-1 character") 60 | expect_error(parent_function(letters[1:2]), "length-1 character") 61 | 62 | ## todo produce error on inappropriate =choices= 63 | 
-------------------------------------------------------------------------------- /epiforecast/vignettes/fetch_data.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fetching / Loading Data" 3 | author: "Justin" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Fetch data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | First, set up by loading the package and setting the working directory: 13 | ```{r, eval=TRUE, results='asis'} 14 | library(devtools) 15 | outputdir = "." 16 | path_to_Rpkg_directory = "." 17 | setwd(path_to_Rpkg_directory) 18 | load_all() 19 | ``` 20 | 21 | Here is an example. Let us try to fetch national influenza intensity measurements (called "weighted ILI", or "wILI" for short) from the `fluview` data source of the Delphi Epidata API using the function `fetchEpidataDF()`: 22 | ```{r, eval=TRUE, results='markup'} 23 | fluview.nat.all.df = fetchEpidataDF(source = "fluview", 24 | area = "nat", 25 | first.week.of.season=21L) 26 | ## See first few lines 27 | head(fluview.nat.all.df,3) 28 | tail(fluview.nat.all.df,3) 29 | ``` 30 | 31 | What if we wanted to fetch Google Flu Trends (GFT) measurements using the same function, with the same date range and settings? 32 | ```{r, eval=TRUE, results='asis'} 33 | gft.nat.all.df = fetchEpidataDF(source = "gft", 34 | area = "nat", 35 | first.week.of.season=21L) 36 | ``` 37 | Note that GFT measurements are on 1/100th of the scale of wILI measurements. 38 | 39 | There are also 10 sub-regions (HHS regions) of the United States whose influenza intensity measurements are available; they can be queried as follows: 40 | ```{r, eval=TRUE, results='asis'} 41 | gft.reg.dat = fetchEpidataDF(source = "gft", 42 | area ="hhs1") 43 | fluview.reg.dat = fetchEpidataDF(source = "fluview", 44 | area ="hhs1") 45 | ``` 46 | 47 | How would you query data over the same date range, but from two or more sources? Use the wrapper `fetchEpidataDF.multiple.sources()`: 48 | ```{r, eval=TRUE} 49 | all.reg.dat = fetchEpidataDF.multiple.sources(source = c("gft", "fluview"), area = "hhs1") 50 | ``` 51 | 52 | -------------------------------------------------------------------------------- /epiforecast/vignettes/fetch_data[woven].md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fetching / Loading Data" 3 | author: "Justin" 4 | date: "2016-07-17" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Fetch data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | First, set up by loading the package and setting the working directory: 13 | 14 | 15 | ``` 16 | ## Loading epiforecast 17 | ``` 18 | 19 | Here is an example. 
20 | 21 | 22 | ```r 23 | ## helper function to merge gft with flu 24 | mymerge = function(df1, df2, merge.names = "", merge.by = ""){#colnames.to.merge 25 | merge.dat = merge(df1,df2,by=c("year","week","date"),all=T) 26 | merge.dat[,"week"] = as.numeric(merge.dat[,"week"]) 27 | merge.dat[,"year"] = as.numeric(merge.dat[,"year"]) 28 | ord.by.dat = order(merge.dat[,c("year")],merge.dat[,c("week")]) 29 | merge.dat = merge.dat[ord.by.dat,] 30 | merge.dat = merge.dat[,c("season.y","year","week","date","gft.num","wili")] 31 | colnames(merge.dat)[colnames(merge.dat)=="season.y"] = "season" 32 | return(merge.dat) 33 | } 34 | 35 | 36 | ## National 37 | filename = file.path(outputdir, "fluview_nat_df.csv") 38 | fluview.nat.all.df = # If I need just 2003 and on, I do #fluview.nat.recent.df() 39 | trimPartialPastSeasons(fetchEpidataDF("fluview", "nat", 40 | first.week.of.season=21L, 41 | cache.file.prefix="fluview_nat_allfetch.Rdata"), 42 | "wili", 43 | min.points.in.season=33L) 44 | 45 | fluview.nat.all.df = fluview.nat.all.df[,c("region","wili","year","week","date","season","model.week")] 46 | gft.nat.df = fetchEpidataDF("gft","nat") 47 | gft.nat.df = gft.nat.df[,c("location","year","week","model.week","date","season","num")] 48 | colnames(gft.nat.df)[which(colnames(gft.nat.df) == "num")] = "gft.num" 49 | nat.dat = mymerge(df1=gft.nat.df, df2=fluview.nat.all.df) 50 | cat("writing national data",fill=T) 51 | write.csv(nat.dat, file=filename,row.names=F) 52 | 53 | 54 | ## By region 55 | regs = paste0("reg",1:10) 56 | hhss = paste0("hhs",1:10) 57 | for(jj in 1:10){ 58 | cat("writing region",jj, fill=T) 59 | myreg = regs[jj] 60 | myhhs = hhss[jj] 61 | cachename = paste0("fluview_",myhhs,"_fetch.Rdata") 62 | filename = file.path(outputdir, paste0("fluview_",myhhs,"_df.csv")) 63 | regdat = trimPartialPastSeasons(fetchEpidataDF("fluview", myhhs, 64 | first.week.of.season=21L), 65 | ## cache.file.prefix=cachename), 66 | "wili", min.points.in.season=33L) 67 | regdat2 = regdat[,c("region","wili","year","week","date","season","model.week")] 68 | gft.reg.dat = fetchEpidataDF("gft","hhs1") 69 | colnames(gft.reg.dat)[which(colnames(gft.reg.dat) == "num")] = "gft.num" 70 | reg.dat = mymerge( df1=gft.reg.dat, df2=regdat2) 71 | 72 | # write the file 73 | write.csv(reg.dat, file = filename, row.names=F) 74 | } 75 | ``` 76 | -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- 
/epiforecast/vignettes/figure/unnamed-chunk-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-2-2.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /epiforecast/vignettes/figure/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast/vignettes/figure/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /epiforecast/vignettes/make-forecasts-br.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Making forecasts with basis regression" 3 | author: "Justin" 4 | date: "`r Sys.Date()`" 5 | header-includes: 6 | - \usepackage{bbm} 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{Making forecasts with BR} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | 13 | --- 14 | First, set up by loading the package and setting the working directory: 15 | ```{r, eval=TRUE, results='asis'} 16 | library(devtools) 17 | outputdir = "." 18 | path_to_Rpkg_directory = "." 19 | setwd(path_to_Rpkg_directory) 20 | load_all() 21 | ``` 22 | 23 | Let us first fetch national influenza intensity measurements (called "weighted ILI", or "wILI" for short) from the `fluview` data source of the Delphi Epidata API using the function `fetchEpidataDF()`: 24 | ```{r, eval=TRUE, results='markup'} 25 | ## Bare minimum 26 | fluview.nat.df = fetchEpidataDF(source = "fluview", 27 | area = "nat", 28 | first.week.of.season=21L) 29 | 30 | ## And with some more options 31 | area.name = "nat" 32 | fluview.nat.df = fetchEpidataFullDat(source = "fluview", area = "nat", 33 | signal.ind = "wili", 34 | min.points.in.season = 52L, 35 | first.week.of.season = 21L, 36 | cache.file.prefix = sprintf("fluview_%s_fetch.Rdata", area.name)) 37 | ``` 38 | A short explanation: use `wili` as the `signal.ind` for the `source=fluview` option and `gft` for the `source=gft` option. `min.points.in.season` should be 52 if you are at forecasting time and would like to use only the fully observed historical seasons. The seemingly peculiar choice of `first.week.of.season=21` is a detail in the modeling and implementation of BR (and other methods, too!) that comes from the fact that the 21st _calendar_ week of any given year is a convenient, customary cutoff around which the trajectories of flu intensity measurements, when plotted, appear to be centered (on average). 39 | 40 | See the [fetch_data](fetch_data.Rmd) vignette for more details about fetching data. 
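As a rough, illustrative check (not part of the original vignette, and written under the assumption that `fluview.nat.df` above is a `full.dat`-style list with one trajectory per season), you can inspect how much data each season contributes and how much of the current season remains to be forecast:
```{r, eval=FALSE}
## Number of stored observations per season:
sapply(fluview.nat.df, length)
## NA's mark the not-yet-observed weeks of the partially observed current season:
sum(is.na(fluview.nat.df[[length(fluview.nat.df)]]))
```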
41 | 42 | Let's focus our attention on the 'hhs1' region: 43 | ```{r, eval=TRUE, results='markup'} 44 | area.name = "hhs1" 45 | full.dat = fetchEpidataFullDat("fluview", area.name, "wili", 46 | min.points.in.season=52L, 47 | first.week.of.season = 21L, 48 | cache.file.prefix=sprintf("fluview_%s_fetch.Rdata", area.name)) 49 | ``` 50 | 51 | The default settings of BR is straightforward to use 52 | ```{r, eval=TRUE, results='markup'} 53 | # Try BR with basic options 54 | mysim = br.sim(full.dat, max.n.sims=1000) 55 | plot(mysim) 56 | print(mysim) 57 | targets = target_forecast(mysim, "pht") 58 | print(targets) 59 | ``` 60 | -------------------------------------------------------------------------------- /epiforecast/vignettes/test.R: -------------------------------------------------------------------------------- 1 | library(devtools) 2 | outputdir = "." 3 | path_to_Rpkg_directory = "." 4 | setwd(path_to_Rpkg_directory) 5 | load_all() 6 | build() 7 | document() 8 | check() 9 | 10 | -------------------------------------------------------------------------------- /epiforecast_0.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/epiforecast_0.0.1.pdf -------------------------------------------------------------------------------- /sample/census_weights.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-delphi/epiforecast-R/726580337e75286532a03ec3ec5b104e9c3fd425/sample/census_weights.rds -------------------------------------------------------------------------------- /sample/combine-spreadsheets.R: -------------------------------------------------------------------------------- 1 | library("pipeR") 2 | 3 | natreg.stat.spreadsheet.dir = "~/files/nosync/epiforecast-epiproject/flusight-natreg-run/stat-spreadsheets" 4 | state.stat.spreadsheet.dir = "~/files/nosync/epiforecast-epiproject/flusight-state-run/stat-spreadsheets" 5 | combined.stat.spreadsheet.dir = "~/files/nosync/epiforecast-epiproject/flusight-combined-run/stat-spreadsheets" 6 | if (!dir.exists(combined.stat.spreadsheet.dir)) { 7 | dir.create(combined.stat.spreadsheet.dir, recursive=TRUE) 8 | } 9 | 10 | spreadsheet_name_to_epi_week = function(spreadsheet.name) { 11 | as.integer(substring(spreadsheet.name, 3L,4L)) 12 | } 13 | 14 | spreadsheet_info_tbl = function(spreadsheet.names, tag) { 15 | duplicate.info.tbl = 16 | tibble::tibble(spreadsheet.name=spreadsheet.names) %>>% 17 | dplyr::mutate(epi.week=spreadsheet_name_to_epi_week(spreadsheet.name)) %>>% 18 | dplyr::group_by(epi.week) %>>% 19 | dplyr::mutate(epi.week.is.duplicated=n()>1L) %>>% 20 | dplyr::ungroup() 21 | duplicated.epi.weeks = duplicate.info.tbl %>>% 22 | dplyr::filter(epi.week.is.duplicated) %>>% 23 | dplyr::distinct(epi.week) %>>% 24 | (epi.week) 25 | print(paste0("Don't know what to do in case of multiple spreadsheets for the same EW in the same project ",tag,"; skipping spreadsheets for the following EW's (if any): ", paste(duplicated.epi.weeks, collapse=", "))) 26 | return ( 27 | duplicate.info.tbl %>>% 28 | dplyr::filter(!epi.week.is.duplicated) %>>% 29 | dplyr::select(-epi.week.is.duplicated) %>>% 30 | stats::setNames(c(paste0(tag,".spreadsheet.name"), names(.)[-1L])) %>>% 31 | {.} 32 | ) 33 | } 34 | 35 | natreg.stat.spreadsheet.info.tbl = 36 | spreadsheet_info_tbl(list.files(natreg.stat.spreadsheet.dir), "natreg.stat") 37 | state.stat.spreadsheet.info.tbl = 
38 | spreadsheet_info_tbl(list.files(state.stat.spreadsheet.dir), "state.stat") 39 | 40 | print("Skipping EW in natreg but not state (if any):") 41 | print(setdiff(natreg.stat.spreadsheet.info.tbl[["epi.week"]], 42 | state.stat.spreadsheet.info.tbl[["epi.week"]])) 43 | print("Skipping EW in state but not natreg (if any):") 44 | print(setdiff(state.stat.spreadsheet.info.tbl[["epi.week"]], 45 | natreg.stat.spreadsheet.info.tbl[["epi.week"]])) 46 | 47 | combined.stat.spreadsheet.info.tbl = 48 | dplyr::inner_join(natreg.stat.spreadsheet.info.tbl, state.stat.spreadsheet.info.tbl, by="epi.week") 49 | 50 | combined.stat.spreadsheet.info.tbl %>>% 51 | dplyr::rowwise() %>>% 52 | dplyr::mutate(success={ 53 | tryCatch({ 54 | natreg.spreadsheet = readr::read_csv(file.path(natreg.stat.spreadsheet.dir, 55 | natreg.stat.spreadsheet.name), 56 | col_types=readr::cols()) 57 | state.spreadsheet = readr::read_csv(file.path(state.stat.spreadsheet.dir, 58 | state.stat.spreadsheet.name), 59 | col_types=attr(natreg.spreadsheet,"spec")) 60 | combined.spreadsheet = dplyr::bind_rows(natreg.spreadsheet, state.spreadsheet) 61 | natreg.date = as.Date(stringr::str_sub(natreg.stat.spreadsheet.name, -14L, -5L)) 62 | state.date = as.Date(stringr::str_sub(natreg.stat.spreadsheet.name, -14L, -5L)) 63 | combined.date = max(natreg.date, state.date) 64 | combined.stat.spreadsheet.name = 65 | natreg.stat.spreadsheet.name %>>% 66 | stringr::str_replace_all("Delphi-Stat", "Delphi-Stat-combined") %>>% 67 | stringr::str_replace_all(as.character(natreg.date), as.character(combined.date)) %>>% 68 | {.} 69 | readr::write_csv(combined.spreadsheet, 70 | file.path(combined.stat.spreadsheet.dir, combined.stat.spreadsheet.name)) 71 | TRUE 72 | }, 73 | error=function(e) { 74 | FALSE 75 | }) 76 | }) %>>% 77 | dplyr::select(success, dplyr::everything()) 78 | -------------------------------------------------------------------------------- /sample/covid19ilinet-templates.R: -------------------------------------------------------------------------------- 1 | 2 | covid19ilinet.natreg.spreadsheet.template = 3 | fetchUpdatingResource( 4 | function() { 5 | read.csv(textConnection(RCurl::getURL("https://raw.githubusercontent.com/cdcepi/COVID-19-ILI-forecasting/master/templates-and-data/covid19-ili-forecast-national-regional-template.csv")), check.names=FALSE, stringsAsFactors=FALSE) 6 | }, 7 | function(fetch.response) { 8 | return () 9 | }, 10 | cache.file.prefix=file.path(epidata.cache.dir,"covid19ilinet_natreg_spreadsheet_template"), 11 | cache.invalidation.period=as.difftime(8L, units="hours"), 12 | force.cache.invalidation=FALSE 13 | ) %>>% 14 | tibble::as_tibble() 15 | 16 | covid19ilinet.state.spreadsheet.template = 17 | fetchUpdatingResource( 18 | function() { 19 | read.csv(textConnection(RCurl::getURL("https://raw.githubusercontent.com/cdcepi/COVID-19-ILI-forecasting/master/templates-and-data/covid19-ili-forecast-state-template.csv")), check.names=FALSE, stringsAsFactors=FALSE) 20 | }, 21 | function(fetch.response) { 22 | return () 23 | }, 24 | cache.file.prefix=file.path(epidata.cache.dir,"covid19ilinet_state_spreadsheet_template"), 25 | cache.invalidation.period=as.difftime(8L, units="hours"), 26 | force.cache.invalidation=FALSE 27 | ) %>>% 28 | tibble::as_tibble() 29 | -------------------------------------------------------------------------------- /sample/gen-prospective-component-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | gc() 3 | s.prospective.seasons = 
current.issue.sw[["season"]] %>>% 4 | stats::setNames(paste0(.,"/",.+1L)) %>>% 5 | with_dimnamesnames("Season") 6 | w.prospective.model.weeks = current.issue.sw[["model.week"]] %>>% 7 | stats::setNames(paste0("MW",.)) %>>% 8 | with_dimnamesnames("Model Week") 9 | 10 | print("Current season: select available data") 11 | swg.prospective.voxel.data = map_join( 12 | get_voxel_data, 13 | s.prospective.seasons, w.prospective.model.weeks, g.epigroups, 14 | last.losocv.issue) 15 | 16 | ## Version of the above grouped by season and model week (an array of arrays of objects): 17 | sw.g.prospective.voxel.data = map_join( 18 | function(swg.array, s,w) { 19 | swg.array[s,w,,drop=FALSE] %>>% 20 | select_dims(1:2, "drop") # drop s & w dimensions, but keep g dimension even if size 1 21 | }, 22 | no_join(swg.prospective.voxel.data), 23 | named_arrayvec_to_name_arrayvec(s.prospective.seasons), 24 | named_arrayvec_to_name_arrayvec(w.prospective.model.weeks), 25 | lapply_variant=lapply, show.progress=FALSE 26 | ) 27 | 28 | print("Current season: generate backcasts") 29 | swgb.prospective.full.dats = map_join( 30 | get_backcast, 31 | swg.prospective.voxel.data, sw.g.prospective.voxel.data, source.name, signal.name, b.backcasters, 32 | epidata_df_to_chopped_trajectory_df=epidata_df_to_chopped_trajectory_df 33 | , shuffle=FALSE 34 | , cache.prefix=file.path(epiproject.cache.dir,"swgb.prospective.full.dats") 35 | ) 36 | 37 | print("Current season: generate component forecasts") 38 | swgbf.prospective.component.target.multicasts = map_join( 39 | target_multicast, 40 | swg.prospective.voxel.data, swgb.prospective.full.dats, f.forecasters, 41 | target_trajectory_preprocessor, 42 | no_join(t.target.specs), 43 | no_join(m.forecast.types), 44 | full_dat_fixup=full_dat_fixup 45 | , cache.prefix=file.path(epiproject.cache.dir,"swgbf.prospective.component.target.multicasts") 46 | ) 47 | 48 | swgtmbf.prospective.component.forecast.values = 49 | swgbf.prospective.component.target.multicasts %>>% 50 | ## first, get forecast.value's in swgbf.tm format: 51 | map_join(f=`[[`, "forecast.values") %>>% 52 | ## un-nest lists to get swgbftm format: 53 | map_join(f=`[[`, 54 | named_arrayvec_to_name_arrayvec(t.target.specs), 55 | named_arrayvec_to_name_arrayvec(m.forecast.types) 56 | ) %>>% 57 | ## permute dimension order to get desired swgtmbf format: 58 | aperm(c(1:3,6:7,4:5)) 59 | -------------------------------------------------------------------------------- /sample/gen-prospective-ensemble-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | ## Tack on additional indexer_list's for prospective forecasting: 3 | e.prospective.ensemble.weighting.scheme.swgtmbf.indexer.lists = 4 | map_join(function(wgt.indexer.list) { 5 | c(list(all=NULL), # use all past seasons 6 | wgt.indexer.list, # ensemble choice for weeks, epigroups, targets 7 | list(each=NULL), # separately for each forecast type 8 | list(all=NULL, all=NULL) # always group together all backcasters, forecasters 9 | ) 10 | }, e.ensemble.partial.weighting.scheme.wgt.indexer.lists) 11 | 12 | print("Current season: fit ensemble weightsets") 13 | e.prospective.ensemble.weightsets = map_join( 14 | function(weighting.scheme.indexer.list) { 15 | get_ensemble_weightset(swgtmbf.retro.component.forecast.values, 16 | swgtm.retro.observed.values, 17 | m.forecast.types, 18 | weighting.scheme.indexer.list) 19 | }, 20 | e.prospective.ensemble.weighting.scheme.swgtmbf.indexer.lists 21 | , 
cache.prefix=file.path(epiproject.cache.dir,"e.prospective.ensemble.weightsets") 22 | ) 23 | 24 | ## e.prospective.ensemble.weightsets = map_join( 25 | ## function(weighting.scheme.indexer.list) { 26 | ## stop ("Result was not computed yet.") 27 | ## }, 28 | ## e.prospective.ensemble.weighting.scheme.swgtmbf.indexer.lists 29 | ## , cache.prefix=file.path(epiproject.cache.dir,"e.prospective.ensemble.weightsets") 30 | ## ) 31 | 32 | print("Current season: generate ensemble forecasts") 33 | swgtme.prospective.ensemble.forecast.values = lapply( 34 | e.prospective.ensemble.weightsets, 35 | function(weightset) { 36 | map_join( 37 | ## `*`, # bad if only non-NA's are 0-weighted 38 | function(forecast.value, weight) { 39 | if (weight == 0) { 40 | weight <- NA 41 | } 42 | weight * forecast.value 43 | }, 44 | swgtmbf.prospective.component.forecast.values, weightset, 45 | eltname.mismatch.behavior="intersect" 46 | ) %>>% apply(1:5, Reduce, f=function(x,y) { 47 | dplyr::coalesce(x+y, x, y) 48 | }) 49 | }) %>>% 50 | simplify2arrayp() %>>% 51 | {names(dimnames(.))[[6L]] <- dimnamesnamesp(e.prospective.ensemble.weightsets); .} 52 | ## todo make this work for subsets and other indexers --- get index sets from 53 | ## indexers 54 | 55 | ## Calculate CV ensemble forecasts as target.multicasts 56 | swge.prospective.ensemble.target.multicasts = 57 | apply(swgtme.prospective.ensemble.forecast.values, c(1:3,6L), 58 | function(tm.forecast.values) { 59 | list(forecast.values=tm.forecast.values) 60 | }) 61 | -------------------------------------------------------------------------------- /sample/gen-retro-component-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | ## CV input data 3 | print("CV: select available input data") 4 | gc() 5 | swg.retro.voxel.data = 6 | tryCatch({ 7 | map_join( 8 | get_voxel_data, 9 | s.retro.seasons, w.retro.model.weeks, g.epigroups, 10 | last.losocv.issue, 11 | cache.prefix=file.path(epiproject.cache.dir,"swg.retro.voxel.data"), 12 | use.proxy=TRUE 13 | ) 14 | }, 15 | ## issues with parallel package returning long vector results from large runs... 16 | error=function(e) { 17 | print ("Encountered error preparing voxel data in parallel. 
Attempting to read cache files sequentially with no progress bar --- this make take a while.") 18 | map_join( 19 | get_voxel_data, 20 | s.retro.seasons, w.retro.model.weeks, g.epigroups, 21 | last.losocv.issue, 22 | lapply_variant=lapply, 23 | cache.prefix=file.path(epiproject.cache.dir,"swg.retro.voxel.data"), 24 | use.proxy=TRUE 25 | ) 26 | }) 27 | 28 | ## Version of the above grouped by season and model week (an array of arrays of objects): 29 | sw.g.retro.voxel.data = map_join( 30 | function(swg.array, s,w) { 31 | swg.array[s,w,,drop=FALSE] %>>% 32 | select_dims(1:2, "drop") # drop s & w dimensions, but keep g dimension even if size 1 33 | }, 34 | no_join(swg.retro.voxel.data), 35 | named_arrayvec_to_name_arrayvec(s.retro.seasons), 36 | named_arrayvec_to_name_arrayvec(w.retro.model.weeks), 37 | lapply_variant=lapply, show.progress=FALSE 38 | ) 39 | 40 | ## CV backcasts 41 | print("CV: generate backcasts") 42 | swgb.retro.full.dats = map_join( 43 | get_backcast, 44 | swg.retro.voxel.data, sw.g.retro.voxel.data, source.name, signal.name, b.backcasters, 45 | epidata_df_to_chopped_trajectory_df=epidata_df_to_chopped_trajectory_df, 46 | use.proxy=TRUE, 47 | cache.prefix=file.path(epiproject.cache.dir,"swgb.retro.full.dats") 48 | ) 49 | 50 | ## CV forecasts as target_multicast objects: 51 | print("CV: generate component forecasts") 52 | gc() 53 | swgbf.retro.component.target.multicasts = map_join( 54 | target_multicast, 55 | swg.retro.voxel.data, swgb.retro.full.dats, f.forecasters, 56 | target_trajectory_preprocessor, 57 | no_join(t.target.specs), 58 | no_join(m.forecast.types), 59 | full_dat_fixup=full_dat_fixup, 60 | ## lapply_variant = lapply, 61 | cache.prefix=file.path(epiproject.cache.dir,"swgbf.retro.component.target.multicasts"), 62 | shuffle=FALSE, 63 | use.proxy=TRUE 64 | ) 65 | ## xxx loading from many cache files is slow; reduce # of cache files? 66 | 67 | ## CV forecasts as forecast.value's: 68 | gc() 69 | swgtmbf.retro.component.forecast.values = 70 | map_join(swgbf.retro.component.target.multicasts, 71 | f=`[[`, "forecast.values", 72 | shuffle=FALSE#, lapply_variant=lapply 73 | ) %>>% 74 | simplify2arrayp() %>>% 75 | { 76 | original = . 77 | dim(.) <- c(dim(original)[1:2], dim(swgbf.retro.component.target.multicasts)) 78 | dimnames(.) <- c(dimnames(original)[1:2], dimnames(swgbf.retro.component.target.multicasts)) 79 | rm(original) # somehow this reaches the global environment and takes up memory 80 | gc() 81 | . 
82 | } %>>% 83 | aperm(c(3:5,1:2,6:7)) 84 | 85 | print('Analysis: calculate CV component evaluations') 86 | 87 | gc() 88 | sg.retro.observed.trajectories = map_join( 89 | get_observed_trajectory, 90 | s.retro.seasons, g.epigroups, 91 | cache.prefix=file.path(epiproject.cache.dir,"sg.retro.observed.trajectories") 92 | ) 93 | 94 | gc() 95 | swgt.retro.observed.multivals = map_join( 96 | observed_multival2, 97 | swg.retro.voxel.data, 98 | target_trajectory_preprocessor, 99 | t.target.specs, 100 | sg.retro.observed.trajectories, 101 | shuffle=FALSE, 102 | cache.prefix=file.path(epiproject.cache.dir,"swgt.retro.observed.multivals") 103 | ) 104 | 105 | gc() 106 | swgtm.retro.observed.values = map_join( 107 | observed_value2, 108 | swg.retro.voxel.data, t.target.specs, m.forecast.types, 109 | swgt.retro.observed.multivals, 110 | shuffle=FALSE, lapply_variant=lapply, 111 | cache.prefix=file.path(epiproject.cache.dir,"swgtm.retro.observed.values") 112 | ) 113 | 114 | old.mc.cores = getOption("mc.cores") 115 | options("mc.cores"=min(2L,old.mc.cores)) 116 | gc() 117 | swgtmbf.retro.component.evaluations = map_join( 118 | get_evaluation, 119 | swgtmbf.retro.component.forecast.values, swgtm.retro.observed.values, m.forecast.types 120 | ) 121 | mode(swgtmbf.retro.component.evaluations) <- "numeric" 122 | saveRDS(swgtmbf.retro.component.evaluations, file.path(epiproject.cache.dir,"swgtmbf.retro.component.evaluations.rds")) 123 | options("mc.cores"=old.mc.cores) 124 | ## fixme sometimes this results in errors due to NULL's appearing in the 125 | ## evaluations, but re-running the evaluations seems to work... memory issues? gc beforehand? 126 | 127 | 128 | ## swgt.retro.observed.multibin.values = map_join( 129 | ## observed_value2, 130 | ## swg.retro.voxel.data, t.target.specs, no_join(multibin.logscore.forecast.type), 131 | ## swgt.retro.observed.multivals 132 | ## ) 133 | 134 | ## swgtmbf.retro.component.multibin.scores = map_join( 135 | ## get_evaluation, 136 | ## swgtmbf.retro.component.forecast.values[,,,,"Bin",,,drop=FALSE], 137 | ## swgt.retro.observed.multibin.values, 138 | ## no_join(multibin.logscore.forecast.type) 139 | ## ) 140 | ## mode(swgtmbf.retro.component.multibin.scores) <- "numeric" 141 | 142 | ## saveRDS(swgtmbf.retro.component.multibin.scores, file.path(epiproject.cache.dir,"swgtmbf.retro.component.multibin.scores.rds")) 143 | 144 | 145 | 146 | 147 | 148 | apply(swgtmbf.retro.component.multibin.scores, c(7L,6L), mean, na.rm=TRUE) 149 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE], c(7L,6L), mean, na.rm=TRUE) 150 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 151 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(6L,7L), mean, na.rm=TRUE) 152 | apply(swgtmbf.retro.component.evaluations[names(s.retro.seasons)[s.retro.seasons>=2010L],,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 153 | apply(swgtmbf.retro.component.evaluations, c(7L,5L), mean, na.rm=TRUE) 154 | -------------------------------------------------------------------------------- /sample/gen-retro-ensemble-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | ## Observations we use for CV evaluation: 3 | print("CV: find observed trajectories, target multivals, target values") 4 | 5 | ## Tack on additional indexer_list's for CV: 6 | e.retro.ensemble.weighting.scheme.swgtmbf.indexer.lists = 7 | map_join(function(wgt.indexer.list) { 8 | 
c(retro.season.indexer, # retro study on seasons 9 | wgt.indexer.list, # ensemble choice for weeks, epigroups, targets 10 | list(each=NULL), # separately for each forecast type 11 | list(all=NULL, all=NULL) # always group together all backcasters, forecasters 12 | ) 13 | }, e.ensemble.partial.weighting.scheme.wgt.indexer.lists, 14 | lapply_variant=lapply, shuffle=FALSE, show.progress=FALSE 15 | ) 16 | 17 | ## Calculate CV ensemble weights: 18 | old.mc.cores = getOption("mc.cores") 19 | options("mc.cores"=min(2L,old.mc.cores)) 20 | print("CV: fit weightsets") 21 | e.retro.ensemble.weightsets = map_join( 22 | function(weighting.scheme.indexer.list) { 23 | get_ensemble_weightset(swgtmbf.retro.component.forecast.values, 24 | swgtm.retro.observed.values, 25 | m.forecast.types, 26 | weighting.scheme.indexer.list) 27 | }, 28 | e.retro.ensemble.weighting.scheme.swgtmbf.indexer.lists, 29 | lapply_variant=lapply, 30 | cache.prefix=file.path(epiproject.cache.dir,"e.retro.ensemble.weightsets") 31 | ) 32 | options("mc.cores"=old.mc.cores) 33 | 34 | ## Calculate CV ensemble forecasts as forecast.value's 35 | print("CV: generate ensemble forecasts") 36 | gc() 37 | swgtme.retro.ensemble.forecast.values.file = file.path(file.path(epiproject.cache.dir,"swgtme.retro.ensemble.forecast.values.rds")) 38 | swgtme.retro.ensemble.forecast.values = 39 | if (file.exists(swgtme.retro.ensemble.forecast.values.file)) { 40 | readRDS(swgtme.retro.ensemble.forecast.values.file) 41 | } else { 42 | lapply( 43 | e.retro.ensemble.weightsets, 44 | function(weightset) { 45 | map_join( 46 | ## `*`, # bad if only non-NA's are 0-weighted 47 | function(forecast.value, weight) { 48 | if (weight == 0) { 49 | weight <- NA 50 | } 51 | weight * forecast.value 52 | }, 53 | swgtmbf.retro.component.forecast.values, weightset, 54 | lapply_variant=lapply, show.progress=FALSE 55 | ) %>>% apply(1:5, Reduce, f=function(x,y) { 56 | dplyr::coalesce(x+y, x, y) 57 | }) 58 | }) %>>% 59 | simplify2arrayp() %>>% 60 | {names(dimnames(.))[[6L]] <- dimnamesnamesp(e.retro.ensemble.weightsets); .} 61 | } 62 | if (!file.exists(swgtme.retro.ensemble.forecast.values.file)) { 63 | saveRDS(swgtme.retro.ensemble.forecast.values, swgtme.retro.ensemble.forecast.values.file) 64 | } 65 | ## todo in weightset application, try removing NA values rather than 0 weights? is there a way to use row/col weighted means to do the above? 66 | ## todo speed up weightset fitting&application with Rcpp? 67 | ## todo use mclapply in map_join above instead of mclapply over ensembles... 
68 | ## but broke before; need to avoid memory duplication issues 69 | 70 | ## Calculate CV ensemble forecasts as target.multicasts 71 | gc() 72 | swge.retro.ensemble.target.multicasts.file = file.path(epiproject.cache.dir,"swge.retro.ensemble.target.multicasts.rds") 73 | swge.retro.ensemble.target.multicasts = 74 | if (file.exists(swge.retro.ensemble.target.multicasts.file)) { 75 | readRDS(swge.retro.ensemble.target.multicasts.file) 76 | } else { 77 | apply(swgtme.retro.ensemble.forecast.values, c(1:3,6L), 78 | function(tm.forecast.values) { 79 | list(forecast.values=tm.forecast.values) 80 | }) 81 | } 82 | if (!file.exists(swge.retro.ensemble.target.multicasts.file)) { 83 | saveRDS(swge.retro.ensemble.target.multicasts, swge.retro.ensemble.target.multicasts.file) 84 | } 85 | 86 | print('Analysis: calculate CV ensemble evaluations') 87 | 88 | ## swgtme.retro.ensemble.multibin.scores = map_join( 89 | ## get_evaluation, 90 | ## swgtme.retro.ensemble.forecast.values[,,,,"Bin",,drop=FALSE], 91 | ## swgt.retro.observed.multibin.values, 92 | ## no_join(multibin.logscore.forecast.type) 93 | ## ) 94 | ## mode(swgtme.retro.ensemble.multibin.scores) <- "numeric" 95 | ## saveRDS(swgtme.retro.ensemble.multibin.scores, file.path(epiproject.cache.dir,"swgtme.retro.ensemble.multibin.scores.rds")) 96 | 97 | swgtme.retro.ensemble.evaluations = map_join( 98 | get_evaluation, 99 | swgtme.retro.ensemble.forecast.values, swgtm.retro.observed.values, m.forecast.types 100 | ) 101 | mode(swgtme.retro.ensemble.evaluations) <- "numeric" 102 | saveRDS(swgtme.retro.ensemble.evaluations, file.path(epiproject.cache.dir,"swgtme.retro.ensemble.evaluations.rds")) 103 | 104 | 105 | 106 | 107 | 108 | ## apply(swgtmbf.retro.component.multibin.scores, c(7L,6L), mean, na.rm=TRUE) 109 | ## apply(swgtme.retro.ensemble.multibin.scores, c(6L), mean, na.rm=TRUE) 110 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE], c(7L,6L), mean, na.rm=TRUE) 111 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 112 | apply(swgtmbf.retro.component.evaluations[,,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(6L,7L), mean, na.rm=TRUE) 113 | apply(swgtmbf.retro.component.evaluations[names(s.retro.seasons)[s.retro.seasons>=2010L],,,,"Bin",,,drop=FALSE]%>>%pmax(-10), c(7L,6L), mean, na.rm=TRUE) 114 | apply(swgtmbf.retro.component.evaluations, c(7L,5L), mean, na.rm=TRUE) 115 | apply(swgtme.retro.ensemble.evaluations, 6:5, mean, na.rm=TRUE) 116 | -------------------------------------------------------------------------------- /sample/generate-retro-and-prospective-forecasts.R: -------------------------------------------------------------------------------- 1 | 2 | source("gen-retro-component-forecasts.R") 3 | source("gen-retro-ensemble-forecasts.R") 4 | 5 | 6 | 7 | ## apply(swgtmbf.retro.component.evaluations[(2003:2009)%>>%paste0("/",.+1L),,,,,,,drop=FALSE], 8 | ## c(5L,7L), mean, na.rm=TRUE) 9 | ## apply(swgtme.retro.ensemble.evaluations[(2003:2009)%>>%paste0("/",.+1L),,,,,,drop=FALSE], 10 | ## 5:6, mean, na.rm=TRUE) 11 | 12 | ## apply(swgtmbf.retro.component.evaluations, c(7L,4:5), mean, na.rm=TRUE) 13 | ## apply(swgtme.retro.ensemble.evaluations, c(6L,4:5), mean, na.rm=TRUE) 14 | 15 | ## apply(swgtme.retro.ensemble.evaluations, c(1,4:5), mean, na.rm=TRUE) 16 | 17 | ## todo another level of stacking? ensemble of ensembles? 
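## Illustrative aside (not part of the original scripts; the toy component names below are hypothetical): the weightset application in gen-retro-ensemble-forecasts.R and gen-prospective-ensemble-forecasts.R above multiplies each component forecast by its weight, turns exact-zero weights into NA so a zero-weighted component drops out instead of contributing zeros, and then folds components together with dplyr::coalesce(x + y, x, y), which keeps whichever side is non-NA. A minimal sketch of that reduction on toy probability vectors:
toy.forecasts = list(br=c(0.2, 0.3, 0.5), twkde=c(0.1, 0.6, 0.3), eb=c(0.3, 0.3, 0.4))
toy.weights = c(br=0.6, twkde=0.4, eb=0)
toy.weighted = Map(function(forecast.value, weight) {
  if (weight == 0) {
    weight <- NA  # same trick as above: exclude the component rather than zero it out
  }
  weight * forecast.value
}, toy.forecasts, as.list(toy.weights))
toy.ensemble = Reduce(function(x, y) dplyr::coalesce(x + y, x, y), toy.weighted)
## toy.ensemble is c(0.16, 0.42, 0.42); the zero-weighted "eb" component is
## ignored rather than pulling the mixture toward zero.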
18 | 19 | ## swbf.retro.forecast.spreadsheets = map_join( 20 | ## target_multicast_epigroup_forecast_table, 21 | ## swgbf.retro.component.target.multicasts, 22 | ## swg.retro.voxel.data, 23 | ## no_join(t.target.specs), no_join(m.forecast.types), 24 | ## epigroup.colname 25 | ## ) %>>% 26 | ## apply(c(1:2,4:5), dplyr::bind_rows) 27 | 28 | ## spreadsheet.template = readr::read_csv("~/long_flu_submission_template_1718.csv", col_types=readr::cols()) 29 | ## attr(spreadsheet.template, "spec") <- NULL 30 | ## spreadsheet.to.check = swbf.retro.forecast.spreadsheets[["2010/2011",1L,1L,1L]] 31 | 32 | ## spreadsheet.to.check %>>% dplyr::mutate(A="a") %>>% dplyr::full_join(qwer %>>% dplyr::mutate(B="b"), by=c("Location","Target","Type","Unit","Bin_start_incl","Bin_end_notincl")) %>>% dplyr::filter(is.na(A) | is.na(B)) 33 | ## class(spreadsheet.to.check[["Value"]])==class(spreadsheet.template[["Value"]]) 34 | ## identical(sapply(spreadsheet.to.check, class), sapply(spreadsheet.template, class)) 35 | ## identical(sapply(spreadsheet.to.check, typeof), sapply(spreadsheet.template, typeof)) 36 | ## identical(sapply(spreadsheet.to.check, mode), sapply(spreadsheet.template, mode)) 37 | ## identical(sapply(spreadsheet.to.check, storage.mode), sapply(spreadsheet.template, storage.mode)) 38 | ## Map(all.equal, 39 | ## spreadsheet.to.check %>>% dplyr::select(-Value), 40 | ## spreadsheet.template %>>% dplyr::select(-Value)) 41 | ## Map(identical, 42 | ## spreadsheet.to.check %>>% dplyr::select(-Value), 43 | ## spreadsheet.template %>>% dplyr::select(-Value)) 44 | ## identical(class(spreadsheet.to.check), class(spreadsheet.template)) 45 | ## identical(typeof(spreadsheet.to.check), typeof(spreadsheet.template)) 46 | ## identical(mode(spreadsheet.to.check), mode(spreadsheet.template)) 47 | ## identical(storage.mode(spreadsheet.to.check), storage.mode(spreadsheet.template)) 48 | ## ## all.equal overload does not check attributes; if "spec" attribute set by 49 | ## ## readr is included, the all.equal check passes but the identical check fails; 50 | ## ## removing this attribute makes the identical check pass 51 | ## all.equal(spreadsheet.to.check %>>% dplyr::select(-Value), 52 | ## spreadsheet.template %>>% dplyr::select(-Value)) 53 | ## identical(spreadsheet.to.check %>>% dplyr::select(-Value), 54 | ## spreadsheet.template %>>% dplyr::select(-Value)) 55 | 56 | source("gen-prospective-component-forecasts.R") 57 | source("gen-prospective-ensemble-forecasts.R") 58 | 59 | ## todo automatic appropriate parallel_dim_i setting 60 | ## fixme finish documenting map_join 61 | ## todo use simplified sim's to strip out unnecessary elements 62 | ## todo sw.epidata --- if will be borrowing across epigroups... 63 | ## todo add prospective forecasts... 64 | ## todo allow for subsets in ensemble... need to use cv_apply to get the forecasts again 65 | ## xxx target.settings -> voxel.settings? 66 | ## todo also need to strip out information from target forecasts if remains after using simplified sim's 67 | ## todo dots to lists in target spec functions, maybe elsewhere as well 68 | ## todo could also have save season-evaluation epidata to hopefully speed up calculating observed values 69 | ## todo reshape so that can include labels without exploding in size 70 | ## todo don't store sim's 71 | ## reverse order on the indexing? 72 | ## fixme no uniform fallback for Point; maybe make fallback the empirical distribution since it is smoothed now and has uniform pseudoweight? or just remove since everything has uniform pseudoweight? 
73 | ## fixme no Bin smoothing by default? warn about plotting target forecasts? 74 | ## todo joint epidata? voxel settings? 75 | ## joint backcast? joint forecast? 76 | ## fixme Virgin Islands --- little history 77 | ## todo document no_join 78 | ## fixme check on Season onset Point predictions of "none" vs. NA vs. ... 79 | ## fixme EB weight sum is too large 80 | ## fixme smooth sim targets with a Laplace kernel? or a spike + slab type --- can inflate bw to make up for mass on spike? 81 | ## fixme should adjust dimension ordering... for col major feel 82 | ## todo deal with parallel-related memory issues --- duplicate swg.voxel.data... 83 | ## todo work on speed getting voxel data --- possible to avoid storage? 84 | ## xxx consider just basing everything on filesystem contracts... no need to hold everything in memory 85 | ## fixme try to solve memory issues with mclapply env's? require interaction with disk? 86 | ## todo weighted cv_apply smearing schemes (boxcar kernel -> other kernels) 87 | ## todo tiny subset run to do some testing and development on 88 | ## fixme table verification 89 | ## fixme empirical distribution is using target's baseline with empirical curves again instead of pairing baselines and curves 90 | ## todo better fill-in in 2010 for states 91 | ## todo remove Season onset target for states 92 | ## fixme modularize into functions, document, ... 93 | -------------------------------------------------------------------------------- /sample/high-state-forecasts.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Aaron Rumack 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | source("high-state-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | save_spreadsheets( 27 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 28 | swg.prospective.voxel.data, 29 | t.target.specs, m.forecast.types, 30 | epigroup.colname, 31 | file.path(epiproject.cache.dir,"stat-spreadsheets") 32 | ) 33 | 34 | save_spreadsheets( 35 | swgbf.prospective.component.target.multicasts, 36 | swg.prospective.voxel.data, 37 | t.target.specs, m.forecast.types, 38 | epigroup.colname, 39 | file.path(epiproject.cache.dir,"spreadsheets") 40 | ) 41 | 42 | save_linlog_plots( 43 | target_multicast_week_plot, 44 | swgbf.prospective.component.target.multicasts, 45 | swg.prospective.voxel.data, 46 | t.target.specs, m.forecast.types, 47 | file.path(epiproject.cache.dir,"linlog.plots-week") 48 | ) 49 | 50 | save_linlog_plots( 51 | target_multicast_percent_plot, 52 | swgbf.prospective.component.target.multicasts, 53 | swg.prospective.voxel.data, 54 | t.target.specs, m.forecast.types, 55 | file.path(epiproject.cache.dir,"linlog.plots-percent") 56 | ) 57 | 58 | save_spreadsheets( 59 | swge.prospective.ensemble.target.multicasts, 60 | swg.prospective.voxel.data, 61 | t.target.specs, m.forecast.types, 62 | epigroup.colname, 63 | file.path(epiproject.cache.dir,"spreadsheets") 64 | ) 65 | 66 | save_linlog_plots( 67 | target_multicast_week_plot, 68 | swge.prospective.ensemble.target.multicasts, 69 | swg.prospective.voxel.data, 70 | t.target.specs, m.forecast.types, 71 | file.path(epiproject.cache.dir,"linlog.plots-week") 72 | ) 73 | 74 | save_linlog_plots( 75 | target_multicast_percent_plot, 76 | swge.prospective.ensemble.target.multicasts, 77 | swg.prospective.voxel.data, 78 | t.target.specs, m.forecast.types, 79 | file.path(epiproject.cache.dir,"linlog.plots-percent") 80 | ) 81 | 82 | save_linlog_plots( 83 | target_multicast_week_plot, 84 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 85 | swg.prospective.voxel.data, 86 | t.target.specs, m.forecast.types, 87 | file.path(epiproject.cache.dir,"stat-linlog.plots-week") 88 | ) 89 | 90 | save_linlog_plots( 91 | target_multicast_percent_plot, 92 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 93 | swg.prospective.voxel.data, 94 | t.target.specs, m.forecast.types, 95 | file.path(epiproject.cache.dir,"stat-linlog.plots-percent") 96 | ) 97 | 98 | save_weighting_linlog_plots( 99 | e.prospective.ensemble.weightsets[["target-9time-based"]], 100 | swgbf.prospective.component.target.multicasts, 101 | swg.prospective.voxel.data, 102 | t.target.specs, m.forecast.types, 103 | file.path(epiproject.cache.dir,"stat-weighting-plots") 104 | ) 105 | -------------------------------------------------------------------------------- /sample/low-state-forecasts.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks, Aaron Rumack 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. 
The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | source("low-state-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | save_spreadsheets( 27 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 28 | swg.prospective.voxel.data, 29 | t.target.specs, m.forecast.types, 30 | epigroup.colname, 31 | file.path(epiproject.cache.dir,"stat-spreadsheets") 32 | ) 33 | 34 | save_spreadsheets( 35 | swgbf.prospective.component.target.multicasts, 36 | swg.prospective.voxel.data, 37 | t.target.specs, m.forecast.types, 38 | epigroup.colname, 39 | file.path(epiproject.cache.dir,"spreadsheets") 40 | ) 41 | 42 | save_linlog_plots( 43 | target_multicast_week_plot, 44 | swgbf.prospective.component.target.multicasts, 45 | swg.prospective.voxel.data, 46 | t.target.specs, m.forecast.types, 47 | file.path(epiproject.cache.dir,"linlog.plots-week") 48 | ) 49 | 50 | save_linlog_plots( 51 | target_multicast_percent_plot, 52 | swgbf.prospective.component.target.multicasts, 53 | swg.prospective.voxel.data, 54 | t.target.specs, m.forecast.types, 55 | file.path(epiproject.cache.dir,"linlog.plots-percent") 56 | ) 57 | 58 | save_spreadsheets( 59 | swge.prospective.ensemble.target.multicasts, 60 | swg.prospective.voxel.data, 61 | t.target.specs, m.forecast.types, 62 | epigroup.colname, 63 | file.path(epiproject.cache.dir,"spreadsheets") 64 | ) 65 | 66 | save_linlog_plots( 67 | target_multicast_week_plot, 68 | swge.prospective.ensemble.target.multicasts, 69 | swg.prospective.voxel.data, 70 | t.target.specs, m.forecast.types, 71 | file.path(epiproject.cache.dir,"linlog.plots-week") 72 | ) 73 | 74 | save_linlog_plots( 75 | target_multicast_percent_plot, 76 | swge.prospective.ensemble.target.multicasts, 77 | swg.prospective.voxel.data, 78 | t.target.specs, m.forecast.types, 79 | "../../../epiforecast-epiproject/flusight-low-state-run/linlog.plots-percent" 80 | ) 81 | 82 | save_linlog_plots( 83 | target_multicast_week_plot, 84 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 85 | swg.prospective.voxel.data, 86 | t.target.specs, m.forecast.types, 87 | "../../../epiforecast-epiproject/flusight-low-state-run/stat-linlog.plots-week" 
88 | ) 89 | 90 | save_linlog_plots( 91 | target_multicast_percent_plot, 92 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 93 | swg.prospective.voxel.data, 94 | t.target.specs, m.forecast.types, 95 | "../../../epiforecast-epiproject/flusight-low-state-run/stat-linlog.plots-percent" 96 | ) 97 | 98 | save_weighting_linlog_plots( 99 | e.prospective.ensemble.weightsets[["target-9time-based"]], 100 | swgbf.prospective.component.target.multicasts, 101 | swg.prospective.voxel.data, 102 | t.target.specs, m.forecast.types, 103 | "../../../epiforecast-epiproject/flusight-low-state-run/stat-weighting-plots" 104 | ) 105 | -------------------------------------------------------------------------------- /sample/natreg-for-collaborative-ensemble.R: -------------------------------------------------------------------------------- 1 | ## See the can-use-for-2018/2019-natreg-collab-ensemble branch for scripts to generate component forecasts to send in to be incorporated into the FluSight-Network ensemble. 2 | 3 | ## But note that the models on that branch are now outdated; e.g., EB has a critical bug that has been fixed on main but not on that branch yet, and there may be other missing fixes as well. 4 | -------------------------------------------------------------------------------- /sample/natreg-forecasts.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 
20 | ## license_header end 21 | 22 | source("natreg-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | ## Output prospective forecast spreadsheets, plots: 27 | collab.ensemble.retro.dir = "../../../collaborative-ensemble-potential-submission-4" 28 | if (!dir.exists(collab.ensemble.retro.dir)) { 29 | dir.create(collab.ensemble.retro.dir) 30 | } 31 | save_spreadsheets(swgbf.retro.component.target.multicasts[,,,"quantile_arx_backnowcast",,drop=FALSE], 32 | swg.retro.voxel.data, 33 | t.target.specs, m.forecast.types, 34 | epigroup.colname, 35 | collab.ensemble.retro.dir, 36 | function(swg.voxel.data,s,w,...) { 37 | season = swg.voxel.data[[s,w,1L]][["season"]] 38 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 39 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 40 | if (season >= 2010L && !dplyr::between(week,21L,39L)) { 41 | sprintf("%s/EW%02d-%d-%s.csv", ..2, week, year, ..2) 42 | } 43 | }) 44 | save_spreadsheets(swge.retro.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 45 | swg.retro.voxel.data, 46 | t.target.specs, m.forecast.types, 47 | epigroup.colname, 48 | collab.ensemble.retro.dir, 49 | function(swg.voxel.data,s,w,...) { 50 | season = swg.voxel.data[[s,w,1L]][["season"]] 51 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 52 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 53 | if (season >= 2010L && !dplyr::between(week,21L,39L)) { 54 | sprintf("%s/EW%02d-%d-%s.csv", "Delphi_Stat_FewerComponentsNoBackcastNoNowcast", week, year, "Delphi_Stat_FewerComponentsNoBackcastNoNowcast") 55 | } 56 | }) 57 | save_spreadsheets(swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 58 | swg.prospective.voxel.data, 59 | t.target.specs, m.forecast.types, 60 | epigroup.colname, 61 | collab.ensemble.prospective.dir, 62 | function(swg.voxel.data,s,w,...) { 63 | season = swg.voxel.data[[s,w,1L]][["season"]] 64 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 65 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 66 | if (season >= 2010L && !dplyr::between(week,21L,39L)) { 67 | sprintf("%s/EW%02d-%d-%s.csv", "Delphi_Stat_FewerComponentsNoBackcastNoNowcast", week, year, "Delphi_Stat_FewerComponentsNoBackcastNoNowcast") 68 | } 69 | }) 70 | 71 | 72 | save_spreadsheets( 73 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 74 | swg.prospective.voxel.data, 75 | t.target.specs, m.forecast.types, 76 | epigroup.colname, 77 | file.path(epiproject.cache.dir,"stat-spreadsheets"), 78 | function(swg.voxel.data,s,w,...) 
{ 79 | season = swg.voxel.data[[s,w,1L]][["season"]] 80 | year = swg.voxel.data[[s,w,1L]][["issue"]] %/% 100L 81 | week = swg.voxel.data[[s,w,1L]][["issue"]] %% 100L 82 | sprintf("EW%02d-%s-%s.csv", week, "Delphi-Stat", Sys.Date()) 83 | } 84 | ) 85 | 86 | save_spreadsheets( 87 | swge.prospective.ensemble.target.multicasts, 88 | swg.prospective.voxel.data, 89 | t.target.specs, m.forecast.types, 90 | epigroup.colname, 91 | file.path(epiproject.cache.dir,"spreadsheets") 92 | ) 93 | 94 | save_linlog_plots( 95 | target_multicast_week_plot, 96 | swgbf.prospective.component.target.multicasts, 97 | swg.prospective.voxel.data, 98 | t.target.specs, m.forecast.types, 99 | file.path(epiproject.cache.dir,"linlog.plots") 100 | ) 101 | 102 | save_linlog_plots( 103 | target_multicast_percent_plot, 104 | swgbf.prospective.component.target.multicasts, 105 | swg.prospective.voxel.data, 106 | t.target.specs, m.forecast.types, 107 | file.path(epiproject.cache.dir,"linlog.plots") 108 | ) 109 | 110 | save_linlog_plots( 111 | target_multicast_week_plot, 112 | swge.prospective.ensemble.target.multicasts, 113 | swg.prospective.voxel.data, 114 | t.target.specs, m.forecast.types, 115 | file.path(epiproject.cache.dir,"linlog.plots-week") 116 | ) 117 | 118 | save_linlog_plots( 119 | target_multicast_percent_plot, 120 | swge.prospective.ensemble.target.multicasts, 121 | swg.prospective.voxel.data, 122 | t.target.specs, m.forecast.types, 123 | file.path(epiproject.cache.dir,"linlog.plots-percent") 124 | ) 125 | 126 | save_linlog_plots( 127 | target_multicast_week_plot, 128 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 129 | swg.prospective.voxel.data, 130 | t.target.specs, m.forecast.types, 131 | file.path(epiproject.cache.dir,"stat-linlog.plots-week") 132 | ) 133 | 134 | save_linlog_plots( 135 | target_multicast_percent_plot, 136 | swge.prospective.ensemble.target.multicasts[,,,"target-9time-based",drop=FALSE], 137 | swg.prospective.voxel.data, 138 | t.target.specs, m.forecast.types, 139 | file.path(epiproject.cache.dir,"stat-linlog.plots-percent") 140 | ) 141 | 142 | save_weighting_linlog_plots( 143 | e.prospective.ensemble.weightsets[["target-9time-based"]], 144 | swgbf.prospective.component.target.multicasts, 145 | swg.prospective.voxel.data, 146 | t.target.specs, m.forecast.types, 147 | file.path(epiproject.cache.dir,"stat-weighting-plots") 148 | ) 149 | -------------------------------------------------------------------------------- /sample/natreg-with-eb.R: -------------------------------------------------------------------------------- 1 | ## author_header begin 2 | ## Copyright (C) 2017 Logan C. Brooks 3 | ## 4 | ## This file is part of epiforecast. Algorithms included in epiforecast were developed by Logan C. Brooks, David C. Farrow, Sangwon Hyun, Shannon Gallagher, Ryan J. Tibshirani, Roni Rosenfeld, and Rob Tibshirani (Stanford University), members of the Delphi group at Carnegie Mellon University. 5 | ## 6 | ## Research reported in this publication was supported by the National Institute Of General Medical Sciences of the National Institutes of Health under Award Number U54 GM088491. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This material is based upon work supported by the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE-1252522. 
Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation. David C. Farrow was a predoctoral trainee supported by NIH T32 training grant T32 EB009403 as part of the HHMI-NIBIB Interfaces Initiative. Ryan J. Tibshirani was supported by NSF grant DMS-1309174. 7 | ## author_header end 8 | ## license_header begin 9 | ## epiforecast is free software: you can redistribute it and/or modify 10 | ## it under the terms of the GNU General Public License as published by 11 | ## the Free Software Foundation, version 2 of the License. 12 | ## 13 | ## epiforecast is distributed in the hope that it will be useful, 14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ## GNU General Public License for more details. 17 | ## 18 | ## You should have received a copy of the GNU General Public License 19 | ## along with epiforecast. If not, see . 20 | ## license_header end 21 | 22 | source("natreg-with-eb-config.R") 23 | 24 | source("generate-retro-and-prospective-forecasts.R") 25 | 26 | save_spreadsheets( 27 | swge.prospective.ensemble.target.multicasts, 28 | swg.prospective.voxel.data, 29 | t.target.specs, m.forecast.types, 30 | epigroup.colname, 31 | file.path(epiproject.cache.dir,"spreadsheets") 32 | ) 33 | 34 | ## fixme how is EB treating back/now/pancasts fed into it? 35 | 36 | ## fixme todo add backcast targets... 37 | -------------------------------------------------------------------------------- /sample/prepare-covid19-submissions.R: -------------------------------------------------------------------------------- 1 | 2 | library("pipeR") 3 | source("covidplot.R") 4 | 5 | ## year = 2020L 6 | ## epi.week = 10L 7 | ## crowdcast.tgz.dirpath = "~/Dropbox/private/crowdcast_spreadsheets_to_filter" 8 | ## submissions.root.dirpath = "~/Dropbox/private/test_submissions" 9 | ## do.plot = TRUE 10 | ## plots.root.dirpath = "~/Dropbox/private/test_plots" 11 | ## create.destination.dirs = FALSE 12 | 13 | ## --- Provide usage message / Read in command-line arguments: --- 14 | command.args = commandArgs(trailingOnly=TRUE) 15 | if (length(command.args) != 7L) { 16 | print(command.args) 17 | stop (paste(collapse="\n",capture.output({ 18 | cat(fill=TRUE, 19 | ' 20 | Usage: Rscript prepare-covid19-submissions.R ; where: 21 | - year is probably 2020 22 | - epi.week is a two-digit epi-week-of-year number 23 | - crowdcast.tgz.dirpath is the path of the directory containing dfarrow epicast tarballs 24 | - submissions.root.dirpath is the draft submission directory or the submission repository directory 25 | - do.plot is a single Boolean (TRUE/FALSE) indicating whether or not to prepare plots 26 | - plots.root.dirpath is a directory indicating where to place plots if requested (and can be an arbitrary string otherwise) 27 | - create.destination.dirs is a single Boolean (TRUE/FALSE) indicating whether or not to create spreadsheet and plot destination directories --- use FALSE to double-check that pre-existing directories have the right structure. 
28 | ') 29 | }))) 30 | } 31 | year = as.integer(command.args[[1L]]) 32 | epi.week = as.integer(command.args[[2L]]) 33 | crowdcast.tgz.dirpath = as.character(command.args[[3L]]) 34 | submissions.root.dirpath = as.character(command.args[[4L]]) 35 | do.plot = as.logical(command.args[[5L]]) 36 | plots.root.dirpath = as.character(command.args[[6L]]) 37 | create.destination.dirs = as.logical(command.args[[7L]]) 38 | 39 | 40 | ## --- Utils: --- 41 | ensure_dirpath = function(dirpath) { 42 | if (length(dirpath) != 1L) { 43 | stop ('Only tested for one dirpath at a time.') 44 | } 45 | if (!dir.exists(dirpath)) { 46 | if (create.destination.dirs) { 47 | dir.create(dirpath) 48 | } else { 49 | stop (sprintf('Destination directory "%s" does not already exist. Maybe one or both of {submissions.root.dirpath, spreadsheets.root.dirpath} have not been used before and needs a single-time set-up by using create.destination.dirs=TRUE, or one or both of these dirpaths were misspecified.')) 50 | } 51 | } 52 | } 53 | 54 | 55 | 56 | ## --- Set up destinations: --- 57 | cmu.delphi.entry.names = c("CMU_Delphi-Crowdcast","CMU_Delphi-Crowdcast_MTurk","CMU_Delphi-Stat_Nowcast") 58 | ensure_dirpath(submissions.root.dirpath) 59 | natreg.submissions.dirpath = file.path(submissions.root.dirpath, "nation-region-forecast-data") 60 | state.submissions.dirpath = file.path(submissions.root.dirpath, "state-forecast-data") 61 | for (geography.dirpath in c(natreg.submissions.dirpath, state.submissions.dirpath)) { 62 | ensure_dirpath(geography.dirpath) 63 | for (cmu.delphi.entry.name in cmu.delphi.entry.names) { 64 | ensure_dirpath(file.path(geography.dirpath, cmu.delphi.entry.name)) 65 | } 66 | } 67 | if (do.plot) { 68 | ensure_dirpath(plots.root.dirpath) 69 | natreg.plots.dirpath = file.path(plots.root.dirpath, "nation-region-forecast-plots") 70 | state.plots.dirpath = file.path(plots.root.dirpath, "state-forecast-plots") 71 | for (geography.dirpath in c(natreg.plots.dirpath, state.plots.dirpath)) { 72 | ensure_dirpath(geography.dirpath) 73 | for (cmu.delphi.entry.name in cmu.delphi.entry.names) { 74 | ensure_dirpath(file.path(geography.dirpath, cmu.delphi.entry.name)) 75 | } 76 | } 77 | } 78 | 79 | 80 | 81 | ## --- Prepare Crowdcast submission: --- 82 | ## Select a tarball: 83 | crowdcast.tgz.pattern = sprintf("^dfarrow_covid_epicast_%d%02d_([0-9]+).tgz$", year, epi.week) 84 | candidate.crowdcast.tgz.filenames = list.files(crowdcast.tgz.dirpath) %>>% 85 | {.[grep(crowdcast.tgz.pattern, .)]} 86 | selected.crowdcast.tgz.filename = 87 | if (length(candidate.crowdcast.tgz.filenames) == 0L) { 88 | stop ('Cannot find tgz for this week.') 89 | } else if (length(candidate.crowdcast.tgz.filenames) == 1L) { 90 | candidate.crowdcast.tgz.filenames[[1L]] 91 | } else { 92 | version.numbers = as.integer(sub(crowdcast.tgz.pattern, "\\1", candidate.crowdcast.tgz.filenames)) 93 | selected.filename = candidate.crowdcast.tgz.filenames[[which.max(version.numbers)]] 94 | warning(sprintf('There appeared to be multiple tarballs for the given week. 
Selected "%s"', selected.filename)) 95 | selected.filename 96 | } 97 | selected.crowdcast.tgz.filepath = file.path(crowdcast.tgz.dirpath, selected.crowdcast.tgz.filename) 98 | ## Extract tarball to temporary directory: 99 | extraction.dirpath = tempdir() 100 | untar.code = untar(selected.crowdcast.tgz.filepath, c("epicast-regional.csv","epicast-state.csv"), 101 | exdir=extraction.dirpath, 102 | compressed="gzip") 103 | if (untar.code != 0L) { 104 | stop (sprintf('Error extracting tarball; error code: %d',untar.code)) 105 | } 106 | cat('Extracted Crowdcast tarball.', fill=TRUE) 107 | ## Filter targets within the csv's, write to csv's in destinations: 108 | crowdcast.natreg.spreadsheet.filepath = file.path(natreg.submissions.dirpath, "CMU_Delphi-Crowdcast", sprintf("%d-ew%02d-CMU_Delphi-Crowdcast.csv", year, epi.week)) 109 | readr::read_csv(file.path(extraction.dirpath, "epicast-regional.csv"), 110 | col_types="ccccc") %>>% 111 | dplyr::filter(target %in% c("1 wk ahead","2 wk ahead")) %>>% 112 | readr::write_csv(crowdcast.natreg.spreadsheet.filepath) 113 | crowdcast.state.spreadsheet.filepath = file.path(state.submissions.dirpath, "CMU_Delphi-Crowdcast", sprintf("%d-ew%02d-CMU_Delphi-Crowdcast.csv", year, epi.week)) 114 | readr::read_csv(file.path(extraction.dirpath, "epicast-state.csv"), 115 | col_types="ccccc") %>>% 116 | dplyr::filter(target %in% c("1 wk ahead","2 wk ahead")) %>>% 117 | readr::write_csv(crowdcast.state.spreadsheet.filepath) 118 | cat('Wrote out spreadsheets.', fill=TRUE) 119 | ## Prepare plots: 120 | if (do.plot) { 121 | plot.covid.forecast(crowdcast.natreg.spreadsheet.filepath, file.path(natreg.plots.dirpath, "CMU_Delphi-Crowdcast")) 122 | plot.covid.forecast(crowdcast.state.spreadsheet.filepath, file.path(state.plots.dirpath, "CMU_Delphi-Crowdcast")) 123 | } 124 | cat('Wrote out plots.', fill=TRUE) 125 | ## Clean up temporary directory immediately: 126 | file.remove(file.path(extraction.dirpath, "epicast-regional.csv")) 127 | file.remove(file.path(extraction.dirpath, "epicast-state.csv")) 128 | unlink(extraction.dirpath) 129 | cat('Cleaned up temporary directory.', fill=TRUE) 130 | --------------------------------------------------------------------------------