├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── add_outcomes.R
    ├── add_predictors.R
    ├── combine.R
    ├── utils-pipe.R
    ├── wizard.R
    └── wizard_internal.R
├── README.Rmd
├── README.html
├── README.md
├── data-raw
    └── sample_data.R
├── data
    ├── sample_fixed_data.rda
    └── sample_temporal_data.rda
├── inst
    └── extdata
    │   ├── fixed_data.csv
    │   └── temporal_data.csv
├── man
    ├── hello.Rd
    ├── pipe.Rd
    ├── wiz_add_baseline_predictors.Rd
    ├── wiz_add_growing_predictors.Rd
    ├── wiz_add_predictors.Rd
    ├── wiz_add_predictors_internal.Rd
    ├── wiz_build_temporal_data_dictionary.Rd
    ├── wiz_calc.Rd
    ├── wiz_combine.Rd
    ├── wiz_define_steps.Rd
    ├── wiz_dummy_code.Rd
    └── wiz_frame.Rd
├── vignettes
    ├── .gitignore
    └── character_language_model.Rmd
└── wizard.Rproj


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^data-raw$
4 | ^README\.Rmd$
5 | ^LICENSE\.md$
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: wizard
 2 | Type: Package
 3 | Title: Windowed Summarization for Autoregressive Data
 4 | Version: 0.2.1
 5 | Author: Karandeep Singh
 6 | Maintainer: Karandeep Singh <kdpsingh@umich.edu>
 7 | Description: This package uses windowed summarization to convert time series data 
 8 |     into a form that can be modeled by prediction models.
 9 | License: MIT + file LICENSE
10 | Encoding: UTF-8
11 | LazyData: true
12 | Imports: 
13 |     data.table (>= 1.12.8),
14 |     magrittr,
15 |     dplyr (>= 0.8.3),
16 |     assertthat,
17 |     tidyr (>= 1.0.0),
18 |     rlang (>= 0.4.2),
19 |     lubridate,
20 |     stringr (>= 1.4.0),
21 |     janitor,
22 |     progress,
23 |     furrr (>= 0.1.0)
24 | Depends: 
25 |     R (>= 2.10)
26 | RoxygenNote: 7.1.1
27 | URL: https://github.com/ML4LHS/wizard
28 | BugReports: https://github.com/ML4LHS/wizard/issues
29 | Suggests: 
30 |     knitr,
31 |     rmarkdown
32 | VignetteBuilder: knitr
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: Karandeep Singh
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2020 Karandeep Singh
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("%>%")
 4 | export(wiz_add_baseline_predictors)
 5 | export(wiz_add_growing_predictors)
 6 | export(wiz_add_outcomes)
 7 | export(wiz_add_predictors)
 8 | export(wiz_combine)
 9 | export(wiz_dummy_code)
10 | export(wiz_frame)
11 | importFrom(magrittr,"%>%")
12 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # wizard 0.0.0.9000
 2 | 
 3 | * Added a `NEWS.md` file to track changes to the package.
 4 | 
 5 | # wizard 0.0.0.9001 (2020-10-04)
 6 | 
 7 | * Changed `variable` argument to `variables` inside both `wiz_add_predictors()` and `wiz_add_outcomes()`. This was done to reflect the fact that the `variables` argument can accept a vector of variable names.
 8 | * Added `check_size_only` argument to `wiz_add_predictors()` and `wiz_add_outcomes()`
 9 | * Renamed `wiz_categorical_to_numeric()` to `wiz_dummy_code()` to better reflect what the function does. `wiz_dummy_code()` can now be used to dummy code specific variables, which is useful if you only want to dummy code a specific subset of variables or if you want to dummy code a categorical variable that is currently coded with numbers (such as 1 meaning high and 2 meaning low).
10 | * Important bug fix to encoding of time to allow for non-time-stamps
11 | * Added `check_size_only` argument to `wiz_add_predictors()` to allow you to anticipate the size of the output object/file without actually running the calculations.
12 | 
13 | # wizard 0.0.0.9002 (2020-10-07)
14 | * Added `create_folder` argument to `wiz_frame()` to allow for automatic creation of the `output_folder` if it does not already exist. This is useful for automated creation of directories to override the need for user confirmation prior to directory creation.
15 | * Added several checks when creating a `wiz_frame()` to ensure that there is not missingness or duplication in the data that would be expected to lead to failure of one of the downstream tasks.
16 | * Added `save_wiz_frame` argument to `wiz_frame()` and `wiz_dummy_code()`. If set to `TRUE`, this saves the `wiz_frame` object to the specified `output_folder` with the file_name `wiz_frame.rds`. Note that this will overwrite prior versions of the file.
17 | * Added `log_file` option to `wiz_add_predictors()` and `wiz_add_outcomes()` to create and append to a log file, which is auto-titled `wiz_log.txt` and saved in the `output_folder`.
18 | * Added `wiz_add_baseline_predictors()` function to add baseline predictors with an option to specify an offset (e.g., up to 1 hour prior to admission)
19 | 
20 | # wizard 0.0.0.9003 (2020-10-08)
21 | 
22 | * All windows are now calculated simultaneously for each step, which greatly reduces the number of parallel jobs (and the time and memory taken by the allocating memory step).
23 | * Removed time column from `wiz_add_baseline_predictors()` so that it is treated as a type of "fixed data", and moved its logic into the `wiz_add_predictors()` function.
24 | 
25 | # wizard 0.0.0.9004 (2020-10-09)
26 | 
27 | * Added optional `max_length` argument to `wiz_frame()` that limits the maximum time or sequence length for each id.
28 | * Bug fix to incorrect calculation of outcomes (this error was introduced in 0.0.0.9003 due to substantial refactoring of code)
29 | * Fixed LOCF imputation bug: now, imputation only occurs within windows in the lookback period (and not beyond)
30 | * Fixed missingness leading to length stat of NA. Now, every stat's missingness is computed dynamically (e.g., length becomes 0)
31 | * Fixed implicit missingness issue due to some IDs and times ids not having certain variables left after filtering
32 | * Changed defaults so that `save_wiz_frame` and `log_file` are `TRUE`. This makes it more consistent with `output_file = TRUE` in that saving output and logs to file are the default.
33 | 
34 | # wizard 0.0.0.9005 (2020-10-11)
35 | * Created batch_size option in wiz_frame() to chunk processing into batches. A batch of 1000 means 1000 patients will be processed at once.
36 | * Moved processing code to wizard_internal.R and converted all other functions into wrappers
37 | * Moved implicit missingness and LOCF imputation into the parallel jobs to reduce memory footprint after row-binding parallel jobs
38 | * Bug fix to error introduced in wizard 0.0.0.9004 resulting in extra time steps for individuals during "handling implicit missingness" step.
39 | * Note: wiz_combine() does not support batches yet but will in a subsequent version.
40 | 
41 | # wizard 0.0.0.9006 (2020-10-27)
42 | * Added wiz_add_growing_predictors() for building cumulatively growing windows beginning at time zero
43 | * Changed `batch_size` parameter to `chunk_size` in `wiz_frame()`
44 | * Changed names of files to include `baseline`, `growing`, or `rolling` to indicate the type of variables contained within them.
45 | * Made some changes to the `character_language_model.Rmd` vignette to generate a large rolling dataset and test the `growing` window functionality. This vignette needs to be cleaned up for educational use.
46 | 
47 | # wizard 0.0.0.9007 (2020-11-09)
48 | * Updated `wiz_combine()` to support chunked files and changed interface so that supplying a vector of files is optional.
49 | * Removed `wiz_add_predictors_streaming` functions
50 | 
51 | # wizard 0.1.0 (2021-02-22)
52 | * Fixed bug with the temporal_data_of_interest being empty due to highly sparse variables
53 | * Also added check for max_step_times_per_id being empty (though this should not occur)
54 | 
55 | # wizard 0.2.0 (2021-02-22)
56 | * Moved `wiz_calc` out to a separate function to greatly reduce the memory footprint for parallel processing
57 | 
58 | # wizard 0.2.1 (2021-02-24)
59 | * Perform a final check in `wiz_add_predictors_internal` to ensure that the output_frame is not empty. If it is, then return a message rather than simply writing an empty output file to disk, which can result in an error when combining this with other files due to mismatches in data types for the temporal_id column.
60 | 


--------------------------------------------------------------------------------
/R/add_outcomes.R:
--------------------------------------------------------------------------------
 1 | #' Add outcomes by delegating to wiz_add_predictors_internal() with the lookahead and window negated
 2 | #' @export
 3 | wiz_add_outcomes = function(wiz_frame = NULL, # its chunk_size, temporal_data, fixed_data, id names and output_folder are read below
 4 |                             variables = NULL, # forwarded unchanged to wiz_add_predictors_internal()
 5 |                             category = NULL, # forwarded unchanged
 6 |                             lookahead = lubridate::hours(48), # negated and forwarded as `lookback`
 7 |                             window = lookahead, # negated and forwarded as `window`; defaults to the whole lookahead
 8 |                             stats = c(mean = mean, # named summary functions, forwarded unchanged
 9 |                                       min = min,
10 |                                       max = max),
11 |                             impute = FALSE, # forwarded unchanged
12 |                             output_file = TRUE, # forwarded unchanged
13 |                             log_file = TRUE, # if TRUE, chunk progress is appended to wiz_log.txt in output_folder; also forwarded
14 |                             check_size_only = FALSE, # forwarded unchanged
15 |                             last_chunk_completed = NULL) { # chunks numbered <= this value are skipped
16 | 
17 |   if (is.null(wiz_frame$chunk_size)) { # no chunk_size set: handle the whole wiz_frame in a single call
18 |     wiz_add_predictors_internal(wiz_frame = wiz_frame,
19 |                                 variables = variables,
20 |                                 category = category,
21 |                                 lookback = -lookahead, # NOTE(review): presumably a negative lookback means "look ahead" in _internal -- confirm
22 |                                 window = -window,
23 |                                 stats = stats,
24 |                                 impute = impute,
25 |                                 output_file = output_file,
26 |                                 log_file = log_file,
27 |                                 check_size_only = check_size_only)
28 |   } else {
29 |     assertthat::assert_that(wiz_frame$chunk_size > 0)
30 | 
31 |     # Make chunks based on temporal data, not fixed data
32 |     unique_temporal_ids = sort(unique(wiz_frame$temporal_data[[wiz_frame$temporal_id]]))
33 |     chunk_ids = ceiling(seq_len(length(unique_temporal_ids)) / wiz_frame$chunk_size) # chunk number of each sorted id: the first chunk_size ids get 1, the next get 2, ...
34 |     unique_chunks = unique(chunk_ids)
35 |     n_chunks = max(unique_chunks) # highest chunk number; used in messages and for zero-padding file names
36 | 
37 |     for (chunk_num in unique_chunks) {
38 |       if (!is.null(last_chunk_completed) && chunk_num <= last_chunk_completed) { # resume support: skip chunks reported as already done
39 |         message(paste0('Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'))
40 |         if (log_file) {
41 |           write(paste0(Sys.time(), ': Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'),
42 |                 file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
43 |         }
44 |         next
45 |       }
46 | 
47 |       message(paste0('Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'))
48 |       if (log_file) {
49 |         write(paste0(Sys.time(), ': Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'),
50 |               file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
51 |       }
52 |       # Work on a copy of the frame that holds only this chunk's rows
53 |       wiz_frame_chunk = wiz_frame
54 |       # Temporal rows: keep the ids assigned to this chunk number
55 |       wiz_frame_chunk$temporal_data =
56 |         wiz_frame_chunk$temporal_data %>%
57 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$temporal_id) %in%
58 |                         unique_temporal_ids[chunk_ids == chunk_num])
59 |       # Fixed rows: keep the ids that remain in the filtered temporal data
60 |       wiz_frame_chunk$fixed_data =
61 |         wiz_frame_chunk$fixed_data %>%
62 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$fixed_id) %in%
63 |                         wiz_frame_chunk$temporal_data[[wiz_frame_chunk$temporal_id]])
64 | 
65 |       wiz_add_predictors_internal(wiz_frame = wiz_frame_chunk,
66 |                                   variables = variables,
67 |                                   category = category,
68 |                                   lookback = -lookahead, # same sign flip as in the unchunked call above
69 |                                   window = -window,
70 |                                   stats = stats,
71 |                                   impute = impute,
72 |                                   output_file = output_file,
73 |                                   log_file = log_file,
74 |                                   check_size_only = check_size_only,
75 |                                   filename_prefix = paste0('chunk_', # e.g. 'chunk_01_' when there are 10-99 chunks
76 |                                                            stringr::str_pad(chunk_num,
77 |                                                                             nchar(n_chunks),
78 |                                                                             pad = '0'),
79 |                                                            '_'))
80 | 
81 |     }
82 |   } # the chunked branch ends with the for loop, so the function's value is NULL in that case
83 | }
84 | 


--------------------------------------------------------------------------------
/R/add_predictors.R:
--------------------------------------------------------------------------------
  1 | #' Function to add predictors
  2 | #' Forwards to wiz_add_predictors_internal(), once per chunk of ids when wiz_frame$chunk_size is set.
  3 | #' @export
  4 | wiz_add_predictors = function(wiz_frame = NULL, # its chunk_size, temporal_data, fixed_data, id names and output_folder are read below
  5 |                               variables = NULL, # forwarded unchanged to wiz_add_predictors_internal()
  6 |                               category = NULL, # forwarded unchanged
  7 |                               lookback = lubridate::hours(48), # forwarded unchanged
  8 |                               window = lookback, # forwarded unchanged; defaults to the whole lookback
  9 |                               stats = c(mean = mean, # named summary functions, forwarded unchanged
 10 |                                         min = min,
 11 |                                         max = max),
 12 |                               impute = TRUE, # forwarded unchanged
 13 |                               output_file = TRUE, # forwarded unchanged
 14 |                               log_file = TRUE, # if TRUE, chunk progress is appended to wiz_log.txt in output_folder; also forwarded
 15 |                               check_size_only = FALSE, # forwarded unchanged
 16 |                               last_chunk_completed = NULL) { # chunks numbered <= this value are skipped
 17 | 
 18 |   if (is.null(wiz_frame$chunk_size)) { # no chunk_size set: handle the whole wiz_frame in a single call
 19 |     wiz_add_predictors_internal(wiz_frame = wiz_frame,
 20 |                                 variables = variables,
 21 |                                 category = category,
 22 |                                 lookback = lookback,
 23 |                                 window = window,
 24 |                                 stats = stats,
 25 |                                 impute = impute,
 26 |                                 output_file = output_file,
 27 |                                 log_file = log_file,
 28 |                                 check_size_only = check_size_only)
 29 |   } else {
 30 |     assertthat::assert_that(wiz_frame$chunk_size > 0)
 31 | 
 32 |     # Make chunks based on temporal data, not fixed data
 33 |     unique_temporal_ids = sort(unique(wiz_frame$temporal_data[[wiz_frame$temporal_id]]))
 34 |     chunk_ids = ceiling(seq_len(length(unique_temporal_ids)) / wiz_frame$chunk_size) # chunk number of each sorted id: the first chunk_size ids get 1, the next get 2, ...
 35 |     unique_chunks = unique(chunk_ids)
 36 |     n_chunks = max(unique_chunks) # highest chunk number; used in messages and for zero-padding file names
 37 | 
 38 |     for (chunk_num in unique_chunks) {
 39 |       if (!is.null(last_chunk_completed) && chunk_num <= last_chunk_completed) { # resume support: skip chunks reported as already done
 40 |         message(paste0('Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'))
 41 |         if (log_file) {
 42 |           write(paste0(Sys.time(), ': Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'),
 43 |                 file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
 44 |         }
 45 |         next
 46 |       }
 47 | 
 48 |       message(paste0('Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'))
 49 |       if (log_file) {
 50 |         write(paste0(Sys.time(), ': Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'),
 51 |               file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
 52 |       }
 53 |       # Work on a copy of the frame that holds only this chunk's rows
 54 |       wiz_frame_chunk = wiz_frame
 55 |       # Temporal rows: keep the ids assigned to this chunk number
 56 |       wiz_frame_chunk$temporal_data =
 57 |         wiz_frame_chunk$temporal_data %>%
 58 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$temporal_id) %in%
 59 |                         unique_temporal_ids[chunk_ids == chunk_num])
 60 |       # Fixed rows: keep the ids that remain in the filtered temporal data
 61 |       wiz_frame_chunk$fixed_data =
 62 |         wiz_frame_chunk$fixed_data %>%
 63 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$fixed_id) %in%
 64 |                         wiz_frame_chunk$temporal_data[[wiz_frame_chunk$temporal_id]])
 65 | 
 66 | 
 67 |       wiz_add_predictors_internal(wiz_frame = wiz_frame_chunk,
 68 |                                   variables = variables,
 69 |                                   category = category,
 70 |                                   lookback = lookback,
 71 |                                   window = window,
 72 |                                   stats = stats,
 73 |                                   impute = impute,
 74 |                                   output_file = output_file,
 75 |                                   log_file = log_file,
 76 |                                   check_size_only = check_size_only,
 77 |                                   filename_prefix = paste0('chunk_', # e.g. 'chunk_01_' when there are 10-99 chunks
 78 |                                                            stringr::str_pad(chunk_num,
 79 |                                                                             nchar(n_chunks),
 80 |                                                                             pad = '0'),
 81 |                                                            '_'))
 82 |     }
 83 |   } # the chunked branch ends with the for loop, so the function's value is NULL in that case
 84 | }
 85 | 
 86 | 
 87 | 
 88 | #' Function to add baseline predictors
 89 | #' Offset of hours(1) would mean that everything would be anchored to 1 hour
 90 | #' before fixed_start.
 91 | #' @export
 92 | wiz_add_baseline_predictors = function(wiz_frame = NULL, # its chunk_size, temporal_data, fixed_data, id names and output_folder are read below
 93 |                                        variables = NULL, # forwarded unchanged to wiz_add_predictors_internal()
 94 |                                        category = NULL, # forwarded unchanged
 95 |                                        lookback = lubridate::hours(48), # forwarded unchanged
 96 |                                        window = lookback, # forwarded unchanged; defaults to the whole lookback
 97 |                                        offset = lubridate::hours(0), # forwarded unchanged together with baseline = TRUE
 98 |                                        stats = c(mean = mean, # named summary functions, forwarded unchanged
 99 |                                                  min = min,
100 |                                                  max = max),
101 |                                        impute = TRUE, # forwarded unchanged
102 |                                        output_file = TRUE, # forwarded unchanged
103 |                                        log_file = TRUE, # if TRUE, chunk progress is appended to wiz_log.txt in output_folder; also forwarded
104 |                                        check_size_only = FALSE, # forwarded unchanged
105 |                                        last_chunk_completed = NULL) { # chunks numbered <= this value are skipped
106 | 
107 |   if (is.null(wiz_frame$chunk_size)) { # no chunk_size set: handle the whole wiz_frame in a single call
108 |     wiz_add_predictors_internal(wiz_frame = wiz_frame,
109 |                                 variables = variables,
110 |                                 category = category,
111 |                                 lookback = lookback,
112 |                                 window = window,
113 |                                 stats = stats,
114 |                                 impute = impute,
115 |                                 output_file = output_file,
116 |                                 log_file = log_file,
117 |                                 check_size_only = check_size_only,
118 |                                 baseline = TRUE, # selects the baseline mode of _internal
119 |                                 offset = offset)
120 |   } else {
121 |     assertthat::assert_that(wiz_frame$chunk_size > 0)
122 | 
123 |     # Make chunks based on temporal data, not fixed data
124 |     unique_temporal_ids = sort(unique(wiz_frame$temporal_data[[wiz_frame$temporal_id]]))
125 |     chunk_ids = ceiling(seq_len(length(unique_temporal_ids)) / wiz_frame$chunk_size) # chunk number of each sorted id: the first chunk_size ids get 1, the next get 2, ...
126 |     unique_chunks = unique(chunk_ids)
127 |     n_chunks = max(unique_chunks) # highest chunk number; used in messages and for zero-padding file names
128 | 
129 |     for (chunk_num in unique_chunks) {
130 |       if (!is.null(last_chunk_completed) && chunk_num <= last_chunk_completed) { # resume support: skip chunks reported as already done
131 |         message(paste0('Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'))
132 |         if (log_file) {
133 |           write(paste0(Sys.time(), ': Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'),
134 |                 file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
135 |         }
136 |         next
137 |       }
138 | 
139 |       message(paste0('Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'))
140 |       if (log_file) {
141 |         write(paste0(Sys.time(), ': Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'),
142 |               file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
143 |       }
144 |       # Work on a copy of the frame that holds only this chunk's rows
145 |       wiz_frame_chunk = wiz_frame
146 |       # Temporal rows: keep the ids assigned to this chunk number
147 |       wiz_frame_chunk$temporal_data =
148 |         wiz_frame_chunk$temporal_data %>%
149 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$temporal_id) %in%
150 |                         unique_temporal_ids[chunk_ids == chunk_num])
151 |       # Fixed rows: keep the ids that remain in the filtered temporal data
152 |       wiz_frame_chunk$fixed_data =
153 |         wiz_frame_chunk$fixed_data %>%
154 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$fixed_id) %in%
155 |                         wiz_frame_chunk$temporal_data[[wiz_frame_chunk$temporal_id]])
156 | 
157 |       wiz_add_predictors_internal(wiz_frame = wiz_frame_chunk,
158 |                                   variables = variables,
159 |                                   category = category,
160 |                                   lookback = lookback,
161 |                                   window = window,
162 |                                   stats = stats,
163 |                                   impute = impute,
164 |                                   output_file = output_file,
165 |                                   log_file = log_file,
166 |                                   check_size_only = check_size_only,
167 |                                   baseline = TRUE,
168 |                                   offset = offset,
169 |                                   filename_prefix = paste0('chunk_', # e.g. 'chunk_01_' when there are 10-99 chunks
170 |                                                            stringr::str_pad(chunk_num,
171 |                                                                             nchar(n_chunks),
172 |                                                                             pad = '0'),
173 |                                                            '_'))
174 | 
175 |     }
176 |   } # the chunked branch ends with the for loop, so the function's value is NULL in that case
177 | }
178 | 
179 | 
180 | 
181 | #' Function to add growing predictors (cumulatively growing windows beginning at time zero)
182 | #' No lookback, window, offset or impute arguments are exposed: placeholder durations are
183 | #' passed to wiz_add_predictors_internal() together with growing = TRUE and impute = FALSE.
184 | #' @export
185 | wiz_add_growing_predictors = function(wiz_frame = NULL, # its chunk_size, temporal_data, fixed_data, id names and output_folder are read below
186 |                                        variables = NULL, # forwarded unchanged to wiz_add_predictors_internal()
187 |                                        category = NULL, # forwarded unchanged
188 |                                        stats = c(mean = mean, # named summary functions, forwarded unchanged
189 |                                                  min = min,
190 |                                                  max = max),
191 |                                        output_file = TRUE, # forwarded unchanged
192 |                                        log_file = TRUE, # if TRUE, chunk progress is appended to wiz_log.txt in output_folder; also forwarded
193 |                                        check_size_only = FALSE, # forwarded unchanged
194 |                                        last_chunk_completed = NULL) { # chunks numbered <= this value are skipped
195 | 
196 |   if (is.null(wiz_frame$chunk_size)) { # no chunk_size set: handle the whole wiz_frame in a single call
197 |     wiz_add_predictors_internal(wiz_frame = wiz_frame,
198 |                                 variables = variables,
199 |                                 category = category,
200 |                                 lookback = lubridate::hours(48), # will ignore this in _internal
201 |                                 window = lubridate::hours(48), # will ignore this in _internal
202 |                                 stats = stats,
203 |                                 impute = FALSE, # imputation is hard-coded off for growing windows
204 |                                 output_file = output_file,
205 |                                 log_file = log_file,
206 |                                 check_size_only = check_size_only,
207 |                                 growing = TRUE) # selects the growing-window mode of _internal
208 |   } else {
209 |     assertthat::assert_that(wiz_frame$chunk_size > 0)
210 | 
211 |     # Make chunks based on temporal data, not fixed data
212 |     unique_temporal_ids = sort(unique(wiz_frame$temporal_data[[wiz_frame$temporal_id]]))
213 |     chunk_ids = ceiling(seq_len(length(unique_temporal_ids)) / wiz_frame$chunk_size) # chunk number of each sorted id: the first chunk_size ids get 1, the next get 2, ...
214 |     unique_chunks = unique(chunk_ids)
215 |     n_chunks = max(unique_chunks) # highest chunk number; used in messages and for zero-padding file names
216 | 
217 |     for (chunk_num in unique_chunks) {
218 |       if (!is.null(last_chunk_completed) && chunk_num <= last_chunk_completed) { # resume support: skip chunks reported as already done
219 |         message(paste0('Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'))
220 |         if (log_file) {
221 |           write(paste0(Sys.time(), ': Skipping chunk # ', chunk_num, ' out of ', n_chunks, '...'),
222 |                 file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
223 |         }
224 |         next
225 |       }
226 | 
227 |       message(paste0('Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'))
228 |       if (log_file) {
229 |         write(paste0(Sys.time(), ': Processing chunk # ', chunk_num, ' out of ', n_chunks, '...'),
230 |               file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
231 |       }
232 |       # Work on a copy of the frame that holds only this chunk's rows
233 |       wiz_frame_chunk = wiz_frame
234 |       # Temporal rows: keep the ids assigned to this chunk number
235 |       wiz_frame_chunk$temporal_data =
236 |         wiz_frame_chunk$temporal_data %>%
237 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$temporal_id) %in%
238 |                         unique_temporal_ids[chunk_ids == chunk_num])
239 |       # Fixed rows: keep the ids that remain in the filtered temporal data
240 |       wiz_frame_chunk$fixed_data =
241 |         wiz_frame_chunk$fixed_data %>%
242 |         dplyr::filter(!!rlang::parse_expr(wiz_frame_chunk$fixed_id) %in%
243 |                         wiz_frame_chunk$temporal_data[[wiz_frame_chunk$temporal_id]])
244 | 
245 |       wiz_add_predictors_internal(wiz_frame = wiz_frame_chunk,
246 |                                   variables = variables,
247 |                                   category = category,
248 |                                   lookback = lubridate::hours(48), # will ignore this in _internal
249 |                                   window = lubridate::hours(48), # will ignore this in _internal
250 |                                   stats = stats,
251 |                                   impute = FALSE,
252 |                                   output_file = output_file,
253 |                                   log_file = log_file,
254 |                                   check_size_only = check_size_only,
255 |                                   growing = TRUE,
256 |                                   filename_prefix = paste0('chunk_', # e.g. 'chunk_01_' when there are 10-99 chunks
257 |                                                            stringr::str_pad(chunk_num,
258 |                                                                             nchar(n_chunks),
259 |                                                                             pad = '0'),
260 |                                                            '_'))
261 | 
262 |     }
263 |   } # the chunked branch ends with the for loop, so the function's value is NULL in that case
264 | }
265 | 
266 | 
267 | # Other functions to add:
268 | # wiz_add_final_outcomes() # similar to wiz_add_baseline_predictors() but occurs after the final outcome
269 | # wiz_add_shrinking_outcomes() # cumulatively shrinking window
270 | # for outcomes, will need to respect max_length
271 | 
272 | 


--------------------------------------------------------------------------------
/R/combine.R:
--------------------------------------------------------------------------------
  1 | #'
  2 | wiz_combine_old = function(wiz_frame, ..., files = NULL, wiz_path = TRUE, dplyr_join = dplyr::inner_join) {
  3 |   # Older combine routine: joins wiz_frame$fixed_data with the supplied data frames and/or files, pairwise via dplyr_join.
  4 |   if (is.null(files)) { # no file vector supplied: combine whatever was passed through ...
  5 |     temporal_dfs = append(list(wiz_frame$fixed_data %>%
  6 |                                  dplyr::rename(!!rlang::parse_expr(wiz_frame$temporal_id) := !!rlang::parse_expr(wiz_frame$fixed_id))), # fixed id column takes the temporal id column's name
  7 |                           list(...))
  8 |   } else { # a file vector was supplied: it is used instead of ...
  9 |     temporal_dfs = append(list(wiz_frame$fixed_data %>%
 10 |                                  dplyr::rename(!!rlang::parse_expr(wiz_frame$temporal_id) := !!rlang::parse_expr(wiz_frame$fixed_id))),
 11 |                           as.list(files))
 12 |   }
 13 |   temporal_dfs =
 14 |     temporal_dfs %>%
 15 |     lapply(function (x) { # turn every element into a data frame, reading files where a path was given
 16 |       if (inherits(x, 'data.frame')) {
 17 |         return(x)
 18 |       } else if (is.character(x) && wiz_path) { # is.character() is always a single TRUE/FALSE, unlike class(x) == 'character'
 19 |         return(data.table::fread(file.path(wiz_frame$output_folder, x))) # path is relative to the wiz_frame output folder
 20 |       } else if (is.character(x) && !wiz_path) {
 21 |         return(data.table::fread(x)) # path is used exactly as given
 22 |       } else {
 23 |         stop('Error: the ... must be limited to data frames and file paths.')
 24 |       }
 25 |     })
 26 |   # Fold the list left to right with the join function and return a plain data.frame
 27 |   return(Reduce(dplyr_join, temporal_dfs) %>% as.data.frame())
 28 | }
 29 | 
 30 | #' Combine the fixed data with data frames and wizard output files
 31 | #'
 32 | #' Joins `wiz_frame$fixed_data` (with its id column renamed to the temporal id)
 33 | #' to the data frames in `...` and, optionally, to .csv files. Files whose
 34 | #' name contains `chunk_<number>` are joined within each chunk and the chunks
 35 | #' are row-bound before being joined to everything else.
 36 | #'
 37 | #' @param wiz_frame A `wiz_frame` object.
 38 | #' @param ... Data frames to join to the fixed data.
 39 | #' @param files Character vector of .csv file names. If `NULL` (and
 40 | #'   `include_files` is `TRUE`), every .csv file in the output folder is used.
 41 | #' @param include_files If `TRUE`, .csv files are read and joined.
 42 | #' @param wiz_path If `TRUE`, `files` are relative to the output folder.
 43 | #' @param dplyr_join Two-table join function used to combine the pieces.
 44 | #' @param log_file If `TRUE`, progress is appended to `wiz_log.txt` in the
 45 | #'   output folder.
 46 | #' @return A data frame.
 47 | #' @export
 48 | wiz_combine = function(wiz_frame,
 49 |                        ...,
 50 |                        files = NULL,
 51 |                        include_files = TRUE,
 52 |                        wiz_path = TRUE,
 53 |                        dplyr_join = dplyr::inner_join,
 54 |                        log_file = TRUE) {
 55 | 
 56 |   # Report progress on the console and, if requested, in the log file
 57 |   log_progress = function(text) {
 58 |     message(text)
 59 |     if (log_file) {
 60 |       write(paste0(Sys.time(), ': ', text),
 61 |             file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
 62 |     }
 63 |   }
 64 | 
 65 |   read_file = function(file_name) {
 66 |     log_progress(paste0('Reading file: ', file_name, '...'))
 67 |     data.table::fread(file_name, data.table = FALSE)
 68 |   }
 69 | 
 70 |   # The fixed data come first, keyed by the temporal id column name
 71 |   return_frame = list(wiz_frame$fixed_data %>%
 72 |                         dplyr::rename(!!rlang::parse_expr(wiz_frame$temporal_id) := !!rlang::parse_expr(wiz_frame$fixed_id)))
 73 | 
 74 |   if (length(list(...)) > 0) {
 75 |     return_frame = append(return_frame, list(...))
 76 |   }
 77 | 
 78 |   if (include_files) {
 79 |     if (is.null(files)) {
 80 |       # A bare '.csv' is a regex that matches "csv" preceded by any character
 81 |       # anywhere in the name, so anchor it to a literal file extension
 82 |       files = dir(wiz_frame$output_folder, pattern = '\\.csv$')
 83 |       wiz_path = TRUE # overwrite wiz_path
 84 |     }
 85 | 
 86 |     if (length(files) == 0) {
 87 |       stop(paste0('No .csv files were found in ', wiz_frame$output_folder, '. ',
 88 |                   'If you do not want to combine any files, please set ',
 89 |                   'include_files to FALSE.'))
 90 |     }
 91 | 
 92 |     # Chunk label of every file (NA when the file is not chunked), extracted
 93 |     # before the output folder is prepended so the folder name cannot match
 94 |     file_chunks = stringr::str_extract(files, '\\bchunk_\\d+')
 95 |     detect_chunks = !is.na(file_chunks)
 96 | 
 97 |     if (wiz_path) {
 98 |       files = file.path(wiz_frame$output_folder, files)
 99 |     }
100 | 
101 |     if (!all(detect_chunks)) { # append the files that are not chunked first
102 |       return_frame = append(return_frame, lapply(files[!detect_chunks], read_file))
103 |     }
104 | 
105 |     if (any(detect_chunks)) { # now handle chunked files
106 |       unique_chunks = sort(unique(file_chunks[detect_chunks]))
107 | 
108 |       chunk_frames =
109 |         lapply(unique_chunks, function (chunk_name) {
110 |           log_progress(paste0('Reading chunk: ', chunk_name, '...'))
111 |           # Compare chunk labels exactly: a substring match on 'chunk_1'
112 |           # would also pick up the files of chunk_10, chunk_11, ...
113 |           chunk_files = files[detect_chunks & file_chunks == chunk_name]
114 |           Reduce(dplyr_join, lapply(chunk_files, data.table::fread)) %>%
115 |             as.data.frame()
116 |         })
117 | 
118 |       return_frame = append(return_frame, list(dplyr::bind_rows(chunk_frames)))
119 |     }
120 |   }
121 | 
122 |   return(Reduce(dplyr_join, return_frame) %>% as.data.frame())
123 | }
124 | 


--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
 1 | #' Pipe operator
 2 | #'
 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
 4 | #'
 5 | #' @name %>%
 6 | #' @rdname pipe
 7 | #' @keywords internal
 8 | #' @export
 9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | NULL
12 | 


--------------------------------------------------------------------------------
/R/wizard.R:
--------------------------------------------------------------------------------
  1 | #' Define wizard frame
  2 | #'
  3 | #' Validates the fixed data (one row per id) and the long-format temporal
  4 | #' data, re-expresses every temporal time stamp relative to each id's start
  5 | #' time, builds data dictionaries, and bundles it all into a `wiz_frame`.
  6 | #'
  7 | #' @param fixed_data,temporal_data Data frames holding the fixed and the
  8 | #'   temporal data.
  9 | #' @param fixed_id,fixed_start,fixed_end Names of the id, start time, and end
 10 | #'   time columns of `fixed_data`. When `fixed_start` or `fixed_end` is `NULL`,
 11 | #'   each id's first or last temporal time stamp is used instead.
 12 | #' @param temporal_id,temporal_time,temporal_variable,temporal_value,temporal_category
 13 | #'   Names of the corresponding columns of `temporal_data`.
 14 | #' @param step Numeric step, or a lubridate `Period` when the time columns are
 15 | #'   dates or date-times. Only the largest non-zero unit of a `Period` is used.
 16 | #' @param max_length Optional maximum length, of the same class as `step`.
 17 | #' @param output_folder Output folder. If it does not exist it is created when
 18 | #'   `create_folder` is `TRUE` or when confirmed at the prompt.
 19 | #' @param create_folder Create a missing `output_folder` without asking?
 20 | #' @param save_wiz_frame Save the result as `wiz_frame.rds` in `output_folder`?
 21 | #' @param chunk_size Stored unchanged in the returned object.
 22 | #' @param numeric_threshold Passed to `wiz_build_temporal_data_dictionary()`.
 23 | #' @return An object of class `wiz_frame`.
 24 | #' @export
 25 | wiz_frame = function(fixed_data,
 26 |                      temporal_data,
 27 |                      fixed_id = 'id',
 28 |                      fixed_start = NULL,
 29 |                      fixed_end = NULL,
 30 |                      temporal_id = 'id',
 31 |                      temporal_time = 'time',
 32 |                      temporal_variable = 'variable',
 33 |                      temporal_value = 'value',
 34 |                      temporal_category = temporal_variable,
 35 |                      step = NULL,
 36 |                      max_length = NULL,
 37 |                      output_folder = NULL,
 38 |                      create_folder = FALSE,
 39 |                      save_wiz_frame = TRUE,
 40 |                      chunk_size = NULL,
 41 |                      numeric_threshold = 0.5) {
 42 | 
 43 |   assertthat::assert_that('data.frame' %in% class(fixed_data))
 44 |   assertthat::assert_that('data.frame' %in% class(temporal_data))
 45 | 
 46 |   # To deal with any data.table -> dtplyr weirdness
 47 |   fixed_data = as.data.frame(fixed_data)
 48 |   temporal_data = as.data.frame(temporal_data)
 49 | 
 50 |   # A time column must be complete and in the same units as step: dates and
 51 |   # date-times need a Period step, numbers need a numeric step
 52 |   check_time_column = function(values, column, data_name) {
 53 |     units_differ =
 54 |       (class(values)[1] %in% c('Date', 'POSIXct', 'POSIXt') && !inherits(step, 'Period')) ||
 55 |       (is.numeric(values) && !is.numeric(step))
 56 |     if (units_differ) {
 57 |       stop('Both the ', column, ' column in the ', data_name, ' and step must be in the same units.',
 58 |            call. = FALSE)
 59 |     }
 60 |     if (class(values)[1] %in% c('character', 'factor')) {
 61 |       stop('The ', column, ' column cannot be a character or factor column. ',
 62 |            'You must convert it to either a number or a date.', call. = FALSE)
 63 |     }
 64 |     if (any(is.na(values))) {
 65 |       stop('The ', column, ' column cannot contain missing values.', call. = FALSE)
 66 |     }
 67 |   }
 68 | 
 69 |   if (!is.null(fixed_start)) {
 70 |     check_time_column(fixed_data[[fixed_start]], 'fixed_start', 'fixed_data')
 71 |   }
 72 | 
 73 |   if (!is.null(fixed_end)) {
 74 |     check_time_column(fixed_data[[fixed_end]], 'fixed_end', 'fixed_data')
 75 |   }
 76 | 
 77 |   if (!is.null(max_length)) {
 78 |     if (!identical(class(max_length), class(step))) {
 79 |       stop('Both the max_length and step must be in the same units.')
 80 |     }
 81 |   }
 82 | 
 83 |   check_time_column(temporal_data[[temporal_time]], 'temporal_time', 'temporal_data')
 84 | 
 85 |   if (is.null(output_folder)) {
 86 |     stop('You must specify an output folder.')
 87 |   }
 88 | 
 89 |   if (!dir.exists(output_folder)) {
 90 |     if (create_folder) {
 91 |       dir.create(output_folder)
 92 |     } else if (tolower(readline('This folder does not exist. Would you like it to be created (y/n)? ')) %in% c('y', 'yes')) {
 93 |       dir.create(output_folder)
 94 |     } else {
 95 |       stop(paste0('The output folder ', output_folder, ' could not be created.'))
 96 |     }
 97 |   }
 98 | 
 99 |   # check to make sure all patients in temporal_data
100 |   # are accounted for in the fixed_data
101 |   if (length(setdiff(temporal_data[[temporal_id]], fixed_data[[fixed_id]])) > 0) {
102 |     stop('All ids in the temporal_data must also be present in the fixed_data.')
103 |   }
104 | 
105 |   # check for duplicate patients in fixed_data
106 |   if (length(unique(fixed_data[[fixed_id]])) < length(fixed_data[[fixed_id]])) {
107 |     stop('You cannot have multiple rows for with the same id in the fixed_data.')
108 |   }
109 | 
110 |   # Change step to numeric and set step_units (largest non-zero unit wins)
111 |   step_units = NULL
112 | 
113 |   if (inherits(step, 'Period')) {
114 |     if (step@year > 0) {
115 |       step = step@year
116 |       step_units = 'year'
117 |     } else if (step@month > 0) {
118 |       step = step@month
119 |       step_units = 'month'
120 |     } else if (step@day > 0) {
121 |       step = step@day
122 |       step_units = 'day'
123 |     } else if (step@hour > 0) {
124 |       step = step@hour
125 |       step_units = 'hour'
126 |     } else if (step@minute > 0) {
127 |       step = step@minute
128 |       step_units = 'minute'
129 |     } else if (step@.Data > 0) { # a Period keeps its seconds in the .Data slot
130 |       step = step@.Data
131 |       step_units = 'second'
132 |     }
133 |   }
134 | 
135 |   if (is.null(fixed_start)) { # if the start time is not provided, then the time will be indexed to min time
136 |     suppressMessages({
137 |       fixed_data =
138 |         fixed_data %>%
139 |         dplyr::left_join(., temporal_data %>%
140 |                            dplyr::select_at(c(temporal_id, temporal_time)) %>%
141 |                            dplyr::group_by(!!rlang::parse_expr(temporal_id)) %>%
142 |                            dplyr::arrange(!!rlang::parse_expr(temporal_time)) %>%
143 |                            dplyr::slice(1) %>% # Pick the first value (temporally)
144 |                            dplyr::ungroup() %>%
145 |                            dplyr::rename(!!rlang::parse_expr(fixed_id) := !!rlang::parse_expr(temporal_id)) %>%
146 |                            dplyr::rename(wiz_start_time = !!rlang::parse_expr(temporal_time)))
147 | 
148 |       fixed_start = 'wiz_start_time'
149 |     })
150 |   }
151 | 
152 |   if (is.null(fixed_end)) { # if the end time is not provided, then each id's last temporal time stamp is used
153 |     suppressMessages({
154 |       fixed_data =
155 |         fixed_data %>%
156 |         dplyr::left_join(., temporal_data %>%
157 |                            dplyr::select_at(c(temporal_id, temporal_time)) %>%
158 |                            dplyr::group_by(!!rlang::parse_expr(temporal_id)) %>%
159 |                            dplyr::arrange(!!rlang::parse_expr(temporal_time)) %>%
160 |                            dplyr::slice(dplyr::n()) %>% # Pick the last value (temporally)
161 |                            dplyr::ungroup() %>%
162 |                            dplyr::rename(!!rlang::parse_expr(fixed_id) := !!rlang::parse_expr(temporal_id)) %>%
163 |                            dplyr::rename(wiz_end_time = !!rlang::parse_expr(temporal_time)))
164 | 
165 |       fixed_end = 'wiz_end_time'
166 |     })
167 |   }
168 | 
169 |   # check to make sure fixed_start is never greater than fixed_end
170 |   if (any(!is.na(fixed_data[[fixed_start]]) &
171 |       !is.na(fixed_data[[fixed_end]]) &
172 |       fixed_data[[fixed_start]] > fixed_data[[fixed_end]])) {
173 |     stop('fixed_start should never be greater than fixed_end.')
174 |   }
175 | 
176 |   suppressMessages({
177 |     temporal_data =
178 |       temporal_data %>%
179 |       dplyr::left_join(., fixed_data %>%
180 |                          dplyr::select_at(c(fixed_id, fixed_start)) %>%
181 |                          dplyr::rename(!!rlang::parse_expr(temporal_id) := !!rlang::parse_expr(fixed_id)) %>%
182 |                          dplyr::rename(wiz_fixed_start_time = !!rlang::parse_expr(fixed_start))
183 |       )
184 |   })
185 | 
186 |   if (!is.null(step_units)) {
187 |     temporal_data =
188 |       temporal_data %>%
189 |       dplyr::mutate(!!rlang::parse_expr(temporal_time) :=
190 |                       lubridate::time_length(!!rlang::parse_expr(temporal_time) - wiz_fixed_start_time, unit = step_units)) %>%
191 |       dplyr::select(-wiz_fixed_start_time)
192 |   } else {
193 |     temporal_data =
194 |       temporal_data %>%
195 |       dplyr::mutate(!!rlang::parse_expr(temporal_time) :=
196 |                       !!rlang::parse_expr(temporal_time) - wiz_fixed_start_time) %>%
197 |       dplyr::select(-wiz_fixed_start_time)
198 |   }
199 | 
200 |   # Transform factors to characters
201 |   fixed_data = fixed_data %>% dplyr::mutate_if(is.factor, as.character)
202 |   temporal_data = temporal_data %>% dplyr::mutate_if(is.factor, as.character)
203 | 
204 |   # Generate a data dictionary for fixed_data
205 |   fixed_data_dict =
206 |     lapply(fixed_data, class) %>%
207 |     lapply(function (x) x[1]) %>% # If multiple classes, take only the first one (happens with date-times)
208 |     dplyr::as_tibble() %>%
209 |     tidyr::gather(key = 'variable', value = 'class') %>%
210 |     as.data.frame()
211 | 
212 |   suppressWarnings({
213 |     temporal_data_dict =
214 |       wiz_build_temporal_data_dictionary(temporal_data,
215 |                                          temporal_variable,
216 |                                          temporal_value,
217 |                                          numeric_threshold)
218 |   })
219 | 
220 |   wiz_frame =
221 |     structure(list(
222 |       fixed_data = as.data.frame(fixed_data),
223 |       temporal_data = as.data.frame(temporal_data),
224 |       fixed_id = fixed_id,
225 |       fixed_start = fixed_start,
226 |       fixed_end = fixed_end,
227 |       temporal_id = temporal_id,
228 |       temporal_time = temporal_time,
229 |       temporal_variable = temporal_variable,
230 |       temporal_value = temporal_value,
231 |       temporal_category = temporal_category,
232 |       step = step,
233 |       max_length = max_length,
234 |       step_units = step_units,
235 |       output_folder = output_folder,
236 |       fixed_data_dict = fixed_data_dict,
237 |       temporal_data_dict = temporal_data_dict,
238 |       chunk_size = chunk_size),
239 |       class = 'wiz_frame')
240 | 
241 |   if (save_wiz_frame) {
242 |     saveRDS(wiz_frame, file.path(output_folder, 'wiz_frame.rds'))
243 |   }
244 | 
245 |   return(wiz_frame)
246 | }
247 | 
248 | 
249 | #' Build the temporal data dictionary
250 | #'
251 | #' Determine the names and types of all of the temporal data variables.
252 | #' This function assumes that the temporal data values may be characters if
253 | #' some variables are categorical. If the value column itself is integer or
254 | #' numeric, every variable is classed as numeric without inspecting its
255 | #' values. This is an internal function.
256 | #'
257 | #' @param temporal_data Data frame of temporal data in long format.
258 | #' @param temporal_variable,temporal_value Names of the variable and value
259 | #'   columns of `temporal_data`.
260 | #' @param numeric_threshold A variable is classed as numeric when at least
261 | #'   this proportion of its non-missing values can be converted to a number.
262 | #' @return A data frame with one row per variable and the columns `variable`
263 | #'   and `class` ('numeric' or 'character'), sorted by `variable`.
264 | #' @keywords internal
265 | wiz_build_temporal_data_dictionary = function (temporal_data,
266 |                                                temporal_variable,
267 |                                                temporal_value,
268 |                                                numeric_threshold = 0.5) {
269 |   # One row per distinct temporal variable
270 |   temporal_data_dict =
271 |     temporal_data %>%
272 |     dplyr::select_at(temporal_variable) %>%
273 |     dplyr::pull(1) %>%
274 |     unique() %>%
275 |     dplyr::tibble(variable = .)
276 | 
277 |   # class() can return more than one class, and if () needs a single
278 |   # TRUE/FALSE, so collapse the comparison with any()
279 |   values_are_numeric =
280 |     any(class(temporal_data[[temporal_value]]) %in% c('integer', 'numeric'))
281 | 
282 |   if (values_are_numeric) {
283 |     # If all variables are numeric/integer
284 |     variable_classes = rep('numeric', nrow(temporal_data_dict))
285 |   } else {
286 |     # If not, check data type for each temporal variable
287 |     variable_classes =
288 |       vapply(temporal_data_dict$variable, function (temporal_data_var) {
289 |         temporal_data_values =
290 |           temporal_data %>%
291 |           dplyr::filter(!!rlang::parse_expr(temporal_variable) == temporal_data_var) %>%
292 |           dplyr::pull(!!rlang::parse_expr(temporal_value))
293 | 
294 |         values_not_missing = length(na.omit(temporal_data_values))
295 | 
296 |         # Convert to numeric to see how many values go missing
297 |         values_numeric_not_missing =
298 |           length(na.omit(suppressWarnings(as.numeric(temporal_data_values))))
299 | 
300 |         # Consider a variable to be numeric if at least numeric_threshold
301 |         # (50% by default) of its non-missing values are numeric
302 |         if (values_numeric_not_missing >= numeric_threshold * values_not_missing) {
303 |           'numeric'
304 |         } else {
305 |           'character'
306 |         }
307 |       }, character(1), USE.NAMES = FALSE)
308 |   }
309 | 
310 |   # Attach the classes and sort by variable name
311 |   temporal_data_dict %>%
312 |     dplyr::mutate(class = variable_classes) %>%
313 |     dplyr::arrange(variable) %>%
314 |     as.data.frame()
315 | }
316 | 
317 | #' Function that converts categorical temporal predictors into dummy variables
318 | #'
319 | #' Every selected variable is renamed to `<variable>_<value>` and its value is
320 | #' set to 1. The value column is then converted to numeric and the temporal
321 | #' data dictionary is rebuilt.
322 | #'
323 | #' Note that you can use this to dummy code variables with numerical values
324 | #' where the values are supposed to map to categorical levels (e.g., 1 means high
325 | #' and 2 means low).
326 | #'
327 | #' By default, the variables classed as 'character' in the temporal data
328 | #' dictionary are dummy coded. If you supply a vector of variables, it takes
329 | #' precedence over the dictionary.
330 | #'
331 | #' @param wiz_frame A `wiz_frame` object.
332 | #' @param numeric_threshold Passed to `wiz_build_temporal_data_dictionary()`
333 | #'   when the temporal data dictionary is rebuilt.
334 | #' @param variables Optional character vector of variables to dummy code.
335 | #' @param save_wiz_frame Save the updated object as `wiz_frame.rds` in the
336 | #'   output folder?
337 | #' @return The updated `wiz_frame`.
338 | #' @export
339 | wiz_dummy_code = function(wiz_frame = NULL,
340 |                           numeric_threshold = 0.5,
341 |                           variables = NULL,
342 |                           save_wiz_frame = TRUE) {
343 | 
344 |   if (is.null(variables)) { # if you do NOT supply a vector of variables (the default)
345 |     variables = wiz_frame$temporal_data_dict %>%
346 |       dplyr::filter(class == 'character') %>%
347 |       dplyr::pull(variable)
348 | 
349 |     if (length(variables) == 0) {
350 |       message(paste('There are no categorical variables. There is no need to apply wiz_dummy_code(). ',
351 |                  'To override this, please supply a vector of variable names to the variables argument.'))
352 |       return(wiz_frame)
353 |     }
354 |   }
355 | 
356 |   # Column symbols, parsed once and injected with !! below. The variable
357 |   # vector is injected too, so a column named `variables` cannot mask it.
358 |   variable_col = rlang::parse_expr(wiz_frame$temporal_variable)
359 |   value_col = rlang::parse_expr(wiz_frame$temporal_value)
360 | 
361 |   # Flag the rows to dummy code, rename their variable, and set their value to 1
362 |   wiz_frame$temporal_data = wiz_frame$temporal_data %>%
363 |     dplyr::mutate(wiz_temp_var = (!!variable_col) %in% (!!variables)) %>%
364 |     dplyr::mutate(!!variable_col :=
365 |                     dplyr::case_when(
366 |                       wiz_temp_var ~ paste0(!!variable_col, '_', !!value_col),
367 |                       TRUE ~ !!variable_col)) %>%
368 |     # replace() works for character and numeric value columns alike, whereas
369 |     # case_when() cannot mix the character '1' with numeric values
370 |     dplyr::mutate(!!value_col := replace(!!value_col, wiz_temp_var, 1)) %>%
371 |     dplyr::mutate_at(dplyr::vars(!!value_col), as.numeric) %>%
372 |     dplyr::select(-wiz_temp_var) %>%
373 |     as.data.frame()
374 | 
375 |   # Rebuild the dictionary, since variable names and value types have changed
376 |   suppressWarnings({wiz_frame$temporal_data_dict =
377 |     wiz_build_temporal_data_dictionary(wiz_frame$temporal_data,
378 |                                        wiz_frame$temporal_variable,
379 |                                        wiz_frame$temporal_value,
380 |                                        numeric_threshold)})
381 | 
382 |   if (save_wiz_frame) {
383 |     saveRDS(wiz_frame, file.path(wiz_frame$output_folder, 'wiz_frame.rds'))
384 |   }
385 | 
386 |   wiz_frame
387 | }
388 | 
389 | 
390 | 
391 | 
392 | 


--------------------------------------------------------------------------------
/R/wizard_internal.R:
--------------------------------------------------------------------------------
  1 | #' Define the times at which one id is evaluated (internal helper)
  2 | #'
  3 | #' @param groups Object whose `temporal_id` element is the id being processed.
  4 | #' @param temporal_id Name of the id column.
  5 | #' @param step,step_units,max_length Step size, its time unit (or `NULL`),
  6 | #'   and the optional upper limit on the last time.
  7 | #' @param baseline If `TRUE`, only time 0 is returned.
  8 | #' @param max_step_times_per_id Data frame with the id column and a
  9 | #'   `wiz_step_time` column holding the maximum step time of each id.
 10 | #' @param lookback_converted,window_converted,output_folder,log_file Unused.
 11 | #' @return A data frame with a single column `time`.
 12 | wiz_define_steps = function(groups, temporal_id, step, step_units, max_length, baseline, max_step_times_per_id,
 13 |                             lookback_converted, window_converted, output_folder,
 14 |                             log_file) {
 15 | 
 16 |   # A baseline request only needs time 0
 17 |   if (!(is.null(baseline) || !baseline)) {
 18 |     return(data.frame(time = 0))
 19 |   }
 20 | 
 21 |   id_value = groups[[temporal_id]]
 22 |   id_rows = dplyr::filter(max_step_times_per_id,
 23 |                           (!!rlang::parse_expr(temporal_id)) == (!!id_value))
 24 |   max_step_time = dplyr::pull(id_rows, wiz_step_time)
 25 | 
 26 |   # This should only be the case if someone has no observations in temporal_data after time 0
 27 |   if (length(max_step_time) == 0 || max_step_time < 0) {
 28 |     stop(paste0('No temporal data was found during the relevant period for ',
 29 |                 temporal_id, ' ', id_value, '.'))
 30 |   }
 31 | 
 32 |   if (!is.null(max_length)) {
 33 |     length_limit = max_length
 34 |     if (!is.null(step_units)) {
 35 |       length_limit = lubridate::time_length(max_length, unit = step_units)
 36 |     }
 37 |     max_step_time = pmin(length_limit, max_step_time)
 38 |   }
 39 | 
 40 |   data.frame(time = seq(0, max_step_time, by = step))
 41 | }
 42 | 
 43 | #' Internal helper: summarizes the temporal data of one id at one time point into a wide data frame of windowed statistics
 44 | wiz_calc = function(groups, temporal_id, temporal_variable, temporal_value, temporal_time,
 45 |                     lookback_converted, dots, window_converted, temporal_data_of_interest,
 46 |                     stats, impute, pb, all_temporal_vars, missing_value_frame, strategy) { # only the first id and the first time in groups are used
 47 | 
 48 |   if (lookback_converted < 0) { # a negative lookback means a lookahead (columns are prefixed 'outcome_' below)
 49 |     output_item =
 50 |       temporal_data_of_interest %>%
 51 |       dplyr::filter(!!rlang::parse_expr(temporal_id) == groups[[temporal_id]][1] &
 52 |                       !!rlang::parse_expr(temporal_time) > groups$time[1] & # outcome cannot include right now
 53 |                       !!rlang::parse_expr(temporal_time) <= groups$time[1] - lookback_converted)
 54 | 
 55 |     output_item = output_item %>%
 56 |       dplyr::mutate(window_time = abs(ceiling((groups$time[1] - !!rlang::parse_expr(temporal_time)) /
 57 |                                                 window_converted) * window_converted)) # window label: a multiple of the window size, made positive
 58 |   } else { # if it is a lookback
 59 |     if (!is.null(dots[['growing']]) && dots[['growing']]) {
 60 |       output_item =
 61 |         temporal_data_of_interest %>%
 62 |         dplyr::filter(!!rlang::parse_expr(temporal_id) == groups[[temporal_id]][1] &
 63 |                         !!rlang::parse_expr(temporal_time) <= groups$time[1] & # includes now in predictors
 64 |                         !!rlang::parse_expr(temporal_time) >= 0) # Includes time 0
 65 |       output_item = output_item %>%
 66 |         dplyr::mutate(window_time = 48) # arbitrary placeholder: growing predictors drop window_time when they are named below
 67 |     } else {
 68 |       output_item =
 69 |         temporal_data_of_interest %>%
 70 |         dplyr::filter(!!rlang::parse_expr(temporal_id) == groups[[temporal_id]][1] &
 71 |                         !!rlang::parse_expr(temporal_time) <= groups$time[1] & # includes now in predictors
 72 |                         !!rlang::parse_expr(temporal_time) > groups$time[1] - lookback_converted) # up to X hours ago but not including X
 73 |       output_item = output_item %>%
 74 |         dplyr::mutate(window_time = floor((groups$time[1] - !!rlang::parse_expr(temporal_time)) /
 75 |                                             window_converted) * window_converted + window_converted) # window label: upper edge of the window, as a multiple of the window size
 76 |     }
 77 |   }
 78 | 
 79 |   # If it's *not* a growing predictor, then convert the window to a factor variable
 80 |   if (is.null(dots[['growing']]) || !dots[['growing']]) {
 81 |     output_item = output_item %>%
 82 |       dplyr::mutate(window_time = factor(window_time,
 83 |                                          levels = abs(1:(lookback_converted/window_converted)*window_converted))) # one level per window; ':' binds tighter than '*'
 84 |   }
 85 | 
 86 |   # convert temporal_variable to a factor variable, which is useful
 87 |   # to fill in missing values
 88 |   output_item =
 89 |     output_item %>%
 90 |     dplyr::mutate(!!rlang::parse_expr(temporal_variable) :=
 91 |                     factor(!!rlang::parse_expr(temporal_variable), levels = all_temporal_vars))
 92 | 
 93 |   # If there are *no* values returned, build an all-missing grid of variable x statistic x window
 94 |   if (nrow(output_item) == 0) {
 95 |     output_item =
 96 |       tidyr::crossing(
 97 |         dplyr::tibble(!!rlang::parse_expr(temporal_variable) := all_temporal_vars),
 98 |         dplyr::tibble(wiz_stat = names(stats))
 99 |       ) %>%
100 |       tidyr::crossing(
101 |         dplyr::tibble(window_time = 1:(lookback_converted/window_converted)*window_converted) # (1:k) * window size, since ':' binds tighter than '*'
102 |       ) %>%
103 |       dplyr::mutate(!!rlang::parse_expr(temporal_id) := groups[[temporal_id]][1],
104 |                     time = groups$time[1]) %>%
105 |       dplyr::select(!!rlang::parse_expr(temporal_id), time, window_time,
106 |                     !!rlang::parse_expr(temporal_variable), dplyr::everything()) %>%
107 |       dplyr::mutate(wiz_value = NA) # placeholder; replaced from missing_value_frame below
108 |   } else {
109 |     output_item =
110 |       output_item %>%
111 |       dplyr::arrange(!!rlang::parse_expr(temporal_variable), !!rlang::parse_expr(temporal_time)) %>%
112 |       dplyr::group_by(!!rlang::parse_expr(temporal_variable), window_time) %>%
113 |       dplyr::summarize_at(temporal_value,
114 |                           .funs = stats) %>%
115 |       tidyr::complete(window_time) %>%
116 |       dplyr::ungroup() %>%
117 |       tidyr::gather(wiz_stat, wiz_value, -!!rlang::parse_expr(temporal_variable), -window_time) %>% # long format: one row per variable, window, and statistic
118 |       tidyr::complete(!!rlang::parse_expr(temporal_variable), window_time, wiz_stat) %>%
119 |       dplyr::mutate(window_time = window_time %>% as.character() %>% as.numeric()) %>% # factor -> number via its label, not its integer code
120 |       dplyr::mutate(!!rlang::parse_expr(temporal_variable) :=
121 |                       !!rlang::parse_expr(temporal_variable) %>% as.character()) %>%
122 |       dplyr::mutate(!!rlang::parse_expr(temporal_id) := groups[[temporal_id]][1],
123 |                     time = groups$time[1]) %>%
124 |       dplyr::select(!!rlang::parse_expr(temporal_id), time, window_time,
125 |                     !!rlang::parse_expr(temporal_variable), dplyr::everything())
126 |   }
127 | 
128 |   # Fill in precalculated missing values (separately for each statistic)
129 |   suppressMessages({
130 |     output_item =
131 |       dplyr::left_join(
132 |         output_item,
133 |         missing_value_frame
134 |       ) %>% # no `by` given, so the join uses the columns the two frames share (join message suppressed)
135 |       dplyr::mutate(wiz_value = ifelse(is.na(wiz_value), wiz_missing_value, wiz_value)) %>%
136 |       dplyr::select(-wiz_missing_value) %>%
137 |       dplyr::mutate(wiz_value = dplyr::na_if(wiz_value, -Inf)) %>% # treat -Inf and Inf as missing
138 |       dplyr::mutate(wiz_value = dplyr::na_if(wiz_value, Inf))
139 |   })
140 | 
141 |   # Imputation: fill missing values within each variable and statistic from the preceding rows
142 |   if (impute) {
143 |     output_item =
144 |       output_item %>%
145 |       dplyr::arrange(!!rlang::parse_expr(temporal_variable),
146 |                      wiz_stat,
147 |                      dplyr::desc(window_time * sign(window_converted))) %>% # NOTE(review): looks intended to order windows chronologically before filling - confirm the sign convention for lookaheads
148 |       dplyr::group_by(!!rlang::parse_expr(temporal_variable),
149 |                       wiz_stat) %>%
150 |       tidyr::fill(-!!rlang::parse_expr(temporal_variable),
151 |                   -wiz_stat) %>%
152 |       dplyr::ungroup()
153 |   }
154 | 
155 |   # Name the variables as <variable>_<statistic>
156 |   output_item =
157 |     output_item %>%
158 |     dplyr::mutate(wiz_variable =
159 |                     paste0(!!rlang::parse_expr(temporal_variable),
160 |                            '_', wiz_stat),
161 |                   wiz_value = wiz_value) %>% # wiz_value = wiz_value is a no-op
162 |     dplyr::select(-!!rlang::parse_expr(temporal_variable), -wiz_stat)
163 | 
164 |   if (lookback_converted < 0) { # e.g. if it is a lookahead
165 |     output_item =
166 |       output_item %>%
167 |       dplyr::mutate(wiz_variable = paste0('outcome_', wiz_variable, '_',
168 |                                           stringr::str_pad(abs(window_time),
169 |                                                            nchar(abs(lookback_converted)), pad = '0'))) %>% # zero-pad the window to the width of the lookback
170 |       dplyr::select(-window_time)
171 |   } else { # if it is a lookback
172 | 
173 |     if (!is.null(dots[['baseline']]) && dots[['baseline']]) {
174 |       output_item =
175 |         output_item %>%
176 |         dplyr::mutate(wiz_variable = paste0('baseline_', wiz_variable, '_',
177 |                                             stringr::str_pad(abs(window_time),
178 |                                                              nchar(abs(lookback_converted)), pad = '0'))) %>%
179 |         dplyr::select(-window_time)
180 | 
181 |     } else if (!is.null(dots[['growing']]) && dots[['growing']]){
182 |       output_item =
183 |         output_item %>%
184 |         dplyr::mutate(wiz_variable = paste0('growing_', wiz_variable)) %>% # no window suffix for growing predictors
185 |         dplyr::select(-window_time)
186 |     } else {
187 |       output_item =
188 |         output_item %>%
189 |         dplyr::mutate(wiz_variable = paste0(wiz_variable, '_',
190 |                                             stringr::str_pad(abs(window_time),
191 |                                                              nchar(abs(lookback_converted)), pad = '0'))) %>%
192 |         dplyr::select(-window_time)
193 |     }
194 |   }
195 | 
196 |   output_item =
197 |     output_item %>%
198 |     tidyr::spread(wiz_variable, wiz_value) %>% # wide format: one column per named variable
199 |     as.data.frame()
200 | 
201 |   if (!is.null(dots[['baseline']]) && dots[['baseline']]) {
202 |     output_item =
203 |       output_item %>% dplyr::select(-time)
204 |   }
205 | 
206 |   if (strategy == 'sequential') { # the progress bar is only advanced for the sequential strategy
207 |     pb$tick()
208 |   }
209 |   return(output_item)
210 | }
211 | 
212 | 
#' New furrr-enabled add_predictors function
#'
#' Internal only. Validates the request, builds the frame of (id, time,
#' variable) steps via \code{wiz_define_steps()}, and then summarizes the
#' temporal data for each (id, time) group with \code{wiz_calc()} through
#' \code{furrr::future_map_dfr()}. Progress is reported with \code{message()}
#' and, optionally, appended to \code{wiz_log.txt} in the output folder.
#'
#' @param wiz_frame A list-like object describing the fixed and temporal data.
#'   This function reads its \code{temporal_data}, \code{temporal_data_dict},
#'   \code{fixed_data}, the \code{fixed_*} / \code{temporal_*} column-name
#'   elements, \code{step}, \code{step_units}, \code{max_length},
#'   \code{chunk_size} and \code{output_folder}.
#' @param variables Names of temporal variables to summarize. Each must appear
#'   in \code{wiz_frame$temporal_data_dict$variable}. Mutually exclusive with
#'   \code{category}.
#' @param category A regular expression matched against the temporal category
#'   column. Mutually exclusive with \code{variables}.
#' @param lookback Total span to summarize. Positive values are treated as a
#'   lookback (predictors) and negative values as a lookahead (outcomes). When
#'   \code{wiz_frame$step_units} is set it is converted to those units with
#'   \code{lubridate::time_length()}; otherwise it is used as given.
#' @param window Width of each summary window. Must be non-zero, share the
#'   sign of \code{lookback}, and divide \code{lookback} with no remainder.
#'   Defaults to \code{lookback}.
#' @param stats Named collection of summary functions. Each is applied to the
#'   selected values; the names are used as the statistic labels.
#' @param impute Passed through unchanged to \code{wiz_calc()}.
#' @param output_file If \code{TRUE}, the result is written to a CSV in
#'   \code{wiz_frame$output_folder} and \code{wiz_frame} is returned invisibly.
#' @param log_file If \code{TRUE}, every status message is also appended to
#'   \code{wiz_log.txt} in \code{wiz_frame$output_folder}.
#' @param check_size_only If \code{TRUE}, return the number of rows of the step
#'   frame without running the calculation.
#' @param filename_prefix Text placed at the start of the output file name.
#' @param ... Additional options. \code{baseline}, \code{growing} and
#'   \code{offset} are read here, and the whole list is forwarded to
#'   \code{wiz_calc()} as \code{dots}.
#'
#' @return \code{wiz_frame} (invisibly) when \code{output_file} is \code{TRUE};
#'   otherwise the combined result of \code{wiz_calc()}, or \code{NULL} when no
#'   data was found. When \code{check_size_only} is \code{TRUE}, the row count
#'   of the step frame.
wiz_add_predictors_internal = function(wiz_frame = NULL,
                              variables = NULL,
                              category = NULL,
                              lookback = lubridate::hours(48),
                              window = lookback,
                              stats = c(mean = mean,
                                        min = min,
                                        max = max),
                              impute = TRUE,
                              output_file = TRUE,
                              log_file = TRUE,
                              check_size_only = FALSE,
                              filename_prefix = '',
                              ...) {
  dots = list(...)

  no_values_text = 'No values found for the selected variable(s) during the time period.'

  # Print a status message and, when log_file is TRUE, append the same text
  # (prefixed with a timestamp) to wiz_log.txt in the output folder.
  log_status = function(text) {
    message(text)
    if (log_file) {
      write(paste0(Sys.time(), ': ', text),
            file.path(wiz_frame$output_folder, 'wiz_log.txt'), append = TRUE)
    }
  }

  # ---- Argument validation ----
  if (!is.null(wiz_frame$chunk_size) && !output_file) {
    stop('If you set a chunk_size, then output_file must be set to TRUE.')
  }

  if (is.null(variables) && is.null(category)) {
    stop('You must specify either a variable or a category.')
  }

  if (!is.null(variables) && !is.null(category)) {
    stop('You must specify only a variable or a category, not both.')
  }

  if (!is.null(variables)) {
    # Report the first requested variable that is absent from the data dictionary
    unknown_variables = variables[!variables %in% wiz_frame$temporal_data_dict$variable]
    if (length(unknown_variables) > 0) {
      stop(paste0('The variable ', unknown_variables[1], ' could not be found in the temporal data.'))
    }
  }

  if (!is.null(category) && !any(grepl(category, wiz_frame$temporal_data[[wiz_frame$temporal_category]]))) {
    stop(paste0('The category ', category, ' could not be found in the temporal data.'))
  }

  # Aliases that cannot be shadowed by data columns inside dplyr verbs
  # (the temporal data may itself have columns named like these arguments).
  wiz_variables = variables
  wiz_category = category

  uses_time_units = !is.null(wiz_frame$step_units)

  if (uses_time_units) {
    lookback_converted = lubridate::time_length(lookback, unit = wiz_frame$step_units)
    window_converted = lubridate::time_length(window, unit = wiz_frame$step_units)
  } else {
    lookback_converted = lookback
    window_converted = window
  }

  if (lookback_converted == 0) {
    stop('lookback/lookahead cannot be 0.')
  }
  if (window_converted == 0) {
    stop('window cannot be 0.')
  }

  if ((lookback_converted > 0 && window_converted < 0) ||
      (lookback_converted < 0 && window_converted > 0)) {
    stop(paste0('The lookback/lookahead and window must either *both* be positive ',
                '(for lookback) or *both* negative (for lookahead).'))
  }

  if (lookback_converted %% window_converted != 0) {
    stop ('The lookback must be divisible by the window (with no remainder).')
  }

  temporal_data_of_interest = wiz_frame$temporal_data

  # ---- Furthest complete step per id ----
  # fixed_end and fixed_start should *always* be available now because they are now created if not provided
  fixed_time_bounds =
    wiz_frame$fixed_data %>%
    dplyr::select_at(c(wiz_frame$fixed_id, wiz_frame$fixed_start, wiz_frame$fixed_end)) %>%
    dplyr::rename(!!rlang::parse_expr(wiz_frame$temporal_id) := !!rlang::parse_expr(wiz_frame$fixed_id)) %>%
    dplyr::mutate(wiz_fixed_start_time = !!rlang::parse_expr(wiz_frame$fixed_start)) %>%
    dplyr::mutate(wiz_fixed_end_time = !!rlang::parse_expr(wiz_frame$fixed_end))

  # Join on the id column only. Without an explicit key dplyr joins on every
  # shared column name, which would silently mis-join if the temporal data
  # reused the name of the fixed start/end column.
  max_step_times_per_id =
    temporal_data_of_interest %>%
    dplyr::left_join(fixed_time_bounds, by = wiz_frame$temporal_id) %>%
    dplyr::distinct(!!rlang::parse_expr(wiz_frame$temporal_id), wiz_fixed_start_time, wiz_fixed_end_time)

  # The integer division selects the furthest time with complete step data
  if (uses_time_units) {
    max_step_times_per_id =
      max_step_times_per_id %>%
      dplyr::mutate(wiz_step_time =
                      lubridate::time_length(wiz_fixed_end_time - wiz_fixed_start_time, unit = wiz_frame$step_units) %/% wiz_frame$step * wiz_frame$step)
  } else {
    max_step_times_per_id =
      max_step_times_per_id %>%
      dplyr::mutate(wiz_step_time = (wiz_fixed_end_time - wiz_fixed_start_time) %/% wiz_frame$step * wiz_frame$step)
  }

  max_step_times_per_id =
    max_step_times_per_id %>%
    dplyr::select(!!rlang::parse_expr(wiz_frame$temporal_id), wiz_step_time)

  # Check to see if there is any data in the max_step_times_per_id
  if (nrow(max_step_times_per_id) == 0) {
    log_status(no_values_text)

    if (output_file == TRUE) {
      return(invisible(wiz_frame))
    }

    return(NULL)
  }

  # ---- Restrict to the requested variables / category ----
  if (!is.null(variables)) {
    temporal_data_of_interest =
      temporal_data_of_interest %>%
      dplyr::filter(!!rlang::parse_expr(wiz_frame$temporal_variable) %in% wiz_variables)
  } else if (!is.null(category)) {
    temporal_data_of_interest =
      temporal_data_of_interest %>%
      dplyr::filter(stringr::str_detect(!!rlang::parse_expr(wiz_frame$temporal_category), wiz_category))
  } else {
    stop('This option should not be possible.')
  }

  # Dictionary classes of the variables that survived the filter (looked up once)
  classes_of_interest =
    wiz_frame$temporal_data_dict %>%
    dplyr::filter(variable %in% temporal_data_of_interest[[wiz_frame$temporal_variable]]) %>%
    dplyr::pull(class)

  if ('character' %in% classes_of_interest && 'numeric' %in% classes_of_interest) {
    stop(paste0('Please select variables that are either all numeric or all categorical. ',
                'They cannot be mixed. If both are to be selected, then you must dummy ',
                'code the categorical variables using wiz_dummy_code().'))
  }

  # If all variables are numeric, convert value column to numeric prior to calculating stats
  if (all(classes_of_interest == 'numeric')) {
    temporal_data_of_interest[[wiz_frame$temporal_value]] =
      as.numeric(temporal_data_of_interest[[wiz_frame$temporal_value]])
  } else {
    temporal_data_of_interest[[wiz_frame$temporal_value]] =
      as.character(temporal_data_of_interest[[wiz_frame$temporal_value]])
  }

  # Check to see if there is any data in the period of interest
  if (nrow(temporal_data_of_interest) == 0) {
    log_status(no_values_text)

    if (output_file == TRUE) {
      return(invisible(wiz_frame))
    }

    return(NULL)
  }

  # Test to make sure all stats are calculable
  values_to_test = temporal_data_of_interest[[wiz_frame$temporal_value]]
  for (stat in stats) {
    tryCatch({
      do.call(stat, list(values_to_test))},
      error = function (e) {
        stop(paste0('At least one of the statistics could not be calculated for the ',
                    'selected variables in the temporal data. Did you perhaps forget to ',
                    'run wiz_dummy_code() on one of the variables of interest?'))
      })
  }

  if (!is.null(variables)) {
    log_status(paste0('Processing variables: ', paste0(variables, collapse = ', '), '...'))
  } else if (!is.null(category)) {
    log_status(paste0('Processing category: ', category, '...'))
  }

  # ---- Build the frame of steps to calculate ----
  log_status('Allocating memory...')

  output_frame =
    dplyr::tibble(!!rlang::parse_expr(wiz_frame$temporal_id) :=
                    unique(wiz_frame$temporal_data[[wiz_frame$temporal_id]])) %>%
    tidyr::crossing(
      dplyr::tibble(
        !!rlang::parse_expr(wiz_frame$temporal_variable) :=
          unique(temporal_data_of_interest[[wiz_frame$temporal_variable]]))) %>%
    dplyr::group_by(!!rlang::parse_expr(wiz_frame$temporal_id)) %>%
    dplyr::group_modify(~wiz_define_steps(groups = .y,
                                          temporal_id = wiz_frame$temporal_id,
                                          step = wiz_frame$step,
                                          step_units = wiz_frame$step_units,
                                          max_length = wiz_frame$max_length,
                                          baseline = dots[['baseline']],
                                          max_step_times_per_id = max_step_times_per_id,
                                          lookback_converted = lookback_converted,
                                          window_converted = window_converted,
                                          output_folder = wiz_frame$output_folder,
                                          log_file = log_file))

  log_status(paste0('Number of rows in final output: ', nrow(output_frame)))

  if (check_size_only) {
    return(nrow(output_frame))
  }

  is_baseline = !is.null(dots[['baseline']]) && dots[['baseline']]

  # For baseline predictors, shift the observation times by the requested offset
  if (is_baseline) {
    if (uses_time_units) {
      temporal_data_of_interest[[wiz_frame$temporal_time]] =
        temporal_data_of_interest[[wiz_frame$temporal_time]] +
        lubridate::time_length(dots[['offset']], unit = wiz_frame$step_units)
    } else {
      temporal_data_of_interest[[wiz_frame$temporal_time]] =
        temporal_data_of_interest[[wiz_frame$temporal_time]] + dots[['offset']]
    }
  }

  total_num_groups = nrow(output_frame)

  # ---- Execution strategy ----
  if ('sequential' %in% class(future::plan())) {
    strategy = 'sequential'
    log_status('Parallel processing is DISABLED. Calculations are happening sequentially.')
    # The progress bar is only created for sequential runs; it is handed to wiz_calc()
    pb = progress::progress_bar$new(format = "[:bar] :current/:total (:percent) Time remaining: :eta",
                                    total = total_num_groups)

    pb$tick(0)
  } else {
    strategy = 'parallel'
    log_status('Parallel processing is ENABLED.')
    pb = NULL
  }

  log_status('Determining missing values for each statistic...')
  # Use a bit of R magic. Looking for is.null() because median(NULL) returns NULL
  # Note: mean(NULL) returns NA, sum(NULL) returns 0, length(NULL) returns 0
  suppressWarnings({
    missing_value_frame = dplyr::tibble(wiz_stat = names(stats),
                                        wiz_missing_value =
                                          sapply(stats, function (stat) {
                                            # Evaluate the stat on empty input once;
                                            # ifelse() is kept because it reduces the
                                            # result to a single plain value.
                                            empty_result = do.call(stat, list(NULL))
                                            ifelse(is.null(empty_result),
                                                   NA,
                                                   empty_result)}))
  })

  # ---- Calculation ----
  log_status('Beginning calculation...')

  # One list element per (id, time) combination
  output_list =
    output_frame %>%
    dplyr::group_by(!!rlang::parse_expr(wiz_frame$temporal_id),
                    time) %>%
    dplyr::group_split()

  all_temporal_vars = unique(temporal_data_of_interest[[wiz_frame$temporal_variable]]) %>% as.factor()

  suppressWarnings({
    output_frame =
      output_list %>%
      furrr::future_map_dfr(.f = wiz_calc,
                            temporal_id = wiz_frame$temporal_id,
                            temporal_variable = wiz_frame$temporal_variable,
                            temporal_value = wiz_frame$temporal_value,
                            temporal_time = wiz_frame$temporal_time,
                            lookback_converted = lookback_converted,
                            dots = dots,
                            window_converted = window_converted,
                            temporal_data_of_interest = temporal_data_of_interest,
                            stats = stats,
                            impute = impute,
                            pb = pb,
                            all_temporal_vars = all_temporal_vars,
                            missing_value_frame = missing_value_frame,
                            strategy = strategy,
                            .progress = TRUE)
  })

  log_status('Completed calculation.')

  # Check to see if there is any data in the output_frame
  if (nrow(output_frame) == 0) {
    log_status(no_values_text)

    if (output_file == TRUE) {
      return(invisible(wiz_frame))
    }

    return(NULL)
  }

  # ---- Output ----
  # The file name is only needed (and only built) when a file is written.
  if (output_file == TRUE) {
    file_type = if (lookback_converted < 0) '_outcomes_' else '_predictors_'

    if (is_baseline) {
      file_type = paste0('baseline', file_type)
    } else if (!is.null(dots[['growing']]) && dots[['growing']]) {
      file_type = paste0('growing', file_type)
    } else {
      file_type = paste0('rolling', file_type)
    }

    # A single timestamp so the length estimate and the final name agree
    file_timestamp = lubridate::now()

    if (!is.null(category)) {
      output_file_name =
        file.path(wiz_frame$output_folder,
                  paste0(filename_prefix, file_type, '_category_', category, '_', file_timestamp) %>%
                    janitor::make_clean_names() %>%
                    paste0('.csv'))
    } else {
      # BUGFIX checks total pathname and truncates from variable name(s)
      joined_variables = paste0(variables, collapse = '_')
      path_length = nchar(file.path(wiz_frame$output_folder,
                                    paste0(filename_prefix, file_type, '_variables_', '_', file_timestamp, '.csv')))
      var_length = nchar(joined_variables)

      if (path_length + var_length > 255) {
        # Never ask for a negative number of characters
        new_var_length = max(255 - path_length, 0)
        message("Filename truncated due to length")
      } else {
        new_var_length = var_length
      }

      output_file_name =
        file.path(wiz_frame$output_folder,
                  paste0(filename_prefix, file_type, '_variables_',
                         substring(joined_variables, 1, new_var_length),
                         '_', file_timestamp) %>%
                    janitor::make_clean_names() %>%
                    paste0('.csv'))
    }

    data.table::fwrite(output_frame, output_file_name)
    log_status(paste0('The output file was written to: ', output_file_name))
    return(invisible(wiz_frame))
  }

  return(output_frame)
}
618 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # wizard
 17 | 
 18 | ## Windowed Summarization for Autoregressive Data
 19 | 
 20 | This package uses windowed summarization to convert time series data into a form that can be modeled by prediction models.
 21 | 
 22 | <!-- badges: start -->
 23 | [![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
 24 | <!-- badges: end -->
 25 | 
 26 | ## Installation
 27 | 
 28 | You can install the GitHub version of wizard with:
 29 | 
 30 | ```{r eval=FALSE}
 31 | remotes::install_github('ML4LHS/wizard')
 32 | ```
 33 | 
 34 | ## How to set up a wiz_frame()
 35 | 
Start by loading the package and defining your `wiz_frame()`. A `wiz_frame` is simply a list with the class `wiz_frame` and contains all the key information needed to describe both your fixed dataset (such as demographics, one row per patient) and your temporal dataset (one row per observation linked to a timestamp).
 37 | 
 38 | ```{r}
 39 | library(wizard)
 40 | ```
 41 | 
 42 | ```{r}
 43 | library(magrittr)
 44 | library(lubridate)
 45 | 
 46 | future::plan('multisession')
 47 | 
 48 | unlink(file.path(tempdir(), 'wizard_dir', '*.*'))
 49 | 
 50 | wf = wiz_frame(fixed_data = sample_fixed_data,
 51 |                temporal_data = sample_temporal_data %>% dplyr::filter(id %in% 1:100),
 52 |                fixed_id = 'id',
 53 |                fixed_start = 'admit_time',
 54 |                fixed_end = 'dc_time',
 55 |                temporal_id = 'id',
 56 |                temporal_time = 'time',
 57 |                temporal_variable = 'variable',
 58 |                temporal_category = 'category',
 59 |                temporal_value = 'value',
 60 |                step = hours(6),
 61 |                max_length = days(7), # optional parameter to limit to first 7 days of hospitalization
 62 |                output_folder = file.path(tempdir(), 'wizard_dir'),
 63 |                create_folder = TRUE)
 64 | 
 65 | ```
 66 | 
 67 | ## Let's look at the automatically generated data dictionaries
 68 | 
 69 | ```{r}
 70 | names(wf)
 71 | 
 72 | wf$step
 73 | 
 74 | wf$step_units
 75 | 
 76 | wf$fixed_data_dict
 77 | 
 78 | wf$temporal_data_dict
 79 | ```
 80 | 
## Let's dummy code the temporal categorical variables
 82 | 
 83 | 
 84 | ```{r}
 85 | wf = wf %>% 
 86 |   wiz_dummy_code()
 87 | ```
 88 | 
 89 | 
 90 | This affects only the temporal data and not the fixed data.
 91 | 
 92 | ```{r}
 93 | wf$fixed_data_dict
 94 | 
 95 | wf$temporal_data_dict
 96 | ```
 97 | 
 98 | ## Let's add some predictors and outcomes
 99 | 
The default method writes output to the folder defined in your `wiz_frame`. When you write your output to file, you are allowed to chain together `add_predictors()` and `add_outcomes()` functions. This is possible because these functions invisibly return a `wiz_frame`.
101 | 
102 | If, however, you set `output_file` to `FALSE`, then your actual output is returned (rather than the `wiz_frame`) so you cannot chain functions.
103 | 
104 | ```{r}
105 | wf %>%           
106 |   wiz_add_predictors(variables = 'cr', # Note: You can supply a vector of variables
107 |                      lookback = hours(12), 
108 |                      window = hours(6), 
109 |                      stats = c(mean = mean,
110 |                                min = min,
111 |                                max = max,
112 |                                median = median,
113 |                                length = length)) %>%
114 |   wiz_add_baseline_predictors(variables = 'cr', # add baseline creatinine
115 |                               lookback = days(90),
116 |                               offset = hours(10),
117 |                               stats = c(min = min)) %>%
118 |   wiz_add_growing_predictors(variables = 'cr', # cumulative max creatinine since admission
119 |                               stats = c(max = max)) %>%
120 |   wiz_add_predictors(category = 'med', # Note: category is always a regular expression 
121 |                      lookback = days(7),
122 |                      stats = c(sum = sum)) %>% 
123 |   wiz_add_outcomes(variables = 'cr',
124 |                    lookahead = hours(24), 
125 |                    stats = c(max = max))
126 | ```
127 | 
128 | ## Let's combine our output into a single data frame
129 | 
130 | You can provide `wiz_combine()` with a set of data frames separated by commas. Or, now you can provide a vector of file names using the `files` argument. If you leave `files` blank, it will automatically find all the `.csv` files from the `output_folder` of your `wiz_frame`.
131 | 
132 | This resulting frame is essentially ready for modeling (using `tidymodels`, for example). Make sure to keep individual patients in the same fold if you divide this dataset into multiple folds.
133 | 
134 | ```{r}
135 | model_data = wiz_combine(wf)
136 | 
137 | head(model_data)
138 | ```
139 | 
140 | 
141 | ## Testing wiz_frame without writing output to files
142 | 
143 | If you want to simply test `wiz_frame`, you may prefer not to write your output to file. You can accomplish this by setting `output_file` to `FALSE`.
144 | 
145 | ```{r}
146 | wf %>% 
147 |   wiz_add_predictors(variables = 'cr',
148 |                      lookback = hours(12), 
149 |                      window = hours(6), 
150 |                      stats = c(mean = mean,
151 |                                min = min,
152 |                                max = max,
153 |                                median = median,
154 |                                length = length),
155 |                      output_file = FALSE) %>% 
156 |   head()
157 | ```
158 | 
159 | ## You can also supply a vector of variables
160 | 
161 | ```{r}
162 | wf %>% 
163 |   wiz_add_predictors(variables = c('cr', 'med_aspirin'),
164 |                      lookback = weeks(1), 
165 |                      stats = c(length = length),
166 |                      output_file = FALSE) %>% 
167 |   head()
168 | ```
169 | 
170 | ## Category accepts regular expressions
171 | 
172 | ```{r}
173 | wf %>% 
174 |   wiz_add_predictors(category = 'lab|med',
175 |                      lookback = hours(12), 
176 |                      stats = c(length = length),
177 |                      output_file = FALSE) %>% 
178 |   head()
179 | ```
180 | 
## Let's benchmark the performance of our package
182 | 
183 | ### Running in parallel
184 | 
185 | ```{r message=FALSE}
186 | benchmark_results = list()
187 | 
188 | # future::plan('multisession')
189 | 
190 | benchmark_results[['multisession']] = 
191 |   microbenchmark::microbenchmark(
192 |     wf %>% 
193 |       wiz_add_predictors(variable = 'cr',
194 |                          lookback = hours(48), 
195 |                          window = hours(6), 
196 |                          stats = c(mean = mean,
197 |                                    min = min,
198 |                                    max = max,
199 |                                    median = median,
200 |                                    length = length)),
201 |     times = 1
202 |   )
203 | ```
204 | 
205 | ### Running in parallel with a chunk_size of 20
206 | 
207 | ```{r}
208 | 
209 | wf_with_chunks = wf
210 | wf_with_chunks$chunk_size = 20
211 | 
212 | benchmark_results[['multisession with chunk_size 20']] = 
213 |   microbenchmark::microbenchmark(
214 |     wf_with_chunks %>% 
215 |       wiz_add_predictors(variable = 'cr',
216 |                          lookback = hours(48), 
217 |                          window = hours(6), 
218 |                          stats = c(mean = mean,
219 |                                    min = min,
220 |                                    max = max,
221 |                                    median = median,
222 |                                    length = length)),
223 |     times = 1
224 |   )
225 | ```
226 | 
227 | ### Running in serial
228 | 
229 | ```{r message=FALSE}
230 | future::plan('sequential')
231 | 
232 | benchmark_results[['sequential']] = 
233 |   microbenchmark::microbenchmark(
234 |   wf %>% 
235 |     wiz_add_predictors(variable = 'cr',
236 |                        lookback = hours(48), 
237 |                        window = hours(6), 
238 |                        stats = c(mean = mean,
239 |                                  min = min,
240 |                                  max = max,
241 |                                  median = median,
242 |                                  length = length)),
243 |   times = 1
244 |   )
245 | 
246 | ```
247 | 
248 | ## Benchmark results
249 | 
250 | ```{r}
251 | benchmark_results
252 | ```
253 | 
254 | ```{r include=FALSE}
255 | unlink(file.path(tempdir(), 'wizard_dir', '*.*'))
256 | ```
257 | 
258 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # wizard
  5 | 
  6 | ## Windowed Summarization for Autoregressive Data
  7 | 
  8 | This package uses windowed summarization to convert time series data
  9 | into a form that can be modeled by prediction models.
 10 | 
 11 | <!-- badges: start -->
 12 | 
 13 | [![Lifecycle:
 14 | maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
 15 | <!-- badges: end -->
 16 | 
 17 | ## Installation
 18 | 
 19 | You can install the GitHub version of wizard with:
 20 | 
 21 | ``` r
 22 | remotes::install_github('ML4LHS/wizard')
 23 | ```
 24 | 
 25 | ## How to set up a wiz\_frame()
 26 | 
Start by loading the package and defining your `wiz_frame()`. A
 28 | `wiz_frame` is simply a list with the class `wiz_frame` and contains all
 29 | the key information needed to describe both your fixed dataset (such as
 30 | demographics, one row per patient) and your temporal dataset (one row
 31 | per observation linked to a timestamp).
 32 | 
 33 | ``` r
 34 | library(wizard)
 35 | ```
 36 | 
 37 | ``` r
 38 | library(magrittr)
 39 | #> Warning: package 'magrittr' was built under R version 3.6.3
 40 | library(lubridate)
 41 | #> Warning: package 'lubridate' was built under R version 3.6.3
 42 | #> 
 43 | #> Attaching package: 'lubridate'
 44 | #> The following objects are masked from 'package:base':
 45 | #> 
 46 | #>     date, intersect, setdiff, union
 47 | 
 48 | future::plan('multisession')
 49 | 
 50 | unlink(file.path(tempdir(), 'wizard_dir', '*.*'))
 51 | 
 52 | wf = wiz_frame(fixed_data = sample_fixed_data,
 53 |                temporal_data = sample_temporal_data %>% dplyr::filter(id %in% 1:100),
 54 |                fixed_id = 'id',
 55 |                fixed_start = 'admit_time',
 56 |                fixed_end = 'dc_time',
 57 |                temporal_id = 'id',
 58 |                temporal_time = 'time',
 59 |                temporal_variable = 'variable',
 60 |                temporal_category = 'category',
 61 |                temporal_value = 'value',
 62 |                step = hours(6),
 63 |                max_length = days(7), # optional parameter to limit to first 7 days of hospitalization
 64 |                output_folder = file.path(tempdir(), 'wizard_dir'),
 65 |                create_folder = TRUE)
 66 | ```
 67 | 
 68 | ## Let’s look at the automatically generated data dictionaries
 69 | 
 70 | ``` r
 71 | names(wf)
 72 | #>  [1] "fixed_data"         "temporal_data"      "fixed_id"          
 73 | #>  [4] "fixed_start"        "fixed_end"          "temporal_id"       
 74 | #>  [7] "temporal_time"      "temporal_variable"  "temporal_value"    
 75 | #> [10] "temporal_category"  "step"               "max_length"        
 76 | #> [13] "step_units"         "output_folder"      "fixed_data_dict"   
 77 | #> [16] "temporal_data_dict" "chunk_size"
 78 | 
 79 | wf$step
 80 | #> [1] 6
 81 | 
 82 | wf$step_units
 83 | #> [1] "hour"
 84 | 
 85 | wf$fixed_data_dict
 86 | #>      variable     class
 87 | #> 1          id   integer
 88 | #> 2         sex character
 89 | #> 3         age   numeric
 90 | #> 4        race character
 91 | #> 5 baseline_cr   numeric
 92 | #> 6  admit_time   POSIXct
 93 | #> 7     dc_time   POSIXct
 94 | 
 95 | wf$temporal_data_dict
 96 | #>   variable     class
 97 | #> 1       cr   numeric
 98 | #> 2  cr_abnl character
 99 | #> 3  cr_high character
100 | #> 4      med character
101 | ```
102 | 
103 | ## Let’s dummy code the temporal categorical variables
104 | 
105 | ``` r
106 | wf = wf %>% 
107 |   wiz_dummy_code()
108 | ```
109 | 
110 | This affects only the temporal data and not the fixed data.
111 | 
112 | ``` r
113 | wf$fixed_data_dict
114 | #>      variable     class
115 | #> 1          id   integer
116 | #> 2         sex character
117 | #> 3         age   numeric
118 | #> 4        race character
119 | #> 5 baseline_cr   numeric
120 | #> 6  admit_time   POSIXct
121 | #> 7     dc_time   POSIXct
122 | 
123 | wf$temporal_data_dict
124 | #>              variable   class
125 | #> 1                  cr numeric
126 | #> 2        cr_abnl_high numeric
127 | #> 3         cr_abnl_low numeric
128 | #> 4      cr_abnl_normal numeric
129 | #> 5          cr_high_no numeric
130 | #> 6         cr_high_yes numeric
131 | #> 7   med_acetaminophen numeric
132 | #> 8         med_aspirin numeric
133 | #> 9 med_diphenhydramine numeric
134 | ```
135 | 
136 | ## Let’s add some predictors and outcomes
137 | 
138 | The default method writes output to the folder defined in your
139 | `wiz_frame`. When you write your output to file, you can chain together
140 | the `wiz_add_predictors()` and `wiz_add_outcomes()` functions. This is
141 | possible because these functions invisibly return a `wiz_frame`.
142 | 
143 | If, however, you set `output_file` to `FALSE`, then your actual output
144 | is returned (rather than the `wiz_frame`) so you cannot chain functions.
145 | 
146 | ``` r
147 | wf %>%           
148 |   wiz_add_predictors(variables = 'cr', # Note: You can supply a vector of variables
149 |                      lookback = hours(12), 
150 |                      window = hours(6), 
151 |                      stats = c(mean = mean,
152 |                                min = min,
153 |                                max = max,
154 |                                median = median,
155 |                                length = length)) %>%
156 |   wiz_add_baseline_predictors(variables = 'cr', # add baseline creatinine
157 |                               lookback = days(90),
158 |                               offset = hours(10),
159 |                               stats = c(min = min)) %>%
160 |   wiz_add_growing_predictors(variables = 'cr', # cumulative max creatinine since admission
161 |                               stats = c(max = max)) %>%
162 |   wiz_add_predictors(category = 'med', # Note: category is always a regular expression 
163 |                      lookback = days(7),
164 |                      stats = c(sum = sum)) %>% 
165 |   wiz_add_outcomes(variables = 'cr',
166 |                    lookahead = hours(24), 
167 |                    stats = c(max = max))
168 | #> Joining, by = "id"
169 | #> Processing variables: cr...
170 | #> Allocating memory...
171 | #> Number of rows in final output: 1540
172 | #> Parallel processing is ENABLED.
173 | #> Determining missing values for each statistic...
174 | #> Beginning calculation...
175 | #>  Progress: ---------------------------------------------------------------------------------------                                                          100% Progress: -----------------------------------------------------------------------------------------------------------------------------                    100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
176 | #> Completed calculation.
177 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/rolling_predictors_variables_cr_2021_02_22_21_38_15.csv
178 | #> Joining, by = "id"
179 | #> Processing variables: cr...
180 | #> Allocating memory...
181 | #> Number of rows in final output: 100
182 | #> Parallel processing is ENABLED.
183 | #> Determining missing values for each statistic...
184 | #> Beginning calculation...
185 | #>  Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
186 | #> Completed calculation.
187 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/baseline_predictors_variables_cr_2021_02_22_21_38_17.csv
188 | #> Joining, by = "id"
189 | #> Processing variables: cr...
190 | #> Allocating memory...
191 | #> Number of rows in final output: 1540
192 | #> Parallel processing is ENABLED.
193 | #> Determining missing values for each statistic...
194 | #> Beginning calculation...
195 | #>  Progress: ----------------------------------------------------------------------------------------------------------------                                 100% Progress: -----------------------------------------------------------------------------------------------------------------------------------------------  100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
196 | #> Completed calculation.
197 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/growing_predictors_variables_cr_2021_02_22_21_38_34.csv
198 | #> Joining, by = "id"
199 | #> Processing category: med...
200 | #> Allocating memory...
201 | #> Number of rows in final output: 1540
202 | #> Parallel processing is ENABLED.
203 | #> Determining missing values for each statistic...
204 | #> Beginning calculation...
205 | #>  Progress: ---------------------------------------------------------------------------------------------                                                    100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------         100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
206 | #> Completed calculation.
207 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/rolling_predictors_category_med_2021_02_22_21_38_53.csv
208 | #> Joining, by = "id"
209 | #> Processing variables: cr...
210 | #> Allocating memory...
211 | #> Number of rows in final output: 1540
212 | #> Parallel processing is ENABLED.
213 | #> Determining missing values for each statistic...
214 | #> Beginning calculation...
215 | #>  Progress: ----------------------------------------------------------------------------------------------------                                             100% Progress: -------------------------------------------------------------------------------------------------------------------------------------------      100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
216 | #> Completed calculation.
217 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/rolling_outcomes_variables_cr_2021_02_22_21_39_12.csv
218 | ```
219 | 
220 | ## Let’s combine our output into a single data frame
221 | 
222 | You can provide `wiz_combine()` with a set of data frames separated by
223 | commas. Alternatively, you can provide a vector of file names using the
224 | `files` argument. If you leave `files` blank, `wiz_combine()` automatically
225 | finds all the `.csv` files in the `output_folder` of your `wiz_frame`.
226 | 
227 | This resulting frame is essentially ready for modeling (using
228 | `tidymodels`, for example). Make sure to keep individual patients in the
229 | same fold if you divide this dataset into multiple folds.
230 | 
231 | ``` r
232 | model_data = wiz_combine(wf)
233 | #> Reading file: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/baseline_predictors_variables_cr_2021_02_22_21_38_17.csv...
234 | #> Reading file: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/growing_predictors_variables_cr_2021_02_22_21_38_34.csv...
235 | #> Reading file: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/rolling_outcomes_variables_cr_2021_02_22_21_39_12.csv...
236 | #> Reading file: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/rolling_predictors_category_med_2021_02_22_21_38_53.csv...
237 | #> Reading file: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/rolling_predictors_variables_cr_2021_02_22_21_38_15.csv...
238 | #> Joining, by = "id"
239 | #> Joining, by = "id"
240 | #> Joining, by = c("id", "time")
241 | #> Joining, by = c("id", "time")
242 | #> Joining, by = c("id", "time")
243 | 
244 | head(model_data)
245 | #>   id  sex      age  race baseline_cr          admit_time             dc_time
246 | #> 1  1 male 66.15955 asian    1.001175 2019-06-02 00:49:23 2019-06-08 10:38:23
247 | #> 2  1 male 66.15955 asian    1.001175 2019-06-02 00:49:23 2019-06-08 10:38:23
248 | #> 3  1 male 66.15955 asian    1.001175 2019-06-02 00:49:23 2019-06-08 10:38:23
249 | #> 4  1 male 66.15955 asian    1.001175 2019-06-02 00:49:23 2019-06-08 10:38:23
250 | #> 5  1 male 66.15955 asian    1.001175 2019-06-02 00:49:23 2019-06-08 10:38:23
251 | #> 6  1 male 66.15955 asian    1.001175 2019-06-02 00:49:23 2019-06-08 10:38:23
252 | #>   baseline_cr_min_2160 time growing_cr_max outcome_cr_max_24
253 | #> 1                   NA    0             NA          1.217020
254 | #> 2                   NA    6             NA          1.217020
255 | #> 3                   NA   12       1.039322          1.217020
256 | #> 4                   NA   18       1.217020          1.179722
257 | #> 5                   NA   24       1.217020          1.274939
258 | #> 6                   NA   30       1.217020          1.274939
259 | #>   med_acetaminophen_sum_168 med_aspirin_sum_168 med_diphenhydramine_sum_168
260 | #> 1                         0                   0                           0
261 | #> 2                         0                   0                           0
262 | #> 3                         1                   0                           0
263 | #> 4                         1                   0                           0
264 | #> 5                         1                   0                           0
265 | #> 6                         1                   0                           0
266 | #>   cr_length_06 cr_length_12 cr_max_06 cr_max_12 cr_mean_06 cr_mean_12
267 | #> 1            1            1  1.003659  1.030098   1.003659   1.030098
268 | #> 2            0            1  1.003659  1.003659   1.003659   1.003659
269 | #> 3            1            0  1.039322        NA   1.039322         NA
270 | #> 4            2            1  1.217020  1.039322   1.109985   1.039322
271 | #> 5            1            2  1.179722  1.217020   1.179722   1.109985
272 | #> 6            3            1  1.165989  1.179722   1.069630   1.179722
273 | #>   cr_median_06 cr_median_12 cr_min_06 cr_min_12
274 | #> 1     1.003659     1.030098 1.0036587  1.030098
275 | #> 2     1.003659     1.003659 1.0036587  1.003659
276 | #> 3     1.039322           NA 1.0393216        NA
277 | #> 4     1.109985     1.039322 1.0029506  1.039322
278 | #> 5     1.179722     1.109985 1.1797219  1.002951
279 | #> 6     1.096827     1.179722 0.9460735  1.179722
280 | ```
281 | 
282 | ## Testing wiz\_frame without writing output to files
283 | 
284 | If you want to simply test `wiz_frame`, you may prefer not to write your
285 | output to file. You can accomplish this by setting `output_file` to
286 | `FALSE`.
287 | 
288 | ``` r
289 | wf %>% 
290 |   wiz_add_predictors(variables = 'cr',
291 |                      lookback = hours(12), 
292 |                      window = hours(6), 
293 |                      stats = c(mean = mean,
294 |                                min = min,
295 |                                max = max,
296 |                                median = median,
297 |                                length = length),
298 |                      output_file = FALSE) %>% 
299 |   head()
300 | #> Joining, by = "id"
301 | #> Processing variables: cr...
302 | #> Allocating memory...
303 | #> Number of rows in final output: 1540
304 | #> Parallel processing is ENABLED.
305 | #> Determining missing values for each statistic...
306 | #> Beginning calculation...
307 | #>  Progress: ------------------------------------------------------------------------------------------                                                       100% Progress: -------------------------------------------------------------------------------------------------------------------------------------            100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
308 | #> Completed calculation.
309 | #>   id time cr_length_06 cr_length_12 cr_max_06 cr_max_12 cr_mean_06 cr_mean_12
310 | #> 1  1    0            1            1  1.003659  1.030098   1.003659   1.030098
311 | #> 2  1    6            0            1  1.003659  1.003659   1.003659   1.003659
312 | #> 3  1   12            1            0  1.039322        NA   1.039322         NA
313 | #> 4  1   18            2            1  1.217020  1.039322   1.109985   1.039322
314 | #> 5  1   24            1            2  1.179722  1.217020   1.179722   1.109985
315 | #> 6  1   30            3            1  1.165989  1.179722   1.069630   1.179722
316 | #>   cr_median_06 cr_median_12 cr_min_06 cr_min_12
317 | #> 1     1.003659     1.030098 1.0036587  1.030098
318 | #> 2     1.003659     1.003659 1.0036587  1.003659
319 | #> 3     1.039322           NA 1.0393216        NA
320 | #> 4     1.109985     1.039322 1.0029506  1.039322
321 | #> 5     1.179722     1.109985 1.1797219  1.002951
322 | #> 6     1.096827     1.179722 0.9460735  1.179722
323 | ```
324 | 
325 | ## You can also supply a vector of variables
326 | 
327 | ``` r
328 | wf %>% 
329 |   wiz_add_predictors(variables = c('cr', 'med_aspirin'),
330 |                      lookback = weeks(1), 
331 |                      stats = c(length = length),
332 |                      output_file = FALSE) %>% 
333 |   head()
334 | #> Joining, by = "id"
335 | #> Processing variables: cr, med_aspirin...
336 | #> Allocating memory...
337 | #> Number of rows in final output: 1540
338 | #> Parallel processing is ENABLED.
339 | #> Determining missing values for each statistic...
340 | #> Beginning calculation...
341 | #>  Progress: ------------------------------------------------------------------------------------------                                                       100% Progress: ------------------------------------------------------------------------------------------------------------------------------------             100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
342 | #> Completed calculation.
343 | #>   id time cr_length_168 med_aspirin_length_168
344 | #> 1  1    0             2                      0
345 | #> 2  1    6             2                      0
346 | #> 3  1   12             3                      0
347 | #> 4  1   18             5                      0
348 | #> 5  1   24             6                      0
349 | #> 6  1   30             9                      0
350 | ```
351 | 
352 | ## Category accepts regular expressions
353 | 
354 | ``` r
355 | wf %>% 
356 |   wiz_add_predictors(category = 'lab|med',
357 |                      lookback = hours(12), 
358 |                      stats = c(length = length),
359 |                      output_file = FALSE) %>% 
360 |   head()
361 | #> Joining, by = "id"
362 | #> Processing category: lab|med...
363 | #> Allocating memory...
364 | #> Number of rows in final output: 1540
365 | #> Parallel processing is ENABLED.
366 | #> Determining missing values for each statistic...
367 | #> Beginning calculation...
368 | #>  Progress: ------------------------------------------------------------------------------------------                                                       100% Progress: ----------------------------------------------------------------------------------------------------------------------------------               100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
369 | #> Completed calculation.
370 | #>   id time cr_length_12 med_acetaminophen_length_12 med_aspirin_length_12
371 | #> 1  1    0            2                           0                     0
372 | #> 2  1    6            1                           0                     0
373 | #> 3  1   12            1                           1                     0
374 | #> 4  1   18            3                           1                     0
375 | #> 5  1   24            3                           0                     0
376 | #> 6  1   30            4                           0                     0
377 | #>   med_diphenhydramine_length_12
378 | #> 1                             0
379 | #> 2                             0
380 | #> 3                             0
381 | #> 4                             0
382 | #> 5                             0
383 | #> 6                             0
384 | ```
385 | 
386 | ## Let’s benchmark the performance of our package
387 | 
388 | ### Running in parallel
389 | 
390 | ``` r
391 | benchmark_results = list()
392 | 
393 | # future::plan('multisession')
394 | 
395 | benchmark_results[['multisession']] = 
396 |   microbenchmark::microbenchmark(
397 |     wf %>% 
398 |       wiz_add_predictors(variable = 'cr',
399 |                          lookback = hours(48), 
400 |                          window = hours(6), 
401 |                          stats = c(mean = mean,
402 |                                    min = min,
403 |                                    max = max,
404 |                                    median = median,
405 |                                    length = length)),
406 |     times = 1
407 |   )
408 | #>  Progress: -----------------------------------------------------------------------------------------                                                        100% Progress: ----------------------------------------------------------------------------------------------------------------------------------               100% Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
409 | ```
410 | 
411 | ### Running in parallel with a chunk\_size of 20
412 | 
413 | ``` r
414 | 
415 | wf_with_chunks = wf
416 | wf_with_chunks$chunk_size = 20
417 | 
418 | benchmark_results[['multisession with chunk_size 20']] = 
419 |   microbenchmark::microbenchmark(
420 |     wf_with_chunks %>% 
421 |       wiz_add_predictors(variable = 'cr',
422 |                          lookback = hours(48), 
423 |                          window = hours(6), 
424 |                          stats = c(mean = mean,
425 |                                    min = min,
426 |                                    max = max,
427 |                                    median = median,
428 |                                    length = length)),
429 |     times = 1
430 |   )
431 | #> Processing chunk # 1 out of 5...
432 | #> Joining, by = "id"
433 | #> Processing variables: cr...
434 | #> Allocating memory...
435 | #> Number of rows in final output: 270
436 | #> Parallel processing is ENABLED.
437 | #> Determining missing values for each statistic...
438 | #> Beginning calculation...
439 | #>  Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
440 | #> Completed calculation.
441 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/chunk_1_rolling_predictors_variables_cr_2021_02_22_21_40_45.csv
442 | #> Processing chunk # 2 out of 5...
443 | #> Joining, by = "id"
444 | #> Processing variables: cr...
445 | #> Allocating memory...
446 | #> Number of rows in final output: 294
447 | #> Parallel processing is ENABLED.
448 | #> Determining missing values for each statistic...
449 | #> Beginning calculation...
450 | #>  Progress: ----------------------------------------------------------------------------------------------------------------------------------------------   100% Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
451 | #> Completed calculation.
452 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/chunk_2_rolling_predictors_variables_cr_2021_02_22_21_40_50.csv
453 | #> Processing chunk # 3 out of 5...
454 | #> Joining, by = "id"
455 | #> Processing variables: cr...
456 | #> Allocating memory...
457 | #> Number of rows in final output: 309
458 | #> Parallel processing is ENABLED.
459 | #> Determining missing values for each statistic...
460 | #> Beginning calculation...
461 | #>  Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
462 | #> Completed calculation.
463 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/chunk_3_rolling_predictors_variables_cr_2021_02_22_21_40_55.csv
464 | #> Processing chunk # 4 out of 5...
465 | #> Joining, by = "id"
466 | #> Processing variables: cr...
467 | #> Allocating memory...
468 | #> Number of rows in final output: 345
469 | #> Parallel processing is ENABLED.
470 | #> Determining missing values for each statistic...
471 | #> Beginning calculation...
472 | #>  Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
473 | #> Completed calculation.
474 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/chunk_4_rolling_predictors_variables_cr_2021_02_22_21_41_00.csv
475 | #> Processing chunk # 5 out of 5...
476 | #> Joining, by = "id"
477 | #> Processing variables: cr...
478 | #> Allocating memory...
479 | #> Number of rows in final output: 322
480 | #> Parallel processing is ENABLED.
481 | #> Determining missing values for each statistic...
482 | #> Beginning calculation...
483 | #>  Progress: ------------------------------------------------------------------------------------------------------------------------------------------------ 100%
484 | #> Completed calculation.
485 | #> The output file was written to: C:\Users\kdpsingh\AppData\Local\Temp\3\RtmpeMx3hc/wizard_dir/chunk_5_rolling_predictors_variables_cr_2021_02_22_21_41_05.csv
486 | ```
487 | 
488 | ### Running in serial
489 | 
490 | ``` r
491 | future::plan('sequential')
492 | 
493 | benchmark_results[['sequential']] = 
494 |   microbenchmark::microbenchmark(
495 |   wf %>% 
496 |     wiz_add_predictors(variable = 'cr',
497 |                        lookback = hours(48), 
498 |                        window = hours(6), 
499 |                        stats = c(mean = mean,
500 |                                  min = min,
501 |                                  max = max,
502 |                                  median = median,
503 |                                  length = length)),
504 |   times = 1
505 |   )
506 | ```
507 | 
508 | ## Benchmark results
509 | 
510 | ``` r
511 | benchmark_results
512 | #> $multisession
513 | #> Unit: seconds
514 | #>                                                                                                                                                                               expr
515 | #>  wf %>% wiz_add_predictors(variable = "cr", lookback = hours(48),      window = hours(6), stats = c(mean = mean, min = min, max = max,          median = median, length = length))
516 | #>       min       lq     mean   median       uq      max neval
517 | #>  20.38435 20.38435 20.38435 20.38435 20.38435 20.38435     1
518 | #> 
519 | #> $`multisession with chunk_size 20`
520 | #> Unit: seconds
521 | #>                                                                                                                                                                                           expr
522 | #>  wf_with_chunks %>% wiz_add_predictors(variable = "cr", lookback = hours(48),      window = hours(6), stats = c(mean = mean, min = min, max = max,          median = median, length = length))
523 | #>       min       lq     mean   median       uq      max neval
524 | #>  23.51722 23.51722 23.51722 23.51722 23.51722 23.51722     1
525 | #> 
526 | #> $sequential
527 | #> Unit: seconds
528 | #>                                                                                                                                                                               expr
529 | #>  wf %>% wiz_add_predictors(variable = "cr", lookback = hours(48),      window = hours(6), stats = c(mean = mean, min = min, max = max,          median = median, length = length))
530 | #>       min       lq     mean   median       uq      max neval
531 | #>  182.9422 182.9422 182.9422 182.9422 182.9422 182.9422     1
532 | ```
533 | 


--------------------------------------------------------------------------------
/data-raw/sample_data.R:
--------------------------------------------------------------------------------
 1 | ## Code to prepare the `sample_fixed_data` and `sample_temporal_data` datasets.
 2 | ##
 3 | ## Each dataset is simulated after a `set.seed()` call and saved with
 4 | ## `usethis::use_data()`. The saved data depend on the order of the random
 5 | ## draws, so do not reorder the sampling calls below.
 6 | 
 7 | library(tidyverse)
 8 | library(lubridate)
 9 | 
10 | ## sample_fixed_data
11 | ## One row per id (1000 ids): sex, age, race, baseline creatinine, and
12 | ## admission and discharge times.
13 | 
14 | set.seed(1)
15 | sample_fixed_data =
16 |   tibble(id = 1:1000) %>%
17 |   mutate(sex = sample(c('male', 'female'), 1000, replace = TRUE)) %>%
18 |   mutate(age = rnorm(n = 1000, mean = 65, sd = 15)) %>%
19 |   mutate(race = sample(c('asian', 'black', 'white', 'multiracial', 'other'), 1000, replace = TRUE)) %>%
20 |   mutate(baseline_cr =
21 |            case_when(sex == 'male' ~ rnorm(n = n(), mean = 1, sd = 0.1),
22 |                      sex == 'female' ~ rnorm(n = n(), mean = 0.8, sd = 0.1))) %>%
23 |   # Admission times are drawn at one-second resolution from within 2019
24 |   mutate(admit_time = sample(as_datetime(mdy_hms('1-1-2019 00:00:00'):mdy_hms('12-31-2019 23:59:00')), 1000, replace=TRUE)) %>%
25 |   # Discharge is 8 to 167 hours plus 0 to 59 minutes after admission
26 |   mutate(dc_time = admit_time +
27 |            hours(sample(8:167, n(), replace=TRUE)) +
28 |                    minutes(sample(0:59, n(), replace = TRUE))) %>%
29 |   as.data.frame()
30 | 
31 | usethis::use_data(sample_fixed_data, overwrite = TRUE)
32 | 
33 | ## sample_temporal_data
34 | ## Long-format observations with columns id, time, variable, value, category.
35 | 
36 | set.seed(2)
37 | sample_temporal_data =
38 |   tibble(id = sample(1:1000, 100000, replace = TRUE)) %>%
39 |   # Name the join key explicitly instead of relying on implicit column matching
40 |   left_join(., sample_fixed_data %>% select(id, admit_time, baseline_cr, sex, age), by = 'id') %>%
41 |   arrange(id) %>%
42 |   group_by(id) %>%
43 |   # Running hour and minute offsets within each id
44 |   mutate(hours_to_add = cumsum(sample(0:6, n(), replace = TRUE))) %>%
45 |   mutate(mins_to_add = cumsum(sample(0:59, n(), replace = TRUE))) %>%
46 |   # Observations start 1 to 12 hours before admission, plus the running offsets
47 |   mutate(time = admit_time - hours(sample(1:12, n(), replace = TRUE)) +
48 |                                       hours(hours_to_add) + minutes(mins_to_add)) %>%
49 |   ungroup() %>%
50 |   # Simulated creatinine: baseline plus sex-specific noise and a small age effect
51 |   mutate(cr =
52 |            case_when(sex == 'male' ~ baseline_cr + rnorm(n = n(), mean = 0.05, sd = 0.10) + age*0.0005,
53 |                      sex == 'female' ~ baseline_cr + rnorm(n = n(), mean = -0.05, sd = 0.10) + age*0.0001)) %>%
54 |   mutate(cr_abnl =
55 |            case_when(cr < 0.3 ~ 'low',
56 |                      cr >= 1.3 ~ 'high',
57 |                      TRUE ~ 'normal')) %>%
58 |   mutate(cr_high =
59 |            case_when(cr >= 1.3 ~ 'yes',
60 |                      TRUE ~ 'no')) %>%
61 |   select(-admit_time, -baseline_cr, -sex, -age, -hours_to_add, -mins_to_add) %>%
62 |   # Stack cr, cr_abnl, and cr_high into variable/value pairs
63 |   gather(variable, value, cr, cr_abnl, cr_high) %>%
64 |   # cr is categorized as a lab; the derived cr_abnl and cr_high are flags
65 |   mutate(category =
66 |            case_when(variable == 'cr' ~ 'lab',
67 |                      TRUE ~ 'flag')) %>%
68 |   as.data.frame()
69 | 
70 | ## medications
71 | ## Simulated medication records (variable 'med'), appended to sample_temporal_data below.
72 | 
73 | set.seed(3)
74 | medications =
75 |   tibble(id = sample(1:1000, 10000, replace = TRUE)) %>%
76 |   left_join(., sample_fixed_data %>% select(id, admit_time, baseline_cr, sex, age), by = 'id') %>%
77 |   arrange(id) %>%
78 |   group_by(id) %>%
79 |   # Running offsets built from 12 to 23 hours and 0 to 59 minutes per row within each id
80 |   mutate(hours_to_add = cumsum(sample(12:23, n(), replace = TRUE))) %>%
81 |   mutate(mins_to_add = cumsum(sample(0:59, n(), replace = TRUE))) %>%
82 |   mutate(time = admit_time - hours(sample(1:12, n(), replace = TRUE)) +
83 |            hours(hours_to_add) + minutes(mins_to_add)) %>%
84 |   ungroup() %>%
85 |   mutate(variable = 'med') %>%
86 |   mutate(value =
87 |            sample(c('aspirin', 'acetaminophen', 'diphenhydramine'), n(), replace = TRUE)) %>%
88 |   mutate(category = 'medications') %>%
89 |   select(id, time, variable, value, category) %>%
90 |   arrange(id, time)
91 | 
92 | sample_temporal_data =
93 |   bind_rows(sample_temporal_data, medications) %>%
94 |   arrange(id, time)
95 | 
96 | usethis::use_data(sample_temporal_data, overwrite = TRUE)
97 | 


--------------------------------------------------------------------------------
/data/sample_fixed_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kdpsingh/wizard/c1f4e3320dda4fc9b053631703ca7eef8486c19c/data/sample_fixed_data.rda


--------------------------------------------------------------------------------
/data/sample_temporal_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kdpsingh/wizard/c1f4e3320dda4fc9b053631703ca7eef8486c19c/data/sample_temporal_data.rda


--------------------------------------------------------------------------------
/inst/extdata/fixed_data.csv:
--------------------------------------------------------------------------------
  1 | patient_id,encounter_id,admit_time,outcome
  2 | 006F52E9102A8D3BE2FE5614F42BA989,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T12:53:00Z,2019-04-03T07:25:00Z
  3 | 006F52E9102A8D3BE2FE5614F42BA989,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T06:18:00Z,2019-04-09T10:24:00Z
  4 | 006F52E9102A8D3BE2FE5614F42BA989,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T08:40:00Z,2019-04-08T12:58:00Z
  5 | 006F52E9102A8D3BE2FE5614F42BA989,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T12:14:00Z,2019-04-09T13:08:00Z
  6 | 013D407166EC4FA56EB1E1F8CBE183B9,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T01:57:00Z,2019-04-03T01:57:00Z
  7 | 013D407166EC4FA56EB1E1F8CBE183B9,C4CA4238A0B923820DCC509A6F75849B,2019-04-08T19:21:00Z,2019-04-09T19:01:00Z
  8 | 013D407166EC4FA56EB1E1F8CBE183B9,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T22:31:00Z,2019-04-07T20:42:00Z
  9 | 02522A2B2726FB0A03BB19F2D8D9524D,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T10:38:00Z,2019-04-06T17:17:00Z
 10 | 02522A2B2726FB0A03BB19F2D8D9524D,C4CA4238A0B923820DCC509A6F75849B,2019-04-09T23:11:00Z,2019-04-09T23:11:00Z
 11 | 02522A2B2726FB0A03BB19F2D8D9524D,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T02:57:00Z,2019-04-09T22:50:00Z
 12 | 02522A2B2726FB0A03BB19F2D8D9524D,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T06:19:00Z,2019-04-07T16:55:00Z
 13 | 0336DCBAB05B9D5AD24F4333C7658A0E,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T18:04:00Z,2019-04-06T02:53:00Z
 14 | 0336DCBAB05B9D5AD24F4333C7658A0E,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T16:45:00Z,2019-04-07T04:21:00Z
 15 | 0336DCBAB05B9D5AD24F4333C7658A0E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-06T18:21:00Z,2019-04-06T18:21:00Z
 16 | 045117B0E0A11A242B9765E79CBF113F,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T22:45:00Z,2019-04-09T21:54:00Z
 17 | 045117B0E0A11A242B9765E79CBF113F,C4CA4238A0B923820DCC509A6F75849B,2019-04-08T14:19:00Z,2019-04-08T14:19:00Z
 18 | 045117B0E0A11A242B9765E79CBF113F,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T13:41:00Z,2019-04-02T13:41:00Z
 19 | 045117B0E0A11A242B9765E79CBF113F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-04T02:25:00Z,2019-04-04T02:25:00Z
 20 | 06409663226AF2F3114485AA4E0A23B4,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T05:39:00Z,2019-04-09T06:57:00Z
 21 | 06409663226AF2F3114485AA4E0A23B4,C4CA4238A0B923820DCC509A6F75849B,2019-04-08T16:18:00Z,2019-04-09T02:19:00Z
 22 | 06409663226AF2F3114485AA4E0A23B4,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T03:11:00Z,2019-04-02T03:11:00Z
 23 | 06409663226AF2F3114485AA4E0A23B4,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T07:21:00Z,2019-04-05T20:19:00Z
 24 | 069059B7EF840F0C74A814EC9237B6EC,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T15:55:00Z,2019-04-09T17:09:00Z
 25 | 069059B7EF840F0C74A814EC9237B6EC,C4CA4238A0B923820DCC509A6F75849B,2019-04-06T07:04:00Z,2019-04-06T07:04:00Z
 26 | 069059B7EF840F0C74A814EC9237B6EC,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T01:51:00Z,2019-04-08T19:09:00Z
 27 | 069059B7EF840F0C74A814EC9237B6EC,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T22:39:00Z,2019-04-08T11:35:00Z
 28 | 0777D5C17D4066B82AB86DFF8A46AF6F,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T11:49:00Z,2019-04-09T05:59:00Z
 29 | 0777D5C17D4066B82AB86DFF8A46AF6F,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T07:39:00Z,2019-04-08T21:54:00Z
 30 | 0777D5C17D4066B82AB86DFF8A46AF6F,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T06:53:00Z,2019-04-04T06:53:00Z
 31 | 0777D5C17D4066B82AB86DFF8A46AF6F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T11:20:00Z,2019-04-03T11:20:00Z
 32 | 07E1CD7DCA89A1678042477183B7AC3F,A87FF679A2F3E71D9181A67B7542122C,2019-04-07T15:47:00Z,2019-04-08T07:53:00Z
 33 | 07E1CD7DCA89A1678042477183B7AC3F,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T22:34:00Z,2019-04-01T22:34:00Z
 34 | 07E1CD7DCA89A1678042477183B7AC3F,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T09:27:00Z,2019-04-07T00:43:00Z
 35 | 07E1CD7DCA89A1678042477183B7AC3F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T06:42:00Z,2019-04-07T15:36:00Z
 36 | 084B6FBB10729ED4DA8C3D3F5A3AE7C9,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T06:30:00Z,2019-04-07T11:03:00Z
 37 | 084B6FBB10729ED4DA8C3D3F5A3AE7C9,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T17:18:00Z,2019-04-04T17:18:00Z
 38 | 084B6FBB10729ED4DA8C3D3F5A3AE7C9,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T00:10:00Z,2019-04-03T08:02:00Z
 39 | 084B6FBB10729ED4DA8C3D3F5A3AE7C9,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-07T11:44:00Z,2019-04-09T12:35:00Z
 40 | 0A09C8844BA8F0936C20BD791130D6B6,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T08:32:00Z,2019-04-08T09:18:00Z
 41 | 0A09C8844BA8F0936C20BD791130D6B6,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T09:30:00Z,2019-04-07T03:46:00Z
 42 | 0A09C8844BA8F0936C20BD791130D6B6,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T02:32:00Z,2019-04-09T01:48:00Z
 43 | 0A09C8844BA8F0936C20BD791130D6B6,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T20:35:00Z,2019-04-07T13:58:00Z
 44 | 0AA1883C6411F7873CB83DACB17B0AFC,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T06:40:00Z,2019-04-04T06:40:00Z
 45 | 0AA1883C6411F7873CB83DACB17B0AFC,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T19:41:00Z,2019-04-02T19:41:00Z
 46 | 0AA1883C6411F7873CB83DACB17B0AFC,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T00:10:00Z,2019-04-06T11:10:00Z
 47 | 0AA1883C6411F7873CB83DACB17B0AFC,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T09:02:00Z,2019-04-09T21:36:00Z
 48 | 0E65972DCE68DAD4D52D063967F0A705,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T00:32:00Z,2019-04-07T17:48:00Z
 49 | 0E65972DCE68DAD4D52D063967F0A705,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T07:36:00Z,2019-04-07T21:43:00Z
 50 | 0E65972DCE68DAD4D52D063967F0A705,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T07:21:00Z,2019-04-03T07:21:00Z
 51 | 0E65972DCE68DAD4D52D063967F0A705,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T04:10:00Z,2019-04-05T06:36:00Z
 52 | 0F28B5D49B3020AFEECD95B4009ADF4C,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T16:33:00Z,2019-04-05T16:33:00Z
 53 | 0F28B5D49B3020AFEECD95B4009ADF4C,C4CA4238A0B923820DCC509A6F75849B,2019-04-09T02:39:00Z,2019-04-09T02:39:00Z
 54 | 0F28B5D49B3020AFEECD95B4009ADF4C,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T02:15:00Z,2019-04-04T18:10:00Z
 55 | 0F28B5D49B3020AFEECD95B4009ADF4C,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T04:14:00Z,2019-04-07T15:22:00Z
 56 | 1385974ED5904A438616FF7BDB3F7439,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T19:20:00Z,2019-04-09T19:28:00Z
 57 | 1385974ED5904A438616FF7BDB3F7439,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T18:30:00Z,2019-04-09T06:42:00Z
 58 | 1385974ED5904A438616FF7BDB3F7439,C81E728D9D4C2F636F067F89CC14862C,2019-04-08T23:07:00Z,2019-04-08T23:07:00Z
 59 | 1385974ED5904A438616FF7BDB3F7439,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T00:50:00Z,2019-04-09T00:45:00Z
 60 | 140F6969D5213FD0ECE03148E62E461E,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T13:17:00Z,2019-04-07T02:26:00Z
 61 | 140F6969D5213FD0ECE03148E62E461E,C4CA4238A0B923820DCC509A6F75849B,2019-04-06T23:49:00Z,2019-04-09T06:43:00Z
 62 | 140F6969D5213FD0ECE03148E62E461E,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T08:22:00Z,2019-04-09T09:10:00Z
 63 | 140F6969D5213FD0ECE03148E62E461E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-07T00:32:00Z,2019-04-08T10:46:00Z
 64 | 149E9677A5989FD342AE44213DF68868,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T22:06:00Z,2019-04-09T06:49:00Z
 65 | 149E9677A5989FD342AE44213DF68868,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T07:46:00Z,2019-04-03T07:46:00Z
 66 | 149E9677A5989FD342AE44213DF68868,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T21:57:00Z,2019-04-07T23:30:00Z
 67 | 1AFA34A7F984EEABDBB0A7D494132EE5,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T20:53:00Z,2019-04-01T20:53:00Z
 68 | 1AFA34A7F984EEABDBB0A7D494132EE5,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T14:18:00Z,2019-04-05T14:18:00Z
 69 | 1AFA34A7F984EEABDBB0A7D494132EE5,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T02:47:00Z,2019-04-04T02:47:00Z
 70 | 1AFA34A7F984EEABDBB0A7D494132EE5,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T15:20:00Z,2019-04-07T10:44:00Z
 71 | 1C9AC0159C94D8D0CBEDC973445AF2DA,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T15:24:00Z,2019-04-09T20:10:00Z
 72 | 1C9AC0159C94D8D0CBEDC973445AF2DA,C4CA4238A0B923820DCC509A6F75849B,2019-04-06T18:20:00Z,2019-04-06T18:20:00Z
 73 | 1C9AC0159C94D8D0CBEDC973445AF2DA,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T16:13:00Z,2019-04-09T03:34:00Z
 74 | 1C9AC0159C94D8D0CBEDC973445AF2DA,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-06T07:24:00Z,2019-04-06T07:24:00Z
 75 | 1D7F7ABC18FCB43975065399B0D1E48E,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T20:48:00Z,2019-04-03T23:31:00Z
 76 | 1D7F7ABC18FCB43975065399B0D1E48E,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T05:13:00Z,2019-04-05T05:13:00Z
 77 | 1D7F7ABC18FCB43975065399B0D1E48E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T06:12:00Z,2019-04-08T19:50:00Z
 78 | 1FF8A7B5DC7A7D1F0ED65AAA29C04B1E,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T04:40:00Z,2019-04-05T04:40:00Z
 79 | 1FF8A7B5DC7A7D1F0ED65AAA29C04B1E,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T13:07:00Z,2019-04-08T06:45:00Z
 80 | 1FF8A7B5DC7A7D1F0ED65AAA29C04B1E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-04T13:00:00Z,2019-04-04T13:00:00Z
 81 | 202CB962AC59075B964B07152D234B70,A87FF679A2F3E71D9181A67B7542122C,2019-04-09T04:52:00Z,2019-04-09T04:52:00Z
 82 | 202CB962AC59075B964B07152D234B70,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T06:34:00Z,2019-04-09T00:32:00Z
 83 | 202CB962AC59075B964B07152D234B70,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T18:02:00Z,2019-04-08T07:55:00Z
 84 | 202CB962AC59075B964B07152D234B70,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T15:14:00Z,2019-04-08T22:20:00Z
 85 | 2723D092B63885E0D7C260CC007E8B9D,C4CA4238A0B923820DCC509A6F75849B,2019-04-06T16:57:00Z,2019-04-07T09:08:00Z
 86 | 2723D092B63885E0D7C260CC007E8B9D,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T18:54:00Z,2019-04-09T09:16:00Z
 87 | 2723D092B63885E0D7C260CC007E8B9D,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T07:18:00Z,2019-04-08T11:41:00Z
 88 | 2A79EA27C279E471F4D180B08D62B00A,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T04:16:00Z,2019-04-03T15:30:00Z
 89 | 2A79EA27C279E471F4D180B08D62B00A,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T02:09:00Z,2019-04-08T00:32:00Z
 90 | 2A79EA27C279E471F4D180B08D62B00A,C81E728D9D4C2F636F067F89CC14862C,2019-04-09T04:08:00Z,2019-04-09T04:08:00Z
 91 | 2B24D495052A8CE66358EB576B8912C8,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T06:20:00Z,2019-04-09T07:59:00Z
 92 | 2B24D495052A8CE66358EB576B8912C8,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T15:45:00Z,2019-04-05T11:26:00Z
 93 | 2B44928AE11FB9384C4CF38708677C48,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T15:30:00Z,2019-04-01T15:30:00Z
 94 | 2B44928AE11FB9384C4CF38708677C48,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T06:09:00Z,2019-04-08T03:53:00Z
 95 | 2B44928AE11FB9384C4CF38708677C48,C81E728D9D4C2F636F067F89CC14862C,2019-04-08T04:52:00Z,2019-04-08T04:52:00Z
 96 | 2B44928AE11FB9384C4CF38708677C48,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T04:43:00Z,2019-04-09T19:21:00Z
 97 | 31FEFC0E570CB3860F2A6D4B38C6490D,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T07:58:00Z,2019-04-02T01:48:00Z
 98 | 31FEFC0E570CB3860F2A6D4B38C6490D,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T19:23:00Z,2019-04-08T19:34:00Z
 99 | 31FEFC0E570CB3860F2A6D4B38C6490D,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T21:21:00Z,2019-04-02T22:11:00Z
100 | 31FEFC0E570CB3860F2A6D4B38C6490D,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T10:51:00Z,2019-04-09T15:37:00Z
101 | 3636638817772E42B59D74CFF571FBB3,A87FF679A2F3E71D9181A67B7542122C,2019-04-07T08:40:00Z,2019-04-07T08:40:00Z
102 | 3636638817772E42B59D74CFF571FBB3,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T19:26:00Z,2019-04-04T05:42:00Z
103 | 3636638817772E42B59D74CFF571FBB3,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T08:53:00Z,2019-04-08T14:52:00Z
104 | 3636638817772E42B59D74CFF571FBB3,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T07:34:00Z,2019-04-07T11:52:00Z
105 | 3644A684F98EA8FE223C713B77189A77,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T18:52:00Z,2019-04-09T14:11:00Z
106 | 3644A684F98EA8FE223C713B77189A77,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T01:17:00Z,2019-04-01T01:17:00Z
107 | 3644A684F98EA8FE223C713B77189A77,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T22:16:00Z,2019-04-02T22:16:00Z
108 | 3644A684F98EA8FE223C713B77189A77,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-07T01:57:00Z,2019-04-09T10:23:00Z
109 | 37A749D808E46495A8DA1E5352D03CAE,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T20:53:00Z,2019-04-09T04:48:00Z
110 | 37A749D808E46495A8DA1E5352D03CAE,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T23:40:00Z,2019-04-09T13:40:00Z
111 | 37A749D808E46495A8DA1E5352D03CAE,C81E728D9D4C2F636F067F89CC14862C,2019-04-06T13:43:00Z,2019-04-09T18:01:00Z
112 | 38AF86134B65D0F10FE33D30DD76442E,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T03:29:00Z,2019-04-09T12:15:00Z
113 | 38AF86134B65D0F10FE33D30DD76442E,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T00:14:00Z,2019-04-08T17:00:00Z
114 | 38AF86134B65D0F10FE33D30DD76442E,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T08:30:00Z,2019-04-09T03:52:00Z
115 | 38AF86134B65D0F10FE33D30DD76442E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T04:40:00Z,2019-04-09T01:12:00Z
116 | 38B3EFF8BAF56627478EC76A704E9B52,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T01:27:00Z,2019-04-07T08:34:00Z
117 | 38B3EFF8BAF56627478EC76A704E9B52,C81E728D9D4C2F636F067F89CC14862C,2019-04-06T06:10:00Z,2019-04-06T06:10:00Z
118 | 38B3EFF8BAF56627478EC76A704E9B52,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T09:37:00Z,2019-04-05T02:23:00Z
119 | 3988C7F88EBCB58C6CE932B957B6F332,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T16:10:00Z,2019-04-09T02:11:00Z
120 | 3988C7F88EBCB58C6CE932B957B6F332,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T03:49:00Z,2019-04-09T17:35:00Z
121 | 3DEF184AD8F4755FF269862EA77393DD,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T18:56:00Z,2019-04-04T17:38:00Z
122 | 3DEF184AD8F4755FF269862EA77393DD,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-09T12:38:00Z,2019-04-09T16:59:00Z
123 | 42A0E188F5033BC65BF8D78622277C4E,C4CA4238A0B923820DCC509A6F75849B,2019-04-07T00:33:00Z,2019-04-09T19:30:00Z
124 | 42A0E188F5033BC65BF8D78622277C4E,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T02:48:00Z,2019-04-09T05:50:00Z
125 | 42A0E188F5033BC65BF8D78622277C4E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T11:08:00Z,2019-04-08T10:09:00Z
126 | 47D1E990583C9C67424D369F3414728E,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T15:33:00Z,2019-04-06T10:54:00Z
127 | 47D1E990583C9C67424D369F3414728E,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T20:51:00Z,2019-04-07T14:28:00Z
128 | 47D1E990583C9C67424D369F3414728E,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T21:13:00Z,2019-04-08T03:32:00Z
129 | 47D1E990583C9C67424D369F3414728E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T18:32:00Z,2019-04-05T01:01:00Z
130 | 4C56FF4CE4AAF9573AA5DFF913DF997A,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T22:17:00Z,2019-04-09T22:02:00Z
131 | 4C56FF4CE4AAF9573AA5DFF913DF997A,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T02:35:00Z,2019-04-09T04:28:00Z
132 | 4C56FF4CE4AAF9573AA5DFF913DF997A,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T13:21:00Z,2019-04-08T02:36:00Z
133 | 4C5BDE74A8F110656874902F07378009,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T23:03:00Z,2019-04-04T17:33:00Z
134 | 4C5BDE74A8F110656874902F07378009,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T05:52:00Z,2019-04-07T04:30:00Z
135 | 4C5BDE74A8F110656874902F07378009,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T18:18:00Z,2019-04-06T15:40:00Z
136 | 4C5BDE74A8F110656874902F07378009,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T00:56:00Z,2019-04-06T13:48:00Z
137 | 5878A7AB84FB43402106C575658472FA,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T13:28:00Z,2019-04-08T14:09:00Z
138 | 5878A7AB84FB43402106C575658472FA,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T22:22:00Z,2019-04-05T04:03:00Z
139 | 5878A7AB84FB43402106C575658472FA,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T09:23:00Z,2019-04-08T09:47:00Z
140 | 5878A7AB84FB43402106C575658472FA,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T08:21:00Z,2019-04-08T05:55:00Z
141 | 58A2FC6ED39FD083F55D4182BF88826D,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T08:29:00Z,2019-04-08T23:31:00Z
142 | 58A2FC6ED39FD083F55D4182BF88826D,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T07:08:00Z,2019-04-05T11:49:00Z
143 | 58A2FC6ED39FD083F55D4182BF88826D,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T01:38:00Z,2019-04-08T15:37:00Z
144 | 58A2FC6ED39FD083F55D4182BF88826D,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T06:42:00Z,2019-04-08T11:07:00Z
145 | 5EF059938BA799AAA845E1C2E8A762BD,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T19:51:00Z,2019-04-05T04:58:00Z
146 | 5EF059938BA799AAA845E1C2E8A762BD,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T09:38:00Z,2019-04-09T11:58:00Z
147 | 5EF059938BA799AAA845E1C2E8A762BD,C81E728D9D4C2F636F067F89CC14862C,2019-04-08T00:18:00Z,2019-04-08T00:18:00Z
148 | 5EF059938BA799AAA845E1C2E8A762BD,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T02:43:00Z,2019-04-08T01:11:00Z
149 | 5F93F983524DEF3DCA464469D2CF9F3E,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T01:21:00Z,2019-04-07T19:23:00Z
150 | 5F93F983524DEF3DCA464469D2CF9F3E,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T06:04:00Z,2019-04-09T12:11:00Z
151 | 5F93F983524DEF3DCA464469D2CF9F3E,C81E728D9D4C2F636F067F89CC14862C,2019-04-07T11:42:00Z,2019-04-07T11:42:00Z
152 | 5F93F983524DEF3DCA464469D2CF9F3E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T21:28:00Z,2019-04-09T22:56:00Z
153 | 5FD0B37CD7DBBB00F97BA6CE92BF5ADD,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T02:25:00Z,2019-04-04T13:56:00Z
154 | 5FD0B37CD7DBBB00F97BA6CE92BF5ADD,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T19:00:00Z,2019-04-04T23:21:00Z
155 | 5FD0B37CD7DBBB00F97BA6CE92BF5ADD,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T09:02:00Z,2019-04-05T20:23:00Z
156 | 65B9EEA6E1CC6BB9F0CD2A47751A186F,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T03:09:00Z,2019-04-08T16:21:00Z
157 | 65B9EEA6E1CC6BB9F0CD2A47751A186F,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T15:45:00Z,2019-04-05T05:18:00Z
158 | 65B9EEA6E1CC6BB9F0CD2A47751A186F,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T15:20:00Z,2019-04-08T12:24:00Z
159 | 65B9EEA6E1CC6BB9F0CD2A47751A186F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T22:35:00Z,2019-04-08T23:42:00Z
160 | 65DED5353C5EE48D0B7D48C591B8F430,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T01:24:00Z,2019-04-04T05:59:00Z
161 | 65DED5353C5EE48D0B7D48C591B8F430,C4CA4238A0B923820DCC509A6F75849B,2019-04-07T12:05:00Z,2019-04-07T12:05:00Z
162 | 65DED5353C5EE48D0B7D48C591B8F430,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T05:10:00Z,2019-04-08T08:23:00Z
163 | 65DED5353C5EE48D0B7D48C591B8F430,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T02:38:00Z,2019-04-09T17:47:00Z
164 | 6974CE5AC660610B44D9B9FED0FF9548,A87FF679A2F3E71D9181A67B7542122C,2019-04-08T06:07:00Z,2019-04-08T06:07:00Z
165 | 6974CE5AC660610B44D9B9FED0FF9548,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T15:07:00Z,2019-04-09T01:17:00Z
166 | 6974CE5AC660610B44D9B9FED0FF9548,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T19:36:00Z,2019-04-01T19:36:00Z
167 | 698D51A19D8A121CE581499D7B701668,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T09:01:00Z,2019-04-09T11:27:00Z
168 | 698D51A19D8A121CE581499D7B701668,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T00:51:00Z,2019-04-08T10:15:00Z
169 | 698D51A19D8A121CE581499D7B701668,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-06T20:04:00Z,2019-04-06T20:04:00Z
170 | 6C4B761A28B734FE93831E3FB400CE87,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T21:59:00Z,2019-04-08T22:15:00Z
171 | 6C4B761A28B734FE93831E3FB400CE87,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T23:15:00Z,2019-04-08T04:45:00Z
172 | 6C4B761A28B734FE93831E3FB400CE87,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T07:43:00Z,2019-04-08T22:29:00Z
173 | 6C4B761A28B734FE93831E3FB400CE87,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T22:58:00Z,2019-04-08T16:38:00Z
174 | 6CDD60EA0045EB7A6EC44C54D29ED402,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T14:46:00Z,2019-04-03T21:25:00Z
175 | 6CDD60EA0045EB7A6EC44C54D29ED402,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T18:36:00Z,2019-04-01T18:36:00Z
176 | 6CDD60EA0045EB7A6EC44C54D29ED402,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T08:59:00Z,2019-04-02T14:52:00Z
177 | 6CDD60EA0045EB7A6EC44C54D29ED402,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T16:51:00Z,2019-04-08T20:24:00Z
178 | 73278A4A86960EEB576A8FD4C9EC6997,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T21:53:00Z,2019-04-09T03:06:00Z
179 | 73278A4A86960EEB576A8FD4C9EC6997,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T10:38:00Z,2019-04-08T10:04:00Z
180 | 73278A4A86960EEB576A8FD4C9EC6997,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T14:46:00Z,2019-04-07T11:36:00Z
181 | 76DC611D6EBAAFC66CC0879C71B5DB5C,A87FF679A2F3E71D9181A67B7542122C,2019-04-07T16:01:00Z,2019-04-08T11:32:00Z
182 | 76DC611D6EBAAFC66CC0879C71B5DB5C,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T23:41:00Z,2019-04-03T21:49:00Z
183 | 76DC611D6EBAAFC66CC0879C71B5DB5C,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T04:49:00Z,2019-04-08T00:06:00Z
184 | 76DC611D6EBAAFC66CC0879C71B5DB5C,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T22:51:00Z,2019-04-08T17:06:00Z
185 | 7E7757B1E12ABCB736AB9A754FFB617A,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T02:12:00Z,2019-04-06T05:11:00Z
186 | 7E7757B1E12ABCB736AB9A754FFB617A,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T23:33:00Z,2019-04-09T23:22:00Z
187 | 7E7757B1E12ABCB736AB9A754FFB617A,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-04T20:14:00Z,2019-04-09T13:01:00Z
188 | 7EF605FC8DBA5425D6965FBD4C8FBE1F,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T01:27:00Z,2019-04-05T01:27:00Z
189 | 7EF605FC8DBA5425D6965FBD4C8FBE1F,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T00:45:00Z,2019-04-09T21:16:00Z
190 | 7EF605FC8DBA5425D6965FBD4C8FBE1F,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T22:09:00Z,2019-04-09T12:55:00Z
191 | 7EF605FC8DBA5425D6965FBD4C8FBE1F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T07:28:00Z,2019-04-05T07:28:00Z
192 | 7F1DE29E6DA19D22B51C68001E7E0E54,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T06:26:00Z,2019-04-05T07:14:00Z
193 | 7F1DE29E6DA19D22B51C68001E7E0E54,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T15:39:00Z,2019-04-09T11:28:00Z
194 | 7F1DE29E6DA19D22B51C68001E7E0E54,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T02:52:00Z,2019-04-02T02:52:00Z
195 | 7F1DE29E6DA19D22B51C68001E7E0E54,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T12:57:00Z,2019-04-03T12:57:00Z
196 | 7F6FFAA6BB0B408017B62254211691B5,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T10:48:00Z,2019-04-08T10:47:00Z
197 | 7F6FFAA6BB0B408017B62254211691B5,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T04:10:00Z,2019-04-06T01:48:00Z
198 | 7F6FFAA6BB0B408017B62254211691B5,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T04:20:00Z,2019-04-05T04:20:00Z
199 | 7F6FFAA6BB0B408017B62254211691B5,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T19:25:00Z,2019-04-09T15:41:00Z
200 | 82161242827B703E6ACF9C726942A1E4,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T03:27:00Z,2019-04-09T13:09:00Z
201 | 82161242827B703E6ACF9C726942A1E4,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T22:26:00Z,2019-04-07T09:15:00Z
202 | 82161242827B703E6ACF9C726942A1E4,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T11:18:00Z,2019-04-08T20:26:00Z
203 | 82AA4B0AF34C2313A562076992E50AA3,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T14:48:00Z,2019-04-09T06:09:00Z
204 | 82AA4B0AF34C2313A562076992E50AA3,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T12:27:00Z,2019-04-09T17:29:00Z
205 | 82AA4B0AF34C2313A562076992E50AA3,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T02:08:00Z,2019-04-09T20:59:00Z
206 | 84D9EE44E457DDEF7F2C4F25DC8FA865,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T07:38:00Z,2019-04-09T06:21:00Z
207 | 84D9EE44E457DDEF7F2C4F25DC8FA865,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T13:28:00Z,2019-04-07T17:40:00Z
208 | 84D9EE44E457DDEF7F2C4F25DC8FA865,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T23:03:00Z,2019-04-08T20:38:00Z
209 | 85D8CE590AD8981CA2C8286F79F59954,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T07:42:00Z,2019-04-08T20:02:00Z
210 | 85D8CE590AD8981CA2C8286F79F59954,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T18:19:00Z,2019-04-05T17:13:00Z
211 | 85D8CE590AD8981CA2C8286F79F59954,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T20:43:00Z,2019-04-08T07:51:00Z
212 | 85D8CE590AD8981CA2C8286F79F59954,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T22:56:00Z,2019-04-06T10:47:00Z
213 | 8D5E957F297893487BD98FA830FA6413,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T03:48:00Z,2019-04-09T03:53:00Z
214 | 8D5E957F297893487BD98FA830FA6413,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T19:41:00Z,2019-04-04T19:41:00Z
215 | 8D5E957F297893487BD98FA830FA6413,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T01:04:00Z,2019-04-06T14:42:00Z
216 | 8D5E957F297893487BD98FA830FA6413,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-07T15:13:00Z,2019-04-09T17:11:00Z
217 | 8F53295A73878494E9BC8DD6C3C7104F,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T14:38:00Z,2019-04-09T07:25:00Z
218 | 8F53295A73878494E9BC8DD6C3C7104F,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T02:58:00Z,2019-04-06T22:38:00Z
219 | 8F53295A73878494E9BC8DD6C3C7104F,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T20:03:00Z,2019-04-08T17:43:00Z
220 | 8F53295A73878494E9BC8DD6C3C7104F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-04T07:40:00Z,2019-04-04T07:40:00Z
221 | 8F85517967795EEEF66C225F7883BDCB,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T05:59:00Z,2019-04-08T04:27:00Z
222 | 8F85517967795EEEF66C225F7883BDCB,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T04:25:00Z,2019-04-06T22:26:00Z
223 | 8F85517967795EEEF66C225F7883BDCB,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T07:20:00Z,2019-04-09T17:18:00Z
224 | 8F85517967795EEEF66C225F7883BDCB,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T00:52:00Z,2019-04-09T04:57:00Z
225 | 903CE9225FCA3E988C2AF215D4E544D3,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T11:46:00Z,2019-04-08T22:28:00Z
226 | 903CE9225FCA3E988C2AF215D4E544D3,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T10:11:00Z,2019-04-07T19:22:00Z
227 | 903CE9225FCA3E988C2AF215D4E544D3,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T00:13:00Z,2019-04-03T03:31:00Z
228 | 96DA2F590CD7246BBDE0051047B0D6F7,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T10:13:00Z,2019-04-07T14:13:00Z
229 | 96DA2F590CD7246BBDE0051047B0D6F7,C81E728D9D4C2F636F067F89CC14862C,2019-04-07T16:33:00Z,2019-04-07T16:33:00Z
230 | 96DA2F590CD7246BBDE0051047B0D6F7,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T05:09:00Z,2019-04-06T08:10:00Z
231 | 9766527F2B5D3E95D4A733FCFB77BD7E,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T17:20:00Z,2019-04-08T18:12:00Z
232 | 9766527F2B5D3E95D4A733FCFB77BD7E,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T14:15:00Z,2019-04-03T14:15:00Z
233 | 9766527F2B5D3E95D4A733FCFB77BD7E,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T11:55:00Z,2019-04-08T19:29:00Z
234 | 9766527F2B5D3E95D4A733FCFB77BD7E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T16:20:00Z,2019-04-06T12:31:00Z
235 | 9872ED9FC22FC182D371C3E9ED316094,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T04:33:00Z,2019-04-09T23:15:00Z
236 | 9872ED9FC22FC182D371C3E9ED316094,C4CA4238A0B923820DCC509A6F75849B,2019-04-09T11:05:00Z,2019-04-09T17:14:00Z
237 | 9872ED9FC22FC182D371C3E9ED316094,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T12:44:00Z,2019-04-09T07:54:00Z
238 | 9872ED9FC22FC182D371C3E9ED316094,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T02:40:00Z,2019-04-05T02:40:00Z
239 | 9B8619251A19057CFF70779273E95AA6,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T13:52:00Z,2019-04-06T22:14:00Z
240 | 9B8619251A19057CFF70779273E95AA6,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T18:53:00Z,2019-04-09T06:40:00Z
241 | 9B8619251A19057CFF70779273E95AA6,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-04T14:24:00Z,2019-04-09T19:14:00Z
242 | 9DCB88E0137649590B755372B040AFAD,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T17:08:00Z,2019-04-09T11:29:00Z
243 | 9DCB88E0137649590B755372B040AFAD,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T18:10:00Z,2019-04-02T18:10:00Z
244 | 9DCB88E0137649590B755372B040AFAD,C81E728D9D4C2F636F067F89CC14862C,2019-04-06T16:06:00Z,2019-04-07T13:43:00Z
245 | 9FC3D7152BA9336A670E36D0ED79BC43,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T00:28:00Z,2019-04-06T00:28:00Z
246 | 9FC3D7152BA9336A670E36D0ED79BC43,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T21:44:00Z,2019-04-04T21:41:00Z
247 | 9FC3D7152BA9336A670E36D0ED79BC43,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T16:28:00Z,2019-04-08T03:22:00Z
248 | 9FC3D7152BA9336A670E36D0ED79BC43,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T07:20:00Z,2019-04-08T08:21:00Z
249 | A0A080F42E6F13B3A2DF133F073095DD,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T05:28:00Z,2019-04-06T07:33:00Z
250 | A0A080F42E6F13B3A2DF133F073095DD,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T14:42:00Z,2019-04-09T21:02:00Z
251 | A0A080F42E6F13B3A2DF133F073095DD,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T03:18:00Z,2019-04-04T04:08:00Z
252 | A0A080F42E6F13B3A2DF133F073095DD,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T21:02:00Z,2019-04-01T21:02:00Z
253 | A2557A7B2E94197FF767970B67041697,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T12:36:00Z,2019-04-07T06:57:00Z
254 | A2557A7B2E94197FF767970B67041697,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T07:27:00Z,2019-04-06T16:34:00Z
255 | A2557A7B2E94197FF767970B67041697,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T08:24:00Z,2019-04-08T15:33:00Z
256 | A2557A7B2E94197FF767970B67041697,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T11:21:00Z,2019-04-09T12:44:00Z
257 | A3C65C2974270FD093EE8A9BF8AE7D0B,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T11:10:00Z,2019-04-09T08:43:00Z
258 | A3C65C2974270FD093EE8A9BF8AE7D0B,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T09:52:00Z,2019-04-07T15:38:00Z
259 | A3C65C2974270FD093EE8A9BF8AE7D0B,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-09T13:39:00Z,2019-04-09T13:39:00Z
260 | A4A042CF4FD6BFB47701CBC8A1653ADA,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T12:23:00Z,2019-04-09T16:11:00Z
261 | A4A042CF4FD6BFB47701CBC8A1653ADA,C81E728D9D4C2F636F067F89CC14862C,2019-04-06T02:12:00Z,2019-04-06T02:12:00Z
262 | A4A042CF4FD6BFB47701CBC8A1653ADA,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T21:15:00Z,2019-04-04T00:51:00Z
263 | A597E50502F5FF68E3E25B9114205D4A,A87FF679A2F3E71D9181A67B7542122C,2019-04-08T10:58:00Z,2019-04-09T05:34:00Z
264 | A597E50502F5FF68E3E25B9114205D4A,C4CA4238A0B923820DCC509A6F75849B,2019-04-08T02:17:00Z,2019-04-08T02:17:00Z
265 | A597E50502F5FF68E3E25B9114205D4A,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T04:09:00Z,2019-04-04T04:09:00Z
266 | A597E50502F5FF68E3E25B9114205D4A,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T22:52:00Z,2019-04-09T14:13:00Z
267 | A5E00132373A7031000FD987A3C9F87B,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T16:00:00Z,2019-04-06T06:54:00Z
268 | A5E00132373A7031000FD987A3C9F87B,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T11:02:00Z,2019-04-08T17:22:00Z
269 | A5E00132373A7031000FD987A3C9F87B,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T22:38:00Z,2019-04-07T04:34:00Z
270 | A8BAA56554F96369AB93E4F3BB068C22,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T10:17:00Z,2019-04-08T10:00:00Z
271 | A8BAA56554F96369AB93E4F3BB068C22,C4CA4238A0B923820DCC509A6F75849B,2019-04-09T12:40:00Z,2019-04-09T12:40:00Z
272 | A8BAA56554F96369AB93E4F3BB068C22,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T14:16:00Z,2019-04-05T04:26:00Z
273 | A8BAA56554F96369AB93E4F3BB068C22,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T01:49:00Z,2019-04-05T08:49:00Z
274 | A8F15EDA80C50ADB0E71943ADC8015CF,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T22:21:00Z,2019-04-05T14:36:00Z
275 | A8F15EDA80C50ADB0E71943ADC8015CF,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T11:52:00Z,2019-04-06T18:29:00Z
276 | A8F15EDA80C50ADB0E71943ADC8015CF,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T06:36:00Z,2019-04-08T07:21:00Z
277 | A8F15EDA80C50ADB0E71943ADC8015CF,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T19:47:00Z,2019-04-07T07:58:00Z
278 | A97DA629B098B75C294DFFDC3E463904,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T20:46:00Z,2019-04-09T22:23:00Z
279 | A97DA629B098B75C294DFFDC3E463904,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T13:46:00Z,2019-04-01T13:46:00Z
280 | A97DA629B098B75C294DFFDC3E463904,C81E728D9D4C2F636F067F89CC14862C,2019-04-09T19:04:00Z,2019-04-09T19:04:00Z
281 | A97DA629B098B75C294DFFDC3E463904,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T20:51:00Z,2019-04-03T16:10:00Z
282 | B3E3E393C77E35A4A3F3CBD1E429B5DC,A87FF679A2F3E71D9181A67B7542122C,2019-04-05T05:02:00Z,2019-04-09T07:17:00Z
283 | B3E3E393C77E35A4A3F3CBD1E429B5DC,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T00:24:00Z,2019-04-06T11:27:00Z
284 | B3E3E393C77E35A4A3F3CBD1E429B5DC,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T04:35:00Z,2019-04-08T07:14:00Z
285 | B3E3E393C77E35A4A3F3CBD1E429B5DC,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T09:08:00Z,2019-04-09T14:27:00Z
286 | B73CE398C39F506AF761D2277D853A92,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T10:45:00Z,2019-04-09T14:56:00Z
287 | B73CE398C39F506AF761D2277D853A92,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T13:04:00Z,2019-04-09T00:00:00Z
288 | B73CE398C39F506AF761D2277D853A92,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T02:11:00Z,2019-04-08T06:46:00Z
289 | B73CE398C39F506AF761D2277D853A92,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T19:44:00Z,2019-04-09T22:39:00Z
290 | BD4C9AB730F5513206B999EC0D90D1FB,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T20:04:00Z,2019-04-08T16:29:00Z
291 | BD4C9AB730F5513206B999EC0D90D1FB,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T04:56:00Z,2019-04-07T19:35:00Z
292 | BD4C9AB730F5513206B999EC0D90D1FB,C81E728D9D4C2F636F067F89CC14862C,2019-04-06T19:58:00Z,2019-04-06T19:58:00Z
293 | BD4C9AB730F5513206B999EC0D90D1FB,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T19:21:00Z,2019-04-09T18:35:00Z
294 | BD686FD640BE98EFAAE0091FA301E613,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T04:17:00Z,2019-04-09T01:07:00Z
295 | BD686FD640BE98EFAAE0091FA301E613,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T00:23:00Z,2019-04-04T00:23:00Z
296 | BD686FD640BE98EFAAE0091FA301E613,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T16:18:00Z,2019-04-06T11:58:00Z
297 | BF8229696F7A3BB4700CFDDEF19FA23F,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T02:07:00Z,2019-04-04T03:20:00Z
298 | BF8229696F7A3BB4700CFDDEF19FA23F,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T14:55:00Z,2019-04-08T13:19:00Z
299 | BF8229696F7A3BB4700CFDDEF19FA23F,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T05:12:00Z,2019-04-08T08:47:00Z
300 | BF8229696F7A3BB4700CFDDEF19FA23F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T18:28:00Z,2019-04-07T02:52:00Z
301 | C45147DEE729311EF5B5C3003946C48F,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T12:19:00Z,2019-04-07T02:05:00Z
302 | C45147DEE729311EF5B5C3003946C48F,C81E728D9D4C2F636F067F89CC14862C,2019-04-04T10:37:00Z,2019-04-08T05:16:00Z
303 | C45147DEE729311EF5B5C3003946C48F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T23:23:00Z,2019-04-09T11:25:00Z
304 | C8FFE9A587B126F152ED3D89A146B445,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T16:48:00Z,2019-04-08T01:13:00Z
305 | C8FFE9A587B126F152ED3D89A146B445,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T04:15:00Z,2019-04-04T02:59:00Z
306 | C8FFE9A587B126F152ED3D89A146B445,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-04T09:35:00Z,2019-04-09T13:22:00Z
307 | C9E1074F5B3F9FC8EA15D152ADD07294,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T08:15:00Z,2019-04-04T05:19:00Z
308 | C9E1074F5B3F9FC8EA15D152ADD07294,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T20:28:00Z,2019-04-09T18:17:00Z
309 | C9E1074F5B3F9FC8EA15D152ADD07294,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-08T19:51:00Z,2019-04-08T19:51:00Z
310 | CEDEBB6E872F539BEF8C3F919874E9D7,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T05:48:00Z,2019-04-08T06:16:00Z
311 | CEDEBB6E872F539BEF8C3F919874E9D7,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T12:19:00Z,2019-04-09T03:10:00Z
312 | CEDEBB6E872F539BEF8C3F919874E9D7,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T22:50:00Z,2019-04-04T18:07:00Z
313 | CEDEBB6E872F539BEF8C3F919874E9D7,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-06T02:32:00Z,2019-04-09T07:57:00Z
314 | CFECDB276F634854F3EF915E2E980C31,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T11:42:00Z,2019-04-09T02:56:00Z
315 | CFECDB276F634854F3EF915E2E980C31,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T15:28:00Z,2019-04-06T09:22:00Z
316 | CFECDB276F634854F3EF915E2E980C31,C81E728D9D4C2F636F067F89CC14862C,2019-04-05T17:27:00Z,2019-04-08T13:17:00Z
317 | CFECDB276F634854F3EF915E2E980C31,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T15:02:00Z,2019-04-07T17:18:00Z
318 | D1F491A404D6854880943E5C3CD9CA25,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T03:06:00Z,2019-04-05T20:12:00Z
319 | D1F491A404D6854880943E5C3CD9CA25,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T19:17:00Z,2019-04-09T21:18:00Z
320 | D1F491A404D6854880943E5C3CD9CA25,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T05:40:00Z,2019-04-08T12:55:00Z
321 | DA4FB5C6E93E74D3DF8527599FA62642,A87FF679A2F3E71D9181A67B7542122C,2019-04-04T18:12:00Z,2019-04-04T18:12:00Z
322 | DA4FB5C6E93E74D3DF8527599FA62642,C4CA4238A0B923820DCC509A6F75849B,2019-04-01T21:23:00Z,2019-04-07T06:01:00Z
323 | DA4FB5C6E93E74D3DF8527599FA62642,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T11:49:00Z,2019-04-09T08:16:00Z
324 | DA4FB5C6E93E74D3DF8527599FA62642,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-05T15:40:00Z,2019-04-06T03:46:00Z
325 | E00DA03B685A0DD18FB6A08AF0923DE0,A87FF679A2F3E71D9181A67B7542122C,2019-04-09T08:57:00Z,2019-04-09T08:57:00Z
326 | E00DA03B685A0DD18FB6A08AF0923DE0,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T18:54:00Z,2019-04-03T18:54:00Z
327 | E00DA03B685A0DD18FB6A08AF0923DE0,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T17:57:00Z,2019-04-05T19:31:00Z
328 | E00DA03B685A0DD18FB6A08AF0923DE0,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T00:39:00Z,2019-04-09T13:38:00Z
329 | EB160DE1DE89D9058FCB0B968DBBBD68,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T13:24:00Z,2019-04-07T21:25:00Z
330 | EB160DE1DE89D9058FCB0B968DBBBD68,C4CA4238A0B923820DCC509A6F75849B,2019-04-07T19:17:00Z,2019-04-07T23:35:00Z
331 | EB160DE1DE89D9058FCB0B968DBBBD68,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T19:37:00Z,2019-04-09T06:29:00Z
332 | EB160DE1DE89D9058FCB0B968DBBBD68,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T16:07:00Z,2019-04-09T02:58:00Z
333 | EC5DECCA5ED3D6B8079E2E7E7BACC9F2,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T09:20:00Z,2019-04-01T09:20:00Z
334 | EC5DECCA5ED3D6B8079E2E7E7BACC9F2,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T08:28:00Z,2019-04-09T03:41:00Z
335 | EC5DECCA5ED3D6B8079E2E7E7BACC9F2,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-01T07:06:00Z,2019-04-07T01:15:00Z
336 | EC8956637A99787BD197EACD77ACCE5E,A87FF679A2F3E71D9181A67B7542122C,2019-04-01T00:58:00Z,2019-04-08T11:43:00Z
337 | EC8956637A99787BD197EACD77ACCE5E,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T16:26:00Z,2019-04-07T20:14:00Z
338 | EC8956637A99787BD197EACD77ACCE5E,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T18:04:00Z,2019-04-08T02:57:00Z
339 | EC8956637A99787BD197EACD77ACCE5E,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T00:23:00Z,2019-04-09T16:22:00Z
340 | EECCA5B6365D9607EE5A9D336962C534,C4CA4238A0B923820DCC509A6F75849B,2019-04-04T18:32:00Z,2019-04-09T19:27:00Z
341 | EECCA5B6365D9607EE5A9D336962C534,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T12:58:00Z,2019-04-05T01:40:00Z
342 | EECCA5B6365D9607EE5A9D336962C534,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-03T19:42:00Z,2019-04-05T08:59:00Z
343 | F0935E4CD5920AA6C7C996A5EE53A70F,A87FF679A2F3E71D9181A67B7542122C,2019-04-02T22:43:00Z,2019-04-08T05:23:00Z
344 | F0935E4CD5920AA6C7C996A5EE53A70F,C4CA4238A0B923820DCC509A6F75849B,2019-04-03T04:53:00Z,2019-04-06T03:34:00Z
345 | F0935E4CD5920AA6C7C996A5EE53A70F,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T14:33:00Z,2019-04-09T08:22:00Z
346 | F0935E4CD5920AA6C7C996A5EE53A70F,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-09T15:43:00Z,2019-04-09T15:43:00Z
347 | F2217062E9A397A1DCA429E7D70BC6CA,A87FF679A2F3E71D9181A67B7542122C,2019-04-08T09:57:00Z,2019-04-08T09:57:00Z
348 | F2217062E9A397A1DCA429E7D70BC6CA,C4CA4238A0B923820DCC509A6F75849B,2019-04-05T06:23:00Z,2019-04-09T22:45:00Z
349 | F2217062E9A397A1DCA429E7D70BC6CA,C81E728D9D4C2F636F067F89CC14862C,2019-04-02T02:01:00Z,2019-04-05T05:45:00Z
350 | F2217062E9A397A1DCA429E7D70BC6CA,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T01:08:00Z,2019-04-06T13:51:00Z
351 | F7E6C85504CE6E82442C770F7C8606F0,A87FF679A2F3E71D9181A67B7542122C,2019-04-06T02:14:00Z,2019-04-06T02:14:00Z
352 | F7E6C85504CE6E82442C770F7C8606F0,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T10:48:00Z,2019-04-05T16:52:00Z
353 | F7E6C85504CE6E82442C770F7C8606F0,C81E728D9D4C2F636F067F89CC14862C,2019-04-03T08:53:00Z,2019-04-03T08:53:00Z
354 | F7E6C85504CE6E82442C770F7C8606F0,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-07T15:07:00Z,2019-04-07T15:07:00Z
355 | FA7CDFAD1A5AAF8370EBEDA47A1FF1C3,A87FF679A2F3E71D9181A67B7542122C,2019-04-03T05:34:00Z,2019-04-09T13:25:00Z
356 | FA7CDFAD1A5AAF8370EBEDA47A1FF1C3,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T03:44:00Z,2019-04-02T03:44:00Z
357 | FA7CDFAD1A5AAF8370EBEDA47A1FF1C3,C81E728D9D4C2F636F067F89CC14862C,2019-04-01T13:22:00Z,2019-04-01T13:22:00Z
358 | FA7CDFAD1A5AAF8370EBEDA47A1FF1C3,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-08T00:17:00Z,2019-04-09T18:15:00Z
359 | FC221309746013AC554571FBD180E1C8,A87FF679A2F3E71D9181A67B7542122C,2019-04-07T06:02:00Z,2019-04-08T23:48:00Z
360 | FC221309746013AC554571FBD180E1C8,C4CA4238A0B923820DCC509A6F75849B,2019-04-02T01:07:00Z,2019-04-08T11:14:00Z
361 | FC221309746013AC554571FBD180E1C8,ECCBC87E4B5CE2FE28308FD9F2A7BAF3,2019-04-02T12:22:00Z,2019-04-09T23:33:00Z
362 | 


--------------------------------------------------------------------------------
/man/hello.Rd:
--------------------------------------------------------------------------------
 1 | \name{hello}
 2 | \alias{hello}
 3 | \title{Hello, World!}
 4 | \usage{
 5 | hello()
 6 | }
 7 | \description{
 8 | Prints 'Hello, world!'.
 9 | }
10 | \examples{
11 | hello()
12 | }
13 | 


--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-pipe.R
 3 | \name{\%>\%}
 4 | \alias{\%>\%}
 5 | \title{Pipe operator}
 6 | \usage{
 7 | lhs \%>\% rhs
 8 | }
 9 | \description{
10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/wiz_add_baseline_predictors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/add_predictors.R
 3 | \name{wiz_add_baseline_predictors}
 4 | \alias{wiz_add_baseline_predictors}
 5 | \title{Function to add baseline predictors
 6 | Offset of hours(1) would mean that everything would be anchored to 1 hour
 7 | before fixed_start.}
 8 | \usage{
 9 | wiz_add_baseline_predictors(
10 |   wiz_frame = NULL,
11 |   variables = NULL,
12 |   category = NULL,
13 |   lookback = lubridate::hours(48),
14 |   window = lookback,
15 |   offset = lubridate::hours(0),
16 |   stats = c(mean = mean, min = min, max = max),
17 |   impute = TRUE,
18 |   output_file = TRUE,
19 |   log_file = TRUE,
20 |   check_size_only = FALSE,
21 |   last_chunk_completed = NULL
22 | )
23 | }
24 | \description{
25 | Function to add baseline predictors
26 | Offset of hours(1) would mean that everything would be anchored to 1 hour
27 | before fixed_start.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/wiz_add_growing_predictors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/add_predictors.R
 3 | \name{wiz_add_growing_predictors}
 4 | \alias{wiz_add_growing_predictors}
 5 | \title{Function to add growing predictors
 6 | Offset of hours(1) would mean that everything would be anchored to 1 hour
 7 | before fixed_start.}
 8 | \usage{
 9 | wiz_add_growing_predictors(
10 |   wiz_frame = NULL,
11 |   variables = NULL,
12 |   category = NULL,
13 |   stats = c(mean = mean, min = min, max = max),
14 |   output_file = TRUE,
15 |   log_file = TRUE,
16 |   check_size_only = FALSE,
17 |   last_chunk_completed = NULL
18 | )
19 | }
20 | \description{
21 | Function to add growing predictors
22 | Offset of hours(1) would mean that everything would be anchored to 1 hour
23 | before fixed_start.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/wiz_add_predictors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/add_predictors.R
 3 | \name{wiz_add_predictors}
 4 | \alias{wiz_add_predictors}
 5 | \title{Function to add predictors
 6 | before fixed_start.}
 7 | \usage{
 8 | wiz_add_predictors(
 9 |   wiz_frame = NULL,
10 |   variables = NULL,
11 |   category = NULL,
12 |   lookback = lubridate::hours(48),
13 |   window = lookback,
14 |   stats = c(mean = mean, min = min, max = max),
15 |   impute = TRUE,
16 |   output_file = TRUE,
17 |   log_file = TRUE,
18 |   check_size_only = FALSE,
19 |   last_chunk_completed = NULL
20 | )
21 | }
22 | \description{
23 | Function to add predictors
24 | before fixed_start.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/wiz_add_predictors_internal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wizard_internal.R
 3 | \name{wiz_add_predictors_internal}
 4 | \alias{wiz_add_predictors_internal}
 5 | \title{New furrr-enabled add_predictors function
 6 | Internal only}
 7 | \usage{
 8 | wiz_add_predictors_internal(
 9 |   wiz_frame = NULL,
10 |   variables = NULL,
11 |   category = NULL,
12 |   lookback = lubridate::hours(48),
13 |   window = lookback,
14 |   stats = c(mean = mean, min = min, max = max),
15 |   impute = TRUE,
16 |   output_file = TRUE,
17 |   log_file = TRUE,
18 |   check_size_only = FALSE,
19 |   filename_prefix = "",
20 |   ...
21 | )
22 | }
23 | \description{
24 | New furrr-enabled add_predictors function
25 | Internal only
26 | }
27 | 


--------------------------------------------------------------------------------
/man/wiz_build_temporal_data_dictionary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wizard.R
 3 | \name{wiz_build_temporal_data_dictionary}
 4 | \alias{wiz_build_temporal_data_dictionary}
 5 | \title{Determine the names and types of all of the temporal data variables.
 6 | This function assumes that the temporal data values may be characters if
 7 | some variables are categorical. This is an internal function.}
 8 | \usage{
 9 | wiz_build_temporal_data_dictionary(
10 |   temporal_data,
11 |   temporal_variable,
12 |   temporal_value,
13 |   numeric_threshold = 0.5
14 | )
15 | }
16 | \description{
17 | Determine the names and types of all of the temporal data variables.
18 | This function assumes that the temporal data values may be characters if
19 | some variables are categorical. This is an internal function.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/wiz_calc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wizard_internal.R
 3 | \name{wiz_calc}
 4 | \alias{wiz_calc}
 5 | \title{New internal helper function}
 6 | \usage{
 7 | wiz_calc(
 8 |   groups,
 9 |   temporal_id,
10 |   temporal_variable,
11 |   temporal_value,
12 |   temporal_time,
13 |   lookback_converted,
14 |   dots,
15 |   window_converted,
16 |   temporal_data_of_interest,
17 |   stats,
18 |   impute,
19 |   pb,
20 |   all_temporal_vars,
21 |   missing_value_frame,
22 |   strategy
23 | )
24 | }
25 | \description{
26 | New internal helper function
27 | }
28 | 


--------------------------------------------------------------------------------
/man/wiz_combine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/combine.R
 3 | \name{wiz_combine}
 4 | \alias{wiz_combine}
 5 | \title{New wiz_combine function}
 6 | \usage{
 7 | wiz_combine(
 8 |   wiz_frame,
 9 |   ...,
10 |   files = NULL,
11 |   include_files = TRUE,
12 |   wiz_path = TRUE,
13 |   dplyr_join = dplyr::inner_join,
14 |   log_file = TRUE
15 | )
16 | }
17 | \description{
18 | New wiz_combine function
19 | }
20 | 


--------------------------------------------------------------------------------
/man/wiz_define_steps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wizard_internal.R
 3 | \name{wiz_define_steps}
 4 | \alias{wiz_define_steps}
 5 | \title{New internal helper function}
 6 | \usage{
 7 | wiz_define_steps(
 8 |   groups,
 9 |   temporal_id,
10 |   step,
11 |   step_units,
12 |   max_length,
13 |   baseline,
14 |   max_step_times_per_id,
15 |   lookback_converted,
16 |   window_converted,
17 |   output_folder,
18 |   log_file
19 | )
20 | }
21 | \description{
22 | New internal helper function
23 | }
24 | 


--------------------------------------------------------------------------------
/man/wiz_dummy_code.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wizard.R
 3 | \name{wiz_dummy_code}
 4 | \alias{wiz_dummy_code}
 5 | \title{Function that converts categorical temporal predictors into dummy variables}
 6 | \usage{
 7 | wiz_dummy_code(
 8 |   wiz_frame = NULL,
 9 |   numeric_threshold = 0.5,
10 |   variables = NULL,
11 |   save_wiz_frame = TRUE
12 | )
13 | }
14 | \description{
15 | Note that you can use this to dummy code variables with numerical values
16 | where the values are supposed to map to categorical levels (e.g., 1 means high and 2
17 | means low).
18 | }
19 | \details{
20 | Either provide a threshold (defaults to 0.5) or provide a vector of variables.
21 | If you supply a vector of variables, this takes precedence over the numeric threshold.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/wiz_frame.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wizard.R
 3 | \name{wiz_frame}
 4 | \alias{wiz_frame}
 5 | \title{Define wizard frame}
 6 | \usage{
 7 | wiz_frame(
 8 |   fixed_data,
 9 |   temporal_data,
10 |   fixed_id = "id",
11 |   fixed_start = NULL,
12 |   fixed_end = NULL,
13 |   temporal_id = "id",
14 |   temporal_time = "time",
15 |   temporal_variable = "variable",
16 |   temporal_value = "value",
17 |   temporal_category = temporal_variable,
18 |   step = NULL,
19 |   max_length = NULL,
20 |   output_folder = NULL,
21 |   create_folder = FALSE,
22 |   save_wiz_frame = TRUE,
23 |   chunk_size = NULL,
24 |   numeric_threshold = 0.5
25 | )
26 | }
27 | \description{
28 | Define wizard frame
29 | }
30 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/character_language_model.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Preparing a dataset for a simple language model"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{Preparing a dataset for a simple language model}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set( # default chunk options for the rest of this vignette
 12 |   collapse = TRUE, # show source and output together in one block
 13 |   comment = "#>" # prefix for printed output lines
 14 | )
 15 | ```
 16 | 
 17 | ```{r, eval = FALSE}
 18 | library(wizard)
 19 | ```
 20 | 
 21 | ## Let's start by loading the mtsamples dataset
 22 | 
 23 | We can find this in the `clinspacy` package (on GitHub).
 24 | 
 25 | ```{r}
 26 | library(dplyr)
 27 | library(tidytext)
 28 | 
 29 | mtsamples = clinspacy::dataset_mtsamples() # load the mtsamples dataset via the clinspacy package
 30 | 
 31 | nrow(mtsamples) # number of rows in mtsamples
 32 | 
 33 | mtsamples$transcription[[1]] # print the first element of the transcription column
 34 | ```
 35 | 
 36 | ## Let's combine the text from all of the notes into one dataset of characters
 37 | 
 38 | ```{r}
 39 | dataset = data.frame(id = seq_len(nrow(mtsamples)), # one id per row of mtsamples
 40 |                      variable = 'variable', # constant variable name shared by every row
 41 |                      text = mtsamples$transcription, # note text; add [1:500] (and id = 1:500) to use a subset
 42 |                      stringsAsFactors = FALSE)
 43 | 
 44 | dataset = 
 45 |   dataset %>% 
 46 |   unnest_character_shingles(char, text, n = 1, strip_non_alphanum = FALSE) %>% # split text into single characters
 47 |   group_by(id) %>% 
 48 |   mutate(sequence_num = row_number()) %>% # position of each character within its id
 49 |   ungroup()
 50 | 
 51 | cat(head(dataset$char, 100)) # first 100 characters; head() avoids NA padding on shorter input
 52 | ```
 53 | 
 54 | ## Let's generate a wiz_frame
 55 | 
 56 | ```{r}
 57 | char_frame = wiz_frame(fixed_data = dataset %>% distinct(id), # one row per id
 58 |                        temporal_data = dataset %>% filter(id == 1), # only the rows for id 1
 59 |                        fixed_id = 'id',
 60 |                        temporal_id = 'id',
 61 |                        temporal_time = 'sequence_num', # character position is used as the time column
 62 |                        temporal_variable = 'variable',
 63 |                        temporal_value = 'char', # the character itself is the value column
 64 |                        step = 2,
 65 |                        max_length = 20,
 66 |                        output_folder = 'Z:/kdpsingh/wiz_char_lang', # NOTE(review): author-specific drive path; adjust before running elsewhere
 67 |                        save_wiz_frame = FALSE)
 68 | ```
 69 | 
 70 | ## Let's generate a wiz_frame (for fun)
 71 | 
 72 | ```{r}
 73 | char_frame = wiz_frame(fixed_data = dataset %>% distinct(id), # reassigns char_frame, replacing the id == 1 version
 74 |                        temporal_data = dataset, # all ids this time
 75 |                        fixed_id = 'id',
 76 |                        temporal_id = 'id',
 77 |                        temporal_time = 'sequence_num', # character position is used as the time column
 78 |                        temporal_variable = 'variable',
 79 |                        temporal_value = 'char', # the character itself is the value column
 80 |                        step = 1,
 81 |                        output_folder = 'Z:/kdpsingh/wiz_char_lang/all_data_in_chunks', # NOTE(review): author-specific drive path; adjust before running elsewhere
 82 |                        create_folder = TRUE,
 83 |                        chunk_size = 16)
 84 | ```
 85 | 
 86 | 
 87 | 
 88 | ## Let's generate a dataset that will predict the next letter using the last 50 letters
 89 | 
 90 | ```{r}
 91 | future::plan('multisession', workers = 6) # use 6 parallel background R sessions
 92 | 
 93 | # model_predictors = char_frame %>%
 94 | #   wiz_add_predictors(variables = 'variable',
 95 | #                      lookback = 20,
 96 | #                      window = 1,
 97 | #                      stats = c(first = . %>% .[1]),
 98 | #                      output_file = FALSE)
 99 | # 
100 | # 
101 | # model_predictors = char_frame %>%
102 | #   wiz_add_growing_predictors(variables = 'variable',
103 | #                      stats = c(ngram = . %>% paste(collapse = '')),
104 | #                      output_file = FALSE)
105 | 
106 | char_frame %>% # NOTE(review): result is not assigned to a variable in this chunk
107 |   wiz_add_predictors(variables = 'variable', 
108 |                      lookback = 50,
109 |                      window = 1, 
110 |                      stats = c(first = . %>% .[1]), # statistic named "first": takes the first element
111 |                      last_chunk_completed = 64) # presumably resumes after chunk 64 of an earlier run - TODO confirm
112 | 
113 | 
114 | char_frame %>% # NOTE(review): result is not assigned; the same call is repeated below with output_file = FALSE
115 |   wiz_add_outcomes(variables = 'variable', 
116 |                    lookahead = 1,
117 |                    stats = c(first = . %>% .[1])) # statistic named "first": takes the first element
118 | 
119 | # model_predictors_ngram = char_frame %>%
120 | #   wiz_add_predictors(variables = 'variable',
121 | #                      lookback = 3,
122 | #                      stats = c(ngram = . %>% paste(collapse = '')),
123 | #                      output_file = FALSE)
124 | 
125 | 
126 | model_outcome = char_frame %>% 
127 |   wiz_add_outcomes(variables = 'variable', 
128 |                    lookahead = 1,
129 |                    stats = c(first = . %>% .[1]),
130 |                    output_file = FALSE) # presumably returns the data rather than writing a file - TODO confirm
131 | 
132 | # model_outcome_ngram = char_frame %>% 
133 | #   wiz_add_outcomes(variables = 'variable', 
134 | #                    lookahead = 1,
135 | #                    stats = c(ngram = . %>% paste(collapse = '')),
136 | #                    output_file = FALSE)
137 | ```
138 | 
139 | ## Combine multi-file chunk datasets
140 | 
141 | ```{r}
142 | 
143 | ```
144 | 
145 | 
146 | ## Combine the datasets
147 | 
148 | ```{r}
149 | model_data = wiz_combine(char_frame, 
150 |                          model_outcome,
151 |                          model_predictors) # NOTE(review): only assigned in commented-out code earlier in this vignette
152 | 
153 | knitr::kable(model_data, align = 'c') # render the combined data as a table with centered columns
154 | 
155 | # head(model_data)
156 | ```
157 | 
158 | ## Combine the ngram datasets
159 | 
160 | ```{r}
161 | model_data_ngram = wiz_combine(char_frame,
162 |                                model_outcome_ngram, # NOTE(review): only assigned in commented-out code earlier in this vignette
163 |                                model_predictors_ngram) # NOTE(review): only assigned in commented-out code earlier in this vignette
164 | 
165 | knitr::kable(model_data_ngram, align = 'c') # render the combined ngram data as a table with centered columns
166 | 
167 | 
168 | # head(model_data_ngram)
169 | ```
170 | 
171 | 


--------------------------------------------------------------------------------
/wizard.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------