├── README.md
├── data
│   ├── alternative_cgm_data_test.pkl
│   ├── alternative_cgm_data_train.pkl
│   ├── processed_cgm_coeffs.pkl
│   ├── processed_cgm_data_test.pkl
│   ├── processed_cgm_data_train.pkl
│   ├── processed_cgm_data_validation.pkl
│   └── unprocessed_cgm_data.xlsx
├── lib
│   ├── .ipynb_checkpoints
│   │   ├── glucose_dataset-checkpoint.py
│   │   ├── model-checkpoint.py
│   │   └── trainer-checkpoint.py
│   ├── __init__.py
│   ├── __pycache__
│   │   └── model.cpython-36.pyc
│   ├── glucose_dataset.py
│   ├── model.py
│   └── trainer.py
└── walkthrough.ipynb

/README.md:
--------------------------------------------------------------------------------
1 | # multi-output-glucose-forecasting
2 | The code and data used for the paper Deep Multi-Output Forecasting: Learning to Accurately Predict Blood Glucose Trajectories, published at KDD 2018. The full paper is available on arXiv: https://arxiv.org/pdf/1806.05357.pdf
3 | 
4 | By downloading and using these data you agree to comply with the following:
5 | 
6 | - You will not attempt re-identification;
7 | - You will contact The University of Michigan (UM) if identifiers are detected;
8 | - You will not redistribute or resell the data;
9 | - Data ownership remains with UM;
10 | - These requirements survive changes in ownership of the entity.
11 | 
12 | Update: this repo was originally intended to serve as the starting point for a full and clean version of the code I used (since my research code was...researchy). Since it's been years and I never got around to actually doing that, here's the full research version in all of its messiness: https://gitlab.eecs.umich.edu/mld3/glucose_forecasting
13 | 
--------------------------------------------------------------------------------
/data/alternative_cgm_data_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/alternative_cgm_data_test.pkl
--------------------------------------------------------------------------------
/data/alternative_cgm_data_train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/alternative_cgm_data_train.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_coeffs.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_coeffs.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_data_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_data_test.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_data_train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_data_train.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_data_validation.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_data_validation.pkl -------------------------------------------------------------------------------- /data/unprocessed_cgm_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/unprocessed_cgm_data.xlsx -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/glucose_dataset-checkpoint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.polynomial import polynomial as pn 3 | import joblib 4 | from joblib import Parallel, delayed 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class GlucoseDataset(Dataset): 9 | """ 10 | Blood glucose dataset for pytorch 11 | Entry: (data, y_bin, y_real, len) 12 | loading everything into memory as small 13 | """ 14 | def __init__(self, 15 | data_pkl, 16 | max_pad, 17 | output_len, 18 | output_dim, 19 | polynomial=False, 20 | degree=None, 21 | range_low=None, 22 | range_high=None, 23 | coeff_file=None, 24 | real_values=False, 25 | parallel_cache=False, 26 | max_size=None, 27 | flip_signal=False): 28 | 29 | # for one-hot encoding, assumes 40-400 mg/dL range 30 | self.output_dim = output_dim 31 | self.polynomial = polynomial 32 | self.degree = degree 33 | self.range_low = range_low 34 | self.range_high = range_high 35 | self.real_values = real_values 36 | self.max_pad = max_pad 37 | 38 | self.data = joblib.load(data_pkl) 39 | if flip_signal: 40 | self.data_flip = [] 41 | for i in range(len(self.data)): 42 | self.data_flip.append(np.flip(self.data[i], axis=0)) 43 | self.data = self.data_flip 44 | if max_size is not None: 45 | self.data = self.data[0:max_size] 46 | self.output_len = output_len 47 | 48 | if not self.real_values: 49 | if self.polynomial: 50 | # from degree, calculate ranges 51 | # from ranges, get bins 52 | # should make more flexible, function parameter binning 53 | # old hand-defined range, captured 100% variation 54 | #[[40, 400], 55 | # [-36, 36], 56 | # [-5.5, 5.5]] 57 | for var in [self.degree, self.range_low, self.range_high]: 58 | assert var is not None, 'Must set degree, range_high, and range_low for polynomial' 59 | 60 | if coeff_file is None: 61 | ranges = self.auto_poly_range() 62 | else: 63 | ranges = self.precomputed_poly_range(coeff_file) 64 | 65 | self.bin_step = [(ranges[i][1]-ranges[i][0])/(self.output_dim-1) for i in range(degree+1)] 66 | self.bins = [ 67 | np.linspace(ranges[i][0], 68 | ranges[i][1], 69 | self.output_dim) + (0.5 * self.bin_step[i]) 70 | for i in range(self.degree+1)] 71 | else: 72 | # simple value binning 73 | self.bin_step = (400-40)/(self.output_dim-1) 74 | # the half step appraoch is an artifact of wanting perfect bins with output_dim=361 75 | self.bins = np.linspace(40, 400, self.output_dim)+(self.bin_step * 0.5) 76 | 77 | # trying out precaching results for less intensive load 78 | count = 0 79 | self.x_out = [] 80 | self.y_out = [] 81 | self.y_real = [] 82 | self.lens = [] 83 | print('caching results') 84 | if parallel_cache: 85 | res_tuples = Parallel(n_jobs=5, verbose=10)(delayed(self.prepare_output)(idx) for idx in range(len(self.data))) 86 | for idx in range(len(self.data)): 87 | x_pad, y_pad, y_real_pad, lens = res_tuples[idx] 88 | self.x_out.append(x_pad) 89 | self.y_out.append(y_pad) 90 | 
self.y_real.append(y_real_pad) 91 | self.lens.append(lens) 92 | else: 93 | for idx in range(len(self.data)): 94 | if idx % 10 == 0: 95 | print('{}/{}'.format(idx, len(self.data))) 96 | x_pad, y_pad, y_real_pad, lens = self.prepare_output(idx) 97 | self.x_out.append(x_pad) 98 | self.y_out.append(y_pad) 99 | self.y_real.append(y_real_pad) 100 | self.lens.append(lens) 101 | 102 | def prepare_output(self, idx, real_y=True): 103 | x_dat = self.data[idx] 104 | length = self.max_pad - len(x_dat) 105 | x_pad = np.pad(x_dat, 106 | (0, length), 107 | mode='constant', 108 | constant_values=-1) 109 | y_dat = self.window_stack(x_dat[1::].reshape(-1, 1)) 110 | if self.real_values: 111 | y_bins = y_dat 112 | else: 113 | y_bins = self.values_to_bins(y_dat) 114 | y_pad = np.pad(y_bins, 115 | ((0, length), (0, 0)), 116 | mode='constant', 117 | constant_values=-1) 118 | if real_y: 119 | y_real_pad = np.pad(y_dat, 120 | ((0, length), (0, 0)), 121 | mode='constant', 122 | constant_values=-1) 123 | return x_pad, y_pad, y_real_pad, self.max_pad - length 124 | else: 125 | return x_pad, y_pad, self.max_pad - length 126 | 127 | def auto_poly_range(self, percentile): 128 | """ 129 | Using degree and training data, creates 130 | range that captures percentile% of variation of the best fit 131 | coefficient values. 132 | """ 133 | raise NotImplementedError('TODO') 134 | 135 | def precomputed_poly_range(self, coeff_file): 136 | """ 137 | Simple function that uses precomputed coefficient 138 | percentile dict 139 | 140 | low, high can be integers in 0-100 141 | 142 | Requires precomputed coeff dict 143 | """ 144 | assert self.range_low < self.range_high 145 | 146 | coeff = joblib.load(coeff_file) 147 | 148 | ranges = [] 149 | for i in range(self.degree+1): 150 | low_val = coeff[self.degree][i][self.range_low] 151 | high_val = coeff[self.degree][i][self.range_high] 152 | ranges.append([low_val, high_val]) 153 | return ranges 154 | 155 | def scale(self, x): 156 | """ 157 | turn glucose signal with 40-400 to range -1 to 1 158 | can add more intelligent scaling for balencing hypo/hyper, 159 | though real concern is moving over to classification 160 | """ 161 | return (x-220)/180. 162 | 163 | def one_hot(self, seq): 164 | """ 165 | turn glucose signal into one hot distribution 166 | with size=output_dim, linearly bins glucose 167 | range 40-400 168 | don't need for NLLLoss 169 | """ 170 | dist = np.zeros((seq.size, self.output_dim)) 171 | dist[np.arange(seq.size), np.digitize(seq, self.bins)] = 1. 
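# Worked example of the value-binning scheme used throughout this class (an
# illustrative sketch, assuming the default 40-400 mg/dL range with output_dim=361,
# i.e. 1 mg/dL bins; the reading below is hypothetical, not repo data):
#   bin_step = (400 - 40) / (361 - 1)             # -> 1.0
#   bins     = np.linspace(40, 400, 361) + 0.5    # -> [40.5, 41.5, ..., 400.5]
#   np.digitize(112.3, bins)                      # -> 72    (values_to_bins)
#   bins[72] - 0.5 * bin_step                     # -> 112.0 (bins_to_values)
# so one_hot places the 1 at index 72 for a glucose reading of 112.3 mg/dL.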
172 | return dist 173 | 174 | def polymerize(self, y): 175 | """ 176 | Turns output window into best fit polynomial 177 | with output [x'_0, ..., x'_d] where x' is 178 | bin number that x would be in (using ranges) 179 | """ 180 | x_inds = [] 181 | if len(y.shape) > 1: 182 | for j in range(y.shape[0]): 183 | coeffs = pn.polyfit(np.arange(len(y[j])), y[j], deg=self.degree) 184 | x_inds.append([np.digitize(coeffs[i], self.bins[i]).item() for i in range(self.degree+1)]) 185 | else: 186 | coeffs = pn.polyfit(np.arange(len(y)), y, deg=self.degree) 187 | for i in range(self.degree+1): 188 | x_inds.append(np.digitize(coeffs[i], self.bins[i]).item()) 189 | return np.clip(x_inds, 0, self.output_dim-1) 190 | 191 | def bins_to_coeff_values(self, pred): 192 | """ 193 | Given bins for polynomial coefficients, 194 | return estimate of real coefficient values 195 | """ 196 | if len(pred.shape) > 1: 197 | vals = [np.array([self.bins[i][np.clip(np.array(pred[:, i], dtype=int), 0, self.output_dim-1)]]) 198 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 199 | coeffs = np.concatenate(vals, axis=0).T 200 | else: 201 | coeffs = [self.bins[i][np.clip(np.array(pred[i], dtype=int), 0, self.output_dim-1)] 202 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 203 | return np.array(coeffs) 204 | 205 | def reverse_polymerize(self, pred): 206 | """ 207 | Given bins for polynomial coefficients, returns forecast 208 | For new foreacsting system, flexible degree and doesn't assume 209 | adding mistake 210 | """ 211 | coeffs = self.bins_to_coeff_values(pred) 212 | return pn.polyval(np.arange(self.output_len), coeffs.T) 213 | 214 | def values_to_bins(self, y): 215 | """ 216 | Gvien a y sample (or batch of y samples), changes from 217 | value to categorical representation 218 | """ 219 | if self.real_values: 220 | return y 221 | if self.polynomial: 222 | return self.polymerize(y) 223 | else: 224 | return np.digitize(y, self.bins) 225 | 226 | def bins_to_values(self, y): 227 | """ 228 | Given a y sample (or batch of y samples), changes from categorical 229 | to value representation 230 | """ 231 | if type(y) is not np.ndarray: 232 | y = y.numpy() 233 | if self.real_values: 234 | return y 235 | if self.polynomial: 236 | return self.reverse_polymerize(y) 237 | else: 238 | vals = self.bins[np.clip(np.array(y, dtype=int), 0, self.output_dim-1)] 239 | return vals - (0.5 * self.bin_step) 240 | 241 | def index_to_values(self, x, i): 242 | """ 243 | Given i index for output value: y[i] 244 | returns ground truth x values 245 | bins_to_values can also be used, but ignores 246 | polynomial residual 247 | """ 248 | return x[i+1:i+1+self.output_len] 249 | 250 | def window_stack(self, seq, stepsize=1): 251 | """ 252 | Gets rolling window from seq of length self.output_len 253 | stepsize determines dilation 254 | """ 255 | length = self.output_len 256 | n = seq.shape[0] 257 | return np.hstack(seq[i:1+n+i-length:stepsize] for i in range(length)) 258 | 259 | def __len__(self): 260 | return len(self.data) 261 | 262 | def __getitem__(self, idx): 263 | return self.x_out[idx], self.y_out[idx], self.y_real[idx], self.lens[idx] 264 | -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/model-checkpoint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import joblib 3 | 4 | import torch 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | 9 | class ForecastRNN(nn.Module): 10 | """ 11 | Helper 
for pytorch reimplementation 12 | Uses variable sized/depth GRU with linear layer to get output right 13 | """ 14 | def __init__(self, input_dim, output_dim, hidden_size, depth, output_len=-1, cuda=False): 15 | super(ForecastRNN, self).__init__() 16 | self.cuda = cuda 17 | self.rnn = nn.GRU(input_size=input_dim, 18 | hidden_size=hidden_size, 19 | num_layers=depth, 20 | dropout=False, 21 | bidirectional=False, # would bidirectional help forecasting? 22 | batch_first=True) 23 | self.sm = nn.LogSoftmax(dim=1) 24 | self.input_dim = input_dim 25 | self.output_dim = output_dim 26 | self.output_len = output_len 27 | if self.cuda: 28 | self.rnn = self.rnn.cuda() 29 | self.sm = self.sm.cuda() 30 | self.float = torch.cuda.FloatTensor # not sure I need this 31 | else: 32 | self.float = torch.FloatTensor 33 | 34 | @staticmethod 35 | def _dist_to_bins(dist): 36 | return torch.max(dist, dim=-1)[1] 37 | 38 | @staticmethod 39 | def _get_sequence_info(seq): 40 | """ 41 | gets info on fed sequence 42 | """ 43 | if type(seq) == torch.nn.utils.rnn.PackedSequence: 44 | pack = True 45 | batch_size = seq.batch_sizes[0] 46 | sequence_length = len(seq.batch_sizes) 47 | else: 48 | pack = False 49 | batch_size = seq.size(0) 50 | sequence_length = seq.size(1) 51 | return pack, batch_size, sequence_length 52 | 53 | def _rnn_forward(self, seq, pack, batch_size): 54 | """ 55 | Helper function for forward that computes up to output layer 56 | """ 57 | h = Variable(torch.zeros(self.rnn.num_layers, 58 | batch_size, # not sure if need to reshape for batch_first 59 | self.rnn.hidden_size).type(self.float), 60 | requires_grad=False) 61 | # predict within the sequence 62 | out, h = self.rnn.forward(seq, h) 63 | if pack: 64 | out, lens = nn.utils.rnn.pad_packed_sequence(out, batch_first=True, padding_value=-1) 65 | else: 66 | lens = None 67 | # out has dim (batch_size, sequence_length, hidden_size) 68 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 69 | return out_flat, h, lens 70 | 71 | def _extract_final_dist(self, pack, batch_size, y, lens): 72 | """ 73 | Given y (possibly with padding), get distribution 74 | for final prediction at t+1 75 | prediction must be of size (batch_size, 1[, output_len], output_length) 76 | """ 77 | if type(self) is RecursiveRNN: 78 | output_len = 1 79 | else: 80 | output_len = self.decoding_steps 81 | single_view = 1, 1, output_len, self.output_dim 82 | batch_view = batch_size, 1, output_len, self.output_dim 83 | if pack: 84 | # need to handle uneven lengths 85 | final_dist = [] 86 | for i in range(batch_size): 87 | final_dist.append(y[i, lens[i]-1].view(single_view)) 88 | final_dist = torch.cat(final_dist).view(batch_view) 89 | else: 90 | final_dist = y[:, -1].contiguous().view(batch_view) 91 | return final_dist 92 | 93 | def forward(self, seq, glucose_dat, pred_len=0): 94 | raise NotImplementedError 95 | 96 | 97 | class RecursiveRNN(ForecastRNN): 98 | """ 99 | Designed to handle uneven batch sizes 100 | """ 101 | def __init__(self, input_dim, output_dim, hidden_size, depth, cuda): 102 | super(RecursiveRNN, self).__init__(input_dim=input_dim, 103 | output_dim=output_dim, 104 | hidden_size=hidden_size, 105 | depth=depth, 106 | cuda=cuda) 107 | self.output = nn.Linear(hidden_size, output_dim) 108 | if self.cuda: 109 | self.output = self.output.cuda() 110 | 111 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 112 | """ 113 | Given output from RNN layer, translate to output 114 | """ 115 | return self.sm(self.output(out_flat)).contiguous().view(batch_size, 
sequence_length, 1, self.output_dim) 116 | 117 | def forward(self, seq, glucose_dat, pred_len=0, **kwargs): 118 | """ 119 | pred_len is number of recursive forecasts to make 120 | Note: there is padding in form of -1, need to remove for 121 | accurate loss 122 | bins reverse probability predictions to real values 123 | 124 | returns: 125 | curr_dist: (batch_size, sequence_length-1, 1[output_len], output_dim) 126 | curr_pred: (batch_size, sequence_length-1, 1[pred_dim]) 127 | future_dist: (batch_size, 1[tiled preds], pred_len+1, output_dim) 128 | future_pred: (batch_size, 1[tiled preds], pred_len+1) 129 | """ 130 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 131 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 132 | 133 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 134 | 135 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 136 | 137 | if y.data.shape[1] == 1: 138 | # only 1 input, no within series predictions 139 | curr_dist = None 140 | else: 141 | curr_dist = y[:, :-1] 142 | curr_pred = self._dist_to_bins(curr_dist) 143 | 144 | future_dist = [final_dist] 145 | 146 | future_pred = [self._dist_to_bins(future_dist[-1])] 147 | 148 | for i in range(pred_len): 149 | if self.cuda: 150 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.cpu().numpy()) 151 | else: 152 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.numpy()) 153 | out, h = self.rnn.forward(Variable(torch.from_numpy(pred_vals).type(self.float)), h) 154 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 155 | y_f = self._hidden_state_to_output(out_flat, batch_size, 1) 156 | future_dist.append(y_f) 157 | future_pred.append(self._dist_to_bins(future_dist[-1])) 158 | return curr_dist, curr_pred, torch.cat(future_dist, dim=2), torch.cat(future_pred, dim=2) 159 | 160 | 161 | class MultiOutputRNN(ForecastRNN): 162 | """ 163 | Designed to handle uneven batch sizes 164 | """ 165 | def __init__(self, 166 | input_dim, 167 | output_dim, 168 | output_len, 169 | hidden_size, 170 | depth, 171 | cuda, 172 | autoregressive=False, 173 | sequence=False, 174 | polynomial=False, 175 | degree=2): 176 | super(MultiOutputRNN, self).__init__(input_dim=input_dim, 177 | output_dim=output_dim, 178 | hidden_size=hidden_size, 179 | depth=depth, 180 | output_len=output_len, 181 | cuda=cuda) 182 | self.ar = autoregressive 183 | self.seq = sequence 184 | self.polynomial = polynomial 185 | self.degree = degree 186 | if self.polynomial: 187 | self.decoding_steps = self.degree+1 188 | self.polyval_layer = nn.Linear(self.decoding_steps*output_dim, output_len*output_dim) 189 | else: 190 | self.decoding_steps = self.output_len 191 | if self.seq: 192 | self.decoder = nn.GRU(input_size=hidden_size, 193 | hidden_size=hidden_size, 194 | num_layers=1, 195 | dropout=False, 196 | bidirectional=False, 197 | batch_first=False) 198 | self.decoder.cuda() 199 | self.output = nn.Linear(hidden_size, output_dim) 200 | elif self.ar: 201 | output = [nn.Linear(hidden_size, output_dim)] 202 | for i in range(self.decoding_steps-1): 203 | output.append(nn.Linear(hidden_size + output_dim, output_dim)) 204 | self.output = nn.ModuleList(output) 205 | else: 206 | output = [nn.Linear(hidden_size, output_dim) for i in range(self.decoding_steps)] 207 | self.output = nn.ModuleList(output) 208 | if self.cuda: 209 | self.output = self.output.cuda() 210 | 211 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 212 | """ 213 | Given output from RNN layer, translate 
to output 214 | y has size (batch_size, sequence_length, output_len, output_dim) 215 | might want to change 216 | """ 217 | if self.seq: 218 | y = [] 219 | encoded = out_flat[None, :] 220 | hidden = Variable(torch.zeros(encoded.data.shape)).cuda() 221 | for i in range(self.decoding_steps): 222 | encoded, hidden = self.decoder(encoded, hidden) 223 | pred = self.sm(self.output(encoded[0])).contiguous() 224 | y.append(pred.view(batch_size, 225 | sequence_length, 226 | 1, 227 | self.output_dim)) 228 | return torch.cat(y, dim=2) 229 | else: 230 | y = [] 231 | for i in range(len(self.output)): 232 | if self.ar: 233 | if i == 0: 234 | pred = self.sm(self.output[0](out_flat)).contiguous() 235 | y.append(pred.view(batch_size, 236 | sequence_length, 237 | 1, 238 | self.output_dim)) 239 | else: 240 | fused_state = torch.cat((out_flat, pred), dim=1) 241 | pred = self.sm(self.output[i](fused_state)).contiguous() 242 | y.append(pred.view(batch_size, 243 | sequence_length, 244 | 1, 245 | self.output_dim)) 246 | else: 247 | y.append(self.sm(self.output[i](out_flat)).contiguous().view(batch_size, 248 | sequence_length, 249 | 1, 250 | self.output_dim)) 251 | return torch.cat(y, dim=2) 252 | 253 | def poly_to_val(self, poly): 254 | return poly 255 | 256 | def forward(self, seq, glucose_dat, **kwargs): 257 | """ 258 | prediction into future is based on output size 259 | Note: there is padding in form of -1, need to remove for 260 | accurate loss 261 | bins reverse probability predictions to real values 262 | """ 263 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 264 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 265 | 266 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 267 | 268 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 269 | 270 | if y.data.shape[1] <= self.output_len: 271 | # curr_dist contains dists ENTIRELY within signal 272 | # note that this reduces training size 273 | curr_dist = None 274 | else: 275 | curr_dist = y[:, :-self.output_len] 276 | curr_pred = self._dist_to_bins(curr_dist) 277 | 278 | future_dist = [final_dist] 279 | future_pred = self._dist_to_bins(future_dist[-1]) 280 | if self.polynomial: 281 | curr_real_pred = self.poly_to_val(curr_pred) 282 | future_real_pred = self.poly_to_val(future_pred) 283 | return (curr_dist, 284 | curr_pred, 285 | torch.cat(future_dist, dim=0), 286 | future_pred) 287 | 288 | def sort_batch(batch_x, batch_y, batch_y_real, lens): 289 | """ 290 | Sorts minibatch by length in decreasing order 291 | to accomodate pack_padded_sequence 292 | """ 293 | dat_x, dat_y, dat_y_real, dat_l = batch_x.numpy(), batch_y.numpy(), batch_y_real.numpy(), lens.numpy() 294 | sort_x = dat_x[(dat_l*-1).argsort()] # -1 to get descending order 295 | sort_y = dat_y[(dat_l*-1).argsort()] 296 | sort_y_real = dat_y_real[(dat_l*-1).argsort()] 297 | sort_l = dat_l[(dat_l*-1).argsort()] 298 | return sort_x, sort_y, sort_y_real, sort_l 299 | 300 | 301 | def convert_batch(batch_x, batch_y, batch_y_real, batch_l, cuda, real_values=False): 302 | """ 303 | Given batches in numpy form, 304 | convert to proper type for model input 305 | """ 306 | if cuda: 307 | float_type = torch.cuda.FloatTensor 308 | long_type = torch.cuda.LongTensor 309 | else: 310 | float_type = torch.FloatTensor 311 | long_type = torch.LongTensor 312 | new_batch_x = Variable(torch.from_numpy(batch_x).type(float_type), requires_grad=False) 313 | if real_values: 314 | new_batch_y = Variable(torch.from_numpy(batch_y).type(float_type), 
requires_grad=False) 315 | new_batch_y_real = new_batch_y 316 | else: 317 | new_batch_y = Variable(torch.from_numpy(batch_y).type(long_type), requires_grad=False) 318 | new_batch_y_real = Variable(torch.from_numpy(batch_y_real).type(long_type), requires_grad=False) 319 | new_batch_l = list(batch_l) 320 | return new_batch_x, new_batch_y, new_batch_y_real, new_batch_l 321 | 322 | 323 | def remove_prediction_padding(prediction_distribution, 324 | target_value, 325 | loss_weight, 326 | target_real_value): 327 | """ 328 | Masks prediction for artificial targets and flattens 329 | """ 330 | # assuming target value will have all -1 or no -1 331 | missing_indicator = torch.min(target_value, dim=2)[0] != -1 332 | 333 | prediction_nopad = torch.masked_select( 334 | prediction_distribution, 335 | missing_indicator[:, :, None, None]).view(-1, prediction_distribution.shape[-1]) 336 | target_nopad = torch.masked_select( 337 | target_value, 338 | missing_indicator[:, :, None]) 339 | target_real_nopad = torch.masked_select( 340 | target_real_value, 341 | missing_indicator[:, :, None]) 342 | loss_weight_nopad = torch.masked_select( 343 | loss_weight, 344 | missing_indicator[:, :, None]) 345 | return prediction_nopad, target_nopad, target_real_nopad, loss_weight_nopad 346 | 347 | 348 | def remove_prediction_padding_old(prediction_distribution, 349 | target_value, 350 | loss_weight, 351 | target_real_value): 352 | """ 353 | Masks prediction for artificial targets 354 | """ 355 | prediction_distribution = prediction_distribution.contiguous().view(-1, 361) 356 | target_value = target_value.contiguous().view(-1) 357 | loss_weight = loss_weight.contiguous().view(-1) 358 | inter = (target_value != -1).view(-1, 1) 359 | mask = inter.expand(prediction_distribution.size(0), prediction_distribution.size(1)) 360 | ret = [prediction_distribution[mask].view(-1, prediction_distribution.size(1)), 361 | target_value[(target_value != -1)], 362 | None] 363 | if loss_weight is not None: 364 | ret.append(loss_weight[(target_value != -1)]) 365 | else: 366 | ret.append(None) 367 | return ret 368 | 369 | 370 | def get_loss(inp, 371 | out, 372 | out_real, 373 | lens, 374 | cuda, 375 | gn, 376 | glucose_dat, 377 | criterion, 378 | base=1, 379 | value_weight=0, 380 | value_ratio=0): 381 | """ 382 | Simple helper function that calculates model loss. 
383 | Basically to save some space 384 | """ 385 | batch_size_val = inp.size(0) 386 | output_dim = gn.output_dim 387 | 388 | weight_vec = torch.Tensor([base ** i for i in reversed(range(out.size(-1)))]) 389 | weight_vec = (weight_vec/weight_vec.sum()) * weight_vec.numel() # consistent weighting on output length 390 | loss_weight = weight_vec.expand(out.shape) 391 | 392 | inp_s, out_s, out_real_s, lens_s = sort_batch(inp, out, out_real, lens) 393 | inp_s, out_s, out_real_s, lens_s = convert_batch(batch_x=inp_s, 394 | batch_y=out_s, 395 | batch_y_real=out_real_s, 396 | batch_l=lens_s, 397 | cuda=cuda, 398 | real_values=glucose_dat.real_values) 399 | x = nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 400 | glucose_dat.max_pad, 401 | 1), 402 | list(np.array(lens_s)), 403 | batch_first=True) 404 | if glucose_dat.real_values: 405 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 406 | y_p_flat = y_p.contiguous().view(-1, output_dim) 407 | (y_p_nopad, 408 | y_nopad, 409 | y_real_nopad, 410 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=y_p_flat, 411 | target_value=out_s.view(-1), 412 | loss_weight=Variable(loss_weight.cuda()), 413 | target_real_value=out_real_s) 414 | try: 415 | loss = criterion(y_p_nopad, y_nopad) 416 | except: 417 | print(type(y_nopad.data)) 418 | print(type(out_s.data)) 419 | print(type(out)) 420 | raise 421 | 422 | else: 423 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 424 | (yd_p_nopad, 425 | y_nopad, 426 | y_real_nopad, 427 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=yd_p, 428 | target_value=out_s, 429 | loss_weight=Variable(loss_weight.cuda()), 430 | target_real_value=out_real_s) 431 | if glucose_dat.polynomial: 432 | # include MSE 433 | real_criterion = torch.nn.MSELoss() 434 | coeffs = get_coeffs(yd_p_nopad.view(-1, len(glucose_dat.bins), yd_p_nopad.shape[-1]), glucose_dat.bins) 435 | real_values = coeffs_to_values(coeffs) 436 | loss_real = real_criterion(real_values.view(-1), y_real_nopad.float()) * value_weight 437 | loss_dist = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 438 | loss = (1-value_ratio) * loss_dist + value_ratio * loss_real 439 | if np.isnan(loss.data[0]): 440 | raise ValueError('Got NaN loss') 441 | else: 442 | loss = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 443 | if np.isnan(loss.data[0]): 444 | raise ValueError('Got NaN loss') 445 | if torch.min(y_nopad.data) == -1: 446 | print('trouble ahead') 447 | return loss.mean(), y_p 448 | 449 | 450 | def get_coeffs(dist, bins): 451 | prob = torch.exp(dist) 452 | bin_vals = Variable(torch.from_numpy(np.array(bins)).float().cuda()).expand_as(prob).transpose(1, 2) 453 | coeffs = torch.bmm(prob, bin_vals) # includes false off-diag coeffs 454 | real_coeffs = coeffs[torch.eye(len(bins)).expand_as(coeffs).byte().cuda()].view(-1, len(bins)) # extract diagonals 455 | return real_coeffs 456 | 457 | 458 | def coeffs_to_values(coeffs): 459 | degree = coeffs.shape[-1] 460 | basis = Variable(torch.stack([torch.arange(0, 6) ** i for i in range(degree)]).cuda()) 461 | return coeffs.view(-1, degree) @ basis 462 | 463 | 464 | def get_predictions(inp, 465 | out, 466 | lens, 467 | cuda, 468 | gn, 469 | glucose_dat): 470 | """ 471 | Gets predictions 472 | """ 473 | batch_size_val = inp.size(0) 474 | output_dim = gn.output_dim 475 | 476 | inp_s, out_s, lens_s = sort_batch(inp, out, lens) 477 | inp_s, out_s, lens_s = convert_batch(inp_s, 478 | out_s, 479 | lens_s, 480 | cuda, 481 | glucose_dat.real_values) 482 | x = 
nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 483 | glucose_dat.max_pad, 484 | 1), 485 | list(np.array(lens_s)), 486 | batch_first=True) 487 | if glucose_dat.real_values: 488 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 489 | y_p_flat = y_p.contiguous().view(-1, output_dim) 490 | y_p_nopad, y_nopad = remove_prediction_padding(y_p_flat, 491 | out_s.view(-1)) 492 | 493 | else: 494 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 495 | yd_p_flat = yd_p.contiguous().view(-1, output_dim) 496 | yd_p_nopad, y_nopad = remove_prediction_padding(yd_p_flat, 497 | out_s.view(-1)) 498 | return yd_p, y_p, yd_f, y_f 499 | 500 | 501 | def make_model(config): 502 | """ 503 | A poor man's factory method. 504 | """ 505 | if config.model_type == 'recursive': 506 | gn = RecursiveRNN(input_dim=config.input_dim, 507 | output_dim=config.output_dim, 508 | hidden_size=config.hidden_size, 509 | depth=config.depth, 510 | cuda=True) 511 | else: 512 | assert config.output_len == config.pred_len # could relax 513 | gn = MultiOutputRNN(input_dim=config.input_dim, 514 | output_dim=config.output_dim, 515 | hidden_size=config.hidden_size, 516 | output_len=config.output_len, 517 | depth=config.depth, 518 | cuda=True, 519 | autoregressive=config.autoregressive, 520 | sequence=config.sequence, 521 | polynomial=config.polynomial, 522 | degree=config.degree) 523 | return gn 524 | -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/trainer-checkpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements training schemes with logging 3 | """ 4 | import numpy as np 5 | import os 6 | import time 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader, sampler 10 | from tensorboardX import SummaryWriter 11 | from tqdm import tqdm 12 | import joblib 13 | 14 | from forecast_code.lib.training import model as forecast_model 15 | 16 | 17 | class ExperimentTrainer: 18 | """ 19 | Simple training scheme 20 | """ 21 | 22 | def __init__(self, model, optimizer, criterion, name, model_dir, log_dir, 23 | load=False, load_epoch=None): 24 | """ 25 | :param model: initialized model for training 26 | :param optimizer: initialized training optimizer 27 | :param name: string to save trainer results under 28 | :param load: whether or not to load results from previous train if they exist 29 | :param epoch: which epoch results to load, if None then the best found 30 | """ 31 | self.model = model 32 | self.criterion = criterion 33 | self.optimizer = optimizer 34 | self.name = name 35 | self.model_dir = model_dir 36 | self.log_dir = log_dir 37 | 38 | if not os.path.exists(self.model_dir): 39 | os.makedirs(self.model_dir) 40 | os.makedirs(self.log_dir) 41 | else: 42 | if load: 43 | if load_epoch is None: 44 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, 'bsf_sup.pt'))) 45 | else: 46 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, '{}_sup.pt'.format(load_epoch)))) 47 | 48 | else: 49 | print('Warning: directory already exists') 50 | self.writer = SummaryWriter(log_dir=self.log_dir) 51 | 52 | def train_sup(self, epoch_lim, data, valid_data, early_stopping_lim, 53 | batch_size, num_workers, track_embeddings, validation_rate, loss_weight_base=1, 54 | value_weight=0, value_ratio=0): 55 | """ 56 | Training loop 57 | :param epoch_lim: total number of training epochs 58 | :param data: training data 59 | :param valid_data: validation data 60 | 
:param early_stopping_lim: Number of epochs to run without validation improvement before stopping 61 | if None, never stop early 62 | :param batch_size: training batch_size 63 | :param num_workers: number of CPU workers to use for data loading 64 | :param track_embeddings: Save out embedding information at end of run 65 | :param validation_rate: Check validation performance every validation_rate training epochs 66 | :param loss_weight_base: A constant between 0 and 1 used to interpolate between Single (=0) and Multi (=1) Step forecasting. 67 | :param value_weight: A constant multiplier for the real-value loss, set to 0 in the paper 68 | :param value_ratio: The proportion of loss used for the MSE loss term (as opposed for the cross-entropy loss), set to 0 in the paper 69 | :return loss array, model: 70 | """ 71 | if early_stopping_lim is None: 72 | early_stopping_lim = epoch_lim 73 | train_sampler = sampler.RandomSampler(np.arange(len(data))) 74 | data_train = DataLoader(data, 75 | batch_size=batch_size, 76 | sampler=train_sampler, 77 | drop_last=True) 78 | 79 | valid_sampler = sampler.SequentialSampler(np.arange(len(valid_data))) 80 | data_valid = DataLoader(valid_data, 81 | batch_size=batch_size, 82 | sampler=valid_sampler) 83 | step = 0 84 | 85 | bsf_loss = np.inf 86 | epochs_without_improvement = 0 87 | improvements = [] 88 | for epoch in range(epoch_lim): 89 | if epochs_without_improvement > early_stopping_lim: 90 | print('Exceeded early stopping limit, stopping') 91 | break 92 | if epoch % validation_rate == 0: 93 | valid_loss = self.validation(data_valid=data_valid, 94 | step=step, 95 | data=data, 96 | loss_weight_base=loss_weight_base, 97 | value_weight=value_weight, value_ratio=value_ratio) 98 | (bsf_loss, 99 | epochs_without_improvement, 100 | improvements) = self.manage_early_stopping(bsf_loss=bsf_loss, 101 | early_stopping_lim=early_stopping_lim, 102 | epochs_without_improvement=epochs_without_improvement, 103 | valid_loss=valid_loss, validation_rate=validation_rate, 104 | improvements=improvements) 105 | running_train_loss = 0 106 | for inp, out, out_real, lens in tqdm(data_train): 107 | loss, y_p = forecast_model.get_loss(inp=inp, 108 | out=out, 109 | lens=lens, 110 | cuda=True, 111 | gn=self.model, 112 | glucose_dat=data, 113 | criterion=self.criterion, 114 | base=loss_weight_base, 115 | out_real=out_real, 116 | value_weight=value_weight, 117 | value_ratio=value_ratio) 118 | step += 1 119 | running_train_loss += loss.data.cpu().numpy()[0] 120 | self.optimizer.zero_grad() 121 | loss.backward() 122 | self.optimizer.step() 123 | running_train_loss = running_train_loss/len(data_train) 124 | self.writer.add_scalar(tag='train_loss', 125 | scalar_value=running_train_loss, 126 | global_step=step) 127 | torch.save(self.model.state_dict(), '{}/final_sup.pt'.format(self.model_dir)) 128 | if track_embeddings: 129 | self.embed(data_valid, step, embed_batch=100) 130 | return improvements 131 | 132 | def manage_early_stopping(self, bsf_loss, early_stopping_lim, epochs_without_improvement, valid_loss, 133 | validation_rate, improvements): 134 | if valid_loss < bsf_loss: 135 | print('improved validation loss from {:.3f} to {:.3f}'.format(bsf_loss, valid_loss)) 136 | bsf_loss = valid_loss 137 | improvements.append(epochs_without_improvement) 138 | epochs_without_improvement = 0 139 | torch.save(self.model.state_dict(), 140 | '{}/bsf_sup.pt'.format(self.model_dir)) 141 | else: 142 | epochs_without_improvement += validation_rate 143 | print('Validation loss of {} did not improve on 
{}'.format(valid_loss, bsf_loss)) 144 | print('Early stopping at {}/{}'.format(epochs_without_improvement, early_stopping_lim)) 145 | return bsf_loss, epochs_without_improvement, improvements 146 | 147 | def validation(self, data_valid, step, data, loss_weight_base, value_weight, value_ratio): 148 | self.model.eval() 149 | running_valid_loss = 0 150 | for inp, out, out_real, lens in data_valid: 151 | loss, y_p = forecast_model.get_loss(inp=inp, 152 | out=out, 153 | lens=lens, 154 | cuda=True, 155 | gn=self.model, 156 | glucose_dat=data, 157 | criterion=self.criterion, 158 | base=loss_weight_base, 159 | out_real=out_real, 160 | value_weight=value_weight, 161 | value_ratio=value_ratio) 162 | step += 1 163 | running_valid_loss += loss.data.cpu().numpy()[0] 164 | running_valid_loss = running_valid_loss / len(data_valid) 165 | print('validation loss: {:.3f}'.format(running_valid_loss)) 166 | self.writer.add_scalar(tag='valid_total_loss', 167 | scalar_value=running_valid_loss, 168 | global_step=step) 169 | self.model.train() 170 | return running_valid_loss 171 | 172 | def embed(self, dataloader, step, embed_batch=5): 173 | print('embed') 174 | embeddings = None 175 | metadata = [] 176 | i = 0 177 | for dat, dat_past, dat_future, init, label in dataloader: 178 | x = Variable(dat.float().cuda()) 179 | e = self.model.embed(x).data 180 | metadata += np.round(label.numpy(), 2).tolist() 181 | if embeddings is None: 182 | embeddings = e 183 | else: 184 | embeddings = torch.cat((embeddings, e)) 185 | if i > embed_batch: 186 | break 187 | i += 1 188 | print(len(metadata)) 189 | self.writer.add_embedding(mat=embeddings, 190 | metadata=metadata, 191 | global_step=step) 192 | 193 | def get_predictions(self, dataloader): 194 | self.model.eval() 195 | data = None 196 | data_past = None 197 | data_future = None 198 | y = None 199 | pred_pres = None 200 | pred_past = None 201 | pred_future = None 202 | pred = None 203 | for dat, dat_past, dat_future, init, label in dataloader: 204 | print('evaluation batch') 205 | window_data = [] 206 | window_data_past = [] 207 | window_data_future = [] 208 | window_y = [] 209 | window_pred = [] 210 | window_pred_pres = [] 211 | window_pred_past = [] 212 | window_pred_future = [] 213 | if not self.window: 214 | dat = [dat] 215 | dat_past = [dat_past] 216 | dat_future = [dat_future] 217 | for window in range(len(dat)): 218 | x = Variable(dat[window].float().cuda()) 219 | y_pred, x_pres, x_past, x_future = self.model.forward(x) 220 | y_pred = y_pred.data.cpu().numpy() 221 | if self.decode_present: 222 | x_pres = x_pres.data.cpu().numpy() 223 | if self.decode_past: 224 | x_past = x_past.data.cpu().numpy() 225 | if self.decode_future: 226 | x_future = x_future.data.cpu().numpy() 227 | yt = label.numpy() 228 | xt_pres = dat[window].numpy() 229 | xt_past = dat_past[window].numpy() 230 | xt_future = dat_future[window].numpy() 231 | window_data.append(xt_pres) 232 | window_data_past.append(xt_past) 233 | window_data_future.append(xt_future) 234 | window_y.append(yt) 235 | window_pred.append(y_pred) 236 | window_pred_pres.append(x_pres) 237 | window_pred_past.append(x_past) 238 | window_pred_future.append(x_future) 239 | if data is None: 240 | data = [window_data] 241 | data_past = [window_data_past] 242 | data_future = [window_data_future] 243 | y = [window_y] 244 | pred_pres = [window_pred_pres] 245 | pred_past = [window_pred_past] 246 | pred_future = [window_pred_future] 247 | pred = [window_pred] 248 | else: 249 | data.append(window_data) 250 | data_past.append(window_data_past) 
251 | data_future.append(window_data_future) 252 | y.append(window_y) 253 | if self.decode_present: 254 | pred_pres.append(window_pred_pres) 255 | if self.decode_past: 256 | pred_past.append(window_pred_past) 257 | if self.decode_future: 258 | pred_future.append(window_pred_future) 259 | pred.append(window_pred) 260 | print('done getting predictions') 261 | return (data, data_past, data_future, y, 262 | pred_pres, pred_past, pred_future, pred) 263 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/lib/__init__.py -------------------------------------------------------------------------------- /lib/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/lib/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /lib/glucose_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.polynomial import polynomial as pn 3 | import joblib 4 | from joblib import Parallel, delayed 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class GlucoseDataset(Dataset): 9 | """ 10 | Blood glucose dataset for pytorch 11 | Entry: (data, y_bin, y_real, len) 12 | loading everything into memory as small 13 | """ 14 | def __init__(self, 15 | data_pkl, 16 | max_pad, 17 | output_len, 18 | output_dim, 19 | polynomial=False, 20 | degree=None, 21 | range_low=None, 22 | range_high=None, 23 | coeff_file=None, 24 | real_values=False, 25 | parallel_cache=False, 26 | max_size=None, 27 | flip_signal=False): 28 | 29 | # for one-hot encoding, assumes 40-400 mg/dL range 30 | self.output_dim = output_dim 31 | self.polynomial = polynomial 32 | self.degree = degree 33 | self.range_low = range_low 34 | self.range_high = range_high 35 | self.real_values = real_values 36 | self.max_pad = max_pad 37 | 38 | self.data = joblib.load(data_pkl) 39 | if flip_signal: 40 | self.data_flip = [] 41 | for i in range(len(self.data)): 42 | self.data_flip.append(np.flip(self.data[i], axis=0)) 43 | self.data = self.data_flip 44 | if max_size is not None: 45 | self.data = self.data[0:max_size] 46 | self.output_len = output_len 47 | 48 | if not self.real_values: 49 | if self.polynomial: 50 | # from degree, calculate ranges 51 | # from ranges, get bins 52 | # should make more flexible, function parameter binning 53 | # old hand-defined range, captured 100% variation 54 | #[[40, 400], 55 | # [-36, 36], 56 | # [-5.5, 5.5]] 57 | for var in [self.degree, self.range_low, self.range_high]: 58 | assert var is not None, 'Must set degree, range_high, and range_low for polynomial' 59 | 60 | if coeff_file is None: 61 | ranges = self.auto_poly_range() 62 | else: 63 | ranges = self.precomputed_poly_range(coeff_file) 64 | 65 | self.bin_step = [(ranges[i][1]-ranges[i][0])/(self.output_dim-1) for i in range(degree+1)] 66 | self.bins = [ 67 | np.linspace(ranges[i][0], 68 | ranges[i][1], 69 | self.output_dim) + (0.5 * self.bin_step[i]) 70 | for i in range(self.degree+1)] 71 | else: 72 | # simple value binning 73 | self.bin_step = (400-40)/(self.output_dim-1) 74 | # the half step appraoch is an artifact of wanting perfect bins 
with output_dim=361 75 | self.bins = np.linspace(40, 400, self.output_dim)+(self.bin_step * 0.5) 76 | 77 | # trying out precaching results for less intensive load 78 | count = 0 79 | self.x_out = [] 80 | self.y_out = [] 81 | self.y_real = [] 82 | self.lens = [] 83 | print('caching results') 84 | if parallel_cache: 85 | res_tuples = Parallel(n_jobs=5, verbose=10)(delayed(self.prepare_output)(idx) for idx in range(len(self.data))) 86 | for idx in range(len(self.data)): 87 | x_pad, y_pad, y_real_pad, lens = res_tuples[idx] 88 | self.x_out.append(x_pad) 89 | self.y_out.append(y_pad) 90 | self.y_real.append(y_real_pad) 91 | self.lens.append(lens) 92 | else: 93 | for idx in range(len(self.data)): 94 | if idx % 1000 == 0: 95 | print('{}/{}'.format(idx, len(self.data))) 96 | x_pad, y_pad, y_real_pad, lens = self.prepare_output(idx) 97 | self.x_out.append(x_pad) 98 | self.y_out.append(y_pad) 99 | self.y_real.append(y_real_pad) 100 | self.lens.append(lens) 101 | 102 | def prepare_output(self, idx, real_y=True): 103 | x_dat = self.data[idx] 104 | length = self.max_pad - len(x_dat) 105 | x_pad = np.pad(x_dat, 106 | (0, length), 107 | mode='constant', 108 | constant_values=-1) 109 | y_dat = self.window_stack(x_dat[1::].reshape(-1, 1)) 110 | if self.real_values: 111 | y_bins = y_dat 112 | else: 113 | y_bins = self.values_to_bins(y_dat) 114 | y_pad = np.pad(y_bins, 115 | ((0, length), (0, 0)), 116 | mode='constant', 117 | constant_values=-1) 118 | if real_y: 119 | y_real_pad = np.pad(y_dat, 120 | ((0, length), (0, 0)), 121 | mode='constant', 122 | constant_values=-1) 123 | return x_pad, y_pad, y_real_pad, self.max_pad - length 124 | else: 125 | return x_pad, y_pad, self.max_pad - length 126 | 127 | def auto_poly_range(self, percentile): 128 | """ 129 | Using degree and training data, creates 130 | range that captures percentile% of variation of the best fit 131 | coefficient values. 132 | """ 133 | raise NotImplementedError('TODO') 134 | 135 | def precomputed_poly_range(self, coeff_file): 136 | """ 137 | Simple function that uses precomputed coefficient 138 | percentile dict 139 | 140 | low, high can be integers in 0-100 141 | 142 | Requires precomputed coeff dict 143 | """ 144 | assert self.range_low < self.range_high 145 | 146 | coeff = joblib.load(coeff_file) 147 | 148 | ranges = [] 149 | for i in range(self.degree+1): 150 | low_val = coeff[self.degree][i][self.range_low] 151 | high_val = coeff[self.degree][i][self.range_high] 152 | ranges.append([low_val, high_val]) 153 | return ranges 154 | 155 | def scale(self, x): 156 | """ 157 | turn glucose signal with 40-400 to range -1 to 1 158 | can add more intelligent scaling for balencing hypo/hyper, 159 | though real concern is moving over to classification 160 | """ 161 | return (x-220)/180. 162 | 163 | def one_hot(self, seq): 164 | """ 165 | turn glucose signal into one hot distribution 166 | with size=output_dim, linearly bins glucose 167 | range 40-400 168 | don't need for NLLLoss 169 | """ 170 | dist = np.zeros((seq.size, self.output_dim)) 171 | dist[np.arange(seq.size), np.digitize(seq, self.bins)] = 1. 
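# Sketch of the polynomial variant defined just below (hedged illustration; degree=2,
# output_len=6 and a precomputed coeff_file are assumptions, and the window values are
# hypothetical):
#   y_window = np.array([130., 134., 139., 145., 152., 160.])   # one target window
#   coeffs   = pn.polyfit(np.arange(6), y_window, deg=2)        # [c0, c1, c2]
#   # polymerize() digitizes each coefficient against its own bins[i] and clips to
#   # [0, output_dim-1]; reverse_polymerize() maps the bins back to coefficient values
#   # and evaluates pn.polyval(np.arange(output_len), coeffs) to recover the forecast.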
172 | return dist 173 | 174 | def polymerize(self, y): 175 | """ 176 | Turns output window into best fit polynomial 177 | with output [x'_0, ..., x'_d] where x' is 178 | bin number that x would be in (using ranges) 179 | """ 180 | x_inds = [] 181 | if len(y.shape) > 1: 182 | for j in range(y.shape[0]): 183 | coeffs = pn.polyfit(np.arange(len(y[j])), y[j], deg=self.degree) 184 | x_inds.append([np.digitize(coeffs[i], self.bins[i]).item() for i in range(self.degree+1)]) 185 | else: 186 | coeffs = pn.polyfit(np.arange(len(y)), y, deg=self.degree) 187 | for i in range(self.degree+1): 188 | x_inds.append(np.digitize(coeffs[i], self.bins[i]).item()) 189 | return np.clip(x_inds, 0, self.output_dim-1) 190 | 191 | def bins_to_coeff_values(self, pred): 192 | """ 193 | Given bins for polynomial coefficients, 194 | return estimate of real coefficient values 195 | """ 196 | if len(pred.shape) > 1: 197 | vals = [np.array([self.bins[i][np.clip(np.array(pred[:, i], dtype=int), 0, self.output_dim-1)]]) 198 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 199 | coeffs = np.concatenate(vals, axis=0).T 200 | else: 201 | coeffs = [self.bins[i][np.clip(np.array(pred[i], dtype=int), 0, self.output_dim-1)] 202 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 203 | return np.array(coeffs) 204 | 205 | def reverse_polymerize(self, pred): 206 | """ 207 | Given bins for polynomial coefficients, returns forecast 208 | For new foreacsting system, flexible degree and doesn't assume 209 | adding mistake 210 | """ 211 | coeffs = self.bins_to_coeff_values(pred) 212 | return pn.polyval(np.arange(self.output_len), coeffs.T) 213 | 214 | def values_to_bins(self, y): 215 | """ 216 | Gvien a y sample (or batch of y samples), changes from 217 | value to categorical representation 218 | """ 219 | if self.real_values: 220 | return y 221 | if self.polynomial: 222 | return self.polymerize(y) 223 | else: 224 | return np.digitize(y, self.bins) 225 | 226 | def bins_to_values(self, y): 227 | """ 228 | Given a y sample (or batch of y samples), changes from categorical 229 | to value representation 230 | """ 231 | if type(y) is not np.ndarray: 232 | y = y.numpy() 233 | if self.real_values: 234 | return y 235 | if self.polynomial: 236 | return self.reverse_polymerize(y) 237 | else: 238 | vals = self.bins[np.clip(np.array(y, dtype=int), 0, self.output_dim-1)] 239 | return vals - (0.5 * self.bin_step) 240 | 241 | def index_to_values(self, x, i): 242 | """ 243 | Given i index for output value: y[i] 244 | returns ground truth x values 245 | bins_to_values can also be used, but ignores 246 | polynomial residual 247 | """ 248 | return x[i+1:i+1+self.output_len] 249 | 250 | def window_stack(self, seq, stepsize=1): 251 | """ 252 | Gets rolling window from seq of length self.output_len 253 | stepsize determines dilation 254 | """ 255 | length = self.output_len 256 | n = seq.shape[0] 257 | return np.hstack(seq[i:1+n+i-length:stepsize] for i in range(length)) 258 | 259 | def __len__(self): 260 | return len(self.data) 261 | 262 | def __getitem__(self, idx): 263 | return self.x_out[idx], self.y_out[idx], self.y_real[idx], self.lens[idx] 264 | -------------------------------------------------------------------------------- /lib/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import joblib 3 | 4 | import torch 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | 9 | class ForecastRNN(nn.Module): 10 | """ 11 | Helper for pytorch reimplementation 12 
| Uses variable sized/depth GRU with linear layer to get output right 13 | """ 14 | def __init__(self, input_dim, output_dim, hidden_size, depth, output_len=-1, cuda=False): 15 | super(ForecastRNN, self).__init__() 16 | self.cuda = cuda 17 | self.rnn = nn.GRU(input_size=input_dim, 18 | hidden_size=hidden_size, 19 | num_layers=depth, 20 | dropout=False, 21 | bidirectional=False, # would bidirectional help forecasting? 22 | batch_first=True) 23 | self.sm = nn.LogSoftmax(dim=1) 24 | self.input_dim = input_dim 25 | self.output_dim = output_dim 26 | self.output_len = output_len 27 | if self.cuda: 28 | self.rnn = self.rnn.cuda() 29 | self.sm = self.sm.cuda() 30 | self.float = torch.cuda.FloatTensor # not sure I need this 31 | else: 32 | self.float = torch.FloatTensor 33 | 34 | @staticmethod 35 | def _dist_to_bins(dist): 36 | return torch.max(dist, dim=-1)[1] 37 | 38 | @staticmethod 39 | def _get_sequence_info(seq): 40 | """ 41 | gets info on fed sequence 42 | """ 43 | if type(seq) == torch.nn.utils.rnn.PackedSequence: 44 | pack = True 45 | batch_size = seq.batch_sizes[0] 46 | sequence_length = len(seq.batch_sizes) 47 | else: 48 | pack = False 49 | batch_size = seq.size(0) 50 | sequence_length = seq.size(1) 51 | return pack, batch_size, sequence_length 52 | 53 | def _rnn_forward(self, seq, pack, batch_size): 54 | """ 55 | Helper function for forward that computes up to output layer 56 | """ 57 | h = Variable(torch.zeros(self.rnn.num_layers, 58 | batch_size, # not sure if need to reshape for batch_first 59 | self.rnn.hidden_size).type(self.float), 60 | requires_grad=False) 61 | # predict within the sequence 62 | out, h = self.rnn.forward(seq, h) 63 | if pack: 64 | out, lens = nn.utils.rnn.pad_packed_sequence(out, batch_first=True, padding_value=-1) 65 | else: 66 | lens = None 67 | # out has dim (batch_size, sequence_length, hidden_size) 68 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 69 | return out_flat, h, lens 70 | 71 | def _extract_final_dist(self, pack, batch_size, y, lens): 72 | """ 73 | Given y (possibly with padding), get distribution 74 | for final prediction at t+1 75 | prediction must be of size (batch_size, 1[, output_len], output_length) 76 | """ 77 | if type(self) is RecursiveRNN: 78 | output_len = 1 79 | else: 80 | output_len = self.decoding_steps 81 | single_view = 1, 1, output_len, self.output_dim 82 | batch_view = batch_size, 1, output_len, self.output_dim 83 | if pack: 84 | # need to handle uneven lengths 85 | final_dist = [] 86 | for i in range(batch_size): 87 | final_dist.append(y[i, lens[i]-1].view(single_view)) 88 | final_dist = torch.cat(final_dist).view(batch_view) 89 | else: 90 | final_dist = y[:, -1].contiguous().view(batch_view) 91 | return final_dist 92 | 93 | def forward(self, seq, glucose_dat, pred_len=0): 94 | raise NotImplementedError 95 | 96 | 97 | class RecursiveRNN(ForecastRNN): 98 | """ 99 | Designed to handle uneven batch sizes 100 | """ 101 | def __init__(self, input_dim, output_dim, hidden_size, depth, cuda): 102 | super(RecursiveRNN, self).__init__(input_dim=input_dim, 103 | output_dim=output_dim, 104 | hidden_size=hidden_size, 105 | depth=depth, 106 | cuda=cuda) 107 | self.output = nn.Linear(hidden_size, output_dim) 108 | if self.cuda: 109 | self.output = self.output.cuda() 110 | 111 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 112 | """ 113 | Given output from RNN layer, translate to output 114 | """ 115 | return self.sm(self.output(out_flat)).contiguous().view(batch_size, sequence_length, 1, self.output_dim) 
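# Usage sketch for the recursive decoder below (hypothetical hyperparameters; this
# mirrors the general pattern in walkthrough.ipynb rather than its exact settings):
#   glucose_dat = GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',
#                                max_pad=288, output_len=6, output_dim=361)  # max_pad is an assumption
#   model = RecursiveRNN(input_dim=1, output_dim=361, hidden_size=512, depth=2, cuda=False)
#   x = Variable(torch.from_numpy(batch).float())   # batch: hypothetical (B, seq_len, 1) scaled CGM array
#   curr_dist, curr_pred, future_dist, future_pred = model(x, glucose_dat, pred_len=5)
#   # future_pred[:, 0] holds six recursively generated bin indices (roughly 30 minutes
#   # ahead at the 5-minute CGM sampling rate); glucose_dat.bins_to_values() maps them
#   # back to mg/dL. The loop in forward() feeds each argmax prediction back in as the
#   # next input, which is what distinguishes this class from MultiOutputRNN.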
116 | 117 | def forward(self, seq, glucose_dat, pred_len=0, **kwargs): 118 | """ 119 | pred_len is number of recursive forecasts to make 120 | Note: there is padding in form of -1, need to remove for 121 | accurate loss 122 | bins reverse probability predictions to real values 123 | 124 | returns: 125 | curr_dist: (batch_size, sequence_length-1, 1[output_len], output_dim) 126 | curr_pred: (batch_size, sequence_length-1, 1[pred_dim]) 127 | future_dist: (batch_size, 1[tiled preds], pred_len+1, output_dim) 128 | future_pred: (batch_size, 1[tiled preds], pred_len+1) 129 | """ 130 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 131 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 132 | 133 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 134 | 135 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 136 | 137 | if y.data.shape[1] == 1: 138 | # only 1 input, no within series predictions 139 | curr_dist = None 140 | else: 141 | curr_dist = y[:, :-1] 142 | curr_pred = self._dist_to_bins(curr_dist) 143 | 144 | future_dist = [final_dist] 145 | 146 | future_pred = [self._dist_to_bins(future_dist[-1])] 147 | 148 | for i in range(pred_len): 149 | if self.cuda: 150 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.cpu().numpy()) 151 | else: 152 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.numpy()) 153 | out, h = self.rnn.forward(Variable(torch.from_numpy(pred_vals).type(self.float)), h) 154 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 155 | y_f = self._hidden_state_to_output(out_flat, batch_size, 1) 156 | future_dist.append(y_f) 157 | future_pred.append(self._dist_to_bins(future_dist[-1])) 158 | return curr_dist, curr_pred, torch.cat(future_dist, dim=2), torch.cat(future_pred, dim=2) 159 | 160 | 161 | class MultiOutputRNN(ForecastRNN): 162 | """ 163 | Designed to handle uneven batch sizes 164 | """ 165 | def __init__(self, 166 | input_dim, 167 | output_dim, 168 | output_len, 169 | hidden_size, 170 | depth, 171 | cuda, 172 | autoregressive=False, 173 | sequence=False, 174 | polynomial=False, 175 | degree=2): 176 | super(MultiOutputRNN, self).__init__(input_dim=input_dim, 177 | output_dim=output_dim, 178 | hidden_size=hidden_size, 179 | depth=depth, 180 | output_len=output_len, 181 | cuda=cuda) 182 | self.ar = autoregressive 183 | self.seq = sequence 184 | self.polynomial = polynomial 185 | self.degree = degree 186 | if self.polynomial: 187 | self.decoding_steps = self.degree+1 188 | self.polyval_layer = nn.Linear(self.decoding_steps*output_dim, output_len*output_dim) 189 | else: 190 | self.decoding_steps = self.output_len 191 | if self.seq: 192 | self.decoder = nn.GRU(input_size=hidden_size, 193 | hidden_size=hidden_size, 194 | num_layers=1, 195 | dropout=False, 196 | bidirectional=False, 197 | batch_first=False) 198 | self.decoder.cuda() 199 | self.output = nn.Linear(hidden_size, output_dim) 200 | elif self.ar: 201 | output = [nn.Linear(hidden_size, output_dim)] 202 | for i in range(self.decoding_steps-1): 203 | output.append(nn.Linear(hidden_size + output_dim, output_dim)) 204 | self.output = nn.ModuleList(output) 205 | else: 206 | output = [nn.Linear(hidden_size, output_dim) for i in range(self.decoding_steps)] 207 | self.output = nn.ModuleList(output) 208 | if self.cuda: 209 | self.output = self.output.cuda() 210 | 211 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 212 | """ 213 | Given output from RNN layer, translate to output 214 | y has size 
(batch_size, sequence_length, output_len, output_dim) 215 | might want to change 216 | """ 217 | if self.seq: 218 | y = [] 219 | encoded = out_flat[None, :] 220 | hidden = Variable(torch.zeros(encoded.data.shape)).cuda() 221 | for i in range(self.decoding_steps): 222 | encoded, hidden = self.decoder(encoded, hidden) 223 | pred = self.sm(self.output(encoded[0])).contiguous() 224 | y.append(pred.view(batch_size, 225 | sequence_length, 226 | 1, 227 | self.output_dim)) 228 | return torch.cat(y, dim=2) 229 | else: 230 | y = [] 231 | for i in range(len(self.output)): 232 | if self.ar: 233 | if i == 0: 234 | pred = self.sm(self.output[0](out_flat)).contiguous() 235 | y.append(pred.view(batch_size, 236 | sequence_length, 237 | 1, 238 | self.output_dim)) 239 | else: 240 | fused_state = torch.cat((out_flat, pred), dim=1) 241 | pred = self.sm(self.output[i](fused_state)).contiguous() 242 | y.append(pred.view(batch_size, 243 | sequence_length, 244 | 1, 245 | self.output_dim)) 246 | else: 247 | y.append(self.sm(self.output[i](out_flat)).contiguous().view(batch_size, 248 | sequence_length, 249 | 1, 250 | self.output_dim)) 251 | return torch.cat(y, dim=2) 252 | 253 | def poly_to_val(self, poly): 254 | return poly 255 | 256 | def forward(self, seq, glucose_dat, **kwargs): 257 | """ 258 | prediction into future is based on output size 259 | Note: there is padding in form of -1, need to remove for 260 | accurate loss 261 | bins reverse probability predictions to real values 262 | """ 263 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 264 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 265 | 266 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 267 | 268 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 269 | 270 | if y.data.shape[1] <= self.output_len: 271 | # curr_dist contains dists ENTIRELY within signal 272 | # note that this reduces training size 273 | curr_dist = None 274 | else: 275 | curr_dist = y[:, :-self.output_len] 276 | curr_pred = self._dist_to_bins(curr_dist) 277 | 278 | future_dist = [final_dist] 279 | future_pred = self._dist_to_bins(future_dist[-1]) 280 | if self.polynomial: 281 | curr_real_pred = self.poly_to_val(curr_pred) 282 | future_real_pred = self.poly_to_val(future_pred) 283 | return (curr_dist, 284 | curr_pred, 285 | torch.cat(future_dist, dim=0), 286 | future_pred) 287 | 288 | def sort_batch(batch_x, batch_y, batch_y_real, lens): 289 | """ 290 | Sorts minibatch by length in decreasing order 291 | to accomodate pack_padded_sequence 292 | """ 293 | dat_x, dat_y, dat_y_real, dat_l = batch_x.numpy(), batch_y.numpy(), batch_y_real.numpy(), lens.numpy() 294 | sort_x = dat_x[(dat_l*-1).argsort()] # -1 to get descending order 295 | sort_y = dat_y[(dat_l*-1).argsort()] 296 | sort_y_real = dat_y_real[(dat_l*-1).argsort()] 297 | sort_l = dat_l[(dat_l*-1).argsort()] 298 | return sort_x, sort_y, sort_y_real, sort_l 299 | 300 | 301 | def convert_batch(batch_x, batch_y, batch_y_real, batch_l, cuda, real_values=False): 302 | """ 303 | Given batches in numpy form, 304 | convert to proper type for model input 305 | """ 306 | if cuda: 307 | float_type = torch.cuda.FloatTensor 308 | long_type = torch.cuda.LongTensor 309 | else: 310 | float_type = torch.FloatTensor 311 | long_type = torch.LongTensor 312 | new_batch_x = Variable(torch.from_numpy(batch_x).type(float_type), requires_grad=False) 313 | if real_values: 314 | new_batch_y = Variable(torch.from_numpy(batch_y).type(float_type), requires_grad=False) 315 | 
new_batch_y_real = new_batch_y 316 | else: 317 | new_batch_y = Variable(torch.from_numpy(batch_y).type(long_type), requires_grad=False) 318 | new_batch_y_real = Variable(torch.from_numpy(batch_y_real).type(long_type), requires_grad=False) 319 | new_batch_l = list(batch_l) 320 | return new_batch_x, new_batch_y, new_batch_y_real, new_batch_l 321 | 322 | 323 | def remove_prediction_padding(prediction_distribution, 324 | target_value, 325 | loss_weight, 326 | target_real_value): 327 | """ 328 | Masks prediction for artificial targets and flattens 329 | """ 330 | # assuming target value will have all -1 or no -1 331 | missing_indicator = torch.min(target_value, dim=2)[0] != -1 332 | 333 | prediction_nopad = torch.masked_select( 334 | prediction_distribution, 335 | missing_indicator[:, :, None, None]).view(-1, prediction_distribution.shape[-1]) 336 | target_nopad = torch.masked_select( 337 | target_value, 338 | missing_indicator[:, :, None]) 339 | target_real_nopad = torch.masked_select( 340 | target_real_value, 341 | missing_indicator[:, :, None]) 342 | loss_weight_nopad = torch.masked_select( 343 | loss_weight, 344 | missing_indicator[:, :, None]) 345 | return prediction_nopad, target_nopad, target_real_nopad, loss_weight_nopad 346 | 347 | 348 | def remove_prediction_padding_old(prediction_distribution, 349 | target_value, 350 | loss_weight, 351 | target_real_value): 352 | """ 353 | Masks prediction for artificial targets 354 | """ 355 | prediction_distribution = prediction_distribution.contiguous().view(-1, 361) 356 | target_value = target_value.contiguous().view(-1) 357 | loss_weight = loss_weight.contiguous().view(-1) 358 | inter = (target_value != -1).view(-1, 1) 359 | mask = inter.expand(prediction_distribution.size(0), prediction_distribution.size(1)) 360 | ret = [prediction_distribution[mask].view(-1, prediction_distribution.size(1)), 361 | target_value[(target_value != -1)], 362 | None] 363 | if loss_weight is not None: 364 | ret.append(loss_weight[(target_value != -1)]) 365 | else: 366 | ret.append(None) 367 | return ret 368 | 369 | 370 | def get_loss(inp, 371 | out, 372 | out_real, 373 | lens, 374 | cuda, 375 | gn, 376 | glucose_dat, 377 | criterion, 378 | base=1, 379 | value_weight=0, 380 | value_ratio=0): 381 | """ 382 | Simple helper function that calculates model loss. 
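    The loss weight for forecast step i (of L output steps) is proportional to
    base**(L-1-i), normalized to have mean 1, so base=1 weights every horizon
    equally while base close to 0 concentrates the loss on the final forecast
    step. When glucose_dat.polynomial is set, value_ratio mixes this
    distributional loss with an MSE term on the values reconstructed from the
    predicted coefficient distributions, and value_weight scales that MSE term.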
383 | Basically to save some space 384 | """ 385 | batch_size_val = inp.size(0) 386 | output_dim = gn.output_dim 387 | 388 | weight_vec = torch.Tensor([base ** i for i in reversed(range(out.size(-1)))]) 389 | weight_vec = (weight_vec/weight_vec.sum()) * weight_vec.numel() # consistent weighting on output length 390 | loss_weight = weight_vec.expand(out.shape) 391 | 392 | inp_s, out_s, out_real_s, lens_s = sort_batch(inp, out, out_real, lens) 393 | inp_s, out_s, out_real_s, lens_s = convert_batch(batch_x=inp_s, 394 | batch_y=out_s, 395 | batch_y_real=out_real_s, 396 | batch_l=lens_s, 397 | cuda=cuda, 398 | real_values=glucose_dat.real_values) 399 | x = nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 400 | glucose_dat.max_pad, 401 | 1), 402 | list(np.array(lens_s)), 403 | batch_first=True) 404 | if glucose_dat.real_values: 405 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 406 | y_p_flat = y_p.contiguous().view(-1, output_dim) 407 | (y_p_nopad, 408 | y_nopad, 409 | y_real_nopad, 410 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=y_p_flat, 411 | target_value=out_s.view(-1), 412 | loss_weight=Variable(loss_weight.cuda()), 413 | target_real_value=out_real_s) 414 | try: 415 | loss = criterion(y_p_nopad, y_nopad) 416 | except: 417 | print(type(y_nopad.data)) 418 | print(type(out_s.data)) 419 | print(type(out)) 420 | raise 421 | 422 | else: 423 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 424 | (yd_p_nopad, 425 | y_nopad, 426 | y_real_nopad, 427 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=yd_p, 428 | target_value=out_s, 429 | loss_weight=Variable(loss_weight.cuda()), 430 | target_real_value=out_real_s) 431 | if glucose_dat.polynomial: 432 | # include MSE 433 | real_criterion = torch.nn.MSELoss() 434 | coeffs = get_coeffs(yd_p_nopad.view(-1, len(glucose_dat.bins), yd_p_nopad.shape[-1]), glucose_dat.bins) 435 | real_values = coeffs_to_values(coeffs) 436 | loss_real = real_criterion(real_values.view(-1), y_real_nopad.float()) * value_weight 437 | loss_dist = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 438 | loss = (1-value_ratio) * loss_dist + value_ratio * loss_real 439 | if np.isnan(loss.data[0]): 440 | raise ValueError('Got NaN loss') 441 | else: 442 | loss = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 443 | if np.isnan(loss.data[0]): 444 | raise ValueError('Got NaN loss') 445 | if torch.min(y_nopad.data) == -1: 446 | print('trouble ahead') 447 | return loss.mean(), y_p 448 | 449 | 450 | def get_coeffs(dist, bins): 451 | prob = torch.exp(dist) 452 | bin_vals = Variable(torch.from_numpy(np.array(bins)).float().cuda()).expand_as(prob).transpose(1, 2) 453 | coeffs = torch.bmm(prob, bin_vals) # includes false off-diag coeffs 454 | real_coeffs = coeffs[torch.eye(len(bins)).expand_as(coeffs).byte().cuda()].view(-1, len(bins)) # extract diagonals 455 | return real_coeffs 456 | 457 | 458 | def coeffs_to_values(coeffs): 459 | degree = coeffs.shape[-1] 460 | basis = Variable(torch.stack([torch.arange(0, 6) ** i for i in range(degree)]).cuda()) 461 | return coeffs.view(-1, degree) @ basis 462 | 463 | 464 | def get_predictions(inp, 465 | out, 466 | lens, 467 | cuda, 468 | gn, 469 | glucose_dat): 470 | """ 471 | Gets predictions 472 | """ 473 | batch_size_val = inp.size(0) 474 | output_dim = gn.output_dim 475 | 476 | inp_s, out_s, lens_s = sort_batch(inp, out, lens) 477 | inp_s, out_s, lens_s = convert_batch(inp_s, 478 | out_s, 479 | lens_s, 480 | cuda, 481 | glucose_dat.real_values) 482 | x = 
nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 483 | glucose_dat.max_pad, 484 | 1), 485 | list(np.array(lens_s)), 486 | batch_first=True) 487 | if glucose_dat.real_values: 488 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 489 | y_p_flat = y_p.contiguous().view(-1, output_dim) 490 | y_p_nopad, y_nopad = remove_prediction_padding(y_p_flat, 491 | out_s.view(-1)) 492 | 493 | else: 494 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 495 | yd_p_flat = yd_p.contiguous().view(-1, output_dim) 496 | yd_p_nopad, y_nopad = remove_prediction_padding(yd_p_flat, 497 | out_s.view(-1)) 498 | return yd_p, y_p, yd_f, y_f 499 | 500 | 501 | def make_model(config): 502 | """ 503 | A poor man's factory method. 504 | """ 505 | if config.model_type == 'recursive': 506 | gn = RecursiveRNN(input_dim=config.input_dim, 507 | output_dim=config.output_dim, 508 | hidden_size=config.hidden_size, 509 | depth=config.depth, 510 | cuda=True) 511 | else: 512 | assert config.output_len == config.pred_len # could relax 513 | gn = MultiOutputRNN(input_dim=config.input_dim, 514 | output_dim=config.output_dim, 515 | hidden_size=config.hidden_size, 516 | output_len=config.output_len, 517 | depth=config.depth, 518 | cuda=True, 519 | autoregressive=config.autoregressive, 520 | sequence=config.sequence, 521 | polynomial=config.polynomial, 522 | degree=config.degree) 523 | return gn 524 | -------------------------------------------------------------------------------- /lib/trainer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements training schemes with logging 3 | """ 4 | import numpy as np 5 | import os 6 | import time 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader, sampler 10 | from tensorboardX import SummaryWriter 11 | from tqdm import tqdm 12 | import joblib 13 | 14 | import model as forecast_model 15 | 16 | 17 | class ExperimentTrainer: 18 | """ 19 | Simple training scheme 20 | """ 21 | 22 | def __init__(self, model, optimizer, criterion, name, model_dir, log_dir, 23 | load=False, load_epoch=None): 24 | """ 25 | :param model: initialized model for training 26 | :param optimizer: initialized training optimizer 27 | :param name: string to save trainer results under 28 | :param load: whether or not to load results from previous train if they exist 29 | :param epoch: which epoch results to load, if None then the best found 30 | """ 31 | self.model = model 32 | self.criterion = criterion 33 | self.optimizer = optimizer 34 | self.name = name 35 | self.model_dir = model_dir 36 | self.log_dir = log_dir 37 | 38 | if not os.path.exists(self.model_dir): 39 | os.makedirs(self.model_dir) 40 | os.makedirs(self.log_dir) 41 | else: 42 | if load: 43 | if load_epoch is None: 44 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, 'bsf_sup.pt'))) 45 | else: 46 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, '{}_sup.pt'.format(load_epoch)))) 47 | 48 | else: 49 | print('Warning: directory already exists') 50 | self.writer = SummaryWriter(log_dir=self.log_dir) 51 | 52 | def train_sup(self, epoch_lim, data, valid_data, early_stopping_lim, 53 | batch_size, num_workers, track_embeddings, validation_rate, loss_weight_base=1, 54 | value_weight=0, value_ratio=0): 55 | """ 56 | Training loop 57 | :param epoch_lim: total number of training epochs 58 | :param data: training data 59 | :param valid_data: validation data 60 | :param early_stopping_lim: Number of epochs to run without 
validation improvement before stopping 61 | if None, never stop early 62 | :param batch_size: training batch_size 63 | :param num_workers: number of CPU workers to use for data loading 64 | :param track_embeddings: Save out embedding information at end of run 65 | :param validation_rate: Check validation performance every validation_rate training epochs 66 | :param loss_weight_base: A constant between 0 and 1 used to interpolate between Single (=0) and Multi (=1) Step forecasting. 67 | :param value_weight: A constant multiplier for the real-value loss, set to 0 in the paper 68 | :param value_ratio: The proportion of loss used for the MSE loss term (as opposed for the cross-entropy loss), set to 0 in the paper 69 | :return loss array, model: 70 | """ 71 | if early_stopping_lim is None: 72 | early_stopping_lim = epoch_lim 73 | train_sampler = sampler.RandomSampler(np.arange(len(data))) 74 | data_train = DataLoader(data, 75 | batch_size=batch_size, 76 | sampler=train_sampler, 77 | drop_last=True) 78 | 79 | valid_sampler = sampler.SequentialSampler(np.arange(len(valid_data))) 80 | data_valid = DataLoader(valid_data, 81 | batch_size=batch_size, 82 | sampler=valid_sampler) 83 | step = 0 84 | 85 | bsf_loss = np.inf 86 | epochs_without_improvement = 0 87 | improvements = [] 88 | for epoch in range(epoch_lim): 89 | if epochs_without_improvement > early_stopping_lim: 90 | print('Exceeded early stopping limit, stopping') 91 | break 92 | if epoch % validation_rate == 0: 93 | valid_loss = self.validation(data_valid=data_valid, 94 | step=step, 95 | data=data, 96 | loss_weight_base=loss_weight_base, 97 | value_weight=value_weight, value_ratio=value_ratio) 98 | (bsf_loss, 99 | epochs_without_improvement, 100 | improvements) = self.manage_early_stopping(bsf_loss=bsf_loss, 101 | early_stopping_lim=early_stopping_lim, 102 | epochs_without_improvement=epochs_without_improvement, 103 | valid_loss=valid_loss, validation_rate=validation_rate, 104 | improvements=improvements) 105 | running_train_loss = 0 106 | for inp, out, out_real, lens in tqdm(data_train): 107 | loss, y_p = forecast_model.get_loss(inp=inp, 108 | out=out, 109 | lens=lens, 110 | cuda=True, 111 | gn=self.model, 112 | glucose_dat=data, 113 | criterion=self.criterion, 114 | base=loss_weight_base, 115 | out_real=out_real, 116 | value_weight=value_weight, 117 | value_ratio=value_ratio) 118 | step += 1 119 | running_train_loss += loss.data.cpu().numpy()[0] 120 | self.optimizer.zero_grad() 121 | loss.backward() 122 | self.optimizer.step() 123 | running_train_loss = running_train_loss/len(data_train) 124 | self.writer.add_scalar(tag='train_loss', 125 | scalar_value=running_train_loss, 126 | global_step=step) 127 | torch.save(self.model.state_dict(), '{}/final_sup.pt'.format(self.model_dir)) 128 | if track_embeddings: 129 | self.embed(data_valid, step, embed_batch=100) 130 | return improvements 131 | 132 | def manage_early_stopping(self, bsf_loss, early_stopping_lim, epochs_without_improvement, valid_loss, 133 | validation_rate, improvements): 134 | if valid_loss < bsf_loss: 135 | print('improved validation loss from {:.3f} to {:.3f}'.format(bsf_loss, valid_loss)) 136 | bsf_loss = valid_loss 137 | improvements.append(epochs_without_improvement) 138 | epochs_without_improvement = 0 139 | torch.save(self.model.state_dict(), 140 | '{}/bsf_sup.pt'.format(self.model_dir)) 141 | else: 142 | epochs_without_improvement += validation_rate 143 | print('Validation loss of {} did not improve on {}'.format(valid_loss, bsf_loss)) 144 | print('Early stopping at 
{}/{}'.format(epochs_without_improvement, early_stopping_lim)) 145 | return bsf_loss, epochs_without_improvement, improvements 146 | 147 | def validation(self, data_valid, step, data, loss_weight_base, value_weight, value_ratio): 148 | self.model.eval() 149 | running_valid_loss = 0 150 | for inp, out, out_real, lens in data_valid: 151 | loss, y_p = forecast_model.get_loss(inp=inp, 152 | out=out, 153 | lens=lens, 154 | cuda=True, 155 | gn=self.model, 156 | glucose_dat=data, 157 | criterion=self.criterion, 158 | base=loss_weight_base, 159 | out_real=out_real, 160 | value_weight=value_weight, 161 | value_ratio=value_ratio) 162 | step += 1 163 | running_valid_loss += loss.data.cpu().numpy()[0] 164 | running_valid_loss = running_valid_loss / len(data_valid) 165 | print('validation loss: {:.3f}'.format(running_valid_loss)) 166 | self.writer.add_scalar(tag='valid_total_loss', 167 | scalar_value=running_valid_loss, 168 | global_step=step) 169 | self.model.train() 170 | return running_valid_loss 171 | 172 | def embed(self, dataloader, step, embed_batch=5): 173 | print('embed') 174 | embeddings = None 175 | metadata = [] 176 | i = 0 177 | for dat, dat_past, dat_future, init, label in dataloader: 178 | x = Variable(dat.float().cuda()) 179 | e = self.model.embed(x).data 180 | metadata += np.round(label.numpy(), 2).tolist() 181 | if embeddings is None: 182 | embeddings = e 183 | else: 184 | embeddings = torch.cat((embeddings, e)) 185 | if i > embed_batch: 186 | break 187 | i += 1 188 | print(len(metadata)) 189 | self.writer.add_embedding(mat=embeddings, 190 | metadata=metadata, 191 | global_step=step) 192 | 193 | def get_predictions(self, dataloader): 194 | self.model.eval() 195 | data = None 196 | data_past = None 197 | data_future = None 198 | y = None 199 | pred_pres = None 200 | pred_past = None 201 | pred_future = None 202 | pred = None 203 | for dat, dat_past, dat_future, init, label in dataloader: 204 | print('evaluation batch') 205 | window_data = [] 206 | window_data_past = [] 207 | window_data_future = [] 208 | window_y = [] 209 | window_pred = [] 210 | window_pred_pres = [] 211 | window_pred_past = [] 212 | window_pred_future = [] 213 | if not self.window: 214 | dat = [dat] 215 | dat_past = [dat_past] 216 | dat_future = [dat_future] 217 | for window in range(len(dat)): 218 | x = Variable(dat[window].float().cuda()) 219 | y_pred, x_pres, x_past, x_future = self.model.forward(x) 220 | y_pred = y_pred.data.cpu().numpy() 221 | if self.decode_present: 222 | x_pres = x_pres.data.cpu().numpy() 223 | if self.decode_past: 224 | x_past = x_past.data.cpu().numpy() 225 | if self.decode_future: 226 | x_future = x_future.data.cpu().numpy() 227 | yt = label.numpy() 228 | xt_pres = dat[window].numpy() 229 | xt_past = dat_past[window].numpy() 230 | xt_future = dat_future[window].numpy() 231 | window_data.append(xt_pres) 232 | window_data_past.append(xt_past) 233 | window_data_future.append(xt_future) 234 | window_y.append(yt) 235 | window_pred.append(y_pred) 236 | window_pred_pres.append(x_pres) 237 | window_pred_past.append(x_past) 238 | window_pred_future.append(x_future) 239 | if data is None: 240 | data = [window_data] 241 | data_past = [window_data_past] 242 | data_future = [window_data_future] 243 | y = [window_y] 244 | pred_pres = [window_pred_pres] 245 | pred_past = [window_pred_past] 246 | pred_future = [window_pred_future] 247 | pred = [window_pred] 248 | else: 249 | data.append(window_data) 250 | data_past.append(window_data_past) 251 | data_future.append(window_data_future) 252 | 
y.append(window_y) 253 | if self.decode_present: 254 | pred_pres.append(window_pred_pres) 255 | if self.decode_past: 256 | pred_past.append(window_pred_past) 257 | if self.decode_future: 258 | pred_future.append(window_pred_future) 259 | pred.append(window_pred) 260 | print('done getting predictions') 261 | return (data, data_past, data_future, y, 262 | pred_pres, pred_past, pred_future, pred) 263 | -------------------------------------------------------------------------------- /walkthrough.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import sklearn.ensemble\n", 11 | "import torch\n", 12 | "import pandas\n", 13 | "import joblib\n", 14 | "\n", 15 | "from matplotlib import pyplot as plt\n", 16 | "from lib import model, glucose_dataset" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Introduction\n", 24 | "\n", 25 | "This notebook provides a brief walkthrough of the public code release for our KDD 2018 paper: Deep Multi-Output Forecasting: Learning to Accurately Predict Blood Glucose Trajectories. The full paper is available via arXiv: https://arxiv.org/abs/1806.05357. We hope to release our glucose data to the general public soon. In the meantime, people interested in blood glucose forecasting may be interested in the recently released OhioT1DM dataset: http://smarthealth.cs.ohio.edu/OhioT1DM-dataset.html." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Data\n", 33 | "We have included both the processed and unprocessed dataset used to generate our results. This data was collected by authors Mamta Jaiswal, Dr. Lynn Ang, and Dr Rodica Pop-Busui." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Unprocessed\n", 41 | "The unprocessed dataset, data/unprocessed_cgm_data.xlsx, is an excel file with one sheet per recording session (from baseline to 36 months). Each row is one individual, note that patient ids are consistent across recording sessions, and not all patients have all recording sessions. The CGM data is giving at 5 minute resolution. The unprocessed data also contain information on the daily insulin dose and delivery method, which was not used in the paper. " 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "unprocessed = pandas.read_excel('data/unprocessed_cgm_data.xlsx', sheet_name=None)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "unprocessed.keys()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "unprocessed['Baseline']" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Processed\n", 76 | "The processed data is stored as four pickle files (accessible via joblib), data/processed_cgm_data_{train/validation/test}.pkl and data/processed_cgm_coeffs.pkl. To process we:\n", 77 | "\n", 78 | "1. Remove data points which differ from previous ones by more than 40 mg/dL, as these measurements are almost certainly the result of sensor error\n", 79 | "2. 
Impute small data gaps using linear interpolation.\n", 80 | "3. Split data into contiguous chunks, splitting either on missing data or when a chunk is >101 measurements long\n", 81 | "4. (PolyMO) compute coefficient bins on the training data.\n", 82 | "\n", 83 | "The test set is constructed using the most recent session from each patient (approximately 10% of the data). \n", 84 | "\n", 85 | "We also include a differently processed version of the data, data/alternative_cgm_data_{train/test}, which we found useful for other projects. This data is constructed on a per-day basis, removing days with excessive missingness. Importantly, each day is linked to the ID of the patient it came from." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "data_tr = glucose_dat_train_rec = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',\n", 95 | " max_pad=101,\n", 96 | " output_len=6, # set 1 for Recursive, 6 for MO\n", 97 | " output_dim=361,\n", 98 | " polynomial=False,\n", 99 | " degree=2,\n", 100 | " range_low=0,\n", 101 | " range_high=100,\n", 102 | " coeff_file='/data2/ifox/glucose/data/training_coefficient_percentiles_ridge_alpha1_roc40.pkl')" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "for x, y_index, y_real, lens in data_tr:\n", 112 | " break" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# The polynomial fitting takes a while (several minutes), but is only required once before training\n", 122 | "data_tr_poly = glucose_dat_train_rec = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',\n", 123 | " max_pad=101,\n", 124 | " output_len=6,\n", 125 | " output_dim=361,\n", 126 | " polynomial=True,\n", 127 | " degree=2,\n", 128 | " range_low=0,\n", 129 | " range_high=100,\n", 130 | " coeff_file='/data2/ifox/glucose/data/training_coefficient_percentiles_ridge_alpha1_roc40.pkl')" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "for x, poly_index, y_real, lens in data_tr_poly:\n", 140 | " break" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "# Models\n", 148 | "\n", 149 | "Our paper considers 8 classes of models:\n", 150 | "\n", 151 | "Shallow Baselines\n", 152 | "* Extrapolation\n", 153 | "* Recursive Random Forest\n", 154 | "* Multi-Output Random Forest\n", 155 | "\n", 156 | "Deep Baselines\n", 157 | "* Recursive RNN\n", 158 | "* Multi-Output RNN\n", 159 | "\n", 160 | "Our Approaches\n", 161 | "* Sequential Multi-Output RNN\n", 162 | "* Polynomial Multi-Output RNN\n", 163 | "* Polynomial Sequential Multi-Output RNN\n", 164 | "\n", 165 | "We will walk through how we implemented, trained, and evaluated each model" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Shallow Baselines" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### Extrapolation\n", 180 | "\n", 181 | "This is a simple linear extrapolation baseline implemented via Numpy. We extrapolate using the last 30 minutes (6 samples as our data was sampled at 5 minute intervals) to predict 30 minutes into the future." 
182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "data_tr = np.cumsum(np.random.randn(1000, 16), axis=1)\n", 191 | "data_ts = np.cumsum(np.random.randn(100, 10), axis=1)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "n_input = 6\n", 201 | "horizon = 6\n", 202 | "degree = 1\n", 203 | "extrap_pred = []\n", 204 | "for i in range(len(data_ts)):\n", 205 | " coeffs = np.polynomial.polynomial.polyfit(x=np.arange(n_input), y=data_ts[i][-n_input:], deg=degree)\n", 206 | " extrap_pred.append(np.polyval(p=np.flip(coeffs, axis=0), x=np.arange(horizon)+n_input))" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Recursive and Multi-Output Random Forest\n", 214 | "\n", 215 | "Implemented using scikit-learn. Note the scikit-learn implementation automatically infers output size during the fitting step. " 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "#### Recursive" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "rf_rec = sklearn.ensemble.RandomForestRegressor(n_estimators=100, n_jobs=-1)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "# Note, for actually training recursive models, you should use all of the data by taking input_size tiles\n", 241 | "X_rec_tr = data_tr[:, :10]\n", 242 | "y_rec_tr = data_tr[:, 10:11].ravel()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "rf_rec.fit(X_rec_tr, y_rec_tr)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "# recursive prediction\n", 261 | "X_mod = data_ts.copy()\n", 262 | "p_rec_arr = []\n", 263 | "for i in range(6):\n", 264 | " p = rf_rec.predict(X_mod)\n", 265 | " p_rec_arr.append(p.reshape(-1, 1))\n", 266 | " X_mod = np.concatenate((X_mod[:, 1:], p.reshape(-1, 1)), axis=1)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "#### Multi-Output" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# Note, for actually training recursive models, you should use all of the data by taking input_size tiles\n", 283 | "X_mo_tr = data_tr[:, :10]\n", 284 | "y_mo_tr = data_tr[:, 10:]" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "rf_mo = sklearn.ensemble.RandomForestRegressor(n_estimators=100, n_jobs=-1)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "rf_mo.fit(X_mo_tr, y_mo_tr)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "p_mo_arr = rf_mo.predict(data_ts)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## Deep Models\n", 319 | "Our deep 
baselines are all implemented in PyTorch. They are a bit more involved to train. The basic training procedure is outlined in lib/trainer.py in the ExperimentTrainer class. The train_sup function is used to fit the provided model. The use of TensorboardX is not required, but convenient for monitoring losses. The data is assumed to be in the form of a pytorch dataset in the form of lib/glucose_dataset.py (though the specifics can vary greatly).\n", 320 | "\n", 321 | "Note that the dataset code requires precomputed polynomial coefficients for the PolyMO setting. This can be done using Numpy's polyfit function on your training data. \n", 322 | "\n", 323 | "The cuda flag should be set to True if a GPU is available." 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "### Recursive Baseline" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "rec_rnn = model.RecursiveRNN(input_dim=1, output_dim=361, hidden_size=512, depth=2, cuda=False)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### Multi-Output Baseline" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "mo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "### Sequential Multi-Output" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "seqmo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False, sequence=True)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "### Polynomial Multi-Output" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [ 387 | "polymo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False, polynomial=True, degree=1)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "### Polynomial Sequential Multi-Output" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "polymo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False, sequence=True, polynomial=True, degree=1)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [] 412 | } 413 | ], 414 | "metadata": { 415 | "kernelspec": { 416 | "display_name": "Python 3", 417 | "language": "python", 418 | "name": "python3" 419 | }, 420 | "language_info": { 421 | "codemirror_mode": { 422 | "name": "ipython", 423 | "version": 3 424 | }, 425 | "file_extension": ".py", 426 | "mimetype": "text/x-python", 427 | "name": "python", 428 | "nbconvert_exporter": "python", 429 | "pygments_lexer": "ipython3", 430 | "version": "3.6.5" 431 | } 432 | }, 433 | "nbformat": 4, 434 | "nbformat_minor": 2 435 | } 436 | --------------------------------------------------------------------------------
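The walkthrough stops at constructing the models; the training loop itself lives in lib/trainer.py. As a rough illustration of how the pieces fit together, the sketch below (not part of the original repo) wires a GlucoseDataset, a MultiOutputRNN, and an ExperimentTrainer and calls train_sup. The directory names, hyperparameters, optimizer, and loss choice are illustrative assumptions only, and because get_loss calls .cuda() internally the sketch assumes a GPU is available.

# Minimal training sketch, under the assumptions stated above.
import torch
from torch import nn, optim

from lib import model, glucose_dataset, trainer

# Datasets as used in the walkthrough (non-polynomial, 361 glucose bins, horizon 6).
train_data = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',
                                            max_pad=101,
                                            output_len=6,
                                            output_dim=361)
valid_data = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_validation.pkl',
                                            max_pad=101,
                                            output_len=6,
                                            output_dim=361)

# Multi-Output RNN baseline; cuda=True because the trainer's loss code runs on GPU.
mo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6,
                              hidden_size=512, depth=2, cuda=True)

# The RNNs emit log-probabilities (LogSoftmax) over bins, and get_loss multiplies the
# criterion output by per-step weights before averaging, so a non-reducing NLL loss is
# one reasonable choice here (an assumption, not the paper's recorded configuration;
# older PyTorch releases used NLLLoss(reduce=False) instead of reduction='none').
criterion = nn.NLLLoss(reduction='none')
optimizer = optim.Adam(mo_rnn.parameters(), lr=1e-3)

# 'mo_demo' and the model/log directories are hypothetical names for this example.
exp = trainer.ExperimentTrainer(model=mo_rnn,
                                optimizer=optimizer,
                                criterion=criterion,
                                name='mo_demo',
                                model_dir='models/mo_demo',
                                log_dir='logs/mo_demo')

# train_sup checkpoints the best validation model (bsf_sup.pt) and returns the
# list of improvement intervals; epoch_lim, batch_size, etc. are illustrative.
improvements = exp.train_sup(epoch_lim=100,
                             data=train_data,
                             valid_data=valid_data,
                             early_stopping_lim=20,
                             batch_size=64,
                             num_workers=0,
                             track_embeddings=False,
                             validation_rate=1,
                             loss_weight_base=1)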