├── README.md
├── data
│   ├── alternative_cgm_data_test.pkl
│   ├── alternative_cgm_data_train.pkl
│   ├── processed_cgm_coeffs.pkl
│   ├── processed_cgm_data_test.pkl
│   ├── processed_cgm_data_train.pkl
│   ├── processed_cgm_data_validation.pkl
│   └── unprocessed_cgm_data.xlsx
├── lib
│   ├── .ipynb_checkpoints
│   │   ├── glucose_dataset-checkpoint.py
│   │   ├── model-checkpoint.py
│   │   └── trainer-checkpoint.py
│   ├── __init__.py
│   ├── __pycache__
│   │   └── model.cpython-36.pyc
│   ├── glucose_dataset.py
│   ├── model.py
│   └── trainer.py
└── walkthrough.ipynb

/README.md:
--------------------------------------------------------------------------------
1 | # multi-output-glucose-forecasting
2 | The code and data used for the paper Deep Multi-Output Forecasting: Learning to Accurately Predict Blood Glucose Trajectories, published at KDD 2018. The full paper is available on arXiv: https://arxiv.org/pdf/1806.05357.pdf
3 | 
4 | By downloading and using these data you agree to comply with the following:
5 | 
6 | - You will not attempt re-identification;
7 | - You will contact The University of Michigan (UM) if identifiers are detected;
8 | - You will not redistribute or resell the data;
9 | - Data ownership remains with UM;
10 | - These requirements survive changes in ownership of the entity.
11 | 
12 | Update: this repo was originally intended to serve as the starting point for a full and clean version of the code I used (since my research code was...researchy). Since it's been years and I never got around to actually doing that, here's the full research version in all of its messiness: https://gitlab.eecs.umich.edu/mld3/glucose_forecasting
13 | 
--------------------------------------------------------------------------------
/data/alternative_cgm_data_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/alternative_cgm_data_test.pkl
--------------------------------------------------------------------------------
/data/alternative_cgm_data_train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/alternative_cgm_data_train.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_coeffs.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_coeffs.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_data_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_data_test.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_data_train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_data_train.pkl
--------------------------------------------------------------------------------
/data/processed_cgm_data_validation.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/processed_cgm_data_validation.pkl -------------------------------------------------------------------------------- /data/unprocessed_cgm_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/data/unprocessed_cgm_data.xlsx -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/glucose_dataset-checkpoint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.polynomial import polynomial as pn 3 | import joblib 4 | from joblib import Parallel, delayed 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class GlucoseDataset(Dataset): 9 | """ 10 | Blood glucose dataset for pytorch 11 | Entry: (data, y_bin, y_real, len) 12 | loading everything into memory as small 13 | """ 14 | def __init__(self, 15 | data_pkl, 16 | max_pad, 17 | output_len, 18 | output_dim, 19 | polynomial=False, 20 | degree=None, 21 | range_low=None, 22 | range_high=None, 23 | coeff_file=None, 24 | real_values=False, 25 | parallel_cache=False, 26 | max_size=None, 27 | flip_signal=False): 28 | 29 | # for one-hot encoding, assumes 40-400 mg/dL range 30 | self.output_dim = output_dim 31 | self.polynomial = polynomial 32 | self.degree = degree 33 | self.range_low = range_low 34 | self.range_high = range_high 35 | self.real_values = real_values 36 | self.max_pad = max_pad 37 | 38 | self.data = joblib.load(data_pkl) 39 | if flip_signal: 40 | self.data_flip = [] 41 | for i in range(len(self.data)): 42 | self.data_flip.append(np.flip(self.data[i], axis=0)) 43 | self.data = self.data_flip 44 | if max_size is not None: 45 | self.data = self.data[0:max_size] 46 | self.output_len = output_len 47 | 48 | if not self.real_values: 49 | if self.polynomial: 50 | # from degree, calculate ranges 51 | # from ranges, get bins 52 | # should make more flexible, function parameter binning 53 | # old hand-defined range, captured 100% variation 54 | #[[40, 400], 55 | # [-36, 36], 56 | # [-5.5, 5.5]] 57 | for var in [self.degree, self.range_low, self.range_high]: 58 | assert var is not None, 'Must set degree, range_high, and range_low for polynomial' 59 | 60 | if coeff_file is None: 61 | ranges = self.auto_poly_range() 62 | else: 63 | ranges = self.precomputed_poly_range(coeff_file) 64 | 65 | self.bin_step = [(ranges[i][1]-ranges[i][0])/(self.output_dim-1) for i in range(degree+1)] 66 | self.bins = [ 67 | np.linspace(ranges[i][0], 68 | ranges[i][1], 69 | self.output_dim) + (0.5 * self.bin_step[i]) 70 | for i in range(self.degree+1)] 71 | else: 72 | # simple value binning 73 | self.bin_step = (400-40)/(self.output_dim-1) 74 | # the half step appraoch is an artifact of wanting perfect bins with output_dim=361 75 | self.bins = np.linspace(40, 400, self.output_dim)+(self.bin_step * 0.5) 76 | 77 | # trying out precaching results for less intensive load 78 | count = 0 79 | self.x_out = [] 80 | self.y_out = [] 81 | self.y_real = [] 82 | self.lens = [] 83 | print('caching results') 84 | if parallel_cache: 85 | res_tuples = Parallel(n_jobs=5, verbose=10)(delayed(self.prepare_output)(idx) for idx in range(len(self.data))) 86 | for idx in range(len(self.data)): 87 | x_pad, y_pad, y_real_pad, lens = res_tuples[idx] 88 | self.x_out.append(x_pad) 89 | self.y_out.append(y_pad) 90 | 
self.y_real.append(y_real_pad) 91 | self.lens.append(lens) 92 | else: 93 | for idx in range(len(self.data)): 94 | if idx % 10 == 0: 95 | print('{}/{}'.format(idx, len(self.data))) 96 | x_pad, y_pad, y_real_pad, lens = self.prepare_output(idx) 97 | self.x_out.append(x_pad) 98 | self.y_out.append(y_pad) 99 | self.y_real.append(y_real_pad) 100 | self.lens.append(lens) 101 | 102 | def prepare_output(self, idx, real_y=True): 103 | x_dat = self.data[idx] 104 | length = self.max_pad - len(x_dat) 105 | x_pad = np.pad(x_dat, 106 | (0, length), 107 | mode='constant', 108 | constant_values=-1) 109 | y_dat = self.window_stack(x_dat[1::].reshape(-1, 1)) 110 | if self.real_values: 111 | y_bins = y_dat 112 | else: 113 | y_bins = self.values_to_bins(y_dat) 114 | y_pad = np.pad(y_bins, 115 | ((0, length), (0, 0)), 116 | mode='constant', 117 | constant_values=-1) 118 | if real_y: 119 | y_real_pad = np.pad(y_dat, 120 | ((0, length), (0, 0)), 121 | mode='constant', 122 | constant_values=-1) 123 | return x_pad, y_pad, y_real_pad, self.max_pad - length 124 | else: 125 | return x_pad, y_pad, self.max_pad - length 126 | 127 | def auto_poly_range(self, percentile): 128 | """ 129 | Using degree and training data, creates 130 | range that captures percentile% of variation of the best fit 131 | coefficient values. 132 | """ 133 | raise NotImplementedError('TODO') 134 | 135 | def precomputed_poly_range(self, coeff_file): 136 | """ 137 | Simple function that uses precomputed coefficient 138 | percentile dict 139 | 140 | low, high can be integers in 0-100 141 | 142 | Requires precomputed coeff dict 143 | """ 144 | assert self.range_low < self.range_high 145 | 146 | coeff = joblib.load(coeff_file) 147 | 148 | ranges = [] 149 | for i in range(self.degree+1): 150 | low_val = coeff[self.degree][i][self.range_low] 151 | high_val = coeff[self.degree][i][self.range_high] 152 | ranges.append([low_val, high_val]) 153 | return ranges 154 | 155 | def scale(self, x): 156 | """ 157 | turn glucose signal with 40-400 to range -1 to 1 158 | can add more intelligent scaling for balencing hypo/hyper, 159 | though real concern is moving over to classification 160 | """ 161 | return (x-220)/180. 162 | 163 | def one_hot(self, seq): 164 | """ 165 | turn glucose signal into one hot distribution 166 | with size=output_dim, linearly bins glucose 167 | range 40-400 168 | don't need for NLLLoss 169 | """ 170 | dist = np.zeros((seq.size, self.output_dim)) 171 | dist[np.arange(seq.size), np.digitize(seq, self.bins)] = 1. 
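# Worked example of the value-binning scheme used throughout this class (an
# illustrative sketch, assuming the default 40-400 mg/dL range with output_dim=361,
# i.e. 1 mg/dL bins; the reading below is hypothetical, not repo data):
#   bin_step = (400 - 40) / (361 - 1)             # -> 1.0
#   bins     = np.linspace(40, 400, 361) + 0.5    # -> [40.5, 41.5, ..., 400.5]
#   np.digitize(112.3, bins)                      # -> 72    (values_to_bins)
#   bins[72] - 0.5 * bin_step                     # -> 112.0 (bins_to_values)
# so one_hot places the 1 at index 72 for a glucose reading of 112.3 mg/dL.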
172 | return dist 173 | 174 | def polymerize(self, y): 175 | """ 176 | Turns output window into best fit polynomial 177 | with output [x'_0, ..., x'_d] where x' is 178 | bin number that x would be in (using ranges) 179 | """ 180 | x_inds = [] 181 | if len(y.shape) > 1: 182 | for j in range(y.shape[0]): 183 | coeffs = pn.polyfit(np.arange(len(y[j])), y[j], deg=self.degree) 184 | x_inds.append([np.digitize(coeffs[i], self.bins[i]).item() for i in range(self.degree+1)]) 185 | else: 186 | coeffs = pn.polyfit(np.arange(len(y)), y, deg=self.degree) 187 | for i in range(self.degree+1): 188 | x_inds.append(np.digitize(coeffs[i], self.bins[i]).item()) 189 | return np.clip(x_inds, 0, self.output_dim-1) 190 | 191 | def bins_to_coeff_values(self, pred): 192 | """ 193 | Given bins for polynomial coefficients, 194 | return estimate of real coefficient values 195 | """ 196 | if len(pred.shape) > 1: 197 | vals = [np.array([self.bins[i][np.clip(np.array(pred[:, i], dtype=int), 0, self.output_dim-1)]]) 198 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 199 | coeffs = np.concatenate(vals, axis=0).T 200 | else: 201 | coeffs = [self.bins[i][np.clip(np.array(pred[i], dtype=int), 0, self.output_dim-1)] 202 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 203 | return np.array(coeffs) 204 | 205 | def reverse_polymerize(self, pred): 206 | """ 207 | Given bins for polynomial coefficients, returns forecast 208 | For new foreacsting system, flexible degree and doesn't assume 209 | adding mistake 210 | """ 211 | coeffs = self.bins_to_coeff_values(pred) 212 | return pn.polyval(np.arange(self.output_len), coeffs.T) 213 | 214 | def values_to_bins(self, y): 215 | """ 216 | Gvien a y sample (or batch of y samples), changes from 217 | value to categorical representation 218 | """ 219 | if self.real_values: 220 | return y 221 | if self.polynomial: 222 | return self.polymerize(y) 223 | else: 224 | return np.digitize(y, self.bins) 225 | 226 | def bins_to_values(self, y): 227 | """ 228 | Given a y sample (or batch of y samples), changes from categorical 229 | to value representation 230 | """ 231 | if type(y) is not np.ndarray: 232 | y = y.numpy() 233 | if self.real_values: 234 | return y 235 | if self.polynomial: 236 | return self.reverse_polymerize(y) 237 | else: 238 | vals = self.bins[np.clip(np.array(y, dtype=int), 0, self.output_dim-1)] 239 | return vals - (0.5 * self.bin_step) 240 | 241 | def index_to_values(self, x, i): 242 | """ 243 | Given i index for output value: y[i] 244 | returns ground truth x values 245 | bins_to_values can also be used, but ignores 246 | polynomial residual 247 | """ 248 | return x[i+1:i+1+self.output_len] 249 | 250 | def window_stack(self, seq, stepsize=1): 251 | """ 252 | Gets rolling window from seq of length self.output_len 253 | stepsize determines dilation 254 | """ 255 | length = self.output_len 256 | n = seq.shape[0] 257 | return np.hstack(seq[i:1+n+i-length:stepsize] for i in range(length)) 258 | 259 | def __len__(self): 260 | return len(self.data) 261 | 262 | def __getitem__(self, idx): 263 | return self.x_out[idx], self.y_out[idx], self.y_real[idx], self.lens[idx] 264 | -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/model-checkpoint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import joblib 3 | 4 | import torch 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | 9 | class ForecastRNN(nn.Module): 10 | """ 11 | Helper 
for pytorch reimplementation 12 | Uses variable sized/depth GRU with linear layer to get output right 13 | """ 14 | def __init__(self, input_dim, output_dim, hidden_size, depth, output_len=-1, cuda=False): 15 | super(ForecastRNN, self).__init__() 16 | self.cuda = cuda 17 | self.rnn = nn.GRU(input_size=input_dim, 18 | hidden_size=hidden_size, 19 | num_layers=depth, 20 | dropout=False, 21 | bidirectional=False, # would bidirectional help forecasting? 22 | batch_first=True) 23 | self.sm = nn.LogSoftmax(dim=1) 24 | self.input_dim = input_dim 25 | self.output_dim = output_dim 26 | self.output_len = output_len 27 | if self.cuda: 28 | self.rnn = self.rnn.cuda() 29 | self.sm = self.sm.cuda() 30 | self.float = torch.cuda.FloatTensor # not sure I need this 31 | else: 32 | self.float = torch.FloatTensor 33 | 34 | @staticmethod 35 | def _dist_to_bins(dist): 36 | return torch.max(dist, dim=-1)[1] 37 | 38 | @staticmethod 39 | def _get_sequence_info(seq): 40 | """ 41 | gets info on fed sequence 42 | """ 43 | if type(seq) == torch.nn.utils.rnn.PackedSequence: 44 | pack = True 45 | batch_size = seq.batch_sizes[0] 46 | sequence_length = len(seq.batch_sizes) 47 | else: 48 | pack = False 49 | batch_size = seq.size(0) 50 | sequence_length = seq.size(1) 51 | return pack, batch_size, sequence_length 52 | 53 | def _rnn_forward(self, seq, pack, batch_size): 54 | """ 55 | Helper function for forward that computes up to output layer 56 | """ 57 | h = Variable(torch.zeros(self.rnn.num_layers, 58 | batch_size, # not sure if need to reshape for batch_first 59 | self.rnn.hidden_size).type(self.float), 60 | requires_grad=False) 61 | # predict within the sequence 62 | out, h = self.rnn.forward(seq, h) 63 | if pack: 64 | out, lens = nn.utils.rnn.pad_packed_sequence(out, batch_first=True, padding_value=-1) 65 | else: 66 | lens = None 67 | # out has dim (batch_size, sequence_length, hidden_size) 68 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 69 | return out_flat, h, lens 70 | 71 | def _extract_final_dist(self, pack, batch_size, y, lens): 72 | """ 73 | Given y (possibly with padding), get distribution 74 | for final prediction at t+1 75 | prediction must be of size (batch_size, 1[, output_len], output_length) 76 | """ 77 | if type(self) is RecursiveRNN: 78 | output_len = 1 79 | else: 80 | output_len = self.decoding_steps 81 | single_view = 1, 1, output_len, self.output_dim 82 | batch_view = batch_size, 1, output_len, self.output_dim 83 | if pack: 84 | # need to handle uneven lengths 85 | final_dist = [] 86 | for i in range(batch_size): 87 | final_dist.append(y[i, lens[i]-1].view(single_view)) 88 | final_dist = torch.cat(final_dist).view(batch_view) 89 | else: 90 | final_dist = y[:, -1].contiguous().view(batch_view) 91 | return final_dist 92 | 93 | def forward(self, seq, glucose_dat, pred_len=0): 94 | raise NotImplementedError 95 | 96 | 97 | class RecursiveRNN(ForecastRNN): 98 | """ 99 | Designed to handle uneven batch sizes 100 | """ 101 | def __init__(self, input_dim, output_dim, hidden_size, depth, cuda): 102 | super(RecursiveRNN, self).__init__(input_dim=input_dim, 103 | output_dim=output_dim, 104 | hidden_size=hidden_size, 105 | depth=depth, 106 | cuda=cuda) 107 | self.output = nn.Linear(hidden_size, output_dim) 108 | if self.cuda: 109 | self.output = self.output.cuda() 110 | 111 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 112 | """ 113 | Given output from RNN layer, translate to output 114 | """ 115 | return self.sm(self.output(out_flat)).contiguous().view(batch_size, 
sequence_length, 1, self.output_dim) 116 | 117 | def forward(self, seq, glucose_dat, pred_len=0, **kwargs): 118 | """ 119 | pred_len is number of recursive forecasts to make 120 | Note: there is padding in form of -1, need to remove for 121 | accurate loss 122 | bins reverse probability predictions to real values 123 | 124 | returns: 125 | curr_dist: (batch_size, sequence_length-1, 1[output_len], output_dim) 126 | curr_pred: (batch_size, sequence_length-1, 1[pred_dim]) 127 | future_dist: (batch_size, 1[tiled preds], pred_len+1, output_dim) 128 | future_pred: (batch_size, 1[tiled preds], pred_len+1) 129 | """ 130 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 131 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 132 | 133 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 134 | 135 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 136 | 137 | if y.data.shape[1] == 1: 138 | # only 1 input, no within series predictions 139 | curr_dist = None 140 | else: 141 | curr_dist = y[:, :-1] 142 | curr_pred = self._dist_to_bins(curr_dist) 143 | 144 | future_dist = [final_dist] 145 | 146 | future_pred = [self._dist_to_bins(future_dist[-1])] 147 | 148 | for i in range(pred_len): 149 | if self.cuda: 150 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.cpu().numpy()) 151 | else: 152 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.numpy()) 153 | out, h = self.rnn.forward(Variable(torch.from_numpy(pred_vals).type(self.float)), h) 154 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 155 | y_f = self._hidden_state_to_output(out_flat, batch_size, 1) 156 | future_dist.append(y_f) 157 | future_pred.append(self._dist_to_bins(future_dist[-1])) 158 | return curr_dist, curr_pred, torch.cat(future_dist, dim=2), torch.cat(future_pred, dim=2) 159 | 160 | 161 | class MultiOutputRNN(ForecastRNN): 162 | """ 163 | Designed to handle uneven batch sizes 164 | """ 165 | def __init__(self, 166 | input_dim, 167 | output_dim, 168 | output_len, 169 | hidden_size, 170 | depth, 171 | cuda, 172 | autoregressive=False, 173 | sequence=False, 174 | polynomial=False, 175 | degree=2): 176 | super(MultiOutputRNN, self).__init__(input_dim=input_dim, 177 | output_dim=output_dim, 178 | hidden_size=hidden_size, 179 | depth=depth, 180 | output_len=output_len, 181 | cuda=cuda) 182 | self.ar = autoregressive 183 | self.seq = sequence 184 | self.polynomial = polynomial 185 | self.degree = degree 186 | if self.polynomial: 187 | self.decoding_steps = self.degree+1 188 | self.polyval_layer = nn.Linear(self.decoding_steps*output_dim, output_len*output_dim) 189 | else: 190 | self.decoding_steps = self.output_len 191 | if self.seq: 192 | self.decoder = nn.GRU(input_size=hidden_size, 193 | hidden_size=hidden_size, 194 | num_layers=1, 195 | dropout=False, 196 | bidirectional=False, 197 | batch_first=False) 198 | self.decoder.cuda() 199 | self.output = nn.Linear(hidden_size, output_dim) 200 | elif self.ar: 201 | output = [nn.Linear(hidden_size, output_dim)] 202 | for i in range(self.decoding_steps-1): 203 | output.append(nn.Linear(hidden_size + output_dim, output_dim)) 204 | self.output = nn.ModuleList(output) 205 | else: 206 | output = [nn.Linear(hidden_size, output_dim) for i in range(self.decoding_steps)] 207 | self.output = nn.ModuleList(output) 208 | if self.cuda: 209 | self.output = self.output.cuda() 210 | 211 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 212 | """ 213 | Given output from RNN layer, translate 
to output 214 | y has size (batch_size, sequence_length, output_len, output_dim) 215 | might want to change 216 | """ 217 | if self.seq: 218 | y = [] 219 | encoded = out_flat[None, :] 220 | hidden = Variable(torch.zeros(encoded.data.shape)).cuda() 221 | for i in range(self.decoding_steps): 222 | encoded, hidden = self.decoder(encoded, hidden) 223 | pred = self.sm(self.output(encoded[0])).contiguous() 224 | y.append(pred.view(batch_size, 225 | sequence_length, 226 | 1, 227 | self.output_dim)) 228 | return torch.cat(y, dim=2) 229 | else: 230 | y = [] 231 | for i in range(len(self.output)): 232 | if self.ar: 233 | if i == 0: 234 | pred = self.sm(self.output[0](out_flat)).contiguous() 235 | y.append(pred.view(batch_size, 236 | sequence_length, 237 | 1, 238 | self.output_dim)) 239 | else: 240 | fused_state = torch.cat((out_flat, pred), dim=1) 241 | pred = self.sm(self.output[i](fused_state)).contiguous() 242 | y.append(pred.view(batch_size, 243 | sequence_length, 244 | 1, 245 | self.output_dim)) 246 | else: 247 | y.append(self.sm(self.output[i](out_flat)).contiguous().view(batch_size, 248 | sequence_length, 249 | 1, 250 | self.output_dim)) 251 | return torch.cat(y, dim=2) 252 | 253 | def poly_to_val(self, poly): 254 | return poly 255 | 256 | def forward(self, seq, glucose_dat, **kwargs): 257 | """ 258 | prediction into future is based on output size 259 | Note: there is padding in form of -1, need to remove for 260 | accurate loss 261 | bins reverse probability predictions to real values 262 | """ 263 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 264 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 265 | 266 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 267 | 268 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 269 | 270 | if y.data.shape[1] <= self.output_len: 271 | # curr_dist contains dists ENTIRELY within signal 272 | # note that this reduces training size 273 | curr_dist = None 274 | else: 275 | curr_dist = y[:, :-self.output_len] 276 | curr_pred = self._dist_to_bins(curr_dist) 277 | 278 | future_dist = [final_dist] 279 | future_pred = self._dist_to_bins(future_dist[-1]) 280 | if self.polynomial: 281 | curr_real_pred = self.poly_to_val(curr_pred) 282 | future_real_pred = self.poly_to_val(future_pred) 283 | return (curr_dist, 284 | curr_pred, 285 | torch.cat(future_dist, dim=0), 286 | future_pred) 287 | 288 | def sort_batch(batch_x, batch_y, batch_y_real, lens): 289 | """ 290 | Sorts minibatch by length in decreasing order 291 | to accomodate pack_padded_sequence 292 | """ 293 | dat_x, dat_y, dat_y_real, dat_l = batch_x.numpy(), batch_y.numpy(), batch_y_real.numpy(), lens.numpy() 294 | sort_x = dat_x[(dat_l*-1).argsort()] # -1 to get descending order 295 | sort_y = dat_y[(dat_l*-1).argsort()] 296 | sort_y_real = dat_y_real[(dat_l*-1).argsort()] 297 | sort_l = dat_l[(dat_l*-1).argsort()] 298 | return sort_x, sort_y, sort_y_real, sort_l 299 | 300 | 301 | def convert_batch(batch_x, batch_y, batch_y_real, batch_l, cuda, real_values=False): 302 | """ 303 | Given batches in numpy form, 304 | convert to proper type for model input 305 | """ 306 | if cuda: 307 | float_type = torch.cuda.FloatTensor 308 | long_type = torch.cuda.LongTensor 309 | else: 310 | float_type = torch.FloatTensor 311 | long_type = torch.LongTensor 312 | new_batch_x = Variable(torch.from_numpy(batch_x).type(float_type), requires_grad=False) 313 | if real_values: 314 | new_batch_y = Variable(torch.from_numpy(batch_y).type(float_type), 
requires_grad=False) 315 | new_batch_y_real = new_batch_y 316 | else: 317 | new_batch_y = Variable(torch.from_numpy(batch_y).type(long_type), requires_grad=False) 318 | new_batch_y_real = Variable(torch.from_numpy(batch_y_real).type(long_type), requires_grad=False) 319 | new_batch_l = list(batch_l) 320 | return new_batch_x, new_batch_y, new_batch_y_real, new_batch_l 321 | 322 | 323 | def remove_prediction_padding(prediction_distribution, 324 | target_value, 325 | loss_weight, 326 | target_real_value): 327 | """ 328 | Masks prediction for artificial targets and flattens 329 | """ 330 | # assuming target value will have all -1 or no -1 331 | missing_indicator = torch.min(target_value, dim=2)[0] != -1 332 | 333 | prediction_nopad = torch.masked_select( 334 | prediction_distribution, 335 | missing_indicator[:, :, None, None]).view(-1, prediction_distribution.shape[-1]) 336 | target_nopad = torch.masked_select( 337 | target_value, 338 | missing_indicator[:, :, None]) 339 | target_real_nopad = torch.masked_select( 340 | target_real_value, 341 | missing_indicator[:, :, None]) 342 | loss_weight_nopad = torch.masked_select( 343 | loss_weight, 344 | missing_indicator[:, :, None]) 345 | return prediction_nopad, target_nopad, target_real_nopad, loss_weight_nopad 346 | 347 | 348 | def remove_prediction_padding_old(prediction_distribution, 349 | target_value, 350 | loss_weight, 351 | target_real_value): 352 | """ 353 | Masks prediction for artificial targets 354 | """ 355 | prediction_distribution = prediction_distribution.contiguous().view(-1, 361) 356 | target_value = target_value.contiguous().view(-1) 357 | loss_weight = loss_weight.contiguous().view(-1) 358 | inter = (target_value != -1).view(-1, 1) 359 | mask = inter.expand(prediction_distribution.size(0), prediction_distribution.size(1)) 360 | ret = [prediction_distribution[mask].view(-1, prediction_distribution.size(1)), 361 | target_value[(target_value != -1)], 362 | None] 363 | if loss_weight is not None: 364 | ret.append(loss_weight[(target_value != -1)]) 365 | else: 366 | ret.append(None) 367 | return ret 368 | 369 | 370 | def get_loss(inp, 371 | out, 372 | out_real, 373 | lens, 374 | cuda, 375 | gn, 376 | glucose_dat, 377 | criterion, 378 | base=1, 379 | value_weight=0, 380 | value_ratio=0): 381 | """ 382 | Simple helper function that calculates model loss. 
383 | Basically to save some space 384 | """ 385 | batch_size_val = inp.size(0) 386 | output_dim = gn.output_dim 387 | 388 | weight_vec = torch.Tensor([base ** i for i in reversed(range(out.size(-1)))]) 389 | weight_vec = (weight_vec/weight_vec.sum()) * weight_vec.numel() # consistent weighting on output length 390 | loss_weight = weight_vec.expand(out.shape) 391 | 392 | inp_s, out_s, out_real_s, lens_s = sort_batch(inp, out, out_real, lens) 393 | inp_s, out_s, out_real_s, lens_s = convert_batch(batch_x=inp_s, 394 | batch_y=out_s, 395 | batch_y_real=out_real_s, 396 | batch_l=lens_s, 397 | cuda=cuda, 398 | real_values=glucose_dat.real_values) 399 | x = nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 400 | glucose_dat.max_pad, 401 | 1), 402 | list(np.array(lens_s)), 403 | batch_first=True) 404 | if glucose_dat.real_values: 405 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 406 | y_p_flat = y_p.contiguous().view(-1, output_dim) 407 | (y_p_nopad, 408 | y_nopad, 409 | y_real_nopad, 410 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=y_p_flat, 411 | target_value=out_s.view(-1), 412 | loss_weight=Variable(loss_weight.cuda()), 413 | target_real_value=out_real_s) 414 | try: 415 | loss = criterion(y_p_nopad, y_nopad) 416 | except: 417 | print(type(y_nopad.data)) 418 | print(type(out_s.data)) 419 | print(type(out)) 420 | raise 421 | 422 | else: 423 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 424 | (yd_p_nopad, 425 | y_nopad, 426 | y_real_nopad, 427 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=yd_p, 428 | target_value=out_s, 429 | loss_weight=Variable(loss_weight.cuda()), 430 | target_real_value=out_real_s) 431 | if glucose_dat.polynomial: 432 | # include MSE 433 | real_criterion = torch.nn.MSELoss() 434 | coeffs = get_coeffs(yd_p_nopad.view(-1, len(glucose_dat.bins), yd_p_nopad.shape[-1]), glucose_dat.bins) 435 | real_values = coeffs_to_values(coeffs) 436 | loss_real = real_criterion(real_values.view(-1), y_real_nopad.float()) * value_weight 437 | loss_dist = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 438 | loss = (1-value_ratio) * loss_dist + value_ratio * loss_real 439 | if np.isnan(loss.data[0]): 440 | raise ValueError('Got NaN loss') 441 | else: 442 | loss = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 443 | if np.isnan(loss.data[0]): 444 | raise ValueError('Got NaN loss') 445 | if torch.min(y_nopad.data) == -1: 446 | print('trouble ahead') 447 | return loss.mean(), y_p 448 | 449 | 450 | def get_coeffs(dist, bins): 451 | prob = torch.exp(dist) 452 | bin_vals = Variable(torch.from_numpy(np.array(bins)).float().cuda()).expand_as(prob).transpose(1, 2) 453 | coeffs = torch.bmm(prob, bin_vals) # includes false off-diag coeffs 454 | real_coeffs = coeffs[torch.eye(len(bins)).expand_as(coeffs).byte().cuda()].view(-1, len(bins)) # extract diagonals 455 | return real_coeffs 456 | 457 | 458 | def coeffs_to_values(coeffs): 459 | degree = coeffs.shape[-1] 460 | basis = Variable(torch.stack([torch.arange(0, 6) ** i for i in range(degree)]).cuda()) 461 | return coeffs.view(-1, degree) @ basis 462 | 463 | 464 | def get_predictions(inp, 465 | out, 466 | lens, 467 | cuda, 468 | gn, 469 | glucose_dat): 470 | """ 471 | Gets predictions 472 | """ 473 | batch_size_val = inp.size(0) 474 | output_dim = gn.output_dim 475 | 476 | inp_s, out_s, lens_s = sort_batch(inp, out, lens) 477 | inp_s, out_s, lens_s = convert_batch(inp_s, 478 | out_s, 479 | lens_s, 480 | cuda, 481 | glucose_dat.real_values) 482 | x = 
nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 483 | glucose_dat.max_pad, 484 | 1), 485 | list(np.array(lens_s)), 486 | batch_first=True) 487 | if glucose_dat.real_values: 488 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 489 | y_p_flat = y_p.contiguous().view(-1, output_dim) 490 | y_p_nopad, y_nopad = remove_prediction_padding(y_p_flat, 491 | out_s.view(-1)) 492 | 493 | else: 494 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 495 | yd_p_flat = yd_p.contiguous().view(-1, output_dim) 496 | yd_p_nopad, y_nopad = remove_prediction_padding(yd_p_flat, 497 | out_s.view(-1)) 498 | return yd_p, y_p, yd_f, y_f 499 | 500 | 501 | def make_model(config): 502 | """ 503 | A poor man's factory method. 504 | """ 505 | if config.model_type == 'recursive': 506 | gn = RecursiveRNN(input_dim=config.input_dim, 507 | output_dim=config.output_dim, 508 | hidden_size=config.hidden_size, 509 | depth=config.depth, 510 | cuda=True) 511 | else: 512 | assert config.output_len == config.pred_len # could relax 513 | gn = MultiOutputRNN(input_dim=config.input_dim, 514 | output_dim=config.output_dim, 515 | hidden_size=config.hidden_size, 516 | output_len=config.output_len, 517 | depth=config.depth, 518 | cuda=True, 519 | autoregressive=config.autoregressive, 520 | sequence=config.sequence, 521 | polynomial=config.polynomial, 522 | degree=config.degree) 523 | return gn 524 | -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/trainer-checkpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements training schemes with logging 3 | """ 4 | import numpy as np 5 | import os 6 | import time 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader, sampler 10 | from tensorboardX import SummaryWriter 11 | from tqdm import tqdm 12 | import joblib 13 | 14 | from forecast_code.lib.training import model as forecast_model 15 | 16 | 17 | class ExperimentTrainer: 18 | """ 19 | Simple training scheme 20 | """ 21 | 22 | def __init__(self, model, optimizer, criterion, name, model_dir, log_dir, 23 | load=False, load_epoch=None): 24 | """ 25 | :param model: initialized model for training 26 | :param optimizer: initialized training optimizer 27 | :param name: string to save trainer results under 28 | :param load: whether or not to load results from previous train if they exist 29 | :param epoch: which epoch results to load, if None then the best found 30 | """ 31 | self.model = model 32 | self.criterion = criterion 33 | self.optimizer = optimizer 34 | self.name = name 35 | self.model_dir = model_dir 36 | self.log_dir = log_dir 37 | 38 | if not os.path.exists(self.model_dir): 39 | os.makedirs(self.model_dir) 40 | os.makedirs(self.log_dir) 41 | else: 42 | if load: 43 | if load_epoch is None: 44 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, 'bsf_sup.pt'))) 45 | else: 46 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, '{}_sup.pt'.format(load_epoch)))) 47 | 48 | else: 49 | print('Warning: directory already exists') 50 | self.writer = SummaryWriter(log_dir=self.log_dir) 51 | 52 | def train_sup(self, epoch_lim, data, valid_data, early_stopping_lim, 53 | batch_size, num_workers, track_embeddings, validation_rate, loss_weight_base=1, 54 | value_weight=0, value_ratio=0): 55 | """ 56 | Training loop 57 | :param epoch_lim: total number of training epochs 58 | :param data: training data 59 | :param valid_data: validation data 60 | 
:param early_stopping_lim: Number of epochs to run without validation improvement before stopping 61 | if None, never stop early 62 | :param batch_size: training batch_size 63 | :param num_workers: number of CPU workers to use for data loading 64 | :param track_embeddings: Save out embedding information at end of run 65 | :param validation_rate: Check validation performance every validation_rate training epochs 66 | :param loss_weight_base: A constant between 0 and 1 used to interpolate between Single (=0) and Multi (=1) Step forecasting. 67 | :param value_weight: A constant multiplier for the real-value loss, set to 0 in the paper 68 | :param value_ratio: The proportion of loss used for the MSE loss term (as opposed for the cross-entropy loss), set to 0 in the paper 69 | :return loss array, model: 70 | """ 71 | if early_stopping_lim is None: 72 | early_stopping_lim = epoch_lim 73 | train_sampler = sampler.RandomSampler(np.arange(len(data))) 74 | data_train = DataLoader(data, 75 | batch_size=batch_size, 76 | sampler=train_sampler, 77 | drop_last=True) 78 | 79 | valid_sampler = sampler.SequentialSampler(np.arange(len(valid_data))) 80 | data_valid = DataLoader(valid_data, 81 | batch_size=batch_size, 82 | sampler=valid_sampler) 83 | step = 0 84 | 85 | bsf_loss = np.inf 86 | epochs_without_improvement = 0 87 | improvements = [] 88 | for epoch in range(epoch_lim): 89 | if epochs_without_improvement > early_stopping_lim: 90 | print('Exceeded early stopping limit, stopping') 91 | break 92 | if epoch % validation_rate == 0: 93 | valid_loss = self.validation(data_valid=data_valid, 94 | step=step, 95 | data=data, 96 | loss_weight_base=loss_weight_base, 97 | value_weight=value_weight, value_ratio=value_ratio) 98 | (bsf_loss, 99 | epochs_without_improvement, 100 | improvements) = self.manage_early_stopping(bsf_loss=bsf_loss, 101 | early_stopping_lim=early_stopping_lim, 102 | epochs_without_improvement=epochs_without_improvement, 103 | valid_loss=valid_loss, validation_rate=validation_rate, 104 | improvements=improvements) 105 | running_train_loss = 0 106 | for inp, out, out_real, lens in tqdm(data_train): 107 | loss, y_p = forecast_model.get_loss(inp=inp, 108 | out=out, 109 | lens=lens, 110 | cuda=True, 111 | gn=self.model, 112 | glucose_dat=data, 113 | criterion=self.criterion, 114 | base=loss_weight_base, 115 | out_real=out_real, 116 | value_weight=value_weight, 117 | value_ratio=value_ratio) 118 | step += 1 119 | running_train_loss += loss.data.cpu().numpy()[0] 120 | self.optimizer.zero_grad() 121 | loss.backward() 122 | self.optimizer.step() 123 | running_train_loss = running_train_loss/len(data_train) 124 | self.writer.add_scalar(tag='train_loss', 125 | scalar_value=running_train_loss, 126 | global_step=step) 127 | torch.save(self.model.state_dict(), '{}/final_sup.pt'.format(self.model_dir)) 128 | if track_embeddings: 129 | self.embed(data_valid, step, embed_batch=100) 130 | return improvements 131 | 132 | def manage_early_stopping(self, bsf_loss, early_stopping_lim, epochs_without_improvement, valid_loss, 133 | validation_rate, improvements): 134 | if valid_loss < bsf_loss: 135 | print('improved validation loss from {:.3f} to {:.3f}'.format(bsf_loss, valid_loss)) 136 | bsf_loss = valid_loss 137 | improvements.append(epochs_without_improvement) 138 | epochs_without_improvement = 0 139 | torch.save(self.model.state_dict(), 140 | '{}/bsf_sup.pt'.format(self.model_dir)) 141 | else: 142 | epochs_without_improvement += validation_rate 143 | print('Validation loss of {} did not improve on 
{}'.format(valid_loss, bsf_loss)) 144 | print('Early stopping at {}/{}'.format(epochs_without_improvement, early_stopping_lim)) 145 | return bsf_loss, epochs_without_improvement, improvements 146 | 147 | def validation(self, data_valid, step, data, loss_weight_base, value_weight, value_ratio): 148 | self.model.eval() 149 | running_valid_loss = 0 150 | for inp, out, out_real, lens in data_valid: 151 | loss, y_p = forecast_model.get_loss(inp=inp, 152 | out=out, 153 | lens=lens, 154 | cuda=True, 155 | gn=self.model, 156 | glucose_dat=data, 157 | criterion=self.criterion, 158 | base=loss_weight_base, 159 | out_real=out_real, 160 | value_weight=value_weight, 161 | value_ratio=value_ratio) 162 | step += 1 163 | running_valid_loss += loss.data.cpu().numpy()[0] 164 | running_valid_loss = running_valid_loss / len(data_valid) 165 | print('validation loss: {:.3f}'.format(running_valid_loss)) 166 | self.writer.add_scalar(tag='valid_total_loss', 167 | scalar_value=running_valid_loss, 168 | global_step=step) 169 | self.model.train() 170 | return running_valid_loss 171 | 172 | def embed(self, dataloader, step, embed_batch=5): 173 | print('embed') 174 | embeddings = None 175 | metadata = [] 176 | i = 0 177 | for dat, dat_past, dat_future, init, label in dataloader: 178 | x = Variable(dat.float().cuda()) 179 | e = self.model.embed(x).data 180 | metadata += np.round(label.numpy(), 2).tolist() 181 | if embeddings is None: 182 | embeddings = e 183 | else: 184 | embeddings = torch.cat((embeddings, e)) 185 | if i > embed_batch: 186 | break 187 | i += 1 188 | print(len(metadata)) 189 | self.writer.add_embedding(mat=embeddings, 190 | metadata=metadata, 191 | global_step=step) 192 | 193 | def get_predictions(self, dataloader): 194 | self.model.eval() 195 | data = None 196 | data_past = None 197 | data_future = None 198 | y = None 199 | pred_pres = None 200 | pred_past = None 201 | pred_future = None 202 | pred = None 203 | for dat, dat_past, dat_future, init, label in dataloader: 204 | print('evaluation batch') 205 | window_data = [] 206 | window_data_past = [] 207 | window_data_future = [] 208 | window_y = [] 209 | window_pred = [] 210 | window_pred_pres = [] 211 | window_pred_past = [] 212 | window_pred_future = [] 213 | if not self.window: 214 | dat = [dat] 215 | dat_past = [dat_past] 216 | dat_future = [dat_future] 217 | for window in range(len(dat)): 218 | x = Variable(dat[window].float().cuda()) 219 | y_pred, x_pres, x_past, x_future = self.model.forward(x) 220 | y_pred = y_pred.data.cpu().numpy() 221 | if self.decode_present: 222 | x_pres = x_pres.data.cpu().numpy() 223 | if self.decode_past: 224 | x_past = x_past.data.cpu().numpy() 225 | if self.decode_future: 226 | x_future = x_future.data.cpu().numpy() 227 | yt = label.numpy() 228 | xt_pres = dat[window].numpy() 229 | xt_past = dat_past[window].numpy() 230 | xt_future = dat_future[window].numpy() 231 | window_data.append(xt_pres) 232 | window_data_past.append(xt_past) 233 | window_data_future.append(xt_future) 234 | window_y.append(yt) 235 | window_pred.append(y_pred) 236 | window_pred_pres.append(x_pres) 237 | window_pred_past.append(x_past) 238 | window_pred_future.append(x_future) 239 | if data is None: 240 | data = [window_data] 241 | data_past = [window_data_past] 242 | data_future = [window_data_future] 243 | y = [window_y] 244 | pred_pres = [window_pred_pres] 245 | pred_past = [window_pred_past] 246 | pred_future = [window_pred_future] 247 | pred = [window_pred] 248 | else: 249 | data.append(window_data) 250 | data_past.append(window_data_past) 
251 | data_future.append(window_data_future) 252 | y.append(window_y) 253 | if self.decode_present: 254 | pred_pres.append(window_pred_pres) 255 | if self.decode_past: 256 | pred_past.append(window_pred_past) 257 | if self.decode_future: 258 | pred_future.append(window_pred_future) 259 | pred.append(window_pred) 260 | print('done getting predictions') 261 | return (data, data_past, data_future, y, 262 | pred_pres, pred_past, pred_future, pred) 263 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/lib/__init__.py -------------------------------------------------------------------------------- /lib/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igfox/multi-output-glucose-forecasting/27bdfbfa1bbe0816ebcd5808d9bc76dd767d6f61/lib/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /lib/glucose_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.polynomial import polynomial as pn 3 | import joblib 4 | from joblib import Parallel, delayed 5 | from torch.utils.data import Dataset 6 | 7 | 8 | class GlucoseDataset(Dataset): 9 | """ 10 | Blood glucose dataset for pytorch 11 | Entry: (data, y_bin, y_real, len) 12 | loading everything into memory as small 13 | """ 14 | def __init__(self, 15 | data_pkl, 16 | max_pad, 17 | output_len, 18 | output_dim, 19 | polynomial=False, 20 | degree=None, 21 | range_low=None, 22 | range_high=None, 23 | coeff_file=None, 24 | real_values=False, 25 | parallel_cache=False, 26 | max_size=None, 27 | flip_signal=False): 28 | 29 | # for one-hot encoding, assumes 40-400 mg/dL range 30 | self.output_dim = output_dim 31 | self.polynomial = polynomial 32 | self.degree = degree 33 | self.range_low = range_low 34 | self.range_high = range_high 35 | self.real_values = real_values 36 | self.max_pad = max_pad 37 | 38 | self.data = joblib.load(data_pkl) 39 | if flip_signal: 40 | self.data_flip = [] 41 | for i in range(len(self.data)): 42 | self.data_flip.append(np.flip(self.data[i], axis=0)) 43 | self.data = self.data_flip 44 | if max_size is not None: 45 | self.data = self.data[0:max_size] 46 | self.output_len = output_len 47 | 48 | if not self.real_values: 49 | if self.polynomial: 50 | # from degree, calculate ranges 51 | # from ranges, get bins 52 | # should make more flexible, function parameter binning 53 | # old hand-defined range, captured 100% variation 54 | #[[40, 400], 55 | # [-36, 36], 56 | # [-5.5, 5.5]] 57 | for var in [self.degree, self.range_low, self.range_high]: 58 | assert var is not None, 'Must set degree, range_high, and range_low for polynomial' 59 | 60 | if coeff_file is None: 61 | ranges = self.auto_poly_range() 62 | else: 63 | ranges = self.precomputed_poly_range(coeff_file) 64 | 65 | self.bin_step = [(ranges[i][1]-ranges[i][0])/(self.output_dim-1) for i in range(degree+1)] 66 | self.bins = [ 67 | np.linspace(ranges[i][0], 68 | ranges[i][1], 69 | self.output_dim) + (0.5 * self.bin_step[i]) 70 | for i in range(self.degree+1)] 71 | else: 72 | # simple value binning 73 | self.bin_step = (400-40)/(self.output_dim-1) 74 | # the half step appraoch is an artifact of wanting perfect bins 
with output_dim=361 75 | self.bins = np.linspace(40, 400, self.output_dim)+(self.bin_step * 0.5) 76 | 77 | # trying out precaching results for less intensive load 78 | count = 0 79 | self.x_out = [] 80 | self.y_out = [] 81 | self.y_real = [] 82 | self.lens = [] 83 | print('caching results') 84 | if parallel_cache: 85 | res_tuples = Parallel(n_jobs=5, verbose=10)(delayed(self.prepare_output)(idx) for idx in range(len(self.data))) 86 | for idx in range(len(self.data)): 87 | x_pad, y_pad, y_real_pad, lens = res_tuples[idx] 88 | self.x_out.append(x_pad) 89 | self.y_out.append(y_pad) 90 | self.y_real.append(y_real_pad) 91 | self.lens.append(lens) 92 | else: 93 | for idx in range(len(self.data)): 94 | if idx % 1000 == 0: 95 | print('{}/{}'.format(idx, len(self.data))) 96 | x_pad, y_pad, y_real_pad, lens = self.prepare_output(idx) 97 | self.x_out.append(x_pad) 98 | self.y_out.append(y_pad) 99 | self.y_real.append(y_real_pad) 100 | self.lens.append(lens) 101 | 102 | def prepare_output(self, idx, real_y=True): 103 | x_dat = self.data[idx] 104 | length = self.max_pad - len(x_dat) 105 | x_pad = np.pad(x_dat, 106 | (0, length), 107 | mode='constant', 108 | constant_values=-1) 109 | y_dat = self.window_stack(x_dat[1::].reshape(-1, 1)) 110 | if self.real_values: 111 | y_bins = y_dat 112 | else: 113 | y_bins = self.values_to_bins(y_dat) 114 | y_pad = np.pad(y_bins, 115 | ((0, length), (0, 0)), 116 | mode='constant', 117 | constant_values=-1) 118 | if real_y: 119 | y_real_pad = np.pad(y_dat, 120 | ((0, length), (0, 0)), 121 | mode='constant', 122 | constant_values=-1) 123 | return x_pad, y_pad, y_real_pad, self.max_pad - length 124 | else: 125 | return x_pad, y_pad, self.max_pad - length 126 | 127 | def auto_poly_range(self, percentile): 128 | """ 129 | Using degree and training data, creates 130 | range that captures percentile% of variation of the best fit 131 | coefficient values. 132 | """ 133 | raise NotImplementedError('TODO') 134 | 135 | def precomputed_poly_range(self, coeff_file): 136 | """ 137 | Simple function that uses precomputed coefficient 138 | percentile dict 139 | 140 | low, high can be integers in 0-100 141 | 142 | Requires precomputed coeff dict 143 | """ 144 | assert self.range_low < self.range_high 145 | 146 | coeff = joblib.load(coeff_file) 147 | 148 | ranges = [] 149 | for i in range(self.degree+1): 150 | low_val = coeff[self.degree][i][self.range_low] 151 | high_val = coeff[self.degree][i][self.range_high] 152 | ranges.append([low_val, high_val]) 153 | return ranges 154 | 155 | def scale(self, x): 156 | """ 157 | turn glucose signal with 40-400 to range -1 to 1 158 | can add more intelligent scaling for balencing hypo/hyper, 159 | though real concern is moving over to classification 160 | """ 161 | return (x-220)/180. 162 | 163 | def one_hot(self, seq): 164 | """ 165 | turn glucose signal into one hot distribution 166 | with size=output_dim, linearly bins glucose 167 | range 40-400 168 | don't need for NLLLoss 169 | """ 170 | dist = np.zeros((seq.size, self.output_dim)) 171 | dist[np.arange(seq.size), np.digitize(seq, self.bins)] = 1. 
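# Sketch of the polynomial variant defined just below (hedged illustration; degree=2,
# output_len=6 and a precomputed coeff_file are assumptions, and the window values are
# hypothetical):
#   y_window = np.array([130., 134., 139., 145., 152., 160.])   # one target window
#   coeffs   = pn.polyfit(np.arange(6), y_window, deg=2)        # [c0, c1, c2]
#   # polymerize() digitizes each coefficient against its own bins[i] and clips to
#   # [0, output_dim-1]; reverse_polymerize() maps the bins back to coefficient values
#   # and evaluates pn.polyval(np.arange(output_len), coeffs) to recover the forecast.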
172 | return dist 173 | 174 | def polymerize(self, y): 175 | """ 176 | Turns output window into best fit polynomial 177 | with output [x'_0, ..., x'_d] where x' is 178 | bin number that x would be in (using ranges) 179 | """ 180 | x_inds = [] 181 | if len(y.shape) > 1: 182 | for j in range(y.shape[0]): 183 | coeffs = pn.polyfit(np.arange(len(y[j])), y[j], deg=self.degree) 184 | x_inds.append([np.digitize(coeffs[i], self.bins[i]).item() for i in range(self.degree+1)]) 185 | else: 186 | coeffs = pn.polyfit(np.arange(len(y)), y, deg=self.degree) 187 | for i in range(self.degree+1): 188 | x_inds.append(np.digitize(coeffs[i], self.bins[i]).item()) 189 | return np.clip(x_inds, 0, self.output_dim-1) 190 | 191 | def bins_to_coeff_values(self, pred): 192 | """ 193 | Given bins for polynomial coefficients, 194 | return estimate of real coefficient values 195 | """ 196 | if len(pred.shape) > 1: 197 | vals = [np.array([self.bins[i][np.clip(np.array(pred[:, i], dtype=int), 0, self.output_dim-1)]]) 198 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 199 | coeffs = np.concatenate(vals, axis=0).T 200 | else: 201 | coeffs = [self.bins[i][np.clip(np.array(pred[i], dtype=int), 0, self.output_dim-1)] 202 | - (0.5 * self.bin_step[i]) for i in range(self.degree+1)] 203 | return np.array(coeffs) 204 | 205 | def reverse_polymerize(self, pred): 206 | """ 207 | Given bins for polynomial coefficients, returns forecast 208 | For new foreacsting system, flexible degree and doesn't assume 209 | adding mistake 210 | """ 211 | coeffs = self.bins_to_coeff_values(pred) 212 | return pn.polyval(np.arange(self.output_len), coeffs.T) 213 | 214 | def values_to_bins(self, y): 215 | """ 216 | Gvien a y sample (or batch of y samples), changes from 217 | value to categorical representation 218 | """ 219 | if self.real_values: 220 | return y 221 | if self.polynomial: 222 | return self.polymerize(y) 223 | else: 224 | return np.digitize(y, self.bins) 225 | 226 | def bins_to_values(self, y): 227 | """ 228 | Given a y sample (or batch of y samples), changes from categorical 229 | to value representation 230 | """ 231 | if type(y) is not np.ndarray: 232 | y = y.numpy() 233 | if self.real_values: 234 | return y 235 | if self.polynomial: 236 | return self.reverse_polymerize(y) 237 | else: 238 | vals = self.bins[np.clip(np.array(y, dtype=int), 0, self.output_dim-1)] 239 | return vals - (0.5 * self.bin_step) 240 | 241 | def index_to_values(self, x, i): 242 | """ 243 | Given i index for output value: y[i] 244 | returns ground truth x values 245 | bins_to_values can also be used, but ignores 246 | polynomial residual 247 | """ 248 | return x[i+1:i+1+self.output_len] 249 | 250 | def window_stack(self, seq, stepsize=1): 251 | """ 252 | Gets rolling window from seq of length self.output_len 253 | stepsize determines dilation 254 | """ 255 | length = self.output_len 256 | n = seq.shape[0] 257 | return np.hstack(seq[i:1+n+i-length:stepsize] for i in range(length)) 258 | 259 | def __len__(self): 260 | return len(self.data) 261 | 262 | def __getitem__(self, idx): 263 | return self.x_out[idx], self.y_out[idx], self.y_real[idx], self.lens[idx] 264 | -------------------------------------------------------------------------------- /lib/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import joblib 3 | 4 | import torch 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | 9 | class ForecastRNN(nn.Module): 10 | """ 11 | Helper for pytorch reimplementation 12 
| Uses variable sized/depth GRU with linear layer to get output right 13 | """ 14 | def __init__(self, input_dim, output_dim, hidden_size, depth, output_len=-1, cuda=False): 15 | super(ForecastRNN, self).__init__() 16 | self.cuda = cuda 17 | self.rnn = nn.GRU(input_size=input_dim, 18 | hidden_size=hidden_size, 19 | num_layers=depth, 20 | dropout=False, 21 | bidirectional=False, # would bidirectional help forecasting? 22 | batch_first=True) 23 | self.sm = nn.LogSoftmax(dim=1) 24 | self.input_dim = input_dim 25 | self.output_dim = output_dim 26 | self.output_len = output_len 27 | if self.cuda: 28 | self.rnn = self.rnn.cuda() 29 | self.sm = self.sm.cuda() 30 | self.float = torch.cuda.FloatTensor # not sure I need this 31 | else: 32 | self.float = torch.FloatTensor 33 | 34 | @staticmethod 35 | def _dist_to_bins(dist): 36 | return torch.max(dist, dim=-1)[1] 37 | 38 | @staticmethod 39 | def _get_sequence_info(seq): 40 | """ 41 | gets info on fed sequence 42 | """ 43 | if type(seq) == torch.nn.utils.rnn.PackedSequence: 44 | pack = True 45 | batch_size = seq.batch_sizes[0] 46 | sequence_length = len(seq.batch_sizes) 47 | else: 48 | pack = False 49 | batch_size = seq.size(0) 50 | sequence_length = seq.size(1) 51 | return pack, batch_size, sequence_length 52 | 53 | def _rnn_forward(self, seq, pack, batch_size): 54 | """ 55 | Helper function for forward that computes up to output layer 56 | """ 57 | h = Variable(torch.zeros(self.rnn.num_layers, 58 | batch_size, # not sure if need to reshape for batch_first 59 | self.rnn.hidden_size).type(self.float), 60 | requires_grad=False) 61 | # predict within the sequence 62 | out, h = self.rnn.forward(seq, h) 63 | if pack: 64 | out, lens = nn.utils.rnn.pad_packed_sequence(out, batch_first=True, padding_value=-1) 65 | else: 66 | lens = None 67 | # out has dim (batch_size, sequence_length, hidden_size) 68 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 69 | return out_flat, h, lens 70 | 71 | def _extract_final_dist(self, pack, batch_size, y, lens): 72 | """ 73 | Given y (possibly with padding), get distribution 74 | for final prediction at t+1 75 | prediction must be of size (batch_size, 1[, output_len], output_length) 76 | """ 77 | if type(self) is RecursiveRNN: 78 | output_len = 1 79 | else: 80 | output_len = self.decoding_steps 81 | single_view = 1, 1, output_len, self.output_dim 82 | batch_view = batch_size, 1, output_len, self.output_dim 83 | if pack: 84 | # need to handle uneven lengths 85 | final_dist = [] 86 | for i in range(batch_size): 87 | final_dist.append(y[i, lens[i]-1].view(single_view)) 88 | final_dist = torch.cat(final_dist).view(batch_view) 89 | else: 90 | final_dist = y[:, -1].contiguous().view(batch_view) 91 | return final_dist 92 | 93 | def forward(self, seq, glucose_dat, pred_len=0): 94 | raise NotImplementedError 95 | 96 | 97 | class RecursiveRNN(ForecastRNN): 98 | """ 99 | Designed to handle uneven batch sizes 100 | """ 101 | def __init__(self, input_dim, output_dim, hidden_size, depth, cuda): 102 | super(RecursiveRNN, self).__init__(input_dim=input_dim, 103 | output_dim=output_dim, 104 | hidden_size=hidden_size, 105 | depth=depth, 106 | cuda=cuda) 107 | self.output = nn.Linear(hidden_size, output_dim) 108 | if self.cuda: 109 | self.output = self.output.cuda() 110 | 111 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 112 | """ 113 | Given output from RNN layer, translate to output 114 | """ 115 | return self.sm(self.output(out_flat)).contiguous().view(batch_size, sequence_length, 1, self.output_dim) 
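# Usage sketch for the recursive decoder below (hypothetical hyperparameters; this
# mirrors the general pattern in walkthrough.ipynb rather than its exact settings):
#   glucose_dat = GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',
#                                max_pad=288, output_len=6, output_dim=361)  # max_pad is an assumption
#   model = RecursiveRNN(input_dim=1, output_dim=361, hidden_size=512, depth=2, cuda=False)
#   x = Variable(torch.from_numpy(batch).float())   # batch: hypothetical (B, seq_len, 1) scaled CGM array
#   curr_dist, curr_pred, future_dist, future_pred = model(x, glucose_dat, pred_len=5)
#   # future_pred[:, 0] holds six recursively generated bin indices (roughly 30 minutes
#   # ahead at the 5-minute CGM sampling rate); glucose_dat.bins_to_values() maps them
#   # back to mg/dL. The loop in forward() feeds each argmax prediction back in as the
#   # next input, which is what distinguishes this class from MultiOutputRNN.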
116 | 117 | def forward(self, seq, glucose_dat, pred_len=0, **kwargs): 118 | """ 119 | pred_len is number of recursive forecasts to make 120 | Note: there is padding in form of -1, need to remove for 121 | accurate loss 122 | bins reverse probability predictions to real values 123 | 124 | returns: 125 | curr_dist: (batch_size, sequence_length-1, 1[output_len], output_dim) 126 | curr_pred: (batch_size, sequence_length-1, 1[pred_dim]) 127 | future_dist: (batch_size, 1[tiled preds], pred_len+1, output_dim) 128 | future_pred: (batch_size, 1[tiled preds], pred_len+1) 129 | """ 130 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 131 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 132 | 133 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 134 | 135 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 136 | 137 | if y.data.shape[1] == 1: 138 | # only 1 input, no within series predictions 139 | curr_dist = None 140 | else: 141 | curr_dist = y[:, :-1] 142 | curr_pred = self._dist_to_bins(curr_dist) 143 | 144 | future_dist = [final_dist] 145 | 146 | future_pred = [self._dist_to_bins(future_dist[-1])] 147 | 148 | for i in range(pred_len): 149 | if self.cuda: 150 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.cpu().numpy()) 151 | else: 152 | pred_vals = glucose_dat.bins_to_values(future_pred[-1].data.numpy()) 153 | out, h = self.rnn.forward(Variable(torch.from_numpy(pred_vals).type(self.float)), h) 154 | out_flat = out.contiguous().view(-1, self.rnn.hidden_size) 155 | y_f = self._hidden_state_to_output(out_flat, batch_size, 1) 156 | future_dist.append(y_f) 157 | future_pred.append(self._dist_to_bins(future_dist[-1])) 158 | return curr_dist, curr_pred, torch.cat(future_dist, dim=2), torch.cat(future_pred, dim=2) 159 | 160 | 161 | class MultiOutputRNN(ForecastRNN): 162 | """ 163 | Designed to handle uneven batch sizes 164 | """ 165 | def __init__(self, 166 | input_dim, 167 | output_dim, 168 | output_len, 169 | hidden_size, 170 | depth, 171 | cuda, 172 | autoregressive=False, 173 | sequence=False, 174 | polynomial=False, 175 | degree=2): 176 | super(MultiOutputRNN, self).__init__(input_dim=input_dim, 177 | output_dim=output_dim, 178 | hidden_size=hidden_size, 179 | depth=depth, 180 | output_len=output_len, 181 | cuda=cuda) 182 | self.ar = autoregressive 183 | self.seq = sequence 184 | self.polynomial = polynomial 185 | self.degree = degree 186 | if self.polynomial: 187 | self.decoding_steps = self.degree+1 188 | self.polyval_layer = nn.Linear(self.decoding_steps*output_dim, output_len*output_dim) 189 | else: 190 | self.decoding_steps = self.output_len 191 | if self.seq: 192 | self.decoder = nn.GRU(input_size=hidden_size, 193 | hidden_size=hidden_size, 194 | num_layers=1, 195 | dropout=False, 196 | bidirectional=False, 197 | batch_first=False) 198 | self.decoder.cuda() 199 | self.output = nn.Linear(hidden_size, output_dim) 200 | elif self.ar: 201 | output = [nn.Linear(hidden_size, output_dim)] 202 | for i in range(self.decoding_steps-1): 203 | output.append(nn.Linear(hidden_size + output_dim, output_dim)) 204 | self.output = nn.ModuleList(output) 205 | else: 206 | output = [nn.Linear(hidden_size, output_dim) for i in range(self.decoding_steps)] 207 | self.output = nn.ModuleList(output) 208 | if self.cuda: 209 | self.output = self.output.cuda() 210 | 211 | def _hidden_state_to_output(self, out_flat, batch_size, sequence_length): 212 | """ 213 | Given output from RNN layer, translate to output 214 | y has size 
(batch_size, sequence_length, output_len, output_dim) 215 | might want to change 216 | """ 217 | if self.seq: 218 | y = [] 219 | encoded = out_flat[None, :] 220 | hidden = Variable(torch.zeros(encoded.data.shape)).cuda() 221 | for i in range(self.decoding_steps): 222 | encoded, hidden = self.decoder(encoded, hidden) 223 | pred = self.sm(self.output(encoded[0])).contiguous() 224 | y.append(pred.view(batch_size, 225 | sequence_length, 226 | 1, 227 | self.output_dim)) 228 | return torch.cat(y, dim=2) 229 | else: 230 | y = [] 231 | for i in range(len(self.output)): 232 | if self.ar: 233 | if i == 0: 234 | pred = self.sm(self.output[0](out_flat)).contiguous() 235 | y.append(pred.view(batch_size, 236 | sequence_length, 237 | 1, 238 | self.output_dim)) 239 | else: 240 | fused_state = torch.cat((out_flat, pred), dim=1) 241 | pred = self.sm(self.output[i](fused_state)).contiguous() 242 | y.append(pred.view(batch_size, 243 | sequence_length, 244 | 1, 245 | self.output_dim)) 246 | else: 247 | y.append(self.sm(self.output[i](out_flat)).contiguous().view(batch_size, 248 | sequence_length, 249 | 1, 250 | self.output_dim)) 251 | return torch.cat(y, dim=2) 252 | 253 | def poly_to_val(self, poly): 254 | return poly 255 | 256 | def forward(self, seq, glucose_dat, **kwargs): 257 | """ 258 | prediction into future is based on output size 259 | Note: there is padding in form of -1, need to remove for 260 | accurate loss 261 | bins reverse probability predictions to real values 262 | """ 263 | pack, batch_size, sequence_length = self._get_sequence_info(seq) 264 | out_flat, h, lens = self._rnn_forward(seq, pack, batch_size) 265 | 266 | y = self._hidden_state_to_output(out_flat, batch_size, sequence_length) 267 | 268 | final_dist = self._extract_final_dist(pack, batch_size, y, lens) 269 | 270 | if y.data.shape[1] <= self.output_len: 271 | # curr_dist contains dists ENTIRELY within signal 272 | # note that this reduces training size 273 | curr_dist = None 274 | else: 275 | curr_dist = y[:, :-self.output_len] 276 | curr_pred = self._dist_to_bins(curr_dist) 277 | 278 | future_dist = [final_dist] 279 | future_pred = self._dist_to_bins(future_dist[-1]) 280 | if self.polynomial: 281 | curr_real_pred = self.poly_to_val(curr_pred) 282 | future_real_pred = self.poly_to_val(future_pred) 283 | return (curr_dist, 284 | curr_pred, 285 | torch.cat(future_dist, dim=0), 286 | future_pred) 287 | 288 | def sort_batch(batch_x, batch_y, batch_y_real, lens): 289 | """ 290 | Sorts minibatch by length in decreasing order 291 | to accomodate pack_padded_sequence 292 | """ 293 | dat_x, dat_y, dat_y_real, dat_l = batch_x.numpy(), batch_y.numpy(), batch_y_real.numpy(), lens.numpy() 294 | sort_x = dat_x[(dat_l*-1).argsort()] # -1 to get descending order 295 | sort_y = dat_y[(dat_l*-1).argsort()] 296 | sort_y_real = dat_y_real[(dat_l*-1).argsort()] 297 | sort_l = dat_l[(dat_l*-1).argsort()] 298 | return sort_x, sort_y, sort_y_real, sort_l 299 | 300 | 301 | def convert_batch(batch_x, batch_y, batch_y_real, batch_l, cuda, real_values=False): 302 | """ 303 | Given batches in numpy form, 304 | convert to proper type for model input 305 | """ 306 | if cuda: 307 | float_type = torch.cuda.FloatTensor 308 | long_type = torch.cuda.LongTensor 309 | else: 310 | float_type = torch.FloatTensor 311 | long_type = torch.LongTensor 312 | new_batch_x = Variable(torch.from_numpy(batch_x).type(float_type), requires_grad=False) 313 | if real_values: 314 | new_batch_y = Variable(torch.from_numpy(batch_y).type(float_type), requires_grad=False) 315 | 
new_batch_y_real = new_batch_y 316 | else: 317 | new_batch_y = Variable(torch.from_numpy(batch_y).type(long_type), requires_grad=False) 318 | new_batch_y_real = Variable(torch.from_numpy(batch_y_real).type(long_type), requires_grad=False) 319 | new_batch_l = list(batch_l) 320 | return new_batch_x, new_batch_y, new_batch_y_real, new_batch_l 321 | 322 | 323 | def remove_prediction_padding(prediction_distribution, 324 | target_value, 325 | loss_weight, 326 | target_real_value): 327 | """ 328 | Masks prediction for artificial targets and flattens 329 | """ 330 | # assuming target value will have all -1 or no -1 331 | missing_indicator = torch.min(target_value, dim=2)[0] != -1 332 | 333 | prediction_nopad = torch.masked_select( 334 | prediction_distribution, 335 | missing_indicator[:, :, None, None]).view(-1, prediction_distribution.shape[-1]) 336 | target_nopad = torch.masked_select( 337 | target_value, 338 | missing_indicator[:, :, None]) 339 | target_real_nopad = torch.masked_select( 340 | target_real_value, 341 | missing_indicator[:, :, None]) 342 | loss_weight_nopad = torch.masked_select( 343 | loss_weight, 344 | missing_indicator[:, :, None]) 345 | return prediction_nopad, target_nopad, target_real_nopad, loss_weight_nopad 346 | 347 | 348 | def remove_prediction_padding_old(prediction_distribution, 349 | target_value, 350 | loss_weight, 351 | target_real_value): 352 | """ 353 | Masks prediction for artificial targets 354 | """ 355 | prediction_distribution = prediction_distribution.contiguous().view(-1, 361) 356 | target_value = target_value.contiguous().view(-1) 357 | loss_weight = loss_weight.contiguous().view(-1) 358 | inter = (target_value != -1).view(-1, 1) 359 | mask = inter.expand(prediction_distribution.size(0), prediction_distribution.size(1)) 360 | ret = [prediction_distribution[mask].view(-1, prediction_distribution.size(1)), 361 | target_value[(target_value != -1)], 362 | None] 363 | if loss_weight is not None: 364 | ret.append(loss_weight[(target_value != -1)]) 365 | else: 366 | ret.append(None) 367 | return ret 368 | 369 | 370 | def get_loss(inp, 371 | out, 372 | out_real, 373 | lens, 374 | cuda, 375 | gn, 376 | glucose_dat, 377 | criterion, 378 | base=1, 379 | value_weight=0, 380 | value_ratio=0): 381 | """ 382 | Simple helper function that calculates model loss. 
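    The loss weight for forecast step i (of L output steps) is proportional to
    base**(L-1-i), normalized to have mean 1, so base=1 weights every horizon
    equally while base close to 0 concentrates the loss on the final forecast
    step. When glucose_dat.polynomial is set, value_ratio mixes this
    distributional loss with an MSE term on the values reconstructed from the
    predicted coefficient distributions, and value_weight scales that MSE term.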
383 | Basically to save some space 384 | """ 385 | batch_size_val = inp.size(0) 386 | output_dim = gn.output_dim 387 | 388 | weight_vec = torch.Tensor([base ** i for i in reversed(range(out.size(-1)))]) 389 | weight_vec = (weight_vec/weight_vec.sum()) * weight_vec.numel() # consistent weighting on output length 390 | loss_weight = weight_vec.expand(out.shape) 391 | 392 | inp_s, out_s, out_real_s, lens_s = sort_batch(inp, out, out_real, lens) 393 | inp_s, out_s, out_real_s, lens_s = convert_batch(batch_x=inp_s, 394 | batch_y=out_s, 395 | batch_y_real=out_real_s, 396 | batch_l=lens_s, 397 | cuda=cuda, 398 | real_values=glucose_dat.real_values) 399 | x = nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 400 | glucose_dat.max_pad, 401 | 1), 402 | list(np.array(lens_s)), 403 | batch_first=True) 404 | if glucose_dat.real_values: 405 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 406 | y_p_flat = y_p.contiguous().view(-1, output_dim) 407 | (y_p_nopad, 408 | y_nopad, 409 | y_real_nopad, 410 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=y_p_flat, 411 | target_value=out_s.view(-1), 412 | loss_weight=Variable(loss_weight.cuda()), 413 | target_real_value=out_real_s) 414 | try: 415 | loss = criterion(y_p_nopad, y_nopad) 416 | except: 417 | print(type(y_nopad.data)) 418 | print(type(out_s.data)) 419 | print(type(out)) 420 | raise 421 | 422 | else: 423 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 424 | (yd_p_nopad, 425 | y_nopad, 426 | y_real_nopad, 427 | loss_weight_nopad) = remove_prediction_padding(prediction_distribution=yd_p, 428 | target_value=out_s, 429 | loss_weight=Variable(loss_weight.cuda()), 430 | target_real_value=out_real_s) 431 | if glucose_dat.polynomial: 432 | # include MSE 433 | real_criterion = torch.nn.MSELoss() 434 | coeffs = get_coeffs(yd_p_nopad.view(-1, len(glucose_dat.bins), yd_p_nopad.shape[-1]), glucose_dat.bins) 435 | real_values = coeffs_to_values(coeffs) 436 | loss_real = real_criterion(real_values.view(-1), y_real_nopad.float()) * value_weight 437 | loss_dist = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 438 | loss = (1-value_ratio) * loss_dist + value_ratio * loss_real 439 | if np.isnan(loss.data[0]): 440 | raise ValueError('Got NaN loss') 441 | else: 442 | loss = criterion(yd_p_nopad, y_nopad) * loss_weight_nopad 443 | if np.isnan(loss.data[0]): 444 | raise ValueError('Got NaN loss') 445 | if torch.min(y_nopad.data) == -1: 446 | print('trouble ahead') 447 | return loss.mean(), y_p 448 | 449 | 450 | def get_coeffs(dist, bins): 451 | prob = torch.exp(dist) 452 | bin_vals = Variable(torch.from_numpy(np.array(bins)).float().cuda()).expand_as(prob).transpose(1, 2) 453 | coeffs = torch.bmm(prob, bin_vals) # includes false off-diag coeffs 454 | real_coeffs = coeffs[torch.eye(len(bins)).expand_as(coeffs).byte().cuda()].view(-1, len(bins)) # extract diagonals 455 | return real_coeffs 456 | 457 | 458 | def coeffs_to_values(coeffs): 459 | degree = coeffs.shape[-1] 460 | basis = Variable(torch.stack([torch.arange(0, 6) ** i for i in range(degree)]).cuda()) 461 | return coeffs.view(-1, degree) @ basis 462 | 463 | 464 | def get_predictions(inp, 465 | out, 466 | lens, 467 | cuda, 468 | gn, 469 | glucose_dat): 470 | """ 471 | Gets predictions 472 | """ 473 | batch_size_val = inp.size(0) 474 | output_dim = gn.output_dim 475 | 476 | inp_s, out_s, lens_s = sort_batch(inp, out, lens) 477 | inp_s, out_s, lens_s = convert_batch(inp_s, 478 | out_s, 479 | lens_s, 480 | cuda, 481 | glucose_dat.real_values) 482 | x = 
nn.utils.rnn.pack_padded_sequence(inp_s.view(batch_size_val, 483 | glucose_dat.max_pad, 484 | 1), 485 | list(np.array(lens_s)), 486 | batch_first=True) 487 | if glucose_dat.real_values: 488 | yd_p, y_p, yd_f, y_f = gn(x, pred_len=0) 489 | y_p_flat = y_p.contiguous().view(-1, output_dim) 490 | y_p_nopad, y_nopad = remove_prediction_padding(y_p_flat, 491 | out_s.view(-1)) 492 | 493 | else: 494 | yd_p, y_p, yd_f, y_f = gn(x, glucose_dat, pred_len=out.shape[-1]) 495 | yd_p_flat = yd_p.contiguous().view(-1, output_dim) 496 | yd_p_nopad, y_nopad = remove_prediction_padding(yd_p_flat, 497 | out_s.view(-1)) 498 | return yd_p, y_p, yd_f, y_f 499 | 500 | 501 | def make_model(config): 502 | """ 503 | A poor man's factory method. 504 | """ 505 | if config.model_type == 'recursive': 506 | gn = RecursiveRNN(input_dim=config.input_dim, 507 | output_dim=config.output_dim, 508 | hidden_size=config.hidden_size, 509 | depth=config.depth, 510 | cuda=True) 511 | else: 512 | assert config.output_len == config.pred_len # could relax 513 | gn = MultiOutputRNN(input_dim=config.input_dim, 514 | output_dim=config.output_dim, 515 | hidden_size=config.hidden_size, 516 | output_len=config.output_len, 517 | depth=config.depth, 518 | cuda=True, 519 | autoregressive=config.autoregressive, 520 | sequence=config.sequence, 521 | polynomial=config.polynomial, 522 | degree=config.degree) 523 | return gn 524 | -------------------------------------------------------------------------------- /lib/trainer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements training schemes with logging 3 | """ 4 | import numpy as np 5 | import os 6 | import time 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader, sampler 10 | from tensorboardX import SummaryWriter 11 | from tqdm import tqdm 12 | import joblib 13 | 14 | import model as forecast_model 15 | 16 | 17 | class ExperimentTrainer: 18 | """ 19 | Simple training scheme 20 | """ 21 | 22 | def __init__(self, model, optimizer, criterion, name, model_dir, log_dir, 23 | load=False, load_epoch=None): 24 | """ 25 | :param model: initialized model for training 26 | :param optimizer: initialized training optimizer 27 | :param name: string to save trainer results under 28 | :param load: whether or not to load results from previous train if they exist 29 | :param epoch: which epoch results to load, if None then the best found 30 | """ 31 | self.model = model 32 | self.criterion = criterion 33 | self.optimizer = optimizer 34 | self.name = name 35 | self.model_dir = model_dir 36 | self.log_dir = log_dir 37 | 38 | if not os.path.exists(self.model_dir): 39 | os.makedirs(self.model_dir) 40 | os.makedirs(self.log_dir) 41 | else: 42 | if load: 43 | if load_epoch is None: 44 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, 'bsf_sup.pt'))) 45 | else: 46 | self.model.load_state_dict(torch.load(os.path.join(self.model_dir, '{}_sup.pt'.format(load_epoch)))) 47 | 48 | else: 49 | print('Warning: directory already exists') 50 | self.writer = SummaryWriter(log_dir=self.log_dir) 51 | 52 | def train_sup(self, epoch_lim, data, valid_data, early_stopping_lim, 53 | batch_size, num_workers, track_embeddings, validation_rate, loss_weight_base=1, 54 | value_weight=0, value_ratio=0): 55 | """ 56 | Training loop 57 | :param epoch_lim: total number of training epochs 58 | :param data: training data 59 | :param valid_data: validation data 60 | :param early_stopping_lim: Number of epochs to run without 
validation improvement before stopping 61 | if None, never stop early 62 | :param batch_size: training batch_size 63 | :param num_workers: number of CPU workers to use for data loading 64 | :param track_embeddings: Save out embedding information at end of run 65 | :param validation_rate: Check validation performance every validation_rate training epochs 66 | :param loss_weight_base: A constant between 0 and 1 used to interpolate between Single (=0) and Multi (=1) Step forecasting. 67 | :param value_weight: A constant multiplier for the real-value loss, set to 0 in the paper 68 | :param value_ratio: The proportion of loss used for the MSE loss term (as opposed for the cross-entropy loss), set to 0 in the paper 69 | :return loss array, model: 70 | """ 71 | if early_stopping_lim is None: 72 | early_stopping_lim = epoch_lim 73 | train_sampler = sampler.RandomSampler(np.arange(len(data))) 74 | data_train = DataLoader(data, 75 | batch_size=batch_size, 76 | sampler=train_sampler, 77 | drop_last=True) 78 | 79 | valid_sampler = sampler.SequentialSampler(np.arange(len(valid_data))) 80 | data_valid = DataLoader(valid_data, 81 | batch_size=batch_size, 82 | sampler=valid_sampler) 83 | step = 0 84 | 85 | bsf_loss = np.inf 86 | epochs_without_improvement = 0 87 | improvements = [] 88 | for epoch in range(epoch_lim): 89 | if epochs_without_improvement > early_stopping_lim: 90 | print('Exceeded early stopping limit, stopping') 91 | break 92 | if epoch % validation_rate == 0: 93 | valid_loss = self.validation(data_valid=data_valid, 94 | step=step, 95 | data=data, 96 | loss_weight_base=loss_weight_base, 97 | value_weight=value_weight, value_ratio=value_ratio) 98 | (bsf_loss, 99 | epochs_without_improvement, 100 | improvements) = self.manage_early_stopping(bsf_loss=bsf_loss, 101 | early_stopping_lim=early_stopping_lim, 102 | epochs_without_improvement=epochs_without_improvement, 103 | valid_loss=valid_loss, validation_rate=validation_rate, 104 | improvements=improvements) 105 | running_train_loss = 0 106 | for inp, out, out_real, lens in tqdm(data_train): 107 | loss, y_p = forecast_model.get_loss(inp=inp, 108 | out=out, 109 | lens=lens, 110 | cuda=True, 111 | gn=self.model, 112 | glucose_dat=data, 113 | criterion=self.criterion, 114 | base=loss_weight_base, 115 | out_real=out_real, 116 | value_weight=value_weight, 117 | value_ratio=value_ratio) 118 | step += 1 119 | running_train_loss += loss.data.cpu().numpy()[0] 120 | self.optimizer.zero_grad() 121 | loss.backward() 122 | self.optimizer.step() 123 | running_train_loss = running_train_loss/len(data_train) 124 | self.writer.add_scalar(tag='train_loss', 125 | scalar_value=running_train_loss, 126 | global_step=step) 127 | torch.save(self.model.state_dict(), '{}/final_sup.pt'.format(self.model_dir)) 128 | if track_embeddings: 129 | self.embed(data_valid, step, embed_batch=100) 130 | return improvements 131 | 132 | def manage_early_stopping(self, bsf_loss, early_stopping_lim, epochs_without_improvement, valid_loss, 133 | validation_rate, improvements): 134 | if valid_loss < bsf_loss: 135 | print('improved validation loss from {:.3f} to {:.3f}'.format(bsf_loss, valid_loss)) 136 | bsf_loss = valid_loss 137 | improvements.append(epochs_without_improvement) 138 | epochs_without_improvement = 0 139 | torch.save(self.model.state_dict(), 140 | '{}/bsf_sup.pt'.format(self.model_dir)) 141 | else: 142 | epochs_without_improvement += validation_rate 143 | print('Validation loss of {} did not improve on {}'.format(valid_loss, bsf_loss)) 144 | print('Early stopping at 
{}/{}'.format(epochs_without_improvement, early_stopping_lim)) 145 | return bsf_loss, epochs_without_improvement, improvements 146 | 147 | def validation(self, data_valid, step, data, loss_weight_base, value_weight, value_ratio): 148 | self.model.eval() 149 | running_valid_loss = 0 150 | for inp, out, out_real, lens in data_valid: 151 | loss, y_p = forecast_model.get_loss(inp=inp, 152 | out=out, 153 | lens=lens, 154 | cuda=True, 155 | gn=self.model, 156 | glucose_dat=data, 157 | criterion=self.criterion, 158 | base=loss_weight_base, 159 | out_real=out_real, 160 | value_weight=value_weight, 161 | value_ratio=value_ratio) 162 | step += 1 163 | running_valid_loss += loss.data.cpu().numpy()[0] 164 | running_valid_loss = running_valid_loss / len(data_valid) 165 | print('validation loss: {:.3f}'.format(running_valid_loss)) 166 | self.writer.add_scalar(tag='valid_total_loss', 167 | scalar_value=running_valid_loss, 168 | global_step=step) 169 | self.model.train() 170 | return running_valid_loss 171 | 172 | def embed(self, dataloader, step, embed_batch=5): 173 | print('embed') 174 | embeddings = None 175 | metadata = [] 176 | i = 0 177 | for dat, dat_past, dat_future, init, label in dataloader: 178 | x = Variable(dat.float().cuda()) 179 | e = self.model.embed(x).data 180 | metadata += np.round(label.numpy(), 2).tolist() 181 | if embeddings is None: 182 | embeddings = e 183 | else: 184 | embeddings = torch.cat((embeddings, e)) 185 | if i > embed_batch: 186 | break 187 | i += 1 188 | print(len(metadata)) 189 | self.writer.add_embedding(mat=embeddings, 190 | metadata=metadata, 191 | global_step=step) 192 | 193 | def get_predictions(self, dataloader): 194 | self.model.eval() 195 | data = None 196 | data_past = None 197 | data_future = None 198 | y = None 199 | pred_pres = None 200 | pred_past = None 201 | pred_future = None 202 | pred = None 203 | for dat, dat_past, dat_future, init, label in dataloader: 204 | print('evaluation batch') 205 | window_data = [] 206 | window_data_past = [] 207 | window_data_future = [] 208 | window_y = [] 209 | window_pred = [] 210 | window_pred_pres = [] 211 | window_pred_past = [] 212 | window_pred_future = [] 213 | if not self.window: 214 | dat = [dat] 215 | dat_past = [dat_past] 216 | dat_future = [dat_future] 217 | for window in range(len(dat)): 218 | x = Variable(dat[window].float().cuda()) 219 | y_pred, x_pres, x_past, x_future = self.model.forward(x) 220 | y_pred = y_pred.data.cpu().numpy() 221 | if self.decode_present: 222 | x_pres = x_pres.data.cpu().numpy() 223 | if self.decode_past: 224 | x_past = x_past.data.cpu().numpy() 225 | if self.decode_future: 226 | x_future = x_future.data.cpu().numpy() 227 | yt = label.numpy() 228 | xt_pres = dat[window].numpy() 229 | xt_past = dat_past[window].numpy() 230 | xt_future = dat_future[window].numpy() 231 | window_data.append(xt_pres) 232 | window_data_past.append(xt_past) 233 | window_data_future.append(xt_future) 234 | window_y.append(yt) 235 | window_pred.append(y_pred) 236 | window_pred_pres.append(x_pres) 237 | window_pred_past.append(x_past) 238 | window_pred_future.append(x_future) 239 | if data is None: 240 | data = [window_data] 241 | data_past = [window_data_past] 242 | data_future = [window_data_future] 243 | y = [window_y] 244 | pred_pres = [window_pred_pres] 245 | pred_past = [window_pred_past] 246 | pred_future = [window_pred_future] 247 | pred = [window_pred] 248 | else: 249 | data.append(window_data) 250 | data_past.append(window_data_past) 251 | data_future.append(window_data_future) 252 | 
y.append(window_y) 253 | if self.decode_present: 254 | pred_pres.append(window_pred_pres) 255 | if self.decode_past: 256 | pred_past.append(window_pred_past) 257 | if self.decode_future: 258 | pred_future.append(window_pred_future) 259 | pred.append(window_pred) 260 | print('done getting predictions') 261 | return (data, data_past, data_future, y, 262 | pred_pres, pred_past, pred_future, pred) 263 | -------------------------------------------------------------------------------- /walkthrough.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import sklearn.ensemble\n", 11 | "import torch\n", 12 | "import pandas\n", 13 | "import joblib\n", 14 | "\n", 15 | "from matplotlib import pyplot as plt\n", 16 | "from lib import model, glucose_dataset" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Introduction\n", 24 | "\n", 25 | "This notebook provides a brief walkthrough of the public code release for our KDD 2018 paper: Deep Multi-Output Forecasting: Learning to Accurately Predict Blood Glucose Trajectories. The full paper is available via arXiv: https://arxiv.org/abs/1806.05357. We hope to release our glucose data to the general public soon. In the meantime, people interested in blood glucose forecasting may be interested in the recently released OhioT1DM dataset: http://smarthealth.cs.ohio.edu/OhioT1DM-dataset.html." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Data\n", 33 | "We have included both the processed and unprocessed dataset used to generate our results. This data was collected by authors Mamta Jaiswal, Dr. Lynn Ang, and Dr Rodica Pop-Busui." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Unprocessed\n", 41 | "The unprocessed dataset, data/unprocessed_cgm_data.xlsx, is an excel file with one sheet per recording session (from baseline to 36 months). Each row is one individual, note that patient ids are consistent across recording sessions, and not all patients have all recording sessions. The CGM data is giving at 5 minute resolution. The unprocessed data also contain information on the daily insulin dose and delivery method, which was not used in the paper. " 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "unprocessed = pandas.read_excel('data/unprocessed_cgm_data.xlsx', sheet_name=None)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "unprocessed.keys()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "unprocessed['Baseline']" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Processed\n", 76 | "The processed data is stored as four pickle files (accessible via joblib), data/processed_cgm_data_{train/validation/test}.pkl and data/processed_cgm_coeffs.pkl. To process we:\n", 77 | "\n", 78 | "1. Remove data points which differ from previous ones by more than 40 mg/dL, as these measurements are almost certainly the result of sensor error\n", 79 | "2. 
Impute small data gaps using linear interpolation.\n", 80 | "3. Split data into contiguous chunks, splitting either on missing data or when a chunk is >101 measurements long\n", 81 | "4. (PolyMO) compute coefficient bins on the training data.\n", 82 | "\n", 83 | "The test set is constructed using the most recent session from each patient (approximately 10% of the data). \n", 84 | "\n", 85 | "We also include a differently processed version of the data, data/alternative_cgm_data_{train/test}, which we found useful for other projects. This data is constructed on a per-day basis, removing days with excessive missingness. Importantly, each day is linked to the ID of the patient it came from." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "data_tr = glucose_dat_train_rec = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',\n", 95 | " max_pad=101,\n", 96 | " output_len=6, # set 1 for Recursive, 6 for MO\n", 97 | " output_dim=361,\n", 98 | " polynomial=False,\n", 99 | " degree=2,\n", 100 | " range_low=0,\n", 101 | " range_high=100,\n", 102 | " coeff_file='/data2/ifox/glucose/data/training_coefficient_percentiles_ridge_alpha1_roc40.pkl')" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "for x, y_index, y_real, lens in data_tr:\n", 112 | " break" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# The polynomial fitting takes a while (several minutes), but is only required once before training\n", 122 | "data_tr_poly = glucose_dat_train_rec = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',\n", 123 | " max_pad=101,\n", 124 | " output_len=6,\n", 125 | " output_dim=361,\n", 126 | " polynomial=True,\n", 127 | " degree=2,\n", 128 | " range_low=0,\n", 129 | " range_high=100,\n", 130 | " coeff_file='/data2/ifox/glucose/data/training_coefficient_percentiles_ridge_alpha1_roc40.pkl')" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "for x, poly_index, y_real, lens in data_tr_poly:\n", 140 | " break" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "# Models\n", 148 | "\n", 149 | "Our paper considers 8 classes of models:\n", 150 | "\n", 151 | "Shallow Baselines\n", 152 | "* Extrapolation\n", 153 | "* Recursive Random Forest\n", 154 | "* Multi-Output Random Forest\n", 155 | "\n", 156 | "Deep Baselines\n", 157 | "* Recursive RNN\n", 158 | "* Multi-Output RNN\n", 159 | "\n", 160 | "Our Approaches\n", 161 | "* Sequential Multi-Output RNN\n", 162 | "* Polynomial Multi-Output RNN\n", 163 | "* Polynomial Sequential Multi-Output RNN\n", 164 | "\n", 165 | "We will walk through how we implemented, trained, and evaluated each model" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Shallow Baselines" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### Extrapolation\n", 180 | "\n", 181 | "This is a simple linear extrapolation baseline implemented via Numpy. We extrapolate using the last 30 minutes (6 samples as our data was sampled at 5 minute intervals) to predict 30 minutes into the future." 
182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "data_tr = np.cumsum(np.random.randn(1000, 16), axis=1)\n", 191 | "data_ts = np.cumsum(np.random.randn(100, 10), axis=1)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "n_input = 6\n", 201 | "horizon = 6\n", 202 | "degree = 1\n", 203 | "extrap_pred = []\n", 204 | "for i in range(len(data_ts)):\n", 205 | " coeffs = np.polynomial.polynomial.polyfit(x=np.arange(n_input), y=data_ts[i][-n_input:], deg=degree)\n", 206 | " extrap_pred.append(np.polyval(p=np.flip(coeffs, axis=0), x=np.arange(horizon)+n_input))" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Recursive and Multi-Output Random Forest\n", 214 | "\n", 215 | "Implemented using scikit-learn. Note the scikit-learn implementation automatically infers output size during the fitting step. " 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "#### Recursive" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "rf_rec = sklearn.ensemble.RandomForestRegressor(n_estimators=100, n_jobs=-1)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "# Note, for actually training recursive models, you should use all of the data by taking input_size tiles\n", 241 | "X_rec_tr = data_tr[:, :10]\n", 242 | "y_rec_tr = data_tr[:, 10:11].ravel()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "rf_rec.fit(X_rec_tr, y_rec_tr)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "# recursive prediction\n", 261 | "X_mod = data_ts.copy()\n", 262 | "p_rec_arr = []\n", 263 | "for i in range(6):\n", 264 | " p = rf_rec.predict(X_mod)\n", 265 | " p_rec_arr.append(p.reshape(-1, 1))\n", 266 | " X_mod = np.concatenate((X_mod[:, 1:], p.reshape(-1, 1)), axis=1)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "#### Multi-Output" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# Note, for actually training recursive models, you should use all of the data by taking input_size tiles\n", 283 | "X_mo_tr = data_tr[:, :10]\n", 284 | "y_mo_tr = data_tr[:, 10:]" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "rf_mo = sklearn.ensemble.RandomForestRegressor(n_estimators=100, n_jobs=-1)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "rf_mo.fit(X_mo_tr, y_mo_tr)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "p_mo_arr = rf_mo.predict(data_ts)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## Deep Models\n", 319 | "Our deep 
baselines are all implemented in PyTorch. They are a bit more involved to train. The basic training procedure is outlined in lib/trainer.py in the ExperimentTrainer class. The train_sup function is used to fit the provided model. The use of TensorboardX is not required, but convenient for monitoring losses. The data is assumed to be in the form of a pytorch dataset in the form of lib/glucose_dataset.py (though the specifics can vary greatly).\n", 320 | "\n", 321 | "Note that the dataset code requires precomputed polynomial coefficients for the PolyMO setting. This can be done using Numpy's polyfit function on your training data. \n", 322 | "\n", 323 | "The cuda flag should be set to True if a GPU is available." 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "### Recursive Baseline" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "rec_rnn = model.RecursiveRNN(input_dim=1, output_dim=361, hidden_size=512, depth=2, cuda=False)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### Multi-Output Baseline" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "mo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "### Sequential Multi-Output" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "seqmo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False, sequence=True)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "### Polynomial Multi-Output" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [ 387 | "polymo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False, polynomial=True, degree=1)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "### Polynomial Sequential Multi-Output" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "polymo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6, hidden_size=512, depth=2, cuda=False, sequence=True, polynomial=True, degree=1)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [] 412 | } 413 | ], 414 | "metadata": { 415 | "kernelspec": { 416 | "display_name": "Python 3", 417 | "language": "python", 418 | "name": "python3" 419 | }, 420 | "language_info": { 421 | "codemirror_mode": { 422 | "name": "ipython", 423 | "version": 3 424 | }, 425 | "file_extension": ".py", 426 | "mimetype": "text/x-python", 427 | "name": "python", 428 | "nbconvert_exporter": "python", 429 | "pygments_lexer": "ipython3", 430 | "version": "3.6.5" 431 | } 432 | }, 433 | "nbformat": 4, 434 | "nbformat_minor": 2 435 | } 436 | --------------------------------------------------------------------------------
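The walkthrough stops at constructing the models; the training loop itself lives in lib/trainer.py. As a rough illustration of how the pieces fit together, the sketch below (not part of the original repo) wires a GlucoseDataset, a MultiOutputRNN, and an ExperimentTrainer and calls train_sup. The directory names, hyperparameters, optimizer, and loss choice are illustrative assumptions only, and because get_loss calls .cuda() internally the sketch assumes a GPU is available.

# Minimal training sketch, under the assumptions stated above.
import torch
from torch import nn, optim

from lib import model, glucose_dataset, trainer

# Datasets as used in the walkthrough (non-polynomial, 361 glucose bins, horizon 6).
train_data = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_train.pkl',
                                            max_pad=101,
                                            output_len=6,
                                            output_dim=361)
valid_data = glucose_dataset.GlucoseDataset(data_pkl='data/processed_cgm_data_validation.pkl',
                                            max_pad=101,
                                            output_len=6,
                                            output_dim=361)

# Multi-Output RNN baseline; cuda=True because the trainer's loss code runs on GPU.
mo_rnn = model.MultiOutputRNN(input_dim=1, output_dim=361, output_len=6,
                              hidden_size=512, depth=2, cuda=True)

# The RNNs emit log-probabilities (LogSoftmax) over bins, and get_loss multiplies the
# criterion output by per-step weights before averaging, so a non-reducing NLL loss is
# one reasonable choice here (an assumption, not the paper's recorded configuration;
# older PyTorch releases used NLLLoss(reduce=False) instead of reduction='none').
criterion = nn.NLLLoss(reduction='none')
optimizer = optim.Adam(mo_rnn.parameters(), lr=1e-3)

# 'mo_demo' and the model/log directories are hypothetical names for this example.
exp = trainer.ExperimentTrainer(model=mo_rnn,
                                optimizer=optimizer,
                                criterion=criterion,
                                name='mo_demo',
                                model_dir='models/mo_demo',
                                log_dir='logs/mo_demo')

# train_sup checkpoints the best validation model (bsf_sup.pt) and returns the
# list of improvement intervals; epoch_lim, batch_size, etc. are illustrative.
improvements = exp.train_sup(epoch_lim=100,
                             data=train_data,
                             valid_data=valid_data,
                             early_stopping_lim=20,
                             batch_size=64,
                             num_workers=0,
                             track_embeddings=False,
                             validation_rate=1,
                             loss_weight_base=1)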