# Repository layout:
#
# ├── GRUD.py
# ├── README.md
# └── main.py
#
# NOTE: the three files are listed below as one importable module. File
# boundaries are kept as comment banners; README.md is reproduced as comments.

# /GRUD.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sat May 12 16:48:54 2018

@author: Zhiyong
"""

import copy
import math
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utils
from torch.autograd import Variable
from torch.nn.parameter import Parameter


class FilterLinear(nn.Module):
    """Linear layer whose weight is element-wise multiplied by a fixed filter.

    The filter is stored as a registered buffer, so it follows the module
    through ``.to()`` / ``.cuda()`` and is never updated by an optimizer.
    """

    def __init__(self, in_features, out_features, filter_square_matrix, bias=True):
        '''
        in_features: size of each input sample.
        out_features: size of each output sample.
        filter_square_matrix: filter square matrix, whose each element is 0 or 1.
            It must be broadcastable against the (out_features, in_features)
            weight.
        bias: when False the layer has no additive bias.
        '''
        super(FilterLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        # A buffer (instead of a plain attribute created on whatever device
        # happened to be available) keeps the filter on the same device as the
        # weight it masks.
        self.register_buffer(
            'filter_square_matrix',
            torch.as_tensor(filter_square_matrix, dtype=torch.float32))

        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight and bias from U(-1/sqrt(in_features), 1/sqrt(in_features))."""
        stdv = 1. / math.sqrt(self.weight.size(1))
        nn.init.uniform_(self.weight, -stdv, stdv)
        if self.bias is not None:
            nn.init.uniform_(self.bias, -stdv, stdv)

    def forward(self, input):
        """Apply the linear map using only the weights the filter lets through."""
        return F.linear(input, self.filter_square_matrix.mul(self.weight), self.bias)

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'in_features=' + str(self.in_features) \
            + ', out_features=' + str(self.out_features) \
            + ', bias=' + str(self.bias is not None) + ')'


class GRUD(nn.Module):
    def __init__(self, input_size, cell_size, hidden_size, X_mean, output_last = False):
        """
        Recurrent Neural Networks for Multivariate Times Series with Missing Values
        GRU-D: GRU exploit two representations of informative missingness patterns, i.e., masking and time interval.

        Implemented based on the paper:
        @article{che2018recurrent,
          title={Recurrent neural networks for multivariate time series with missing values},
          author={Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
          journal={Scientific reports},
          volume={8},
          number={1},
          pages={6085},
          year={2018},
          publisher={Nature Publishing Group}
        }

        GRU-D:
            input_size: variable dimension of each time
            cell_size: accepted for backward compatibility; not used by this
                implementation
            hidden_size: dimension of hidden_state
            X_mean: the mean of the historical input data. Accepted layouts
                are (input_size,), (steps, input_size) and
                (1, steps, input_size); a single row is reused at every step.
            output_last: when True ``forward`` returns only the final hidden
                state, otherwise the hidden state of every step.
        """

        super(GRUD, self).__init__()

        self.hidden_size = hidden_size
        self.delta_size = input_size
        self.mask_size = input_size

        # Store the mean as a (rows, input_size) buffer so it moves with the
        # module instead of being pinned to the device available at
        # construction time.
        x_mean = torch.as_tensor(X_mean, dtype=torch.float32)
        if x_mean.dim() == 0:
            x_mean = x_mean.reshape(1, 1)
        else:
            x_mean = x_mean.reshape(-1, x_mean.size(-1))
        self.register_buffer('X_mean', x_mean)

        self.zl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size)
        self.rl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size)
        self.hl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size)

        # Input decay: one rate per variable, hence the identity filter.
        self.gamma_x_l = FilterLinear(self.delta_size, self.delta_size, torch.eye(input_size))

        # Hidden-state decay must produce one rate per hidden unit; projecting
        # to delta_size only worked when hidden_size == input_size.
        self.gamma_h_l = nn.Linear(self.delta_size, hidden_size)

        self.output_last = output_last

    def step(self, x, x_last_obsv, x_mean, h, mask, delta):
        """Advance the hidden state by one time step.

        x, x_last_obsv, mask and delta are (batch, input_size) tensors, x_mean
        is broadcastable against them, and h is (batch, hidden_size).
        Returns the new (batch, hidden_size) hidden state.
        """
        # exp(-max(0, W * delta + b)): decay rates in (0, 1].
        delta_x = torch.exp(-F.relu(self.gamma_x_l(delta)))
        delta_h = torch.exp(-F.relu(self.gamma_h_l(delta)))

        # Missing entries are replaced by a decayed blend of the last
        # observation and the mean; observed entries are kept.
        x = mask * x + (1 - mask) * (delta_x * x_last_obsv + (1 - delta_x) * x_mean)
        h = delta_h * h

        combined = torch.cat((x, h, mask), 1)
        z = torch.sigmoid(self.zl(combined))
        r = torch.sigmoid(self.rl(combined))
        combined_r = torch.cat((x, r * h, mask), 1)
        h_tilde = torch.tanh(self.hl(combined_r))
        h = (1 - z) * h + z * h_tilde

        return h

    def forward(self, input):
        """Run the recurrence over a (batch, 4, steps, input_size) tensor.

        Axis 1 holds, in order: the masked values, the last observed values,
        the mask and the time intervals. Returns (batch, hidden_size) when
        ``output_last`` is set, otherwise (batch, steps, hidden_size).

        Raises ValueError when the input layout or the number of stored mean
        rows does not match.
        """
        if input.dim() != 4 or input.size(1) != 4:
            raise ValueError(
                'expected input of shape (batch, 4, steps, features), got '
                + str(tuple(input.shape)))

        batch_size = input.size(0)
        step_size = input.size(2)

        mean_rows = self.X_mean.size(0)
        if mean_rows not in (1, step_size):
            raise ValueError(
                'X_mean has {} rows but the input has {} steps'.format(mean_rows, step_size))

        # Plain indexing instead of torch.squeeze: squeeze also removed the
        # batch or feature axis whenever that axis had length 1.
        X = input[:, 0]
        X_last_obsv = input[:, 1]
        Mask = input[:, 2]
        Delta = input[:, 3]

        Hidden_State = self.initHidden(batch_size)
        states = []
        for i in range(step_size):
            x_mean_i = self.X_mean[i] if mean_rows > 1 else self.X_mean[0]
            Hidden_State = self.step(X[:, i],
                                     X_last_obsv[:, i],
                                     x_mean_i,
                                     Hidden_State,
                                     Mask[:, i],
                                     Delta[:, i])
            states.append(Hidden_State)

        if self.output_last:
            return Hidden_State
        if not states:
            return Hidden_State.new_zeros(batch_size, 0, self.hidden_size)
        # One stack at the end avoids re-copying the growing output each step.
        return torch.stack(states, dim=1)

    def initHidden(self, batch_size):
        """Return a zero (batch_size, hidden_size) state on the model's device."""
        return self.zl.weight.new_zeros(batch_size, self.hidden_size)


# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# # Code of Re-implementing the methodology in the paper:
# ## Recurrent Neural Networks for Multivariate Time Series with Missing Values
#
# This method can be used for dealing with time series with missing values,
# especially for time series with non-fixed time intervals.
#
# ### Environment
# * Python 3.6.1
# * PyTorch 0.4.1
#
# ### Cite
# ```
# @article{che2018recurrent,
#   title={Recurrent neural networks for multivariate time series with missing values},
#   author={Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan},
#   journal={Scientific reports},
#   volume={8},
#   number={1},
#   pages={6085},
#   year={2018},
#   publisher={Nature Publishing Group}
# }
# ```
#
# --------------------------------------------------------------------------------
# /main.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Created on Sat May 12 16:49:49 2018
#
# @author: Zhiyong
#
# main.py originally began with `from GRUD import *`. In this single-module
# listing every name it relied on (torch, nn, utils, np, pd, time, GRUD, ...)
# is already defined above, so the star import is not repeated here.


def _last_observation_and_delta(sequences, mask, interval=5):
    """Build the last-observation and normalised time-interval arrays.

    sequences and mask are (samples, steps, features) arrays; mask is 0 where
    a value is missing. Returns ``(x_last_obsv, delta)`` with the same shape.
    """
    n_steps = sequences.shape[1]

    # Consecutive time stamps are `interval` apart; the first step has no
    # predecessor, so its interval is 0.
    delta = np.zeros_like(sequences)
    delta[:, 1:, :] = interval

    x_last_obsv = np.copy(sequences)
    for j in range(1, n_steps):
        missing = mask[:, j, :] == 0
        if j != n_steps - 1:
            # A missing value at step j pushes its elapsed time onto step j+1.
            delta[:, j + 1, :] += np.where(missing, delta[:, j, :], 0.0)
        # Carry the previous (possibly itself carried) observation forward.
        x_last_obsv[:, j, :] = np.where(missing, x_last_obsv[:, j - 1, :], x_last_obsv[:, j, :])

    max_delta = delta.max()
    if max_delta > 0:  # a single-step sequence has all-zero intervals
        delta = delta / max_delta  # normalize
    return x_last_obsv, delta


def PrepareDataset(speed_matrix, \
                   BATCH_SIZE = 40, \
                   seq_len = 10, \
                   pred_len = 1, \
                   train_propotion = 0.7, \
                   valid_propotion = 0.2, \
                   masking = False, \
                   mask_ones_proportion = 0.8):
    """ Prepare training, validation and testing dataloaders.

    Convert speed/volume/occupancy matrix to training and testing dataset.
    The vertical axis of speed_matrix is the time axis and the horizontal axis
    is the spatial axis.

    Args:
        speed_matrix: a pandas DataFrame containing spatial-temporal speed data
            for a network
        BATCH_SIZE: batch size of the three dataloaders
        seq_len: length of input sequence
        pred_len: length of predicted sequence
        train_propotion: fraction of samples used for training
        valid_propotion: fraction of samples used for validation; the rest is
            used for testing
        masking: when True, randomly zero out inputs and emit
            (value, last observation, mask, delta) channels per sample
        mask_ones_proportion: probability that an input value is kept
    Returns:
        Training dataloader
        Validation dataloader
        Testing dataloader
        max_speed: the value the data was divided by
        X_mean: mean of the (masked) input sequences over all samples
    Raises:
        ValueError: if speed_matrix is too short for one sample.
    """
    time_len = speed_matrix.shape[0]

    speed_matrix = speed_matrix.clip(0, 100)

    max_speed = speed_matrix.max().max()
    speed_matrix = speed_matrix / max_speed

    # +1: the window that ends exactly on the last row is a valid sample too.
    sample_count = time_len - seq_len - pred_len + 1
    if sample_count <= 0:
        raise ValueError('speed_matrix has {} rows, need at least {}'.format(
            time_len, seq_len + pred_len))

    values = speed_matrix.values
    speed_sequences = np.asarray(
        [values[i:i + seq_len] for i in range(sample_count)])
    speed_labels = np.asarray(
        [values[i + seq_len:i + seq_len + pred_len] for i in range(sample_count)])

    # using zero-one mask to randomly set elements to zeros
    if masking:
        print('Split Speed finished. Start to generate Mask, Delta, Last_observed_X ...')
        np.random.seed(1024)
        Mask = np.random.choice([0,1], size=(speed_sequences.shape), p = [1 - mask_ones_proportion, mask_ones_proportion])
        speed_sequences = np.multiply(speed_sequences, Mask)

        # temporal information: samples are 5 minutes apart
        X_last_obsv, Delta = _last_observation_and_delta(speed_sequences, Mask, interval=5)

    # shuffle and split the dataset to training and testing datasets
    print('Generate Mask, Delta, Last_observed_X finished. Start to shuffle and split dataset ...')
    sample_size = speed_sequences.shape[0]
    index = np.arange(sample_size, dtype = int)
    np.random.seed(1024)
    np.random.shuffle(index)

    speed_sequences = speed_sequences[index]
    speed_labels = speed_labels[index]

    if masking:
        X_last_obsv = X_last_obsv[index]
        Mask = Mask[index]
        Delta = Delta[index]
        speed_sequences = np.expand_dims(speed_sequences, axis=1)
        X_last_obsv = np.expand_dims(X_last_obsv, axis=1)
        Mask = np.expand_dims(Mask, axis=1)
        Delta = np.expand_dims(Delta, axis=1)
        dataset = np.concatenate((speed_sequences, X_last_obsv, Mask, Delta), axis = 1)
    else:
        dataset = speed_sequences

    train_index = int(np.floor(sample_size * train_propotion))
    valid_index = int(np.floor(sample_size * ( train_propotion + valid_propotion)))

    splits = (slice(None, train_index),
              slice(train_index, valid_index),
              slice(valid_index, None))
    dataloaders = []
    for part in splits:
        part_dataset = utils.TensorDataset(torch.Tensor(dataset[part]),
                                           torch.Tensor(speed_labels[part]))
        dataloaders.append(utils.DataLoader(
            part_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True))
    train_dataloader, valid_dataloader, test_dataloader = dataloaders

    # NOTE(review): this mean covers every split and counts masked entries as
    # zeros; confirm whether a training-only mean over observed values is
    # wanted before changing it.
    X_mean = np.mean(speed_sequences, axis = 0)

    print('Finished')

    return train_dataloader, valid_dataloader, test_dataloader, max_speed, X_mean


def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    for parameter in model.parameters():
        return parameter.device
    return torch.device('cpu')


def _uses_last_output(model):
    """Read ``output_last`` from the model, or from the last layer of a Sequential."""
    if isinstance(model, nn.Sequential):
        return model[-1].output_last, True
    return model.output_last, False


def _training_target(outputs, inputs, labels, output_last):
    """Build the tensor the model output is compared against during training."""
    if output_last:
        # reshape (not squeeze) so a batch or feature axis of length 1
        # survives and a real shape mismatch fails loudly instead of
        # broadcasting.
        return labels.reshape(outputs.shape)
    # Sequence output: targets are the inputs shifted by one step plus the
    # labels. 4-D inputs carry the values in channel 0.
    history = inputs[:, 0, 1:, :] if inputs.dim() == 4 else inputs[:, 1:, :]
    return torch.cat((history, labels), dim = 1)


def Train_Model(model, train_dataloader, valid_dataloader, num_epochs = 300, patience = 10, min_delta = 0.00001):
    """Train ``model`` with RMSprop on MSE loss and early stopping.

    One validation batch is evaluated after every training batch. Training
    stops once the epoch validation loss has failed to improve by more than
    ``min_delta`` for ``patience`` consecutive epochs.

    Returns:
        ``(best_model, [losses_train, losses_valid, losses_epochs_train,
        losses_epochs_valid])``. ``best_model`` is ``model`` itself with the
        weights of the best validation epoch restored; the loss lists hold
        Python floats.
    Raises:
        ValueError: if a dataloader yields no usable batch.
    """
    print('Model Structure: ', model)
    print('Start Training ... ')

    use_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')
    model.to(device)  # an unconditional .cuda() crashed on CPU-only machines

    output_last, from_last_layer = _uses_last_output(model)
    if from_last_layer:
        print('Output type dermined by the last layer')
    else:
        print('Output type dermined by the model')

    loss_MSE = torch.nn.MSELoss()

    learning_rate = 0.0001
    optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate, alpha=0.99)

    # Partial batches are skipped; None means the loader has no fixed size.
    batch_size = getattr(train_dataloader, 'batch_size', None)

    losses_train = []
    losses_valid = []
    losses_epochs_train = []
    losses_epochs_valid = []

    pre_time = time.time()

    # Variables for Early Stopping
    is_best_model = 0
    patient_epoch = 0
    min_loss_epoch_valid = float('inf')
    best_state = None
    for epoch in range(num_epochs):

        valid_dataloader_iter = iter(valid_dataloader)

        losses_epoch_train = []
        losses_epoch_valid = []

        for inputs, labels in train_dataloader:

            if batch_size is not None and inputs.shape[0] != batch_size:
                continue

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss_train = loss_MSE(outputs, _training_target(outputs, inputs, labels, output_last))
            loss_train.backward()
            optimizer.step()

            losses_train.append(loss_train.item())
            losses_epoch_train.append(loss_train.item())

            # validation: cycle through the validation loader
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                try:
                    inputs_val, labels_val = next(valid_dataloader_iter)
                except StopIteration:
                    raise ValueError('valid_dataloader produced no batches')

            inputs_val, labels_val = inputs_val.to(device), labels_val.to(device)

            with torch.no_grad():  # no graph is needed for evaluation
                outputs_val = model(inputs_val)
                loss_valid = loss_MSE(
                    outputs_val,
                    _training_target(outputs_val, inputs_val, labels_val, output_last))

            losses_valid.append(loss_valid.item())
            losses_epoch_valid.append(loss_valid.item())

        if not losses_epoch_train:
            raise ValueError('train_dataloader produced no usable batches')

        avg_losses_epoch_train = sum(losses_epoch_train) / float(len(losses_epoch_train))
        avg_losses_epoch_valid = sum(losses_epoch_valid) / float(len(losses_epoch_valid))
        losses_epochs_train.append(avg_losses_epoch_train)
        losses_epochs_valid.append(avg_losses_epoch_valid)

        # Early Stopping
        if epoch == 0 or min_loss_epoch_valid - avg_losses_epoch_valid > min_delta:
            is_best_model = 1
            # Snapshot the weights: keeping a reference to `model` would just
            # alias the object that keeps training.
            best_state = copy.deepcopy(model.state_dict())
            min_loss_epoch_valid = min(min_loss_epoch_valid, avg_losses_epoch_valid)
            patient_epoch = 0
        else:
            is_best_model = 0
            patient_epoch += 1
            if patient_epoch >= patience:
                print('Early Stopped at Epoch:', epoch)
                break

        # Print training parameters
        cur_time = time.time()
        print('Epoch: {}, train_loss: {}, valid_loss: {}, time: {}, best model: {}'.format( \
                    epoch, \
                    np.around(avg_losses_epoch_train, decimals=8),\
                    np.around(avg_losses_epoch_valid, decimals=8),\
                    np.around([cur_time - pre_time] , decimals=2),\
                    is_best_model) )
        pre_time = cur_time

    if best_state is not None:
        model.load_state_dict(best_state)
    best_model = model

    return best_model, [losses_train, losses_valid, losses_epochs_train, losses_epochs_valid]


def Test_Model(model, test_dataloader, max_speed):
    """Evaluate ``model`` on ``test_dataloader``.

    Only the last predicted step is scored. Batches whose size differs from
    the first batch are skipped. MAE and L1 statistics are rescaled by
    ``max_speed``; MAPE is computed over non-zero labels only and is only
    printed.

    Returns:
        ``[losses_l1, losses_mse, mean_l1, std_l1]`` where the first two are
        per-batch NumPy arrays.
    Raises:
        ValueError: if ``test_dataloader`` yields no batch.
    """
    output_last, _ = _uses_last_output(model)

    try:
        first_inputs, _ = next(iter(test_dataloader))
    except StopIteration:
        raise ValueError('test_dataloader produced no batches')
    batch_size = first_inputs.shape[0]

    pre_time = time.time()

    device = _model_device(model)

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    tested_batch = 0

    losses_mse = []
    losses_l1 = []
    MAEs = []
    MAPEs = []

    with torch.no_grad():
        for inputs, labels in test_dataloader:

            if inputs.shape[0] != batch_size:
                continue

            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)

            prediction = outputs if output_last else outputs[:, -1, :]
            # Align the labels explicitly; comparing (B, F) with (B, 1, F)
            # would broadcast to (B, B, F).
            target = labels.reshape(prediction.shape)

            loss_mse = loss_MSE(prediction, target)
            loss_l1 = loss_L1(prediction, target)
            abs_error = torch.abs(prediction - target)

            losses_mse.append(loss_mse.item())
            losses_l1.append(loss_l1.item())
            MAEs.append(abs_error.mean().item())

            nonzero = target != 0  # zero labels would make the ratio infinite
            if bool(nonzero.any()):
                MAPEs.append((abs_error[nonzero] / target[nonzero]).mean().item())

            tested_batch += 1

            if tested_batch % 1000 == 0:
                cur_time = time.time()
                print('Tested #: {}, loss_l1: {}, loss_mse: {}, time: {}'.format( \
                      tested_batch * batch_size, \
                      np.around([loss_l1.item()], decimals=8), \
                      np.around([loss_mse.item()], decimals=8), \
                      np.around([cur_time - pre_time], decimals=8) ) )
                pre_time = cur_time

    losses_l1 = np.array(losses_l1)
    losses_mse = np.array(losses_mse)
    MAEs = np.array(MAEs)
    MAPEs = np.array(MAPEs)

    mean_l1 = np.mean(losses_l1) * max_speed
    std_l1 = np.std(losses_l1) * max_speed
    MAE_ = np.mean(MAEs) * max_speed
    MAPE_ = np.mean(MAPEs) * 100 if len(MAPEs) else float('nan')

    print('Tested: L1_mean: {}, L1_std: {}, MAE: {} MAPE: {}'.format(mean_l1, std_l1, MAE_, MAPE_))
    return [losses_l1, losses_mse, mean_l1, std_l1]


if __name__ == "__main__":

    # NOTE: pd.read_pickle unpickles arbitrary objects; only point these paths
    # at trusted local files.
    data = 'loop'
    if data == 'inrix':
        speed_matrix = pd.read_pickle('../Data_Warehouse/Data_network_traffic/inrix_seattle_speed_matrix_2012')
    elif data == 'loop':
        speed_matrix = pd.read_pickle('../Data_Warehouse/Data_network_traffic//speed_matrix_2015')

    train_dataloader, valid_dataloader, test_dataloader, max_speed, X_mean = PrepareDataset(speed_matrix, BATCH_SIZE = 64, masking = True)

    inputs, labels = next(iter(train_dataloader))
    [batch_size, type_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    # Positional order is (input_size, cell_size, hidden_size).
    grud = GRUD(input_dim, hidden_dim, output_dim, X_mean, output_last = True)
    best_grud, losses_grud = Train_Model(grud, train_dataloader, valid_dataloader)
    [losses_l1, losses_mse, mean_l1, std_l1] = Test_Model(best_grud, test_dataloader, max_speed)

# --------------------------------------------------------------------------------