├── Images
│   └── Architecture.png
├── Keras
│   ├── Main.py
│   ├── Models.py
│   └── __pycache__
│       └── Models.cpython-36.pyc
├── PyTorch
│   ├── Main.py
│   ├── Models.py
│   ├── TrainModel.py
│   └── __pycache__
│       ├── Models.cpython-36.pyc
│       └── TrainModel.cpython-36.pyc
└── README.md

/Images/Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/Images/Architecture.png
--------------------------------------------------------------------------------
/Keras/Main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 23:34:26 2018

@author: Zhiyong
"""

import datetime

import numpy as np
import pandas as pd
from numpy.random import RandomState

from Models import *

def Get_Data_Label_Aux_Set(speedMatrix, steps):
    """Build sliding-window samples plus hour-of-day and day-of-week features."""
    cabinets = speedMatrix.columns.values
    stamps = speedMatrix.index.values
    x_dim = len(cabinets)
    time_dim = len(stamps)

    speedMatrix = speedMatrix.iloc[:, :].values

    data_set = []
    label_set = []
    hour_set = []
    dayofweek_set = []

    for i in range(time_dim - steps):
        data_set.append(speedMatrix[i : i + steps])
        label_set.append(speedMatrix[i + steps])
        stamp = stamps[i + steps]
        # hour of day, read directly from the timestamp string
        hour_set.append(float(stamp[11:13]))
        # '%m' is the month; the original '%M' (minutes) silently parsed wrong dates
        dayofweek = datetime.datetime.strptime(stamp[0:10], '%Y-%m-%d').strftime('%w')
        dayofweek_set.append(float(dayofweek))

    data_set = np.array(data_set)
    label_set = np.array(label_set)
    hour_set = np.array(hour_set)
    dayofweek_set = np.array(dayofweek_set)
    return data_set, label_set, hour_set, dayofweek_set
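# Illustrative example (assuming index stamps formatted like 'YYYY-MM-DD HH:MM:SS'):
# for stamp = '2015-04-15 23:30:00', hour_set receives 23.0, and
# datetime.datetime.strptime('2015-04-15', '%Y-%m-%d').strftime('%w') returns '3',
# i.e. Wednesday ('%w' counts the weekday from 0 = Sunday).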

def SplitData(X_full, Y_full, hour_full, dayofweek_full, train_prop = 0.7, valid_prop = 0.2, test_prop = 0.1):
    n = Y_full.shape[0]
    indices = np.arange(n)
    RS = RandomState(1024)
    RS.shuffle(indices)
    sep_1 = int(float(n) * train_prop)
    sep_2 = int(float(n) * (train_prop + valid_prop))
    print('train : valid : test = ', train_prop, valid_prop, test_prop)
    train_indices = indices[:sep_1]
    valid_indices = indices[sep_1:sep_2]
    test_indices = indices[sep_2:]
    X_train = X_full[train_indices]
    X_valid = X_full[valid_indices]
    X_test = X_full[test_indices]
    Y_train = Y_full[train_indices]
    Y_valid = Y_full[valid_indices]
    Y_test = Y_full[test_indices]
    hour_train = hour_full[train_indices]
    hour_valid = hour_full[valid_indices]
    hour_test = hour_full[test_indices]
    dayofweek_train = dayofweek_full[train_indices]
    dayofweek_valid = dayofweek_full[valid_indices]
    dayofweek_test = dayofweek_full[test_indices]
    return X_train, X_valid, X_test, \
           Y_train, Y_valid, Y_test, \
           hour_train, hour_valid, hour_test, \
           dayofweek_train, dayofweek_valid, dayofweek_test

def MeasurePerformance(Y_test_scale, Y_pred, X_max, model_name = 'default', epochs = 30, model_time_lag = 10):
    """Report MAE and MAPE after rescaling predictions back to speed units."""
    time_num = Y_test_scale.shape[0]
    loop_num = Y_test_scale.shape[1]

    difference_sum = np.zeros(time_num)
    diff_frac_sum = np.zeros(time_num)

    for loop_idx in range(loop_num):
        true_speed = Y_test_scale[:, loop_idx] * X_max
        predicted_speed = Y_pred[:, loop_idx] * X_max
        diff = np.abs(true_speed - predicted_speed)
        diff_frac = diff / true_speed  # assumes nonzero true speeds
        difference_sum += diff
        diff_frac_sum += diff_frac

    difference_avg = difference_sum / loop_num
    MAPE = diff_frac_sum / loop_num * 100

    print('MAE :', round(np.mean(difference_avg), 3), 'MAPE :', round(np.mean(MAPE), 3), 'STD of MAE:', round(np.std(difference_avg), 3))
    print('Epoch : ', epochs)


if __name__ == "__main__":

    #######################################################
    # load 2015 speed data
    #######################################################
    speedMatrix = pd.read_pickle('../../../Data_Warehouse/Data_network_traffic/speed_matrix_2015')
    print('speedMatrix shape:', speedMatrix.shape)
    loopgroups_full = speedMatrix.columns.values

    time_lag = 10
    print('time lag :', time_lag)

    X_full, Y_full, hour_full, dayofweek_full = Get_Data_Label_Aux_Set(speedMatrix, time_lag)
    print('X_full shape: ', X_full.shape, 'Y_full shape:', Y_full.shape)

    #######################################################
    # split the full dataset into training, validation and test sets
    #######################################################
    X_train, X_valid, X_test, \
    Y_train, Y_valid, Y_test, \
    hour_train, hour_valid, hour_test, \
    dayofweek_train, dayofweek_valid, dayofweek_test \
    = SplitData(X_full, Y_full, hour_full, dayofweek_full, train_prop = 0.9, valid_prop = 0.0, test_prop = 0.1)
    print('X_train shape: ', X_train.shape, 'Y_train shape:', Y_train.shape)
    print('X_valid shape: ', X_valid.shape, 'Y_valid shape:', Y_valid.shape)
    print('X_test shape: ', X_test.shape, 'Y_test shape:', Y_test.shape)

    #######################################################
    # clip training data to the range [0, 100]
    # and record the max value of X for scaling
    #######################################################
    X_train = np.clip(X_train, 0, 100)
    X_test = np.clip(X_test, 0, 100)

    X_max = np.max([np.max(X_train), np.max(X_test)])
    X_min = np.min([np.min(X_train), np.min(X_test)])
    print('X_full max:', X_max)

    #######################################################
    # scale data into [0, 1]
    #######################################################
    X_train_scale = X_train / X_max
    X_test_scale = X_test / X_max

    Y_train_scale = Y_train / X_max
    Y_test_scale = Y_test / X_max

    model_epoch = 100
    patience = 20

    print("#######################################################")
    print("model_2_Bi_LSTM")
    print("time_lag", time_lag)
    model_2_Bi_LSTM, history_2_Bi_LSTM = train_2_Bi_LSTM_mask(X_train_scale, Y_train_scale, epochs = model_epoch, patience = patience)
    model_2_Bi_LSTM.save('Model_2_Bi_LSTM_' + str(len(history_2_Bi_LSTM.losses)) + 'ep' + '_tl' + str(time_lag) + '.h5')
    Y_pred_test = model_2_Bi_LSTM.predict(X_test_scale)
    MeasurePerformance(Y_test_scale, Y_pred_test, X_max, model_name = 'default', epochs = len(history_2_Bi_LSTM.losses), model_time_lag = 10)
--------------------------------------------------------------------------------
/Keras/Models.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 23:31:31 2018

@author: Zhiyong
"""

from keras.models import Sequential, Model
from keras.layers import Dense, Input, LSTM, Masking
from keras.callbacks import Callback, EarlyStopping
from keras.layers.wrappers import Bidirectional

import numpy as np

np.random.seed(1024)

class LossHistory(Callback):
    """Keras callback that records training and validation loss after each epoch."""
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
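# Usage sketch (assuming a compiled Keras model `m` and training arrays X, Y):
#   history = LossHistory()
#   m.fit(X, Y, validation_split = 0.2, epochs = 10, callbacks = [history])
#   history.losses      # per-epoch training loss
#   history.val_losses  # per-epoch validation loss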

def train_Bi_LSTM(X, Y, epochs = 30, validation_split = 0.2, patience = 20):
    speed_input = Input(shape = (X.shape[1], X.shape[2]), name = 'speed')

    # a single bidirectional LSTM layer whose forward and backward outputs are averaged
    main_output = Bidirectional(LSTM(units = X.shape[2], return_sequences = False), merge_mode = 'ave')(speed_input)

    final_model = Model(inputs = [speed_input], outputs = [main_output])

    final_model.summary()

    final_model.compile(loss = 'mse', optimizer = 'rmsprop')

    history = LossHistory()
    earlyStopping = EarlyStopping(monitor = 'val_loss', min_delta = 0.00001, patience = patience, verbose = 0, mode = 'auto')
    final_model.fit([X], Y, validation_split = validation_split, epochs = epochs, callbacks = [history, earlyStopping])

    return final_model, history

def train_2_Bi_LSTM_mask(X, Y, epochs = 30, validation_split = 0.2, patience = 20):
    # despite the name, this variant stacks two unidirectional LSTMs behind a
    # masking layer, so zero-valued (missing) inputs are skipped
    model = Sequential()
    model.add(Masking(mask_value = 0., input_shape = (X.shape[1], X.shape[2])))
    model.add(LSTM(units = X.shape[2], return_sequences = True))
    model.add(LSTM(units = X.shape[2], return_sequences = False))

    model.add(Dense(X.shape[2]))
    model.compile(loss = 'mse', optimizer = 'rmsprop')

    history = LossHistory()
    earlyStopping = EarlyStopping(monitor = 'val_loss', min_delta = 0.00001, patience = patience, verbose = 0, mode = 'auto')
    model.fit(X, Y, validation_split = validation_split, epochs = epochs, callbacks = [history, earlyStopping])

    return model, history

def train_2_Bi_LSTM(X, Y, epochs = 30, validation_split = 0.2, patience = 20):
    speed_input = Input(shape = (X.shape[1], X.shape[2]), name = 'speed')

    # a bidirectional LSTM followed by a unidirectional LSTM
    lstm_output = Bidirectional(LSTM(units = X.shape[2], return_sequences = True), merge_mode = 'ave')(speed_input)

    main_output = LSTM(units = X.shape[2])(lstm_output)

    final_model = Model(inputs = [speed_input], outputs = [main_output])

    final_model.summary()

    final_model.compile(loss = 'mse', optimizer = 'rmsprop')

    history = LossHistory()
    earlyStopping = EarlyStopping(monitor = 'val_loss', min_delta = 0.00001, patience = patience, verbose = 0, mode = 'auto')
    final_model.fit([X], Y, validation_split = validation_split, epochs = epochs, callbacks = [history, earlyStopping])

    return final_model, history
--------------------------------------------------------------------------------
/Keras/__pycache__/Models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/Keras/__pycache__/Models.cpython-36.pyc
--------------------------------------------------------------------------------
/PyTorch/Main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 18:30:03 2018

@author: Zhiyong
"""

from TrainModel import *

def PrepareDataset(speed_matrix, BATCH_SIZE = 40, seq_len = 10, pred_len = 1, train_proportion = 0.7, valid_proportion = 0.2):
    """ Prepare training, validation and testing datasets and dataloaders.

    Convert a speed/volume/occupancy matrix to training and testing datasets.
    The vertical axis of speed_matrix is the time axis and the horizontal axis
    is the spatial axis.

    Args:
        speed_matrix: a matrix containing spatio-temporal speed data for a network
        seq_len: length of the input sequence
        pred_len: length of the predicted sequence
    Returns:
        Training, validation and testing dataloaders, plus the maximum speed
        used to scale the data.
    """
    time_len = speed_matrix.shape[0]

    speed_matrix = speed_matrix.clip(0, 100)

    max_speed = speed_matrix.max().max()
    speed_matrix = speed_matrix / max_speed

    speed_sequences, speed_labels = [], []
    for i in range(time_len - seq_len - pred_len):
        speed_sequences.append(speed_matrix.iloc[i:i+seq_len].values)
        speed_labels.append(speed_matrix.iloc[i+seq_len:i+seq_len+pred_len].values)
    speed_sequences, speed_labels = np.asarray(speed_sequences), np.asarray(speed_labels)

    # shuffle, then split the dataset into training, validation and testing sets
    sample_size = speed_sequences.shape[0]
    index = np.arange(sample_size, dtype = int)
    np.random.shuffle(index)
    # the shuffled index must actually be applied, otherwise the split is not random
    speed_sequences = speed_sequences[index]
    speed_labels = speed_labels[index]

    train_index = int(np.floor(sample_size * train_proportion))
    valid_index = int(np.floor(sample_size * (train_proportion + valid_proportion)))

    train_data, train_label = speed_sequences[:train_index], speed_labels[:train_index]
    valid_data, valid_label = speed_sequences[train_index:valid_index], speed_labels[train_index:valid_index]
    test_data, test_label = speed_sequences[valid_index:], speed_labels[valid_index:]

    train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
    valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
    test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)

    train_dataset = utils.TensorDataset(train_data, train_label)
    valid_dataset = utils.TensorDataset(valid_data, valid_label)
    test_dataset = utils.TensorDataset(test_data, test_label)

    train_dataloader = utils.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)

    return train_dataloader, valid_dataloader, test_dataloader, max_speed
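# Shape sketch (hypothetical sizes): with a speed_matrix of shape (1000, 323),
# i.e. 1000 time stamps and 323 loop detectors, seq_len = 10 and pred_len = 1
# give 1000 - 10 - 1 = 989 samples, so speed_sequences.shape == (989, 10, 323)
# and speed_labels.shape == (989, 1, 323) before the train/valid/test split.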

if __name__ == "__main__":

    data = 'loop'
    if data == 'inrix':
        speed_matrix = pd.read_pickle('../../../Data_Warehouse/Data_network_traffic/inrix_seattle_speed_matrix_2012')

    elif data == 'loop':
        speed_matrix = pd.read_pickle('../../../Data_Warehouse/Data_network_traffic/speed_matrix_2015')

    train_dataloader, valid_dataloader, test_dataloader, max_speed = PrepareDataset(speed_matrix)

    # lstm, lstm_loss = TrainLSTM(train_dataloader, valid_dataloader, num_epochs = 10)

    # bilstm, bilstm_loss = Train_BiLSTM(train_dataloader, valid_dataloader, num_epochs = 10)

    multibilstm, multibilstm_loss = Train_Multi_Bi_LSTM(train_dataloader, valid_dataloader, num_epochs = 10)
--------------------------------------------------------------------------------
/PyTorch/Models.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 18:00:24 2018

@author: Zhiyong
"""

import torch.utils.data as utils
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pandas as pd
import time

class LSTM(nn.Module):
    def __init__(self, input_size, cell_size, hidden_size):
        """
        cell_size is the size of cell_state.
        hidden_size is the size of hidden_state, i.e. the output state of each step.
        (The gate layers below are sized by hidden_size, so the two are expected
        to be equal, as they are when called from TrainModel.py.)
        """
        super(LSTM, self).__init__()

        self.cell_size = cell_size
        self.hidden_size = hidden_size
        self.fl = nn.Linear(input_size + hidden_size, hidden_size)  # forget gate
        self.il = nn.Linear(input_size + hidden_size, hidden_size)  # input gate
        self.ol = nn.Linear(input_size + hidden_size, hidden_size)  # output gate
        self.Cl = nn.Linear(input_size + hidden_size, hidden_size)  # candidate cell state

    def step(self, input, Hidden_State, Cell_State):
        combined = torch.cat((input, Hidden_State), 1)
        f = F.sigmoid(self.fl(combined))
        i = F.sigmoid(self.il(combined))
        o = F.sigmoid(self.ol(combined))
        C = F.tanh(self.Cl(combined))
        Cell_State = f * Cell_State + i * C
        Hidden_State = o * F.tanh(Cell_State)

        return Hidden_State, Cell_State

    def forward(self, inputs):
        batch_size = inputs.size(0)
        time_step = inputs.size(1)
        Hidden_State, Cell_State = self.initHidden(batch_size)
        outputs = None
        for i in range(time_step):
            Hidden_State, Cell_State = self.step(inputs[:, i, :], Hidden_State, Cell_State)
            if outputs is None:
                outputs = Hidden_State.unsqueeze(1)
            else:
                # append (not prepend) so the outputs stay in forward time order
                outputs = torch.cat((outputs, Hidden_State.unsqueeze(1)), 1)
        return outputs

    def initHidden(self, batch_size):
        use_gpu = torch.cuda.is_available()
        if use_gpu:
            Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Cell_State = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            return Hidden_State, Cell_State
        else:
            Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size))
            Cell_State = Variable(torch.zeros(batch_size, self.hidden_size))
            return Hidden_State, Cell_State
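# For reference, step() above implements the standard LSTM cell update:
#   f_t = sigmoid(W_f [x_t, h_{t-1}] + b_f),  i_t = sigmoid(W_i [x_t, h_{t-1}] + b_i)
#   o_t = sigmoid(W_o [x_t, h_{t-1}] + b_o),  C~_t = tanh(W_C [x_t, h_{t-1}] + b_C)
#   C_t = f_t * C_{t-1} + i_t * C~_t,         h_t = o_t * tanh(C_t)
# with the bias terms folded into the nn.Linear layers.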

class BiLSTM(nn.Module):

    def __init__(self, input_size, cell_size, hidden_size):
        """
        cell_size is the size of cell_state.
        hidden_size is the size of hidden_state, i.e. the output state of each step.
        """
        super(BiLSTM, self).__init__()

        self.cell_size = cell_size
        self.hidden_size = hidden_size
        # forward-direction gates
        self.fl_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.il_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.ol_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.Cl_f = nn.Linear(input_size + hidden_size, hidden_size)
        # backward-direction gates
        self.fl_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.il_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.ol_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.Cl_b = nn.Linear(input_size + hidden_size, hidden_size)

    def step(self, input_f, input_b, Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b):
        combined_f = torch.cat((input_f, Hidden_State_f), 1)

        f_f = F.sigmoid(self.fl_f(combined_f))
        i_f = F.sigmoid(self.il_f(combined_f))
        o_f = F.sigmoid(self.ol_f(combined_f))
        C_f = F.tanh(self.Cl_f(combined_f))
        Cell_State_f = f_f * Cell_State_f + i_f * C_f
        Hidden_State_f = o_f * F.tanh(Cell_State_f)

        combined_b = torch.cat((input_b, Hidden_State_b), 1)

        f_b = F.sigmoid(self.fl_b(combined_b))
        i_b = F.sigmoid(self.il_b(combined_b))
        o_b = F.sigmoid(self.ol_b(combined_b))
        C_b = F.tanh(self.Cl_b(combined_b))
        Cell_State_b = f_b * Cell_State_b + i_b * C_b
        Hidden_State_b = o_b * F.tanh(Cell_State_b)

        return Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b

    def forward(self, inputs):
        outputs_f = None
        outputs_b = None

        batch_size = inputs.size(0)
        steps = inputs.size(1)

        Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b = self.initHidden(batch_size)

        for i in range(steps):
            # the forward pass reads step i while the backward pass reads step steps-i-1
            Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b = \
                self.step(inputs[:, i, :], inputs[:, steps - i - 1, :],
                          Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b)

            if outputs_f is None:
                outputs_f = Hidden_State_f.unsqueeze(1)
            else:
                outputs_f = torch.cat((outputs_f, Hidden_State_f.unsqueeze(1)), 1)
            if outputs_b is None:
                outputs_b = Hidden_State_b.unsqueeze(1)
            else:
                # prepend backward states so both tensors align in forward time order
                outputs_b = torch.cat((Hidden_State_b.unsqueeze(1), outputs_b), 1)
        outputs = (outputs_f + outputs_b) / 2
        return outputs

    def initHidden(self, batch_size):
        use_gpu = torch.cuda.is_available()
        if use_gpu:
            Hidden_State_f = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Cell_State_f = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Hidden_State_b = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Cell_State_b = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            return Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b
        else:
            Hidden_State_f = Variable(torch.zeros(batch_size, self.hidden_size))
            Cell_State_f = Variable(torch.zeros(batch_size, self.hidden_size))
            Hidden_State_b = Variable(torch.zeros(batch_size, self.hidden_size))
            Cell_State_b = Variable(torch.zeros(batch_size, self.hidden_size))
            return Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b
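# Note on merging: like Keras' Bidirectional(..., merge_mode='ave'), this cell
# averages the two directions. outputs_f is built in forward time order,
# outputs_b is built by prepending so it aligns with outputs_f step by step,
# and forward() returns their elementwise mean.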
--------------------------------------------------------------------------------
/PyTorch/TrainModel.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 18:29:13 2018

@author: Zhiyong
"""

import torch.utils.data as utils
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pandas as pd
import time

from Models import *


def TrainLSTM(train_dataloader, valid_dataloader, num_epochs = 3):

    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    lstm = LSTM(input_dim, hidden_dim, output_dim)

    use_gpu = torch.cuda.is_available()
    # only move the model to the GPU when one is actually available
    if use_gpu:
        lstm.cuda()

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = 1e-5
    optimizer = torch.optim.RMSprop(lstm.parameters(), lr = learning_rate)

    interval = 100
    losses_train = []
    losses_interval_train = []
    losses_valid = []
    losses_interval_valid = []

    cur_time = time.time()
    pre_time = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        trained_number = 0

        valid_dataloader_iter = iter(valid_dataloader)

        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            lstm.zero_grad()

            outputs = lstm(inputs)

            # the target at each step is the input at the next step, with the
            # true label appended as the target of the final step
            full_labels = torch.cat((inputs[:, 1:, :], labels), dim = 1)

            loss_train = loss_MSE(outputs, full_labels)

            losses_train.append(loss_train.data)

            optimizer.zero_grad()

            loss_train.backward()

            optimizer.step()

            # validation
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                inputs_val, labels_val = next(valid_dataloader_iter)

            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else:
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            full_labels_val = torch.cat((inputs_val[:, 1:, :], labels_val), dim = 1)

            outputs_val = lstm(inputs_val)

            loss_valid = loss_MSE(outputs_val, full_labels_val)

            losses_valid.append(loss_valid.data)

            # output
            trained_number += 1

            if trained_number % interval == 0:
                cur_time = time.time()
                loss_interval_train = np.around(sum(losses_train[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_train.append(loss_interval_train)
                loss_interval_valid = np.around(sum(losses_valid[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_valid.append(loss_interval_valid)
                print('Iteration #: {}, train_loss: {}, valid_loss: {}, time: {}'.format(
                    trained_number * batch_size,
                    loss_interval_train,
                    loss_interval_valid,
                    np.around([cur_time - pre_time], decimals = 8)))
                pre_time = cur_time

    return lstm, [losses_train, losses_interval_train, losses_valid, losses_interval_valid]
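# Validation strategy shared by all trainers in this file: after every training
# batch, one batch is drawn from valid_dataloader; when that iterator is
# exhausted, the StopIteration handler restarts it, so validation batches
# simply cycle alongside training.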

def Train_BiLSTM(train_dataloader, valid_dataloader, num_epochs = 3):

    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    bilstm = BiLSTM(input_dim, hidden_dim, output_dim)

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        bilstm.cuda()

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = 1e-5
    optimizer = torch.optim.RMSprop(bilstm.parameters(), lr = learning_rate)

    interval = 100
    losses_train = []
    losses_interval_train = []
    losses_valid = []
    losses_interval_valid = []

    cur_time = time.time()
    pre_time = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        trained_number = 0

        valid_dataloader_iter = iter(valid_dataloader)

        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            bilstm.zero_grad()

            outputs = bilstm(inputs)

            full_labels = torch.cat((inputs[:, 1:, :], labels), dim = 1)

            loss_train = loss_MSE(outputs, full_labels)

            losses_train.append(loss_train.data)

            optimizer.zero_grad()

            loss_train.backward()

            optimizer.step()

            # validation
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                inputs_val, labels_val = next(valid_dataloader_iter)

            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else:
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            full_labels_val = torch.cat((inputs_val[:, 1:, :], labels_val), dim = 1)

            outputs_val = bilstm(inputs_val)

            loss_valid = loss_MSE(outputs_val, full_labels_val)

            losses_valid.append(loss_valid.data)

            # output
            trained_number += 1

            if trained_number % interval == 0:
                cur_time = time.time()
                loss_interval_train = np.around(sum(losses_train[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_train.append(loss_interval_train)
                loss_interval_valid = np.around(sum(losses_valid[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_valid.append(loss_interval_valid)
                print('Iteration #: {}, train_loss: {}, valid_loss: {}, time: {}'.format(
                    trained_number * batch_size,
                    loss_interval_train,
                    loss_interval_valid,
                    np.around([cur_time - pre_time], decimals = 8)))
                pre_time = cur_time

    return bilstm, [losses_train, losses_interval_train, losses_valid, losses_interval_valid]

def Train_Multi_Bi_LSTM(train_dataloader, valid_dataloader, num_epochs = 3):

    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    # stack the bidirectional layer and the unidirectional layer defined in Models.py
    multiBiLSTM = nn.Sequential(BiLSTM(input_dim, hidden_dim, output_dim), LSTM(input_dim, hidden_dim, output_dim))

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        multiBiLSTM.cuda()

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = 1e-5
    optimizer = torch.optim.RMSprop(multiBiLSTM.parameters(), lr = learning_rate)

    interval = 100
    losses_train = []
    losses_interval_train = []
    losses_valid = []
    losses_interval_valid = []

    cur_time = time.time()
    pre_time = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        trained_number = 0

        valid_dataloader_iter = iter(valid_dataloader)

        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            multiBiLSTM.zero_grad()

            outputs = multiBiLSTM(inputs)

            full_labels = torch.cat((inputs[:, 1:, :], labels), dim = 1)

            loss_train = loss_MSE(outputs, full_labels)

            losses_train.append(loss_train.data)

            optimizer.zero_grad()

            loss_train.backward()

            optimizer.step()

            # validation
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                inputs_val, labels_val = next(valid_dataloader_iter)

            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else:
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            full_labels_val = torch.cat((inputs_val[:, 1:, :], labels_val), dim = 1)

            outputs_val = multiBiLSTM(inputs_val)

            loss_valid = loss_MSE(outputs_val, full_labels_val)

            losses_valid.append(loss_valid.data)

            # output
            trained_number += 1

            if trained_number % interval == 0:
                cur_time = time.time()
                loss_interval_train = np.around(sum(losses_train[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_train.append(loss_interval_train)
                loss_interval_valid = np.around(sum(losses_valid[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_valid.append(loss_interval_valid)
                print('Iteration #: {}, train_loss: {}, valid_loss: {}, time: {}'.format(
                    trained_number * batch_size,
                    loss_interval_train,
                    loss_interval_valid,
                    np.around([cur_time - pre_time], decimals = 8)))
                pre_time = cur_time

    return multiBiLSTM, [losses_train, losses_interval_train, losses_valid, losses_interval_valid]
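# Stacking note: nn.Sequential works here because BiLSTM.forward returns a
# (batch, steps, hidden_size) tensor, which is exactly the (batch, steps,
# input_size) shape that LSTM.forward expects, so the bidirectional layer's
# per-step outputs feed straight into the unidirectional layer.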
--------------------------------------------------------------------------------
/PyTorch/__pycache__/Models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/PyTorch/__pycache__/Models.cpython-36.pyc
--------------------------------------------------------------------------------
/PyTorch/__pycache__/TrainModel.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/PyTorch/__pycache__/TrainModel.cpython-36.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Stacked Bidirectional and Unidirectional LSTM Recurrent Neural Network
#### For *Network-wide Traffic Speed Prediction*

## Prologue
RNNs are normally used to capture the forward dependency of time series data, while bidirectional RNNs can capture both forward and backward dependencies. It has also been shown that stacked (multi-layer) RNNs/LSTMs work better than one-layer RNNs/LSTMs in many NLP-related applications, so it is natural to try combining bidirectional and unidirectional RNNs. We find that a neural network with multiple stacked bidirectional LSTMs followed by a unidirectional LSTM works best.
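A minimal sketch of this stacked structure, mirroring `Train_Multi_Bi_LSTM` in `PyTorch/TrainModel.py` (run from the `PyTorch/` directory; the feature size below is a hypothetical placeholder, and both hand-rolled cells take and return tensors of shape `(batch, steps, features)`):
```python
import torch.nn as nn
from Models import BiLSTM, LSTM  # the cells defined in PyTorch/Models.py

n_loops = 323          # hypothetical number of loop detector groups
stacked = nn.Sequential(
    BiLSTM(n_loops, n_loops, n_loops),  # bidirectional layer, fwd/bwd outputs averaged
    LSTM(n_loops, n_loops, n_loops),    # unidirectional layer stacked on top
)
```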
## New Progress
We are designing several internal structures for the LSTM cell to overcome the missing-values problem in time series data (replacing the masking layer in the following figure) and to make the model suitable for graph-structured data.

The original model is implemented in Keras. A newly improved version implemented in PyTorch will be released soon.

### Environment
* Python 3.6.1
* Keras 2.1.5
* PyTorch 0.3.0

For more detailed information about the model, you can refer to our [paper](https://arxiv.org/abs/1801.02143), referenced at the bottom.

## Model Structure
![alt text](/Images/Architecture.png)


## Data
To run the code, you need to download the loop detector data from this GitHub repository: https://github.com/zhiyongc/Seattle-Loop-Data. Unfortunately, the INRIX data cannot be shared due to confidentiality issues.



## Cite
We hope our work can benefit yours. If you use this code or data in your own work, please cite our paper:
[Deep Bidirectional and Unidirectional LSTM Recurrent Neural Network for Network-wide Traffic Speed Prediction](https://arxiv.org/abs/1801.02143)
```
@article{cui2018deep,
  title={Deep Bidirectional and Unidirectional LSTM Recurrent Neural Network for Network-wide Traffic Speed Prediction},
  author={Cui, Zhiyong and Ke, Ruimin and Wang, Yinhai},
  journal={arXiv preprint arXiv:1801.02143},
  year={2018}
}
```
or
```
@inproceedings{cui2017deep,
  title={Deep Stacked Bidirectional and Unidirectional LSTM Recurrent Neural Network for Network-wide Traffic Speed Prediction},
  author={Cui, Zhiyong and Ke, Ruimin and Wang, Yinhai},
  booktitle={6th International Workshop on Urban Computing (UrbComp 2017)},
  year={2017}
}
```
--------------------------------------------------------------------------------