├── Images
│   └── Architecture.png
├── Keras
│   ├── Main.py
│   ├── Models.py
│   └── __pycache__
│       └── Models.cpython-36.pyc
├── PyTorch
│   ├── Main.py
│   ├── Models.py
│   ├── TrainModel.py
│   └── __pycache__
│       ├── Models.cpython-36.pyc
│       └── TrainModel.cpython-36.pyc
└── README.md

/Images/Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/Images/Architecture.png
--------------------------------------------------------------------------------
/Keras/Main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 23:34:26 2018

@author: Zhiyong
"""

import datetime

import numpy as np
import pandas as pd
from numpy.random import RandomState

from Models import *

def Get_Data_Label_Aux_Set(speedMatrix, steps):
    """Build sliding-window samples plus hour-of-day and day-of-week features."""
    cabinets = speedMatrix.columns.values
    stamps = speedMatrix.index.values
    x_dim = len(cabinets)
    time_dim = len(stamps)

    speedMatrix = speedMatrix.iloc[:, :].values

    data_set = []
    label_set = []
    hour_set = []
    dayofweek_set = []

    for i in range(time_dim - steps):
        data_set.append(speedMatrix[i : i + steps])
        label_set.append(speedMatrix[i + steps])
        stamp = stamps[i + steps]
        # hour of day, read directly from the timestamp string
        hour_set.append(float(stamp[11:13]))
        # '%m' is the month; the original '%M' (minutes) silently parsed wrong dates
        dayofweek = datetime.datetime.strptime(stamp[0:10], '%Y-%m-%d').strftime('%w')
        dayofweek_set.append(float(dayofweek))

    data_set = np.array(data_set)
    label_set = np.array(label_set)
    hour_set = np.array(hour_set)
    dayofweek_set = np.array(dayofweek_set)
    return data_set, label_set, hour_set, dayofweek_set
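# Illustrative example (assuming index stamps formatted like 'YYYY-MM-DD HH:MM:SS'):
# for stamp = '2015-04-15 23:30:00', hour_set receives 23.0, and
# datetime.datetime.strptime('2015-04-15', '%Y-%m-%d').strftime('%w') returns '3',
# i.e. Wednesday ('%w' counts the weekday from 0 = Sunday).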

def SplitData(X_full, Y_full, hour_full, dayofweek_full, train_prop = 0.7, valid_prop = 0.2, test_prop = 0.1):
    n = Y_full.shape[0]
    indices = np.arange(n)
    RS = RandomState(1024)
    RS.shuffle(indices)
    sep_1 = int(float(n) * train_prop)
    sep_2 = int(float(n) * (train_prop + valid_prop))
    print('train : valid : test = ', train_prop, valid_prop, test_prop)
    train_indices = indices[:sep_1]
    valid_indices = indices[sep_1:sep_2]
    test_indices = indices[sep_2:]
    X_train = X_full[train_indices]
    X_valid = X_full[valid_indices]
    X_test = X_full[test_indices]
    Y_train = Y_full[train_indices]
    Y_valid = Y_full[valid_indices]
    Y_test = Y_full[test_indices]
    hour_train = hour_full[train_indices]
    hour_valid = hour_full[valid_indices]
    hour_test = hour_full[test_indices]
    dayofweek_train = dayofweek_full[train_indices]
    dayofweek_valid = dayofweek_full[valid_indices]
    dayofweek_test = dayofweek_full[test_indices]
    return X_train, X_valid, X_test, \
           Y_train, Y_valid, Y_test, \
           hour_train, hour_valid, hour_test, \
           dayofweek_train, dayofweek_valid, dayofweek_test

def MeasurePerformance(Y_test_scale, Y_pred, X_max, model_name = 'default', epochs = 30, model_time_lag = 10):
    """Report MAE and MAPE after rescaling predictions back to speed units."""
    time_num = Y_test_scale.shape[0]
    loop_num = Y_test_scale.shape[1]

    difference_sum = np.zeros(time_num)
    diff_frac_sum = np.zeros(time_num)

    for loop_idx in range(loop_num):
        true_speed = Y_test_scale[:, loop_idx] * X_max
        predicted_speed = Y_pred[:, loop_idx] * X_max
        diff = np.abs(true_speed - predicted_speed)
        diff_frac = diff / true_speed  # assumes nonzero true speeds
        difference_sum += diff
        diff_frac_sum += diff_frac

    difference_avg = difference_sum / loop_num
    MAPE = diff_frac_sum / loop_num * 100

    print('MAE :', round(np.mean(difference_avg), 3), 'MAPE :', round(np.mean(MAPE), 3), 'STD of MAE:', round(np.std(difference_avg), 3))
    print('Epoch : ', epochs)


if __name__ == "__main__":

    #######################################################
    # load 2015 speed data
    #######################################################
    speedMatrix = pd.read_pickle('../../../Data_Warehouse/Data_network_traffic/speed_matrix_2015')
    print('speedMatrix shape:', speedMatrix.shape)
    loopgroups_full = speedMatrix.columns.values

    time_lag = 10
    print('time lag :', time_lag)

    X_full, Y_full, hour_full, dayofweek_full = Get_Data_Label_Aux_Set(speedMatrix, time_lag)
    print('X_full shape: ', X_full.shape, 'Y_full shape:', Y_full.shape)

    #######################################################
    # split the full dataset into training, validation and test sets
    #######################################################
    X_train, X_valid, X_test, \
    Y_train, Y_valid, Y_test, \
    hour_train, hour_valid, hour_test, \
    dayofweek_train, dayofweek_valid, dayofweek_test \
    = SplitData(X_full, Y_full, hour_full, dayofweek_full, train_prop = 0.9, valid_prop = 0.0, test_prop = 0.1)
    print('X_train shape: ', X_train.shape, 'Y_train shape:', Y_train.shape)
    print('X_valid shape: ', X_valid.shape, 'Y_valid shape:', Y_valid.shape)
    print('X_test shape: ', X_test.shape, 'Y_test shape:', Y_test.shape)

    #######################################################
    # clip training data to the range [0, 100]
    # and record the max value of X for scaling
    #######################################################
    X_train = np.clip(X_train, 0, 100)
    X_test = np.clip(X_test, 0, 100)

    X_max = np.max([np.max(X_train), np.max(X_test)])
    X_min = np.min([np.min(X_train), np.min(X_test)])
    print('X_full max:', X_max)

    #######################################################
    # scale data into [0, 1]
    #######################################################
    X_train_scale = X_train / X_max
    X_test_scale = X_test / X_max

    Y_train_scale = Y_train / X_max
    Y_test_scale = Y_test / X_max

    model_epoch = 100
    patience = 20

    print("#######################################################")
    print("model_2_Bi_LSTM")
    print("time_lag", time_lag)
    model_2_Bi_LSTM, history_2_Bi_LSTM = train_2_Bi_LSTM_mask(X_train_scale, Y_train_scale, epochs = model_epoch, patience = patience)
    model_2_Bi_LSTM.save('Model_2_Bi_LSTM_' + str(len(history_2_Bi_LSTM.losses)) + 'ep' + '_tl' + str(time_lag) + '.h5')
    Y_pred_test = model_2_Bi_LSTM.predict(X_test_scale)
    MeasurePerformance(Y_test_scale, Y_pred_test, X_max, model_name = 'default', epochs = len(history_2_Bi_LSTM.losses), model_time_lag = 10)
--------------------------------------------------------------------------------
/Keras/Models.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 23:31:31 2018

@author: Zhiyong
"""

from keras.models import Sequential, Model
from keras.layers import Dense, Input, LSTM, Masking
from keras.callbacks import Callback, EarlyStopping
from keras.layers.wrappers import Bidirectional

import numpy as np

np.random.seed(1024)

class LossHistory(Callback):
    """Keras callback that records training and validation loss after each epoch."""
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
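# Usage sketch (assuming a compiled Keras model `m` and training arrays X, Y):
#   history = LossHistory()
#   m.fit(X, Y, validation_split = 0.2, epochs = 10, callbacks = [history])
#   history.losses      # per-epoch training loss
#   history.val_losses  # per-epoch validation loss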

def train_Bi_LSTM(X, Y, epochs = 30, validation_split = 0.2, patience = 20):
    speed_input = Input(shape = (X.shape[1], X.shape[2]), name = 'speed')

    # a single bidirectional LSTM layer whose forward and backward outputs are averaged
    main_output = Bidirectional(LSTM(units = X.shape[2], return_sequences = False), merge_mode = 'ave')(speed_input)

    final_model = Model(inputs = [speed_input], outputs = [main_output])

    final_model.summary()

    final_model.compile(loss = 'mse', optimizer = 'rmsprop')

    history = LossHistory()
    earlyStopping = EarlyStopping(monitor = 'val_loss', min_delta = 0.00001, patience = patience, verbose = 0, mode = 'auto')
    final_model.fit([X], Y, validation_split = validation_split, epochs = epochs, callbacks = [history, earlyStopping])

    return final_model, history

def train_2_Bi_LSTM_mask(X, Y, epochs = 30, validation_split = 0.2, patience = 20):
    # despite the name, this variant stacks two unidirectional LSTMs behind a
    # masking layer, so zero-valued (missing) inputs are skipped
    model = Sequential()
    model.add(Masking(mask_value = 0., input_shape = (X.shape[1], X.shape[2])))
    model.add(LSTM(units = X.shape[2], return_sequences = True))
    model.add(LSTM(units = X.shape[2], return_sequences = False))

    model.add(Dense(X.shape[2]))
    model.compile(loss = 'mse', optimizer = 'rmsprop')

    history = LossHistory()
    earlyStopping = EarlyStopping(monitor = 'val_loss', min_delta = 0.00001, patience = patience, verbose = 0, mode = 'auto')
    model.fit(X, Y, validation_split = validation_split, epochs = epochs, callbacks = [history, earlyStopping])

    return model, history

def train_2_Bi_LSTM(X, Y, epochs = 30, validation_split = 0.2, patience = 20):
    speed_input = Input(shape = (X.shape[1], X.shape[2]), name = 'speed')

    # a bidirectional LSTM followed by a unidirectional LSTM
    lstm_output = Bidirectional(LSTM(units = X.shape[2], return_sequences = True), merge_mode = 'ave')(speed_input)

    main_output = LSTM(units = X.shape[2])(lstm_output)

    final_model = Model(inputs = [speed_input], outputs = [main_output])

    final_model.summary()

    final_model.compile(loss = 'mse', optimizer = 'rmsprop')

    history = LossHistory()
    earlyStopping = EarlyStopping(monitor = 'val_loss', min_delta = 0.00001, patience = patience, verbose = 0, mode = 'auto')
    final_model.fit([X], Y, validation_split = validation_split, epochs = epochs, callbacks = [history, earlyStopping])

    return final_model, history
--------------------------------------------------------------------------------
/Keras/__pycache__/Models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/Keras/__pycache__/Models.cpython-36.pyc
--------------------------------------------------------------------------------
/PyTorch/Main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 18:30:03 2018

@author: Zhiyong
"""

from TrainModel import *

def PrepareDataset(speed_matrix, BATCH_SIZE = 40, seq_len = 10, pred_len = 1, train_proportion = 0.7, valid_proportion = 0.2):
    """ Prepare training, validation and testing datasets and dataloaders.

    Convert a speed/volume/occupancy matrix to training and testing datasets.
    The vertical axis of speed_matrix is the time axis and the horizontal axis
    is the spatial axis.

    Args:
        speed_matrix: a matrix containing spatio-temporal speed data for a network
        seq_len: length of the input sequence
        pred_len: length of the predicted sequence
    Returns:
        Training, validation and testing dataloaders, plus the maximum speed
        used to scale the data.
    """
    time_len = speed_matrix.shape[0]

    speed_matrix = speed_matrix.clip(0, 100)

    max_speed = speed_matrix.max().max()
    speed_matrix = speed_matrix / max_speed

    speed_sequences, speed_labels = [], []
    for i in range(time_len - seq_len - pred_len):
        speed_sequences.append(speed_matrix.iloc[i:i+seq_len].values)
        speed_labels.append(speed_matrix.iloc[i+seq_len:i+seq_len+pred_len].values)
    speed_sequences, speed_labels = np.asarray(speed_sequences), np.asarray(speed_labels)

    # shuffle, then split the dataset into training, validation and testing sets
    sample_size = speed_sequences.shape[0]
    index = np.arange(sample_size, dtype = int)
    np.random.shuffle(index)
    # the shuffled index must actually be applied, otherwise the split is not random
    speed_sequences = speed_sequences[index]
    speed_labels = speed_labels[index]

    train_index = int(np.floor(sample_size * train_proportion))
    valid_index = int(np.floor(sample_size * (train_proportion + valid_proportion)))

    train_data, train_label = speed_sequences[:train_index], speed_labels[:train_index]
    valid_data, valid_label = speed_sequences[train_index:valid_index], speed_labels[train_index:valid_index]
    test_data, test_label = speed_sequences[valid_index:], speed_labels[valid_index:]

    train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
    valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
    test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)

    train_dataset = utils.TensorDataset(train_data, train_label)
    valid_dataset = utils.TensorDataset(valid_data, valid_label)
    test_dataset = utils.TensorDataset(test_data, test_label)

    train_dataloader = utils.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)

    return train_dataloader, valid_dataloader, test_dataloader, max_speed
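# Shape sketch (hypothetical sizes): with a speed_matrix of shape (1000, 323),
# i.e. 1000 time stamps and 323 loop detectors, seq_len = 10 and pred_len = 1
# give 1000 - 10 - 1 = 989 samples, so speed_sequences.shape == (989, 10, 323)
# and speed_labels.shape == (989, 1, 323) before the train/valid/test split.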

if __name__ == "__main__":

    data = 'loop'
    if data == 'inrix':
        speed_matrix = pd.read_pickle('../../../Data_Warehouse/Data_network_traffic/inrix_seattle_speed_matrix_2012')

    elif data == 'loop':
        speed_matrix = pd.read_pickle('../../../Data_Warehouse/Data_network_traffic/speed_matrix_2015')

    train_dataloader, valid_dataloader, test_dataloader, max_speed = PrepareDataset(speed_matrix)

    # lstm, lstm_loss = TrainLSTM(train_dataloader, valid_dataloader, num_epochs = 10)

    # bilstm, bilstm_loss = Train_BiLSTM(train_dataloader, valid_dataloader, num_epochs = 10)

    multibilstm, multibilstm_loss = Train_Multi_Bi_LSTM(train_dataloader, valid_dataloader, num_epochs = 10)
--------------------------------------------------------------------------------
/PyTorch/Models.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 18:00:24 2018

@author: Zhiyong
"""

import torch.utils.data as utils
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pandas as pd
import time

class LSTM(nn.Module):
    def __init__(self, input_size, cell_size, hidden_size):
        """
        cell_size is the size of cell_state.
        hidden_size is the size of hidden_state, i.e. the output state of each step.
        (The gate layers below are sized by hidden_size, so the two are expected
        to be equal, as they are when called from TrainModel.py.)
        """
        super(LSTM, self).__init__()

        self.cell_size = cell_size
        self.hidden_size = hidden_size
        self.fl = nn.Linear(input_size + hidden_size, hidden_size)  # forget gate
        self.il = nn.Linear(input_size + hidden_size, hidden_size)  # input gate
        self.ol = nn.Linear(input_size + hidden_size, hidden_size)  # output gate
        self.Cl = nn.Linear(input_size + hidden_size, hidden_size)  # candidate cell state

    def step(self, input, Hidden_State, Cell_State):
        combined = torch.cat((input, Hidden_State), 1)
        f = F.sigmoid(self.fl(combined))
        i = F.sigmoid(self.il(combined))
        o = F.sigmoid(self.ol(combined))
        C = F.tanh(self.Cl(combined))
        Cell_State = f * Cell_State + i * C
        Hidden_State = o * F.tanh(Cell_State)

        return Hidden_State, Cell_State

    def forward(self, inputs):
        batch_size = inputs.size(0)
        time_step = inputs.size(1)
        Hidden_State, Cell_State = self.initHidden(batch_size)
        outputs = None
        for i in range(time_step):
            Hidden_State, Cell_State = self.step(inputs[:, i, :], Hidden_State, Cell_State)
            if outputs is None:
                outputs = Hidden_State.unsqueeze(1)
            else:
                # append (not prepend) so the outputs stay in forward time order
                outputs = torch.cat((outputs, Hidden_State.unsqueeze(1)), 1)
        return outputs

    def initHidden(self, batch_size):
        use_gpu = torch.cuda.is_available()
        if use_gpu:
            Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Cell_State = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            return Hidden_State, Cell_State
        else:
            Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size))
            Cell_State = Variable(torch.zeros(batch_size, self.hidden_size))
            return Hidden_State, Cell_State
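# For reference, step() above implements the standard LSTM cell update:
#   f_t = sigmoid(W_f [x_t, h_{t-1}] + b_f),  i_t = sigmoid(W_i [x_t, h_{t-1}] + b_i)
#   o_t = sigmoid(W_o [x_t, h_{t-1}] + b_o),  C~_t = tanh(W_C [x_t, h_{t-1}] + b_C)
#   C_t = f_t * C_{t-1} + i_t * C~_t,         h_t = o_t * tanh(C_t)
# with the bias terms folded into the nn.Linear layers.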

class BiLSTM(nn.Module):

    def __init__(self, input_size, cell_size, hidden_size):
        """
        cell_size is the size of cell_state.
        hidden_size is the size of hidden_state, i.e. the output state of each step.
        """
        super(BiLSTM, self).__init__()

        self.cell_size = cell_size
        self.hidden_size = hidden_size
        # forward-direction gates
        self.fl_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.il_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.ol_f = nn.Linear(input_size + hidden_size, hidden_size)
        self.Cl_f = nn.Linear(input_size + hidden_size, hidden_size)
        # backward-direction gates
        self.fl_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.il_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.ol_b = nn.Linear(input_size + hidden_size, hidden_size)
        self.Cl_b = nn.Linear(input_size + hidden_size, hidden_size)

    def step(self, input_f, input_b, Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b):
        combined_f = torch.cat((input_f, Hidden_State_f), 1)

        f_f = F.sigmoid(self.fl_f(combined_f))
        i_f = F.sigmoid(self.il_f(combined_f))
        o_f = F.sigmoid(self.ol_f(combined_f))
        C_f = F.tanh(self.Cl_f(combined_f))
        Cell_State_f = f_f * Cell_State_f + i_f * C_f
        Hidden_State_f = o_f * F.tanh(Cell_State_f)

        combined_b = torch.cat((input_b, Hidden_State_b), 1)

        f_b = F.sigmoid(self.fl_b(combined_b))
        i_b = F.sigmoid(self.il_b(combined_b))
        o_b = F.sigmoid(self.ol_b(combined_b))
        C_b = F.tanh(self.Cl_b(combined_b))
        Cell_State_b = f_b * Cell_State_b + i_b * C_b
        Hidden_State_b = o_b * F.tanh(Cell_State_b)

        return Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b

    def forward(self, inputs):
        outputs_f = None
        outputs_b = None

        batch_size = inputs.size(0)
        steps = inputs.size(1)

        Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b = self.initHidden(batch_size)

        for i in range(steps):
            # the forward pass reads step i while the backward pass reads step steps-i-1
            Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b = \
                self.step(inputs[:, i, :], inputs[:, steps - i - 1, :],
                          Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b)

            if outputs_f is None:
                outputs_f = Hidden_State_f.unsqueeze(1)
            else:
                outputs_f = torch.cat((outputs_f, Hidden_State_f.unsqueeze(1)), 1)
            if outputs_b is None:
                outputs_b = Hidden_State_b.unsqueeze(1)
            else:
                # prepend backward states so both tensors align in forward time order
                outputs_b = torch.cat((Hidden_State_b.unsqueeze(1), outputs_b), 1)
        outputs = (outputs_f + outputs_b) / 2
        return outputs

    def initHidden(self, batch_size):
        use_gpu = torch.cuda.is_available()
        if use_gpu:
            Hidden_State_f = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Cell_State_f = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Hidden_State_b = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            Cell_State_b = Variable(torch.zeros(batch_size, self.hidden_size).cuda())
            return Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b
        else:
            Hidden_State_f = Variable(torch.zeros(batch_size, self.hidden_size))
            Cell_State_f = Variable(torch.zeros(batch_size, self.hidden_size))
            Hidden_State_b = Variable(torch.zeros(batch_size, self.hidden_size))
            Cell_State_b = Variable(torch.zeros(batch_size, self.hidden_size))
            return Hidden_State_f, Cell_State_f, Hidden_State_b, Cell_State_b
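# Note on merging: like Keras' Bidirectional(..., merge_mode='ave'), this cell
# averages the two directions. outputs_f is built in forward time order,
# outputs_b is built by prepending so it aligns with outputs_f step by step,
# and forward() returns their elementwise mean.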
--------------------------------------------------------------------------------
/PyTorch/TrainModel.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 15 18:29:13 2018

@author: Zhiyong
"""

import torch.utils.data as utils
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pandas as pd
import time

from Models import *


def TrainLSTM(train_dataloader, valid_dataloader, num_epochs = 3):

    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    lstm = LSTM(input_dim, hidden_dim, output_dim)

    use_gpu = torch.cuda.is_available()
    # only move the model to the GPU when one is actually available
    if use_gpu:
        lstm.cuda()

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = 1e-5
    optimizer = torch.optim.RMSprop(lstm.parameters(), lr = learning_rate)

    interval = 100
    losses_train = []
    losses_interval_train = []
    losses_valid = []
    losses_interval_valid = []

    cur_time = time.time()
    pre_time = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        trained_number = 0

        valid_dataloader_iter = iter(valid_dataloader)

        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            lstm.zero_grad()

            outputs = lstm(inputs)

            # the target at each step is the input at the next step, with the
            # true label appended as the target of the final step
            full_labels = torch.cat((inputs[:, 1:, :], labels), dim = 1)

            loss_train = loss_MSE(outputs, full_labels)

            losses_train.append(loss_train.data)

            optimizer.zero_grad()

            loss_train.backward()

            optimizer.step()

            # validation
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                inputs_val, labels_val = next(valid_dataloader_iter)

            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else:
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            full_labels_val = torch.cat((inputs_val[:, 1:, :], labels_val), dim = 1)

            outputs_val = lstm(inputs_val)

            loss_valid = loss_MSE(outputs_val, full_labels_val)

            losses_valid.append(loss_valid.data)

            # output
            trained_number += 1

            if trained_number % interval == 0:
                cur_time = time.time()
                loss_interval_train = np.around(sum(losses_train[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_train.append(loss_interval_train)
                loss_interval_valid = np.around(sum(losses_valid[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_valid.append(loss_interval_valid)
                print('Iteration #: {}, train_loss: {}, valid_loss: {}, time: {}'.format(
                    trained_number * batch_size,
                    loss_interval_train,
                    loss_interval_valid,
                    np.around([cur_time - pre_time], decimals = 8)))
                pre_time = cur_time

    return lstm, [losses_train, losses_interval_train, losses_valid, losses_interval_valid]
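# Validation strategy shared by all trainers in this file: after every training
# batch, one batch is drawn from valid_dataloader; when that iterator is
# exhausted, the StopIteration handler restarts it, so validation batches
# simply cycle alongside training.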

def Train_BiLSTM(train_dataloader, valid_dataloader, num_epochs = 3):

    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    bilstm = BiLSTM(input_dim, hidden_dim, output_dim)

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        bilstm.cuda()

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = 1e-5
    optimizer = torch.optim.RMSprop(bilstm.parameters(), lr = learning_rate)

    interval = 100
    losses_train = []
    losses_interval_train = []
    losses_valid = []
    losses_interval_valid = []

    cur_time = time.time()
    pre_time = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        trained_number = 0

        valid_dataloader_iter = iter(valid_dataloader)

        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            bilstm.zero_grad()

            outputs = bilstm(inputs)

            full_labels = torch.cat((inputs[:, 1:, :], labels), dim = 1)

            loss_train = loss_MSE(outputs, full_labels)

            losses_train.append(loss_train.data)

            optimizer.zero_grad()

            loss_train.backward()

            optimizer.step()

            # validation
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                inputs_val, labels_val = next(valid_dataloader_iter)

            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else:
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            full_labels_val = torch.cat((inputs_val[:, 1:, :], labels_val), dim = 1)

            outputs_val = bilstm(inputs_val)

            loss_valid = loss_MSE(outputs_val, full_labels_val)

            losses_valid.append(loss_valid.data)

            # output
            trained_number += 1

            if trained_number % interval == 0:
                cur_time = time.time()
                loss_interval_train = np.around(sum(losses_train[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_train.append(loss_interval_train)
                loss_interval_valid = np.around(sum(losses_valid[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_valid.append(loss_interval_valid)
                print('Iteration #: {}, train_loss: {}, valid_loss: {}, time: {}'.format(
                    trained_number * batch_size,
                    loss_interval_train,
                    loss_interval_valid,
                    np.around([cur_time - pre_time], decimals = 8)))
                pre_time = cur_time

    return bilstm, [losses_train, losses_interval_train, losses_valid, losses_interval_valid]

def Train_Multi_Bi_LSTM(train_dataloader, valid_dataloader, num_epochs = 3):

    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size

    # stack the bidirectional layer and the unidirectional layer defined in Models.py
    multiBiLSTM = nn.Sequential(BiLSTM(input_dim, hidden_dim, output_dim), LSTM(input_dim, hidden_dim, output_dim))

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        multiBiLSTM.cuda()

    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = 1e-5
    optimizer = torch.optim.RMSprop(multiBiLSTM.parameters(), lr = learning_rate)

    interval = 100
    losses_train = []
    losses_interval_train = []
    losses_valid = []
    losses_interval_valid = []

    cur_time = time.time()
    pre_time = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        trained_number = 0

        valid_dataloader_iter = iter(valid_dataloader)

        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            multiBiLSTM.zero_grad()

            outputs = multiBiLSTM(inputs)

            full_labels = torch.cat((inputs[:, 1:, :], labels), dim = 1)

            loss_train = loss_MSE(outputs, full_labels)

            losses_train.append(loss_train.data)

            optimizer.zero_grad()

            loss_train.backward()

            optimizer.step()

            # validation
            try:
                inputs_val, labels_val = next(valid_dataloader_iter)
            except StopIteration:
                valid_dataloader_iter = iter(valid_dataloader)
                inputs_val, labels_val = next(valid_dataloader_iter)

            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else:
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            full_labels_val = torch.cat((inputs_val[:, 1:, :], labels_val), dim = 1)

            outputs_val = multiBiLSTM(inputs_val)

            loss_valid = loss_MSE(outputs_val, full_labels_val)

            losses_valid.append(loss_valid.data)

            # output
            trained_number += 1

            if trained_number % interval == 0:
                cur_time = time.time()
                loss_interval_train = np.around(sum(losses_train[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_train.append(loss_interval_train)
                loss_interval_valid = np.around(sum(losses_valid[-interval:]).cpu().numpy()[0] / interval, decimals = 8)
                losses_interval_valid.append(loss_interval_valid)
                print('Iteration #: {}, train_loss: {}, valid_loss: {}, time: {}'.format(
                    trained_number * batch_size,
                    loss_interval_train,
                    loss_interval_valid,
                    np.around([cur_time - pre_time], decimals = 8)))
                pre_time = cur_time

    return multiBiLSTM, [losses_train, losses_interval_train, losses_valid, losses_interval_valid]
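# Stacking note: nn.Sequential works here because BiLSTM.forward returns a
# (batch, steps, hidden_size) tensor, which is exactly the (batch, steps,
# input_size) shape that LSTM.forward expects, so the bidirectional layer's
# per-step outputs feed straight into the unidirectional layer.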
--------------------------------------------------------------------------------
/PyTorch/__pycache__/Models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/PyTorch/__pycache__/Models.cpython-36.pyc
--------------------------------------------------------------------------------
/PyTorch/__pycache__/TrainModel.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyongc/Stacked_Bidirectional_Unidirectional_LSTM/b5f291b09a9f832b0901f86fc566502574d96826/PyTorch/__pycache__/TrainModel.cpython-36.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Stacked Bidirectional and Unidirectional LSTM Recurrent Neural Network
#### For *Network-wide Traffic Speed Prediction*

## Prologue
RNNs are normally used to capture the forward dependency of time series data, while bidirectional RNNs can capture both forward and backward dependencies. It has also been shown that stacked (multi-layer) RNNs/LSTMs work better than one-layer RNNs/LSTMs in many NLP-related applications, so it is natural to try combining bidirectional and unidirectional RNNs. We find that a neural network with multiple stacked bidirectional LSTMs followed by a unidirectional LSTM works best.
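A minimal sketch of this stacked structure, mirroring `Train_Multi_Bi_LSTM` in `PyTorch/TrainModel.py` (run from the `PyTorch/` directory; the feature size below is a hypothetical placeholder, and both hand-rolled cells take and return tensors of shape `(batch, steps, features)`):
```python
import torch.nn as nn
from Models import BiLSTM, LSTM  # the cells defined in PyTorch/Models.py

n_loops = 323          # hypothetical number of loop detector groups
stacked = nn.Sequential(
    BiLSTM(n_loops, n_loops, n_loops),  # bidirectional layer, fwd/bwd outputs averaged
    LSTM(n_loops, n_loops, n_loops),    # unidirectional layer stacked on top
)
```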
## New Progress
We are designing several internal structures for the LSTM cell to overcome the missing-values problem in time series data (replacing the masking layer in the following figure) and to make the model suitable for graph-structured data.

The original model is implemented in Keras. A newly improved version implemented in PyTorch will be released soon.

### Environment
* Python 3.6.1
* Keras 2.1.5
* PyTorch 0.3.0

For more detailed information about the model, you can refer to our [paper](https://arxiv.org/abs/1801.02143), referenced at the bottom.

## Model Structure
![alt text](/Images/Architecture.png)


## Data
To run the code, you need to download the loop detector data from this GitHub repository: https://github.com/zhiyongc/Seattle-Loop-Data. Unfortunately, the INRIX data cannot be shared due to confidentiality issues.



## Cite
We hope our work can benefit yours. If you use this code or data in your own work, please cite our paper:
[Deep Bidirectional and Unidirectional LSTM Recurrent Neural Network for Network-wide Traffic Speed Prediction](https://arxiv.org/abs/1801.02143)
```
@article{cui2018deep,
  title={Deep Bidirectional and Unidirectional LSTM Recurrent Neural Network for Network-wide Traffic Speed Prediction},
  author={Cui, Zhiyong and Ke, Ruimin and Wang, Yinhai},
  journal={arXiv preprint arXiv:1801.02143},
  year={2018}
}
```
or
```
@inproceedings{cui2017deep,
  title={Deep Stacked Bidirectional and Unidirectional LSTM Recurrent Neural Network for Network-wide Traffic Speed Prediction},
  author={Cui, Zhiyong and Ke, Ruimin and Wang, Yinhai},
  booktitle={6th International Workshop on Urban Computing (UrbComp 2017)},
  year={2017}
}
```
--------------------------------------------------------------------------------