├── README.md
├── build_model_basic.py
├── build_model_multi_variate.py
├── build_model_with_outliers.py
├── datasets.py
└── time_series_seq2seq.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Multivariate-Time-Series-forecast-using-seq2seq-in-TensorFlow
2 | 
3 | A study on time series forecasting using a sequence-to-sequence model in TensorFlow.
4 | 
5 | This repo follows the original post at:
6 | 
7 | weiminwang.blog/2017/09/29/multivariate-time-series-forecast-using-seq2seq-in-tensorflow/
8 | 
9 | You can also look at the IPython notebook for code details.
10 | 
11 | Contents -
12 | 1) Univariate time series
13 | 2) Multivariate time series
14 | 3) Seq2seq for outliers/extreme events
15 | 4) A case study - Beijing pollution data
16 | 
17 | Dataset credits - https://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/build_model_basic.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.contrib import rnn
3 | from tensorflow.python.ops import variable_scope
4 | from tensorflow.python.framework import dtypes
5 | import copy
6 | 
7 | ## Parameters
8 | learning_rate = 0.01
9 | lambda_l2_reg = 0.003
10 | 
11 | ## Network Parameters
12 | # length of input signals
13 | input_seq_len = 15
14 | # length of output signals
15 | output_seq_len = 20
16 | # size of LSTM Cell
17 | hidden_dim = 64
18 | # num of input signals
19 | input_dim = 1
20 | # num of output signals
21 | output_dim = 1
22 | # num of stacked lstm layers
23 | num_stacked_layers = 2
24 | # gradient clipping - to avoid gradient exploding
25 | GRADIENT_CLIPPING = 2.5
26 | 
27 | def build_graph(feed_previous = False):
28 | 
29 | tf.reset_default_graph()
30 | 
31 | global_step = tf.Variable(
32 | initial_value=0,
33 | name="global_step",
34 | trainable=False,
35 | collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
36 | 
37 | weights = {
38 | 'out': tf.get_variable('Weights_out', \
39 | shape = [hidden_dim, output_dim], \
40 | dtype = tf.float32, \
41 | initializer = tf.truncated_normal_initializer()),
42 | }
43 | biases = {
44 | 'out': tf.get_variable('Biases_out', \
45 | shape = [output_dim], \
46 | dtype = tf.float32, \
47 | initializer = tf.constant_initializer(0.)),
48 | }
49 | 
50 | with tf.variable_scope('Seq2seq'):
51 | # Encoder: inputs
52 | enc_inp = [
53 | tf.placeholder(tf.float32, shape=(None, input_dim), name="inp_{}".format(t))
54 | for t in range(input_seq_len)
55 | ]
56 | 
57 | # Decoder: target outputs
58 | target_seq = [
59 | tf.placeholder(tf.float32, shape=(None, output_dim), name="y_{}".format(t))
60 | for t in range(output_seq_len)
61 | ]
62 | 
63 | # Give a "GO" token to the decoder.
64 | # If dec_inp is fed into the decoder as inputs, this is 'guided' training; otherwise only the
65 | # first element will be fed as decoder input, which is then 'un-guided'
66 | dec_inp = [ tf.zeros_like(target_seq[0], dtype=tf.float32, name="GO") ] + target_seq[:-1]
67 | 
68 | with tf.variable_scope('LSTMCell'):
69 | cells = []
70 | for i in range(num_stacked_layers):
71 | with tf.variable_scope('RNN_{}'.format(i)):
72 | cells.append(tf.contrib.rnn.LSTMCell(hidden_dim))
73 | cell = tf.contrib.rnn.MultiRNNCell(cells)
74 | 
75 | def _rnn_decoder(decoder_inputs,
76 | initial_state,
77 | cell,
78 | loop_function=None,
79 | scope=None):
80 | """RNN decoder for the sequence-to-sequence model.
81 | Args: 82 | decoder_inputs: A list of 2D Tensors [batch_size x input_size]. 83 | initial_state: 2D Tensor with shape [batch_size x cell.state_size]. 84 | cell: rnn_cell.RNNCell defining the cell function and size. 85 | loop_function: If not None, this function will be applied to the i-th output 86 | in order to generate the i+1-st input, and decoder_inputs will be ignored, 87 | except for the first element ("GO" symbol). This can be used for decoding, 88 | but also for training to emulate http://arxiv.org/abs/1506.03099. 89 | Signature -- loop_function(prev, i) = next 90 | * prev is a 2D Tensor of shape [batch_size x output_size], 91 | * i is an integer, the step number (when advanced control is needed), 92 | * next is a 2D Tensor of shape [batch_size x input_size]. 93 | scope: VariableScope for the created subgraph; defaults to "rnn_decoder". 94 | Returns: 95 | A tuple of the form (outputs, state), where: 96 | outputs: A list of the same length as decoder_inputs of 2D Tensors with 97 | shape [batch_size x output_size] containing generated outputs. 98 | state: The state of each cell at the final time-step. 99 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 100 | (Note that in some cases, like basic RNN cell or GRU cell, outputs and 101 | states can be the same. They are different for LSTM cells though.) 102 | """ 103 | with variable_scope.variable_scope(scope or "rnn_decoder"): 104 | state = initial_state 105 | outputs = [] 106 | prev = None 107 | for i, inp in enumerate(decoder_inputs): 108 | if loop_function is not None and prev is not None: 109 | with variable_scope.variable_scope("loop_function", reuse=True): 110 | inp = loop_function(prev, i) 111 | if i > 0: 112 | variable_scope.get_variable_scope().reuse_variables() 113 | output, state = cell(inp, state) 114 | outputs.append(output) 115 | if loop_function is not None: 116 | prev = output 117 | return outputs, state 118 | 119 | def _basic_rnn_seq2seq(encoder_inputs, 120 | decoder_inputs, 121 | cell, 122 | feed_previous, 123 | dtype=dtypes.float32, 124 | scope=None): 125 | """Basic RNN sequence-to-sequence model. 126 | This model first runs an RNN to encode encoder_inputs into a state vector, 127 | then runs decoder, initialized with the last encoder state, on decoder_inputs. 128 | Encoder and decoder use the same RNN cell type, but don't share parameters. 129 | Args: 130 | encoder_inputs: A list of 2D Tensors [batch_size x input_size]. 131 | decoder_inputs: A list of 2D Tensors [batch_size x input_size]. 132 | feed_previous: Boolean; if True, only the first of decoder_inputs will be 133 | used (the "GO" symbol), all other inputs will be generated by the previous 134 | decoder output using _loop_function below. If False, decoder_inputs are used 135 | as given (the standard decoder case). 136 | dtype: The dtype of the initial state of the RNN cell (default: tf.float32). 137 | scope: VariableScope for the created subgraph; default: "basic_rnn_seq2seq". 138 | Returns: 139 | A tuple of the form (outputs, state), where: 140 | outputs: A list of the same length as decoder_inputs of 2D Tensors with 141 | shape [batch_size x output_size] containing the generated outputs. 142 | state: The state of each decoder cell in the final time-step. 143 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 
144 | """ 145 | with variable_scope.variable_scope(scope or "basic_rnn_seq2seq"): 146 | enc_cell = copy.deepcopy(cell) 147 | _, enc_state = rnn.static_rnn(enc_cell, encoder_inputs, dtype=dtype) 148 | if feed_previous: 149 | return _rnn_decoder(decoder_inputs, enc_state, cell, _loop_function) 150 | else: 151 | return _rnn_decoder(decoder_inputs, enc_state, cell) 152 | 153 | def _loop_function(prev, _): 154 | '''Naive implementation of loop function for _rnn_decoder. Transform prev from 155 | dimension [batch_size x hidden_dim] to [batch_size x output_dim], which will be 156 | used as decoder input of next time step ''' 157 | return tf.matmul(prev, weights['out']) + biases['out'] 158 | 159 | dec_outputs, dec_memory = _basic_rnn_seq2seq( 160 | enc_inp, 161 | dec_inp, 162 | cell, 163 | feed_previous = feed_previous 164 | ) 165 | 166 | reshaped_outputs = [tf.matmul(i, weights['out']) + biases['out'] for i in dec_outputs] 167 | 168 | # Training loss and optimizer 169 | with tf.variable_scope('Loss'): 170 | # L2 loss 171 | output_loss = 0 172 | for _y, _Y in zip(reshaped_outputs, target_seq): 173 | output_loss += tf.reduce_mean(tf.pow(_y - _Y, 2)) 174 | 175 | # L2 regularization for weights and biases 176 | reg_loss = 0 177 | for tf_var in tf.trainable_variables(): 178 | if 'Biases_' in tf_var.name or 'Weights_' in tf_var.name: 179 | reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var)) 180 | 181 | loss = output_loss + lambda_l2_reg * reg_loss 182 | 183 | with tf.variable_scope('Optimizer'): 184 | optimizer = tf.contrib.layers.optimize_loss( 185 | loss=loss, 186 | learning_rate=learning_rate, 187 | global_step=global_step, 188 | optimizer='Adam', 189 | clip_gradients=GRADIENT_CLIPPING) 190 | 191 | saver = tf.train.Saver 192 | 193 | return dict( 194 | enc_inp = enc_inp, 195 | target_seq = target_seq, 196 | train_op = optimizer, 197 | loss=loss, 198 | saver = saver, 199 | reshaped_outputs = reshaped_outputs, 200 | ) -------------------------------------------------------------------------------- /build_model_multi_variate.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import rnn 3 | from tensorflow.python.ops import variable_scope 4 | from tensorflow.python.framework import dtypes 5 | import copy 6 | 7 | ## Parameters 8 | learning_rate = 0.01 9 | lambda_l2_reg = 0.003 10 | 11 | ## Network Parameters 12 | # length of input signals 13 | input_seq_len = 15 14 | # length of output signals 15 | output_seq_len = 20 16 | # size of LSTM Cell 17 | hidden_dim = 64 18 | # num of input signals 19 | input_dim = 3 20 | # num of output signals 21 | output_dim = 2 22 | # num of stacked lstm layers 23 | num_stacked_layers = 2 24 | # gradient clipping - to avoid gradient exploding 25 | GRADIENT_CLIPPING = 2.5 26 | 27 | def build_graph(feed_previous = False): 28 | 29 | tf.reset_default_graph() 30 | 31 | global_step = tf.Variable( 32 | initial_value=0, 33 | name="global_step", 34 | trainable=False, 35 | collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES]) 36 | 37 | weights = { 38 | 'out': tf.get_variable('Weights_out', \ 39 | shape = [hidden_dim, output_dim], \ 40 | dtype = tf.float32, \ 41 | initializer = tf.truncated_normal_initializer()), 42 | } 43 | biases = { 44 | 'out': tf.get_variable('Biases_out', \ 45 | shape = [output_dim], \ 46 | dtype = tf.float32, \ 47 | initializer = tf.constant_initializer(0.)), 48 | } 49 | 50 | with tf.variable_scope('Seq2seq'): 51 | # Encoder: inputs 52 | enc_inp = [ 53 | 
tf.placeholder(tf.float32, shape=(None, input_dim), name="inp_{}".format(t))
54 | for t in range(input_seq_len)
55 | ]
56 | 
57 | # Decoder: target outputs
58 | target_seq = [
59 | tf.placeholder(tf.float32, shape=(None, output_dim), name="y_{}".format(t))
60 | for t in range(output_seq_len)
61 | ]
62 | 
63 | # Give a "GO" token to the decoder.
64 | # If dec_inp is fed into the decoder as inputs, this is 'guided' training; otherwise only the
65 | # first element will be fed as decoder input, which is then 'un-guided'
66 | dec_inp = [ tf.zeros_like(target_seq[0], dtype=tf.float32, name="GO") ] + target_seq[:-1]
67 | 
68 | with tf.variable_scope('LSTMCell'):
69 | cells = []
70 | for i in range(num_stacked_layers):
71 | with tf.variable_scope('RNN_{}'.format(i)):
72 | cells.append(tf.contrib.rnn.LSTMCell(hidden_dim))
73 | cell = tf.contrib.rnn.MultiRNNCell(cells)
74 | 
75 | def _rnn_decoder(decoder_inputs,
76 | initial_state,
77 | cell,
78 | loop_function=None,
79 | scope=None):
80 | """RNN decoder for the sequence-to-sequence model.
81 | Args:
82 | decoder_inputs: A list of 2D Tensors [batch_size x input_size].
83 | initial_state: 2D Tensor with shape [batch_size x cell.state_size].
84 | cell: rnn_cell.RNNCell defining the cell function and size.
85 | loop_function: If not None, this function will be applied to the i-th output
86 | in order to generate the i+1-st input, and decoder_inputs will be ignored,
87 | except for the first element ("GO" symbol). This can be used for decoding,
88 | but also for training to emulate http://arxiv.org/abs/1506.03099.
89 | Signature -- loop_function(prev, i) = next
90 | * prev is a 2D Tensor of shape [batch_size x output_size],
91 | * i is an integer, the step number (when advanced control is needed),
92 | * next is a 2D Tensor of shape [batch_size x input_size].
93 | scope: VariableScope for the created subgraph; defaults to "rnn_decoder".
94 | Returns:
95 | A tuple of the form (outputs, state), where:
96 | outputs: A list of the same length as decoder_inputs of 2D Tensors with
97 | shape [batch_size x output_size] containing generated outputs.
98 | state: The state of each cell at the final time-step.
99 | It is a 2D Tensor of shape [batch_size x cell.state_size].
100 | (Note that in some cases, like basic RNN cell or GRU cell, outputs and
101 | states can be the same. They are different for LSTM cells though.)
102 | """
103 | with variable_scope.variable_scope(scope or "rnn_decoder"):
104 | state = initial_state
105 | outputs = []
106 | prev = None
107 | for i, inp in enumerate(decoder_inputs):
108 | if loop_function is not None and prev is not None:
109 | with variable_scope.variable_scope("loop_function", reuse=True):
110 | inp = loop_function(prev, i)
111 | if i > 0:
112 | variable_scope.get_variable_scope().reuse_variables()
113 | output, state = cell(inp, state)
114 | outputs.append(output)
115 | if loop_function is not None:
116 | prev = output
117 | return outputs, state
118 | 
119 | def _basic_rnn_seq2seq(encoder_inputs,
120 | decoder_inputs,
121 | cell,
122 | feed_previous,
123 | dtype=dtypes.float32,
124 | scope=None):
125 | """Basic RNN sequence-to-sequence model.
126 | This model first runs an RNN to encode encoder_inputs into a state vector,
127 | then runs decoder, initialized with the last encoder state, on decoder_inputs.
128 | Encoder and decoder use the same RNN cell type, but don't share parameters.
129 | Args:
130 | encoder_inputs: A list of 2D Tensors [batch_size x input_size].
131 | decoder_inputs: A list of 2D Tensors [batch_size x input_size]. 132 | feed_previous: Boolean; if True, only the first of decoder_inputs will be 133 | used (the "GO" symbol), all other inputs will be generated by the previous 134 | decoder output using _loop_function below. If False, decoder_inputs are used 135 | as given (the standard decoder case). 136 | dtype: The dtype of the initial state of the RNN cell (default: tf.float32). 137 | scope: VariableScope for the created subgraph; default: "basic_rnn_seq2seq". 138 | Returns: 139 | A tuple of the form (outputs, state), where: 140 | outputs: A list of the same length as decoder_inputs of 2D Tensors with 141 | shape [batch_size x output_size] containing the generated outputs. 142 | state: The state of each decoder cell in the final time-step. 143 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 144 | """ 145 | with variable_scope.variable_scope(scope or "basic_rnn_seq2seq"): 146 | enc_cell = copy.deepcopy(cell) 147 | _, enc_state = rnn.static_rnn(enc_cell, encoder_inputs, dtype=dtype) 148 | if feed_previous: 149 | return _rnn_decoder(decoder_inputs, enc_state, cell, _loop_function) 150 | else: 151 | return _rnn_decoder(decoder_inputs, enc_state, cell) 152 | 153 | def _loop_function(prev, _): 154 | '''Naive implementation of loop function for _rnn_decoder. Transform prev from 155 | dimension [batch_size x hidden_dim] to [batch_size x output_dim], which will be 156 | used as decoder input of next time step ''' 157 | return tf.matmul(prev, weights['out']) + biases['out'] 158 | 159 | dec_outputs, dec_memory = _basic_rnn_seq2seq( 160 | enc_inp, 161 | dec_inp, 162 | cell, 163 | feed_previous = feed_previous 164 | ) 165 | 166 | reshaped_outputs = [tf.matmul(i, weights['out']) + biases['out'] for i in dec_outputs] 167 | 168 | # Training loss and optimizer 169 | with tf.variable_scope('Loss'): 170 | # L2 loss 171 | output_loss = 0 172 | for _y, _Y in zip(reshaped_outputs, target_seq): 173 | output_loss += tf.reduce_mean(tf.pow(_y - _Y, 2)) 174 | 175 | # L2 regularization for weights and biases 176 | reg_loss = 0 177 | for tf_var in tf.trainable_variables(): 178 | if 'Biases_' in tf_var.name or 'Weights_' in tf_var.name: 179 | reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var)) 180 | 181 | loss = output_loss + lambda_l2_reg * reg_loss 182 | 183 | with tf.variable_scope('Optimizer'): 184 | optimizer = tf.contrib.layers.optimize_loss( 185 | loss=loss, 186 | learning_rate=learning_rate, 187 | global_step=global_step, 188 | optimizer='Adam', 189 | clip_gradients=GRADIENT_CLIPPING) 190 | 191 | saver = tf.train.Saver 192 | 193 | return dict( 194 | enc_inp = enc_inp, 195 | target_seq = target_seq, 196 | train_op = optimizer, 197 | loss=loss, 198 | saver = saver, 199 | reshaped_outputs = reshaped_outputs, 200 | ) -------------------------------------------------------------------------------- /build_model_with_outliers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import rnn 3 | from tensorflow.python.ops import variable_scope 4 | from tensorflow.python.framework import dtypes 5 | import copy 6 | 7 | ## Parameters 8 | learning_rate = 0.01 9 | lambda_l2_reg = 0.003 10 | 11 | ## Network Parameters 12 | # length of input signals 13 | input_seq_len = 15 14 | # length of output signals 15 | output_seq_len = 20 16 | # size of LSTM Cell 17 | hidden_dim = 64 18 | # num of input signals 19 | input_dim = 1 20 | # num of output signals 21 | output_dim = 1 22 | 
# num of stacked lstm layers
23 | num_stacked_layers = 2
24 | # gradient clipping - to avoid gradient exploding
25 | GRADIENT_CLIPPING = 2.5
26 | 
27 | def build_graph(feed_previous = False):
28 | 
29 | tf.reset_default_graph()
30 | 
31 | global_step = tf.Variable(
32 | initial_value=0,
33 | name="global_step",
34 | trainable=False,
35 | collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
36 | 
37 | weights = {
38 | 'out': tf.get_variable('Weights_out', \
39 | shape = [hidden_dim, output_dim], \
40 | dtype = tf.float32, \
41 | initializer = tf.contrib.layers.xavier_initializer()),
42 | 
43 | 'out_dec_inp': tf.get_variable('Weights_out_dec', \
44 | shape = [output_dim+1, output_dim], \
45 | dtype = tf.float32, \
46 | initializer = tf.contrib.layers.xavier_initializer()),
47 | }
48 | biases = {
49 | 'out': tf.get_variable('Biases_out', \
50 | shape = [output_dim], \
51 | dtype = tf.float32, \
52 | initializer = tf.zeros_initializer()),
53 | 
54 | 'out_dec_inp': tf.get_variable('Biases_out_dec', \
55 | shape = [output_dim], \
56 | dtype = tf.float32, \
57 | initializer = tf.zeros_initializer()),
58 | }
59 | 
60 | with tf.variable_scope('Seq2seq'):
61 | # Encoder: inputs
62 | enc_inp = [
63 | tf.placeholder(tf.float32, shape=(None, input_dim), name="inp_{}".format(t))
64 | for t in range(input_seq_len)
65 | ]
66 | 
67 | # Decoder: target outputs
68 | target_seq = [
69 | tf.placeholder(tf.float32, shape=(None, output_dim), name="y_{}".format(t))
70 | for t in range(output_seq_len)
71 | ]
72 | 
73 | # Extreme events bool vectors for input seq
74 | #input_seq_extremes_bool = [
75 | # tf.placeholder(tf.float32, shape=(None, 1), name="event_bool_{}".format(t))
76 | # for t in range(input_seq_len)
77 | #]
78 | 
79 | # Extreme events bool vectors for output seq
80 | output_seq_extremes_bool = [
81 | tf.placeholder(tf.float32, shape=(None, 1), name="event_bool_{}".format(t))
82 | for t in range(output_seq_len)
83 | ]
84 | 
85 | # Give a "GO" token to the decoder.
86 | # If dec_inp is fed into the decoder as inputs, this is 'guided' training; otherwise only the
87 | # first element will be fed as decoder input, which is then 'un-guided'
88 | dec_inp = [ tf.zeros_like(target_seq[0], dtype=tf.float32, name="GO") ] + target_seq[:-1]
89 | dec_inp = [ tf.concat([b, d], 1) for b, d in zip(output_seq_extremes_bool, dec_inp) ]
90 | 
91 | #enc_inp = [ tf.concat([b, e], 1) for b, e in zip(input_seq_extremes_bool, enc_inp_raw) ]
92 | 
93 | with tf.variable_scope('LSTMCell'):
94 | cells = []
95 | for i in range(num_stacked_layers):
96 | with tf.variable_scope('RNN_{}'.format(i)):
97 | cells.append(tf.contrib.rnn.LSTMCell(hidden_dim))
98 | cell = tf.contrib.rnn.MultiRNNCell(cells)
99 | 
100 | def _rnn_decoder(decoder_inputs,
101 | initial_state,
102 | cell,
103 | loop_function=None,
104 | scope=None):
105 | """RNN decoder for the sequence-to-sequence model.
106 | Args:
107 | decoder_inputs: A list of 2D Tensors [batch_size x input_size].
108 | initial_state: 2D Tensor with shape [batch_size x cell.state_size].
109 | cell: rnn_cell.RNNCell defining the cell function and size.
110 | loop_function: If not None, this function will be applied to the i-th output
111 | in order to generate the i+1-st input, and decoder_inputs will be ignored,
112 | except for the first element ("GO" symbol). This can be used for decoding,
113 | but also for training to emulate http://arxiv.org/abs/1506.03099.
114 | Signature -- loop_function(prev, i) = next 115 | * prev is a 2D Tensor of shape [batch_size x output_size], 116 | * i is an integer, the step number (when advanced control is needed), 117 | * next is a 2D Tensor of shape [batch_size x input_size]. 118 | scope: VariableScope for the created subgraph; defaults to "rnn_decoder". 119 | Returns: 120 | A tuple of the form (outputs, state), where: 121 | outputs: A list of the same length as decoder_inputs of 2D Tensors with 122 | shape [batch_size x output_size] containing generated outputs. 123 | state: The state of each cell at the final time-step. 124 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 125 | (Note that in some cases, like basic RNN cell or GRU cell, outputs and 126 | states can be the same. They are different for LSTM cells though.) 127 | """ 128 | with variable_scope.variable_scope(scope or "rnn_decoder"): 129 | state = initial_state 130 | outputs = [] 131 | prev = None 132 | for i, inp in enumerate(decoder_inputs): 133 | if loop_function is not None and prev is not None: 134 | with variable_scope.variable_scope("loop_function", reuse=True): 135 | inp = loop_function(prev, i) 136 | else: 137 | inp = tf.matmul(inp, weights['out_dec_inp']) + biases['out_dec_inp'] 138 | if i > 0: 139 | variable_scope.get_variable_scope().reuse_variables() 140 | output, state = cell(inp, state) 141 | outputs.append(output) 142 | if loop_function is not None: 143 | prev = output 144 | return outputs, state 145 | 146 | def _basic_rnn_seq2seq(encoder_inputs, 147 | decoder_inputs, 148 | cell, 149 | feed_previous, 150 | dtype=dtypes.float32, 151 | scope=None): 152 | """Basic RNN sequence-to-sequence model. 153 | This model first runs an RNN to encode encoder_inputs into a state vector, 154 | then runs decoder, initialized with the last encoder state, on decoder_inputs. 155 | Encoder and decoder use the same RNN cell type, but don't share parameters. 156 | Args: 157 | encoder_inputs: A list of 2D Tensors [batch_size x input_size]. 158 | decoder_inputs: A list of 2D Tensors [batch_size x input_size]. 159 | feed_previous: Boolean; if True, only the first of decoder_inputs will be 160 | used (the "GO" symbol), all other inputs will be generated by the previous 161 | decoder output using _loop_function below. If False, decoder_inputs are used 162 | as given (the standard decoder case). 163 | dtype: The dtype of the initial state of the RNN cell (default: tf.float32). 164 | scope: VariableScope for the created subgraph; default: "basic_rnn_seq2seq". 165 | Returns: 166 | A tuple of the form (outputs, state), where: 167 | outputs: A list of the same length as decoder_inputs of 2D Tensors with 168 | shape [batch_size x output_size] containing the generated outputs. 169 | state: The state of each decoder cell in the final time-step. 170 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 171 | """ 172 | with variable_scope.variable_scope(scope or "basic_rnn_seq2seq"): 173 | enc_cell = copy.deepcopy(cell) 174 | _, enc_state = rnn.static_rnn(enc_cell, encoder_inputs, dtype=dtype) 175 | if feed_previous: 176 | return _rnn_decoder(decoder_inputs, enc_state, cell, _loop_function) 177 | else: 178 | return _rnn_decoder(decoder_inputs, enc_state, cell) 179 | 180 | def _loop_function(prev, i): 181 | '''Naive implementation of loop function for _rnn_decoder. 
Transform prev from
182 | dimension [batch_size x hidden_dim] to [batch_size x output_dim], which will be
183 | used as the decoder input of the next time step '''
184 | #return tf.matmul(prev, weights['out']) + biases['out']
185 | temp_out = tf.matmul(prev, weights['out']) + biases['out']
186 | temp_concat = tf.concat([output_seq_extremes_bool[i], temp_out], 1)
187 | return tf.matmul(temp_concat, weights['out_dec_inp']) + biases['out_dec_inp']
188 | 
189 | dec_outputs, dec_memory = _basic_rnn_seq2seq(
190 | enc_inp,
191 | dec_inp,
192 | cell,
193 | feed_previous = feed_previous
194 | )
195 | 
196 | reshaped_outputs = [tf.matmul(i, weights['out']) + biases['out'] for i in dec_outputs]
197 | 
198 | # Training loss and optimizer
199 | with tf.variable_scope('Loss'):
200 | # L2 loss
201 | output_loss = 0
202 | for _y, _Y in zip(reshaped_outputs, target_seq):
203 | output_loss += tf.reduce_mean(tf.pow(_y - _Y, 2))
204 | 
205 | # L2 regularization for weights and biases
206 | reg_loss = 0
207 | for tf_var in tf.trainable_variables():
208 | if 'Biases_' in tf_var.name or 'Weights_' in tf_var.name:
209 | reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))
210 | 
211 | loss = output_loss + lambda_l2_reg * reg_loss
212 | 
213 | with tf.variable_scope('Optimizer'):
214 | optimizer = tf.contrib.layers.optimize_loss(
215 | loss=loss,
216 | learning_rate=learning_rate,
217 | global_step=global_step,
218 | optimizer='Adam',
219 | clip_gradients=GRADIENT_CLIPPING)
220 | 
221 | saver = tf.train.Saver
222 | 
223 | return dict(
224 | enc_inp = enc_inp,
225 | target_seq = target_seq,
226 | train_op = optimizer,
227 | loss=loss,
228 | saver = saver,
229 | reshaped_outputs = reshaped_outputs,
230 | output_seq_extremes_bool = output_seq_extremes_bool,
231 | )
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import requests
3 | 
4 | import random
5 | import math
6 | 
7 | __author__ = "Guillaume Chevalier"
8 | __license__ = "MIT"
9 | __version__ = "2017-03"
10 | 
11 | 
12 | def generate_x_y_data_v1(isTrain, batch_size):
13 | """
14 | Data for exercise 1.
15 | returns: tuple (X, Y)
16 | X is a sine and a cosine from 0.0*pi to 1.5*pi
17 | Y is a sine and a cosine from 1.5*pi to 3.0*pi
18 | Therefore, Y follows X. There is also a random offset
19 | commonly applied to X and Y.
20 | The returned arrays are of shape:
21 | (seq_length, batch_size, output_dim)
22 | Therefore: (10, batch_size, 2)
23 | For this exercise, let's ignore the "isTrain"
24 | argument and test on the same data.
25 | """ 26 | seq_length = 10 27 | 28 | batch_x = [] 29 | batch_y = [] 30 | for _ in range(batch_size): 31 | rand = random.random() * 2 * math.pi 32 | 33 | sig1 = np.sin(np.linspace(0.0 * math.pi + rand, 34 | 3.0 * math.pi + rand, seq_length * 2)) 35 | sig2 = np.cos(np.linspace(0.0 * math.pi + rand, 36 | 3.0 * math.pi + rand, seq_length * 2)) 37 | x1 = sig1[:seq_length] 38 | y1 = sig1[seq_length:] 39 | x2 = sig2[:seq_length] 40 | y2 = sig2[seq_length:] 41 | 42 | x_ = np.array([x1, x2]) 43 | y_ = np.array([y1, y2]) 44 | x_, y_ = x_.T, y_.T 45 | 46 | batch_x.append(x_) 47 | batch_y.append(y_) 48 | 49 | batch_x = np.array(batch_x) 50 | batch_y = np.array(batch_y) 51 | # shape: (batch_size, seq_length, output_dim) 52 | 53 | batch_x = np.array(batch_x).transpose((1, 0, 2)) 54 | batch_y = np.array(batch_y).transpose((1, 0, 2)) 55 | # shape: (seq_length, batch_size, output_dim) 56 | 57 | return batch_x, batch_y 58 | 59 | 60 | def generate_x_y_data_two_freqs(isTrain, batch_size, seq_length): 61 | batch_x = [] 62 | batch_y = [] 63 | for _ in range(batch_size): 64 | offset_rand = random.random() * 2 * math.pi 65 | freq_rand = (random.random() - 0.5) / 1.5 * 15 + 0.5 66 | amp_rand = random.random() + 0.1 67 | 68 | sig1 = amp_rand * np.sin(np.linspace( 69 | seq_length / 15.0 * freq_rand * 0.0 * math.pi + offset_rand, 70 | seq_length / 15.0 * freq_rand * 3.0 * math.pi + offset_rand, 71 | seq_length * 2 72 | ) 73 | ) 74 | 75 | offset_rand = random.random() * 2 * math.pi 76 | freq_rand = (random.random() - 0.5) / 1.5 * 15 + 0.5 77 | amp_rand = random.random() * 1.2 78 | 79 | sig1 = amp_rand * np.cos(np.linspace( 80 | seq_length / 15.0 * freq_rand * 0.0 * math.pi + offset_rand, 81 | seq_length / 15.0 * freq_rand * 3.0 * math.pi + offset_rand, 82 | seq_length * 2 83 | ) 84 | ) + sig1 85 | 86 | x1 = sig1[:seq_length] 87 | y1 = sig1[seq_length:] 88 | 89 | x_ = np.array([x1]) 90 | y_ = np.array([y1]) 91 | x_, y_ = x_.T, y_.T 92 | 93 | batch_x.append(x_) 94 | batch_y.append(y_) 95 | 96 | batch_x = np.array(batch_x) 97 | batch_y = np.array(batch_y) 98 | # shape: (batch_size, seq_length, output_dim) 99 | 100 | batch_x = np.array(batch_x).transpose((1, 0, 2)) 101 | batch_y = np.array(batch_y).transpose((1, 0, 2)) 102 | # shape: (seq_length, batch_size, output_dim) 103 | 104 | return batch_x, batch_y 105 | 106 | 107 | def generate_x_y_data_v2(isTrain, batch_size): 108 | """ 109 | Similar the the "v1" function, but here we generate a signal with 110 | 2 frequencies chosen randomly - and this for the 2 signals. Plus, 111 | the lenght of the examples is of 15 rather than 10. 112 | So we have 30 total values for past and future. 113 | """ 114 | return generate_x_y_data_two_freqs(isTrain, batch_size, seq_length=15) 115 | 116 | 117 | def generate_x_y_data_v3(isTrain, batch_size): 118 | """ 119 | Similar to the "v2" function, but here we generate a signal 120 | with noise in the X values. Plus, 121 | the lenght of the examples is of 30 rather than 10. 122 | So we have 60 total values for past and future. 
123 | """ 124 | seq_length = 30 125 | x, y = generate_x_y_data_two_freqs( 126 | isTrain, batch_size, seq_length=seq_length) 127 | noise_amount = random.random() * 0.15 + 0.10 128 | x = x + noise_amount * np.random.randn(seq_length, batch_size, 1) 129 | 130 | avg = np.average(x) 131 | std = np.std(x) + 0.0001 132 | x = x - avg 133 | y = y - avg 134 | x = x / std / 2.5 135 | y = y / std / 2.5 136 | 137 | return x, y 138 | 139 | 140 | def loadCurrency(curr, window_size): 141 | """ 142 | Return the historical data for the USD or EUR bitcoin value. Is done with an web API call. 143 | curr = "USD" | "EUR" 144 | """ 145 | # For more info on the URL call, it is inspired by : 146 | # https://github.com/Levino/coindesk-api-node 147 | r = requests.get( 148 | "http://api.coindesk.com/v1/bpi/historical/close.json?start=2010-07-17&end=2017-03-03¤cy={}".format( 149 | curr 150 | ) 151 | ) 152 | data = r.json() 153 | time_to_values = sorted(data["bpi"].items()) 154 | values = [val for key, val in time_to_values] 155 | kept_values = values[1000:] 156 | 157 | X = [] 158 | Y = [] 159 | for i in range(len(kept_values) - window_size * 2): 160 | X.append(kept_values[i:i + window_size]) 161 | Y.append(kept_values[i + window_size:i + window_size * 2]) 162 | 163 | # To be able to concat on inner dimension later on: 164 | X = np.expand_dims(X, axis=2) 165 | Y = np.expand_dims(Y, axis=2) 166 | 167 | return X, Y 168 | 169 | 170 | def normalize(X, Y=None): 171 | """ 172 | Normalise X and Y according to the mean and standard deviation of the X values only. 173 | """ 174 | # # It would be possible to normalize with last rather than mean, such as: 175 | # lasts = np.expand_dims(X[:, -1, :], axis=1) 176 | # assert (lasts[:, :] == X[:, -1, :]).all(), "{}, {}, {}. {}".format(lasts[:, :].shape, X[:, -1, :].shape, lasts[:, :], X[:, -1, :]) 177 | mean = np.expand_dims(np.average(X, axis=1) + 0.00001, axis=1) 178 | stddev = np.expand_dims(np.std(X, axis=1) + 0.00001, axis=1) 179 | # print (mean.shape, stddev.shape) 180 | # print (X.shape, Y.shape) 181 | X = X - mean 182 | X = X / (2.5 * stddev) 183 | if Y is not None: 184 | assert Y.shape == X.shape, (Y.shape, X.shape) 185 | Y = Y - mean 186 | Y = Y / (2.5 * stddev) 187 | return X, Y 188 | return X 189 | 190 | 191 | def fetch_batch_size_random(X, Y, batch_size): 192 | """ 193 | Returns randomly an aligned batch_size of X and Y among all examples. 194 | The external dimension of X and Y must be the batch size (eg: 1 column = 1 example). 195 | X and Y can be N-dimensional. 196 | """ 197 | assert X.shape == Y.shape, (X.shape, Y.shape) 198 | idxes = np.random.randint(X.shape[0], size=batch_size) 199 | X_out = np.array(X[idxes]).transpose((1, 0, 2)) 200 | Y_out = np.array(Y[idxes]).transpose((1, 0, 2)) 201 | return X_out, Y_out 202 | 203 | X_train = [] 204 | Y_train = [] 205 | X_test = [] 206 | Y_test = [] 207 | 208 | 209 | def generate_x_y_data_v4(isTrain, batch_size): 210 | """ 211 | Return financial data for the bitcoin. 212 | Features are USD and EUR, in the internal dimension. 213 | We normalize X and Y data according to the X only to not 214 | spoil the predictions we ask for. 215 | For every window (window or seq_length), Y is the prediction following X. 216 | Train and test data are separated according to the 80/20 rule. 217 | Therefore, the 20 percent of the test data are the most 218 | recent historical bitcoin values. Every example in X contains 219 | 40 points of USD and then EUR data in the feature axis/dimension. 
220 | Note that the returned X and Y have the same shape
221 | and are returned as a tuple.
222 | """
223 | # 40 past values for the encoder, 40 following values for the decoder's predictions.
224 | seq_length = 40
225 | 
226 | global Y_train
227 | global X_train
228 | global X_test
229 | global Y_test
230 | # First load, with memoization:
231 | if len(Y_test) == 0:
232 | # API call:
233 | X_usd, Y_usd = loadCurrency("USD", window_size=seq_length)
234 | X_eur, Y_eur = loadCurrency("EUR", window_size=seq_length)
235 | 
236 | # All data, aligned:
237 | X = np.concatenate((X_usd, X_eur), axis=2)
238 | Y = np.concatenate((Y_usd, Y_eur), axis=2)
239 | X, Y = normalize(X, Y)
240 | 
241 | # Split 80-20:
242 | X_train = X[:int(len(X) * 0.8)]
243 | Y_train = Y[:int(len(Y) * 0.8)]
244 | X_test = X[int(len(X) * 0.8):]
245 | Y_test = Y[int(len(Y) * 0.8):]
246 | 
247 | if isTrain:
248 | return fetch_batch_size_random(X_train, Y_train, batch_size)
249 | else:
250 | return fetch_batch_size_random(X_test, Y_test, batch_size)
251 | 
--------------------------------------------------------------------------------
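
Usage sketch (not part of the repository files above): the snippet below illustrates how the dictionary returned by build_graph() in build_model_basic.py can be wired up for guided training and then for un-guided inference, assuming TensorFlow 1.x and the module-level settings shown above (input_seq_len = 15, output_seq_len = 20, input_dim = output_dim = 1). The toy sine data, checkpoint name and step count are illustrative only; the author's actual training loop lives in time_series_seq2seq.ipynb.

# Minimal sketch, assuming TensorFlow 1.x with build_model_basic.py importable.
# The toy data generator, checkpoint path and iteration count below are hypothetical,
# not taken from the notebook.
import numpy as np
import tensorflow as tf
import build_model_basic as m

def toy_batch(batch_size=32):
    """Random sine segments shaped (seq_len, batch_size, 1), matching the placeholders."""
    t0 = np.random.rand(batch_size) * 2 * np.pi
    steps = np.arange(m.input_seq_len + m.output_seq_len) * 0.1
    sig = np.sin(t0[None, :] + steps[:, None])[:, :, None]   # (in+out, batch, 1)
    return sig[:m.input_seq_len], sig[m.input_seq_len:]

# Guided (teacher-forced) training: true targets are fed as decoder inputs.
rnn_model = m.build_graph(feed_previous=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(500):
        batch_x, batch_y = toy_batch()
        feed = {rnn_model['enc_inp'][t]: batch_x[t] for t in range(m.input_seq_len)}
        feed.update({rnn_model['target_seq'][t]: batch_y[t] for t in range(m.output_seq_len)})
        _, loss = sess.run([rnn_model['train_op'], rnn_model['loss']], feed)
    # 'saver' holds the tf.train.Saver class itself, so instantiate it before saving.
    rnn_model['saver']().save(sess, './univariate_basic_demo')

# Un-guided inference: each predicted step is fed back as the next decoder input.
rnn_model = m.build_graph(feed_previous=True)
with tf.Session() as sess:
    rnn_model['saver']().restore(sess, './univariate_basic_demo')
    batch_x, _ = toy_batch(batch_size=1)
    feed = {rnn_model['enc_inp'][t]: batch_x[t] for t in range(m.input_seq_len)}
    # The target_seq placeholders still need values (the "GO" token is built from them),
    # but they are ignored here because the decoder loops its own outputs back.
    feed.update({rnn_model['target_seq'][t]: np.zeros([1, m.output_dim]) for t in range(m.output_seq_len)})
    preds = sess.run(rnn_model['reshaped_outputs'], feed)    # list of output_seq_len arrays, each (1, 1)

The same pattern applies to build_model_multi_variate.py (with wider input_dim/output_dim) and to build_model_with_outliers.py, which additionally expects the output_seq_extremes_bool placeholders to be fed.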