├── .gitignore ├── README.md ├── multivariate ├── Generate_stock_data.py ├── attention_encoder.py ├── data │ ├── README.md │ ├── electricity │ │ └── electricity.txt.gz │ ├── exchange_rate │ │ └── exchange_rate.txt.gz │ ├── solar-energy │ │ └── solar_AL.txt.gz │ └── traffic │ │ └── traffic.txt.gz ├── example.sh ├── main.py └── utils.py └── univariate ├── Generate_stock_data.py ├── data ├── GEFCom2014 │ └── Price.csv └── beijing │ ├── PRSA_data_2010.1.1-2014.12.31.csv │ ├── interpolate.csv │ └── padding_zeros.csv ├── example.sh ├── get_score.py ├── main.py ├── results ├── bj_result.csv └── gef.csv └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | TEST 2 | backup 3 | *.txt 4 | data 5 | log* 6 | *.pyc 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DARNN 2 | An implementation of the paper 3 | 4 | [A Dual-Stage Attention-Based Recurrent Neural Network for Time Series Prediction](https://arxiv.org/abs/1704.02971). 5 | Yao Qin, Dongjin Song, Haifeng Chen, Wei Cheng, Guofei Jiang, Garrison W. Cottrell 6 | International Joint Conference on Artificial Intelligence (IJCAI), 2017. 7 | 8 | Runs with TensorFlow 1.3. 9 | 10 | This repository is used to obtain baseline results for the following paper: 11 | [A Memory-Network Based Solution for Multivariate Time-Series Forecasting 12 | ](https://arxiv.org/abs/1809.02105). A (community) implementation of that paper is available [here](https://github.com/Maple728/MTNet/tree/master). 13 | 14 | ## Time Series Datasets 15 | 16 | ### Univariate 17 | * [Beijing PM2.5](https://github.com/petwill/DARNN/univariate/data/beijing) 18 | * [GEFCom2014 Electricity Price](https://github.com/petwill/DARNN/univariate/data/GEFC2014) 19 | 20 | ### Multivariate 21 | 22 | [Source](https://github.com/laiguokun/multivariate-time-series-data) 23 | 24 | - Solar-Energy 25 | - Traffic 26 | - Electricity 27 | - Exchange-Rate 28 | -------------------------------------------------------------------------------- /multivariate/Generate_stock_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pandas as pd 4 | class Input_data: 5 | def __init__(self, batch_size, n_step_encoder, n_step_decoder, n_hidden_encoder, i, filename, timestep=None, horizon=3): 6 | 7 | self.horizon = horizon 8 | self.n_label = 1 9 | 10 | # read the data 11 | data = pd.read_csv(filename, header=None) 12 | data = data[[j for j in range(data.shape[1]) if j != i] + [i]] 13 | 14 | 15 | self.data = np.array(data) 16 | 17 | sz = self.data.shape[0] 18 | train_size = int(sz * .6) 19 | val_size = int(sz * .8) 20 | 21 | self.train = self.data[:train_size, :] 22 | self.val = self.data[train_size:val_size, :] 23 | self.test = self.data[val_size:, :] 24 | 25 | # parameters for the network 26 | self.batch_size = batch_size 27 | self.n_hidden_state = n_hidden_encoder 28 | self.n_step_encoder = n_step_encoder 29 | self.n_step_decoder = n_step_decoder 30 | 31 | 32 | self.n_train = len(self.train) 33 | self.n_val = len(self.val) 34 | self.n_test = len(self.test) 35 | self.n_feature = self.data.shape[1]- 1 36 | 37 | 38 | # data normalization 39 | self.mean = np.mean(self.train,axis=0) 40 | self.stdev = np.std(self.train,axis=0) 41 | 42 | 43 | # guard against stdev = 0, which would otherwise produce NaN after normalization 44 | for i in range (len(self.stdev)): 45 | if self.stdev[i] < 0.00000001: 46 | 
self.stdev[i] = 1 47 | 48 | 49 | self.train = (self.train-self.mean)/self.stdev 50 | self.test = (self.test-self.mean)/self.stdev 51 | self.val = (self.val - self.mean)/self.stdev 52 | print(self.train.shape, self.test.shape, self.val.shape) 53 | 54 | def next_batch(self): 55 | # generate of a random index from the range [0, self.n_train -self.n_step_decoder +1] 56 | index = random.sample(list(np.arange(0,self.n_train-self.n_step_decoder-self.horizon+1)),self.batch_size) 57 | # index = np.arange(0,self.n_train-self.n_step_decoder) 58 | np.random.shuffle(index) 59 | index = np.array(index) 60 | # the shape of batch_x, label, previous_y 61 | 62 | # batch_x = np.zeros([index.shape[0],self.n_step_encoder, self.n_feature]) 63 | # label = np.zeros([index.shape[0], self.n_label]) 64 | # previous_y = np.zeros([index.shape[0],self.n_step_decoder, self.n_label]) 65 | batch_x = np.zeros([self.batch_size,self.n_step_encoder, self.n_feature]) 66 | label = np.zeros([self.batch_size, self.n_label]) 67 | previous_y = np.zeros([self.batch_size,self.n_step_decoder, self.n_label]) 68 | 69 | temp = 0 70 | for item in index: 71 | batch_x[temp,:,:] = self.train[item:item+self.n_step_encoder, :self.n_feature] 72 | previous_y[temp,:,0] = self.train[item:item + self.n_step_decoder, -1] 73 | temp += 1 74 | label[:,0] = np.array(self.train[index + self.n_step_decoder + self.horizon - 1, -1]) 75 | encoder_states = np.swapaxes(batch_x, 1, 2) 76 | return batch_x, label, previous_y, encoder_states 77 | 78 | def returnMean(self): 79 | return self.mean, self.stdev 80 | 81 | def validation(self): 82 | index = np.arange(0, self.n_val-self.n_step_decoder-self.horizon+1) 83 | index_size = len(index) 84 | val_x = np.zeros([index_size, self.n_step_encoder, self.n_feature]) 85 | val_label = np.zeros([index_size, self.n_label]) 86 | val_prev_y = np.zeros([index_size, self.n_step_decoder, self.n_label]) 87 | temp = 0 88 | for item in index: 89 | val_x[temp,:,:] = self.val[item:item + self.n_step_encoder, :self.n_feature] 90 | val_prev_y[temp,:,0] = self.val[item:item + self.n_step_decoder, -1] 91 | temp += 1 92 | 93 | val_label[:, 0] = np.array(self.val[index + self.n_step_decoder + self.horizon - 1, -1]) 94 | encoder_states_val = np.swapaxes(val_x,1,2) 95 | return val_x, val_label, val_prev_y, encoder_states_val 96 | 97 | def testing(self): 98 | index = np.arange(0,self.n_test-self.n_step_decoder-self.horizon+1) 99 | index_size = len(index) 100 | test_x = np.zeros([index_size, self.n_step_encoder, self.n_feature]) 101 | test_label = np.zeros([index_size, self.n_label]) 102 | test_prev_y = np.zeros([index_size, self.n_step_decoder, self.n_label]) 103 | temp = 0 104 | for item in index: 105 | test_x[temp,:,:] = self.test[item:item + self.n_step_encoder, :self.n_feature] 106 | test_prev_y[temp,:,0] = self.test[item:item + self.n_step_decoder, -1] 107 | temp += 1 108 | 109 | test_label[:, 0] = np.array(self.test[index + self.n_step_decoder + self.horizon - 1, -1]) 110 | encoder_states_test = np.swapaxes(test_x,1,2) 111 | return test_x, test_label, test_prev_y, encoder_states_test 112 | -------------------------------------------------------------------------------- /multivariate/attention_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # We disable pylint because we need python3 compatibility. 
6 | from six.moves import xrange # pylint: disable=redefined-builtin 7 | # from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl #omit when tf = 1.3 8 | from tensorflow.python.ops import rnn_cell_impl 9 | from tensorflow.python.framework import dtypes 10 | from tensorflow.python.framework import ops 11 | from tensorflow.python.ops import array_ops 12 | from tensorflow.python.ops import math_ops 13 | from tensorflow.python.ops import nn_ops 14 | from tensorflow.python.ops import variable_scope 15 | from tensorflow.python.util import nest 16 | 17 | # TODO(ebrevdo): Remove once _linear is fully deprecated. 18 | # linear = core_rnn_cell_impl._linear # pylint: disable=protected-access #omit when tf = 1.3 19 | linear = rnn_cell_impl._linear #add when tf = 1.3 20 | 21 | def attention_encoder(encoder_inputs, attention_states, cell, 22 | output_size=None, num_heads=1, 23 | dtype=dtypes.float32, scope=None): 24 | 25 | """RNN encoder with attention. 26 | In this context "attention" means that, during encoding, the RNN can look up 27 | information in the additional tensor "attention_states", which is constructed by transpose the dimensions of time steps and input features of the inputs, 28 | and it does this to focus on a few features of the input. 29 | 30 | Args: 31 | encoder_inputs: A list of 2D Tensors [batch_size x n_input_encoder]. 32 | initial_state: 2D Tensor [batch_size x cell.state_size]. 33 | attention_states: 3D Tensor [batch_size x attn_length x attn_size]. 34 | cell: rnn_cell.RNNCell defining the cell function and size. 35 | output_size: Size of the output vectors; if None, we use cell.output_size. 36 | num_heads: Number of attention heads that read from attention_states. 37 | dtype: The dtype to use for the RNN initial state (default: tf.float32). 38 | scope: VariableScope for the created subgraph; default: "attention_decoder". 39 | 40 | Returns: 41 | A tuple of the form (outputs, state, attn_weights), where: 42 | outputs: A list of the encoder hidden states. Each element is a 2D Tensor of shape [batch_size x output_size]. 43 | state: The state of encoder cell at the final time-step. It is a 2D Tensor of shape [batch_size x cell.state_size]. 44 | attn_weights: A list of the input attention weights. Each element is a 2D Tensor of shape [batch_size x attn_length] 45 | Raises: 46 | ValueError: when num_heads is not positive, there are no inputs, shapes 47 | of attention_states are not set, or input size cannot be inferred 48 | from the input. 49 | """ 50 | if not encoder_inputs: 51 | raise ValueError("Must provide at least 1 input to attention encoder.") 52 | if num_heads < 1: 53 | raise ValueError("With less than 1 heads, use a non-attention encoder.") 54 | if not attention_states.get_shape()[1:2].is_fully_defined(): 55 | raise ValueError("Shape[1] and [2] of attention_states must be known: %s" 56 | % attention_states.get_shape()) 57 | if output_size is None: 58 | output_size = cell.output_size 59 | 60 | with variable_scope.variable_scope(scope or "attention_encoder"): 61 | # get the batch_size of the encoder_input 62 | batch_size = array_ops.shape(encoder_inputs[0])[0] # Needed for reshaping. 63 | # attention_state.shape (batch_size, n_input_encoder, n_steps_encoder) 64 | attn_length = attention_states.get_shape()[1].value # n_input_encoder 65 | attn_size = attention_states.get_shape()[2].value # n_steps_encoder 66 | 67 | # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before. 
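# Roughly, this corresponds to the input-attention score of the DARNN paper:
#   e_t^k = v_e^T tanh(W_e [h_{t-1}; s_{t-1}] + U_e x^k),  alpha_t^k = softmax over k of e_t^k,
# where x^k is the k-th driving series over the encoder window. The 1x1 convolution below applies
# U_e to every x^k at once, and the linear(query, ...) call inside attention() applies W_e to the
# flattened LSTM state [h_{t-1}; s_{t-1}].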
68 | # hidden_features shape: (batch_size, attn_length, 1, attn_size) 69 | hidden = array_ops.reshape( 70 | attention_states, [-1, attn_length, 1, attn_size]) 71 | hidden_features = [] 72 | v = [] 73 | attention_vec_size = attn_size # Size of query vectors for attention. 74 | for a in xrange(num_heads): 75 | k = variable_scope.get_variable("Attn_EncoderW_%d" % a, 76 | [1, 1, attn_size, attention_vec_size]) 77 | hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")) 78 | v.append(variable_scope.get_variable("AttnEncoderV_%d" % a, 79 | [attention_vec_size])) 80 | # how to get the initial_state 81 | initial_state_size = array_ops.stack([batch_size, output_size]) 82 | initial_state = [array_ops.zeros(initial_state_size,dtype=dtype) for _ in xrange(2)] 83 | state = initial_state 84 | 85 | def attention(query): 86 | """Put attention masks on hidden using hidden_features and query.""" 87 | ds = [] # Results of attention reads will be stored here. 88 | if nest.is_sequence(query): # If the query is a tuple, flatten it. 89 | query_list = nest.flatten(query) 90 | for q in query_list: # Check that ndims == 2 if specified. 91 | ndims = q.get_shape().ndims 92 | if ndims: 93 | assert ndims == 2 94 | query = array_ops.concat(query_list,1) 95 | for a in xrange(num_heads): 96 | with variable_scope.variable_scope("AttentionEncoder_%d" % a): 97 | # y with the shape (batch_size, attention_vec_size) 98 | y = linear(query, attention_vec_size, True) 99 | # y with the shape (batch_size, 1, 1, attention_vec_size) 100 | y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size]) 101 | # Attention mask is a softmax of v^T * tanh(...). 102 | # hidden_features with the shape (batch_size, attn_length, 1, attn_size) 103 | s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3]) 104 | # a with shape (batch_size, attn_length) 105 | # a is the attention weight 106 | a = nn_ops.softmax(s) 107 | ds.append(a) 108 | return ds 109 | 110 | outputs = [] 111 | attn_weights = [] 112 | batch_attn_size = array_ops.stack([batch_size, attn_length]) 113 | attns = [array_ops.zeros(batch_attn_size, dtype=dtype) 114 | for _ in xrange(num_heads)] 115 | 116 | # i is the index of the which time step 117 | # inp is numpy.array and the shape of inp is (batch_size, n_feature) 118 | for i, inp in enumerate(encoder_inputs): 119 | if i > 0: 120 | variable_scope.get_variable_scope().reuse_variables() 121 | input_size = inp.get_shape().with_rank(2)[1] 122 | if input_size.value is None: 123 | raise ValueError("Could not infer input size from input: %s" % inp.name) 124 | 125 | # multiply attention weights with the original input 126 | # get the newly input 127 | x = attns[0]*inp 128 | # Run the BasicLSTM with the newly input 129 | cell_output, state = cell(x, state) 130 | 131 | # Run the attention mechanism. 132 | attns = attention(state) 133 | 134 | with variable_scope.variable_scope("AttnEncoderOutputProjection"): 135 | output = cell_output 136 | 137 | outputs.append(output) 138 | attn_weights.append(attns) 139 | 140 | return outputs, state, attn_weights 141 | 142 | -------------------------------------------------------------------------------- /multivariate/data/README.md: -------------------------------------------------------------------------------- 1 | # Multivariate Time series Data sets 2 | 3 | In this githup repo, we provide four data sets could be used for researches related to the multivariate time series signals. The format is same for the different datasets. 
Assuming a time series signal contains T time stamps and n sensors at each time stamp, the data file contains T lines, and each line holds n real numbers separated by commas. 4 | 5 | ### Paper 6 | 7 | [Modeling Long- and Short-Term Temporal Patterns with Deep Neural Networks](https://arxiv.org/abs/1703.07015) 8 | 9 | 10 | ### Electricity consumption 11 | 12 | 13 | The raw dataset is in https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014. It records the electricity consumption in kWh every 15 minutes from 2011 to 2014. Because some dimensions are equal to 0 in 2011, we eliminate the records from that year; the final data contains the electricity consumption of 321 clients from 2012 to 2014, and we converted the data to reflect hourly consumption. 14 | 15 | ### Traffic Usage 16 | 17 | The raw data is in http://pems.dot.ca.gov. The data in this repo is a collection of 48 months (2015-2016) of hourly data from the California Department of Transportation. The data describes the road occupancy rates (between 0 and 1) measured by different sensors on San Francisco Bay Area freeways. 18 | 19 | ### Solar Energy 20 | 21 | The raw data is in http://www.nrel.gov/grid/solar-power-data.html. It contains the solar power production records for the year 2006, sampled every 10 minutes from 137 PV plants in Alabama. 22 | 23 | ### Exchange Rate 24 | 25 | A collection of the daily exchange rates of eight countries (Australia, Britain, Canada, Switzerland, China, Japan, New Zealand and Singapore) from 1990 to 2016. 26 | -------------------------------------------------------------------------------- /multivariate/data/electricity/electricity.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunfanyunn/DARNN/b3f83efac0fc41180f50cfc3087405b884f43e32/multivariate/data/electricity/electricity.txt.gz -------------------------------------------------------------------------------- /multivariate/data/exchange_rate/exchange_rate.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunfanyunn/DARNN/b3f83efac0fc41180f50cfc3087405b884f43e32/multivariate/data/exchange_rate/exchange_rate.txt.gz -------------------------------------------------------------------------------- /multivariate/data/solar-energy/solar_AL.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunfanyunn/DARNN/b3f83efac0fc41180f50cfc3087405b884f43e32/multivariate/data/solar-energy/solar_AL.txt.gz -------------------------------------------------------------------------------- /multivariate/data/traffic/traffic.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunfanyunn/DARNN/b3f83efac0fc41180f50cfc3087405b884f43e32/multivariate/data/traffic/traffic.txt.gz -------------------------------------------------------------------------------- /multivariate/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # TEST is a virtualenv 4 | . 
../../TEST/bin/activate 5 | 6 | python main.py $@ 7 | -------------------------------------------------------------------------------- /multivariate/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 3 | import tensorflow as tf 4 | import numpy as np 5 | # from tensorflow.contrib.legacy_seq2seq.python.ops import seq2seq 6 | from tensorflow.contrib.rnn.python.ops import rnn 7 | # from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl as rnn_cell #omit when tf = 1.3 8 | from tensorflow.python.ops import rnn_cell_impl as rnn_cell #add when tf = 1.3 9 | # import attention_encoder 10 | from utils import * 11 | import Generate_stock_data as GD 12 | import pandas as pd 13 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Disable Tensorflow debugging message 14 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1) 15 | 16 | from sklearn.metrics import mean_absolute_error 17 | from sklearn.metrics import mean_squared_error 18 | 19 | import scipy.stats 20 | import math 21 | #import keras 22 | #import keras.backend as K 23 | import os 24 | # os.environ['KERAS_BACKEND'] = 'tensorflow' 25 | 26 | def rrse_(y_true, y_pred): 27 | return np.sqrt(np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2)) 28 | 29 | def CORR(y_true, y_pred): 30 | N = y_true.shape[0] 31 | total = 0.0 32 | for i in range(N): 33 | if math.isnan(scipy.stats.pearsonr(y_true[i], y_pred[i])[0]): 34 | N -= 1 35 | else: 36 | total += scipy.stats.pearsonr(y_true[i], y_pred[i])[0] 37 | return total / N 38 | 39 | def RNN(encoder_input, decoder_input, weights, biases, encoder_attention_states, 40 | n_input_encoder, n_steps_encoder, n_hidden_encoder, 41 | n_input_decoder, n_steps_decoder, n_hidden_decoder): 42 | 43 | # Prepare data shape to match `rnn` function requirements 44 | # Current data input shape: (batch_size, n_steps, n_input) 45 | # Required shape: 'n_steps' tensors list of shape (batch_size, n_input) 46 | 47 | # Prepare data for encoder 48 | # Permuting batch_size and n_steps 49 | encoder_input = tf.transpose(encoder_input, [1, 0, 2]) 50 | # Reshaping to (n_steps*batch_size, n_input) 51 | encoder_input = tf.reshape(encoder_input, [-1, n_input_encoder]) 52 | # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) 53 | encoder_input = tf.split(encoder_input, n_steps_encoder, 0) 54 | 55 | # Prepare data for decoder 56 | # Permuting batch_size and n_steps 57 | decoder_input = tf.transpose(decoder_input, [1, 0, 2]) 58 | # Reshaping to (n_steps*batch_size, n_input) 59 | decoder_input = tf.reshape(decoder_input, [-1, n_input_decoder]) 60 | # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) 61 | decoder_input = tf.split(decoder_input, n_steps_decoder,0 ) 62 | 63 | # Encoder. 64 | with tf.variable_scope('encoder') as scope: 65 | encoder_cell = rnn_cell.BasicLSTMCell(n_hidden_encoder, forget_bias=1.0) 66 | encoder_outputs, encoder_state, attn_weights = attention_encoder(encoder_input, 67 | encoder_attention_states, encoder_cell) 68 | 69 | # First calculate a concatenation of encoder outputs to put attention on. 
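# Each element of encoder_outputs has shape (batch_size, n_hidden_encoder); reshaping each one to
# (batch_size, 1, n_hidden_encoder) and concatenating along axis 1 yields attention_states of
# shape (batch_size, n_steps_encoder, n_hidden_encoder), which the temporal-attention decoder
# attends over.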
70 | top_states = [tf.reshape(e, [-1, 1, encoder_cell.output_size]) for e in encoder_outputs] 71 | attention_states = tf.concat(top_states,1) 72 | 73 | with tf.variable_scope('decoder') as scope: 74 | decoder_cell = rnn_cell.BasicLSTMCell(n_hidden_decoder, forget_bias=1.0) 75 | outputs, states, attn_weights = attention_decoder(decoder_input, encoder_state, 76 | attention_states, decoder_cell) 77 | 78 | return tf.matmul(outputs[-1], weights['out1']) + biases['out1'], attn_weights 79 | 80 | def mean_absolute_percentage_error(y_true, y_pred): 81 | """ 82 | Use of this metric is not recommended; for illustration only. 83 | See other regression metrics on sklearn docs: 84 | http://scikit-learn.org/stable/modules/classes.html#regression-metrics 85 | Use like any other metric 86 | >>> y_true = [3, -0.5, 2, 7]; y_pred = [2.5, -0.3, 2, 8] 87 | >>> mean_absolute_percentage_error(y_true, y_pred) 88 | Out[]: 24.791666666666668 89 | """ 90 | 91 | # y_true, y_pred = check_arrays(y_true, y_pred) 92 | 93 | ## Note: does not handle mix 1d representation 94 | #if _is_1d(y_true): 95 | # y_true, y_pred = _check_1d_array(y_true, y_pred) 96 | 97 | return np.mean(np.abs((y_true - y_pred) / y_true)) * 100 98 | 99 | def root_relative_squared_error(y_true, y_pred): 100 | mn = np.mean(y_true) 101 | return np.sqrt(np.average((y_true-y_pred)**2))/np.sqrt(np.average((y_true - mn)**2)) 102 | 103 | 104 | def go(dataset, horizon): 105 | 106 | learning_rate = 0.001 107 | training_iters = 1000000 108 | batch_size = 128 109 | 110 | model_path = './{}_model/'.format(dataset) 111 | filename = './data/{}/{}.txt'.format(dataset, dataset) 112 | df = pd.read_csv(filename,header=None) 113 | display_step = int(df.shape[0]*.8)//batch_size 114 | 115 | timestep = 10 116 | # Network Parameters 117 | # encoder parameter 118 | num_feature = df.shape[1]-1 # number of index #98 #72 119 | n_input_encoder = num_feature # n_feature of encoder input #98 #72 120 | n_steps_encoder = timestep# time steps 121 | # n_hidden_encoder = 256 # size of hidden units 122 | n_hidden_encoder = 64 123 | 124 | # decoder parameter 125 | n_input_decoder = 1 126 | n_steps_decoder = timestep-1 127 | # n_hidden_decoder = 256 128 | n_hidden_decoder = 64 129 | n_classes = 1 # size of the decoder output 130 | 131 | ret_maes = [] 132 | ret_rmses = [] 133 | ret_mapes = [] 134 | 135 | all_y_test = [] 136 | all_y_pred = [] 137 | for i in range(num_feature): 138 | print('predicting {} series out of {}'.format(i, num_feature)) 139 | 140 | tf.reset_default_graph() 141 | # Parameters 142 | 143 | # tf Graph input 144 | encoder_input = tf.placeholder("float", [None, n_steps_encoder, n_input_encoder]) 145 | decoder_input = tf.placeholder("float", [None, n_steps_decoder, n_input_decoder]) 146 | decoder_gt = tf.placeholder("float", [None, n_classes]) 147 | encoder_attention_states = tf.placeholder("float", [None, n_input_encoder, n_steps_encoder]) 148 | # Define weights 149 | weights = {'out1': tf.Variable(tf.random_normal([n_hidden_decoder, n_classes]))} 150 | biases = {'out1': tf.Variable(tf.random_normal([n_classes]))} 151 | 152 | pred, attn_weights = RNN(encoder_input, decoder_input, weights, biases, encoder_attention_states, 153 | n_input_encoder, n_steps_encoder, n_hidden_encoder, 154 | n_input_decoder, n_steps_decoder, n_hidden_decoder) 155 | 156 | # Define loss and optimizer 157 | cost = tf.reduce_sum(tf.pow(tf.subtract(pred, decoder_gt), 2)) 158 | loss = tf.pow(tf.subtract(pred, decoder_gt), 2) 159 | optimizer = 
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 160 | init = tf.global_variables_initializer() 161 | 162 | # save the model 163 | saver = tf.train.Saver() 164 | loss_value = [] 165 | step_value = [] 166 | # loss_test=[] 167 | loss_val = [] 168 | 169 | maes = [] 170 | rmses = [] 171 | mapes = [] 172 | 173 | 174 | # Launch the graph 175 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 176 | 177 | sess.run(init) 178 | step = 1 179 | count = 1 180 | epochs = 50 181 | 182 | Data = GD.Input_data(batch_size, n_steps_encoder, n_steps_decoder, n_hidden_encoder, i, filename, n_classes, horizon) 183 | 184 | mn_validation_loss = 1e15 185 | ret_y_pred = ret_y_test = None 186 | # Keep training until reach max iterations 187 | while step < training_iters: 188 | # the shape of batch_x is (batch_size, n_steps, n_input) 189 | batch_x, batch_y, prev_y, encoder_states = Data.next_batch() 190 | feed_dict = {encoder_input: batch_x, decoder_gt: batch_y, decoder_input: prev_y, 191 | encoder_attention_states:encoder_states} 192 | # Run optimization op (backprop) 193 | sess.run(optimizer, feed_dict) 194 | # display the result 195 | if step % display_step == 0: 196 | # Calculate batch loss 197 | 198 | if step // display_step > epochs: 199 | break 200 | 201 | loss = sess.run(cost, feed_dict)/batch_size 202 | print ("Epoch", step //display_step, ", Minibatch Loss= " + "{:.6f}".format(loss)) 203 | 204 | #store the value 205 | loss_value.append(loss) 206 | step_value.append(step) 207 | 208 | # Val 209 | val_x, val_y, val_prev_y, encoder_states_val = Data.validation() 210 | feed_dict = {encoder_input: val_x, decoder_gt: val_y, decoder_input: val_prev_y, 211 | encoder_attention_states:encoder_states_val} 212 | loss_val1 = sess.run(cost, feed_dict)/len(val_y) 213 | loss_val.append(loss_val1) 214 | 215 | if loss_val1 < mn_validation_loss: 216 | 217 | mn_validation_loss = loss_val1 218 | 219 | # testing 220 | test_x, test_y, test_prev_y, encoder_states_test= Data.testing() 221 | feed_dict = {encoder_input: test_x, decoder_gt: test_y, decoder_input: test_prev_y, 222 | encoder_attention_states:encoder_states_test} 223 | pred_y=sess.run(pred, feed_dict) 224 | loss_test1 = sess.run(cost, feed_dict)/len(test_y) 225 | 226 | mean, stdev = Data.returnMean() 227 | testing_result = test_y*stdev[num_feature] + mean[num_feature] 228 | pred_result = pred_y*stdev[num_feature] + mean[num_feature] 229 | 230 | ret_y_pred = pred_result.copy() 231 | ret_y_test = testing_result.copy() 232 | 233 | mae = mean_absolute_error(testing_result, pred_result) 234 | print('testing mae', mae) 235 | 236 | step += 1 237 | count += 1 238 | 239 | # reduce the learning rate 240 | if count > 10000: 241 | learning_rate *= 0.1 242 | count = 0 243 | 244 | print ("Optimization Finished!") 245 | all_y_pred.append(ret_y_pred.flatten()) 246 | all_y_test.append(ret_y_test.flatten()) 247 | print(np.array(all_y_pred).shape) 248 | print(np.array(all_y_test).shape) 249 | 250 | rrse = rrse_(np.array(all_y_test), np.array(all_y_pred)) 251 | corr = CORR(np.array(all_y_test) ,np.array(all_y_pred)) 252 | print('current score', rrse, corr) 253 | 254 | return np.array(all_y_pred), np.array(all_y_test) 255 | 256 | if __name__ == '__main__': 257 | # datasets = ['electricity', 'exchange_rate', 'solar-energy', 'traffic'] 258 | # datasets = datasets[1:] 259 | # datasets = datasets[::-1] 260 | import sys 261 | datasets = [sys.argv[1]] 262 | horizons = [int(sys.argv[2])] 263 | print(datasets, horizons) 264 | #horizons = [3, 6, 12, 24] 265 | 
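# Example invocation (a sketch; example.sh simply forwards its arguments to this script, and the
# dataset/horizon values below come from the commented-out lists above):
#   python main.py electricity 3
# The first argument names a sub-directory of ./data containing <dataset>.txt (the shipped
# .txt.gz files presumably need to be decompressed first), and the second is the forecast horizon.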
266 | f = open('log', 'a+') 267 | f.write('dataset,horizon,mae,rmse,mape,rrse,corr\n') 268 | for dataset in datasets: 269 | for horizon in horizons: 270 | print(dataset, horizons) 271 | y_pred, y_test = go(dataset, horizon) 272 | print(y_pred.shape, y_test.shape) 273 | mae = mean_absolute_error(y_test, y_pred) 274 | rmse = np.sqrt(mean_squared_error(y_test, y_pred)) 275 | mape = mean_absolute_percentage_error(y_test, y_pred) 276 | rrse = rrse_(y_test, y_pred) 277 | corr = CORR(y_test ,y_pred) 278 | # mae, rmse, mape = go(dataset, horizon) 279 | f.write('{},{},{},{},{},{},{}\n'.format(dataset, horizon, mae, rmse, mape, rrse, corr)) 280 | f.flush() 281 | 282 | f.close() 283 | -------------------------------------------------------------------------------- /multivariate/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # We disable pylint because we need python3 compatibility. 6 | from six.moves import xrange # pylint: disable=redefined-builtin 7 | from six.moves import zip # pylint: disable=redefined-builtin 8 | 9 | from tensorflow.contrib.rnn.python.ops import core_rnn_cell 10 | from tensorflow.python.framework import dtypes 11 | from tensorflow.python.framework import ops 12 | from tensorflow.python.ops import array_ops 13 | from tensorflow.python.ops import control_flow_ops 14 | from tensorflow.python.ops import embedding_ops 15 | from tensorflow.python.ops import math_ops 16 | from tensorflow.python.ops import nn_ops 17 | from tensorflow.python.ops import rnn 18 | from tensorflow.python.ops import rnn_cell_impl 19 | from tensorflow.python.ops import variable_scope 20 | from tensorflow.python.util import nest 21 | 22 | # We disable pylint because we need python3 compatibility. 23 | from six.moves import xrange # pylint: disable=redefined-builtin 24 | # from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl #omit when tf = 1.3 25 | from tensorflow.python.ops import rnn_cell_impl 26 | from tensorflow.python.framework import dtypes 27 | from tensorflow.python.framework import ops 28 | from tensorflow.python.ops import array_ops 29 | from tensorflow.python.ops import math_ops 30 | from tensorflow.python.ops import nn_ops 31 | from tensorflow.python.ops import variable_scope 32 | from tensorflow.python.util import nest 33 | 34 | 35 | import tensorflow as tf 36 | # TODO(ebrevdo): Remove once _linear is fully deprecated. 37 | # linear = core_rnn_cell_impl._linear # pylint: disable=protected-access #omit when tf = 1.3 38 | Linear = linear = rnn_cell_impl._linear #add when tf = 1.3 39 | 40 | def attention_encoder(encoder_inputs, attention_states, cell, 41 | output_size=None, num_heads=1, 42 | dtype=dtypes.float32, scope=None): 43 | 44 | """RNN encoder with attention. 45 | In this context "attention" means that, during encoding, the RNN can look up 46 | information in the additional tensor "attention_states", which is constructed by transpose the dimensions of time steps and input features of the inputs, 47 | and it does this to focus on a few features of the input. 48 | 49 | Args: 50 | encoder_inputs: A list of 2D Tensors [batch_size x n_input_encoder]. 51 | initial_state: 2D Tensor [batch_size x cell.state_size]. 52 | attention_states: 3D Tensor [batch_size x attn_length x attn_size]. 53 | cell: rnn_cell.RNNCell defining the cell function and size. 
54 | output_size: Size of the output vectors; if None, we use cell.output_size. 55 | num_heads: Number of attention heads that read from attention_states. 56 | dtype: The dtype to use for the RNN initial state (default: tf.float32). 57 | scope: VariableScope for the created subgraph; default: "attention_decoder". 58 | 59 | Returns: 60 | A tuple of the form (outputs, state, attn_weights), where: 61 | outputs: A list of the encoder hidden states. Each element is a 2D Tensor of shape [batch_size x output_size]. 62 | state: The state of encoder cell at the final time-step. It is a 2D Tensor of shape [batch_size x cell.state_size]. 63 | attn_weights: A list of the input attention weights. Each element is a 2D Tensor of shape [batch_size x attn_length] 64 | Raises: 65 | ValueError: when num_heads is not positive, there are no inputs, shapes 66 | of attention_states are not set, or input size cannot be inferred 67 | from the input. 68 | """ 69 | if not encoder_inputs: 70 | raise ValueError("Must provide at least 1 input to attention encoder.") 71 | if num_heads < 1: 72 | raise ValueError("With less than 1 heads, use a non-attention encoder.") 73 | if not attention_states.get_shape()[1:2].is_fully_defined(): 74 | raise ValueError("Shape[1] and [2] of attention_states must be known: %s" 75 | % attention_states.get_shape()) 76 | if output_size is None: 77 | output_size = cell.output_size 78 | 79 | with variable_scope.variable_scope(scope or "attention_encoder"): 80 | # get the batch_size of the encoder_input 81 | batch_size = array_ops.shape(encoder_inputs[0])[0] # Needed for reshaping. 82 | # attention_state.shape (batch_size, n_input_encoder, n_steps_encoder) 83 | attn_length = attention_states.get_shape()[1].value # n_input_encoder 84 | attn_size = attention_states.get_shape()[2].value # n_steps_encoder 85 | 86 | # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before. 87 | # hidden_features shape: (batch_size, attn_length, 1, attn_size) 88 | hidden = array_ops.reshape( 89 | attention_states, [-1, attn_length, 1, attn_size]) 90 | hidden_features = [] 91 | v = [] 92 | attention_vec_size = attn_size # Size of query vectors for attention. 93 | for a in xrange(num_heads): 94 | k = variable_scope.get_variable("Attn_EncoderW_%d" % a, 95 | [1, 1, attn_size, attention_vec_size]) 96 | hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")) 97 | v.append(variable_scope.get_variable("AttnEncoderV_%d" % a, 98 | [attention_vec_size])) 99 | # how to get the initial_state 100 | initial_state_size = array_ops.stack([batch_size, output_size]) 101 | initial_state = [array_ops.zeros(initial_state_size,dtype=dtype) for _ in xrange(2)] 102 | state = initial_state 103 | 104 | def attention(query): 105 | """Put attention masks on hidden using hidden_features and query.""" 106 | ds = [] # Results of attention reads will be stored here. 107 | if nest.is_sequence(query): # If the query is a tuple, flatten it. 108 | query_list = nest.flatten(query) 109 | for q in query_list: # Check that ndims == 2 if specified. 
110 | ndims = q.get_shape().ndims 111 | if ndims: 112 | assert ndims == 2 113 | query = array_ops.concat(query_list,1) 114 | for a in xrange(num_heads): 115 | with variable_scope.variable_scope("AttentionEncoder_%d" % a): 116 | # y with the shape (batch_size, attention_vec_size) 117 | y = linear(query, attention_vec_size, True) 118 | # y with the shape (batch_size, 1, 1, attention_vec_size) 119 | y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size]) 120 | # Attention mask is a softmax of v^T * tanh(...). 121 | # hidden_features with the shape (batch_size, attn_length, 1, attn_size) 122 | s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3]) 123 | # a with shape (batch_size, attn_length) 124 | # a is the attention weight 125 | a = nn_ops.softmax(s) 126 | ds.append(a) 127 | return ds 128 | 129 | outputs = [] 130 | attn_weights = [] 131 | batch_attn_size = array_ops.stack([batch_size, attn_length]) 132 | attns = [array_ops.zeros(batch_attn_size, dtype=dtype) 133 | for _ in xrange(num_heads)] 134 | 135 | # i is the index of the which time step 136 | # inp is numpy.array and the shape of inp is (batch_size, n_feature) 137 | for i, inp in enumerate(encoder_inputs): 138 | if i > 0: 139 | variable_scope.get_variable_scope().reuse_variables() 140 | input_size = inp.get_shape().with_rank(2)[1] 141 | if input_size.value is None: 142 | raise ValueError("Could not infer input size from input: %s" % inp.name) 143 | 144 | # multiply attention weights with the original input 145 | # get the newly input 146 | x = attns[0]*inp 147 | # Run the BasicLSTM with the newly input 148 | cell_output, state = cell(x, state) 149 | 150 | # Run the attention mechanism. 151 | attns = attention(state) 152 | 153 | with variable_scope.variable_scope("AttnEncoderOutputProjection"): 154 | output = cell_output 155 | 156 | outputs.append(output) 157 | attn_weights.append(attns) 158 | 159 | return outputs, state, attn_weights 160 | 161 | 162 | def attention_decoder(decoder_inputs, 163 | initial_state, 164 | attention_states, 165 | cell, 166 | output_size=None, 167 | num_heads=1, 168 | loop_function=None, 169 | dtype=None, 170 | scope=None, 171 | initial_state_attention=False): 172 | """RNN decoder with attention for the sequence-to-sequence model. 173 | In this context "attention" means that, during decoding, the RNN can look up 174 | information in the additional tensor attention_states, and it does this by 175 | focusing on a few entries from the tensor. This model has proven to yield 176 | especially good results in a number of sequence-to-sequence tasks. This 177 | implementation is based on http://arxiv.org/abs/1412.7449 (see below for 178 | details). It is recommended for complex sequence-to-sequence tasks. 179 | Args: 180 | decoder_inputs: A list of 2D Tensors [batch_size x input_size]. 181 | initial_state: 2D Tensor [batch_size x cell.state_size]. 182 | attention_states: 3D Tensor [batch_size x attn_length x attn_size]. 183 | cell: tf.nn.rnn_cell.RNNCell defining the cell function and size. 184 | output_size: Size of the output vectors; if None, we use cell.output_size. 185 | num_heads: Number of attention heads that read from attention_states. 186 | loop_function: If not None, this function will be applied to i-th output 187 | in order to generate i+1-th input, and decoder_inputs will be ignored, 188 | except for the first element ("GO" symbol). This can be used for decoding, 189 | but also for training to emulate http://arxiv.org/abs/1506.03099. 
190 | Signature -- loop_function(prev, i) = next 191 | * prev is a 2D Tensor of shape [batch_size x output_size], 192 | * i is an integer, the step number (when advanced control is needed), 193 | * next is a 2D Tensor of shape [batch_size x input_size]. 194 | dtype: The dtype to use for the RNN initial state (default: tf.float32). 195 | scope: VariableScope for the created subgraph; default: "attention_decoder". 196 | initial_state_attention: If False (default), initial attentions are zero. 197 | If True, initialize the attentions from the initial state and attention 198 | states -- useful when we wish to resume decoding from a previously 199 | stored decoder state and attention states. 200 | Returns: 201 | A tuple of the form (outputs, state), where: 202 | outputs: A list of the same length as decoder_inputs of 2D Tensors of 203 | shape [batch_size x output_size]. These represent the generated outputs. 204 | Output i is computed from input i (which is either the i-th element 205 | of decoder_inputs or loop_function(output {i-1}, i)) as follows. 206 | First, we run the cell on a combination of the input and previous 207 | attention masks: 208 | cell_output, new_state = cell(linear(input, prev_attn), prev_state). 209 | Then, we calculate new attention masks: 210 | new_attn = softmax(V^T * tanh(W * attention_states + U * new_state)) 211 | and then we calculate the output: 212 | output = linear(cell_output, new_attn). 213 | state: The state of each decoder cell the final time-step. 214 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 215 | Raises: 216 | ValueError: when num_heads is not positive, there are no inputs, shapes 217 | of attention_states are not set, or input size cannot be inferred 218 | from the input. 219 | """ 220 | if not decoder_inputs: 221 | raise ValueError("Must provide at least 1 input to attention decoder.") 222 | if num_heads < 1: 223 | raise ValueError("With less than 1 heads, use a non-attention decoder.") 224 | if attention_states.get_shape()[2].value is None: 225 | raise ValueError("Shape[2] of attention_states must be known: %s" % 226 | attention_states.get_shape()) 227 | if output_size is None: 228 | output_size = cell.output_size 229 | 230 | with variable_scope.variable_scope( 231 | scope or "attention_decoder", dtype=dtype) as scope: 232 | dtype = scope.dtype 233 | 234 | batch_size = array_ops.shape(decoder_inputs[0])[0] # Needed for reshaping. 235 | attn_length = attention_states.get_shape()[1].value 236 | if attn_length is None: 237 | attn_length = array_ops.shape(attention_states)[1] 238 | attn_size = attention_states.get_shape()[2].value 239 | 240 | # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before. 241 | hidden = array_ops.reshape(attention_states, 242 | [-1, attn_length, 1, attn_size]) 243 | hidden_features = [] 244 | v = [] 245 | attention_vec_size = attn_size # Size of query vectors for attention. 246 | for a in xrange(num_heads): 247 | k = variable_scope.get_variable("AttnW_%d" % a, 248 | [1, 1, attn_size, attention_vec_size]) 249 | hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")) 250 | v.append( 251 | variable_scope.get_variable("AttnV_%d" % a, [attention_vec_size])) 252 | 253 | state = initial_state 254 | 255 | def attention(query): 256 | """Put attention masks on hidden using hidden_features and query.""" 257 | ds = [] # Results of attention reads will be stored here. 258 | if nest.is_sequence(query): # If the query is a tuple, flatten it. 
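# For a BasicLSTMCell the query is an LSTMStateTuple (c, h), so flattening and concatenating it
# lets both the cell state and the hidden state of the decoder drive the attention scores; this
# roughly matches the [d_{t-1}; s'_{t-1}] query used in the DARNN paper's temporal attention stage.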
259 | query_list = nest.flatten(query) 260 | for q in query_list: # Check that ndims == 2 if specified. 261 | ndims = q.get_shape().ndims 262 | if ndims: 263 | assert ndims == 2 264 | query = array_ops.concat(query_list, 1) 265 | for a in xrange(num_heads): 266 | with variable_scope.variable_scope("Attention_%d" % a): 267 | y = linear(query, attention_vec_size, True) 268 | y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size]) 269 | # Attention mask is a softmax of v^T * tanh(...). 270 | s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), 271 | [2, 3]) 272 | a = nn_ops.softmax(s) 273 | # Now calculate the attention-weighted vector d. 274 | d = math_ops.reduce_sum( 275 | array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) 276 | ds.append(array_ops.reshape(d, [-1, attn_size])) 277 | return ds, a 278 | 279 | outputs = [] 280 | prev = None 281 | batch_attn_size = array_ops.stack([batch_size, attn_size]) 282 | attns = [ 283 | array_ops.zeros( 284 | batch_attn_size, dtype=dtype) for _ in xrange(num_heads) 285 | ] 286 | for a in attns: # Ensure the second shape of attention vectors is set. 287 | a.set_shape([None, attn_size]) 288 | if initial_state_attention: 289 | attns, _ = attention(initial_state) 290 | for i, inp in enumerate(decoder_inputs): 291 | if i > 0: 292 | variable_scope.get_variable_scope().reuse_variables() 293 | # If loop_function is set, we use it instead of decoder_inputs. 294 | if loop_function is not None and prev is not None: 295 | with variable_scope.variable_scope("loop_function", reuse=True): 296 | inp = loop_function(prev, i) 297 | # Merge input and previous attentions into one vector of the right size. 298 | input_size = inp.get_shape().with_rank(2)[1] 299 | if input_size.value is None: 300 | raise ValueError("Could not infer input size from input: %s" % inp.name) 301 | x = linear([inp] + attns, input_size, True) 302 | # Run the RNN. 303 | cell_output, state = cell(x, state) 304 | # Run the attention mechanism. 
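# attention(state) returns the context vector d_t = sum_i beta_t^i * h_i, a weighted sum of the
# encoder hidden states stored in attention_states; the weights beta_t^i come from the softmax in
# attention() above, and the context is combined with cell_output in the output projection below.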
305 | if i == 0 and initial_state_attention: 306 | with variable_scope.variable_scope( 307 | variable_scope.get_variable_scope(), reuse=True): 308 | attns, _ = attention(state) 309 | else: 310 | attns, weights = attention(state) 311 | 312 | with variable_scope.variable_scope("AttnOutputProjection"): 313 | output = linear([cell_output] + attns, output_size, True) 314 | if loop_function is not None: 315 | prev = output 316 | outputs.append(output) 317 | 318 | return outputs, state, weights 319 | -------------------------------------------------------------------------------- /univariate/Generate_stock_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pandas as pd 4 | class Input_data: 5 | def __init__(self, batch_size, n_step_encoder, n_step_decoder, n_hidden_encoder, filename, n_label=None, horizon=3): 6 | self.horizon = horizon 7 | # read the data 8 | data = pd.read_csv(filename) 9 | if 'key' in data.columns: 10 | data.drop(columns=['key'], inplace=True) 11 | if 'pm2.5' in data.columns: 12 | data = data.iloc[24:, :] 13 | data = data.interpolate() 14 | # data.fillna(0, inplace=True) 15 | else: 16 | # default 17 | print('fillna 0') 18 | data.fillna(0, inplace=True) 19 | 20 | 21 | self.data = np.array(data) 22 | # self.train_day = 300 # stat from 62 ~ 70 23 | # self.val_day = 16 24 | # self.test_day = 16 25 | # minutes = 1 26 | 27 | sz = self.data.shape[0] 28 | train_size = int(sz * .6) 29 | val_size = int(sz * .8) 30 | 31 | self.train = self.data[0:train_size, :] 32 | self.val = self.data[train_size:val_size, :] 33 | self.test = self.data[val_size:, :] 34 | 35 | # parameters for the network 36 | self.batch_size = batch_size 37 | self.n_hidden_state = n_hidden_encoder 38 | self.n_step_encoder = n_step_encoder 39 | self.n_step_decoder = n_step_decoder 40 | 41 | 42 | self.n_train = len(self.train) 43 | self.n_val = len(self.val) 44 | self.n_test = len(self.test) 45 | self.n_feature = self.data.shape[1]- 1 46 | self.n_label = n_label 47 | 48 | # data normalization 49 | self.mean = np.mean(self.train,axis=0) 50 | self.stdev = np.std(self.train,axis=0) 51 | 52 | 53 | # in case the stdev=0,then we will get nan 54 | for i in range (len(self.stdev)): 55 | if self.stdev[i] < 0.00000001: 56 | self.stdev[i] = 1 57 | 58 | 59 | self.train = (self.train-self.mean)/self.stdev 60 | self.test = (self.test-self.mean)/self.stdev 61 | self.val = (self.val - self.mean)/self.stdev 62 | print(self.train.shape, self.val.shape, self.test.shape) 63 | 64 | def next_batch(self): 65 | # generate of a random index from the range [0, self.n_train -self.n_step_decoder +1] 66 | if self.n_label == 1: 67 | index = random.sample(np.arange(0,self.n_train-self.n_step_decoder-self.horizon+1),self.batch_size) 68 | # index = np.arange(0,self.n_train-self.n_step_decoder) 69 | np.random.shuffle(index) 70 | index = np.array(index) 71 | # the shape of batch_x, label, previous_y 72 | 73 | # batch_x = np.zeros([index.shape[0],self.n_step_encoder, self.n_feature]) 74 | # label = np.zeros([index.shape[0], self.n_label]) 75 | # previous_y = np.zeros([index.shape[0],self.n_step_decoder, self.n_label]) 76 | batch_x = np.zeros([self.batch_size,self.n_step_encoder, self.n_feature]) 77 | label = np.zeros([self.batch_size, self.n_label]) 78 | previous_y = np.zeros([self.batch_size,self.n_step_decoder, self.n_label]) 79 | 80 | temp = 0 81 | for item in index: 82 | batch_x[temp,:,:] = self.train[item:item+self.n_step_encoder, :self.n_feature] 83 | 
previous_y[temp,:,0] = self.train[item:item + self.n_step_decoder, -1] 84 | temp += 1 85 | label[:,0] = np.array(self.train[index + self.n_step_decoder + self.horizon - 1, -1]) 86 | encoder_states = np.swapaxes(batch_x, 1, 2) 87 | return batch_x, label, previous_y, encoder_states 88 | 89 | # index = random.sample(np.arange(0,self.n_train-self.n_step_decoder-self.n_label),self.batch_size) 90 | # index = np.array(index) 91 | # # the shape of batch_x, label, previous_y 92 | # batch_x = np.zeros([self.batch_size,self.n_step_encoder, self.n_feature]) 93 | # label = np.zeros([self.batch_size, self.n_label]) 94 | # previous_y = np.zeros([self.batch_size,self.n_step_decoder, 1]) 95 | # # print(batch_x.shape, label.shape, previous_y.shape) 96 | # temp = 0 97 | # for item in index: 98 | # batch_x[temp,:,:] = self.train[item:item+self.n_step_encoder, :self.n_feature] 99 | # previous_y[temp,:,0] = self.train[item:item + self.n_step_decoder, -1] 100 | # label[temp,:] = self.train[item+self.n_step_decoder: item+self.n_step_decoder + self.n_label, -1] 101 | # temp += 1 102 | # # label[:,0] = np.array(self.train[index + self.n_step_decoder, -1]) 103 | # encoder_states = np.swapaxes(batch_x, 1, 2) 104 | # return batch_x, label, previous_y, encoder_states 105 | 106 | def returnMean(self): 107 | return self.mean, self.stdev 108 | 109 | def validation(self): 110 | index = np.arange(0, self.n_val-self.n_step_decoder-self.horizon+1) 111 | index_size = len(index) 112 | val_x = np.zeros([index_size, self.n_step_encoder, self.n_feature]) 113 | val_label = np.zeros([index_size, self.n_label]) 114 | val_prev_y = np.zeros([index_size, self.n_step_decoder, self.n_label]) 115 | temp = 0 116 | for item in index: 117 | val_x[temp,:,:] = self.val[item:item + self.n_step_encoder, :self.n_feature] 118 | val_prev_y[temp,:,0] = self.val[item:item + self.n_step_decoder, -1] 119 | temp += 1 120 | 121 | val_label[:, 0] = np.array(self.val[index + self.n_step_decoder + self.horizon - 1, -1]) 122 | encoder_states_val = np.swapaxes(val_x,1,2) 123 | return val_x, val_label, val_prev_y, encoder_states_val 124 | 125 | def testing(self): 126 | if self.n_label == 1: 127 | index = np.arange(0,self.n_test-self.n_step_decoder-self.horizon+1) 128 | index_size = len(index) 129 | test_x = np.zeros([index_size, self.n_step_encoder, self.n_feature]) 130 | test_label = np.zeros([index_size, self.n_label]) 131 | test_prev_y = np.zeros([index_size, self.n_step_decoder, self.n_label]) 132 | temp = 0 133 | for item in index: 134 | test_x[temp,:,:] = self.test[item:item + self.n_step_encoder, :self.n_feature] 135 | test_prev_y[temp,:,0] = self.test[item:item + self.n_step_decoder, -1] 136 | temp += 1 137 | 138 | test_label[:, 0] = np.array(self.test[index + self.n_step_decoder + self.horizon - 1, -1]) 139 | encoder_states_test = np.swapaxes(test_x,1,2) 140 | return test_x, test_label, test_prev_y, encoder_states_test 141 | else: 142 | index = np.arange(0,self.n_test-self.n_step_decoder-self.n_label) 143 | index_size = len(index) 144 | test_x = np.zeros([index_size, self.n_step_encoder, self.n_feature]) 145 | test_label = np.zeros([index_size, self.n_label]) 146 | test_prev_y = np.zeros([index_size, self.n_step_decoder, 1]) 147 | temp = 0 148 | for item in index: 149 | test_x[temp,:,:] = self.test[item:item + self.n_step_encoder, :self.n_feature] 150 | test_prev_y[temp,:,0] = self.test[item:item + self.n_step_decoder, -1] 151 | test_label[temp, :] = np.array(self.test[item+ self.n_step_decoder:item+ self.n_step_decoder + self.n_label, -1]) 152 | temp 
+= 1 153 | 154 | encoder_states_test = np.swapaxes(test_x,1,2) 155 | return test_x, test_label, test_prev_y, encoder_states_test 156 | -------------------------------------------------------------------------------- /univariate/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # TEST is a virtualenv 4 | . ../TEST/bin/activate 5 | 6 | python2 main.py ../data/beijing/interpolate.csv bj_result.csv 7 | -------------------------------------------------------------------------------- /univariate/get_score.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import math 3 | import sys 4 | df = pd.read_csv(sys.argv[1]) 5 | 6 | for horizon in [3, 6, 12, 24]: 7 | mae, rmse = 1e9, 1e9 8 | 9 | for timestep in df.timestep.unique(): 10 | for n_hidden in df.n_hidden.unique(): 11 | tmpdf = df[(df.timestep == timestep) & (df.n_hidden == n_hidden) & (df.horizon == horizon)] 12 | if tmpdf['mae'].mean() < mae: 13 | mae = tmpdf['mae'].mean() 14 | rmse = math.sqrt(tmpdf['mse'].mean()) 15 | 16 | x = timestep 17 | y = n_hidden 18 | print('====== horizon {} ====='.format(horizon)) 19 | print('mae', mae, 'rmse', rmse) 20 | print('best params:', 'timestep', x, 'n_hidden', y) 21 | print('=======================') 22 | 23 | -------------------------------------------------------------------------------- /univariate/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 3 | import sys 4 | import tensorflow as tf 5 | import numpy as np 6 | from tensorflow.contrib.legacy_seq2seq.python.ops import seq2seq 7 | from tensorflow.contrib.rnn.python.ops import rnn 8 | # from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl as rnn_cell #omit when tf = 1.3 9 | from tensorflow.python.ops import rnn_cell_impl as rnn_cell #add when tf = 1.3 10 | import attention_encoder 11 | import Generate_stock_data as GD 12 | import pandas as pd 13 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Disable Tensorflow debugging message 14 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1) 15 | 16 | 17 | def mean_absolute_percentage_error(y_true, y_pred): 18 | """ 19 | Use of this metric is not recommended; for illustration only. 
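Concretely, MAPE = 100 * mean(|y_true - y_pred| / |y_true|), so it is undefined whenever
y_true contains zeros.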
20 | See other regression metrics on sklearn docs: 21 | http://scikit-learn.org/stable/modules/classes.html#regression-metrics 22 | Use like any other metric 23 | >>> y_true = [3, -0.5, 2, 7]; y_pred = [2.5, -0.3, 2, 8] 24 | >>> mean_absolute_percentage_error(y_true, y_pred) 25 | Out[]: 24.791666666666668 26 | """ 27 | 28 | # y_true, y_pred = check_arrays(y_true, y_pred) 29 | 30 | ## Note: does not handle mix 1d representation 31 | #if _is_1d(y_true): 32 | # y_true, y_pred = _check_1d_array(y_true, y_pred) 33 | 34 | return np.mean(np.abs((y_true - y_pred) / y_true)) * 100 35 | 36 | from sklearn.metrics import mean_absolute_error 37 | from sklearn.metrics import mean_squared_error 38 | 39 | 40 | def RNN(encoder_input, decoder_input, weights, biases, encoder_attention_states, 41 | n_input_encoder, n_steps_encoder, n_hidden_encoder, 42 | n_input_decoder, n_steps_decoder, n_hidden_decoder): 43 | 44 | # Prepare data shape to match `rnn` function requirements 45 | # Current data input shape: (batch_size, n_steps, n_input) 46 | # Required shape: 'n_steps' tensors list of shape (batch_size, n_input) 47 | 48 | # Prepare data for encoder 49 | # Permuting batch_size and n_steps 50 | encoder_input = tf.transpose(encoder_input, [1, 0, 2]) 51 | # Reshaping to (n_steps*batch_size, n_input) 52 | encoder_input = tf.reshape(encoder_input, [-1, n_input_encoder]) 53 | # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) 54 | encoder_input = tf.split(encoder_input, n_steps_encoder, 0) 55 | 56 | # Prepare data for decoder 57 | # Permuting batch_size and n_steps 58 | decoder_input = tf.transpose(decoder_input, [1, 0, 2]) 59 | # Reshaping to (n_steps*batch_size, n_input) 60 | decoder_input = tf.reshape(decoder_input, [-1, n_input_decoder]) 61 | # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) 62 | decoder_input = tf.split(decoder_input, n_steps_decoder,0 ) 63 | 64 | # Encoder. 65 | with tf.variable_scope('encoder') as scope: 66 | encoder_cell = rnn_cell.BasicLSTMCell(n_hidden_encoder, forget_bias=1.0) 67 | encoder_outputs, encoder_state, attn_weights = attention_encoder.attention_encoder(encoder_input, 68 | encoder_attention_states, encoder_cell) 69 | 70 | # First calculate a concatenation of encoder outputs to put attention on. 
71 | top_states = [tf.reshape(e, [-1, 1, encoder_cell.output_size]) for e in encoder_outputs] 72 | attention_states = tf.concat(top_states,1) 73 | 74 | with tf.variable_scope('decoder') as scope: 75 | decoder_cell = rnn_cell.BasicLSTMCell(n_hidden_decoder, forget_bias=1.0) 76 | outputs, states = seq2seq.attention_decoder(decoder_input, encoder_state, 77 | attention_states, decoder_cell) 78 | 79 | return tf.matmul(outputs[-1], weights['out1']) + biases['out1'], attn_weights 80 | 81 | 82 | def run(timestep, n_hidden, horizon): 83 | all_pred_val = [] 84 | all_test_val = [] 85 | 86 | tf.reset_default_graph() 87 | # Parameters 88 | learning_rate = 0.001 89 | training_iters = 1000000 90 | batch_size = 128 91 | 92 | model_path = './model/' 93 | filename = sys.argv[1] 94 | 95 | df= pd.read_csv(filename) 96 | if 'spx' in sys.argv[1]: 97 | df.drop(columns=['key'], inplace=True) 98 | display_step = int(df.shape[0]*.8)//batch_size 99 | 100 | # Network Parameters 101 | # encoder parameter 102 | num_feature = df.shape[1]-1 # number of index #98 #72 103 | print(num_feature) 104 | n_input_encoder = df.shape[1]-1 # n_feature of encoder input #98 #72 105 | n_steps_encoder = timestep # time steps 106 | # n_hidden_encoder = 256 # size of hidden units 107 | n_hidden_encoder = n_hidden 108 | 109 | # decoder parameter 110 | n_input_decoder = 1 111 | n_steps_decoder = timestep 112 | # n_hidden_decoder = 256 113 | n_hidden_decoder = n_hidden 114 | n_classes = 1 # size of the decoder output 115 | 116 | # tf Graph input 117 | encoder_input = tf.placeholder("float", [None, n_steps_encoder, n_input_encoder]) 118 | decoder_input = tf.placeholder("float", [None, n_steps_decoder, n_input_decoder]) 119 | decoder_gt = tf.placeholder("float", [None, n_classes]) 120 | encoder_attention_states = tf.placeholder("float", [None, n_input_encoder, n_steps_encoder]) 121 | 122 | # Define weights 123 | weights = {'out1': tf.Variable(tf.random_normal([n_hidden_decoder, n_classes]))} 124 | biases = {'out1': tf.Variable(tf.random_normal([n_classes]))} 125 | 126 | # pred, attn_weights = RNN(encoder_input, decoder_input, weights, biases, encoder_attention_states) 127 | 128 | pred, attn_weights = RNN(encoder_input, decoder_input, weights, biases, encoder_attention_states, 129 | n_input_encoder, n_steps_encoder, n_hidden_encoder, 130 | n_input_decoder, n_steps_decoder, n_hidden_decoder) 131 | 132 | # Define loss and optimizer 133 | cost = tf.reduce_sum(tf.pow(tf.subtract(pred, decoder_gt), 2)) 134 | loss = tf.pow(tf.subtract(pred, decoder_gt), 2) 135 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 136 | init = tf.global_variables_initializer() 137 | 138 | # save the model 139 | saver = tf.train.Saver() 140 | loss_value = [] 141 | step_value = [] 142 | loss_test=[] 143 | loss_val = [] 144 | 145 | # Launch the graph 146 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 147 | 148 | sess.run(init) 149 | def get_nb_params_shape(shape): 150 | # ''' 151 | # Computes the total number of params for a given shap. 152 | # Works for any number of shapes etc [D,F] or [W,H,C] computes D*F and W*H*C. 153 | # ''' 154 | nb_params = 1 155 | for dim in shape: 156 | nb_params = nb_params*int(dim) 157 | return nb_params 158 | def count_number_trainable_params(): 159 | # ''' 160 | # Counts the number of trainable variables. 
161 | # ''' 162 | tot_nb_params = 0 163 | for trainable_variable in tf.trainable_variables(): 164 | shape = trainable_variable.get_shape() # e.g [D,F] or [W,H,C] 165 | current_nb_params = get_nb_params_shape(shape) 166 | tot_nb_params = tot_nb_params + current_nb_params 167 | return tot_nb_params 168 | 169 | 170 | step = 1 171 | count = 1 172 | 173 | Data = GD.Input_data(batch_size, n_steps_encoder, n_steps_decoder, n_hidden_encoder, filename, n_classes, horizon) 174 | # Keep training until reach max iterations 175 | mn_validation_loss = 1e15 176 | while step < training_iters: 177 | # the shape of batch_x is (batch_size, n_steps, n_input) 178 | 179 | sz = Data.train.shape[0]//batch_size 180 | all_batch_x, all_batch_y, all_prev_y, all_encoder_states = Data.next_batch() 181 | for i in range(sz): 182 | 183 | batch_x = all_batch_x[batch_size*i: batch_size*(i+1), ...] 184 | batch_y = all_batch_y[batch_size*i: batch_size*(i+1), ...] 185 | prev_y = all_prev_y[batch_size*i: batch_size*(i+1), ...] 186 | encoder_states = all_encoder_states[batch_size*i: batch_size*(i+1), ...] 187 | 188 | feed_dict = {encoder_input: batch_x, decoder_gt: batch_y, decoder_input: prev_y, 189 | encoder_attention_states:encoder_states} 190 | # Run optimization op (backprop) 191 | sess.run(optimizer, feed_dict) 192 | 193 | step += 1 194 | count += 1 195 | 196 | # reduce the learning rate 197 | if count > 10000: 198 | learning_rate *= 0.1 199 | count = 0 200 | save_path = saver.save(sess, model_path + 'dual_stage_' + str(step) + '.ckpt') 201 | 202 | # display the result 203 | if True: 204 | # Calculate batch loss 205 | loss = sess.run(cost, feed_dict)/batch_size 206 | epoch = step // display_step 207 | if epoch > 50: 208 | break 209 | print "Epoch", epoch 210 | print "Iter " + str(step) + ", Minibatch Loss= " + "{:.6f}".format(loss) 211 | 212 | #store the value 213 | loss_value.append(loss) 214 | step_value.append(step) 215 | 216 | # Val 217 | val_x, val_y, val_prev_y, encoder_states_val = Data.validation() 218 | feed_dict = {encoder_input: val_x, decoder_gt: val_y, decoder_input: val_prev_y, 219 | encoder_attention_states:encoder_states_val} 220 | loss_val1 = sess.run(cost, feed_dict)/len(val_y) 221 | loss_val.append(loss_val1) 222 | print "validation loss:", loss_val1 223 | 224 | # testing 225 | test_x, test_y, test_prev_y, encoder_states_test= Data.testing() 226 | feed_dict = {encoder_input: test_x, decoder_gt: test_y, decoder_input: test_prev_y, 227 | encoder_attention_states:encoder_states_test} 228 | pred_y=sess.run(pred, feed_dict) 229 | loss_test1 = sess.run(cost, feed_dict)/len(test_y) 230 | loss_test.append(loss_test1) 231 | print "Testing loss:", loss_test1 232 | 233 | 234 | mean, stdev = Data.returnMean() 235 | # print mean 236 | # print stdev 237 | 238 | testing_result = test_y*stdev[num_feature] + mean[num_feature] 239 | pred_result = pred_y*stdev[num_feature] + mean[num_feature] 240 | 241 | all_test_val.append(str(testing_result[len(testing_result) - 1]).replace('[', '').replace(']', '')) 242 | all_pred_val.append(str(pred_result[len(pred_result) - 1]).replace('[', '').replace(']', '')) 243 | 244 | # print "testing data:" 245 | # print testing_result 246 | # print testing_result.shape 247 | 248 | # print "pred data:" 249 | # print pred_result 250 | # print pred_result.shape 251 | # from sklearn.utils import check_arrays 252 | if loss_val1 < mn_validation_loss: 253 | df = pd.DataFrame(pred_result, columns=['pred']) 254 | df.insert(loc=1, column='gt', value=testing_result) 255 | df.to_csv('gef_prediction.csv', 
index=False) 256 | 257 | mn_validation_loss = loss_val1 258 | mae = mean_absolute_error(testing_result, pred_result) 259 | mse = mean_squared_error(testing_result, pred_result) 260 | mape = mean_absolute_percentage_error(testing_result, pred_result) 261 | print('mae', mae) 262 | print('mse', mse) 263 | print('mape', mape) 264 | 265 | 266 | print "Optimization Finished!" 267 | f.write('{},{},{},{},{},{}\n'.format(horizon,timestep, n_hidden, mae, mse, mape)) 268 | f.flush() 269 | 270 | if __name__ == '__main__': 271 | 272 | f = open(sys.argv[2], 'a+') 273 | f.write('horizon,timestep,n_hidden,mae,mse,mape\n') 274 | run(168, 32, 24) 275 | # run(168, 32, 3) 276 | for _ in range(10): 277 | # for timestep in [3, 5, 10, 15, 25]: 278 | for n_hidden in [16]: 279 | for horizon in [1,3, 6, 12, 24]: 280 | # for n_hidden in [16]: 281 | f.flush() 282 | 283 | f.close() 284 | -------------------------------------------------------------------------------- /univariate/results/bj_result.csv: -------------------------------------------------------------------------------- 1 | horizon,timestep,n_hidden,mae,mse,mape 2 | 3,15,32,28.9092558777,2010.59081752,54.4944785453 3 | 6,15,32,39.3596231275,3402.6360833,113.728705448 4 | 12,15,32,54.0770282427,5818.48822455,159.894606243 5 | 24,15,32,61.5858071311,6988.49699706,212.885848209 6 | 3,3,16,28.3469375984,1946.30784107,53.8508153086 7 | 6,3,16,39.4829746471,3535.4238375,99.5076729951 8 | 12,3,16,54.8609118334,5783.75064674,174.637663781 9 | 24,3,16,60.682296276,7196.08764653,183.136693239 10 | 3,3,32,28.7537173818,2026.55467335,52.974133183 11 | 6,3,32,42.6455434979,3648.57953336,130.852653349 12 | 12,3,32,52.8852221972,5710.09599778,149.501886927 13 | 24,3,32,60.963785724,7356.67224687,181.227400872 14 | 3,3,64,27.3763419441,1851.87836191,58.5316718941 15 | 6,3,64,42.589341343,3833.97342744,111.476191628 16 | 12,3,64,52.7710404528,5473.74645934,161.616907107 17 | 24,3,64,62.5586738853,7188.90659735,212.138405483 18 | 3,3,128,28.8636640352,2007.24770853,63.0559996916 19 | 6,3,128,40.9322492355,3599.79833628,110.127922581 20 | 12,3,128,52.8665406986,5611.97070321,160.95873742 21 | 24,3,128,61.6643473846,7177.40946778,203.818447063 22 | 3,3,256,35.2338075388,2535.59589794,97.5730153661 23 | 6,3,256,41.5275985285,3641.04096519,112.096874557 24 | 12,3,256,93.1532437055,17026.8913553,144.413347725 25 | 24,3,256,62.2147626342,7542.27378995,188.147378961 26 | 3,5,16,27.1029599052,1906.11409109,60.7497349399 27 | 6,5,16,40.8743607819,3617.91582045,109.537301333 28 | 12,5,16,52.3518398157,5573.44055843,151.980223796 29 | 24,5,16,61.1030845594,7052.40287511,202.888986612 30 | 3,5,32,26.600965444,1795.03176935,55.3631065686 31 | 6,5,32,40.5089757337,3695.9651421,88.1186988232 32 | 12,5,32,53.4604615438,5597.62183361,165.758105715 33 | 24,5,32,60.7820258068,7206.5645918,187.908241686 34 | 3,5,64,26.983720756,1811.41965064,55.8503319305 35 | 6,5,64,40.4239431311,3623.69671799,103.047274168 36 | 12,5,64,53.7054199035,5811.29976084,154.064874352 37 | 24,5,64,62.0534088381,7301.42551898,199.500411552 38 | 3,5,128,32.1693778636,2151.25994617,89.1494451231 39 | 6,5,128,42.3791552409,3796.7144242,124.491272902 40 | 12,5,128,56.0555626773,6500.00684732,144.428003424 41 | 24,5,128,63.6646409537,7491.26633353,206.064880429 42 | 3,5,256,33.4037181062,2362.95410484,73.8344481707 43 | 6,5,256,53.1137630624,5894.78652911,148.633678141 44 | 12,5,256,60.103800692,6305.81961992,204.23822975 45 | 24,5,256,63.1760231642,7474.47515174,202.701418373 46 | 
3,10,16,27.3437956485,1792.41576004,66.3627540952 47 | 6,10,16,42.4049058603,3684.7406633,127.077033441 48 | 12,10,16,55.0578875899,6006.47345549,163.900616538 49 | 24,10,16,61.2974202665,7463.07822975,177.774414355 50 | 3,10,32,27.2123194864,1784.85170698,66.4036720241 51 | 6,10,32,40.9096413275,3675.55522108,102.738425145 52 | 12,10,32,55.2573798111,5883.74647104,171.05777931 53 | 24,10,32,60.9673621177,6997.14340047,194.137059836 54 | 3,10,64,26.5585722256,1879.1589427,52.9448152226 55 | 6,10,64,42.7741548585,3692.57639861,128.826942586 56 | 12,10,64,53.4789209175,5730.34676961,154.658349075 57 | 24,10,64,61.8745378875,7254.48346008,201.141561439 58 | 3,10,128,39.0435305976,2959.6479136,92.9592493435 59 | 6,10,128,47.4820941303,4839.23396565,98.5859425873 60 | 12,10,128,52.8515211061,5831.45825128,153.893725854 61 | 24,10,128,64.0290675014,8061.07949267,196.544239307 62 | 3,10,256,68.3013033262,11854.1340628,192.666750496 63 | 6,10,256,92.5699090363,19261.2123323,309.759871305 64 | 12,10,256,92.7886787355,16736.0149364,153.046025223 65 | 24,10,256,68.3285946392,10092.890839,176.205563327 66 | 3,15,16,26.9581388313,1777.53486963,69.5080561763 67 | 6,15,16,41.2731177716,3641.62621165,114.261421073 68 | 12,15,16,52.9006948373,5444.15627054,167.863815526 69 | 24,15,16,60.6369399898,7083.23984811,192.388598128 70 | 3,15,32,29.4184901578,1889.75285301,80.5686379358 71 | 6,15,32,40.2540696558,3517.71396103,113.026288183 72 | 12,15,32,53.8639471584,5527.35727208,176.645752811 73 | 24,15,32,63.4522581817,7491.33394485,208.219877408 74 | 3,15,64,27.4929607279,1858.30641891,53.6176033336 75 | 6,15,64,41.5941402481,3671.30326066,113.225995197 76 | 12,15,64,54.3979448963,5651.88061249,175.622051979 77 | 24,15,64,62.9927606652,7221.1128868,218.15949304 78 | 3,15,128,36.3896513557,2859.76503851,87.5385084468 79 | 6,15,128,163.113689414,36130.7760229,314.028471577 80 | 12,15,128,60.3955619472,7371.66939472,108.494513316 81 | 24,15,128,95.7189277367,12152.8169817,382.680905198 82 | 3,15,256,133.588599848,47744.7053335,415.245213349 83 | 6,15,256,80.2043635983,13052.935034,169.80966077 84 | 12,15,256,195.848579379,49004.4940241,782.913380533 85 | 24,15,256,239.287112647,73859.3710875,553.663465474 86 | 3,25,16,27.7027122119,1851.84220797,63.618591747 87 | 6,25,16,40.4770537611,3594.97537212,102.471279899 88 | 12,25,16,53.0404482991,5564.64881183,164.020190996 89 | 24,25,16,61.3398275266,7094.1662842,205.458914638 90 | 3,25,32,29.3844191833,1854.38513476,85.1738543155 91 | 6,25,32,42.2756641773,3858.12042031,106.22310988 92 | 12,25,32,52.8404698399,5801.96687948,141.342102455 93 | 24,25,32,60.7870869505,7284.38734814,182.760824832 94 | 3,25,64,30.8711551955,2097.80028519,54.7452887001 95 | 6,25,64,46.6704219155,4162.41283141,145.522471685 96 | 12,25,64,54.7354950332,6026.51357441,156.410547345 97 | 24,25,64,63.8654890682,7871.91908864,190.880882224 98 | 3,25,128,53.4337893837,5349.270855,158.394147527 99 | 6,25,128,66.7866131816,8262.74458539,210.40734757 100 | 12,25,128,62.5179022781,7550.21681773,189.206276731 101 | 24,25,128,66.8439895102,10238.9082215,145.666582396 102 | 3,25,256,90.9643324363,11137.4619225,375.798084046 103 | 6,25,256,231.681172134,88278.2721002,907.829719195 104 | 12,25,256,307.289726227,141187.641991,1098.09357593 105 | 24,25,256,71.0213600654,10766.1592047,155.859687685 106 | 3,3,16,28.5692481372,1858.4039852,76.4872211135 107 | 6,3,16,40.9504682738,3562.31515297,117.131777079 108 | 12,3,16,52.7528920157,5610.91824163,154.931907593 109 | 
24,3,16,60.6286738662,7040.35774368,192.460058577 110 | 3,3,32,27.6811488768,1958.59939631,60.0901652522 111 | 6,3,32,42.0115987679,3719.5071379,114.96869858 112 | 12,3,32,52.4723761877,5496.99307533,162.672231722 113 | 24,3,32,62.4831292513,7192.95125314,211.841919757 114 | 3,3,64,30.5051800809,1939.76982717,93.4293890197 115 | 6,3,64,39.3526953842,3438.42779115,109.394574657 116 | 12,3,64,54.71575024,5598.95684972,180.821293095 117 | 24,3,64,61.8737223743,7612.04496059,180.998286958 118 | 3,3,128,28.2334301069,1918.27946213,68.4029700409 119 | 6,3,128,43.9020073576,4122.07251043,102.37506888 120 | 12,3,128,54.6138076826,5643.65545724,177.667529042 121 | 24,3,128,61.2731809623,7330.80919889,184.728406539 122 | 3,3,256,38.8783466019,3303.2126208,80.1006053481 123 | 6,3,256,65.9322648262,9110.882557,176.263638013 124 | 12,3,256,56.3054908261,6379.20505822,156.744023747 125 | 24,3,256,65.9950167772,9058.44246588,183.625038441 126 | 3,5,16,27.2578738363,1807.42542951,66.9949996889 127 | 6,5,16,42.211178142,3613.70540124,127.004052029 128 | 12,5,16,53.1104052634,5685.76719422,154.748040429 129 | 24,5,16,61.0934301641,6929.38061211,206.533657278 130 | 3,5,32,27.171307102,1771.32650144,65.0221755069 131 | 6,5,32,38.4942497465,3490.58492263,90.290880829 132 | 12,5,32,54.5571039163,5944.06533501,161.459489777 133 | 24,5,32,62.0771433935,7443.35101263,191.328751341 134 | 3,5,64,28.5922871312,1943.52157602,67.1340516443 135 | 6,5,64,41.7132809681,3705.12490851,112.504985078 136 | 12,5,64,54.2672010273,5864.76474782,159.332918352 137 | 24,5,64,62.1092746772,7468.00317138,202.289590642 138 | 3,5,128,35.5162080375,2382.86656513,114.081793137 139 | 6,5,128,47.394719235,4647.48158932,125.980013704 140 | 12,5,128,56.1926259519,5835.62448404,181.327146856 141 | 24,5,128,65.1554450038,8130.62726083,194.380123333 142 | 3,5,256,37.8008127596,2924.71230231,115.050465453 143 | 6,5,256,70.9268587533,9189.30146918,228.900780047 144 | 12,5,256,60.3505882524,6697.35331921,191.849103222 145 | 24,5,256,64.7323719746,8854.32156841,204.578525479 146 | 3,10,16,26.6523832871,1780.40114334,48.7988245787 147 | 6,10,16,42.5447076107,3698.37741863,123.185377983 148 | 12,10,16,54.1005658474,5515.92285622,182.046603964 149 | 24,10,16,60.8918637284,6855.39562916,209.564431645 150 | 3,10,32,26.1912098002,1779.14832171,51.0863881049 151 | 6,10,32,40.8827617031,3548.25422986,117.636071416 152 | 12,10,32,53.0225658036,5744.23741059,147.476359733 153 | 24,10,32,61.3100725151,7202.75278923,193.767421536 154 | 3,10,64,28.6823380153,1958.69441402,69.3460559903 155 | 6,10,64,44.5410835674,3951.32352757,136.387062086 156 | 12,10,64,53.0654195234,5665.19699146,157.721644738 157 | 24,10,64,63.4687510409,7289.99094508,220.222875007 158 | 3,10,128,36.167315809,2572.36953514,108.732018702 159 | 6,10,128,47.6222173043,4627.50435271,129.389826541 160 | 12,10,128,54.8305914201,6129.16583489,141.979170019 161 | 24,10,128,66.7862200177,8955.6840929,195.652299848 162 | 3,10,256,110.238917785,20663.2895325,324.836050879 163 | 6,10,256,180.663166487,38344.4007047,728.029109939 164 | 12,10,256,65.9745434321,9903.87378873,136.436513522 165 | 24,10,256,221.705670302,223693.325459,519.256500675 166 | 3,15,16,26.8677253523,1777.30628186,55.0270204229 167 | 6,15,16,39.0491679615,3428.64125065,100.861667384 168 | 12,15,16,53.2370966211,5712.88891605,155.045853689 169 | 24,15,16,61.341737445,7191.93029371,194.772777182 170 | 3,15,32,27.1001284582,1836.8812387,51.3208735277 171 | 6,15,32,40.0695107152,3479.76580344,118.015139716 172 | 
12,15,32,52.573281565,5547.75694346,154.037917228 173 | 24,15,32,61.8344979925,7100.74984476,207.453785362 174 | 3,15,64,30.3305189533,2143.44338864,69.3022094541 175 | 6,15,64,41.6209248419,3731.27640754,104.511570242 176 | 12,15,64,53.2865996347,5699.4127652,158.005250837 177 | 24,15,64,62.071619626,7251.53044549,204.269961673 178 | 3,15,128,66.2519829848,8504.11542458,196.055849741 179 | 6,15,128,47.7490195848,4253.22313313,163.652934647 180 | 12,15,128,52.3795931769,5614.77377644,148.874505741 181 | 24,15,128,63.3952388007,7876.05061214,192.259507544 182 | 3,15,256,183.42624933,64746.0296227,421.8613599 183 | 6,15,256,247.076058189,71608.6430696,548.00599393 184 | 12,15,256,70.3341501888,12795.5441155,214.432468848 185 | 24,15,256,281.670507608,101842.470278,670.584763851 186 | 3,25,16,25.8784408643,1774.65891474,50.85032671 187 | 6,25,16,39.6710905838,3432.1077846,104.510154128 188 | 12,25,16,52.4799179434,5409.94647844,172.16074673 189 | 24,25,16,60.5340014734,7110.60781689,190.026684859 190 | 3,25,32,30.9143888626,2122.83620191,77.7948968731 191 | 6,25,32,39.0531941199,3465.85657791,91.2356732014 192 | 12,25,32,53.4231480491,5717.2286504,156.722708372 193 | 24,25,32,61.8661603503,7199.08214881,201.534623837 194 | 3,25,64,30.5508642491,2114.77177946,75.804500587 195 | 6,25,64,41.6687709713,3720.87537491,108.821183877 196 | 12,25,64,53.0827897691,5799.14353019,146.631270937 197 | 24,25,64,62.4736455233,7292.45776177,205.689170954 198 | 3,25,128,62.7413375288,7287.41893985,194.868914931 199 | 6,25,128,44.8753220672,4271.79807114,113.908745007 200 | 12,25,128,72.882025958,9807.54093503,235.36707027 201 | 24,25,128,61.8005340215,7770.42284396,173.568357503 202 | 3,25,256,188.203327571,52996.5031842,310.858239776 203 | 6,25,256,98.8073539179,18750.1207978,392.745587129 204 | 12,25,256,216.128665122,59505.8576625,781.450437864 205 | 24,25,256,88.5401865745,14049.0210561,194.082004499 206 | 3,3,16,28.0299596434,2020.13494159,58.9421970546 207 | 6,3,16,40.1460110668,3674.69599929,91.8760144106 208 | 12,3,16,51.8078130516,5575.07023803,143.353088297 209 | 24,3,16,61.2331997522,7084.14212205,199.678536926 210 | 3,3,32,27.5231650538,1874.85364511,48.5886358269 211 | 6,3,32,42.1303684983,3702.38003555,117.847416935 212 | 12,3,32,54.6515329149,5667.82478383,175.935631599 213 | 24,3,32,61.7737695623,6999.65050049,210.046273196 214 | 3,3,64,28.5894835387,1803.01709645,81.3422951045 215 | 6,3,64,41.1846463094,3586.19361677,112.642997375 216 | 12,3,64,53.2606813719,5682.72898019,156.651288034 217 | 24,3,64,60.8830620172,7385.72117164,175.642733151 218 | 3,3,128,29.0191563776,2043.27627412,64.1877840025 219 | 6,3,128,41.0854038732,3672.58152326,109.945257455 220 | 12,3,128,53.6312626181,5561.05153466,170.364405277 221 | 24,3,128,61.6994729327,7185.86733562,201.743242466 222 | 3,3,256,60.1027679095,7675.4244446,155.255000781 223 | 6,3,256,59.2890295459,7444.0624848,140.985186681 224 | 12,3,256,61.0657785094,7565.32755372,167.37021352 225 | 24,3,256,62.1013622868,7155.87929684,206.798587653 226 | 3,5,16,30.8008984139,2230.96780749,62.0154760008 227 | 6,5,16,42.787934053,3890.71529656,115.537118128 228 | 12,5,16,53.5153813124,5849.44881972,148.470035991 229 | 24,5,16,60.4189150557,6917.63349794,199.836243956 230 | 3,5,32,25.8892148484,1728.70220806,48.6925732879 231 | 6,5,32,39.5764645788,3530.35528617,91.4407791739 232 | 12,5,32,52.8079997098,5564.87940286,159.34778693 233 | 24,5,32,61.8507644463,7473.30135148,187.455121682 234 | 3,5,64,28.1844312284,1832.49089057,72.8543717721 235 | 
6,5,64,40.1790517739,3530.37217169,110.953364197 236 | 12,5,64,52.3738884413,5601.7887266,149.449173537 237 | 24,5,64,61.4146819798,6969.26438429,209.544520975 238 | 3,5,128,56.6251613726,6645.36718405,99.7544542873 239 | 6,5,128,43.0852902336,4065.57006539,95.6668096332 240 | 12,5,128,55.6490696622,5716.98896661,187.5700799 241 | 24,5,128,64.5493021371,7454.69796752,220.332420779 242 | 3,5,256,47.8964785974,4428.08585162,116.559835796 243 | 6,5,256,59.4978815776,7984.05597162,124.274179053 244 | 12,5,256,62.3138585928,7211.01237067,190.238813915 245 | 24,5,256,190.989056001,60956.9179589,720.112844592 246 | 3,10,16,26.4146329533,1759.67006098,53.0517273386 247 | 6,10,16,41.0339063591,3484.72283259,122.697965894 248 | 12,10,16,52.181930574,5525.90778223,151.898632255 249 | 24,10,16,59.9857047295,7084.51241062,179.535355532 250 | 3,10,32,26.1693767673,1733.31712906,66.5055686141 251 | 6,10,32,42.0302201318,3686.43701579,117.046822255 252 | 12,10,32,51.6461221094,5469.33832039,147.68019787 253 | 24,10,32,62.1677995948,6999.83673882,220.98904728 254 | 3,10,64,32.8663252068,2222.39432312,105.805095992 255 | 6,10,64,39.0772745249,3442.49960114,92.9596828932 256 | 12,10,64,51.8948817293,5467.56753483,154.820370952 257 | 24,10,64,61.9220535315,7332.76309407,195.791357208 258 | 3,10,128,81.9107017992,12051.6705215,175.194766445 259 | 6,10,128,44.247372557,4040.88139206,116.725436241 260 | 12,10,128,56.3188877158,5743.87849006,191.430027348 261 | 24,10,128,63.9938902157,8471.27304667,168.789427421 262 | 3,10,256,73.610738575,9386.39895604,228.924412183 263 | 6,10,256,128.135517115,32733.0328931,254.768952991 264 | 12,10,256,157.125436117,34495.8977941,516.737557301 265 | 24,10,256,151.751016066,33174.5894025,614.108767084 266 | 3,15,16,27.5283831992,1861.30060512,55.021031543 267 | 6,15,16,40.0062224663,3579.50930195,96.3430826401 268 | 12,15,16,53.2466554698,5408.65971316,177.056639951 269 | 24,15,16,61.2391051979,7054.19879137,204.235365842 270 | 3,15,32,26.3166459722,1847.56701131,57.5361815088 271 | 6,15,32,38.6656559594,3439.05805105,88.2114488258 272 | 12,15,32,53.8858966457,5865.79081419,151.869927005 273 | 24,15,32,61.3057130653,7188.13144964,194.557790931 274 | 3,15,64,26.6315035397,1748.13134517,67.6479171796 275 | 6,15,64,42.1667159364,3836.22831866,103.546765387 276 | 12,15,64,52.1472744178,5449.55490861,160.679669746 277 | 24,15,64,61.7050371124,7233.52205418,199.987467758 278 | 3,15,128,52.4275950578,6172.58972901,107.8900038 279 | 6,15,128,45.0034353005,4195.94356454,130.917071015 280 | 12,15,128,60.0161245319,6684.3683728,187.722571891 281 | 24,15,128,66.1410389248,8831.6716632,192.485848636 282 | 3,15,256,61.7296475691,7955.14386364,164.921688839 283 | 6,15,256,99.632523389,15128.5988846,397.848184643 284 | 12,15,256,248.96714057,73503.3898244,545.625595233 285 | 24,15,256,111.672185918,22793.2958673,194.314001339 286 | 3,25,16,27.6771580591,1824.79670554,55.7867048874 287 | 6,25,16,45.763250009,4516.32828296,105.862333762 288 | 12,25,16,52.1068328589,5593.54574404,144.175186588 289 | 24,25,16,61.5308346989,6919.00301986,211.794505747 290 | 3,25,32,29.0131664903,1821.48017945,88.6317505894 291 | 6,25,32,41.5408460921,3638.46802464,116.730839584 292 | 12,25,32,53.0416330813,5706.45510532,152.321692052 293 | 24,25,32,60.6689179984,7090.52550004,194.964664192 294 | 3,25,64,28.8776565184,1932.49541154,77.9587780789 295 | 6,25,64,42.0995790892,3666.16496971,120.364192588 296 | 12,25,64,53.3509380433,5773.94688032,151.700727949 297 | 24,25,64,62.8773657227,7398.55451229,206.978186789 298 
| 3,25,128,47.9758343378,3604.25821097,181.02891789 299 | 6,25,128,72.4810791549,9831.69918042,237.663095557 300 | 12,25,128,97.6197685054,23491.9671037,280.648832216 301 | 24,25,128,126.770944616,27126.0839636,201.2650037 302 | 3,25,256,76.1686202071,10729.9110604,220.01299452 303 | 6,25,256,89.7219153475,28095.1413638,280.21167397 304 | 12,25,256,138.771237158,36651.0448912,262.318571531 305 | 24,25,256,212.916267125,61136.8414974,464.243083743 306 | 3,3,16,28.3945218856,1976.13805015,59.5833790069 307 | 6,3,16,41.1601270989,3736.90284121,103.861638209 308 | 12,3,16,53.2702819674,5684.33152814,157.141875099 309 | 24,3,16,61.0047013327,7109.87301281,193.511082597 310 | 3,3,32,27.0891039789,1832.45749155,62.6976435815 311 | 6,3,32,40.117096253,3537.09477543,103.806726951 312 | 12,3,32,54.2055414565,5833.25695021,162.143434852 313 | 24,3,32,60.8320565989,7077.35027097,194.357324571 314 | 3,3,64,28.1223561236,1852.86218567,67.872033099 315 | 6,3,64,40.27649928,3518.5000933,110.02652688 316 | 12,3,64,53.0120339246,5731.3549033,148.531086662 317 | 24,3,64,61.6791753195,7312.90699129,193.715670731 318 | 3,3,128,31.2496689519,2232.75850076,81.6146548142 319 | 6,3,128,43.7739171472,3908.76183017,125.782877181 320 | 12,3,128,54.3856197499,5729.22472023,172.646929692 321 | 24,3,128,60.9386721192,7361.74115109,178.673147857 322 | 3,3,256,80.1180487935,11472.6933137,161.600846343 323 | 6,3,256,49.5435332548,5414.56791177,104.414063463 324 | 12,3,256,67.1889635841,8883.74145193,201.003524422 325 | 24,3,256,63.9507625814,8309.32213982,178.25717353 326 | 3,5,16,28.24065003,1905.2372083,71.6568395659 327 | 6,5,16,41.5540561824,3695.1090885,112.666687963 328 | 12,5,16,52.4572373291,5604.87268725,152.77556946 329 | 24,5,16,61.7281278799,7169.10360741,202.996307574 330 | 3,5,32,27.7535263948,1826.36452858,66.2210894109 331 | 6,5,32,41.4666304249,3521.83205735,127.052116791 332 | 12,5,32,53.1888819059,5624.4828919,160.410333749 333 | 24,5,32,61.4762909826,7154.59721971,197.893431037 334 | 3,5,64,26.1978344246,1752.30325521,59.2553591762 335 | 6,5,64,40.4955303246,3505.01177439,113.29147795 336 | 12,5,64,52.5298890768,5569.69835085,154.156321103 337 | 24,5,64,62.0285389962,7497.34205316,188.511090888 338 | 3,5,128,35.3378660649,2727.59613969,93.6053218742 339 | 6,5,128,42.0794138456,3770.21192388,109.700313521 340 | 12,5,128,55.9429587818,5981.66341823,174.351397407 341 | 24,5,128,62.977794659,7172.21729294,220.221353631 342 | 3,5,256,65.1003798959,8102.24391786,189.531609866 343 | 6,5,256,58.8496761405,6403.36502266,159.204229901 344 | 12,5,256,67.3658447122,8575.66028834,195.52255053 345 | 24,5,256,68.3856947283,8786.33536847,192.521646389 346 | 3,10,16,27.4405430896,1794.91145508,63.9939406556 347 | 6,10,16,41.6675004266,3615.31588765,120.395475302 348 | 12,10,16,53.7624281394,5687.44636678,161.653251743 349 | 24,10,16,61.6691576034,7202.57379658,200.705163906 350 | 3,10,32,26.4850007618,1782.49228903,54.3007540166 351 | 3,168,32,30.7234852177,2153.7434355,91.2120885407 352 | 1,168,16,18.8302553214,778.280272338,29.8522929713 353 | 3,168,16,27.275449321,1853.28839407,53.4692836046 354 | 6,168,16,40.6152020216,3663.43791966,103.978287771 355 | 12,168,16,53.2685102669,5531.30518598,175.054431688 356 | 24,168,16,61.539156608,7016.41172316,216.706944912 357 | 1,168,32,15.8313225492,634.495595303,36.6129898969 358 | 3,168,32,29.9643555061,1989.77798373,84.5015655818 359 | 6,168,32,41.1683435255,3571.85770679,125.870927203 360 | 12,168,32,51.591004574,5523.74033746,149.739406274 361 | 
24,168,32,61.7601065118,7311.82112935,199.22997728 362 | 1,168,16,13.6793385362,555.211903327,25.3415040305 363 | 3,168,16,26.9050228702,1823.6439688,55.2588439086 364 | 6,168,16,39.702877601,3629.96296635,87.6997362098 365 | 12,168,16,52.1430346888,5475.83684522,160.794529141 366 | 24,168,16,61.0124481104,7274.94914019,192.264709428 367 | 1,168,32,14.6979261062,625.567286162,31.5142834604 368 | 3,168,32,27.1280684621,1839.94579966,59.3506318656 369 | 6,168,32,40.1908538009,3583.60942726,99.4448086913 370 | 12,168,32,53.8514374122,5940.13374655,154.346114874 371 | 24,168,32,62.8637949291,7271.16183846,217.861402561 372 | 1,168,16,17.8805562282,664.874646545,59.7337409904 373 | 3,168,16,29.0571717157,1900.81396644,75.4325139898 374 | 6,168,16,41.7691387219,3584.92507593,123.947196282 375 | -------------------------------------------------------------------------------- /univariate/results/gef.csv: -------------------------------------------------------------------------------- 1 | horizon,timestep,n_hidden,mae,mse,mape 2 | 3,3,16,9.84935328431,313.384423138,14.2822468453 3 | 6,3,16,12.8353930698,528.256766351,17.3879313155 4 | 12,3,16,13.8048288179,581.702400377,19.7771929042 5 | 24,3,16,10.3895290716,364.218690737,14.6121638584 6 | 1,3,16,5.24831034335,97.7056875918,7.90380021515 7 | 3,3,16,10.1065737986,332.120639636,14.5485924532 8 | 6,3,16,13.6199973027,571.562027267,18.9293210052 9 | 12,3,16,14.4269563906,533.37157636,21.2809752997 10 | 24,3,16,9.95896995635,328.349515322,14.4111137671 11 | 1,3,32,4.54437115406,66.3805538646,7.21395083967 12 | 3,3,32,10.651330747,316.303525907,16.8127535704 13 | 6,3,32,12.7324513578,438.212782862,18.8363261014 14 | 12,3,32,12.0720943766,487.068818111,17.2698774341 15 | 24,3,32,10.096295184,335.754258946,14.0426973469 16 | 1,3,64,4.70578428617,74.2507728699,7.23379931435 17 | 3,3,64,10.1233188564,267.400944299,16.5163795866 18 | 6,3,64,18.3542943946,1153.11510143,22.576894372 19 | 12,3,64,12.0418202045,516.07580232,16.1924842226 20 | 24,3,64,10.27320468,354.268668722,14.1694989047 21 | 1,3,128,7.60059771289,143.706376248,13.1540538866 22 | 3,3,128,11.8386188105,451.127172144,16.4999597182 23 | 6,3,128,16.2058426788,901.204847993,21.0605269773 24 | 12,3,128,13.5395103936,617.359449718,18.1751328563 25 | 24,3,128,10.591549432,368.633435978,14.6345307285 26 | 1,3,256,5.30851643692,93.6380425941,8.08383181339 27 | 3,3,256,10.2090191576,288.662524742,16.2241487245 28 | 6,3,256,17.1610906604,1042.71388119,21.6378126968 29 | 12,3,256,15.0422946318,811.938110508,19.0260046801 30 | 24,3,256,12.4060050348,418.744593683,18.3881731298 31 | 1,5,16,5.83173361226,90.4493541143,9.81622249326 32 | 3,5,16,10.0284298072,247.652802594,16.7770643183 33 | 6,5,16,12.3373442981,482.399263692,17.1759172249 34 | 12,5,16,12.4316531999,440.260068061,18.2913299149 35 | 24,5,16,10.361586536,393.601937219,14.5320450565 36 | 1,5,32,5.09279233792,72.7352884645,8.34363633867 37 | 3,5,32,9.82988736059,308.669853021,14.0927199588 38 | 6,5,32,11.2066718271,326.947310695,17.1932756378 39 | 12,5,32,12.1541871467,507.892272439,17.1937342509 40 | 24,5,32,10.2287341519,363.936582307,14.1790770155 41 | 1,5,64,4.76110033419,72.405335948,7.46117234618 42 | 3,5,64,10.1586406848,357.283694626,14.0149836569 43 | 6,5,64,13.6259921895,632.736274033,17.6442546397 44 | 12,5,64,12.609242316,494.5558985,18.2700138963 45 | 24,5,64,11.3134391735,454.790833777,15.1010974049 46 | 1,5,128,7.68356588878,181.090647076,11.3944268675 47 | 3,5,128,11.1273889832,375.724063433,15.9300278992 48 | 
6,5,128,13.2940101571,429.259661342,20.8430062377 49 | 12,5,128,16.7105685969,991.501176886,21.8565837144 50 | 24,5,128,11.5513023505,387.791265546,17.6291018222 51 | 1,5,256,51.4544021346,5145.42102958,87.5434535275 52 | 3,5,256,14.8071344044,685.782619574,19.7877906115 53 | 6,5,256,14.0834311506,689.940547174,18.3232571665 54 | 12,5,256,14.1616380212,561.562946291,20.0829448623 55 | 24,5,256,19.5780216709,1239.72660545,26.1940136817 56 | 1,10,16,5.93430283983,129.938388819,8.53292054297 57 | 3,10,16,9.34786986975,265.801486899,13.7503854801 58 | 6,10,16,10.3795547696,335.140714138,14.8163847009 59 | 12,10,16,10.9436364641,374.821096307,16.4925725267 60 | 24,10,16,10.947327319,462.732369389,14.2261463841 61 | 1,10,32,5.90474402138,113.767481329,8.71620830975 62 | 3,10,32,10.0882173217,331.760117289,14.1815260563 63 | 6,10,32,12.7690241883,515.823796921,17.4533872447 64 | 12,10,32,11.5791180294,443.764093738,16.6897904094 65 | 24,10,32,9.85018285141,309.650319505,14.3197933973 66 | 1,10,64,4.92786785504,79.0101040621,7.69819791076 67 | 3,10,64,11.8777414077,467.4745419,15.8133484752 68 | 6,10,64,12.657209736,483.486937928,17.5621102268 69 | 12,10,64,11.909304912,449.64690994,16.9655949531 70 | 24,10,64,10.7626774333,372.52128555,14.619945237 71 | 1,10,128,5.0558989226,76.3442881339,7.93322221977 72 | 3,10,128,12.9420224591,472.753195606,19.3374175131 73 | 6,10,128,16.5811769753,890.575757397,21.1828182856 74 | 12,10,128,16.3275661898,865.343647278,22.0561458206 75 | 24,10,128,15.3011515767,592.073097549,21.7255853587 76 | 1,10,256,36.0981416511,2823.21586139,57.0688500272 77 | 3,10,256,17.0864987838,913.048094817,22.3495165483 78 | 6,10,256,30.7728344564,2104.60190702,49.6209383097 79 | 12,10,256,19.4581270433,1208.14238084,24.8410445663 80 | 24,10,256,30.8258781014,1909.45042001,48.4187511373 81 | 1,15,16,5.68608550997,105.644261564,8.65831607341 82 | 3,15,16,9.87842355463,318.142270196,14.2869466552 83 | 6,15,16,10.2351058476,335.101384078,15.432620763 84 | 12,15,16,10.6079748983,374.289840535,14.980055721 85 | 24,15,16,9.80784746405,295.347113886,14.9280531276 86 | 1,15,32,4.72106252981,80.1224435013,6.99773831365 87 | 3,15,32,8.97908365048,247.371279325,13.6521364846 88 | 6,15,32,10.8723860651,400.068303619,15.303231295 89 | 12,15,32,12.2814022244,570.070520843,16.9473159969 90 | 24,15,32,9.79030475426,318.321018154,13.8649498522 91 | 1,15,64,5.33179035698,84.9820909789,8.31209693758 92 | 3,15,64,9.47495370875,302.138222588,13.2934472284 93 | 6,15,64,11.1211389184,390.462401358,16.4976700966 94 | 12,15,64,13.3036412128,624.945077391,17.0855236391 95 | 24,15,64,10.8178949287,420.675853161,14.3941193312 96 | 1,15,128,13.5422303066,607.016849429,18.0810253625 97 | 3,15,128,10.7695046278,369.359060293,14.9904512755 98 | 6,15,128,12.5158190652,504.862476725,17.4201455524 99 | 12,15,128,13.0774026189,578.775368639,17.7314920605 100 | 24,15,128,14.6927681592,568.86602026,20.4287079669 101 | 1,15,256,21.7857997807,1318.16036019,33.0141445494 102 | 3,15,256,25.1475763962,1665.39361843,38.4265662805 103 | 6,15,256,27.2402007716,1532.49342448,49.1687426276 104 | 12,15,256,46.2463819142,6559.46569863,70.0409480118 105 | 3,168,32,8.40669470492,203.2086492,13.1428868192 106 | 1,168,16,5.8402748507,97.6695603114,9.19022393473 107 | 3,168,16,10.4984913306,268.10787939,16.9269651362 108 | 3,168,32,8.41220203443,237.009596615,12.2110514966 109 | 1,3,16,5.22142724722,83.2554816339,8.20060618859 110 | 3,3,16,10.0472772817,342.062175677,13.8868649689 111 | 6,3,16,12.1324788924,466.039423022,17.0554914218 
112 | 12,3,16,12.1463613111,503.250643164,17.3831926702 113 | 24,3,16,9.99664998882,342.842662658,14.5434631349 114 | 1,3,32,4.72875899135,77.1302914881,7.19083706323 115 | 3,3,32,10.0985324398,335.342985625,14.70028409 116 | 6,3,32,11.8486368343,392.141857716,18.2378755095 117 | 12,3,32,10.5104941481,313.564082106,16.0407934597 118 | 24,3,32,9.98700325591,289.440082148,15.6213991504 119 | 1,3,64,5.10932333837,79.0496013938,8.18479710914 120 | 3,3,64,10.3408835676,320.502091356,15.0991003748 121 | 6,3,64,12.6163072581,543.226276851,16.8942246029 122 | 12,3,64,13.7377459434,522.697303188,19.9155795731 123 | 24,3,64,9.69298880375,294.865195686,14.0987620288 124 | 1,3,128,6.18313874968,114.573062965,9.69692837886 125 | 3,3,128,10.8299148612,353.692665508,15.9053959443 126 | 6,3,128,11.9980461866,397.26042347,17.974462211 127 | 12,3,128,15.5459421828,864.893757329,19.6660184034 128 | 24,3,128,10.2307410011,309.104960008,15.3545407524 129 | 1,3,256,6.71813262403,121.657588452,11.5698069727 130 | 3,3,256,11.7017492042,458.457603765,16.1401814963 131 | 6,3,256,13.5554535185,544.545647988,20.3495362964 132 | 12,3,256,17.8917578317,1017.15090609,22.9854597631 133 | 24,3,256,12.4112924084,448.909364306,17.7177151761 134 | 1,5,16,8.0738831773,124.636398477,15.0795870812 135 | 3,5,16,9.41558145371,267.658145781,14.4730508018 136 | 6,5,16,11.7492101228,350.922668085,18.5050515712 137 | 12,5,16,11.3443517758,361.30126044,17.1045719455 138 | 24,5,16,10.1279836128,358.298569282,14.3725221644 139 | 1,5,32,5.45310433754,101.402597267,8.0347240808 140 | 3,5,32,9.23058112442,278.771864408,13.2928365444 141 | 6,5,32,12.1420124676,472.702594299,16.7907310712 142 | 12,5,32,12.4259047079,413.860299547,18.9839475703 143 | 24,5,32,10.1530908311,330.401944243,14.6349228012 144 | 1,5,64,6.46142249467,141.905493056,8.99416388013 145 | 3,5,64,9.58593648014,281.237256509,14.1723033519 146 | 6,5,64,13.2282226612,570.05687025,17.8575660882 147 | 12,5,64,11.5629003528,369.483046494,18.4537047438 148 | 24,5,64,10.1891712241,347.327276724,13.8534537513 149 | 1,5,128,7.66820961463,188.240341456,11.3707174315 150 | 3,5,128,12.5404819595,430.691450601,19.8556576612 151 | 6,5,128,12.9958573816,501.543594549,17.8661129082 152 | 12,5,128,15.5785342443,856.100893578,19.9221296473 153 | 3,168,32,8.41220203443,237.009596615,12.2110514966 154 | 1,3,16,5.22142724722,83.2554816339,8.20060618859 155 | 3,3,16,10.0472772817,342.062175677,13.8868649689 156 | 6,3,16,12.1324788924,466.039423022,17.0554914218 157 | 12,3,16,12.1463613111,503.250643164,17.3831926702 158 | 24,3,16,9.99664998882,342.842662658,14.5434631349 159 | 1,3,32,4.72875899135,77.1302914881,7.19083706323 160 | 3,3,32,10.0985324398,335.342985625,14.70028409 161 | 6,3,32,11.8486368343,392.141857716,18.2378755095 162 | 12,3,32,10.5104941481,313.564082106,16.0407934597 163 | 24,3,32,9.98700325591,289.440082148,15.6213991504 164 | 1,3,64,5.10932333837,79.0496013938,8.18479710914 165 | 3,3,64,10.3408835676,320.502091356,15.0991003748 166 | 6,3,64,12.6163072581,543.226276851,16.8942246029 167 | 12,3,64,13.7377459434,522.697303188,19.9155795731 168 | 24,3,64,9.69298880375,294.865195686,14.0987620288 169 | 1,3,128,6.18313874968,114.573062965,9.69692837886 170 | 3,3,128,10.8299148612,353.692665508,15.9053959443 171 | 6,3,128,11.9980461866,397.26042347,17.974462211 172 | 12,3,128,15.5459421828,864.893757329,19.6660184034 173 | 24,3,128,10.2307410011,309.104960008,15.3545407524 174 | 1,3,256,6.71813262403,121.657588452,11.5698069727 175 | 
3,3,256,11.7017492042,458.457603765,16.1401814963 176 | 6,3,256,13.5554535185,544.545647988,20.3495362964 177 | 12,3,256,17.8917578317,1017.15090609,22.9854597631 178 | 24,3,256,12.4112924084,448.909364306,17.7177151761 179 | 1,5,16,8.0738831773,124.636398477,15.0795870812 180 | 3,5,16,9.41558145371,267.658145781,14.4730508018 181 | 6,5,16,11.7492101228,350.922668085,18.5050515712 182 | 12,5,16,11.3443517758,361.30126044,17.1045719455 183 | 24,5,16,10.1279836128,358.298569282,14.3725221644 184 | 1,5,32,5.45310433754,101.402597267,8.0347240808 185 | 3,5,32,9.23058112442,278.771864408,13.2928365444 186 | 6,5,32,12.1420124676,472.702594299,16.7907310712 187 | 12,5,32,12.4259047079,413.860299547,18.9839475703 188 | 24,5,32,10.1530908311,330.401944243,14.6349228012 189 | 1,5,64,6.46142249467,141.905493056,8.99416388013 190 | 3,5,64,9.58593648014,281.237256509,14.1723033519 191 | 6,5,64,13.2282226612,570.05687025,17.8575660882 192 | 12,5,64,11.5629003528,369.483046494,18.4537047438 193 | 24,5,64,10.1891712241,347.327276724,13.8534537513 194 | 1,5,128,7.66820961463,188.240341456,11.3707174315 195 | 3,5,128,12.5404819595,430.691450601,19.8556576612 196 | 6,5,128,12.9958573816,501.543594549,17.8661129082 197 | 12,5,128,15.5785342443,856.100893578,19.9221296473 198 | horizon,timestep,n_hidden,mae,mse,mape 199 | 1,168,16,5.67677236894,120.9180011,8.71057124541 200 | 3,168,16,10.9471625291,365.579631557,15.568187798 201 | 6,168,16,10.9548025833,497.637699501,15.2375628573 202 | -------------------------------------------------------------------------------- /univariate/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # We disable pylint because we need python3 compatibility. 6 | from six.moves import xrange # pylint: disable=redefined-builtin 7 | # from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl #omit when tf = 1.3 8 | from tensorflow.python.ops import rnn_cell_impl 9 | from tensorflow.python.framework import dtypes 10 | from tensorflow.python.framework import ops 11 | from tensorflow.python.ops import array_ops 12 | from tensorflow.python.ops import math_ops 13 | from tensorflow.python.ops import nn_ops 14 | from tensorflow.python.ops import variable_scope 15 | from tensorflow.python.util import nest 16 | 17 | # TODO(ebrevdo): Remove once _linear is fully deprecated. 18 | # linear = core_rnn_cell_impl._linear # pylint: disable=protected-access #omit when tf = 1.3 19 | linear = rnn_cell_impl._linear #add when tf = 1.3 20 | 21 | def attention_encoder(encoder_inputs, attention_states, cell, 22 | output_size=None, num_heads=1, 23 | dtype=dtypes.float32, scope=None): 24 | 25 | """RNN encoder with attention. 26 | In this context "attention" means that, during encoding, the RNN can look up 27 | information in the additional tensor "attention_states", which is constructed by transpose the dimensions of time steps and input features of the inputs, 28 | and it does this to focus on a few features of the input. 29 | 30 | Args: 31 | encoder_inputs: A list of 2D Tensors [batch_size x n_input_encoder]. 32 | initial_state: 2D Tensor [batch_size x cell.state_size]. 33 | attention_states: 3D Tensor [batch_size x attn_length x attn_size]. 34 | cell: rnn_cell.RNNCell defining the cell function and size. 35 | output_size: Size of the output vectors; if None, we use cell.output_size. 
36 | num_heads: Number of attention heads that read from attention_states. 37 | dtype: The dtype to use for the RNN initial state (default: tf.float32). 38 | scope: VariableScope for the created subgraph; default: "attention_decoder". 39 | 40 | Returns: 41 | A tuple of the form (outputs, state, attn_weights), where: 42 | outputs: A list of the encoder hidden states. Each element is a 2D Tensor of shape [batch_size x output_size]. 43 | state: The state of encoder cell at the final time-step. It is a 2D Tensor of shape [batch_size x cell.state_size]. 44 | attn_weights: A list of the input attention weights. Each element is a 2D Tensor of shape [batch_size x attn_length] 45 | Raises: 46 | ValueError: when num_heads is not positive, there are no inputs, shapes 47 | of attention_states are not set, or input size cannot be inferred 48 | from the input. 49 | """ 50 | if not encoder_inputs: 51 | raise ValueError("Must provide at least 1 input to attention encoder.") 52 | if num_heads < 1: 53 | raise ValueError("With less than 1 heads, use a non-attention encoder.") 54 | if not attention_states.get_shape()[1:2].is_fully_defined(): 55 | raise ValueError("Shape[1] and [2] of attention_states must be known: %s" 56 | % attention_states.get_shape()) 57 | if output_size is None: 58 | output_size = cell.output_size 59 | 60 | with variable_scope.variable_scope(scope or "attention_encoder"): 61 | # get the batch_size of the encoder_input 62 | batch_size = array_ops.shape(encoder_inputs[0])[0] # Needed for reshaping. 63 | # attention_state.shape (batch_size, n_input_encoder, n_steps_encoder) 64 | attn_length = attention_states.get_shape()[1].value # n_input_encoder 65 | attn_size = attention_states.get_shape()[2].value # n_steps_encoder 66 | 67 | # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before. 68 | # hidden_features shape: (batch_size, attn_length, 1, attn_size) 69 | hidden = array_ops.reshape( 70 | attention_states, [-1, attn_length, 1, attn_size]) 71 | hidden_features = [] 72 | v = [] 73 | attention_vec_size = attn_size # Size of query vectors for attention. 74 | for a in xrange(num_heads): 75 | k = variable_scope.get_variable("Attn_EncoderW_%d" % a, 76 | [1, 1, attn_size, attention_vec_size]) 77 | hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")) 78 | v.append(variable_scope.get_variable("AttnEncoderV_%d" % a, 79 | [attention_vec_size])) 80 | # how to get the initial_state 81 | initial_state_size = array_ops.stack([batch_size, output_size]) 82 | initial_state = [array_ops.zeros(initial_state_size,dtype=dtype) for _ in xrange(2)] 83 | state = initial_state 84 | 85 | def attention(query): 86 | """Put attention masks on hidden using hidden_features and query.""" 87 | ds = [] # Results of attention reads will be stored here. 88 | if nest.is_sequence(query): # If the query is a tuple, flatten it. 89 | query_list = nest.flatten(query) 90 | for q in query_list: # Check that ndims == 2 if specified. 91 | ndims = q.get_shape().ndims 92 | if ndims: 93 | assert ndims == 2 94 | query = array_ops.concat(query_list,1) 95 | for a in xrange(num_heads): 96 | with variable_scope.variable_scope("AttentionEncoder_%d" % a): 97 | # y with the shape (batch_size, attention_vec_size) 98 | y = linear(query, attention_vec_size, True) 99 | # y with the shape (batch_size, 1, 1, attention_vec_size) 100 | y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size]) 101 | # Attention mask is a softmax of v^T * tanh(...). 
102 | # hidden_features with the shape (batch_size, attn_length, 1, attn_size) 103 | s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3]) 104 | # a with shape (batch_size, attn_length) 105 | # a is the attention weight 106 | a = nn_ops.softmax(s) 107 | ds.append(a) 108 | return ds 109 | 110 | outputs = [] 111 | attn_weights = [] 112 | batch_attn_size = array_ops.stack([batch_size, attn_length]) 113 | attns = [array_ops.zeros(batch_attn_size, dtype=dtype) 114 | for _ in xrange(num_heads)] 115 | 116 | # i is the index of the which time step 117 | # inp is numpy.array and the shape of inp is (batch_size, n_feature) 118 | for i, inp in enumerate(encoder_inputs): 119 | if i > 0: 120 | variable_scope.get_variable_scope().reuse_variables() 121 | input_size = inp.get_shape().with_rank(2)[1] 122 | if input_size.value is None: 123 | raise ValueError("Could not infer input size from input: %s" % inp.name) 124 | 125 | # multiply attention weights with the original input 126 | # get the newly input 127 | x = attns[0]*inp 128 | # Run the BasicLSTM with the newly input 129 | cell_output, state = cell(x, state) 130 | 131 | # Run the attention mechanism. 132 | attns = attention(state) 133 | 134 | with variable_scope.variable_scope("AttnEncoderOutputProjection"): 135 | output = cell_output 136 | 137 | outputs.append(output) 138 | attn_weights.append(attns) 139 | 140 | return outputs, state, attn_weights 141 | 142 | def attention_decoder(decoder_inputs, 143 | initial_state, 144 | attention_states, 145 | cell, 146 | output_size=None, 147 | num_heads=1, 148 | loop_function=None, 149 | dtype=None, 150 | scope=None, 151 | initial_state_attention=False): 152 | """RNN decoder with attention for the sequence-to-sequence model. 153 | In this context "attention" means that, during decoding, the RNN can look up 154 | information in the additional tensor attention_states, and it does this by 155 | focusing on a few entries from the tensor. This model has proven to yield 156 | especially good results in a number of sequence-to-sequence tasks. This 157 | implementation is based on http://arxiv.org/abs/1412.7449 (see below for 158 | details). It is recommended for complex sequence-to-sequence tasks. 159 | Args: 160 | decoder_inputs: A list of 2D Tensors [batch_size x input_size]. 161 | initial_state: 2D Tensor [batch_size x cell.state_size]. 162 | attention_states: 3D Tensor [batch_size x attn_length x attn_size]. 163 | cell: tf.nn.rnn_cell.RNNCell defining the cell function and size. 164 | output_size: Size of the output vectors; if None, we use cell.output_size. 165 | num_heads: Number of attention heads that read from attention_states. 166 | loop_function: If not None, this function will be applied to i-th output 167 | in order to generate i+1-th input, and decoder_inputs will be ignored, 168 | except for the first element ("GO" symbol). This can be used for decoding, 169 | but also for training to emulate http://arxiv.org/abs/1506.03099. 170 | Signature -- loop_function(prev, i) = next 171 | * prev is a 2D Tensor of shape [batch_size x output_size], 172 | * i is an integer, the step number (when advanced control is needed), 173 | * next is a 2D Tensor of shape [batch_size x input_size]. 174 | dtype: The dtype to use for the RNN initial state (default: tf.float32). 175 | scope: VariableScope for the created subgraph; default: "attention_decoder". 176 | initial_state_attention: If False (default), initial attentions are zero. 
177 | If True, initialize the attentions from the initial state and attention 178 | states -- useful when we wish to resume decoding from a previously 179 | stored decoder state and attention states. 180 | Returns: 181 | A tuple of the form (outputs, state), where: 182 | outputs: A list of the same length as decoder_inputs of 2D Tensors of 183 | shape [batch_size x output_size]. These represent the generated outputs. 184 | Output i is computed from input i (which is either the i-th element 185 | of decoder_inputs or loop_function(output {i-1}, i)) as follows. 186 | First, we run the cell on a combination of the input and previous 187 | attention masks: 188 | cell_output, new_state = cell(linear(input, prev_attn), prev_state). 189 | Then, we calculate new attention masks: 190 | new_attn = softmax(V^T * tanh(W * attention_states + U * new_state)) 191 | and then we calculate the output: 192 | output = linear(cell_output, new_attn). 193 | state: The state of each decoder cell the final time-step. 194 | It is a 2D Tensor of shape [batch_size x cell.state_size]. 195 | Raises: 196 | ValueError: when num_heads is not positive, there are no inputs, shapes 197 | of attention_states are not set, or input size cannot be inferred 198 | from the input. 199 | """ 200 | if not decoder_inputs: 201 | raise ValueError("Must provide at least 1 input to attention decoder.") 202 | if num_heads < 1: 203 | raise ValueError("With less than 1 heads, use a non-attention decoder.") 204 | if attention_states.get_shape()[2].value is None: 205 | raise ValueError("Shape[2] of attention_states must be known: %s" % 206 | attention_states.get_shape()) 207 | if output_size is None: 208 | output_size = cell.output_size 209 | 210 | with variable_scope.variable_scope( 211 | scope or "attention_decoder", dtype=dtype) as scope: 212 | dtype = scope.dtype 213 | 214 | batch_size = array_ops.shape(decoder_inputs[0])[0] # Needed for reshaping. 215 | attn_length = attention_states.get_shape()[1].value 216 | if attn_length is None: 217 | attn_length = array_ops.shape(attention_states)[1] 218 | attn_size = attention_states.get_shape()[2].value 219 | 220 | # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before. 221 | hidden = array_ops.reshape(attention_states, 222 | [-1, attn_length, 1, attn_size]) 223 | hidden_features = [] 224 | v = [] 225 | attention_vec_size = attn_size # Size of query vectors for attention. 226 | for a in xrange(num_heads): 227 | k = variable_scope.get_variable( 228 | "AttnW_%d" % a, [1, 1, attn_size, attention_vec_size], 229 | dtype=dtype) 230 | hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")) 231 | v.append( 232 | variable_scope.get_variable( 233 | "AttnV_%d" % a, [attention_vec_size], dtype=dtype)) 234 | 235 | state = initial_state 236 | 237 | def attention(query): 238 | """Put attention masks on hidden using hidden_features and query.""" 239 | ds = [] # Results of attention reads will be stored here. 240 | if nest.is_sequence(query): # If the query is a tuple, flatten it. 241 | query_list = nest.flatten(query) 242 | for q in query_list: # Check that ndims == 2 if specified. 
243 | ndims = q.get_shape().ndims 244 | if ndims: 245 | assert ndims == 2 246 | query = array_ops.concat(query_list, 1) 247 | for a in xrange(num_heads): 248 | with variable_scope.variable_scope("Attention_%d" % a): 249 | y = linear(query, attention_vec_size, True) # tf 1.3: use the _linear alias defined at the top of this file 250 | y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size]) 251 | y = math_ops.cast(y, dtype) 252 | # Attention mask is a softmax of v^T * tanh(...). 253 | s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), 254 | [2, 3]) 255 | a = nn_ops.softmax(math_ops.cast(s, dtype=dtypes.float32)) 256 | # Now calculate the attention-weighted vector d. 257 | a = math_ops.cast(a, dtype) 258 | d = math_ops.reduce_sum( 259 | array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) 260 | ds.append(array_ops.reshape(d, [-1, attn_size])) 261 | return ds 262 | 263 | outputs = [] 264 | prev = None 265 | batch_attn_size = array_ops.stack([batch_size, attn_size]) 266 | attns = [ 267 | array_ops.zeros( 268 | batch_attn_size, dtype=dtype) for _ in xrange(num_heads) 269 | ] 270 | for a in attns: # Ensure the second shape of attention vectors is set. 271 | a.set_shape([None, attn_size]) 272 | if initial_state_attention: 273 | attns = attention(initial_state) 274 | for i, inp in enumerate(decoder_inputs): 275 | if i > 0: 276 | variable_scope.get_variable_scope().reuse_variables() 277 | # If loop_function is set, we use it instead of decoder_inputs. 278 | if loop_function is not None and prev is not None: 279 | with variable_scope.variable_scope("loop_function", reuse=True): 280 | inp = loop_function(prev, i) 281 | # Merge input and previous attentions into one vector of the right size. 282 | input_size = inp.get_shape().with_rank(2)[1] 283 | if input_size.value is None: 284 | raise ValueError("Could not infer input size from input: %s" % inp.name) 285 | 286 | inputs = [inp] + attns 287 | inputs = [math_ops.cast(e, dtype) for e in inputs] 288 | x = linear(inputs, input_size, True) 289 | # Run the RNN. 290 | cell_output, state = cell(x, state) 291 | # Run the attention mechanism. 292 | if i == 0 and initial_state_attention: 293 | with variable_scope.variable_scope( 294 | variable_scope.get_variable_scope(), reuse=True): 295 | attns = attention(state) 296 | else: 297 | attns = attention(state) 298 | 299 | with variable_scope.variable_scope("AttnOutputProjection"): 300 | cell_output = math_ops.cast(cell_output, dtype) 301 | inputs = [cell_output] + attns 302 | output = linear(inputs, output_size, True) 303 | if loop_function is not None: 304 | prev = output 305 | outputs.append(output) 306 | 307 | return outputs, state 308 | --------------------------------------------------------------------------------
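
Usage sketch (not a file from the repository): attention_encoder and attention_decoder in univariate/utils.py are meant to be wired together the way the RNN() graph builder in main.py does it — input attention over the driving series in the encoder, then temporal attention over the stacked encoder hidden states in the decoder. The snippet below shows that wiring in isolation. It is a minimal illustration only: the toy sizes, the placeholder names, the to_step_list helper, and the `from utils import ...` import path are assumptions rather than code taken from the repository, and it presumes a TensorFlow 1.x graph-mode environment.

# Illustrative sketch only; assumptions noted above (TF 1.x graph mode).
import tensorflow as tf
from tensorflow.contrib import rnn
from utils import attention_encoder, attention_decoder  # assumed import path

n_steps, n_features, n_hidden = 10, 7, 16  # assumed toy sizes

# Placeholders: driving series, target history, and the driving series
# transposed to (batch, n_features, n_steps) for the input attention.
enc_in = tf.placeholder(tf.float32, [None, n_steps, n_features])
dec_in = tf.placeholder(tf.float32, [None, n_steps, 1])
attn_states = tf.placeholder(tf.float32, [None, n_features, n_steps])

def to_step_list(x, n_input):
    # (batch, n_steps, n_input) -> list of n_steps tensors of shape (batch, n_input)
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    return tf.split(x, n_steps, 0)

with tf.variable_scope('encoder'):
    enc_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    enc_outputs, enc_state, input_attn_weights = attention_encoder(
        to_step_list(enc_in, n_features), attn_states, enc_cell)

# Stack the encoder hidden states into (batch, n_steps, n_hidden); the decoder
# places its temporal attention over this tensor.
dec_attn_states = tf.concat(
    [tf.reshape(h, [-1, 1, enc_cell.output_size]) for h in enc_outputs], 1)

with tf.variable_scope('decoder'):
    dec_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    dec_outputs, dec_state = attention_decoder(
        to_step_list(dec_in, 1), enc_state, dec_attn_states, dec_cell)

# Linear projection of the last decoder output to a one-step forecast,
# mirroring the weights['out1']/biases['out1'] projection in RNN().
w_out = tf.Variable(tf.random_normal([n_hidden, 1]))
b_out = tf.Variable(tf.random_normal([1]))
y_hat = tf.matmul(dec_outputs[-1], w_out) + b_out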