├── .gitignore
├── README.md
├── __init__.py
├── example.py
├── main.py
└── source
    ├── EncoderDecoderAttention.py
    ├── LSTM.py
    └── __init__.py

/.gitignore:
--------------------------------------------------------------------------------
.idea/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AttentionLSTM
Implements an attention mechanism on top of an LSTM encoder-decoder using TensorFlow.
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windg/AttentionLSTM/79a4294b09793f57bff64fc9fedcaeeeaff2e239/__init__.py
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
from random import randint
from numpy import array
from numpy import argmax
from numpy import zeros
import tensorflow as tf
from source.LSTM import LSTM
from source.LSTM import LSTMAttention
from source.EncoderDecoderAttention import EDA
import matplotlib.pyplot as plt


# generate a sequence of random integers
def generate_sequence(length, n_unique):
    return [randint(0, n_unique - 1) for _ in range(length)]


# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    encoding = list()
    for value in sequence:
        vector = [0 for _ in range(n_unique)]
        vector[value] = 1
        encoding.append(vector)
    return array(encoding)


# decode a one hot encoded sequence
def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]


# prepare a batch of (input, target) pairs for the LSTM
def get_pair(batch_size, n_in, n_out, cardinality):
    # generate random sequences; the target keeps the first n_out symbols and pads with zeros
    Xout = zeros([batch_size, n_in, cardinality])
    yout = zeros([batch_size, n_in, cardinality])
    for i in range(batch_size):
        sequence_in = generate_sequence(n_in, cardinality)
        sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in - n_out)]
        # one hot encode
        X = one_hot_encode(sequence_in, cardinality)
        y = one_hot_encode(sequence_out, cardinality)
        # reshape as 3D
        Xout[i] = X.reshape((1, X.shape[0], X.shape[1]))
        yout[i] = y.reshape((1, y.shape[0], y.shape[1]))
    return Xout, yout


# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2
n_cell = 150
batch_size = 1
truncated_backprop_length = 15
num_epochs = 5000
# x, y = get_pair(batch_size, n_timesteps_in, n_timesteps_out, n_features)

# define model
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, n_timesteps_in, n_features])
# print(batchX_placeholder)
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, n_timesteps_in, n_features])

mdl = EDA(input=batchX_placeholder, encoder_ncell=n_cell, decoder_ncell=n_cell)
loss = tf.losses.softmax_cross_entropy(batchY_placeholder, logits=mdl.output)
total_loss = tf.reduce_mean(loss)
# AdagradDA needs the current training step as an int64 variable; minimize() increments it
global_step = tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')
train_step = tf.train.AdagradDAOptimizer(learning_rate=0.3, global_step=global_step).minimize(loss, global_step=global_step)


def plot(loss_list):
    plt.cla()
    plt.plot(loss_list)
    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
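    # Initialize all variables, then repeatedly: sample a fresh random copy-task batch,
    # run one optimization step, and append/plot the current loss.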
    sess.run(tf.global_variables_initializer())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x, y = get_pair(batch_size, n_timesteps_in, n_timesteps_out, n_features)

        _current_state = zeros((batch_size, n_cell))

        print("New data, epoch", epoch_idx)
        _total_loss, _train_step = sess.run(
            [loss, train_step],
            feed_dict={
                batchX_placeholder: x,
                batchY_placeholder: y
            })
        loss_list.append(_total_loss)

        plot(loss_list)

    plt.ioff()
    plt.show()
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from random import randint
from numpy import array
from numpy import argmax
from numpy import zeros
import tensorflow as tf
from source.LSTM import LSTM
from source.LSTM import LSTMAttention
import matplotlib.pyplot as plt

# from keras import Sequential
# from keras.layers import LSTM


# generate a sequence of random integers
def generate_sequence(length, n_unique):
    return [randint(0, n_unique - 1) for _ in range(length)]


# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    encoding = list()
    for value in sequence:
        vector = [0 for _ in range(n_unique)]
        vector[value] = 1
        encoding.append(vector)
    return array(encoding)


# decode a one hot encoded sequence
def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]


# prepare a batch of (input, target) pairs for the LSTM
def get_pair(batch_size, n_in, n_out, cardinality):
    # generate random sequences; the target keeps the first n_out symbols and pads with zeros
    Xout = zeros([batch_size, n_in, cardinality])
    yout = zeros([batch_size, n_in, cardinality])
    for i in range(batch_size):
        sequence_in = generate_sequence(n_in, cardinality)
        sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in - n_out)]
        # one hot encode
        X = one_hot_encode(sequence_in, cardinality)
        y = one_hot_encode(sequence_out, cardinality)
        # reshape as 3D
        Xout[i] = X.reshape((1, X.shape[0], X.shape[1]))
        yout[i] = y.reshape((1, y.shape[0], y.shape[1]))
    return Xout, yout


# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2
n_cell = 150
batch_size = 1
truncated_backprop_length = 15
num_epochs = 5000
# x, y = get_pair(batch_size, n_timesteps_in, n_timesteps_out, n_features)

# define model
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, n_timesteps_in, n_features])
# print(batchX_placeholder)
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, n_timesteps_in, n_features])
Encoder = LSTM(batchX_placeholder, n_cell)
Decoder = LSTMAttention(Encoder, n_cell)
# no activation here: tf.losses.softmax_cross_entropy expects raw logits
logits = tf.layers.dense(inputs=Decoder, units=n_features, activation=None)
print(logits)
loss = tf.losses.softmax_cross_entropy(batchY_placeholder, logits=logits)
total_loss = tf.reduce_mean(loss)
# AdagradDA needs the current training step as an int64 variable; minimize() increments it
global_step = tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')
train_step = tf.train.AdagradDAOptimizer(learning_rate=0.3, global_step=global_step).minimize(loss, global_step=global_step)


def plot(loss_list):
    plt.cla()
    plt.plot(loss_list)
    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
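    # Same training loop as example.py, but driving the hand-rolled LSTM encoder and
    # LSTMAttention decoder directly. Note that _current_state below is allocated with
    # numpy zeros but never fed; the graph builds its own zero initial states.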
    sess.run(tf.global_variables_initializer())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x, y = get_pair(batch_size, n_timesteps_in, n_timesteps_out, n_features)

        _current_state = zeros((batch_size, n_cell))

        print("New data, epoch", epoch_idx)
        _total_loss, _train_step = sess.run(
            [loss, train_step],
            feed_dict={
                batchX_placeholder: x,
                batchY_placeholder: y
            })
        loss_list.append(_total_loss)

        plot(loss_list)

    plt.ioff()
    plt.show()

# train LSTM
# for epoch in range(5000):
#     # generate new random sequence
#     X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
#     # fit model for one epoch on this sequence
#     model.fit(X, y, epochs=1, verbose=2)
# # evaluate LSTM
# total, correct = 100, 0
# for _ in range(total):
#     X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
#     yhat = model.predict(X, verbose=0)
#     if array_equal(one_hot_decode(y[0]), one_hot_decode(yhat[0])):
#         correct += 1
# print('Accuracy: %.2f%%' % (float(correct) / float(total) * 100.0))
# # spot check some examples
# for _ in range(10):
#     X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
#     yhat = model.predict(X, verbose=0)
#     print('Expected:', one_hot_decode(y[0]), 'Predicted', one_hot_decode(yhat[0]))
--------------------------------------------------------------------------------
/source/EncoderDecoderAttention.py:
--------------------------------------------------------------------------------
# http://colah.github.io/posts/2015-08-Understanding-LSTMs/
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from random import randint
from numpy import array
from numpy import argmax


class EDA:
    def __init__(self, input, encoder_ncell, decoder_ncell, plot=False):
        self.batchsize, self.timesteps, self.input_dim = input.get_shape().as_list()
        Encoder = self._Encoder(input, encoder_ncell)
        Decoder = self._Decoder(Encoder, decoder_ncell)
        # no activation: the output is consumed as raw logits by tf.losses.softmax_cross_entropy
        self.output = tf.layers.dense(inputs=Decoder, units=self.input_dim, activation=None)

    def plot(self, loss_list):
        plt.cla()
        plt.plot(loss_list)
        plt.draw()
        plt.pause(0.0001)

    def _Encoder(self, input, cells):
        batchsize, timesteps, input_dim = input.get_shape().as_list()

        # Variables for Encoder
        self.En_Wf = tf.get_variable(name='En_Wf', shape=[self.input_dim + cells, cells])
        self.En_bf = tf.get_variable(name='En_bf', shape=cells)
        self.En_Wi = tf.get_variable(name='En_Wi', shape=[self.input_dim + cells, cells])
        self.En_bi = tf.get_variable(name='En_bi', shape=cells)
        self.En_Wc = tf.get_variable(name='En_Wc', shape=[self.input_dim + cells, cells])
        self.En_bc = tf.get_variable(name='En_bc', shape=cells)
        self.En_Wo = tf.get_variable(name='En_Wo', shape=[self.input_dim + cells, cells])
        self.En_bo = tf.get_variable(name='En_bo', shape=cells)
        # print(input)
        inputs_series = tf.unstack(input, axis=1)
        # Initial hidden output and cell state
        init_output = tf.zeros([batchsize, cells])
        init_state = tf.zeros([batchsize, cells])
        current_state = init_state
        h = init_output
        output = tf.expand_dims(init_output, axis=1)
        for current_input in inputs_series:
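            # Standard LSTM cell update (cf. the colah post linked above):
            # f, i, o are the forget / input / output gates computed from [x_t, h_{t-1}];
            # C_tilda is the candidate cell state; the new cell state blends the old state
            # (scaled by f) with the candidate (scaled by i), and h is the gated tanh of it.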
            stacked_input_h = tf.concat([current_input, h], axis=1)

            f = tf.sigmoid(tf.matmul(stacked_input_h, self.En_Wf) + self.En_bf)
            i = tf.sigmoid(tf.matmul(stacked_input_h, self.En_Wi) + self.En_bi)
            C_tilda = tf.tanh(tf.matmul(stacked_input_h, self.En_Wc) + self.En_bc)
            current_state = tf.multiply(f, current_state) + tf.multiply(i, C_tilda)
            o = tf.sigmoid(tf.matmul(stacked_input_h, self.En_Wo) + self.En_bo)
            h = tf.multiply(o, tf.tanh(current_state))
            output = tf.concat([output, tf.expand_dims(h, axis=1)], axis=1)
        return output[:, 1:, :]

    def _Decoder(self, input, cells):
        batchsize, timesteps, input_dim = input.get_shape().as_list()

        inputs_series = tf.unstack(input, axis=1)

        # Variables for Decoder
        self.De_Wf = tf.get_variable(name='De_Wf', shape=[input_dim + cells, cells])
        self.De_bf = tf.get_variable(name='De_bf', shape=cells)
        self.De_Wi = tf.get_variable(name='De_Wi', shape=[input_dim + cells, cells])
        self.De_bi = tf.get_variable(name='De_bi', shape=cells)
        self.De_Wc = tf.get_variable(name='De_Wc', shape=[input_dim + cells, cells])
        self.De_bc = tf.get_variable(name='De_bc', shape=cells)
        self.De_Wo = tf.get_variable(name='De_Wo', shape=[input_dim + cells, cells])
        self.De_bo = tf.get_variable(name='De_bo', shape=cells)
        # Variables for attention model
        self.Va = tf.get_variable(name='Va', shape=[input_dim, 1])
        self.Wa = tf.get_variable(name='Wa', shape=[cells, input_dim])
        self.ba = tf.get_variable(name='ba', shape=input_dim)
        self.Ua = tf.get_variable(name='Ua', shape=[input_dim, input_dim])
        # Precompute Ua applied to the encoder outputs for the additive attention score
        embed = tf.reshape(input, [-1, input_dim])
        embed = tf.matmul(embed, self.Ua)

        init_output = tf.zeros([batchsize, cells])
        init_state = tf.zeros([batchsize, cells])
        current_state = init_state
        h = init_output
        output = tf.expand_dims(init_output, axis=1)
        for current_input in inputs_series:
            # Repeat the current decoder state once per encoder timestep, keeping batch-major
            # ordering so the rows line up with the reshaped encoder outputs in `embed`
            expanded_state = tf.reshape(tf.tile(tf.expand_dims(current_state, 1), [1, timesteps, 1]), [-1, cells])

            e = tf.tanh(tf.matmul(expanded_state, self.Wa) + embed)

            e = tf.matmul(e, tf.tile(self.Va, multiples=[1, input_dim]))
            e = tf.reshape(e, [batchsize, timesteps, -1])

            a = tf.nn.softmax(e, dim=1)
            c = tf.reduce_sum(tf.multiply(a, input), axis=1)
            stacked_input_h = tf.concat([c, h], axis=1)

            f = tf.sigmoid(tf.matmul(stacked_input_h, self.De_Wf) + self.De_bf)
            i = tf.sigmoid(tf.matmul(stacked_input_h, self.De_Wi) + self.De_bi)
            C_tilda = tf.tanh(tf.matmul(stacked_input_h, self.De_Wc) + self.De_bc)
            current_state = tf.multiply(f, current_state) + tf.multiply(i, C_tilda)
            o = tf.sigmoid(tf.matmul(stacked_input_h, self.De_Wo) + self.De_bo)
            h = tf.multiply(o, tf.tanh(current_state))
            output = tf.concat([output, tf.expand_dims(h, axis=1)], axis=1)
        return output[:, 1:, :]


def LSTM(input, cells, return_sequences=False):
    batchsize, timesteps, input_dim = input.get_shape().as_list()
    # print(input)
    inputs_series = tf.unstack(input, axis=1)
    # Variables for LSTM
    Wf = tf.get_variable(name='Wf', shape=[input_dim + cells, cells])
    bf = tf.get_variable(name='bf', shape=cells)
    Wi = tf.get_variable(name='Wi', shape=[input_dim + cells, cells])
    bi = tf.get_variable(name='bi', shape=cells)
    Wc = tf.get_variable(name='Wc', shape=[input_dim + cells, cells])
    bc = tf.get_variable(name='bc', shape=cells)
    Wo = tf.get_variable(name='Wo', shape=[input_dim + cells, cells])
    bo = tf.get_variable(name='bo', shape=cells)
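    # Each gate multiplies a single weight matrix against the concatenation [x_t, h_{t-1}],
    # which is why every W* above has shape [input_dim + cells, cells].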
    init_output = tf.zeros([batchsize, cells])
    init_state = tf.zeros([batchsize, cells])
    current_state = init_state
    h = init_output
    output = tf.expand_dims(init_output, axis=1)
    for current_input in inputs_series:
        # print(current_input.shape)
        # current_input = tf.reshape(current_input, [batch_size, n_features])
        stacked_input_h = tf.concat([current_input, h], axis=1)

        f = tf.sigmoid(tf.matmul(stacked_input_h, Wf) + bf)
        i = tf.sigmoid(tf.matmul(stacked_input_h, Wi) + bi)
        C_tilda = tf.tanh(tf.matmul(stacked_input_h, Wc) + bc)
        current_state = tf.multiply(f, current_state) + tf.multiply(i, C_tilda)
        o = tf.sigmoid(tf.matmul(stacked_input_h, Wo) + bo)
        h = tf.multiply(o, tf.tanh(current_state))
        output = tf.concat([output, tf.expand_dims(h, axis=1)], axis=1)
    return output[:, 1:, :]


if __name__ == '__main__':
    n_features = 50
    n_timesteps_in = 5
    n_timesteps_out = 5
    n_cell = 30
    batch_size = 4

    # X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # print(X.shape)
    batchX_placeholder = tf.placeholder(tf.float32, [batch_size, n_timesteps_in, n_features])
    # print(batchX_placeholder)
    batchY_placeholder = tf.placeholder(tf.int32, [batch_size, n_timesteps_out, n_features])

    mdl = LSTM(batchX_placeholder, n_cell)
    print(mdl)
--------------------------------------------------------------------------------
/source/LSTM.py:
--------------------------------------------------------------------------------
# http://colah.github.io/posts/2015-08-Understanding-LSTMs/
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from random import randint
from numpy import array
from numpy import argmax


# generate a sequence of random integers
def generate_sequence(length, n_unique):
    return [randint(0, n_unique - 1) for _ in range(length)]


# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    encoding = list()
    for value in sequence:
        vector = [0 for _ in range(n_unique)]
        vector[value] = 1
        encoding.append(vector)
    return array(encoding)


# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]


# prepare data for the LSTM
def get_pair(n_in, n_out, cardinality):
    # generate random sequence
    sequence_in = generate_sequence(n_in, cardinality)
    sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in - n_out)]
    # one hot encode
    X = one_hot_encode(sequence_in, cardinality)
    y = one_hot_encode(sequence_out, cardinality)
    # reshape as 3D
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y


def weight_variable(shape):
    # Random initial values
    initial = np.random.rand(shape[0], shape[1])
    return tf.Variable(initial, dtype=tf.float32)


def bias_variable(shape):
    initial = np.zeros((1, shape))
    return tf.Variable(initial, dtype=tf.float32)


def LSTMAttention(input, cells):
    batchsize, timesteps, input_dim = input.get_shape().as_list()

    inputs_series = tf.unstack(input, axis=1)
    # Variables for LSTM
    Wf = weight_variable(shape=[input_dim + cells, cells])
    bf = bias_variable(shape=cells)
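    # Per-gate parameters mirroring the encoder LSTM; weight_variable/bias_variable create
    # plain tf.Variables from numpy (uniform-random weights, zero biases), unlike the
    # tf.get_variable initializers used in EncoderDecoderAttention.py.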
    Wi = weight_variable(shape=[input_dim + cells, cells])
    bi = bias_variable(shape=cells)
    Wc = weight_variable(shape=[input_dim + cells, cells])
    bc = bias_variable(shape=cells)
    Wo = weight_variable(shape=[input_dim + cells, cells])
    bo = bias_variable(shape=cells)
    # Variables for attention model
    Va = weight_variable(shape=[input_dim, 1])
    Wa = weight_variable(shape=[cells, input_dim])
    ba = bias_variable(shape=input_dim)
    Ua = weight_variable(shape=[input_dim, input_dim])
    # print(input_dim)
    embed = tf.reshape(input, [-1, input_dim])
    # print(embed.shape)
    embed = tf.matmul(embed, Ua)
    # print(embed.shape)
    # embed = tf.reshape(embed, [batch_size, timesteps, input_dim])
    # print(embed.shape)

    init_output = tf.zeros([batchsize, cells])
    init_state = tf.zeros([batchsize, cells])
    current_state = init_state
    h = init_output
    output = tf.expand_dims(init_output, axis=1)
    for current_input in inputs_series:
        # print(type(current_input))
        # current_input = tf.reshape(current_input, [batch_size, n_features])
        # Repeat the decoder state once per encoder timestep in batch-major order so the
        # rows line up with the reshaped encoder outputs in `embed`
        expanded_state = tf.reshape(tf.tile(tf.expand_dims(current_state, 1), [1, timesteps, 1]), [-1, cells])

        e = tf.tanh(tf.matmul(expanded_state, Wa) + embed)

        e = tf.matmul(e, tf.tile(Va, multiples=[1, input_dim]))
        e = tf.reshape(e, [batchsize, timesteps, -1])

        a = tf.nn.softmax(e, dim=1)
        # print(a.shape)
        c = tf.reduce_sum(tf.multiply(a, input), axis=1)
        # print(c.shape)
        stacked_input_h = tf.concat([c, h], axis=1)

        f = tf.sigmoid(tf.matmul(stacked_input_h, Wf) + bf)
        i = tf.sigmoid(tf.matmul(stacked_input_h, Wi) + bi)
        C_tilda = tf.tanh(tf.matmul(stacked_input_h, Wc) + bc)
        current_state = tf.multiply(f, current_state) + tf.multiply(i, C_tilda)
        o = tf.sigmoid(tf.matmul(stacked_input_h, Wo) + bo)
        h = tf.multiply(o, tf.tanh(current_state))
        output = tf.concat([output, tf.expand_dims(h, axis=1)], axis=1)
    return output[:, 1:, :]


def LSTM(input, cells, return_sequences=False):
    batchsize, timesteps, input_dim = input.get_shape().as_list()
    # print(input)
    inputs_series = tf.unstack(input, axis=1)
    # Variables for LSTM
    Wf = weight_variable(shape=[input_dim + cells, cells])
    bf = bias_variable(shape=cells)
    Wi = weight_variable(shape=[input_dim + cells, cells])
    bi = bias_variable(shape=cells)
    Wc = weight_variable(shape=[input_dim + cells, cells])
    bc = bias_variable(shape=cells)
    Wo = weight_variable(shape=[input_dim + cells, cells])
    bo = bias_variable(shape=cells)
    init_output = tf.zeros([batchsize, cells])
    init_state = tf.zeros([batchsize, cells])
    current_state = init_state
    h = init_output
    output = tf.expand_dims(init_output, axis=1)
    for current_input in inputs_series:
        # print(current_input.shape)
        # current_input = tf.reshape(current_input, [batch_size, n_features])
        stacked_input_h = tf.concat([current_input, h], axis=1)

        f = tf.sigmoid(tf.matmul(stacked_input_h, Wf) + bf)
        i = tf.sigmoid(tf.matmul(stacked_input_h, Wi) + bi)
        C_tilda = tf.tanh(tf.matmul(stacked_input_h, Wc) + bc)
        current_state = tf.multiply(f, current_state) + tf.multiply(i, C_tilda)
        o = tf.sigmoid(tf.matmul(stacked_input_h, Wo) + bo)
        h = tf.multiply(o, tf.tanh(current_state))
        output = tf.concat([output, tf.expand_dims(h, axis=1)], axis=1)
    return output[:, 1:, :]
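# Quick smoke test: build the plain LSTM graph on a placeholder batch and print the
# resulting output tensor (a shape check only; no session is run here).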
if __name__ == '__main__':
    n_features = 50
    n_timesteps_in = 5
    n_timesteps_out = 5
    n_cell = 30
    batch_size = 4

    # X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # print(X.shape)
    batchX_placeholder = tf.placeholder(tf.float32, [batch_size, n_timesteps_in, n_features])
    # print(batchX_placeholder)
    batchY_placeholder = tf.placeholder(tf.int32, [batch_size, n_timesteps_out, n_features])

    mdl = LSTM(batchX_placeholder, n_cell)
    print(mdl)
--------------------------------------------------------------------------------
/source/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windg/AttentionLSTM/79a4294b09793f57bff64fc9fedcaeeeaff2e239/source/__init__.py
--------------------------------------------------------------------------------