├── README.md
├── _config.yml
├── data
│   └── imdbDatareader.py
├── dockerfile
├── model
│   ├── BayesianCNN.py
│   ├── BayesianLSTM.py
│   ├── BayesianMLP
│   ├── sentiment.py
│   └── utils.py
└── requirements.txt

/README.md:
--------------------------------------------------------------------------------

## Implementation of a Bayesian LSTM in TensorFlow

For full details, see the blog post:

https://medium.com/@jehillparikh/bayesian-neural-networks-lstm-3616327e8b7c

Original paper: https://arxiv.org/pdf/1704.02798.pdf

Docker image: `jehillparikh/betamlstack:v2` (contains all dependencies used in this project). Update: the Docker image may be out of date.

--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
theme: jekyll-theme-tactile
--------------------------------------------------------------------------------
/data/imdbDatareader.py:
--------------------------------------------------------------------------------
import os

import numpy as np
import pandas as pd
import spacy
from keras.preprocessing import sequence

nlp = spacy.load('en')


def get_filenames(dir):
    text_files = []
    for dirName, subdirList, fileList in os.walk(dir):
        for filename in fileList:
            name = os.path.join(dirName, filename)
            if filename.lower().endswith(".txt"):  # we only want the .txt review files
                text_files.append(name)
    return text_files


def read_text(file):
    with open(file, 'r', encoding="ISO-8859-1") as f:
        data = f.read()
    return data


def get_imbd_data(dir):
    pos = get_filenames(os.path.join(dir, 'pos'))
    neg = get_filenames(os.path.join(dir, 'neg'))

    idx = 0
    data = []
    rating = []
    index = []

    for f in pos:
        data.append(read_text(f))
        rating.append(1)
        index.append(idx)
        idx = idx + 1

    for f in neg:
        data.append(read_text(f))
        rating.append(0)
        index.append(idx)
        idx = idx + 1

    dataset = list(zip(index, data, rating))

    np.random.shuffle(dataset)
    data2 = pd.DataFrame(data=dataset, columns=['entry', 'text', 'sentiment'])

    return data2


def word_to_sentence_embedding(sentence):
    data = np.zeros((len(sentence), 300))
    k = 0

    for word in sentence:
        data[k, :] = get_word_embedding(word)
        k = k + 1

    return data


def get_word_embedding(word):
    emd = nlp(word)
    return emd.vector


def sentence_embedding(sentence, embedding_dim):
    tokens = nlp(sentence)
    data = np.zeros((len(tokens), embedding_dim))  # one row per token
    k = 0
    for token in tokens:
        data[k, :] = token.vector
        k = k + 1

    return data


def get_training_batch(data, batch_size, embedding_dim, num_classes, maxlen):
    x = np.zeros([batch_size, maxlen, embedding_dim])
    y = np.zeros([batch_size, num_classes])

    index = 0

    for idx, row in data.iterrows():
        # dtype='float32' keeps the embedding values; the default (int32) would truncate them
        x[index, :, :] = sequence.pad_sequences([sentence_embedding(row['text'], embedding_dim)],
                                                maxlen=maxlen, dtype='float32')
        if row['sentiment']:
            y[index, :] = np.array([0, 1])
        else:
            y[index, :] = np.array([1, 0])

        index = index + 1

    return x, y
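
# Illustrative usage sketch (not part of the original code): expected shapes from
# get_training_batch for a DataFrame slice with 'text' and 'sentiment' columns,
# assuming the spaCy vectors have dimension embedding_dim.
#
#   batch_x, batch_y = get_training_batch(train_data[:8], batch_size=8,
#                                         embedding_dim=300, num_classes=2, maxlen=100)
#   # batch_x.shape == (8, 100, 300)  -> padded/truncated word-embedding sequences
#   # batch_y.shape == (8, 2)         -> one-hot sentiment labels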


def load_glove():
    glove_filename = '/home/jehill/python/NLP/datasets/GloVE/glove.6B.300d.txt'

    glove_vocab = []
    glove_embed = []
    embedding_dict = {}

    file = open(glove_filename, 'r', encoding='UTF-8')

    for line in file.readlines():
        row = line.strip().split(' ')
        vocab_word = row[0]
        glove_vocab.append(vocab_word)
        embed_vector = [float(i) for i in row[1:]]  # convert to a list of floats
        embedding_dict[vocab_word] = embed_vector
        glove_embed.append(embed_vector)

    print('Loaded GloVe')
    file.close()

    return glove_vocab, glove_embed, embedding_dict


def get_train_test_data():
    DataDir = '/home/jehill/python/NLP/datasets/'

    train_dir = os.path.join(DataDir, 'train')
    test_dir = os.path.join(DataDir, 'test')

    train_data = get_imbd_data(train_dir)
    test_data = get_imbd_data(test_dir)

    n_train = len(train_data)
    print("Number of training points: " + str(n_train))
    print("Number of test points: " + str(len(test_data)))

    return train_data, test_data


if __name__ == '__main__':
    # glove_vocab, glove_embed, word_embedding_dict = load_glove()
    DataDir = '/home/jehill/python/NLP/datasets/'

    train_dir = os.path.join(DataDir, 'train')
    test_dir = os.path.join(DataDir, 'test')

    train_data = get_imbd_data(train_dir)
    # test_data = get_imbd_data(test_dir)

    text = train_data.text.values

    """

    batch_size = 50
    index = 0

    for index in range(0, len(train_data), batch_size):
        print(index)
        BATCH_X, BATCH_Y = get_training_batch(train_data[index:index + batch_size], batch_size=batch_size,
                                              embedding_dim=384, num_classes=2, maxlen=1000)
        print(np.shape(BATCH_X))
        print(np.shape(BATCH_Y))

    """
--------------------------------------------------------------------------------
/dockerfile:
--------------------------------------------------------------------------------
FROM gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-gpu:v0.5.0
COPY requirements.txt /tmp/
ADD src /app
RUN pip install --requirement /tmp/requirements.txt
EXPOSE 80
#CMD ["python", "/app/test_gpu.py"]

# Run in interactive mode:
# build the image from this Dockerfile, or pull it from Docker Hub, then:
# docker run -it -p 8000:80 YourDockerImageName
--------------------------------------------------------------------------------
/model/BayesianCNN.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow_probability as tfp

# Assuming `features` (a batch of 32x32x3 images) and `labels` (one-hot targets)
# are provided elsewhere; empty placeholders are used here for completeness.
features = []
labels = []

# For completeness: a Bayesian CNN built from TensorFlow Probability layers.

model = tf.keras.Sequential([
    tf.keras.layers.Reshape([32, 32, 3]),
    tfp.layers.Convolution2DReparameterization(
        64, kernel_size=5, padding='SAME', activation=tf.nn.relu),
    tf.keras.layers.MaxPooling2D(pool_size=[2, 2],
                                 strides=[2, 2],
                                 padding='SAME'),
    tf.keras.layers.Flatten(),
    tfp.layers.DenseReparameterization(10),
])

logits = model(features)
neg_log_likelihood = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
kl = sum(model.losses)
loss = neg_log_likelihood + kl
train_op = tf.train.AdamOptimizer().minimize(loss)

# The per-layer KL terms are exposed via each layer's `losses` property
# (e.g. tfp.layers.DenseReparameterization) and summed above through model.losses.
--------------------------------------------------------------------------------
/model/BayesianLSTM.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMStateTuple, LSTMCell

from model.utils import variationalPosterior


class BayesianLSTMCell(LSTMCell):

    def __init__(self, num_units, prior, is_training, name, **kwargs):

        super(BayesianLSTMCell, self).__init__(num_units, **kwargs)

        self.w = None
        self.b = None
        self.prior = prior
        self.layer_name = name
        self.isTraining = is_training
        self.num_units = num_units
        self.kl_loss = None

        print("Creating lstm layer: " + name)

    def call(self, inputs, state):

        # The variational posteriors over the weights and biases are created lazily,
        # once the input dimension is known (i.e. on the first call).
        if self.w is None:
            size = inputs.get_shape()[-1].value
            self.w, self.w_mean, self.w_sd = variationalPosterior(
                (size + self.num_units, 4 * self.num_units), self.layer_name + '_weights', self.prior, self.isTraining)
            self.b, self.b_mean, self.b_sd = variationalPosterior(
                (4 * self.num_units, 1), self.layer_name + '_bias', self.prior, self.isTraining)

        cell, hidden = state
        concat_inputs_hidden = tf.concat([inputs, hidden], 1)
        concat_inputs_hidden = tf.nn.bias_add(tf.matmul(concat_inputs_hidden, self.w), tf.squeeze(self.b))
        # Gates: input, new input, forget and output
        i, j, f, o = tf.split(value=concat_inputs_hidden, num_or_size_splits=4, axis=1)
        new_cell = (cell * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * self._activation(j))
        new_hidden = self._activation(new_cell) * tf.sigmoid(o)
        new_state = LSTMStateTuple(new_cell, new_hidden)

        return new_hidden, new_state
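
# Usage sketch (illustrative; mirrors model/sentiment.py): the cell creates its
# variational weights on the first call, so the network must be unrolled before the
# "KL_layers" collection populated by variationalPosterior contains these KL terms.
#
#   cells = [BayesianLSTMCell(n, prior=(0., 1.), is_training=True, name='lstm' + str(i))
#            for i, n in enumerate([128, 64])]
#   stacked = tf.contrib.rnn.MultiRNNCell(cells)
#   state = stacked.zero_state(batch_size, tf.float32)
#   output, state = stacked(inputs[:, 0, :], state)  # one unrolled time step
#   kl = tf.add_n(tf.get_collection("KL_layers"))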
--------------------------------------------------------------------------------
/model/BayesianMLP:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow_probability as tfp

# Assuming `features` and `labels` (one-hot targets) are provided elsewhere;
# empty placeholders are used here for completeness.
features = []
labels = []

model = tf.keras.Sequential([
    tfp.layers.DenseReparameterization(512, activation=tf.nn.relu),
    tfp.layers.DenseReparameterization(10),
])

logits = model(features)
neg_log_likelihood = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=labels, logits=logits))
kl = sum(model.losses)
loss = neg_log_likelihood + kl
train_op = tf.train.AdamOptimizer().minimize(loss)
--------------------------------------------------------------------------------
/model/sentiment.py:
--------------------------------------------------------------------------------
# Sentiment analysis using a multi-layer Bayesian RNN (LSTM)
import os

import tensorflow as tf

from data.imdbDatareader import *
from model.BayesianLSTM import BayesianLSTMCell
from model.utils import variationalPosterior

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


class SentimentAnalysisMultiLayerLSTM:

    def __init__(self, training):

        self.LSTM_KL = 0
        self.embedding_dim = 300  # dimensionality of the word embeddings
        self.keep_prob = 0.5
        self.batch_size = 512
        self.lstm_sizes = [128, 64]  # number of hidden units in each LSTM layer
        self.num_classes = 2
        self.max_sequence_length = 100
        self.prior = (0., 1.)  # univariate Gaussian prior (mean, std)
        self.isTraining = training

        with tf.variable_scope('rnn_i/o'):
            # use None for batch size and dynamic sequence length
            self.inputs = tf.placeholder(tf.float32, shape=[None, None, self.embedding_dim])
            self.groundtruths = tf.placeholder(tf.float32, shape=[None, self.num_classes])

        with tf.variable_scope('rnn_cell'):
            self.initial_state, self.final_lstm_outputs, self.final_state, self.cell = self.build_lstm_layers(
                self.lstm_sizes, self.inputs, self.keep_prob, self.batch_size)

        self.softmax_w, self.softmax_w_mean, self.softmax_w_std = variationalPosterior(
            (self.lstm_sizes[-1], self.num_classes), "softmax_w", self.prior, self.isTraining)
        self.softmax_b, self.softmax_b_mean, self.softmax_b_std = variationalPosterior(
            (self.num_classes,), "softmax_b", self.prior, self.isTraining)
        self.logits = tf.nn.xw_plus_b(self.final_lstm_outputs, self.softmax_w, self.softmax_b)

        with tf.variable_scope('rnn_loss', reuse=tf.AUTO_REUSE):

            if self.isTraining:
                # use cross entropy as the classification loss
                self.loss = tf.losses.softmax_cross_entropy(onehot_labels=self.groundtruths, logits=self.logits)
                self.KL = tf.add_n(tf.get_collection("KL_layers"), "KL")

                # the total cost (free energy) needs to be divided by the batch size
                self.cost = (self.loss + self.KL) / self.batch_size
                self.optimizer = tf.train.AdamOptimizer(0.02).minimize(self.cost)

        with tf.variable_scope('rnn_accuracy'):
            self.prediction = tf.argmax(self.logits, axis=1)
            self.accuracy = tf.contrib.metrics.accuracy(labels=tf.argmax(self.groundtruths, axis=1), predictions=self.prediction)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())  # don't forget to initialise all variables
        self.saver = tf.train.Saver()  # a saver is for saving or restoring the trained weights

        print("Completed creating the graph")

    def train(self, batch_x, batch_y, state):

        fd = {}
        fd[self.inputs] = batch_x
        fd[self.groundtruths] = batch_y
        fd[self.initial_state] = state
        # feed in inputs and ground truth to get the loss and update the weights via the Adam optimizer
        loss, accuracy, final_state, _ = self.sess.run([self.loss, self.accuracy, self.final_state, self.optimizer], fd)

        return loss, accuracy, final_state

    def test(self, batch_x, batch_y, batch_size):
        """
        NEEDS A RE-WRITE: the interface should also accept the RNN state.
        :param batch_x:
        :param batch_y:
        :return:
        """
        # restore the model

        # with tf.Session() as sess:
        #     model = model.restore()

        test_state = self.cell.zero_state(batch_size, tf.float32)
        fd = {}
        fd[self.inputs] = batch_x
        fd[self.groundtruths] = batch_y
        fd[self.initial_state] = test_state
        prediction, accuracy = self.sess.run([self.prediction, self.accuracy], fd)

        return prediction, accuracy

    def save(self, e):
        self.saver.save(self.sess, 'model/rnn/rnn_%d.ckpt' % (e + 1))

    def restore(self, e):
        self.saver.restore(self.sess, 'model/rnn/rnn_%d.ckpt' % (e))
    def build_lstm_layers(self, lstm_sizes, inputs, keep_prob_, batch_size):
        """
        Create the (Bayesian) LSTM layers.

        inputs:
            lstm_sizes: list containing the number of hidden units of each LSTM layer
            inputs: input embeddings of shape [batch_size, sequence_length, embedding_dim], i.e. [None, None, 300];
                    None is used to handle variable batch size and variable sequence length
            keep_prob_: keep probability for dropout
            batch_size: batch size

        outputs:
            initial state for the RNN (LSTM): tuple of [(batch_size, hidden_layer_1), (batch_size, hidden_layer_2)]
            outputs of the RNN: [batch_size, sequence_length, last_hidden_layer_dim]
            RNN cell: the TensorFlow implementation of the RNN cell
            final state: tuple of [(batch_size, hidden_layer_1), (batch_size, hidden_layer_2)]
        """
        self.lstms = []
        for i in range(0, len(lstm_sizes)):
            self.lstms.append(BayesianLSTMCell(lstm_sizes[i], self.prior, self.isTraining, 'lstm' + str(i)))

        # Stack up multiple LSTM layers
        cell = tf.contrib.rnn.MultiRNNCell(self.lstms)
        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)

        # We avoid tf.nn.dynamic_rnn, as dynamic unrolling produces while-loop errors
        # related to gradient checking; the network is unrolled statically instead.
        # lstm_outputs, final_state = tf.nn.dynamic_rnn(cell, embed_input, initial_state=initial_state)
        outputs = []
        state = initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(self.max_sequence_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        final_lstm_outputs = cell_output
        final_state = state
        # outputs = tf.reshape(tf.concat(1, outputs), [-1, self.embedding_dim])

        return initial_state, final_lstm_outputs, final_state, cell


if __name__ == '__main__':

    # hyperparameters of our network
    EPOCHS = 20
    tf.reset_default_graph()
    model = SentimentAnalysisMultiLayerLSTM(training=True)

    """

    train_data = get_twets_data()
    n_train = len(train_data)

    BATCH_SIZE = model.batch_size
    print("BATCH SIZE : " + str(BATCH_SIZE))

    rec_loss = []

    for epoch in range(EPOCHS):

        state = model.sess.run([model.initial_state])
        train_data = train_data.sample(frac=1).reset_index(drop=True)
        loss_train = 0
        accuracy_train = 0

        for idx in range(0, n_train, BATCH_SIZE):
            BATCH_X, BATCH_Y = get_training_batch_twets(train_data[idx:(idx + BATCH_SIZE)], BATCH_SIZE,
                                                        model.embedding_dim, num_classes=model.num_classes,
                                                        maxlen=model.max_sequence_length)
            loss_batch, accuracy_batch, state = model.train(BATCH_X, BATCH_Y, state)
            loss_train += loss_batch
            accuracy_train += accuracy_batch
            print("EPOCH: " + str(epoch) + " BATCH_INDEX: " + str(idx) + " Batch loss: " + str(
                loss_batch) + " Batch accuracy: " + str(accuracy_train))

        loss_train /= n_train
        accuracy_train /= n_train

        model.save(epoch)  # save the model after each epoch
        rec_loss.append([loss_train, accuracy_train])

    np.save('./model/rnn/rec_loss.npy', rec_loss)
    print("Training completed")

    """
--------------------------------------------------------------------------------
/model/utils.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow_probability.python.distributions import Normal


def compute_KL_univariate_prior(univariateprior, theta, sample):
    """
    Monte Carlo estimate of KL(q(w | theta) || p(w)) evaluated at the sampled weights.

    :param univariateprior: univariate prior, given as the tuple (mean, std) of a Normal distribution
    :param theta: tuple (mean, std) parameterising the posterior q(w | theta), i.e. Normal(mean, std)
    :param sample: the sampled weights at which the log densities are evaluated
    """
    sample = tf.reshape(sample, [-1])  # flatten vector
    (mean, std) = theta
    mean = tf.reshape(mean, [-1])
    std = tf.reshape(std, [-1])
    posterior = Normal(mean, std)

    (mean2, std2) = univariateprior
    prior = Normal(mean2, std2)

    q_theta = tf.reduce_sum(posterior.log_prob(sample))
    p_d = tf.reduce_sum(prior.log_prob(sample))

    KL = tf.subtract(q_theta, p_d)

    return KL
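
# Note (added for clarity): the function above uses a single-sample Monte Carlo estimate of
# the KL divergence. For two univariate Gaussians it is also available in closed form,
#     KL(N(m1, s1^2) || N(m2, s2^2)) = log(s2 / s1) + (s1^2 + (m1 - m2)^2) / (2 * s2^2) - 1/2,
# which could be summed over all weights instead of evaluating log-probabilities at the sample.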


def variationalPosterior(shape, name, prior, istraining):
    """
    This function creates a variational posterior q(w | theta) over a given weight w of the network.
    theta is parameterised as mean + standard deviation * noise, using the reparameterization trick
    of Kingma et al. (2014). With the correct loss function (the free energy) we learn the mean and
    standard deviation, and can thus approximate the posterior p(w | D) by computing a KL loss for
    each variational posterior q(w | theta) against the prior p(w).

    :param shape: shape of the weight variable
    :param name: name of the tensor/variable for which the variational posterior q(w | theta) of the true posterior p(w | D) is created
    :param prior: tuple (mean, std) of the univariate Gaussian prior
    :param istraining: whether we are in training or inference mode
    :return: sample (i.e. the weights), mean and std of the weights; during training, noise is added to the weights
    """
    # theta = (mu, sigma) with sigma = log(1 + exp(rho)), computed via tf.math.softplus(rho)
    mu = tf.get_variable("{}_mean".format(name), shape=shape, dtype=tf.float32)
    rho = tf.get_variable("{}_rho".format(name), shape=shape, dtype=tf.float32)
    sigma = tf.math.softplus(rho)

    # if training, we add noise to the variational parameters theta (reparameterization trick)
    if istraining:
        epsilon = Normal(0., 1.).sample(shape)
        sample = mu + sigma * epsilon
    else:
        sample = mu + sigma

    theta = (mu, sigma)

    kl_loss = compute_KL_univariate_prior(prior, theta, sample)

    tf.summary.histogram(name + '_rho_hist', rho)
    tf.summary.histogram(name + '_mu_hist', mu)
    tf.summary.histogram(name + '_sigma_hist', sigma)

    # we use this collection during training to obtain the total KL loss
    tf.add_to_collection("KL_layers", kl_loss)

    return sample, mu, sigma
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tensorflow_probability==0.6.0
numpy==1.16.3
Keras==2.2.2
pandas==0.22.0
tensorflow==1.13.1
spacy==2.0.12
--------------------------------------------------------------------------------