├── DeXpression.py
├── GRU_Cart.py
├── QLearn_Func.py
├── README.md
├── attention_mech.py
└── seq2seq.py

--------------------------------------------------------------------------------
/DeXpression.py:
--------------------------------------------------------------------------------
'''An implementation of the DeXpression network described in https://arxiv.org/abs/1509.05371 using TensorFlow and TFLearn.
It is run on the CK+ dataset, available here: http://www.consortium.ri.cmu.edu/ckagree/.
'''

import tensorflow as tf
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import numpy as np


features = np.load('dex_feat.npy')
labels = np.load('dex_lab.npy')

imagesize = 50
dropout_rate = 0.8  # Keep probability passed to tflearn's dropout layer.
LR = 0.001
batch_size = 128
num_epoch = 10

padding = 'VALID'

# Stem: convolution, pooling, and batch normalization.
net = input_data(shape=[None, imagesize, imagesize, 1])
net = conv_2d(net, 64, 7, strides=2, padding=padding, activation=None)
net = tf.nn.relu(net)
net = max_pool_2d(net, 3, strides=2, padding=padding)
net = tflearn.batch_normalization(net)

# First FeatEx block: parallel conv/pool paths concatenated along the channel axis.
net_1 = conv_2d(net, 96, 1, padding=padding)
net_1 = tf.nn.relu(net_1)
net_2 = max_pool_2d(net, 3, strides=1, padding=padding)
net_3 = conv_2d(net_1, 208, 3, padding=padding)
net_3 = tf.nn.relu(net_3)
net_4 = conv_2d(net_2, 64, 1, padding=padding)
net_4 = tf.nn.relu(net_4)
chunk_1 = tflearn.merge([net_3, net_4], mode='concat', axis=3)

# Second FeatEx block.
net_5 = conv_2d(chunk_1, 96, 1, padding=padding)
net_5 = tf.nn.relu(net_5)
net_6 = max_pool_2d(chunk_1, 3, strides=1, padding=padding)
net_7 = conv_2d(net_5, 208, 3, padding=padding)
net_7 = tf.nn.relu(net_7)
net_8 = conv_2d(net_6, 64, 1, padding=padding)
net_8 = tf.nn.relu(net_8)
chunk_2 = tflearn.merge([net_7, net_8], mode='concat', axis=3)

# Classifier: dropout followed by a softmax over the 7 expression classes.
net = tflearn.flatten(chunk_2)
net = dropout(net, dropout_rate)
net = fully_connected(net, 7, activation='softmax')
net = regression(net, optimizer='adam', loss='categorical_crossentropy', learning_rate=LR)

model = tflearn.DNN(net)
model.fit(features, labels, n_epoch=num_epoch, validation_set=0.1, show_metric=True, batch_size=batch_size)
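
# A minimal preprocessing sketch (not part of the original script) showing one
# way 'dex_feat.npy' and 'dex_lab.npy' could be built from CK+ images. The
# directory layout (one folder per expression class) and the use of PIL are
# assumptions; any pipeline that yields 50x50 grayscale arrays and one-hot
# labels for the 7 classes will do.
def build_ck_arrays(root_dir, num_classes=7, size=50):
    import os
    from PIL import Image
    feats, labs = [], []
    for class_idx in range(num_classes):
        class_dir = os.path.join(root_dir, str(class_idx))
        for fname in sorted(os.listdir(class_dir)):
            img = Image.open(os.path.join(class_dir, fname)).convert('L').resize((size, size))
            feats.append(np.array(img, dtype=np.float32).reshape(size, size, 1) / 255.0)
            labs.append(np.eye(num_classes)[class_idx])
    np.save('dex_feat.npy', np.array(feats))
    np.save('dex_lab.npy', np.array(labs))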

--------------------------------------------------------------------------------
/GRU_Cart.py:
--------------------------------------------------------------------------------
'''This code is based on an old tutorial from pythonprogramming.net, which was my first exposure to reinforcement learning.
This code is more of an experiment, so a lot of it is changed and customized, since that was easier than rewriting the whole thing
from scratch. This architecture usually gets a perfect 200 in the CartPole environment.
'''


import tflearn
from tflearn.layers.recurrent import gru
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
import gym
import random
import numpy as np
from statistics import mean, median
from collections import Counter


LR = 1e-3
env = gym.make('CartPole-v0')
env.reset()
goal_steps = 500
score_requirement = 100
initial_games = 20000
dropout_rate = 0.8
num_epochs = 5

def initial_population():
    training_data = []    # Our observations and the actions taken from them.
    scores = []           # The scores corresponding to those games.
    accepted_scores = []  # The scores that meet a certain threshold.
    for _ in range(initial_games):  # Iterate through however many games we want.
        score = 0
        game_memory = []       # Information about the environment.
        prev_observation = []  # The last observation we made.
        for _ in range(goal_steps):
            action = random.randrange(0, 2)  # Choose a random action.
            observation, reward, done, info = env.step(action)  # Take the action.
            if len(prev_observation) > 0:  # Pair the previous observation with the action taken, then store the new observation.
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done: break
        if score >= score_requirement:  # The reinforcement step: if we do well (reach the threshold), we want to remember what we did.
            accepted_scores.append(score)
            for data in game_memory:
                if data[1] == 1:  # Convert the action to a one-hot array.
                    output = [0, 1]
                elif data[1] == 0:
                    output = [1, 0]
                training_data.append([data[0], output])
        env.reset()  # Save the score and reset the environment.
        scores.append(score)
    return training_data


def neural_network_model(input_size):
    network = input_data(shape=[None, input_size, 1], name='input')
    network = gru(network, 128, return_seq=True)
    network = gru(network, 256)
    network = batch_normalization(network)
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, dropout_rate)
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, dropout_rate)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network)
    return model



def train_model(training_data, model=False):
    X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)  # Reformat the observations into (samples, 4, 1).
    y = [i[1] for i in training_data]
    if not model:
        model = neural_network_model(input_size=len(X[0]))
    model.fit(X, y, n_epoch=num_epochs, snapshot_step=500, show_metric=True, run_id='openai_learning')  # Train the model with these parameters.
    return model
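
# CartPole observations have 4 entries, and the network above treats them as a
# length-4 sequence with one feature per step, i.e. inputs of shape (batch, 4, 1).
# The helper below (hypothetical, not used by the rest of the script) just makes
# that reshape explicit; a trained model could also be persisted with
# model.save('gru_cart.tflearn') and restored later with model.load().
def to_model_input(observation):
    obs = np.asarray(observation, dtype=np.float32)
    return obs.reshape(-1, len(obs), 1)  # (batch, timesteps, features)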


training_data = initial_population()
model = train_model(training_data)
scores = []
choices = []
for each_game in range(10):
    score = 0
    game_memory = []
    prev_obs = []
    env.reset()
    for _ in range(goal_steps):
        env.render()
        if len(prev_obs) == 0:
            action = random.randrange(0, 2)
        else:
            action = np.argmax(model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0])
        choices.append(action)
        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation
        game_memory.append([new_observation, action])
        score += reward
        if done: break
    scores.append(score)

print('Average Score:', sum(scores) / len(scores))
print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
print('Score requirement:', score_requirement)

--------------------------------------------------------------------------------
/QLearn_Func.py:
--------------------------------------------------------------------------------
'''
This is a quick and dirty Q-learning function for exploring the concept. It runs the FrozenLake environment, and can run most other
environments with discrete observation and action spaces. That being said, there are much better algorithms for reinforcement learning,
and many can be deployed easily using Tensorforce, an RL library for TensorFlow.
'''

import numpy as np
import gym
import random

def QLearn(LR, min_LR, LR_decay, disc_factor, num_episodes, env_name, Static_LR=False):

    # Initialize the Q-table and an array to store episode rewards in.
    reward_array = []
    env = gym.make(env_name)
    q_table = np.zeros([env.observation_space.n, env.action_space.n])

    for i in range(num_episodes):
        state = env.reset()  # Reset the environment to start a new episode.
        done = False
        total_reward = 0

        if Static_LR:  # This lets one experiment with decaying or static learning rates.
            eta = LR
        else:
            eta = max(min_LR, LR * (LR_decay ** (i // 100)))

        # The following is where the "learning" really takes place.
        for k in range(100):
            # Pick an action greedily, with noise that encourages exploration at the start but buckles down as time goes on.
            action = np.argmax(q_table[state, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
            new_state, reward, done, _ = env.step(action)  # Take the action and extract the resulting information.
            total_reward += reward
            # Update the Q-table with the standard Q-learning rule.
            q_table[state, action] = (1 - eta) * q_table[state, action] + eta * (reward + disc_factor * np.max(q_table[new_state, :]))
            state = new_state  # Enter the new state.
            if done:
                break
        reward_array.append(total_reward)

    print("Score over time: " + str(sum(reward_array) / num_episodes))
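
# Example invocation (the hyperparameter values are illustrative, not tuned):
# start with a learning rate of 0.8, floor it at 0.01, decay it by 0.96 every
# 100 episodes, and use a discount factor of 0.95 on FrozenLake-v0.
if __name__ == '__main__':
    QLearn(LR=0.8, min_LR=0.01, LR_decay=0.96, disc_factor=0.95,
           num_episodes=5000, env_name='FrozenLake-v0')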

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Learning Implementations
This is a repository of implementations of deep learning papers, along with mini projects and experiments. Most are written
using a mix of TFLearn and TensorFlow.

# Project Descriptions
Basic descriptions of the projects. They are all commented with the relevant information, so if you're curious about anything, check there. If it's not there, feel free to ask me!

DeXpression - Implementation of the DeXpression paper: https://arxiv.org/abs/1509.05371

GRU Cart - A network that uses gated recurrent units to solve OpenAI Gym's CartPole environment.

Attention Mech - A TensorFlow function that replicates the attention mechanism described in http://www.cs.cmu.edu/~./hovy/papers/16HLT-hierarchical-attention-networks.pdf

seq2seq - A basic sequence-to-sequence model using LSTM cells without attention, based on the TensorFlow tutorial, with some minor changes and workarounds for issues with deepcopy.

Q-Learn Func - A basic function for experimenting with Q-learning and its hyperparameters.

--------------------------------------------------------------------------------
/attention_mech.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

def attention(inputs, input_size, attention_size, bi_rnn=False):
    # This is based on http://www.cs.cmu.edu/~./hovy/papers/16HLT-hierarchical-attention-networks.pdf
    # 'inputs' is expected to have shape (batch, time, input_size); for a bidirectional
    # RNN, pass the (forward, backward) output pair and set bi_rnn=True.
    if bi_rnn:
        inputs = tf.concat(inputs, 2)
    # Our first step is to pass weights*inputs+bias through the tanh function. We start by initializing our variables:
    W = tf.Variable(tf.random_normal([input_size, attention_size], stddev=0.2))
    b = tf.Variable(tf.random_normal([attention_size], stddev=0.2))
    u = tf.Variable(tf.random_normal([attention_size], stddev=0.2))  # This is our "context" vector.

    hid_rep = tf.tensordot(inputs, W, axes=1) + b  # We run our input through a one-layer feed-forward net.
    hid_rep = tf.tanh(hid_rep)

    word_dif = tf.tensordot(hid_rep, u, axes=1)  # Score each time step by its similarity with the context vector.
    alpha = tf.nn.softmax(word_dif)              # Normalize the scores into attention weights.
    output = tf.reduce_sum(inputs * tf.expand_dims(alpha, -1), 1)  # Weighted sum over time.

    return output
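
# A hedged usage sketch (not in the original file): apply the attention layer on
# top of a bidirectional GRU encoder. The sequence length, feature size, and
# unit counts below are illustrative assumptions.
if __name__ == '__main__':
    hidden_units = 64
    inputs_ph = tf.placeholder(tf.float32, [None, 30, 100])  # (batch, time, features)
    fw_cell = tf.contrib.rnn.GRUCell(hidden_units)
    bw_cell = tf.contrib.rnn.GRUCell(hidden_units)
    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs_ph, dtype=tf.float32)
    # With bi_rnn=True the forward/backward outputs are concatenated inside
    # attention(), so input_size is 2 * hidden_units.
    sentence_vec = attention(rnn_outputs, input_size=2 * hidden_units,
                             attention_size=50, bi_rnn=True)
    print(sentence_vec)  # Tensor of shape (batch, 2 * hidden_units)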

--------------------------------------------------------------------------------
/seq2seq.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import sys

class seq2seq(object):
    # legacy_seq2seq deep-copies the RNN cell, which fails with a thread-lock pickling
    # error; patching __deepcopy__ to return the cell itself is a workaround.
    setattr(tf.contrib.rnn.BasicLSTMCell, '__deepcopy__', lambda self, _: self)

    def __init__(self, encode_seq_len, decode_seq_len, encoder_vocab_size, decoder_vocab_size, embedding_dim, num_layers, ckpt_path, num_epochs, learning_rate=0.001, model_name='seq2seq_model'):

        self.encode_seq_len = encode_seq_len
        self.decode_seq_len = decode_seq_len
        self.ckpt_path = ckpt_path
        self.num_epochs = num_epochs
        self.model_name = model_name

        def comp_graph():  # We need to create our computation graph.
            tf.reset_default_graph()

            # A seq2seq model is effectively an encoder-decoder architecture built for sequence data.
            # This model is a basic LSTM architecture without an explicit attention mechanism, but one can be added without much struggle.

            self.encoder_inputs = [tf.placeholder(shape=[None, ], dtype=tf.int64, name='ei_{}'.format(t)) for t in range(encode_seq_len)]

            self.labels = [tf.placeholder(shape=[None, ], dtype=tf.int64, name='labels_{}'.format(t)) for t in range(decode_seq_len)]
            # The decoder inputs are the labels shifted right by one, with a GO token in front.
            self.decoder_inputs = [tf.zeros_like(self.encoder_inputs[0], dtype=tf.int64, name='GO')] + self.labels[:-1]

            # Dropout is a powerful regularizer for NLP models, so it's a good idea to include it.
            # Note that this placeholder is fed a *keep* probability (see output_keep_prob below).
            self.dropout_rate = tf.placeholder(tf.float32)
            basic_cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(embedding_dim, state_is_tuple=True), output_keep_prob=self.dropout_rate)

            # You can use different recurrent cells, such as GRUs.

            stacked_LSTM = tf.contrib.rnn.MultiRNNCell([basic_cell] * num_layers, state_is_tuple=True)  # Lets us set the depth of the stack.

            with tf.variable_scope('decoder') as scope:

                self.decode_outputs, self.decode_states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(self.encoder_inputs, self.decoder_inputs, stacked_LSTM, encoder_vocab_size, decoder_vocab_size, embedding_dim)
                scope.reuse_variables()
                # The test-time graph feeds each predicted token back in as the next decoder input.
                self.decode_outputs_test, self.decode_states_test = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(self.encoder_inputs, self.decoder_inputs, stacked_LSTM, encoder_vocab_size, decoder_vocab_size, embedding_dim, feed_previous=True)
                # legacy_seq2seq is a deprecated TensorFlow library, but it works well for our purposes.

            loss_weights = [tf.ones_like(label, dtype=tf.float32) for label in self.labels]
            self.loss = tf.contrib.legacy_seq2seq.sequence_loss(self.decode_outputs, self.labels, loss_weights, decoder_vocab_size)
            self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)
            # The Adam optimizer is standard in NLP, though SGD with momentum can be used effectively in its place.

        comp_graph()  # Create our graph.


    # The following are basic helper functions to run the seq2seq model.

    def Get_dict(self, X, Y, dropout_rate):
        feed_dict = {self.encoder_inputs[t]: X[t] for t in range(self.encode_seq_len)}
        feed_dict.update({self.labels[t]: Y[t] for t in range(self.decode_seq_len)})
        feed_dict[self.dropout_rate] = dropout_rate

        return feed_dict


    def train_batch(self, sess, train_batch_gen):
        batchX, batchY = train_batch_gen.__next__()
        feed_dict = self.Get_dict(batchX, batchY, dropout_rate=0.2)  # 0.2 is the keep probability, i.e. heavy dropout during training.
        _, loss_v = sess.run([self.train_op, self.loss], feed_dict)
        return loss_v
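
    # Note on batch layout: Get_dict indexes X[t] and Y[t] per time step, so the
    # generators passed to train_batch / eval_step are expected to yield
    # time-major integer arrays of shape (encode_seq_len, batch_size) and
    # (decode_seq_len, batch_size) respectively.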
    def eval_step(self, sess, eval_batch_gen):
        batchX, batchY = eval_batch_gen.__next__()
        feed_dict = self.Get_dict(batchX, batchY, dropout_rate=1.)  # Keep probability of 1.0: no dropout at evaluation time.
        loss_v, decoder_outputs_v = sess.run([self.loss, self.decode_outputs_test], feed_dict)
        decoder_outputs_v = np.array(decoder_outputs_v).transpose([1, 0, 2])  # (time, batch, vocab) -> (batch, time, vocab)
        return loss_v, decoder_outputs_v, batchX, batchY

    def eval_batches(self, sess, eval_batch_gen, num_batches):
        losses = []
        for i in range(num_batches):
            loss_v, decoder_outputs_v, batchX, batchY = self.eval_step(sess, eval_batch_gen)
            losses.append(loss_v)
        return np.mean(losses)

    def train(self, train_set, valid_set, sess=None, save=True):
        saver = tf.train.Saver()

        if not sess:
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())

        for i in range(self.num_epochs):
            try:
                self.train_batch(sess, train_set)  # Train on one batch from the generator per iteration.

                if i % 2 == 0:
                    if save:
                        saver.save(sess, self.ckpt_path + self.model_name + '.ckpt', global_step=i)

                    val_loss = self.eval_batches(sess, valid_set, 16)
                    print('val loss : {0:.6f}'.format(val_loss))

                    sys.stdout.flush()
            except KeyboardInterrupt:
                print('Interrupted by user at iteration {}'.format(i))
                self.session = sess
                return sess
        return sess

    def restore_last_session(self):
        saver = tf.train.Saver()

        sess = tf.Session()

        ckpt = tf.train.get_checkpoint_state(self.ckpt_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        return sess


    def predict(self, sess, X):
        feed_dict = {self.encoder_inputs[t]: X[t] for t in range(self.encode_seq_len)}
        feed_dict[self.dropout_rate] = 1.
        decoder_outputs_v = sess.run(self.decode_outputs_test, feed_dict)
        decoder_outputs_v = np.array(decoder_outputs_v).transpose([1, 0, 2])

        return np.argmax(decoder_outputs_v, axis=2)
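
# A hedged usage sketch (not part of the original file): the sequence lengths,
# vocabulary sizes, and the toy random-batch generator below are illustrative
# assumptions; real data would come from a tokenized parallel corpus.
if __name__ == '__main__':
    BATCH_SIZE = 32
    VOCAB = 8000
    SEQ_LEN = 20

    model = seq2seq(encode_seq_len=SEQ_LEN, decode_seq_len=SEQ_LEN,
                    encoder_vocab_size=VOCAB, decoder_vocab_size=VOCAB,
                    embedding_dim=128, num_layers=3,
                    ckpt_path='ckpt/', num_epochs=10)

    def random_batches():
        # Yields time-major batches of shape (SEQ_LEN, BATCH_SIZE), matching the
        # layout that Get_dict expects.
        while True:
            yield (np.random.randint(0, VOCAB, (SEQ_LEN, BATCH_SIZE)),
                   np.random.randint(0, VOCAB, (SEQ_LEN, BATCH_SIZE)))

    sess = model.train(random_batches(), random_batches(), save=False)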