├── DeXpression.py
├── GRU_Cart.py
├── QLearn_Func.py
├── README.md
├── attention_mech.py
└── seq2seq.py

--------------------------------------------------------------------------------
/DeXpression.py:
--------------------------------------------------------------------------------
'''An implementation of the DeXpression network described in https://arxiv.org/abs/1509.05371 using TensorFlow and TFLearn.
It is run on the CK+ dataset, available here: http://www.consortium.ri.cmu.edu/ckagree/.
'''

import tensorflow as tf
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import numpy as np


features = np.load('dex_feat.npy')
labels = np.load('dex_lab.npy')

imagesize = 50
dropout_rate = 0.8  # Keep probability passed to tflearn's dropout layer.
LR = 0.001
batch_size = 128
num_epoch = 10

padding = 'VALID'

# Stem: convolution, pooling, and batch normalization.
net = input_data(shape=[None, imagesize, imagesize, 1])
net = conv_2d(net, 64, 7, strides=2, padding=padding, activation=None)
net = tf.nn.relu(net)
net = max_pool_2d(net, 3, strides=2, padding=padding)
net = tflearn.batch_normalization(net)

# First FeatEx block: parallel conv/pool paths concatenated along the channel axis.
net_1 = conv_2d(net, 96, 1, padding=padding)
net_1 = tf.nn.relu(net_1)
net_2 = max_pool_2d(net, 3, strides=1, padding=padding)
net_3 = conv_2d(net_1, 208, 3, padding=padding)
net_3 = tf.nn.relu(net_3)
net_4 = conv_2d(net_2, 64, 1, padding=padding)
net_4 = tf.nn.relu(net_4)
chunk_1 = tflearn.merge([net_3, net_4], mode='concat', axis=3)

# Second FeatEx block.
net_5 = conv_2d(chunk_1, 96, 1, padding=padding)
net_5 = tf.nn.relu(net_5)
net_6 = max_pool_2d(chunk_1, 3, strides=1, padding=padding)
net_7 = conv_2d(net_5, 208, 3, padding=padding)
net_7 = tf.nn.relu(net_7)
net_8 = conv_2d(net_6, 64, 1, padding=padding)
net_8 = tf.nn.relu(net_8)
chunk_2 = tflearn.merge([net_7, net_8], mode='concat', axis=3)

# Classifier: dropout followed by a softmax over the 7 expression classes.
net = tflearn.flatten(chunk_2)
net = dropout(net, dropout_rate)
net = fully_connected(net, 7, activation='softmax')
net = regression(net, optimizer='adam', loss='categorical_crossentropy', learning_rate=LR)

model = tflearn.DNN(net)
model.fit(features, labels, n_epoch=num_epoch, validation_set=0.1, show_metric=True, batch_size=batch_size)
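
# A minimal preprocessing sketch (not part of the original script) showing one
# way 'dex_feat.npy' and 'dex_lab.npy' could be built from CK+ images. The
# directory layout (one folder per expression class) and the use of PIL are
# assumptions; any pipeline that yields 50x50 grayscale arrays and one-hot
# labels for the 7 classes will do.
def build_ck_arrays(root_dir, num_classes=7, size=50):
    import os
    from PIL import Image
    feats, labs = [], []
    for class_idx in range(num_classes):
        class_dir = os.path.join(root_dir, str(class_idx))
        for fname in sorted(os.listdir(class_dir)):
            img = Image.open(os.path.join(class_dir, fname)).convert('L').resize((size, size))
            feats.append(np.array(img, dtype=np.float32).reshape(size, size, 1) / 255.0)
            labs.append(np.eye(num_classes)[class_idx])
    np.save('dex_feat.npy', np.array(feats))
    np.save('dex_lab.npy', np.array(labs))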

--------------------------------------------------------------------------------
/GRU_Cart.py:
--------------------------------------------------------------------------------
'''This code is based on an old tutorial from pythonprogramming.net, which was my first exposure to reinforcement learning.
This code is more of an experiment, so a lot of it is changed and customized, since that was easier than rewriting the whole thing
from scratch. This architecture usually gets a perfect 200 in the CartPole environment.
'''


import tflearn
from tflearn.layers.recurrent import gru
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
import gym
import random
import numpy as np
from statistics import mean, median
from collections import Counter


LR = 1e-3
env = gym.make('CartPole-v0')
env.reset()
goal_steps = 500
score_requirement = 100
initial_games = 20000
dropout_rate = 0.8
num_epochs = 5

def initial_population():
    training_data = []    # Our observations and the actions taken from them.
    scores = []           # The scores corresponding to those games.
    accepted_scores = []  # The scores that meet a certain threshold.
    for _ in range(initial_games):  # Iterate through however many games we want.
        score = 0
        game_memory = []       # Information about the environment.
        prev_observation = []  # The last observation we made.
        for _ in range(goal_steps):
            action = random.randrange(0, 2)  # Choose a random action.
            observation, reward, done, info = env.step(action)  # Take the action.
            if len(prev_observation) > 0:  # Pair the previous observation with the action taken, then store the new observation.
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done: break
        if score >= score_requirement:  # The reinforcement step: if we do well (reach the threshold), we want to remember what we did.
            accepted_scores.append(score)
            for data in game_memory:
                if data[1] == 1:  # Convert the action to a one-hot array.
                    output = [0, 1]
                elif data[1] == 0:
                    output = [1, 0]
                training_data.append([data[0], output])
        env.reset()  # Save the score and reset the environment.
        scores.append(score)
    return training_data


def neural_network_model(input_size):
    network = input_data(shape=[None, input_size, 1], name='input')
    network = gru(network, 128, return_seq=True)
    network = gru(network, 256)
    network = batch_normalization(network)
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, dropout_rate)
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, dropout_rate)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network)
    return model



def train_model(training_data, model=False):
    X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)  # Reformat the observations into (samples, 4, 1).
    y = [i[1] for i in training_data]
    if not model:
        model = neural_network_model(input_size=len(X[0]))
    model.fit(X, y, n_epoch=num_epochs, snapshot_step=500, show_metric=True, run_id='openai_learning')  # Train the model with these parameters.
    return model
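
# CartPole observations have 4 entries, and the network above treats them as a
# length-4 sequence with one feature per step, i.e. inputs of shape (batch, 4, 1).
# The helper below (hypothetical, not used by the rest of the script) just makes
# that reshape explicit; a trained model could also be persisted with
# model.save('gru_cart.tflearn') and restored later with model.load().
def to_model_input(observation):
    obs = np.asarray(observation, dtype=np.float32)
    return obs.reshape(-1, len(obs), 1)  # (batch, timesteps, features)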


training_data = initial_population()
model = train_model(training_data)
scores = []
choices = []
for each_game in range(10):
    score = 0
    game_memory = []
    prev_obs = []
    env.reset()
    for _ in range(goal_steps):
        env.render()
        if len(prev_obs) == 0:
            action = random.randrange(0, 2)
        else:
            action = np.argmax(model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0])
        choices.append(action)
        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation
        game_memory.append([new_observation, action])
        score += reward
        if done: break
    scores.append(score)

print('Average Score:', sum(scores) / len(scores))
print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
print('Score requirement:', score_requirement)

--------------------------------------------------------------------------------
/QLearn_Func.py:
--------------------------------------------------------------------------------
'''
This is a quick and dirty Q-learning function for exploring the concept. It runs the FrozenLake environment, and can run most other
environments with discrete observation and action spaces. That being said, there are much better algorithms for reinforcement learning,
and many can be deployed easily using Tensorforce, an RL library for TensorFlow.
'''

import numpy as np
import gym
import random

def QLearn(LR, min_LR, LR_decay, disc_factor, num_episodes, env_name, Static_LR=False):

    # Initialize the Q-table and an array to store episode rewards in.
    reward_array = []
    env = gym.make(env_name)
    q_table = np.zeros([env.observation_space.n, env.action_space.n])

    for i in range(num_episodes):
        state = env.reset()  # Reset the environment to start a new episode.
        done = False
        total_reward = 0

        if Static_LR:  # This lets one experiment with decaying or static learning rates.
            eta = LR
        else:
            eta = max(min_LR, LR * (LR_decay ** (i // 100)))

        # The following is where the "learning" really takes place.
        for k in range(100):
            # Pick an action greedily, with noise that encourages exploration at the start but buckles down as time goes on.
            action = np.argmax(q_table[state, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
            new_state, reward, done, _ = env.step(action)  # Take the action and extract the resulting information.
            total_reward += reward
            # Update the Q-table with the standard Q-learning rule.
            q_table[state, action] = (1 - eta) * q_table[state, action] + eta * (reward + disc_factor * np.max(q_table[new_state, :]))
            state = new_state  # Enter the new state.
            if done:
                break
        reward_array.append(total_reward)

    print("Score over time: " + str(sum(reward_array) / num_episodes))
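
# Example invocation (the hyperparameter values are illustrative, not tuned):
# start with a learning rate of 0.8, floor it at 0.01, decay it by 0.96 every
# 100 episodes, and use a discount factor of 0.95 on FrozenLake-v0.
if __name__ == '__main__':
    QLearn(LR=0.8, min_LR=0.01, LR_decay=0.96, disc_factor=0.95,
           num_episodes=5000, env_name='FrozenLake-v0')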

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Learning Implementations
This is a repository of implementations of deep learning papers, along with mini projects and experiments. Most are written
using a mix of TFLearn and TensorFlow.

# Project Descriptions
Basic descriptions of the projects. They are all commented with the relevant information, so if you're curious about anything, check there. If it's not there, feel free to ask me!

DeXpression - Implementation of the DeXpression paper: https://arxiv.org/abs/1509.05371

GRU Cart - A network that uses gated recurrent units to solve OpenAI Gym's CartPole environment.

Attention Mech - A TensorFlow function that replicates the attention mechanism described in http://www.cs.cmu.edu/~./hovy/papers/16HLT-hierarchical-attention-networks.pdf

seq2seq - A basic sequence-to-sequence model using LSTM cells without attention, based on the TensorFlow tutorial, with some minor changes and workarounds for issues with deepcopy.

Q-Learn Func - A basic function for experimenting with Q-learning and its hyperparameters.

--------------------------------------------------------------------------------
/attention_mech.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

def attention(inputs, input_size, attention_size, bi_rnn=False):
    # This is based on http://www.cs.cmu.edu/~./hovy/papers/16HLT-hierarchical-attention-networks.pdf
    # 'inputs' is expected to have shape (batch, time, input_size); for a bidirectional
    # RNN, pass the (forward, backward) output pair and set bi_rnn=True.
    if bi_rnn:
        inputs = tf.concat(inputs, 2)
    # Our first step is to pass weights*inputs+bias through the tanh function. We start by initializing our variables:
    W = tf.Variable(tf.random_normal([input_size, attention_size], stddev=0.2))
    b = tf.Variable(tf.random_normal([attention_size], stddev=0.2))
    u = tf.Variable(tf.random_normal([attention_size], stddev=0.2))  # This is our "context" vector.

    hid_rep = tf.tensordot(inputs, W, axes=1) + b  # We run our input through a one-layer feed-forward net.
    hid_rep = tf.tanh(hid_rep)

    word_dif = tf.tensordot(hid_rep, u, axes=1)  # Score each time step by its similarity with the context vector.
    alpha = tf.nn.softmax(word_dif)              # Normalize the scores into attention weights.
    output = tf.reduce_sum(inputs * tf.expand_dims(alpha, -1), 1)  # Weighted sum over time.

    return output
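
# A hedged usage sketch (not in the original file): apply the attention layer on
# top of a bidirectional GRU encoder. The sequence length, feature size, and
# unit counts below are illustrative assumptions.
if __name__ == '__main__':
    hidden_units = 64
    inputs_ph = tf.placeholder(tf.float32, [None, 30, 100])  # (batch, time, features)
    fw_cell = tf.contrib.rnn.GRUCell(hidden_units)
    bw_cell = tf.contrib.rnn.GRUCell(hidden_units)
    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs_ph, dtype=tf.float32)
    # With bi_rnn=True the forward/backward outputs are concatenated inside
    # attention(), so input_size is 2 * hidden_units.
    sentence_vec = attention(rnn_outputs, input_size=2 * hidden_units,
                             attention_size=50, bi_rnn=True)
    print(sentence_vec)  # Tensor of shape (batch, 2 * hidden_units)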

--------------------------------------------------------------------------------
/seq2seq.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import sys

class seq2seq(object):
    # legacy_seq2seq deep-copies the RNN cell, which fails with a thread-lock pickling
    # error; patching __deepcopy__ to return the cell itself is a workaround.
    setattr(tf.contrib.rnn.BasicLSTMCell, '__deepcopy__', lambda self, _: self)

    def __init__(self, encode_seq_len, decode_seq_len, encoder_vocab_size, decoder_vocab_size, embedding_dim, num_layers, ckpt_path, num_epochs, learning_rate=0.001, model_name='seq2seq_model'):

        self.encode_seq_len = encode_seq_len
        self.decode_seq_len = decode_seq_len
        self.ckpt_path = ckpt_path
        self.num_epochs = num_epochs
        self.model_name = model_name

        def comp_graph():  # We need to create our computation graph.
            tf.reset_default_graph()

            # A seq2seq model is effectively an encoder-decoder architecture built for sequence data.
            # This model is a basic LSTM architecture without an explicit attention mechanism, but one can be added without much struggle.

            self.encoder_inputs = [tf.placeholder(shape=[None, ], dtype=tf.int64, name='ei_{}'.format(t)) for t in range(encode_seq_len)]

            self.labels = [tf.placeholder(shape=[None, ], dtype=tf.int64, name='labels_{}'.format(t)) for t in range(decode_seq_len)]
            # The decoder inputs are the labels shifted right by one, with a GO token in front.
            self.decoder_inputs = [tf.zeros_like(self.encoder_inputs[0], dtype=tf.int64, name='GO')] + self.labels[:-1]

            # Dropout is a powerful regularizer for NLP models, so it's a good idea to include it.
            # Note that this placeholder is fed a *keep* probability (see output_keep_prob below).
            self.dropout_rate = tf.placeholder(tf.float32)
            basic_cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(embedding_dim, state_is_tuple=True), output_keep_prob=self.dropout_rate)

            # You can use different recurrent cells, such as GRUs.

            stacked_LSTM = tf.contrib.rnn.MultiRNNCell([basic_cell] * num_layers, state_is_tuple=True)  # Lets us set the depth of the stack.

            with tf.variable_scope('decoder') as scope:

                self.decode_outputs, self.decode_states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(self.encoder_inputs, self.decoder_inputs, stacked_LSTM, encoder_vocab_size, decoder_vocab_size, embedding_dim)
                scope.reuse_variables()
                # The test-time graph feeds each predicted token back in as the next decoder input.
                self.decode_outputs_test, self.decode_states_test = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(self.encoder_inputs, self.decoder_inputs, stacked_LSTM, encoder_vocab_size, decoder_vocab_size, embedding_dim, feed_previous=True)
                # legacy_seq2seq is a deprecated TensorFlow library, but it works well for our purposes.

            loss_weights = [tf.ones_like(label, dtype=tf.float32) for label in self.labels]
            self.loss = tf.contrib.legacy_seq2seq.sequence_loss(self.decode_outputs, self.labels, loss_weights, decoder_vocab_size)
            self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)
            # The Adam optimizer is standard in NLP, though SGD with momentum can be used effectively in its place.

        comp_graph()  # Create our graph.


    # The following are basic helper functions to run the seq2seq model.

    def Get_dict(self, X, Y, dropout_rate):
        feed_dict = {self.encoder_inputs[t]: X[t] for t in range(self.encode_seq_len)}
        feed_dict.update({self.labels[t]: Y[t] for t in range(self.decode_seq_len)})
        feed_dict[self.dropout_rate] = dropout_rate

        return feed_dict


    def train_batch(self, sess, train_batch_gen):
        batchX, batchY = train_batch_gen.__next__()
        feed_dict = self.Get_dict(batchX, batchY, dropout_rate=0.2)  # 0.2 is the keep probability, i.e. heavy dropout during training.
        _, loss_v = sess.run([self.train_op, self.loss], feed_dict)
        return loss_v
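
    # Note on batch layout: Get_dict indexes X[t] and Y[t] per time step, so the
    # generators passed to train_batch / eval_step are expected to yield
    # time-major integer arrays of shape (encode_seq_len, batch_size) and
    # (decode_seq_len, batch_size) respectively.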
    def eval_step(self, sess, eval_batch_gen):
        batchX, batchY = eval_batch_gen.__next__()
        feed_dict = self.Get_dict(batchX, batchY, dropout_rate=1.)  # Keep probability of 1.0: no dropout at evaluation time.
        loss_v, decoder_outputs_v = sess.run([self.loss, self.decode_outputs_test], feed_dict)
        decoder_outputs_v = np.array(decoder_outputs_v).transpose([1, 0, 2])  # (time, batch, vocab) -> (batch, time, vocab)
        return loss_v, decoder_outputs_v, batchX, batchY

    def eval_batches(self, sess, eval_batch_gen, num_batches):
        losses = []
        for i in range(num_batches):
            loss_v, decoder_outputs_v, batchX, batchY = self.eval_step(sess, eval_batch_gen)
            losses.append(loss_v)
        return np.mean(losses)

    def train(self, train_set, valid_set, sess=None, save=True):
        saver = tf.train.Saver()

        if not sess:
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())

        for i in range(self.num_epochs):
            try:
                self.train_batch(sess, train_set)  # Train on one batch from the generator per iteration.

                if i % 2 == 0:
                    if save:
                        saver.save(sess, self.ckpt_path + self.model_name + '.ckpt', global_step=i)

                    val_loss = self.eval_batches(sess, valid_set, 16)
                    print('val loss : {0:.6f}'.format(val_loss))

                    sys.stdout.flush()
            except KeyboardInterrupt:
                print('Interrupted by user at iteration {}'.format(i))
                self.session = sess
                return sess
        return sess

    def restore_last_session(self):
        saver = tf.train.Saver()

        sess = tf.Session()

        ckpt = tf.train.get_checkpoint_state(self.ckpt_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        return sess


    def predict(self, sess, X):
        feed_dict = {self.encoder_inputs[t]: X[t] for t in range(self.encode_seq_len)}
        feed_dict[self.dropout_rate] = 1.
        decoder_outputs_v = sess.run(self.decode_outputs_test, feed_dict)
        decoder_outputs_v = np.array(decoder_outputs_v).transpose([1, 0, 2])

        return np.argmax(decoder_outputs_v, axis=2)
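
# A hedged usage sketch (not part of the original file): the sequence lengths,
# vocabulary sizes, and the toy random-batch generator below are illustrative
# assumptions; real data would come from a tokenized parallel corpus.
if __name__ == '__main__':
    BATCH_SIZE = 32
    VOCAB = 8000
    SEQ_LEN = 20

    model = seq2seq(encode_seq_len=SEQ_LEN, decode_seq_len=SEQ_LEN,
                    encoder_vocab_size=VOCAB, decoder_vocab_size=VOCAB,
                    embedding_dim=128, num_layers=3,
                    ckpt_path='ckpt/', num_epochs=10)

    def random_batches():
        # Yields time-major batches of shape (SEQ_LEN, BATCH_SIZE), matching the
        # layout that Get_dict expects.
        while True:
            yield (np.random.randint(0, VOCAB, (SEQ_LEN, BATCH_SIZE)),
                   np.random.randint(0, VOCAB, (SEQ_LEN, BATCH_SIZE)))

    sess = model.train(random_batches(), random_batches(), save=False)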