├── Learner.py ├── README.md ├── Solving Random Mazes with Asyncronous Deep Learning.pdf ├── World.py ├── World.pyc ├── floodfill.py ├── floodfill.pyc ├── maze_gen.py ├── maze_gen.pyc └── saved_networks ├── async_maze-dqn-15000 ├── async_maze-dqn-15000.meta ├── async_maze-dqn-20000 ├── async_maze-dqn-20000.meta ├── async_maze-dqn-25000 ├── async_maze-dqn-25000.meta ├── async_maze-dqn-30000 ├── async_maze-dqn-30000.meta ├── async_maze-dqn-35000 ├── async_maze-dqn-35000.meta ├── async_maze-dqn-400669 ├── async_maze-dqn-400669.meta ├── async_maze-dqn-405401 ├── async_maze-dqn-405401.meta ├── async_maze-dqn-410055 ├── async_maze-dqn-410055.meta ├── async_maze-dqn-414369 ├── async_maze-dqn-414369.meta ├── async_maze-dqn-418747 ├── async_maze-dqn-418747.meta ├── async_maze-dqn-63998 ├── async_maze-dqn-63998.meta ├── async_maze-dqn-64498 ├── async_maze-dqn-64498.meta ├── async_maze-dqn-64998 ├── async_maze-dqn-64998.meta ├── async_maze-dqn-65498 ├── async_maze-dqn-65498.meta ├── async_maze-dqn-65998 ├── async_maze-dqn-65998.meta ├── async_maze_long_learner ├── async_maze_long_learner.meta ├── blank_maze-dqn-5000 ├── blank_maze-dqn-5000.meta ├── checkpoint ├── i_maze-dqn-5000 ├── i_maze-dqn-5000.meta ├── random_maze-dqn-10000 ├── random_maze-dqn-10000.meta ├── random_maze-dqn-105000 ├── random_maze-dqn-105000.meta ├── random_maze-dqn-110000 ├── random_maze-dqn-110000.meta ├── random_maze-dqn-115000 ├── random_maze-dqn-115000.meta ├── random_maze-dqn-120000 ├── random_maze-dqn-120000.meta ├── random_maze-dqn-125000 ├── random_maze-dqn-125000.meta ├── random_maze-dqn-15000 ├── random_maze-dqn-15000.meta ├── random_maze-dqn-20000 ├── random_maze-dqn-20000.meta ├── random_maze-dqn-25000 ├── random_maze-dqn-25000.meta ├── random_maze-dqn-5000 ├── random_maze-dqn-5000.meta ├── sparse_maze-dqn-5000 └── sparse_maze-dqn-5000.meta /Learner.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | __author__ = 'Aaron Brown' 3 | import World 4 | import threading 5 | import time 6 | import random 7 | import numpy as np 8 | import tensorflow as tf 9 | import floodfill 10 | from collections import deque 11 | 12 | actions = World.actions 13 | 14 | #Turn the GUI on, or off if training 15 | gui_display = True 16 | 17 | if(not gui_display): 18 | World.gui_off() 19 | 20 | # The variables below hold all the trainable weights for our CNN. For each, the 21 | 22 | GAMMA = 0.8 # decay rate of past observations 23 | BATCH = 25 # size of minibatch 24 | 25 | # 26 | state_input_1 = tf.placeholder( 27 | tf.float32, 28 | [None,9,9,1]) 29 | 30 | action_input = tf.placeholder( 31 | tf.bool, 32 | shape=(BATCH,4)) 33 | 34 | reward_input = tf.placeholder( 35 | tf.float32, 36 | shape=(BATCH)) 37 | 38 | max_val_input = tf.placeholder( 39 | tf.float32, 40 | shape=(BATCH)) 41 | 42 | terminal_input = tf.placeholder( 43 | tf.float32, 44 | shape=(BATCH)) 45 | 46 | conv1_weights = tf.Variable( 47 | tf.truncated_normal([5, 5, 1, 16], # 5x5 filter, depth 16. 48 | stddev=0.1)) 49 | conv1_biases = tf.Variable(tf.zeros([16])) 50 | conv2_weights = tf.Variable( 51 | tf.truncated_normal([3, 3, 16, 32], # 3x3 filter, depth 32 52 | stddev=0.1)) 53 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[32])) 54 | 55 | conv3_weights = tf.Variable( 56 | tf.truncated_normal([2, 2, 32, 64], # 3x3 filter, depth 64 57 | stddev=0.1)) 58 | conv3_biases = tf.Variable(tf.constant(0.1, shape=[64])) 59 | 60 | fc1_weights = tf.Variable( # fully connected, depth 128. 
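    # (Sizing note, inferred from the layers above: the 9x9 input becomes 5x5
    #  after the 5x5 VALID convolution, 3x3 after the 3x3 convolution, and 2x2
    #  after the 2x2 convolution, so the flattened feature vector feeding this
    #  layer has 2*2*64 = 256 entries, hence the [256, 512] shape below.)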
61 | tf.truncated_normal([256, 512], 62 | stddev=0.1)) 63 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[512])) 64 | fc2_weights = tf.Variable( 65 | tf.truncated_normal([512, 4], 66 | stddev=0.1)) 67 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[4])) 68 | 69 | def network(data): 70 | conv = tf.nn.conv2d(data, 71 | conv1_weights, 72 | strides=[1, 1, 1, 1], 73 | padding='VALID') 74 | 75 | # Bias and rectified linear non-linearity. 76 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases)) 77 | 78 | conv = tf.nn.conv2d(relu, 79 | conv2_weights, 80 | strides=[1, 1, 1, 1], 81 | padding='VALID') 82 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases)) 83 | 84 | conv = tf.nn.conv2d(relu, 85 | conv3_weights, 86 | strides=[1, 1, 1, 1], 87 | padding='VALID') 88 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases)) 89 | 90 | # fully connected layers. 91 | relu_flat = tf.reshape(relu, [-1,256]) 92 | 93 | # Fully connected layer. Note that the '+' operation automatically 94 | # broadcasts the biases. 95 | hidden = tf.nn.relu(tf.matmul(relu_flat, fc1_weights) + fc1_biases) 96 | 97 | return tf.matmul(hidden, fc2_weights) + fc2_biases 98 | 99 | sess = tf.InteractiveSession() 100 | sess.as_default() 101 | 102 | # L = .5[r + discount * max a' Q(s', a') - Q(s, a)]^2 103 | # |------target-------| |prediction| 104 | 105 | # Do a feedforward pass for the current state s to get predicted Q-values for all actions. 106 | action_array_1 = network(state_input_1) 107 | # Do a feedforward pass for the next state s' and calculate maximum overall network outputs max a' Q(s', a'). 108 | # Set Q-value target for action to r + discount * max a' Q(s', a') (use the max calculated in step 2). 109 | # For all other actions, set the Q-value target to the same as originally returned from step 1, making the error 0 for those outputs. 110 | 111 | # tt = rr + discount * max(a') Q(ss',aa') or rr if terminal state 112 | tt = reward_input + terminal_input * (GAMMA * max_val_input) 113 | tt = tf.reshape(tt,(BATCH,1)) 114 | target_prep = tf.tile(tt,[1,4]) 115 | target = tf.select(action_input, target_prep, action_array_1) 116 | 117 | # loss is .5(tt - Q(ss,aa))^2 118 | Qerror = tf.sub(target, action_array_1) 119 | loss = .5*tf.reduce_sum(tf.mul(Qerror, Qerror)) 120 | 121 | # Update the weights using backpropagation. 
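# (Worked example of the target above, with illustrative numbers: for a normal
#  step r = -0.1, a non-terminal transition (terminal_input is 1 for ordinary
#  moves and 0 on reaching the goal), and max a' Q(s',a') = 2.0, the target is
#  tt = -0.1 + 1*(0.8*2.0) = 1.5, so the chosen action contributes
#  .5*(1.5 - Q(s,a))^2 to the loss while every other action contributes zero.)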
122 | optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(loss) 123 | 124 | # saving and loading networks 125 | saver = tf.train.Saver() 126 | tf.initialize_all_variables().run() 127 | 128 | checkpoint = tf.train.get_checkpoint_state("saved_networks") 129 | if checkpoint and checkpoint.model_checkpoint_path: 130 | saver.restore(sess, checkpoint.model_checkpoint_path) 131 | print("Successfully loaded:", checkpoint.model_checkpoint_path) 132 | else: 133 | print("Could not find old network weights") 134 | 135 | def see_action(action,i,j): 136 | 137 | if action == actions[0]: 138 | reward, s2, t = World.see_move(0, -1,i,j) 139 | elif action == actions[1]: 140 | reward, s2, t= World.see_move(1, 0,i,j) 141 | elif action == actions[2]: 142 | reward, s2, t = World.see_move(0, 1,i,j) 143 | elif action == actions[3]: 144 | reward, s2, t = World.see_move(-1, 0,i,j) 145 | else: 146 | return 147 | 148 | return reward, s2, t 149 | 150 | def do_action(action): 151 | 152 | if action == actions[0]: 153 | World.do_move(0, -1) 154 | elif action == actions[1]: 155 | World.do_move(1, 0) 156 | elif action == actions[2]: 157 | World.do_move(0, 1) 158 | elif action == actions[3]: 159 | World.do_move(-1, 0) 160 | else: 161 | return 162 | 163 | #update the visual network arrow display in GUI 164 | def network_triangles(): 165 | D = deque() 166 | for i in range(World.x): 167 | for j in range(World.y): 168 | state_peek_1 = World.get_state((i,j)) 169 | state_peek_1 = np.reshape(state_peek_1,(1, 9, 9, 1)).astype(np.float32) 170 | feed_dict = {state_input_1: state_peek_1} 171 | values_1 = sess.run(action_array_1, feed_dict=feed_dict) 172 | state_peek_1 = np.reshape(state_peek_1,(9, 9, 1)).astype(np.float32) 173 | 174 | random_index = np.random.choice(4,1) 175 | try_index = random_index[0] 176 | try_act = actions[try_index] 177 | 178 | try_act_prep = np.reshape([False, False, False, False],(4)).astype(np.bool) 179 | try_act_prep[try_index] = True 180 | 181 | reward, s2, terminal = see_action(try_act,i,j) 182 | 183 | state_peek_2 = np.reshape(s2,(1, 9, 9, 1)).astype(np.float32) 184 | feed_dict = {state_input_1: state_peek_2} 185 | values_2 = sess.run(action_array_1, feed_dict=feed_dict) 186 | 187 | max_val_data = np.amax(values_2) 188 | 189 | D.append((state_peek_1, try_act_prep, reward, max_val_data, terminal)) 190 | 191 | if(gui_display): 192 | for action in actions: 193 | World.set_cell_score(i,j,action,values_1) 194 | 195 | return D 196 | 197 | def run(): 198 | #initalize variables 199 | trials = 1 200 | moves = 1 201 | t = 0 202 | hit_one = True 203 | 204 | #t0_floodfill = time.time() 205 | floodfill.FloodFillValues() 206 | #t1_floodfill = time.time() 207 | 208 | #print('running floodfill took {}'.format(t1_floodfill-t0_floodfill)) 209 | 210 | opt_moves = floodfill.get_value(0,4) 211 | 212 | sub_trials = 1 213 | 214 | # variables used for running tests, note that some of these are not really compatiable with each other. 
Sort of hacked together for testing purposes 215 | train = True # used to train the network 216 | maze_space = -1 # number of saved mazes to iterate through, -1 means no iteration and always use new maze every time 217 | save_trial = 500 # save network off after every so many trials, -1 to disable save 218 | number_trial = -1 # number of trials to run, -1 for indefinite 219 | max_moves = -1 # max number of moves before restarting, -1 for no limit 220 | 221 | 222 | World.set_maze_size(maze_space) 223 | 224 | while trials < number_trial or (number_trial == -1): 225 | 226 | # run transitions multiple times to get collection of data thats equal to BATCH_SIZE 227 | 228 | # update current state 229 | state_1 = World.get_state(World.player) 230 | 231 | #print(state_1) 232 | 233 | state_peek = np.reshape(state_1,(1, 9, 9, 1)).astype(np.float32) 234 | feed_dict = {state_input_1: state_peek} 235 | #t0_network = time.time() 236 | net_out_1 = sess.run(action_array_1, feed_dict=feed_dict) 237 | #t1_network = time.time() 238 | 239 | #print('running the network took {}'.format(t1_network-t0_network)) 240 | 241 | #World.get_pos_from_state(state_peek) 242 | #print(net_out_1[0]) 243 | 244 | max_index = np.argmax(net_out_1[0]) 245 | 246 | max_act = actions[max_index] 247 | 248 | do_action(max_act) 249 | 250 | # Check if the game has restarted 251 | if World.has_restarted() or (moves > max_moves and max_moves > 0): 252 | 253 | if(moves==opt_moves or (trials < maze_space or maze_space < 0)): 254 | trials+=1 255 | hit_one = True 256 | 257 | if(moves < max_moves or max_moves == -1): 258 | sub_trials+=1 259 | moves = 0 260 | 261 | #DEBUG 262 | print('at trial {}'.format(trials)) 263 | #print('at subtrial {}'.format(sub_trials)) 264 | 265 | World.restart_game(trials) 266 | 267 | #recalculate optimum number of moves 268 | #t0_floodfill = time.time() 269 | floodfill.FloodFillValues() 270 | #t1_floodfill = time.time() 271 | #print('running floodfill took {}'.format(t1_floodfill-t0_floodfill)) 272 | opt_moves = floodfill.get_value(0,4) 273 | 274 | # save progress every so many iterations 275 | if save_trial > 0 and trials % save_trial == 0 and hit_one: 276 | saver.save(sess, 'saved_networks/' + 'async_maze' + '-dqn', global_step = t) 277 | 278 | print('completed trial {}'.format(trials)) 279 | #subtrials is used as a reference in certain testing areas 280 | #print('took {} subtrials'.format(sub_trials)) 281 | 282 | hit_one = False 283 | 284 | sub_trials = 1 285 | 286 | # update weights and minimize loss function for BATCH_SIZE amount of data points 287 | 288 | # sample a minibatch to train on 289 | if(train): 290 | D = network_triangles() 291 | minibatch = random.sample(D, BATCH) 292 | 293 | s1_update = [d[0] for d in minibatch] 294 | a_update = [d[1] for d in minibatch] 295 | r_update = [d[2] for d in minibatch] 296 | mv_update = [d[3] for d in minibatch] 297 | term = [d[4] for d in minibatch] 298 | 299 | feed_dict = {state_input_1: s1_update, action_input: a_update, reward_input: r_update, max_val_input: mv_update, terminal_input: term} 300 | 301 | _, my_loss, start, _end_, my_tt = sess.run([optimizer, loss, action_array_1, target, tt], feed_dict=feed_dict) 302 | 303 | 304 | # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST. 
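# (Each pass above takes one gradient step on a minibatch of BATCH = 25
#  experience tuples gathered by network_triangles(); if the GUI is enabled,
#  uncommenting the two lines below slows the loop to roughly one move per
#  second so the agent can be watched.)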
305 | #if gui_display: 306 | #time.sleep(1.0) 307 | moves += 1 308 | t += 1 309 | 310 | #log = open(".\optimal_policy.txt", "w") 311 | #print(get_policy(), file = log) 312 | #Test for maze completion without training 313 | if(max_moves > 0): 314 | print('completed trial {}'.format(trials)) 315 | print('took {} subtrials'.format(sub_trials)) 316 | 317 | t = threading.Thread(target=run) 318 | t.daemon = True 319 | t.start() 320 | World.start_game() 321 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Solving Random Mazes using Asynchronous Deep Reinforcement Learning
2 | ====================================================================
3 | 
4 | The only requirement for running this program is an installation of TensorFlow, which is freely available. If you are a Windows user, it is strongly encouraged to install the Windows 10 Bash with Ubuntu update to work with TensorFlow easily. A helpful guide can be found here: http://www.howtogeek.com/249966/how-to-install-and-use-the-linux-bash-shell-on-windows-10/
5 | 
6 | The basic idea of this project is to use deep reinforcement learning to solve any number of 5x5 randomly generated mazes, where the start location is always the bottom-left cell and the goal is always the center of the maze.
7 | 
8 | To solve any number of randomly generated mazes, the agent needs to know as much about the maze as possible, such as where all of its walls are located, beyond just knowing where it itself is located in the maze.
9 | 
10 | To deal with this high-dimensional state space, the agent uses and trains a convolutional neural network, similar to how DeepMind trained a program to play Atari games from just raw pixel data.
11 | 
12 | Each maze state consists of a 9x9 2D image, with walls/pegs marked -1, open spaces 0, and the agent 1. An example of a possible state is shown below.
13 |  0 1 2 3 4
14 | -------------------------
15 | 0|0 0 0 0 0 0 0 0 0
16 |  |0 -1 0 -1 -1 -1 -1 -1 0
17 | 1|0 -1 0 0 0 0 0 -1 0
18 |  |0 -1 -1 -1 0 -1 0 -1 0
19 | 2|0 0 0 -1 0 -1 0 0 0
20 |  |0 -1 0 -1 0 -1 -1 -1 0
21 | 3|0 -1 0 0 0 0 1 -1 0
22 |  |0 -1 -1 -1 -1 -1 0 -1 0
23 | 4|0 0 0 0 0 0 0 0 0
24 | 
25 | This maze is called the "I maze" because it looks like a hollowed-out I. It has 4 entrances into its center goal area, and the agent can be seen entering from the bottom-right entrance. The agent "1" is located at cell (3,3), where cell (0,0) is the top left. Notice that there are two optimal paths from the start to the goal, each 6 moves long.
26 | 
27 | This is a deep reinforcement learning example based on the Q-function.
28 | - Rules: The agent (black box) has to reach the center goal, and then it starts over with a new random maze.
29 | - Rewards: Each step gives a negative reward of -0.1. Running into a wall gets -1. Reaching the center goal gets +10.
30 | - Actions: There are only 4 actions: up, down, right, left.
31 | 
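For reference, the sketch below shows one way such a 9x9 image can be built from the maze's wall arrays; it mirrors what World.get_state() does, though the helper name encode_state and the NumPy usage here are illustrative only. Cells sit at even (row, column) indices, and pegs/walls sit at odd indices.

    import numpy as np

    def encode_state(rows, columns, player, n=5):
        # rows: (n+1) x n horizontal-wall flags, columns: n x (n+1) vertical-wall flags
        dim = 2 * n - 1                          # 9 for a 5x5 maze
        state = np.zeros((dim, dim))
        state[1:dim-1:2, 1:dim-1:2] = -1         # static pegs between cells
        for j in range(1, dim - 1, 2):           # horizontal walls
            for i in range(0, dim, 2):
                state[j, i] = -rows[j // 2 + 1][i // 2]
        for j in range(0, dim, 2):               # vertical walls
            for i in range(1, dim - 1, 2):
                state[j, i] = -columns[j // 2][i // 2 + 1]
        state[player[1] * 2, player[0] * 2] = 1  # the agent
        return state

In the actual code the wall arrays come from maze_gen.generate(), and the resulting 9x9 image is reshaped to 9x9x1 before being fed to the network in Learner.py.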
32 | The learning policy follows this algorithm:
33 | Initialize action-value function Q with random weights
34 | For N trials do:
35 |     Set agent to start of maze
36 |     Generate new random maze
37 |     While agent has not reached center of maze:
38 |         For each possible cell position, pick a random action for that cell position
39 |             Feed the cell position's state forward through Q and observe the reward and next state from
40 |             following the random action
41 |             Set y_j = r_j if the next state is terminal
42 |                 y_j = r_j + gamma * max_a' Q(s_(j+1), a') for a non-terminal state
43 |         End for
44 |         Perform gradient descent to minimize the loss defined in Learner.py
45 |         Feed the agent's state forward through Q and choose the action with the highest value from Q
46 |     End while
47 | End for
48 | 
49 | 
50 | The little triangles represent the values of the Q network for each state and each action. Within a cell, greener triangles mark the relatively high Q-values and redder triangles the relatively low ones.
51 | 
52 | # Run
53 | Run the file Learner.py
54 | 
55 | # Demo
56 | https://www.youtube.com/watch?v=1F6oGEItjOg&feature=youtu.be
57 | 
--------------------------------------------------------------------------------
/Solving Random Mazes with Asyncronous Deep Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/Solving Random Mazes with Asyncronous Deep Learning.pdf
--------------------------------------------------------------------------------
/World.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Aaron Brown' 2 | from Tkinter import * 3 | import maze_gen 4 | import numpy as np 5 | master = Tk() 6 | 7 | wall_width = 90 8 | pip_width = 6 9 | (x, y) = (5, 5) 10 | actions = ["up", "right", "down", "left"] 11 | gui_display = True 12 | 13 | board = Canvas(master, width=(x+1)*pip_width+x*wall_width, height=(y+1)*pip_width+y*wall_width) 14 | player = (0, y-1) 15 | restart = False 16 | walk_reward = -0.1 17 | wall_reward = -1.0 18 | goal_reward = 10 19 | me = 0 20 | cell_scores = {} 21 | triangle_size = 0.1 22 | 23 | #walls for rows and columns 24 | rows, columns = maze_gen.generate(5,0) 25 | goal = (2, 2) 26 | 27 | def create_triangle(i, j, action): 28 | if action == actions[0]: 29 | return board.create_polygon((i+0.5-triangle_size)*wall_width+(i+1)*pip_width, (j+triangle_size)*wall_width+(j+1)*pip_width, 30 | (i+0.5+triangle_size)*wall_width+(i+1)*pip_width, (j+triangle_size)*wall_width+(j+1)*pip_width, 31 | (i+0.5)*wall_width+(i+1)*pip_width, j*wall_width+(j+1)*pip_width, 32 | fill="green", width=1) 33 | elif action == actions[2]: 34 | return board.create_polygon((i+0.5-triangle_size)*wall_width+(i+1)*pip_width, (j+1-triangle_size)*wall_width+(j+1)*pip_width, 35 | (i+0.5+triangle_size)*wall_width+(i+1)*pip_width, (j+1-triangle_size)*wall_width+(j+1)*pip_width, 36 | (i+0.5)*wall_width+(i+1)*pip_width, (j+1)*wall_width+(j+1)*pip_width, 37 | fill="green", width=1) 38 | elif action == actions[3]: 39 | return board.create_polygon((i+triangle_size)*wall_width+(i+1)*pip_width, (j+0.5-triangle_size)*wall_width+(j+1)*pip_width, 40 | (i+triangle_size)*wall_width+(i+1)*pip_width, (j+0.5+triangle_size)*wall_width+(j+1)*pip_width, 41 | i*wall_width+(i+1)*pip_width, (j+0.5)*wall_width+(j+1)*pip_width, 42 | fill="green", width=1) 43 | elif action == actions[1]: 44 | return board.create_polygon((i+1-triangle_size)*wall_width+(i+1)*pip_width, (j+0.5-triangle_size)*wall_width+(j+1)*pip_width, 45 | (i+1-triangle_size)*wall_width+(i+1)*pip_width, (j+0.5+triangle_size)*wall_width+(j+1)*pip_width, 46 |
(i+1)*wall_width+(i+1)*pip_width, (j+0.5)*wall_width+(j+1)*pip_width, 47 | fill="green", width=1) 48 | 49 | def render_grid(): 50 | global walls, Width, x, y, player 51 | #creat the white base board 52 | board.create_rectangle(0, 0, (x+1)*pip_width+x*wall_width, (y+1)*pip_width+y*wall_width, fill="white", width=1) 53 | for i in range(x+1): 54 | for j in range(y+1): 55 | #create network signal arrows 56 | temp = {} 57 | for action in actions: 58 | temp[action] = create_triangle(i, j, action) 59 | cell_scores[(i,j)] = temp 60 | #create the red pips 61 | board.create_rectangle(i*pip_width+i*wall_width, j*pip_width+j*wall_width, (i+1)*pip_width+i*wall_width, (j+1)*pip_width+j*wall_width, fill="red", width=1) 62 | #create the blue row walls 63 | for n in range(len(rows)): 64 | for i in range(len(rows[n])): 65 | if rows[n][i] is 1: 66 | board.create_rectangle((i+1)*pip_width+i*wall_width, n*pip_width+n*wall_width, (i+1)*pip_width+(i+1)*wall_width, (n+1)*pip_width+n*wall_width, fill="blue", width=1) 67 | #create the blue column walls 68 | for n in range(len(columns)): 69 | for i in range(len(columns[n])): 70 | if columns[n][i] is 1: 71 | board.create_rectangle(i*pip_width+i*wall_width, (n+1)*pip_width+n*wall_width, (i+1)*pip_width+i*wall_width, (n+1)*pip_width+(n+1)*wall_width, fill="blue", width=1) 72 | 73 | board.grid(row=0, column=0) 74 | 75 | def set_cell_score(i, j, action, vals): 76 | 77 | triangle = cell_scores[(i,j)][action] 78 | if(i==2 and j==2): 79 | board.itemconfigure(triangle, fill='blue') #set center goal cells markers to blue 80 | return 81 | if action == 'up': 82 | vact = 0 83 | elif action == 'right': 84 | vact = 1 85 | elif action == 'down': 86 | vact = 2 87 | elif action == 'left': 88 | vact = 3 89 | val = vals[0][vact] 90 | 91 | cell_score_min = np.min(vals) 92 | cell_score_max = np.max(vals) 93 | green_dec = int(min(255, max(0, (val - cell_score_min) * 255.0 / (cell_score_max - cell_score_min)))) 94 | green = hex(green_dec)[2:] 95 | red = hex(255-green_dec)[2:] 96 | if len(red) == 1: 97 | red += "0" 98 | if len(green) == 1: 99 | green += "0" 100 | color = "#" + red + green + "00" 101 | board.itemconfigure(triangle, fill=color) 102 | 103 | def render_player(): 104 | global me 105 | me = board.create_rectangle((player[0]+1)*pip_width+player[0]*wall_width+wall_width*1/3, (player[1]+1)*pip_width+player[1]*wall_width+wall_width*1/3, 106 | (player[0]+1)*pip_width+player[0]*wall_width+wall_width*2/3, (player[1]+1)*pip_width+player[1]*wall_width+wall_width*2/3, fill="black", width=1, tag="me") 107 | 108 | if(gui_display): 109 | render_grid() 110 | render_player() 111 | 112 | def do_move(dx, dy): 113 | 114 | global player, me, restart 115 | if restart == True: 116 | restart_game() 117 | new_x = player[0] + dx 118 | new_y = player[1] + dy 119 | if (new_x >= 0) and (new_x < x) and (new_y >= 0) and (new_y < y) and wall_check( player[0], player[1], dx, dy): 120 | if(gui_display): 121 | board.coords(me, (new_x+1)*pip_width+new_x*wall_width+wall_width*1/3, (new_y+1)*pip_width+new_y*wall_width+wall_width*1/3, 122 | (new_x+1)*pip_width+new_x*wall_width+wall_width*2/3, (new_y+1)*pip_width+new_y*wall_width+wall_width*2/3) 123 | player = (new_x, new_y) 124 | 125 | if new_x == goal[0] and new_y == goal[0]: 126 | #print "Arrived at Goal " 127 | restart = True 128 | 129 | def see_move(dx, dy, i, j): 130 | 131 | score = 0 132 | new_x = i + dx 133 | new_y = j + dy 134 | score += walk_reward 135 | terminal = 1 136 | if (new_x >= 0) and (new_x < x) and (new_y >= 0) and (new_y < y) and wall_check( i, 
j, dx, dy): 137 | 138 | state = get_state((new_x, new_y)) 139 | 140 | if new_x == goal[0] and new_y == goal[0]: 141 | score -= walk_reward 142 | score += goal_reward 143 | terminal = 0 144 | else: 145 | score -= walk_reward 146 | score += wall_reward 147 | state = get_state((i,j)) 148 | 149 | return score, state, terminal 150 | 151 | #state is an (2n-1)x(2n-1) array where n is maze dim. walls are -1 empty spaces are 0 and agent is 1 152 | def get_state(position): 153 | global x, rows, columns 154 | state = [] 155 | dim = 2*x-1 156 | 157 | #intially fill in all spaces with 0 158 | state = [[0.0 for i in range(dim)] for j in range(dim)] 159 | #fill in pegs with -1, these are always static but it helps us format our state in a square 160 | for j in np.arange(1,dim-1,2): 161 | for i in np.arange(1,dim-1,2): 162 | state[j][i] = -1 163 | 164 | #fill in position with 1 165 | state[position[1]*2][position[0]*2] = 1 166 | #fill in rows 167 | for j in np.arange(1,dim-1,2): 168 | for i in np.arange(0,dim,2): 169 | state[j][i] = -1*rows[j/2+1][i/2] 170 | #fill in columns 171 | for j in np.arange(0,dim,2): 172 | for i in np.arange(1,dim-1,2): 173 | state[j][i] = -1*columns[j/2][i/2+1] 174 | 175 | return state 176 | 177 | def get_pos_from_state(state): 178 | state = np.reshape(state,(9,9)) 179 | #print state 180 | x, y = np.unravel_index(np.argmax(state), np.shape(state)) 181 | #print x/2, y/2 182 | return x/2, y/2 183 | 184 | def wall_check(curr_x, curr_y, dx, dy): 185 | #if going right 186 | if(dx > 0): 187 | if columns[curr_y][curr_x+1] is not 1: 188 | return True 189 | #if going left 190 | elif(dx < 0): 191 | if columns[curr_y][curr_x] is not 1: 192 | return True 193 | #if going up 194 | elif(dy < 0): 195 | if rows[curr_y][curr_x] is not 1: 196 | return True 197 | #if going down 198 | else: 199 | if rows[curr_y+1][curr_x] is not 1: 200 | return True 201 | return False 202 | 203 | def call_up(event): 204 | try_move(0, -1) 205 | 206 | def call_right(event): 207 | try_move(1, 0) 208 | 209 | def call_down(event): 210 | try_move(0, 1) 211 | 212 | def call_left(event): 213 | try_move(-1, 0) 214 | 215 | def set_maze_size(size): 216 | if(size > 0): 217 | maze_gen.set_maze_size(size) 218 | 219 | def restart_game(trial): 220 | #print "lets restart" 221 | global player, me, restart, rows, columns 222 | 223 | rows, columns = rows, columns = maze_gen.generate(5,trial) 224 | if(gui_display): 225 | render_grid() 226 | render_player() 227 | 228 | player = (0, y-1) 229 | restart = False 230 | 231 | def has_restarted(): 232 | return restart 233 | 234 | def gui_off(): 235 | global gui_display 236 | gui_display = False 237 | 238 | def start_game(): 239 | master.mainloop() 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /World.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/World.pyc -------------------------------------------------------------------------------- /floodfill.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Aaron Brown' 2 | import World 3 | import numpy as np 4 | 5 | Values = [] 6 | walls = 0 7 | 8 | class Stack: 9 | def __init__(self): 10 | self.items = [] 11 | 12 | def isEmpty(self): 13 | return self.items == [] 14 | 15 | def push(self, item): 16 | self.items.append(item) 17 | 18 | def pop(self): 19 | return self.items.pop() 20 | 21 | def 
peek(self): 22 | return self.items[len(self.items)-1] 23 | 24 | def size(self): 25 | return len(self.items) 26 | 27 | cell_stack = Stack() 28 | 29 | class MazeCell: 30 | def __init__(self,x,y,value): 31 | self.x = x 32 | self.y = y 33 | self.value = value 34 | 35 | def FloodFillValues(): 36 | global walls, cell_stack, Values 37 | 38 | walls = World.x 39 | 40 | #set all maze cells to -1 representing void value 41 | Values = [[-1 for i in range(walls)] for j in range(walls)] 42 | 43 | #if maze size is even 44 | if walls%2 == 0: 45 | base_1 = walls/2-1 46 | base_2 = walls/2 47 | 48 | cell_stack.push(MazeCell(base_1,base_1,0)) 49 | cell_stack.push(MazeCell(base_2,base_1,0)) 50 | cell_stack.push(MazeCell(base_1,base_2,0)) 51 | cell_stack.push(MazeCell(base_2,base_2,0)) 52 | 53 | else: 54 | base = (walls-1)/2 55 | cell_stack.push(MazeCell(base,base,0)) 56 | 57 | while(cell_stack.size() > 0): 58 | current_cell = cell_stack.pop() 59 | SetCell(current_cell.x,current_cell.y,current_cell.value) 60 | 61 | #set value in maze cell by x and y location 62 | def SetCell( x, y, value): 63 | global Values, cell_stack 64 | 65 | current_value = Values[x][y] 66 | if current_value == -1 or value < current_value: 67 | Values[x][y] = value 68 | # look up 69 | if World.wall_check(x,y,0,-1) and y > 0: 70 | cell_stack.push(MazeCell(x,y-1,value+1)) 71 | # look right 72 | if World.wall_check(x,y,1,0) and x < walls-1: 73 | cell_stack.push(MazeCell(x+1,y,value+1)) 74 | # look down 75 | if World.wall_check(x,y,0,1) and y < walls-1: 76 | cell_stack.push(MazeCell(x,y+1,value+1)) 77 | # look left 78 | if World.wall_check(x,y,-1,0) and x > 0: 79 | cell_stack.push(MazeCell(x-1,y,value+1)) 80 | 81 | def get_value(x,y): 82 | return Values[x][y] 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /floodfill.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/floodfill.pyc -------------------------------------------------------------------------------- /maze_gen.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Aaron Brown' 2 | #Class to generate random mazes of some nxn dimensions 3 | import numpy as np 4 | import random 5 | from collections import deque 6 | 7 | rows = [] 8 | columns = [] 9 | cells = [] 10 | walls = 0 11 | saved_mazes = [] 12 | saved_size = 0 13 | 14 | class Stack: 15 | def __init__(self): 16 | self.items = [] 17 | 18 | def isEmpty(self): 19 | return self.items == [] 20 | 21 | def push(self, item): 22 | self.items.append(item) 23 | 24 | def pop(self): 25 | return self.items.pop() 26 | 27 | def peek(self): 28 | return self.items[len(self.items)-1] 29 | 30 | def size(self): 31 | return len(self.items) 32 | 33 | class MazeCell: 34 | def __init__(self,x,y): 35 | self.x = x 36 | self.y = y 37 | self.visited = False 38 | 39 | def set_maze_size(size): 40 | global saved_size 41 | saved_size = size 42 | 43 | #generate a random maze 44 | def generate(maze_size, trial): 45 | 46 | global walls 47 | global rows 48 | global columns 49 | global cells 50 | 51 | if(saved_size > 0 and len(saved_mazes) == saved_size): 52 | #print "loading maze ", trial%saved_size 53 | rows, columns = saved_mazes[trial%saved_size] 54 | return rows, columns 55 | 56 | walls = maze_size 57 | rows = [[1 for i in range(walls)] for j in range(walls+1)] 58 | columns = [[1 for i in range(walls+1)] for j in 
range(walls)] 59 | 60 | # DEBUG blank maze 61 | #rows = [[1,1,1,1,1],[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0],[1,1,1,1,1]] 62 | #columns = [[1,0,0,0,0,1],[1,0,0,0,0,1],[1,0,0,0,0,1],[1,0,0,0,0,1],[1,0,0,0,0,1]] 63 | #return rows, columns 64 | # DEBUG 65 | 66 | # DEBUG sparse maze 67 | #rows = [[1,1,1,1,1],[0,0,1,0,0],[0,0,0,0,0],[0,0,0,0,0],[0,0,1,0,0],[1,1,1,1,1]] 68 | #columns = [[1,0,0,0,0,1],[1,1,0,0,1,1],[1,0,1,1,0,1],[1,1,0,0,1,1],[1,0,0,0,0,1]] 69 | #return rows, columns 70 | # DEBUG 71 | 72 | # DEBUG i maze 73 | #rows = [[1,1,1,1,1],[0,0,1,1,0],[0,1,0,0,0],[0,0,0,1,0],[0,1,1,0,0],[1,1,1,1,1]] 74 | #olumns = [[1,0,0,0,0,1],[1,1,0,0,1,1],[1,0,1,1,0,1],[1,1,0,0,1,1],[1,0,0,0,0,1]] 75 | #return rows, columns 76 | # DEBUG 77 | 78 | 79 | # Create the cells that shows visited or not 80 | for y in range(walls): 81 | for x in range(walls): 82 | cells.append(MazeCell(x,y)) 83 | 84 | cell_stack = Stack() 85 | unvistedCells = len(cells) 86 | currentCell = 0 87 | cells[currentCell].visited = True 88 | unvistedCells -= 1 89 | 90 | #While there are unvisited cells 91 | while (unvistedCells > 0): 92 | nextCell = chooseUnvisitedNeighbor(currentCell) 93 | if(nextCell != -1): 94 | cell_stack.push(currentCell) 95 | #remove the wall in between currentCell and nextCell 96 | removeWall(currentCell,nextCell) 97 | currentCell = nextCell 98 | cells[currentCell].visited = True 99 | unvistedCells -= 1 100 | elif(cell_stack.size() > 0): 101 | currentCell = cell_stack.pop() 102 | 103 | cells = [] #reset cells for when method is called again 104 | 105 | if(saved_size > 0 and len(saved_mazes) < saved_size): 106 | saved_mazes.append((rows,columns)) 107 | return rows, columns 108 | 109 | def chooseUnvisitedNeighbor(currentCell): 110 | x = cells[currentCell].x 111 | y = cells[currentCell].y 112 | 113 | candidates = [] 114 | 115 | # left 116 | if(x > 0 and cells[currentCell-1].visited is False): 117 | candidates.append(currentCell-1) 118 | # right 119 | if(x < (walls-1) and cells[currentCell+1].visited is False): 120 | candidates.append(currentCell+1) 121 | # up 122 | if(y > 0 and cells[currentCell-walls].visited is False): 123 | candidates.append(currentCell-walls) 124 | # down 125 | if(y < (walls-1) and cells[currentCell+walls].visited is False): 126 | candidates.append(currentCell+walls) 127 | 128 | if(len(candidates) == 0): 129 | #print "no choice" 130 | return -1 131 | 132 | #choose a random candidate 133 | random_choice = random.sample(candidates,len(candidates)) 134 | #print random_choice[0] 135 | return random_choice[0] 136 | 137 | def removeWall(currentCell,nextCell): 138 | 139 | global columns 140 | global rows 141 | 142 | #remove column to the right of currentCell 143 | if(nextCell-currentCell == 1): 144 | columns[currentCell/walls][currentCell%walls+1] = 0 145 | #print "right" 146 | #remove column to the left of currentCell 147 | elif(currentCell - nextCell == 1): 148 | columns[currentCell/walls][currentCell%walls] = 0 149 | #print "left" 150 | #remove row above currentCell 151 | elif(currentCell - nextCell == walls): 152 | rows[currentCell/walls][currentCell%walls] = 0 153 | #print "up" 154 | #remove row below currentCell 155 | elif(nextCell - currentCell == walls): 156 | rows[currentCell/walls+1][currentCell%walls] = 0 157 | #print "down" 158 | -------------------------------------------------------------------------------- /maze_gen.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/maze_gen.pyc -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-15000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-15000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-15000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-15000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-20000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-20000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-20000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-20000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-25000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-25000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-25000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-25000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-30000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-30000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-30000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-30000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-35000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-35000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-35000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-35000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-400669: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-400669 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-400669.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-400669.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-405401: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-405401 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-405401.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-405401.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-410055: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-410055 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-410055.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-410055.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-414369: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-414369 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-414369.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-414369.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-418747: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-418747 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-418747.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-418747.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-63998: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-63998 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-63998.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-63998.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64498: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64498 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64498.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64498.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64998: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64998 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64998.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64998.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65498: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65498 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65498.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65498.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65998: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65998 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65998.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65998.meta -------------------------------------------------------------------------------- /saved_networks/async_maze_long_learner: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze_long_learner -------------------------------------------------------------------------------- /saved_networks/async_maze_long_learner.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze_long_learner.meta -------------------------------------------------------------------------------- /saved_networks/blank_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/blank_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/blank_maze-dqn-5000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/blank_maze-dqn-5000.meta -------------------------------------------------------------------------------- /saved_networks/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "async_maze-dqn-418747" 2 | all_model_checkpoint_paths: "async_maze-dqn-400669" 3 | all_model_checkpoint_paths: "async_maze-dqn-405401" 4 | all_model_checkpoint_paths: "async_maze-dqn-410055" 5 | all_model_checkpoint_paths: "async_maze-dqn-414369" 6 | all_model_checkpoint_paths: "async_maze-dqn-418747" 7 | -------------------------------------------------------------------------------- /saved_networks/i_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/i_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/i_maze-dqn-5000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/i_maze-dqn-5000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-10000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-10000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-10000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-10000.meta 
-------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-105000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-105000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-105000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-105000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-110000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-110000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-110000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-110000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-115000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-115000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-115000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-115000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-120000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-120000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-120000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-120000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-125000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-125000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-125000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-125000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-15000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-15000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-15000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-15000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-20000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-20000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-20000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-20000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-25000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-25000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-25000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-25000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-5000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-5000.meta -------------------------------------------------------------------------------- /saved_networks/sparse_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/sparse_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/sparse_maze-dqn-5000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/sparse_maze-dqn-5000.meta --------------------------------------------------------------------------------
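The files under saved_networks/ above are standard TensorFlow Saver checkpoints; Learner.py restores the most recent one automatically at startup via tf.train.get_checkpoint_state. A minimal sketch of loading a specific checkpoint by hand (assuming the graph from Learner.py has already been built in the current session):

    saver = tf.train.Saver()
    saver.restore(sess, "saved_networks/async_maze-dqn-418747")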