├── Learner.py ├── README.md ├── Solving Random Mazes with Asyncronous Deep Learning.pdf ├── World.py ├── World.pyc ├── floodfill.py ├── floodfill.pyc ├── maze_gen.py ├── maze_gen.pyc └── saved_networks ├── async_maze-dqn-15000 ├── async_maze-dqn-15000.meta ├── async_maze-dqn-20000 ├── async_maze-dqn-20000.meta ├── async_maze-dqn-25000 ├── async_maze-dqn-25000.meta ├── async_maze-dqn-30000 ├── async_maze-dqn-30000.meta ├── async_maze-dqn-35000 ├── async_maze-dqn-35000.meta ├── async_maze-dqn-400669 ├── async_maze-dqn-400669.meta ├── async_maze-dqn-405401 ├── async_maze-dqn-405401.meta ├── async_maze-dqn-410055 ├── async_maze-dqn-410055.meta ├── async_maze-dqn-414369 ├── async_maze-dqn-414369.meta ├── async_maze-dqn-418747 ├── async_maze-dqn-418747.meta ├── async_maze-dqn-63998 ├── async_maze-dqn-63998.meta ├── async_maze-dqn-64498 ├── async_maze-dqn-64498.meta ├── async_maze-dqn-64998 ├── async_maze-dqn-64998.meta ├── async_maze-dqn-65498 ├── async_maze-dqn-65498.meta ├── async_maze-dqn-65998 ├── async_maze-dqn-65998.meta ├── async_maze_long_learner ├── async_maze_long_learner.meta ├── blank_maze-dqn-5000 ├── blank_maze-dqn-5000.meta ├── checkpoint ├── i_maze-dqn-5000 ├── i_maze-dqn-5000.meta ├── random_maze-dqn-10000 ├── random_maze-dqn-10000.meta ├── random_maze-dqn-105000 ├── random_maze-dqn-105000.meta ├── random_maze-dqn-110000 ├── random_maze-dqn-110000.meta ├── random_maze-dqn-115000 ├── random_maze-dqn-115000.meta ├── random_maze-dqn-120000 ├── random_maze-dqn-120000.meta ├── random_maze-dqn-125000 ├── random_maze-dqn-125000.meta ├── random_maze-dqn-15000 ├── random_maze-dqn-15000.meta ├── random_maze-dqn-20000 ├── random_maze-dqn-20000.meta ├── random_maze-dqn-25000 ├── random_maze-dqn-25000.meta ├── random_maze-dqn-5000 ├── random_maze-dqn-5000.meta ├── sparse_maze-dqn-5000 └── sparse_maze-dqn-5000.meta /Learner.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | __author__ = 'Aaron Brown' 3 | import World 4 | import threading 5 | import time 6 | import random 7 | import numpy as np 8 | import tensorflow as tf 9 | import floodfill 10 | from collections import deque 11 | 12 | actions = World.actions 13 | 14 | #Turn the GUI on, or off if training 15 | gui_display = True 16 | 17 | if(not gui_display): 18 | World.gui_off() 19 | 20 | # The variables below hold all the trainable weights for our CNN. For each, the 21 | 22 | GAMMA = 0.8 # decay rate of past observations 23 | BATCH = 25 # size of minibatch 24 | 25 | # 26 | state_input_1 = tf.placeholder( 27 | tf.float32, 28 | [None,9,9,1]) 29 | 30 | action_input = tf.placeholder( 31 | tf.bool, 32 | shape=(BATCH,4)) 33 | 34 | reward_input = tf.placeholder( 35 | tf.float32, 36 | shape=(BATCH)) 37 | 38 | max_val_input = tf.placeholder( 39 | tf.float32, 40 | shape=(BATCH)) 41 | 42 | terminal_input = tf.placeholder( 43 | tf.float32, 44 | shape=(BATCH)) 45 | 46 | conv1_weights = tf.Variable( 47 | tf.truncated_normal([5, 5, 1, 16], # 5x5 filter, depth 16. 48 | stddev=0.1)) 49 | conv1_biases = tf.Variable(tf.zeros([16])) 50 | conv2_weights = tf.Variable( 51 | tf.truncated_normal([3, 3, 16, 32], # 3x3 filter, depth 32 52 | stddev=0.1)) 53 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[32])) 54 | 55 | conv3_weights = tf.Variable( 56 | tf.truncated_normal([2, 2, 32, 64], # 3x3 filter, depth 64 57 | stddev=0.1)) 58 | conv3_biases = tf.Variable(tf.constant(0.1, shape=[64])) 59 | 60 | fc1_weights = tf.Variable( # fully connected, depth 128. 
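    # (Sizing note, inferred from the layers above: the 9x9 input becomes 5x5
    #  after the 5x5 VALID convolution, 3x3 after the 3x3 convolution, and 2x2
    #  after the 2x2 convolution, so the flattened feature vector feeding this
    #  layer has 2*2*64 = 256 entries, hence the [256, 512] shape below.)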
61 | tf.truncated_normal([256, 512], 62 | stddev=0.1)) 63 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[512])) 64 | fc2_weights = tf.Variable( 65 | tf.truncated_normal([512, 4], 66 | stddev=0.1)) 67 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[4])) 68 | 69 | def network(data): 70 | conv = tf.nn.conv2d(data, 71 | conv1_weights, 72 | strides=[1, 1, 1, 1], 73 | padding='VALID') 74 | 75 | # Bias and rectified linear non-linearity. 76 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases)) 77 | 78 | conv = tf.nn.conv2d(relu, 79 | conv2_weights, 80 | strides=[1, 1, 1, 1], 81 | padding='VALID') 82 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases)) 83 | 84 | conv = tf.nn.conv2d(relu, 85 | conv3_weights, 86 | strides=[1, 1, 1, 1], 87 | padding='VALID') 88 | relu = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases)) 89 | 90 | # fully connected layers. 91 | relu_flat = tf.reshape(relu, [-1,256]) 92 | 93 | # Fully connected layer. Note that the '+' operation automatically 94 | # broadcasts the biases. 95 | hidden = tf.nn.relu(tf.matmul(relu_flat, fc1_weights) + fc1_biases) 96 | 97 | return tf.matmul(hidden, fc2_weights) + fc2_biases 98 | 99 | sess = tf.InteractiveSession() 100 | sess.as_default() 101 | 102 | # L = .5[r + discount * max a' Q(s', a') - Q(s, a)]^2 103 | # |------target-------| |prediction| 104 | 105 | # Do a feedforward pass for the current state s to get predicted Q-values for all actions. 106 | action_array_1 = network(state_input_1) 107 | # Do a feedforward pass for the next state s' and calculate maximum overall network outputs max a' Q(s', a'). 108 | # Set Q-value target for action to r + discount * max a' Q(s', a') (use the max calculated in step 2). 109 | # For all other actions, set the Q-value target to the same as originally returned from step 1, making the error 0 for those outputs. 110 | 111 | # tt = rr + discount * max(a') Q(ss',aa') or rr if terminal state 112 | tt = reward_input + terminal_input * (GAMMA * max_val_input) 113 | tt = tf.reshape(tt,(BATCH,1)) 114 | target_prep = tf.tile(tt,[1,4]) 115 | target = tf.select(action_input, target_prep, action_array_1) 116 | 117 | # loss is .5(tt - Q(ss,aa))^2 118 | Qerror = tf.sub(target, action_array_1) 119 | loss = .5*tf.reduce_sum(tf.mul(Qerror, Qerror)) 120 | 121 | # Update the weights using backpropagation. 
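# (Worked example of the target above, with illustrative numbers: for a normal
#  step r = -0.1, a non-terminal transition (terminal_input is 1 for ordinary
#  moves and 0 on reaching the goal), and max a' Q(s',a') = 2.0, the target is
#  tt = -0.1 + 1*(0.8*2.0) = 1.5, so the chosen action contributes
#  .5*(1.5 - Q(s,a))^2 to the loss while every other action contributes zero.)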
122 | optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(loss) 123 | 124 | # saving and loading networks 125 | saver = tf.train.Saver() 126 | tf.initialize_all_variables().run() 127 | 128 | checkpoint = tf.train.get_checkpoint_state("saved_networks") 129 | if checkpoint and checkpoint.model_checkpoint_path: 130 | saver.restore(sess, checkpoint.model_checkpoint_path) 131 | print("Successfully loaded:", checkpoint.model_checkpoint_path) 132 | else: 133 | print("Could not find old network weights") 134 | 135 | def see_action(action,i,j): 136 | 137 | if action == actions[0]: 138 | reward, s2, t = World.see_move(0, -1,i,j) 139 | elif action == actions[1]: 140 | reward, s2, t= World.see_move(1, 0,i,j) 141 | elif action == actions[2]: 142 | reward, s2, t = World.see_move(0, 1,i,j) 143 | elif action == actions[3]: 144 | reward, s2, t = World.see_move(-1, 0,i,j) 145 | else: 146 | return 147 | 148 | return reward, s2, t 149 | 150 | def do_action(action): 151 | 152 | if action == actions[0]: 153 | World.do_move(0, -1) 154 | elif action == actions[1]: 155 | World.do_move(1, 0) 156 | elif action == actions[2]: 157 | World.do_move(0, 1) 158 | elif action == actions[3]: 159 | World.do_move(-1, 0) 160 | else: 161 | return 162 | 163 | #update the visual network arrow display in GUI 164 | def network_triangles(): 165 | D = deque() 166 | for i in range(World.x): 167 | for j in range(World.y): 168 | state_peek_1 = World.get_state((i,j)) 169 | state_peek_1 = np.reshape(state_peek_1,(1, 9, 9, 1)).astype(np.float32) 170 | feed_dict = {state_input_1: state_peek_1} 171 | values_1 = sess.run(action_array_1, feed_dict=feed_dict) 172 | state_peek_1 = np.reshape(state_peek_1,(9, 9, 1)).astype(np.float32) 173 | 174 | random_index = np.random.choice(4,1) 175 | try_index = random_index[0] 176 | try_act = actions[try_index] 177 | 178 | try_act_prep = np.reshape([False, False, False, False],(4)).astype(np.bool) 179 | try_act_prep[try_index] = True 180 | 181 | reward, s2, terminal = see_action(try_act,i,j) 182 | 183 | state_peek_2 = np.reshape(s2,(1, 9, 9, 1)).astype(np.float32) 184 | feed_dict = {state_input_1: state_peek_2} 185 | values_2 = sess.run(action_array_1, feed_dict=feed_dict) 186 | 187 | max_val_data = np.amax(values_2) 188 | 189 | D.append((state_peek_1, try_act_prep, reward, max_val_data, terminal)) 190 | 191 | if(gui_display): 192 | for action in actions: 193 | World.set_cell_score(i,j,action,values_1) 194 | 195 | return D 196 | 197 | def run(): 198 | #initalize variables 199 | trials = 1 200 | moves = 1 201 | t = 0 202 | hit_one = True 203 | 204 | #t0_floodfill = time.time() 205 | floodfill.FloodFillValues() 206 | #t1_floodfill = time.time() 207 | 208 | #print('running floodfill took {}'.format(t1_floodfill-t0_floodfill)) 209 | 210 | opt_moves = floodfill.get_value(0,4) 211 | 212 | sub_trials = 1 213 | 214 | # variables used for running tests, note that some of these are not really compatiable with each other. 
Sort of hacked together for testing purposes 215 | train = True # used to train the network 216 | maze_space = -1 # number of saved mazes to iterate through, -1 means no iteration and always use new maze every time 217 | save_trial = 500 # save network off after every so many trials, -1 to disable save 218 | number_trial = -1 # number of trials to run, -1 for indefinite 219 | max_moves = -1 # max number of moves before restarting, -1 for no limit 220 | 221 | 222 | World.set_maze_size(maze_space) 223 | 224 | while trials < number_trial or (number_trial == -1): 225 | 226 | # run transitions multiple times to get collection of data thats equal to BATCH_SIZE 227 | 228 | # update current state 229 | state_1 = World.get_state(World.player) 230 | 231 | #print(state_1) 232 | 233 | state_peek = np.reshape(state_1,(1, 9, 9, 1)).astype(np.float32) 234 | feed_dict = {state_input_1: state_peek} 235 | #t0_network = time.time() 236 | net_out_1 = sess.run(action_array_1, feed_dict=feed_dict) 237 | #t1_network = time.time() 238 | 239 | #print('running the network took {}'.format(t1_network-t0_network)) 240 | 241 | #World.get_pos_from_state(state_peek) 242 | #print(net_out_1[0]) 243 | 244 | max_index = np.argmax(net_out_1[0]) 245 | 246 | max_act = actions[max_index] 247 | 248 | do_action(max_act) 249 | 250 | # Check if the game has restarted 251 | if World.has_restarted() or (moves > max_moves and max_moves > 0): 252 | 253 | if(moves==opt_moves or (trials < maze_space or maze_space < 0)): 254 | trials+=1 255 | hit_one = True 256 | 257 | if(moves < max_moves or max_moves == -1): 258 | sub_trials+=1 259 | moves = 0 260 | 261 | #DEBUG 262 | print('at trial {}'.format(trials)) 263 | #print('at subtrial {}'.format(sub_trials)) 264 | 265 | World.restart_game(trials) 266 | 267 | #recalculate optimum number of moves 268 | #t0_floodfill = time.time() 269 | floodfill.FloodFillValues() 270 | #t1_floodfill = time.time() 271 | #print('running floodfill took {}'.format(t1_floodfill-t0_floodfill)) 272 | opt_moves = floodfill.get_value(0,4) 273 | 274 | # save progress every so many iterations 275 | if save_trial > 0 and trials % save_trial == 0 and hit_one: 276 | saver.save(sess, 'saved_networks/' + 'async_maze' + '-dqn', global_step = t) 277 | 278 | print('completed trial {}'.format(trials)) 279 | #subtrials is used as a reference in certain testing areas 280 | #print('took {} subtrials'.format(sub_trials)) 281 | 282 | hit_one = False 283 | 284 | sub_trials = 1 285 | 286 | # update weights and minimize loss function for BATCH_SIZE amount of data points 287 | 288 | # sample a minibatch to train on 289 | if(train): 290 | D = network_triangles() 291 | minibatch = random.sample(D, BATCH) 292 | 293 | s1_update = [d[0] for d in minibatch] 294 | a_update = [d[1] for d in minibatch] 295 | r_update = [d[2] for d in minibatch] 296 | mv_update = [d[3] for d in minibatch] 297 | term = [d[4] for d in minibatch] 298 | 299 | feed_dict = {state_input_1: s1_update, action_input: a_update, reward_input: r_update, max_val_input: mv_update, terminal_input: term} 300 | 301 | _, my_loss, start, _end_, my_tt = sess.run([optimizer, loss, action_array_1, target, tt], feed_dict=feed_dict) 302 | 303 | 304 | # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST. 
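# (Each pass above takes one gradient step on a minibatch of BATCH = 25
#  experience tuples gathered by network_triangles(); if the GUI is enabled,
#  uncommenting the two lines below slows the loop to roughly one move per
#  second so the agent can be watched.)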
305 | #if gui_display: 306 | #time.sleep(1.0) 307 | moves += 1 308 | t += 1 309 | 310 | #log = open(".\optimal_policy.txt", "w") 311 | #print(get_policy(), file = log) 312 | #Test for maze completion without training 313 | if(max_moves > 0): 314 | print('completed trial {}'.format(trials)) 315 | print('took {} subtrials'.format(sub_trials)) 316 | 317 | t = threading.Thread(target=run) 318 | t.daemon = True 319 | t.start() 320 | World.start_game() 321 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Solving Random Mazes using Asynchronous Deep Reinforcement Learning
2 | ====================================================================
3 | 
4 | The only requirement for running this program is an installation of TensorFlow, which is freely available. If you are a Windows user, it is strongly encouraged to install the Windows 10 Bash with Ubuntu update to work with TensorFlow easily. A helpful guide can be found here: http://www.howtogeek.com/249966/how-to-install-and-use-the-linux-bash-shell-on-windows-10/
5 | 
6 | The basic idea of this project is to use deep reinforcement learning to solve any number of 5x5 randomly generated mazes, where the start location is always the bottom-left cell and the goal is always the center of the maze.
7 | 
8 | To solve any number of randomly generated mazes, the agent needs to know as much about the maze as possible, such as where all of its walls are located, beyond just knowing where it itself is located in the maze.
9 | 
10 | To deal with this high-dimensional state space, the agent uses and trains a convolutional neural network, similar to how DeepMind trained a program to play Atari games from just raw pixel data.
11 | 
12 | Each maze state consists of a 9x9 2D image, with walls/pegs marked -1, open spaces 0, and the agent 1. An example of a possible state is shown below.
13 |  0 1 2 3 4
14 | -------------------------
15 | 0|0 0 0 0 0 0 0 0 0
16 |  |0 -1 0 -1 -1 -1 -1 -1 0
17 | 1|0 -1 0 0 0 0 0 -1 0
18 |  |0 -1 -1 -1 0 -1 0 -1 0
19 | 2|0 0 0 -1 0 -1 0 0 0
20 |  |0 -1 0 -1 0 -1 -1 -1 0
21 | 3|0 -1 0 0 0 0 1 -1 0
22 |  |0 -1 -1 -1 -1 -1 0 -1 0
23 | 4|0 0 0 0 0 0 0 0 0
24 | 
25 | This maze is called the "I maze" because it looks like a hollowed-out I. It has 4 entrances into its center goal area, and the agent can be seen entering from the bottom-right entrance. The agent "1" is located at cell (3,3), where cell (0,0) is the top left. Notice that there are two optimal paths from the start to the goal, each 6 moves long.
26 | 
27 | This is a deep reinforcement learning example based on the Q-function.
28 | - Rules: The agent (black box) has to reach the center goal, and then it starts over with a new random maze.
29 | - Rewards: Each step gives a negative reward of -0.1. Running into a wall gets -1. Reaching the center goal gets +10.
30 | - Actions: There are only 4 actions: up, down, right, left.
31 | 
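For reference, the sketch below shows one way such a 9x9 image can be built from the maze's wall arrays; it mirrors what World.get_state() does, though the helper name encode_state and the NumPy usage here are illustrative only. Cells sit at even (row, column) indices, and pegs/walls sit at odd indices.

    import numpy as np

    def encode_state(rows, columns, player, n=5):
        # rows: (n+1) x n horizontal-wall flags, columns: n x (n+1) vertical-wall flags
        dim = 2 * n - 1                          # 9 for a 5x5 maze
        state = np.zeros((dim, dim))
        state[1:dim-1:2, 1:dim-1:2] = -1         # static pegs between cells
        for j in range(1, dim - 1, 2):           # horizontal walls
            for i in range(0, dim, 2):
                state[j, i] = -rows[j // 2 + 1][i // 2]
        for j in range(0, dim, 2):               # vertical walls
            for i in range(1, dim - 1, 2):
                state[j, i] = -columns[j // 2][i // 2 + 1]
        state[player[1] * 2, player[0] * 2] = 1  # the agent
        return state

In the actual code the wall arrays come from maze_gen.generate(), and the resulting 9x9 image is reshaped to 9x9x1 before being fed to the network in Learner.py.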
32 | The learning policy follows this algorithm:
33 | Initialize action-value function Q with random weights
34 | For N trials do:
35 |     Set agent to start of maze
36 |     Generate new random maze
37 |     While agent has not reached center of maze:
38 |         For each possible cell position, pick a random action for that cell position
39 |             Feed the cell position's state forward through Q and observe the reward and next state from
40 |             following the random action
41 |             Set y_j = r_j if the next state is terminal
42 |                 y_j = r_j + gamma * max_a' Q(s_(j+1), a') for a non-terminal state
43 |         End for
44 |         Perform gradient descent to minimize the loss defined in Learner.py
45 |         Feed the agent's state forward through Q and choose the action with the highest value from Q
46 |     End while
47 | End for
48 | 
49 | 
50 | The little triangles represent the values of the Q network for each state and each action. Within a cell, greener triangles mark the relatively high Q-values and redder triangles the relatively low ones.
51 | 
52 | # Run
53 | Run the file Learner.py
54 | 
55 | # Demo
56 | https://www.youtube.com/watch?v=1F6oGEItjOg&feature=youtu.be
57 | 
--------------------------------------------------------------------------------
/Solving Random Mazes with Asyncronous Deep Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/Solving Random Mazes with Asyncronous Deep Learning.pdf
--------------------------------------------------------------------------------
/World.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Aaron Brown' 2 | from Tkinter import * 3 | import maze_gen 4 | import numpy as np 5 | master = Tk() 6 | 7 | wall_width = 90 8 | pip_width = 6 9 | (x, y) = (5, 5) 10 | actions = ["up", "right", "down", "left"] 11 | gui_display = True 12 | 13 | board = Canvas(master, width=(x+1)*pip_width+x*wall_width, height=(y+1)*pip_width+y*wall_width) 14 | player = (0, y-1) 15 | restart = False 16 | walk_reward = -0.1 17 | wall_reward = -1.0 18 | goal_reward = 10 19 | me = 0 20 | cell_scores = {} 21 | triangle_size = 0.1 22 | 23 | #walls for rows and columns 24 | rows, columns = maze_gen.generate(5,0) 25 | goal = (2, 2) 26 | 27 | def create_triangle(i, j, action): 28 | if action == actions[0]: 29 | return board.create_polygon((i+0.5-triangle_size)*wall_width+(i+1)*pip_width, (j+triangle_size)*wall_width+(j+1)*pip_width, 30 | (i+0.5+triangle_size)*wall_width+(i+1)*pip_width, (j+triangle_size)*wall_width+(j+1)*pip_width, 31 | (i+0.5)*wall_width+(i+1)*pip_width, j*wall_width+(j+1)*pip_width, 32 | fill="green", width=1) 33 | elif action == actions[2]: 34 | return board.create_polygon((i+0.5-triangle_size)*wall_width+(i+1)*pip_width, (j+1-triangle_size)*wall_width+(j+1)*pip_width, 35 | (i+0.5+triangle_size)*wall_width+(i+1)*pip_width, (j+1-triangle_size)*wall_width+(j+1)*pip_width, 36 | (i+0.5)*wall_width+(i+1)*pip_width, (j+1)*wall_width+(j+1)*pip_width, 37 | fill="green", width=1) 38 | elif action == actions[3]: 39 | return board.create_polygon((i+triangle_size)*wall_width+(i+1)*pip_width, (j+0.5-triangle_size)*wall_width+(j+1)*pip_width, 40 | (i+triangle_size)*wall_width+(i+1)*pip_width, (j+0.5+triangle_size)*wall_width+(j+1)*pip_width, 41 | i*wall_width+(i+1)*pip_width, (j+0.5)*wall_width+(j+1)*pip_width, 42 | fill="green", width=1) 43 | elif action == actions[1]: 44 | return board.create_polygon((i+1-triangle_size)*wall_width+(i+1)*pip_width, (j+0.5-triangle_size)*wall_width+(j+1)*pip_width, 45 | (i+1-triangle_size)*wall_width+(i+1)*pip_width, (j+0.5+triangle_size)*wall_width+(j+1)*pip_width, 46 |
(i+1)*wall_width+(i+1)*pip_width, (j+0.5)*wall_width+(j+1)*pip_width, 47 | fill="green", width=1) 48 | 49 | def render_grid(): 50 | global walls, Width, x, y, player 51 | #creat the white base board 52 | board.create_rectangle(0, 0, (x+1)*pip_width+x*wall_width, (y+1)*pip_width+y*wall_width, fill="white", width=1) 53 | for i in range(x+1): 54 | for j in range(y+1): 55 | #create network signal arrows 56 | temp = {} 57 | for action in actions: 58 | temp[action] = create_triangle(i, j, action) 59 | cell_scores[(i,j)] = temp 60 | #create the red pips 61 | board.create_rectangle(i*pip_width+i*wall_width, j*pip_width+j*wall_width, (i+1)*pip_width+i*wall_width, (j+1)*pip_width+j*wall_width, fill="red", width=1) 62 | #create the blue row walls 63 | for n in range(len(rows)): 64 | for i in range(len(rows[n])): 65 | if rows[n][i] is 1: 66 | board.create_rectangle((i+1)*pip_width+i*wall_width, n*pip_width+n*wall_width, (i+1)*pip_width+(i+1)*wall_width, (n+1)*pip_width+n*wall_width, fill="blue", width=1) 67 | #create the blue column walls 68 | for n in range(len(columns)): 69 | for i in range(len(columns[n])): 70 | if columns[n][i] is 1: 71 | board.create_rectangle(i*pip_width+i*wall_width, (n+1)*pip_width+n*wall_width, (i+1)*pip_width+i*wall_width, (n+1)*pip_width+(n+1)*wall_width, fill="blue", width=1) 72 | 73 | board.grid(row=0, column=0) 74 | 75 | def set_cell_score(i, j, action, vals): 76 | 77 | triangle = cell_scores[(i,j)][action] 78 | if(i==2 and j==2): 79 | board.itemconfigure(triangle, fill='blue') #set center goal cells markers to blue 80 | return 81 | if action == 'up': 82 | vact = 0 83 | elif action == 'right': 84 | vact = 1 85 | elif action == 'down': 86 | vact = 2 87 | elif action == 'left': 88 | vact = 3 89 | val = vals[0][vact] 90 | 91 | cell_score_min = np.min(vals) 92 | cell_score_max = np.max(vals) 93 | green_dec = int(min(255, max(0, (val - cell_score_min) * 255.0 / (cell_score_max - cell_score_min)))) 94 | green = hex(green_dec)[2:] 95 | red = hex(255-green_dec)[2:] 96 | if len(red) == 1: 97 | red += "0" 98 | if len(green) == 1: 99 | green += "0" 100 | color = "#" + red + green + "00" 101 | board.itemconfigure(triangle, fill=color) 102 | 103 | def render_player(): 104 | global me 105 | me = board.create_rectangle((player[0]+1)*pip_width+player[0]*wall_width+wall_width*1/3, (player[1]+1)*pip_width+player[1]*wall_width+wall_width*1/3, 106 | (player[0]+1)*pip_width+player[0]*wall_width+wall_width*2/3, (player[1]+1)*pip_width+player[1]*wall_width+wall_width*2/3, fill="black", width=1, tag="me") 107 | 108 | if(gui_display): 109 | render_grid() 110 | render_player() 111 | 112 | def do_move(dx, dy): 113 | 114 | global player, me, restart 115 | if restart == True: 116 | restart_game() 117 | new_x = player[0] + dx 118 | new_y = player[1] + dy 119 | if (new_x >= 0) and (new_x < x) and (new_y >= 0) and (new_y < y) and wall_check( player[0], player[1], dx, dy): 120 | if(gui_display): 121 | board.coords(me, (new_x+1)*pip_width+new_x*wall_width+wall_width*1/3, (new_y+1)*pip_width+new_y*wall_width+wall_width*1/3, 122 | (new_x+1)*pip_width+new_x*wall_width+wall_width*2/3, (new_y+1)*pip_width+new_y*wall_width+wall_width*2/3) 123 | player = (new_x, new_y) 124 | 125 | if new_x == goal[0] and new_y == goal[0]: 126 | #print "Arrived at Goal " 127 | restart = True 128 | 129 | def see_move(dx, dy, i, j): 130 | 131 | score = 0 132 | new_x = i + dx 133 | new_y = j + dy 134 | score += walk_reward 135 | terminal = 1 136 | if (new_x >= 0) and (new_x < x) and (new_y >= 0) and (new_y < y) and wall_check( i, 
j, dx, dy): 137 | 138 | state = get_state((new_x, new_y)) 139 | 140 | if new_x == goal[0] and new_y == goal[0]: 141 | score -= walk_reward 142 | score += goal_reward 143 | terminal = 0 144 | else: 145 | score -= walk_reward 146 | score += wall_reward 147 | state = get_state((i,j)) 148 | 149 | return score, state, terminal 150 | 151 | #state is an (2n-1)x(2n-1) array where n is maze dim. walls are -1 empty spaces are 0 and agent is 1 152 | def get_state(position): 153 | global x, rows, columns 154 | state = [] 155 | dim = 2*x-1 156 | 157 | #intially fill in all spaces with 0 158 | state = [[0.0 for i in range(dim)] for j in range(dim)] 159 | #fill in pegs with -1, these are always static but it helps us format our state in a square 160 | for j in np.arange(1,dim-1,2): 161 | for i in np.arange(1,dim-1,2): 162 | state[j][i] = -1 163 | 164 | #fill in position with 1 165 | state[position[1]*2][position[0]*2] = 1 166 | #fill in rows 167 | for j in np.arange(1,dim-1,2): 168 | for i in np.arange(0,dim,2): 169 | state[j][i] = -1*rows[j/2+1][i/2] 170 | #fill in columns 171 | for j in np.arange(0,dim,2): 172 | for i in np.arange(1,dim-1,2): 173 | state[j][i] = -1*columns[j/2][i/2+1] 174 | 175 | return state 176 | 177 | def get_pos_from_state(state): 178 | state = np.reshape(state,(9,9)) 179 | #print state 180 | x, y = np.unravel_index(np.argmax(state), np.shape(state)) 181 | #print x/2, y/2 182 | return x/2, y/2 183 | 184 | def wall_check(curr_x, curr_y, dx, dy): 185 | #if going right 186 | if(dx > 0): 187 | if columns[curr_y][curr_x+1] is not 1: 188 | return True 189 | #if going left 190 | elif(dx < 0): 191 | if columns[curr_y][curr_x] is not 1: 192 | return True 193 | #if going up 194 | elif(dy < 0): 195 | if rows[curr_y][curr_x] is not 1: 196 | return True 197 | #if going down 198 | else: 199 | if rows[curr_y+1][curr_x] is not 1: 200 | return True 201 | return False 202 | 203 | def call_up(event): 204 | try_move(0, -1) 205 | 206 | def call_right(event): 207 | try_move(1, 0) 208 | 209 | def call_down(event): 210 | try_move(0, 1) 211 | 212 | def call_left(event): 213 | try_move(-1, 0) 214 | 215 | def set_maze_size(size): 216 | if(size > 0): 217 | maze_gen.set_maze_size(size) 218 | 219 | def restart_game(trial): 220 | #print "lets restart" 221 | global player, me, restart, rows, columns 222 | 223 | rows, columns = rows, columns = maze_gen.generate(5,trial) 224 | if(gui_display): 225 | render_grid() 226 | render_player() 227 | 228 | player = (0, y-1) 229 | restart = False 230 | 231 | def has_restarted(): 232 | return restart 233 | 234 | def gui_off(): 235 | global gui_display 236 | gui_display = False 237 | 238 | def start_game(): 239 | master.mainloop() 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /World.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/World.pyc -------------------------------------------------------------------------------- /floodfill.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Aaron Brown' 2 | import World 3 | import numpy as np 4 | 5 | Values = [] 6 | walls = 0 7 | 8 | class Stack: 9 | def __init__(self): 10 | self.items = [] 11 | 12 | def isEmpty(self): 13 | return self.items == [] 14 | 15 | def push(self, item): 16 | self.items.append(item) 17 | 18 | def pop(self): 19 | return self.items.pop() 20 | 21 | def 
peek(self): 22 | return self.items[len(self.items)-1] 23 | 24 | def size(self): 25 | return len(self.items) 26 | 27 | cell_stack = Stack() 28 | 29 | class MazeCell: 30 | def __init__(self,x,y,value): 31 | self.x = x 32 | self.y = y 33 | self.value = value 34 | 35 | def FloodFillValues(): 36 | global walls, cell_stack, Values 37 | 38 | walls = World.x 39 | 40 | #set all maze cells to -1 representing void value 41 | Values = [[-1 for i in range(walls)] for j in range(walls)] 42 | 43 | #if maze size is even 44 | if walls%2 == 0: 45 | base_1 = walls/2-1 46 | base_2 = walls/2 47 | 48 | cell_stack.push(MazeCell(base_1,base_1,0)) 49 | cell_stack.push(MazeCell(base_2,base_1,0)) 50 | cell_stack.push(MazeCell(base_1,base_2,0)) 51 | cell_stack.push(MazeCell(base_2,base_2,0)) 52 | 53 | else: 54 | base = (walls-1)/2 55 | cell_stack.push(MazeCell(base,base,0)) 56 | 57 | while(cell_stack.size() > 0): 58 | current_cell = cell_stack.pop() 59 | SetCell(current_cell.x,current_cell.y,current_cell.value) 60 | 61 | #set value in maze cell by x and y location 62 | def SetCell( x, y, value): 63 | global Values, cell_stack 64 | 65 | current_value = Values[x][y] 66 | if current_value == -1 or value < current_value: 67 | Values[x][y] = value 68 | # look up 69 | if World.wall_check(x,y,0,-1) and y > 0: 70 | cell_stack.push(MazeCell(x,y-1,value+1)) 71 | # look right 72 | if World.wall_check(x,y,1,0) and x < walls-1: 73 | cell_stack.push(MazeCell(x+1,y,value+1)) 74 | # look down 75 | if World.wall_check(x,y,0,1) and y < walls-1: 76 | cell_stack.push(MazeCell(x,y+1,value+1)) 77 | # look left 78 | if World.wall_check(x,y,-1,0) and x > 0: 79 | cell_stack.push(MazeCell(x-1,y,value+1)) 80 | 81 | def get_value(x,y): 82 | return Values[x][y] 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /floodfill.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/floodfill.pyc -------------------------------------------------------------------------------- /maze_gen.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Aaron Brown' 2 | #Class to generate random mazes of some nxn dimensions 3 | import numpy as np 4 | import random 5 | from collections import deque 6 | 7 | rows = [] 8 | columns = [] 9 | cells = [] 10 | walls = 0 11 | saved_mazes = [] 12 | saved_size = 0 13 | 14 | class Stack: 15 | def __init__(self): 16 | self.items = [] 17 | 18 | def isEmpty(self): 19 | return self.items == [] 20 | 21 | def push(self, item): 22 | self.items.append(item) 23 | 24 | def pop(self): 25 | return self.items.pop() 26 | 27 | def peek(self): 28 | return self.items[len(self.items)-1] 29 | 30 | def size(self): 31 | return len(self.items) 32 | 33 | class MazeCell: 34 | def __init__(self,x,y): 35 | self.x = x 36 | self.y = y 37 | self.visited = False 38 | 39 | def set_maze_size(size): 40 | global saved_size 41 | saved_size = size 42 | 43 | #generate a random maze 44 | def generate(maze_size, trial): 45 | 46 | global walls 47 | global rows 48 | global columns 49 | global cells 50 | 51 | if(saved_size > 0 and len(saved_mazes) == saved_size): 52 | #print "loading maze ", trial%saved_size 53 | rows, columns = saved_mazes[trial%saved_size] 54 | return rows, columns 55 | 56 | walls = maze_size 57 | rows = [[1 for i in range(walls)] for j in range(walls+1)] 58 | columns = [[1 for i in range(walls+1)] for j in 
range(walls)] 59 | 60 | # DEBUG blank maze 61 | #rows = [[1,1,1,1,1],[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0],[1,1,1,1,1]] 62 | #columns = [[1,0,0,0,0,1],[1,0,0,0,0,1],[1,0,0,0,0,1],[1,0,0,0,0,1],[1,0,0,0,0,1]] 63 | #return rows, columns 64 | # DEBUG 65 | 66 | # DEBUG sparse maze 67 | #rows = [[1,1,1,1,1],[0,0,1,0,0],[0,0,0,0,0],[0,0,0,0,0],[0,0,1,0,0],[1,1,1,1,1]] 68 | #columns = [[1,0,0,0,0,1],[1,1,0,0,1,1],[1,0,1,1,0,1],[1,1,0,0,1,1],[1,0,0,0,0,1]] 69 | #return rows, columns 70 | # DEBUG 71 | 72 | # DEBUG i maze 73 | #rows = [[1,1,1,1,1],[0,0,1,1,0],[0,1,0,0,0],[0,0,0,1,0],[0,1,1,0,0],[1,1,1,1,1]] 74 | #olumns = [[1,0,0,0,0,1],[1,1,0,0,1,1],[1,0,1,1,0,1],[1,1,0,0,1,1],[1,0,0,0,0,1]] 75 | #return rows, columns 76 | # DEBUG 77 | 78 | 79 | # Create the cells that shows visited or not 80 | for y in range(walls): 81 | for x in range(walls): 82 | cells.append(MazeCell(x,y)) 83 | 84 | cell_stack = Stack() 85 | unvistedCells = len(cells) 86 | currentCell = 0 87 | cells[currentCell].visited = True 88 | unvistedCells -= 1 89 | 90 | #While there are unvisited cells 91 | while (unvistedCells > 0): 92 | nextCell = chooseUnvisitedNeighbor(currentCell) 93 | if(nextCell != -1): 94 | cell_stack.push(currentCell) 95 | #remove the wall in between currentCell and nextCell 96 | removeWall(currentCell,nextCell) 97 | currentCell = nextCell 98 | cells[currentCell].visited = True 99 | unvistedCells -= 1 100 | elif(cell_stack.size() > 0): 101 | currentCell = cell_stack.pop() 102 | 103 | cells = [] #reset cells for when method is called again 104 | 105 | if(saved_size > 0 and len(saved_mazes) < saved_size): 106 | saved_mazes.append((rows,columns)) 107 | return rows, columns 108 | 109 | def chooseUnvisitedNeighbor(currentCell): 110 | x = cells[currentCell].x 111 | y = cells[currentCell].y 112 | 113 | candidates = [] 114 | 115 | # left 116 | if(x > 0 and cells[currentCell-1].visited is False): 117 | candidates.append(currentCell-1) 118 | # right 119 | if(x < (walls-1) and cells[currentCell+1].visited is False): 120 | candidates.append(currentCell+1) 121 | # up 122 | if(y > 0 and cells[currentCell-walls].visited is False): 123 | candidates.append(currentCell-walls) 124 | # down 125 | if(y < (walls-1) and cells[currentCell+walls].visited is False): 126 | candidates.append(currentCell+walls) 127 | 128 | if(len(candidates) == 0): 129 | #print "no choice" 130 | return -1 131 | 132 | #choose a random candidate 133 | random_choice = random.sample(candidates,len(candidates)) 134 | #print random_choice[0] 135 | return random_choice[0] 136 | 137 | def removeWall(currentCell,nextCell): 138 | 139 | global columns 140 | global rows 141 | 142 | #remove column to the right of currentCell 143 | if(nextCell-currentCell == 1): 144 | columns[currentCell/walls][currentCell%walls+1] = 0 145 | #print "right" 146 | #remove column to the left of currentCell 147 | elif(currentCell - nextCell == 1): 148 | columns[currentCell/walls][currentCell%walls] = 0 149 | #print "left" 150 | #remove row above currentCell 151 | elif(currentCell - nextCell == walls): 152 | rows[currentCell/walls][currentCell%walls] = 0 153 | #print "up" 154 | #remove row below currentCell 155 | elif(nextCell - currentCell == walls): 156 | rows[currentCell/walls+1][currentCell%walls] = 0 157 | #print "down" 158 | -------------------------------------------------------------------------------- /maze_gen.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/maze_gen.pyc -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-15000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-15000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-15000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-15000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-20000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-20000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-20000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-20000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-25000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-25000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-25000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-25000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-30000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-30000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-30000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-30000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-35000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-35000 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-35000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-35000.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-400669: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-400669 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-400669.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-400669.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-405401: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-405401 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-405401.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-405401.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-410055: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-410055 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-410055.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-410055.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-414369: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-414369 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-414369.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-414369.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-418747: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-418747 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-418747.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-418747.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-63998: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-63998 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-63998.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-63998.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64498: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64498 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64498.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64498.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64998: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64998 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-64998.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-64998.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65498: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65498 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65498.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65498.meta -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65998: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65998 -------------------------------------------------------------------------------- /saved_networks/async_maze-dqn-65998.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze-dqn-65998.meta -------------------------------------------------------------------------------- /saved_networks/async_maze_long_learner: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze_long_learner -------------------------------------------------------------------------------- /saved_networks/async_maze_long_learner.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/async_maze_long_learner.meta -------------------------------------------------------------------------------- /saved_networks/blank_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/blank_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/blank_maze-dqn-5000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/blank_maze-dqn-5000.meta -------------------------------------------------------------------------------- /saved_networks/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "async_maze-dqn-418747" 2 | all_model_checkpoint_paths: "async_maze-dqn-400669" 3 | all_model_checkpoint_paths: "async_maze-dqn-405401" 4 | all_model_checkpoint_paths: "async_maze-dqn-410055" 5 | all_model_checkpoint_paths: "async_maze-dqn-414369" 6 | all_model_checkpoint_paths: "async_maze-dqn-418747" 7 | -------------------------------------------------------------------------------- /saved_networks/i_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/i_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/i_maze-dqn-5000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/i_maze-dqn-5000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-10000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-10000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-10000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-10000.meta 
-------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-105000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-105000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-105000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-105000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-110000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-110000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-110000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-110000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-115000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-115000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-115000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-115000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-120000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-120000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-120000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-120000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-125000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-125000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-125000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-125000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-15000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-15000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-15000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-15000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-20000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-20000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-20000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-20000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-25000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-25000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-25000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-25000.meta -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/random_maze-dqn-5000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/random_maze-dqn-5000.meta -------------------------------------------------------------------------------- /saved_networks/sparse_maze-dqn-5000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/sparse_maze-dqn-5000 -------------------------------------------------------------------------------- /saved_networks/sparse_maze-dqn-5000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awbrown90/DeepReinforcementLearning/8506b260e33bda21e004cf292e4849b11f05ebed/saved_networks/sparse_maze-dqn-5000.meta --------------------------------------------------------------------------------
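The files under saved_networks/ above are standard TensorFlow Saver checkpoints; Learner.py restores the most recent one automatically at startup via tf.train.get_checkpoint_state. A minimal sketch of loading a specific checkpoint by hand (assuming the graph from Learner.py has already been built in the current session):

    saver = tf.train.Saver()
    saver.restore(sess, "saved_networks/async_maze-dqn-418747")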