├── .gitignore ├── README.md ├── agent_dqn.py ├── game_main.py ├── img ├── ddqn_summary.png ├── dqn_summary.png └── img.gif ├── saved_networks └── MsPacman-v0 │ ├── ddqn │ ├── checkpoint │ ├── game_model-2000000.data-00000-of-00001 │ ├── game_model-2000000.index │ ├── game_model-2000000.meta │ ├── game_model-2200000.data-00000-of-00001 │ ├── game_model-2200000.index │ ├── game_model-2200000.meta │ ├── game_model-2400000.data-00000-of-00001 │ ├── game_model-2400000.index │ ├── game_model-2400000.meta │ ├── game_model-2600000.data-00000-of-00001 │ ├── game_model-2600000.index │ ├── game_model-2600000.meta │ ├── game_model-2800000.data-00000-of-00001 │ ├── game_model-2800000.index │ └── game_model-2800000.meta │ └── dqn │ ├── checkpoint │ ├── game_model-200000.data-00000-of-00001 │ ├── game_model-200000.index │ ├── game_model-200000.meta │ ├── game_model-400000.data-00000-of-00001 │ ├── game_model-400000.index │ └── game_model-400000.meta └── summary └── MsPacman-v0 ├── ddqn └── events.out.tfevents.1537978897.P-V-12 └── dqn └── events.out.tfevents.1537974967.P-V-12 /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dqn_game_tensorflow 2 | playing Atari game with Deep Q Learning (DQN & DDQN) in tensorflow 3 | # Requirement main 4 | python3.6 5 | 6 | gym[atari] 7 | 8 | opencv-python 9 | 10 | tensorflow-1.10 11 | # Usage 12 | For DQN train: 13 | 14 | python game_main.py --episode=15000 --env_name=MsPacman-v0 --model_type=dqn --train=True --load_network=False 15 | 16 | For DDQN train: 17 | 18 | --model_type=ddqn 19 | 20 | For test model: 21 | 22 | --train=False --load_network=True 23 | # Result 24 | The 
saved_networks and summary folders contain checkpoints and TensorBoard logs from roughly 5 hours of training 25 | 26 | ![game_test](https://github.com/demomagic/dqn_game_tensorflow/blob/master/img/img.gif) 27 | # Summary 28 | tensorboard --logdir=./summary/MsPacman-v0/dqn 29 | tensorboard --logdir=./summary/MsPacman-v0/ddqn 30 | 31 | For DQN summary: 32 | 33 | ![dqn_summary](https://github.com/demomagic/dqn_game_tensorflow/blob/master/img/dqn_summary.png) 34 | 35 | For DDQN summary: 36 | 37 | ![ddqn_summary](https://github.com/demomagic/dqn_game_tensorflow/blob/master/img/ddqn_summary.png) 38 | # Reference 39 | [DQN in Keras + TensorFlow + OpenAI Gym](https://github.com/tokb23/dqn) 40 | -------------------------------------------------------------------------------- /agent_dqn.py: -------------------------------------------------------------------------------- 1 | # ----------------------------- 2 | # File: Deep Q-Learning Algorithm 3 | # Author: Yiting Xie 4 | # Date: 2018.9.10 5 | # E-mail: 369587353@qq.com 6 | # ----------------------------- 7 | 8 | import numpy as np 9 | import os 10 | import cv2 11 | import random 12 | import tensorflow as tf 13 | from collections import deque 14 | 15 | GAME_WIDTH = 84 # resized frame width 16 | GAME_HEIGHT = 84 # resized frame height 17 | STATE_LENGTH = 4 # number of stacked frames (image channels) in a state 18 | 19 | INITIAL_EPSILON = 1.0 # starting value of epsilon 20 | FINAL_EPSILON = 0.1 # final value of epsilon 21 | EXPLORATION_STEPS = 1000000 # Number of steps over which the initial value of epsilon is linearly annealed to its final value 22 | 23 | TRAIN_VALUE = 4 # the agent selects 4 actions between successive updates 24 | UPDARE_NETWORK_VALUE = 10000 # the frequency with which the target network is updated 25 | SAVE_VALUE = 200000 # the frequency with which the network is saved 26 | 27 | REPLAY_SIZE = 20000 # number of steps to populate the replay memory before training starts 28 | REPLAY_MEMORY = 50000 # number of previous transitions to remember 29 | 30 | BATCH_SIZE = 32 # size of minibatch 31 | GAMMA = 0.99 # discount factor for future rewards 32 | 33 | BASE_NETWORK_PATH = 'saved_networks/' 34 | BASE_SUMMARY_PATH = 'summary/' 35 | 36 | class Agent(): 37 | def __init__(self, actions_num, env_name, load_network, agent_model): 38 | self.actions_num = actions_num # number of actions 39 | self.env_name = env_name # game name 40 | self.agent_model = agent_model # 'dqn' is DQN, 'ddqn' is Double DQN 41 | 42 | self.epsilon = INITIAL_EPSILON 43 | self.epsilon_step = (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORATION_STEPS 44 | self.replay_memory = deque() 45 | 46 | self.time_step = 0 47 | 48 | # init Q network 49 | self.input_start, self.Q, self.w_conv1, self.b_conv1, self.w_conv2, self.b_conv2, self.w_conv3, self.b_conv3, self.w_fc1, self.b_fc1, self.w_fc2, self.b_fc2 = self.build_network() 50 | 51 | # init target Q network 52 | self.input_startT, self.QT, self.w_conv1T, self.b_conv1T, self.w_conv2T, self.b_conv2T, self.w_conv3T, self.b_conv3T, self.w_fc1T, self.b_fc1T, self.w_fc2T, self.b_fc2T = self.build_network() 53 | self.update_target_q_network = [self.w_conv1T.assign(self.w_conv1), self.b_conv1T.assign(self.b_conv1), 54 | self.w_conv2T.assign(self.w_conv2), self.b_conv2T.assign(self.b_conv2), 55 | self.w_conv3T.assign(self.w_conv3), self.b_conv3T.assign(self.b_conv3), 56 | self.w_fc1T.assign(self.w_fc1), self.b_fc1T.assign(self.b_fc1), 57 | self.w_fc2T.assign(self.w_fc2), self.b_fc2T.assign(self.b_fc2)] 58 | 59 | # build training network 60 | self.action_input, self.q_input, self.loss,
self.grads_update = self.build_training_method() 61 | 62 | self.saver = tf.train.Saver([self.w_conv1, self.b_conv1, self.w_conv2, self.b_conv2, self.w_conv3, self.b_conv3, self.w_fc1, self.b_fc1, self.w_fc2, self.b_fc2]) 63 | self.sess = tf.InteractiveSession() 64 | self.sess.run(tf.global_variables_initializer()) 65 | 66 | # make network path 67 | if not os.path.exists(BASE_NETWORK_PATH + env_name + '/' + agent_model): 68 | os.makedirs(BASE_NETWORK_PATH + env_name + '/' + agent_model) 69 | 70 | # load network 71 | if load_network: 72 | self.load_network() 73 | 74 | # init target network 75 | self.sess.run(self.update_target_q_network) 76 | 77 | # init summary parameters 78 | self.total_reward = 0 79 | self.total_q_max = 0 80 | self.total_loss = 0 81 | self.duration = 0 82 | self.episode = 0 83 | self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary() 84 | self.summary_writer = tf.summary.FileWriter(BASE_SUMMARY_PATH + env_name + '/' + agent_model, self.sess.graph) 85 | 86 | def build_network(self): 87 | w_conv1 = self.weight_variable([8, 8, 4, 32]) 88 | b_conv1 = self.bias_variable([32]) 89 | w_conv2 = self.weight_variable([4, 4, 32, 64]) 90 | b_conv2 = self.bias_variable([64]) 91 | w_conv3 = self.weight_variable([3, 3, 64, 64]) 92 | b_conv3 = self.bias_variable([64]) 93 | 94 | input_start = tf.placeholder(tf.float32, [None, GAME_WIDTH, GAME_HEIGHT, STATE_LENGTH]) 95 | 96 | h_conv1 = tf.nn.relu(self.conv2d(input_start, w_conv1, 4) + b_conv1) 97 | h_conv2 = tf.nn.relu(self.conv2d(h_conv1, w_conv2, 2) + b_conv2) 98 | h_conv3 = tf.nn.relu(self.conv2d(h_conv2, w_conv3, 1) + b_conv3) 99 | h_conv3_flatten = tf.layers.flatten(h_conv3) 100 | 101 | ''' 102 | h_fc1 = tf.layers.dense(h_conv3_flatten, 512, activation = tf.nn.relu) 103 | Q = tf.layers.dense(h_fc1, self.actions_num) 104 | ''' 105 | w_fc1 = self.weight_variable([11 * 11 * 64, 512]) 106 | b_fc1 = self.bias_variable([512]) 107 | w_fc2 = self.weight_variable([512,self.actions_num]) 108 | b_fc2 = self.bias_variable([self.actions_num]) 109 | 110 | h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flatten, w_fc1) + b_fc1) 111 | 112 | # Q Value layer 113 | Q = tf.matmul(h_fc1, w_fc2) + b_fc2 114 | 115 | return input_start, Q, w_conv1, b_conv1, w_conv2, b_conv2, w_conv3, b_conv3, w_fc1, b_fc1, w_fc2, b_fc2 116 | 117 | def build_training_method(self): 118 | action_input = tf.placeholder(tf.int64, [None]) 119 | q_input = tf.placeholder(tf.float32, [None]) 120 | 121 | action_one_hot = tf.one_hot(action_input, self.actions_num, 1.0, 0.0) 122 | q_value = tf.reduce_sum(tf.multiply(self.Q, action_one_hot), axis = 1) 123 | 124 | error = tf.abs(q_input - q_value) 125 | quadratic_part = tf.clip_by_value(error, 0.0, 1.0) 126 | linear_part = error - quadratic_part 127 | loss = tf.reduce_mean(0.5 * tf.square(quadratic_part) + linear_part) 128 | 129 | optimizer = tf.train.RMSPropOptimizer(0.00025, momentum = 0.95, epsilon = 0.01) 130 | grads_update = optimizer.minimize(loss, var_list = [self.w_conv1, self.b_conv1, self.w_conv2, self.b_conv2, self.w_conv3, self.b_conv3, self.w_fc1, self.b_fc1, self.w_fc2, self.b_fc2]) 131 | 132 | return action_input, q_input, loss, grads_update 133 | 134 | def initial_state(self, observation, last_observation): 135 | new_observation = np.maximum(observation, last_observation) 136 | gray_observation = cv2.resize(cv2.cvtColor(new_observation, cv2.COLOR_BGR2GRAY),(GAME_WIDTH, GAME_HEIGHT),interpolation = cv2.INTER_CUBIC) * 255 137 | state = [np.uint8(gray_observation) for _ in range(STATE_LENGTH)] 138 | return 
np.stack(state, axis=2) 139 | 140 | def get_action(self, state): 141 | if self.epsilon >= random.random() or self.time_step < REPLAY_SIZE: 142 | action = random.randrange(self.actions_num) 143 | else: 144 | action = np.argmax(self.Q.eval(feed_dict = {self.input_start: [np.float32(state / 255)]})) 145 | 146 | # anneal epsilon linearly over time 147 | if self.epsilon > FINAL_EPSILON and self.time_step >= REPLAY_SIZE: 148 | self.epsilon -= self.epsilon_step 149 | 150 | return action 151 | 152 | def get_action_test(self, state): 153 | if random.random() <= 0.05: 154 | action = random.randrange(self.actions_num) 155 | else: 156 | action = np.argmax(self.Q.eval(feed_dict={self.input_start: [np.float32(state / 255.0)]})) 157 | 158 | self.time_step += 1 159 | 160 | return action 161 | 162 | def run_agent(self, action, reward, game_state, next_observation, state): 163 | next_state = np.append(state[: ,:, 1:], next_observation, axis = 2) 164 | reward = np.clip(reward, -1, 1) 165 | self.replay_memory.append((state, action, reward, next_state, game_state)) 166 | 167 | if len(self.replay_memory) > REPLAY_MEMORY: 168 | self.replay_memory.popleft() 169 | 170 | if self.time_step >= REPLAY_SIZE: 171 | # train network every TRAIN_VALUE iteration 172 | if self.time_step % TRAIN_VALUE == 0: 173 | self.train_network() 174 | 175 | # update network every UPDARE_NETWORK_VALUE iteration 176 | if self.time_step % UPDARE_NETWORK_VALUE == 0: 177 | self.sess.run(self.update_target_q_network) 178 | 179 | # save network every SAVE_VALUE iteration 180 | if self.time_step % SAVE_VALUE == 0: 181 | save_path = self.saver.save(self.sess, BASE_NETWORK_PATH + self.env_name + '/' + self.agent_model + '/' + 'game_model', global_step = self.time_step) 182 | print('Successfully saved: ' + save_path) 183 | 184 | self.time_step += 1 185 | 186 | # Summary 187 | self.total_reward += reward 188 | self.total_q_max += np.max(self.Q.eval(feed_dict={self.input_start: [np.float32(state / 255.0)]})) 189 | self.duration += 1 190 | if game_state: 191 | # Write summary 192 | if self.time_step >= REPLAY_SIZE: 193 | stats = [self.total_reward, self.total_q_max / float(self.duration), 194 | self.duration, self.total_loss / (float(self.duration) / float(TRAIN_VALUE))] 195 | for i in range(len(stats)): 196 | self.sess.run(self.update_ops[i], feed_dict={ 197 | self.summary_placeholders[i]: float(stats[i]) 198 | }) 199 | summary_str = self.sess.run(self.summary_op) 200 | self.summary_writer.add_summary(summary_str, self.episode + 1) 201 | # debug and print info 202 | if self.time_step <= REPLAY_SIZE: 203 | mode = 'random' 204 | elif REPLAY_SIZE <= self.time_step <= REPLAY_SIZE + EXPLORATION_STEPS: 205 | mode = 'explore' 206 | else: 207 | mode = 'exploit' 208 | print('AGENT_MODEL:{0} ---- EPISODE: {1:6d} / TIMESTEP: {2:8d} / DURATION: {3:5d} / EPSILON: {4:.5f} / TOTAL_REWARD: {5:3.0f} / AVG_MAX_Q: {6:2.4f} / AVG_LOSS: {7:.5f} / MODE: {8}'.format( 209 | self.agent_model, 210 | self.episode + 1, self.time_step, 211 | self.duration, self.epsilon, 212 | self.total_reward, self.total_q_max / float(self.duration), 213 | self.total_loss / (float(self.duration) / float(TRAIN_VALUE)), mode)) 214 | self.total_reward = 0 215 | self.total_q_max = 0 216 | self.total_loss = 0 217 | self.duration = 0 218 | self.episode += 1 219 | 220 | return next_state 221 | 222 | # train q network 223 | def train_network(self): 224 | # Step 1: obtain random minibatch from replay memory 225 | minibatch = random.sample(self.replay_memory, BATCH_SIZE) 226 | state_batch = [data[0] for 
data in minibatch] 227 | action_batch = [data[1] for data in minibatch] 228 | reward_batch = [data[2] for data in minibatch] 229 | next_state_batch = [data[3] for data in minibatch] 230 | terminal_batch = [data[4] for data in minibatch] 231 | # convert true/false to 1/0 232 | terminal_batch = np.array(terminal_batch) + 0 233 | 234 | # Step 2: calculate q value 235 | q_batch = [] 236 | if self.agent_model == 'ddqn': 237 | next_action_batch = np.argmax(self.Q.eval(feed_dict = {self.input_start: np.float32(np.array(next_state_batch) / 255.0)}), axis=1) 238 | target_q_batch = self.QT.eval(feed_dict = {self.input_startT: np.float32(np.array(next_state_batch) / 255.0)}) 239 | for i in range(BATCH_SIZE): 240 | q_batch.append(reward_batch[i] + (1 - terminal_batch[i]) * GAMMA * target_q_batch[i][next_action_batch[i]]) 241 | else: 242 | target_q_batch = self.QT.eval(feed_dict = {self.input_startT: np.float32(np.array(next_state_batch) / 255.0)}) 243 | for i in range(BATCH_SIZE): 244 | q_batch.append(reward_batch[i] + (1 - terminal_batch[i]) * GAMMA * np.max(target_q_batch[i])) 245 | 246 | loss, _ = self.sess.run([self.loss, self.grads_update], feed_dict = { 247 | self.input_start: np.float32(np.array(state_batch) / 255.0), 248 | self.action_input: action_batch, 249 | self.q_input: q_batch 250 | }) 251 | 252 | self.total_loss += loss 253 | 254 | # load network 255 | def load_network(self): 256 | checkpoint = tf.train.get_checkpoint_state(BASE_NETWORK_PATH + self.env_name + '/' + self.agent_model) 257 | if checkpoint and checkpoint.model_checkpoint_path: 258 | self.saver.restore(self.sess, checkpoint.model_checkpoint_path) 259 | print('Successfully loaded: ' + checkpoint.model_checkpoint_path) 260 | else: 261 | print('Training new network...') 262 | 263 | # setup summary 264 | def setup_summary(self): 265 | episode_total_reward = tf.Variable(0.) 266 | tf.summary.scalar(self.env_name + '/Total Reward/Episode', episode_total_reward) 267 | episode_avg_max_q = tf.Variable(0.) 268 | tf.summary.scalar(self.env_name + '/Average Max Q/Episode', episode_avg_max_q) 269 | episode_duration = tf.Variable(0.) 270 | tf.summary.scalar(self.env_name + '/Duration/Episode', episode_duration) 271 | episode_avg_loss = tf.Variable(0.) 
272 | tf.summary.scalar(self.env_name + '/Average Loss/Episode', episode_avg_loss) 273 | summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss] 274 | summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))] 275 | update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))] 276 | summary_op = tf.summary.merge_all() 277 | return summary_placeholders, update_ops, summary_op 278 | 279 | # define weight 280 | def weight_variable(self, shape): 281 | initial = tf.truncated_normal(shape, stddev = 0.1) 282 | return tf.Variable(initial) 283 | 284 | # define bias 285 | def bias_variable(self, shape): 286 | initial = tf.constant(0.01, shape = shape) 287 | return tf.Variable(initial) 288 | 289 | # define conv2d 290 | def conv2d(self, inputs, filters, stride): 291 | return tf.nn.conv2d(inputs, filters, strides = [1, stride, stride, 1], padding = 'SAME') 292 | 293 | def process_observation(observation, last_observation): 294 | new_observation = np.maximum(observation, last_observation) 295 | gray_observation = cv2.resize(cv2.cvtColor(new_observation, cv2.COLOR_BGR2GRAY), (GAME_WIDTH, GAME_HEIGHT), interpolation = cv2.INTER_CUBIC) * 255 296 | return np.reshape(np.uint8(gray_observation), (GAME_WIDTH, GAME_HEIGHT, 1)) 297 | -------------------------------------------------------------------------------- /game_main.py: -------------------------------------------------------------------------------- 1 | # ----------------------------- 2 | # File: Main 3 | # Author: Yiting Xie 4 | # Date: 2018.9.10 5 | # E-mail: 369587353@qq.com 6 | # ----------------------------- 7 | import gym 8 | import numpy as np 9 | import argparse 10 | from agent_dqn import Agent,process_observation 11 | ''' 12 | ENV_NAME = 'MsPacman-v0' # game name 13 | EPISODES = 15000 14 | ISTRAIN = True # False to test, true to train 15 | ''' 16 | def main(): 17 | parse = argparse.ArgumentParser(description="Start program") 18 | parse.add_argument("--episode", help="number of episodes to run", type = int, default=15000) 19 | parse.add_argument("--env_name", help="game name", default='MsPacman-v0') 20 | parse.add_argument("--model_type", help="'dqn' is DQN, 'ddqn' is Double DQN", default='dqn') 21 | parse.add_argument("--train", help="train or test model, False is test, True is train", default=True) 22 | parse.add_argument("--load_network", help="load model True or False", default=False) 23 | 24 | args = parse.parse_args() 25 | EPISODES = args.episode 26 | ENV_NAME = args.env_name 27 | MODEL_TYPE = args.model_type 28 | ISTRAIN = str(args.train).lower() == 'true' # command-line values arrive as strings, so compare explicitly instead of relying on truthiness 29 | LOAD_NETWORK = str(args.load_network).lower() == 'true' 30 | 31 | env = gym.make(ENV_NAME) 32 | # train model 33 | if ISTRAIN: 34 | # init agent 35 | agent = Agent(env.action_space.n, ENV_NAME, load_network = LOAD_NETWORK, agent_model = MODEL_TYPE) 36 | for _ in range(EPISODES): 37 | game_status = False 38 | # reset the state of the environment 39 | last_observation = env.reset() 40 | observation, _, _, _ = env.step(0) 41 | state = agent.initial_state(observation, last_observation) 42 | 43 | while not game_status: 44 | last_observation = observation 45 | action = agent.get_action(state) 46 | observation, reward, game_status, _ = env.step(action) 47 | # render a frame of the environment 48 | env.render() 49 | next_observation = process_observation(observation, last_observation) 50 | state = agent.run_agent(action, reward, game_status, next_observation, state) 51 | # test model 52 | else: 53 | agent = Agent(env.action_space.n, ENV_NAME,
load_network = LOAD_NETWORK, agent_model = MODEL_TYPE) 54 | for _ in range(EPISODES): 55 | game_status = False 56 | last_observation = env.reset() 57 | observation, _, _, _ = env.step(0) 58 | state = agent.initial_state(observation, last_observation) 59 | 60 | while not game_status: 61 | last_observation = observation 62 | action = agent.get_action_test(state) 63 | observation, _, game_status, _ = env.step(action) 64 | env.render() 65 | next_observation = process_observation(observation, last_observation) 66 | state = np.append(state[: ,:, 1:], next_observation, axis = 2) 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /img/ddqn_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/img/ddqn_summary.png -------------------------------------------------------------------------------- /img/dqn_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/img/dqn_summary.png -------------------------------------------------------------------------------- /img/img.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/img/img.gif -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "game_model-2800000" 2 | all_model_checkpoint_paths: "game_model-2000000" 3 | all_model_checkpoint_paths: "game_model-2200000" 4 | all_model_checkpoint_paths: "game_model-2400000" 5 | all_model_checkpoint_paths: "game_model-2600000" 6 | all_model_checkpoint_paths: "game_model-2800000" 7 | -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2000000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2000000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2000000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2000000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2000000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2000000.meta -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2200000.data-00000-of-00001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2200000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2200000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2200000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2200000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2200000.meta -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2400000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2400000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2400000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2400000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2400000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2400000.meta -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2600000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2600000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2600000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2600000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2600000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2600000.meta -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2800000.data-00000-of-00001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2800000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2800000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2800000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/ddqn/game_model-2800000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/ddqn/game_model-2800000.meta -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "game_model-400000" 2 | all_model_checkpoint_paths: "game_model-200000" 3 | all_model_checkpoint_paths: "game_model-400000" 4 | -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/game_model-200000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/dqn/game_model-200000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/game_model-200000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/dqn/game_model-200000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/game_model-200000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/dqn/game_model-200000.meta -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/game_model-400000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/dqn/game_model-400000.data-00000-of-00001 -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/game_model-400000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/dqn/game_model-400000.index -------------------------------------------------------------------------------- /saved_networks/MsPacman-v0/dqn/game_model-400000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/saved_networks/MsPacman-v0/dqn/game_model-400000.meta -------------------------------------------------------------------------------- /summary/MsPacman-v0/ddqn/events.out.tfevents.1537978897.P-V-12: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/summary/MsPacman-v0/ddqn/events.out.tfevents.1537978897.P-V-12 -------------------------------------------------------------------------------- /summary/MsPacman-v0/dqn/events.out.tfevents.1537974967.P-V-12: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demomagic/dqn_game_tensorflow/aa28bcf405700b006760b653ae104a8f24b55bce/summary/MsPacman-v0/dqn/events.out.tfevents.1537974967.P-V-12 --------------------------------------------------------------------------------
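
A note on the target computation in `Agent.train_network`: the only difference between the `dqn` and `ddqn` branches is how the bootstrap value for the next state is obtained. Plain DQN lets the target network (`self.QT`) both select and evaluate the next action, while Double DQN selects the action with the online network (`self.Q`) and evaluates it with the target network, which reduces overestimation of Q-values. The standalone NumPy sketch below is illustrative only and not part of the repository; `q_online_next` and `q_target_next` are hypothetical stand-ins for the outputs of the two networks on a minibatch of next states.

```python
import numpy as np

# Stand-in arrays for the outputs of the online network (self.Q) and the
# target network (self.QT) on a minibatch of next states.
rng = np.random.default_rng(0)
batch_size, actions_num, gamma = 4, 9, 0.99
q_online_next = rng.random((batch_size, actions_num))
q_target_next = rng.random((batch_size, actions_num))
reward = np.array([1.0, 0.0, -1.0, 1.0])   # rewards already clipped to [-1, 1]
terminal = np.array([0, 0, 1, 0])          # 1 where the episode ended, so no bootstrap term

# DQN target: the target network both selects and evaluates the next action.
dqn_target = reward + (1 - terminal) * gamma * q_target_next.max(axis=1)

# Double DQN target: the online network selects the action,
# the target network evaluates it.
best_action = q_online_next.argmax(axis=1)
ddqn_target = reward + (1 - terminal) * gamma * q_target_next[np.arange(batch_size), best_action]

print(dqn_target)
print(ddqn_target)
```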