├── .gitignore ├── DDQN.ipynb ├── DQN.ipynb ├── README.md ├── mspacman.jpg └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | models/* 2 | .ipynb_checkpoints/* 3 | .DS_Store 4 | venv/ 5 | -------------------------------------------------------------------------------- /DDQN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/Users/tim/anaconda2/lib/python2.7/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n", 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import random\n", 20 | "import gym\n", 21 | "import numpy as np\n", 22 | "from collections import deque\n", 23 | "from keras.models import Sequential\n", 24 | "from keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D\n", 25 | "from keras.optimizers import RMSprop" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Agent" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "class DDQN_Agent:\n", 44 | " #\n", 45 | " # Initializes attributes and constructs CNN model and target_model\n", 46 | " #\n", 47 | " def __init__(self, state_size, action_size):\n", 48 | " self.state_size = state_size\n", 49 | " self.action_size = action_size\n", 50 | " self.memory = deque(maxlen=100000)\n", 51 | " \n", 52 | " # Hyperparameters\n", 53 | " self.gamma = 0.99 # Discount rate\n", 54 | " self.epsilon = 1.0 # Exploration rate\n", 55 | " self.epsilon_min = 0.1 # Minimal exploration rate (epsilon-greedy)\n", 56 | " self.epsilon_decay = 0.995 # Decay rate for epsilon\n", 57 | " self.update_rate = 10000 # Number of steps until updating the target network\n", 58 | " \n", 59 | " # Construct DQN models\n", 60 | " self.model = self._build_model()\n", 61 | " self.target_model = self._build_model()\n", 62 | " self.target_model.set_weights(self.model.get_weights())\n", 63 | " self.model.summary()\n", 64 | "\n", 65 | " #\n", 66 | " # Constructs CNN\n", 67 | " #\n", 68 | " def _build_model(self):\n", 69 | " model = Sequential()\n", 70 | " \n", 71 | " # Conv Layers\n", 72 | " model.add(Conv2D(32, (8, 8), strides=4, padding='same', input_shape=self.state_size))\n", 73 | " model.add(Activation('relu'))\n", 74 | " \n", 75 | " model.add(Conv2D(64, (4, 4), strides=2, padding='same'))\n", 76 | " model.add(Activation('relu'))\n", 77 | " \n", 78 | " model.add(Conv2D(64, (3, 3), strides=1, padding='same'))\n", 79 | " model.add(Activation('relu'))\n", 80 | " model.add(Flatten())\n", 81 | "\n", 82 | " # FC Layers\n", 83 | " model.add(Dense(128, activation='relu'))\n", 84 | " model.add(Dense(128, activation='relu'))\n", 85 | " model.add(Dense(64, activation='relu'))\n", 86 | " model.add(Dense(self.action_size, activation='linear'))\n", 87 | " \n", 88 | " model.compile(loss='mse', optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=None, decay=0.0)\n", 89 | ")\n", 90 | " return model\n", 91 | "\n", 92 | " #\n", 93 | " # Stores experience in replay 
memory\n", 94 | " #\n", 95 | " def remember(self, state, action, reward, next_state, done):\n", 96 | " self.memory.append((state, action, reward, next_state, done))\n", 97 | "\n", 98 | " #\n", 99 | " # Chooses action based on epsilon-greedy policy\n", 100 | " #\n", 101 | " def act(self, state):\n", 102 | " # Random exploration\n", 103 | " if np.random.rand() <= self.epsilon:\n", 104 | " return random.randrange(self.action_size)\n", 105 | " \n", 106 | " act_values = self.model.predict(state)\n", 107 | " \n", 108 | " return np.argmax(act_values[0]) # Returns action using policy\n", 109 | "\n", 110 | " #\n", 111 | " # Trains the model using randomly selected experiences in the replay memory\n", 112 | " #\n", 113 | " def replay(self, batch_size):\n", 114 | " minibatch = random.sample(self.memory, batch_size)\n", 115 | " \n", 116 | " for state, action, reward, next_state, done in minibatch:\n", 117 | " \n", 118 | " if not done:\n", 119 | " max_action = np.argmax(self.model.predict(next_state)[0])\n", 120 | " target = (reward + self.gamma * self.target_model.predict(next_state)[0][max_action])\n", 121 | " else:\n", 122 | " target = reward\n", 123 | " \n", 124 | " # Construct the target vector as follows:\n", 125 | " # 1. Use the current model to output the Q-value predictions\n", 126 | " target_f = self.model.predict(state)\n", 127 | " \n", 128 | " # 2. Rewrite the chosen action value with the computed target\n", 129 | " target_f[0][action] = target\n", 130 | " \n", 131 | " # 3. Use vectors in the objective computation\n", 132 | " self.model.fit(state, target_f, epochs=1, verbose=0)\n", 133 | " \n", 134 | " if self.epsilon > self.epsilon_min:\n", 135 | " self.epsilon *= self.epsilon_decay\n", 136 | "\n", 137 | " #\n", 138 | " # Sets the target model parameters to the current model parameters\n", 139 | " #\n", 140 | " def update_target_model(self):\n", 141 | " self.target_model.set_weights(self.model.get_weights())\n", 142 | " \n", 143 | " #\n", 144 | " # Loads a saved model\n", 145 | " #\n", 146 | " def load(self, name):\n", 147 | " self.model.load_weights(name)\n", 148 | "\n", 149 | " #\n", 150 | " # Saves parameters of a trained model\n", 151 | " #\n", 152 | " def save(self, name):\n", 153 | " self.model.save_weights(name)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# Preprocessing" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 3, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "# Helpful preprocessing taken from github.com/ageron/tiny-dqn\n", 172 | "def process_frame(frame):\n", 173 | "\n", 174 | " mspacman_color = np.array([210, 164, 74]).mean()\n", 175 | " img = frame[1:176:2, ::2] # Crop and downsize\n", 176 | " img = img.mean(axis=2) # Convert to greyscale\n", 177 | " img[img==mspacman_color] = 0 # Improve contrast by making pacman white\n", 178 | " img = (img - 128) / 128 - 1 # Normalize from -1 to 1.\n", 179 | " \n", 180 | " return np.expand_dims(img.reshape(88, 80, 1), axis=0)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 4, 186 | "metadata": { 187 | "collapsed": true 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# Averages images from the last few frame\n", 192 | "def blend_images (images, blend):\n", 193 | " avg_image = np.expand_dims(np.zeros((88, 80, 1), np.float64), axis=0)\n", 194 | "\n", 195 | " for image in images:\n", 196 | " avg_image += image\n", 197 | " \n", 198 | " if len(images) < blend:\n", 
199 | " return avg_image / len(images)\n", 200 | " else:\n", 201 | " return avg_image / blend" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "# Environment" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 5, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "_________________________________________________________________\n", 221 | "Layer (type) Output Shape Param # \n", 222 | "=================================================================\n", 223 | "conv2d_1 (Conv2D) (None, 22, 20, 32) 2080 \n", 224 | "_________________________________________________________________\n", 225 | "activation_1 (Activation) (None, 22, 20, 32) 0 \n", 226 | "_________________________________________________________________\n", 227 | "conv2d_2 (Conv2D) (None, 11, 10, 64) 32832 \n", 228 | "_________________________________________________________________\n", 229 | "activation_2 (Activation) (None, 11, 10, 64) 0 \n", 230 | "_________________________________________________________________\n", 231 | "conv2d_3 (Conv2D) (None, 11, 10, 64) 36928 \n", 232 | "_________________________________________________________________\n", 233 | "activation_3 (Activation) (None, 11, 10, 64) 0 \n", 234 | "_________________________________________________________________\n", 235 | "flatten_1 (Flatten) (None, 7040) 0 \n", 236 | "_________________________________________________________________\n", 237 | "dense_1 (Dense) (None, 128) 901248 \n", 238 | "_________________________________________________________________\n", 239 | "dense_2 (Dense) (None, 128) 16512 \n", 240 | "_________________________________________________________________\n", 241 | "dense_3 (Dense) (None, 64) 8256 \n", 242 | "_________________________________________________________________\n", 243 | "dense_4 (Dense) (None, 9) 585 \n", 244 | "=================================================================\n", 245 | "Total params: 998,441\n", 246 | "Trainable params: 998,441\n", 247 | "Non-trainable params: 0\n", 248 | "_________________________________________________________________\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "env = gym.make('MsPacman-v0')\n", 254 | "state_size = (88, 80, 1)\n", 255 | "action_size = env.action_space.n\n", 256 | "agent = DDQN_Agent(state_size, action_size)\n", 257 | "#agent.load('models/')\n", 258 | "\n", 259 | "episodes = 5000\n", 260 | "batch_size = 32\n", 261 | "skip_start = 90 # MsPacman-v0 waits for 90 actions before the episode begins\n", 262 | "total_time = 0 # Counter for total number of steps taken\n", 263 | "all_rewards = 0 # Used to compute avg reward over time\n", 264 | "blend = 4 # Number of images to blend\n", 265 | "done = False" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "episode: 1/5000, game score: 240.0, reward: 240.0, avg reward: 240.0, time: 617, total time: 618\n", 278 | "episode: 2/5000, game score: 360.0, reward: 360.0, avg reward: 300.0, time: 556, total time: 1175\n", 279 | "episode: 3/5000, game score: 230.0, reward: 230.0, avg reward: 276.666666667, time: 659, total time: 1835\n", 280 | "episode: 4/5000, game score: 280.0, reward: 280.0, avg reward: 277.5, time: 603, total time: 2439\n", 281 | "episode: 5/5000, game score: 220.0, reward: 220.0, avg reward: 266.0, time: 621, 
total time: 3061\n", 282 | "episode: 6/5000, game score: 280.0, reward: 280.0, avg reward: 268.333333333, time: 601, total time: 3663\n", 283 | "episode: 7/5000, game score: 230.0, reward: 230.0, avg reward: 262.857142857, time: 626, total time: 4290\n", 284 | "episode: 8/5000, game score: 290.0, reward: 290.0, avg reward: 266.25, time: 789, total time: 5080\n", 285 | "episode: 9/5000, game score: 270.0, reward: 270.0, avg reward: 266.666666667, time: 543, total time: 5624\n", 286 | "episode: 10/5000, game score: 230.0, reward: 230.0, avg reward: 263.0, time: 430, total time: 6055\n", 287 | "episode: 11/5000, game score: 230.0, reward: 230.0, avg reward: 260.0, time: 692, total time: 6748\n", 288 | "episode: 12/5000, game score: 200.0, reward: 200.0, avg reward: 255.0, time: 556, total time: 7305\n", 289 | "episode: 13/5000, game score: 180.0, reward: 180.0, avg reward: 249.230769231, time: 600, total time: 7906\n", 290 | "episode: 14/5000, game score: 240.0, reward: 240.0, avg reward: 248.571428571, time: 493, total time: 8400\n", 291 | "episode: 15/5000, game score: 160.0, reward: 160.0, avg reward: 242.666666667, time: 549, total time: 8950\n", 292 | "episode: 16/5000, game score: 260.0, reward: 260.0, avg reward: 243.75, time: 595, total time: 9546\n", 293 | "episode: 17/5000, game score: 270.0, reward: 270.0, avg reward: 245.294117647, time: 546, total time: 10093\n", 294 | "episode: 18/5000, game score: 300.0, reward: 300.0, avg reward: 248.333333333, time: 564, total time: 10658\n", 295 | "episode: 19/5000, game score: 230.0, reward: 230.0, avg reward: 247.368421053, time: 574, total time: 11233\n", 296 | "episode: 20/5000, game score: 270.0, reward: 270.0, avg reward: 248.5, time: 478, total time: 11712\n", 297 | "episode: 21/5000, game score: 230.0, reward: 230.0, avg reward: 247.619047619, time: 698, total time: 12411\n", 298 | "episode: 22/5000, game score: 240.0, reward: 240.0, avg reward: 247.272727273, time: 602, total time: 13014\n", 299 | "episode: 23/5000, game score: 310.0, reward: 310.0, avg reward: 250.0, time: 598, total time: 13613\n", 300 | "episode: 24/5000, game score: 300.0, reward: 300.0, avg reward: 252.083333333, time: 635, total time: 14249\n", 301 | "episode: 25/5000, game score: 280.0, reward: 280.0, avg reward: 253.2, time: 726, total time: 14976\n", 302 | "episode: 26/5000, game score: 350.0, reward: 350.0, avg reward: 256.923076923, time: 500, total time: 15477\n", 303 | "episode: 27/5000, game score: 260.0, reward: 260.0, avg reward: 257.037037037, time: 441, total time: 15919\n", 304 | "episode: 28/5000, game score: 530.0, reward: 530.0, avg reward: 266.785714286, time: 579, total time: 16499\n", 305 | "episode: 29/5000, game score: 300.0, reward: 300.0, avg reward: 267.931034483, time: 479, total time: 16979\n", 306 | "episode: 30/5000, game score: 260.0, reward: 260.0, avg reward: 267.666666667, time: 542, total time: 17522\n", 307 | "episode: 31/5000, game score: 220.0, reward: 220.0, avg reward: 266.129032258, time: 587, total time: 18110\n", 308 | "episode: 32/5000, game score: 230.0, reward: 230.0, avg reward: 265.0, time: 643, total time: 18754\n", 309 | "episode: 33/5000, game score: 210.0, reward: 210.0, avg reward: 263.333333333, time: 410, total time: 19165\n", 310 | "episode: 34/5000, game score: 280.0, reward: 280.0, avg reward: 263.823529412, time: 831, total time: 19997\n", 311 | "episode: 35/5000, game score: 980.0, reward: 980.0, avg reward: 284.285714286, time: 878, total time: 20876\n", 312 | "episode: 36/5000, game score: 370.0, 
reward: 370.0, avg reward: 286.666666667, time: 931, total time: 21808\n", 313 | "episode: 37/5000, game score: 330.0, reward: 330.0, avg reward: 287.837837838, time: 636, total time: 22445\n", 314 | "episode: 38/5000, game score: 280.0, reward: 280.0, avg reward: 287.631578947, time: 757, total time: 23203\n", 315 | "episode: 39/5000, game score: 540.0, reward: 540.0, avg reward: 294.102564103, time: 744, total time: 23948\n", 316 | "episode: 40/5000, game score: 210.0, reward: 210.0, avg reward: 292.0, time: 552, total time: 24501\n", 317 | "episode: 41/5000, game score: 320.0, reward: 320.0, avg reward: 292.682926829, time: 854, total time: 25356\n", 318 | "episode: 42/5000, game score: 360.0, reward: 360.0, avg reward: 294.285714286, time: 606, total time: 25963\n", 319 | "episode: 43/5000, game score: 260.0, reward: 260.0, avg reward: 293.488372093, time: 474, total time: 26438\n", 320 | "episode: 44/5000, game score: 270.0, reward: 270.0, avg reward: 292.954545455, time: 555, total time: 26994\n", 321 | "episode: 45/5000, game score: 320.0, reward: 320.0, avg reward: 293.555555556, time: 602, total time: 27597\n", 322 | "episode: 46/5000, game score: 300.0, reward: 300.0, avg reward: 293.695652174, time: 507, total time: 28105\n", 323 | "episode: 47/5000, game score: 240.0, reward: 240.0, avg reward: 292.553191489, time: 442, total time: 28548\n", 324 | "episode: 48/5000, game score: 800.0, reward: 800.0, avg reward: 303.125, time: 1044, total time: 29593\n", 325 | "episode: 49/5000, game score: 290.0, reward: 290.0, avg reward: 302.857142857, time: 587, total time: 30181\n", 326 | "episode: 50/5000, game score: 350.0, reward: 350.0, avg reward: 303.8, time: 785, total time: 30967\n", 327 | "episode: 51/5000, game score: 280.0, reward: 280.0, avg reward: 303.333333333, time: 600, total time: 31568\n", 328 | "episode: 52/5000, game score: 250.0, reward: 250.0, avg reward: 302.307692308, time: 475, total time: 32044\n", 329 | "episode: 53/5000, game score: 560.0, reward: 560.0, avg reward: 307.169811321, time: 766, total time: 32811\n", 330 | "episode: 54/5000, game score: 580.0, reward: 580.0, avg reward: 312.222222222, time: 828, total time: 33640\n", 331 | "episode: 55/5000, game score: 520.0, reward: 520.0, avg reward: 316.0, time: 731, total time: 34372\n", 332 | "episode: 56/5000, game score: 250.0, reward: 250.0, avg reward: 314.821428571, time: 602, total time: 34975\n", 333 | "episode: 57/5000, game score: 270.0, reward: 270.0, avg reward: 314.035087719, time: 728, total time: 35704\n", 334 | "episode: 58/5000, game score: 210.0, reward: 210.0, avg reward: 312.24137931, time: 481, total time: 36186\n", 335 | "episode: 59/5000, game score: 260.0, reward: 260.0, avg reward: 311.355932203, time: 587, total time: 36774\n", 336 | "episode: 60/5000, game score: 220.0, reward: 220.0, avg reward: 309.833333333, time: 509, total time: 37284\n", 337 | "episode: 61/5000, game score: 270.0, reward: 270.0, avg reward: 309.180327869, time: 558, total time: 37843\n", 338 | "episode: 62/5000, game score: 280.0, reward: 280.0, avg reward: 308.709677419, time: 412, total time: 38256\n", 339 | "episode: 63/5000, game score: 540.0, reward: 540.0, avg reward: 312.380952381, time: 857, total time: 39114\n", 340 | "episode: 64/5000, game score: 240.0, reward: 240.0, avg reward: 311.25, time: 590, total time: 39705\n", 341 | "episode: 65/5000, game score: 220.0, reward: 220.0, avg reward: 309.846153846, time: 497, total time: 40203\n", 342 | "episode: 66/5000, game score: 310.0, reward: 310.0, avg 
reward: 309.848484848, time: 594, total time: 40798\n", 343 | "episode: 67/5000, game score: 370.0, reward: 370.0, avg reward: 310.746268657, time: 884, total time: 41683\n", 344 | "episode: 68/5000, game score: 570.0, reward: 570.0, avg reward: 314.558823529, time: 771, total time: 42455\n", 345 | "episode: 69/5000, game score: 210.0, reward: 210.0, avg reward: 313.043478261, time: 511, total time: 42967\n", 346 | "episode: 70/5000, game score: 920.0, reward: 920.0, avg reward: 321.714285714, time: 848, total time: 43816\n", 347 | "episode: 71/5000, game score: 260.0, reward: 260.0, avg reward: 320.845070423, time: 505, total time: 44322\n", 348 | "episode: 72/5000, game score: 300.0, reward: 300.0, avg reward: 320.555555556, time: 693, total time: 45016\n", 349 | "episode: 73/5000, game score: 270.0, reward: 270.0, avg reward: 319.863013699, time: 476, total time: 45493\n", 350 | "episode: 74/5000, game score: 910.0, reward: 910.0, avg reward: 327.837837838, time: 829, total time: 46323\n", 351 | "episode: 75/5000, game score: 220.0, reward: 220.0, avg reward: 326.4, time: 560, total time: 46884\n", 352 | "episode: 76/5000, game score: 340.0, reward: 340.0, avg reward: 326.578947368, time: 558, total time: 47443\n", 353 | "episode: 77/5000, game score: 310.0, reward: 310.0, avg reward: 326.363636364, time: 624, total time: 48068\n", 354 | "episode: 78/5000, game score: 310.0, reward: 310.0, avg reward: 326.153846154, time: 637, total time: 48706\n" 355 | ] 356 | }, 357 | { 358 | "name": "stdout", 359 | "output_type": "stream", 360 | "text": [ 361 | "episode: 79/5000, game score: 310.0, reward: 310.0, avg reward: 325.949367089, time: 624, total time: 49331\n", 362 | "episode: 80/5000, game score: 300.0, reward: 300.0, avg reward: 325.625, time: 587, total time: 49919\n", 363 | "episode: 81/5000, game score: 320.0, reward: 320.0, avg reward: 325.555555556, time: 540, total time: 50460\n", 364 | "episode: 82/5000, game score: 250.0, reward: 250.0, avg reward: 324.634146341, time: 630, total time: 51091\n", 365 | "episode: 83/5000, game score: 240.0, reward: 240.0, avg reward: 323.614457831, time: 568, total time: 51660\n", 366 | "episode: 84/5000, game score: 230.0, reward: 230.0, avg reward: 322.5, time: 573, total time: 52234\n", 367 | "episode: 85/5000, game score: 1020.0, reward: 1020.0, avg reward: 330.705882353, time: 713, total time: 52948\n", 368 | "episode: 86/5000, game score: 260.0, reward: 260.0, avg reward: 329.88372093, time: 653, total time: 53602\n", 369 | "episode: 87/5000, game score: 450.0, reward: 450.0, avg reward: 331.264367816, time: 922, total time: 54525\n", 370 | "episode: 88/5000, game score: 310.0, reward: 310.0, avg reward: 331.022727273, time: 605, total time: 55131\n", 371 | "episode: 89/5000, game score: 580.0, reward: 580.0, avg reward: 333.820224719, time: 1061, total time: 56193\n", 372 | "episode: 90/5000, game score: 260.0, reward: 260.0, avg reward: 333.0, time: 533, total time: 56727\n", 373 | "episode: 91/5000, game score: 200.0, reward: 200.0, avg reward: 331.538461538, time: 448, total time: 57176\n", 374 | "episode: 92/5000, game score: 350.0, reward: 350.0, avg reward: 331.739130435, time: 1182, total time: 58359\n", 375 | "episode: 93/5000, game score: 410.0, reward: 410.0, avg reward: 332.580645161, time: 978, total time: 59338\n", 376 | "episode: 94/5000, game score: 360.0, reward: 360.0, avg reward: 332.872340426, time: 713, total time: 60052\n", 377 | "episode: 95/5000, game score: 520.0, reward: 520.0, avg reward: 334.842105263, time: 1076, 
total time: 61129\n", 378 | "episode: 96/5000, game score: 290.0, reward: 290.0, avg reward: 334.375, time: 508, total time: 61638\n", 379 | "episode: 97/5000, game score: 230.0, reward: 230.0, avg reward: 333.298969072, time: 583, total time: 62222\n", 380 | "episode: 98/5000, game score: 240.0, reward: 240.0, avg reward: 332.346938776, time: 559, total time: 62782\n", 381 | "episode: 99/5000, game score: 190.0, reward: 190.0, avg reward: 330.909090909, time: 498, total time: 63281\n", 382 | "episode: 100/5000, game score: 280.0, reward: 280.0, avg reward: 330.4, time: 592, total time: 63874\n", 383 | "episode: 101/5000, game score: 260.0, reward: 260.0, avg reward: 329.702970297, time: 493, total time: 64368\n", 384 | "episode: 102/5000, game score: 220.0, reward: 220.0, avg reward: 328.62745098, time: 574, total time: 64943\n", 385 | "episode: 103/5000, game score: 360.0, reward: 360.0, avg reward: 328.932038835, time: 752, total time: 65696\n", 386 | "episode: 104/5000, game score: 310.0, reward: 310.0, avg reward: 328.75, time: 608, total time: 66305\n", 387 | "episode: 105/5000, game score: 590.0, reward: 590.0, avg reward: 331.238095238, time: 764, total time: 67070\n", 388 | "episode: 106/5000, game score: 350.0, reward: 350.0, avg reward: 331.41509434, time: 769, total time: 67840\n", 389 | "episode: 107/5000, game score: 420.0, reward: 420.0, avg reward: 332.242990654, time: 1028, total time: 68869\n", 390 | "episode: 108/5000, game score: 260.0, reward: 260.0, avg reward: 331.574074074, time: 509, total time: 69379\n", 391 | "episode: 109/5000, game score: 430.0, reward: 430.0, avg reward: 332.47706422, time: 736, total time: 70116\n", 392 | "episode: 110/5000, game score: 170.0, reward: 170.0, avg reward: 331.0, time: 493, total time: 70610\n", 393 | "episode: 111/5000, game score: 220.0, reward: 220.0, avg reward: 330.0, time: 582, total time: 71193\n", 394 | "episode: 112/5000, game score: 230.0, reward: 230.0, avg reward: 329.107142857, time: 613, total time: 71807\n", 395 | "episode: 113/5000, game score: 300.0, reward: 300.0, avg reward: 328.849557522, time: 535, total time: 72343\n", 396 | "episode: 114/5000, game score: 240.0, reward: 240.0, avg reward: 328.070175439, time: 521, total time: 72865\n", 397 | "episode: 115/5000, game score: 650.0, reward: 650.0, avg reward: 330.869565217, time: 653, total time: 73519\n", 398 | "episode: 116/5000, game score: 240.0, reward: 240.0, avg reward: 330.086206897, time: 559, total time: 74079\n", 399 | "episode: 117/5000, game score: 380.0, reward: 380.0, avg reward: 330.512820513, time: 592, total time: 74672\n", 400 | "episode: 118/5000, game score: 390.0, reward: 390.0, avg reward: 331.016949153, time: 1080, total time: 75753\n", 401 | "episode: 119/5000, game score: 280.0, reward: 280.0, avg reward: 330.588235294, time: 637, total time: 76391\n", 402 | "episode: 120/5000, game score: 290.0, reward: 290.0, avg reward: 330.25, time: 642, total time: 77034\n", 403 | "episode: 121/5000, game score: 250.0, reward: 250.0, avg reward: 329.58677686, time: 380, total time: 77415\n", 404 | "episode: 122/5000, game score: 250.0, reward: 250.0, avg reward: 328.93442623, time: 521, total time: 77937\n", 405 | "episode: 123/5000, game score: 310.0, reward: 310.0, avg reward: 328.780487805, time: 483, total time: 78421\n", 406 | "episode: 124/5000, game score: 240.0, reward: 240.0, avg reward: 328.064516129, time: 621, total time: 79043\n", 407 | "episode: 125/5000, game score: 340.0, reward: 340.0, avg reward: 328.16, time: 617, total time: 
79661\n", 408 | "episode: 126/5000, game score: 330.0, reward: 330.0, avg reward: 328.174603175, time: 766, total time: 80428\n", 409 | "episode: 127/5000, game score: 610.0, reward: 610.0, avg reward: 330.393700787, time: 732, total time: 81161\n", 410 | "episode: 128/5000, game score: 250.0, reward: 250.0, avg reward: 329.765625, time: 470, total time: 81632\n", 411 | "episode: 129/5000, game score: 290.0, reward: 290.0, avg reward: 329.457364341, time: 599, total time: 82232\n", 412 | "episode: 130/5000, game score: 330.0, reward: 330.0, avg reward: 329.461538462, time: 766, total time: 82999\n", 413 | "episode: 131/5000, game score: 230.0, reward: 230.0, avg reward: 328.702290076, time: 436, total time: 83436\n", 414 | "episode: 132/5000, game score: 1000.0, reward: 1000.0, avg reward: 333.787878788, time: 896, total time: 84333\n", 415 | "episode: 133/5000, game score: 260.0, reward: 260.0, avg reward: 333.233082707, time: 636, total time: 84970\n", 416 | "episode: 134/5000, game score: 470.0, reward: 470.0, avg reward: 334.253731343, time: 886, total time: 85857\n", 417 | "episode: 135/5000, game score: 490.0, reward: 490.0, avg reward: 335.407407407, time: 1003, total time: 86861\n", 418 | "episode: 136/5000, game score: 230.0, reward: 230.0, avg reward: 334.632352941, time: 572, total time: 87434\n", 419 | "episode: 137/5000, game score: 240.0, reward: 240.0, avg reward: 333.941605839, time: 635, total time: 88070\n", 420 | "episode: 138/5000, game score: 320.0, reward: 320.0, avg reward: 333.84057971, time: 658, total time: 88729\n", 421 | "episode: 139/5000, game score: 380.0, reward: 380.0, avg reward: 334.172661871, time: 914, total time: 89644\n", 422 | "episode: 140/5000, game score: 290.0, reward: 290.0, avg reward: 333.857142857, time: 639, total time: 90284\n", 423 | "episode: 141/5000, game score: 250.0, reward: 250.0, avg reward: 333.262411348, time: 531, total time: 90816\n", 424 | "episode: 142/5000, game score: 320.0, reward: 320.0, avg reward: 333.169014085, time: 562, total time: 91379\n", 425 | "episode: 143/5000, game score: 270.0, reward: 270.0, avg reward: 332.727272727, time: 573, total time: 91953\n", 426 | "episode: 144/5000, game score: 240.0, reward: 240.0, avg reward: 332.083333333, time: 435, total time: 92389\n", 427 | "episode: 145/5000, game score: 250.0, reward: 250.0, avg reward: 331.517241379, time: 507, total time: 92897\n", 428 | "episode: 146/5000, game score: 160.0, reward: 160.0, avg reward: 330.342465753, time: 551, total time: 93449\n", 429 | "episode: 147/5000, game score: 210.0, reward: 210.0, avg reward: 329.523809524, time: 461, total time: 93911\n", 430 | "episode: 148/5000, game score: 400.0, reward: 400.0, avg reward: 330.0, time: 828, total time: 94740\n", 431 | "episode: 149/5000, game score: 330.0, reward: 330.0, avg reward: 330.0, time: 730, total time: 95471\n", 432 | "episode: 150/5000, game score: 250.0, reward: 250.0, avg reward: 329.466666667, time: 471, total time: 95943\n", 433 | "episode: 151/5000, game score: 260.0, reward: 260.0, avg reward: 329.006622517, time: 545, total time: 96489\n", 434 | "episode: 152/5000, game score: 260.0, reward: 260.0, avg reward: 328.552631579, time: 605, total time: 97095\n", 435 | "episode: 153/5000, game score: 350.0, reward: 350.0, avg reward: 328.692810458, time: 826, total time: 97922\n", 436 | "episode: 154/5000, game score: 290.0, reward: 290.0, avg reward: 328.441558442, time: 609, total time: 98532\n", 437 | "episode: 155/5000, game score: 360.0, reward: 360.0, avg reward: 
328.64516129, time: 1082, total time: 99615\n" 438 | ] 439 | }, 440 | { 441 | "name": "stdout", 442 | "output_type": "stream", 443 | "text": [ 444 | "episode: 156/5000, game score: 260.0, reward: 260.0, avg reward: 328.205128205, time: 503, total time: 100119\n", 445 | "episode: 157/5000, game score: 240.0, reward: 240.0, avg reward: 327.643312102, time: 584, total time: 100704\n", 446 | "episode: 158/5000, game score: 330.0, reward: 330.0, avg reward: 327.658227848, time: 595, total time: 101300\n", 447 | "episode: 159/5000, game score: 340.0, reward: 340.0, avg reward: 327.735849057, time: 582, total time: 101883\n", 448 | "episode: 160/5000, game score: 300.0, reward: 300.0, avg reward: 327.5625, time: 594, total time: 102478\n", 449 | "episode: 161/5000, game score: 310.0, reward: 310.0, avg reward: 327.453416149, time: 774, total time: 103253\n", 450 | "episode: 162/5000, game score: 660.0, reward: 660.0, avg reward: 329.50617284, time: 926, total time: 104180\n", 451 | "episode: 163/5000, game score: 280.0, reward: 280.0, avg reward: 329.202453988, time: 545, total time: 104726\n", 452 | "episode: 164/5000, game score: 380.0, reward: 380.0, avg reward: 329.512195122, time: 704, total time: 105431\n", 453 | "episode: 165/5000, game score: 560.0, reward: 560.0, avg reward: 330.909090909, time: 717, total time: 106149\n", 454 | "episode: 166/5000, game score: 300.0, reward: 300.0, avg reward: 330.722891566, time: 537, total time: 106687\n", 455 | "episode: 167/5000, game score: 440.0, reward: 440.0, avg reward: 331.377245509, time: 988, total time: 107676\n", 456 | "episode: 168/5000, game score: 410.0, reward: 410.0, avg reward: 331.845238095, time: 678, total time: 108355\n", 457 | "episode: 169/5000, game score: 160.0, reward: 160.0, avg reward: 330.828402367, time: 367, total time: 108723\n", 458 | "episode: 170/5000, game score: 260.0, reward: 260.0, avg reward: 330.411764706, time: 638, total time: 109362\n", 459 | "episode: 171/5000, game score: 170.0, reward: 170.0, avg reward: 329.473684211, time: 473, total time: 109836\n", 460 | "episode: 172/5000, game score: 340.0, reward: 340.0, avg reward: 329.534883721, time: 571, total time: 110408\n", 461 | "episode: 173/5000, game score: 330.0, reward: 330.0, avg reward: 329.537572254, time: 844, total time: 111253\n", 462 | "episode: 174/5000, game score: 270.0, reward: 270.0, avg reward: 329.195402299, time: 540, total time: 111794\n", 463 | "episode: 175/5000, game score: 310.0, reward: 310.0, avg reward: 329.085714286, time: 554, total time: 112349\n", 464 | "episode: 176/5000, game score: 310.0, reward: 310.0, avg reward: 328.977272727, time: 648, total time: 112998\n", 465 | "episode: 177/5000, game score: 270.0, reward: 270.0, avg reward: 328.644067797, time: 504, total time: 113503\n", 466 | "episode: 178/5000, game score: 310.0, reward: 310.0, avg reward: 328.539325843, time: 575, total time: 114079\n", 467 | "episode: 179/5000, game score: 340.0, reward: 340.0, avg reward: 328.603351955, time: 564, total time: 114644\n", 468 | "episode: 180/5000, game score: 290.0, reward: 290.0, avg reward: 328.388888889, time: 688, total time: 115333\n", 469 | "episode: 181/5000, game score: 330.0, reward: 330.0, avg reward: 328.397790055, time: 690, total time: 116024\n", 470 | "episode: 182/5000, game score: 120.0, reward: 120.0, avg reward: 327.252747253, time: 460, total time: 116485\n", 471 | "episode: 183/5000, game score: 260.0, reward: 260.0, avg reward: 326.885245902, time: 678, total time: 117164\n", 472 | "episode: 184/5000, 
game score: 310.0, reward: 310.0, avg reward: 326.793478261, time: 632, total time: 117797\n", 473 | "episode: 185/5000, game score: 300.0, reward: 300.0, avg reward: 326.648648649, time: 597, total time: 118395\n", 474 | "episode: 186/5000, game score: 230.0, reward: 230.0, avg reward: 326.129032258, time: 499, total time: 118895\n", 475 | "episode: 187/5000, game score: 300.0, reward: 300.0, avg reward: 325.989304813, time: 682, total time: 119578\n", 476 | "episode: 188/5000, game score: 1150.0, reward: 1150.0, avg reward: 330.372340426, time: 1014, total time: 120593\n", 477 | "episode: 189/5000, game score: 230.0, reward: 230.0, avg reward: 329.841269841, time: 547, total time: 121141\n", 478 | "episode: 190/5000, game score: 200.0, reward: 200.0, avg reward: 329.157894737, time: 494, total time: 121636\n", 479 | "episode: 191/5000, game score: 990.0, reward: 990.0, avg reward: 332.617801047, time: 846, total time: 122483\n", 480 | "episode: 192/5000, game score: 260.0, reward: 260.0, avg reward: 332.239583333, time: 597, total time: 123081\n", 481 | "episode: 193/5000, game score: 230.0, reward: 230.0, avg reward: 331.70984456, time: 615, total time: 123697\n", 482 | "episode: 194/5000, game score: 240.0, reward: 240.0, avg reward: 331.237113402, time: 488, total time: 124186\n", 483 | "episode: 195/5000, game score: 230.0, reward: 230.0, avg reward: 330.717948718, time: 467, total time: 124654\n", 484 | "episode: 196/5000, game score: 790.0, reward: 790.0, avg reward: 333.06122449, time: 896, total time: 125551\n", 485 | "episode: 197/5000, game score: 230.0, reward: 230.0, avg reward: 332.538071066, time: 512, total time: 126064\n", 486 | "episode: 198/5000, game score: 280.0, reward: 280.0, avg reward: 332.272727273, time: 595, total time: 126660\n", 487 | "episode: 199/5000, game score: 150.0, reward: 150.0, avg reward: 331.35678392, time: 480, total time: 127141\n", 488 | "episode: 200/5000, game score: 280.0, reward: 280.0, avg reward: 331.1, time: 458, total time: 127600\n", 489 | "episode: 201/5000, game score: 240.0, reward: 240.0, avg reward: 330.646766169, time: 503, total time: 128104\n", 490 | "episode: 202/5000, game score: 280.0, reward: 280.0, avg reward: 330.396039604, time: 802, total time: 128907\n", 491 | "episode: 203/5000, game score: 230.0, reward: 230.0, avg reward: 329.901477833, time: 541, total time: 129449\n", 492 | "episode: 204/5000, game score: 220.0, reward: 220.0, avg reward: 329.362745098, time: 507, total time: 129957\n", 493 | "episode: 205/5000, game score: 170.0, reward: 170.0, avg reward: 328.585365854, time: 462, total time: 130420\n", 494 | "episode: 206/5000, game score: 220.0, reward: 220.0, avg reward: 328.058252427, time: 539, total time: 130960\n", 495 | "episode: 207/5000, game score: 290.0, reward: 290.0, avg reward: 327.874396135, time: 573, total time: 131534\n", 496 | "episode: 208/5000, game score: 290.0, reward: 290.0, avg reward: 327.692307692, time: 506, total time: 132041\n", 497 | "episode: 209/5000, game score: 310.0, reward: 310.0, avg reward: 327.607655502, time: 681, total time: 132723\n", 498 | "episode: 210/5000, game score: 220.0, reward: 220.0, avg reward: 327.095238095, time: 416, total time: 133140\n", 499 | "episode: 211/5000, game score: 160.0, reward: 160.0, avg reward: 326.303317536, time: 606, total time: 133747\n", 500 | "episode: 212/5000, game score: 220.0, reward: 220.0, avg reward: 325.801886792, time: 607, total time: 134355\n", 501 | "episode: 213/5000, game score: 340.0, reward: 340.0, avg reward: 
325.868544601, time: 589, total time: 134945\n", 502 | "episode: 214/5000, game score: 810.0, reward: 810.0, avg reward: 328.130841121, time: 990, total time: 135936\n", 503 | "episode: 215/5000, game score: 170.0, reward: 170.0, avg reward: 327.395348837, time: 361, total time: 136298\n", 504 | "episode: 216/5000, game score: 210.0, reward: 210.0, avg reward: 326.851851852, time: 577, total time: 136876\n", 505 | "episode: 217/5000, game score: 350.0, reward: 350.0, avg reward: 326.958525346, time: 506, total time: 137383\n", 506 | "episode: 218/5000, game score: 290.0, reward: 290.0, avg reward: 326.788990826, time: 660, total time: 138044\n", 507 | "episode: 219/5000, game score: 240.0, reward: 240.0, avg reward: 326.392694064, time: 678, total time: 138723\n", 508 | "episode: 220/5000, game score: 140.0, reward: 140.0, avg reward: 325.545454545, time: 333, total time: 139057\n", 509 | "episode: 221/5000, game score: 190.0, reward: 190.0, avg reward: 324.932126697, time: 516, total time: 139574\n", 510 | "episode: 222/5000, game score: 290.0, reward: 290.0, avg reward: 324.774774775, time: 572, total time: 140147\n", 511 | "episode: 223/5000, game score: 320.0, reward: 320.0, avg reward: 324.753363229, time: 584, total time: 140732\n", 512 | "episode: 224/5000, game score: 220.0, reward: 220.0, avg reward: 324.285714286, time: 642, total time: 141375\n", 513 | "episode: 225/5000, game score: 290.0, reward: 290.0, avg reward: 324.133333333, time: 599, total time: 141975\n", 514 | "episode: 226/5000, game score: 420.0, reward: 420.0, avg reward: 324.557522124, time: 729, total time: 142705\n", 515 | "episode: 227/5000, game score: 300.0, reward: 300.0, avg reward: 324.449339207, time: 584, total time: 143290\n", 516 | "episode: 228/5000, game score: 200.0, reward: 200.0, avg reward: 323.903508772, time: 401, total time: 143692\n", 517 | "episode: 229/5000, game score: 220.0, reward: 220.0, avg reward: 323.449781659, time: 621, total time: 144314\n", 518 | "episode: 230/5000, game score: 350.0, reward: 350.0, avg reward: 323.565217391, time: 591, total time: 144906\n" 519 | ] 520 | }, 521 | { 522 | "name": "stdout", 523 | "output_type": "stream", 524 | "text": [ 525 | "episode: 231/5000, game score: 340.0, reward: 340.0, avg reward: 323.636363636, time: 775, total time: 145682\n", 526 | "episode: 232/5000, game score: 140.0, reward: 140.0, avg reward: 322.844827586, time: 496, total time: 146179\n", 527 | "episode: 233/5000, game score: 150.0, reward: 150.0, avg reward: 322.103004292, time: 575, total time: 146755\n", 528 | "episode: 234/5000, game score: 130.0, reward: 130.0, avg reward: 321.282051282, time: 377, total time: 147133\n", 529 | "episode: 235/5000, game score: 320.0, reward: 320.0, avg reward: 321.276595745, time: 606, total time: 147740\n", 530 | "episode: 236/5000, game score: 280.0, reward: 280.0, avg reward: 321.101694915, time: 557, total time: 148298\n", 531 | "episode: 237/5000, game score: 230.0, reward: 230.0, avg reward: 320.717299578, time: 673, total time: 148972\n", 532 | "episode: 238/5000, game score: 280.0, reward: 280.0, avg reward: 320.546218487, time: 519, total time: 149492\n", 533 | "episode: 239/5000, game score: 580.0, reward: 580.0, avg reward: 321.631799163, time: 777, total time: 150270\n", 534 | "episode: 240/5000, game score: 370.0, reward: 370.0, avg reward: 321.833333333, time: 606, total time: 150877\n", 535 | "episode: 241/5000, game score: 160.0, reward: 160.0, avg reward: 321.161825726, time: 424, total time: 151302\n", 536 | "episode: 
242/5000, game score: 250.0, reward: 250.0, avg reward: 320.867768595, time: 618, total time: 151921\n", 537 | "episode: 243/5000, game score: 280.0, reward: 280.0, avg reward: 320.699588477, time: 638, total time: 152560\n", 538 | "episode: 244/5000, game score: 220.0, reward: 220.0, avg reward: 320.286885246, time: 583, total time: 153144\n", 539 | "episode: 245/5000, game score: 180.0, reward: 180.0, avg reward: 319.714285714, time: 449, total time: 153594\n", 540 | "episode: 246/5000, game score: 290.0, reward: 290.0, avg reward: 319.593495935, time: 659, total time: 154254\n", 541 | "episode: 247/5000, game score: 190.0, reward: 190.0, avg reward: 319.068825911, time: 416, total time: 154671\n", 542 | "episode: 248/5000, game score: 170.0, reward: 170.0, avg reward: 318.467741935, time: 424, total time: 155096\n", 543 | "episode: 249/5000, game score: 260.0, reward: 260.0, avg reward: 318.232931727, time: 486, total time: 155583\n", 544 | "episode: 250/5000, game score: 310.0, reward: 310.0, avg reward: 318.2, time: 559, total time: 156143\n", 545 | "episode: 251/5000, game score: 240.0, reward: 240.0, avg reward: 317.888446215, time: 590, total time: 156734\n", 546 | "episode: 252/5000, game score: 150.0, reward: 150.0, avg reward: 317.222222222, time: 434, total time: 157169\n", 547 | "episode: 253/5000, game score: 190.0, reward: 190.0, avg reward: 316.719367589, time: 486, total time: 157656\n", 548 | "episode: 254/5000, game score: 300.0, reward: 300.0, avg reward: 316.653543307, time: 534, total time: 158191\n", 549 | "episode: 255/5000, game score: 240.0, reward: 240.0, avg reward: 316.352941176, time: 475, total time: 158667\n", 550 | "episode: 256/5000, game score: 210.0, reward: 210.0, avg reward: 315.9375, time: 384, total time: 159052\n", 551 | "episode: 257/5000, game score: 160.0, reward: 160.0, avg reward: 315.3307393, time: 558, total time: 159611\n", 552 | "episode: 258/5000, game score: 290.0, reward: 290.0, avg reward: 315.23255814, time: 618, total time: 160230\n", 553 | "episode: 259/5000, game score: 300.0, reward: 300.0, avg reward: 315.173745174, time: 633, total time: 160864\n", 554 | "episode: 260/5000, game score: 310.0, reward: 310.0, avg reward: 315.153846154, time: 579, total time: 161444\n", 555 | "episode: 261/5000, game score: 290.0, reward: 290.0, avg reward: 315.057471264, time: 647, total time: 162092\n", 556 | "episode: 262/5000, game score: 210.0, reward: 210.0, avg reward: 314.65648855, time: 441, total time: 162534\n", 557 | "episode: 263/5000, game score: 210.0, reward: 210.0, avg reward: 314.258555133, time: 642, total time: 163177\n", 558 | "episode: 264/5000, game score: 140.0, reward: 140.0, avg reward: 313.598484848, time: 584, total time: 163762\n", 559 | "episode: 265/5000, game score: 200.0, reward: 200.0, avg reward: 313.169811321, time: 474, total time: 164237\n", 560 | "episode: 266/5000, game score: 290.0, reward: 290.0, avg reward: 313.082706767, time: 618, total time: 164856\n", 561 | "episode: 267/5000, game score: 120.0, reward: 120.0, avg reward: 312.359550562, time: 467, total time: 165324\n", 562 | "episode: 268/5000, game score: 210.0, reward: 210.0, avg reward: 311.97761194, time: 564, total time: 165889\n", 563 | "episode: 269/5000, game score: 960.0, reward: 960.0, avg reward: 314.3866171, time: 679, total time: 166569\n", 564 | "episode: 270/5000, game score: 200.0, reward: 200.0, avg reward: 313.962962963, time: 628, total time: 167198\n", 565 | "episode: 271/5000, game score: 300.0, reward: 300.0, avg reward: 
313.911439114, time: 729, total time: 167928\n", 566 | "episode: 272/5000, game score: 140.0, reward: 140.0, avg reward: 313.272058824, time: 350, total time: 168279\n", 567 | "episode: 273/5000, game score: 290.0, reward: 290.0, avg reward: 313.186813187, time: 674, total time: 168954\n", 568 | "episode: 274/5000, game score: 390.0, reward: 390.0, avg reward: 313.467153285, time: 592, total time: 169547\n", 569 | "episode: 275/5000, game score: 180.0, reward: 180.0, avg reward: 312.981818182, time: 467, total time: 170015\n", 570 | "episode: 276/5000, game score: 260.0, reward: 260.0, avg reward: 312.789855072, time: 541, total time: 170557\n", 571 | "episode: 277/5000, game score: 360.0, reward: 360.0, avg reward: 312.960288809, time: 721, total time: 171279\n", 572 | "episode: 278/5000, game score: 160.0, reward: 160.0, avg reward: 312.410071942, time: 470, total time: 171750\n", 573 | "episode: 279/5000, game score: 190.0, reward: 190.0, avg reward: 311.971326165, time: 507, total time: 172258\n", 574 | "episode: 280/5000, game score: 150.0, reward: 150.0, avg reward: 311.392857143, time: 447, total time: 172706\n", 575 | "episode: 281/5000, game score: 180.0, reward: 180.0, avg reward: 310.925266904, time: 517, total time: 173224\n", 576 | "episode: 282/5000, game score: 180.0, reward: 180.0, avg reward: 310.460992908, time: 412, total time: 173637\n", 577 | "episode: 283/5000, game score: 190.0, reward: 190.0, avg reward: 310.035335689, time: 571, total time: 174209\n", 578 | "episode: 284/5000, game score: 160.0, reward: 160.0, avg reward: 309.507042254, time: 335, total time: 174545\n", 579 | "episode: 285/5000, game score: 230.0, reward: 230.0, avg reward: 309.228070175, time: 488, total time: 175034\n", 580 | "episode: 286/5000, game score: 280.0, reward: 280.0, avg reward: 309.125874126, time: 653, total time: 175688\n", 581 | "episode: 287/5000, game score: 250.0, reward: 250.0, avg reward: 308.919860627, time: 492, total time: 176181\n", 582 | "episode: 288/5000, game score: 260.0, reward: 260.0, avg reward: 308.75, time: 511, total time: 176693\n", 583 | "episode: 289/5000, game score: 260.0, reward: 260.0, avg reward: 308.581314879, time: 605, total time: 177299\n", 584 | "episode: 290/5000, game score: 250.0, reward: 250.0, avg reward: 308.379310345, time: 556, total time: 177856\n", 585 | "episode: 291/5000, game score: 290.0, reward: 290.0, avg reward: 308.316151203, time: 629, total time: 178486\n", 586 | "episode: 292/5000, game score: 190.0, reward: 190.0, avg reward: 307.910958904, time: 533, total time: 179020\n", 587 | "episode: 293/5000, game score: 320.0, reward: 320.0, avg reward: 307.95221843, time: 825, total time: 179846\n", 588 | "episode: 294/5000, game score: 260.0, reward: 260.0, avg reward: 307.789115646, time: 655, total time: 180502\n", 589 | "episode: 295/5000, game score: 240.0, reward: 240.0, avg reward: 307.559322034, time: 609, total time: 181112\n", 590 | "episode: 296/5000, game score: 210.0, reward: 210.0, avg reward: 307.22972973, time: 538, total time: 181651\n", 591 | "episode: 297/5000, game score: 230.0, reward: 230.0, avg reward: 306.96969697, time: 557, total time: 182209\n", 592 | "episode: 298/5000, game score: 290.0, reward: 290.0, avg reward: 306.912751678, time: 531, total time: 182741\n", 593 | "episode: 299/5000, game score: 280.0, reward: 280.0, avg reward: 306.822742475, time: 612, total time: 183354\n", 594 | "episode: 300/5000, game score: 250.0, reward: 250.0, avg reward: 306.633333333, time: 488, total time: 183843\n", 595 | 
"episode: 301/5000, game score: 340.0, reward: 340.0, avg reward: 306.744186047, time: 774, total time: 184618\n", 596 | "episode: 302/5000, game score: 170.0, reward: 170.0, avg reward: 306.291390728, time: 434, total time: 185053\n", 597 | "episode: 303/5000, game score: 270.0, reward: 270.0, avg reward: 306.171617162, time: 582, total time: 185636\n", 598 | "episode: 304/5000, game score: 250.0, reward: 250.0, avg reward: 305.986842105, time: 508, total time: 186145\n", 599 | "episode: 305/5000, game score: 330.0, reward: 330.0, avg reward: 306.06557377, time: 639, total time: 186785\n" 600 | ] 601 | }, 602 | { 603 | "name": "stdout", 604 | "output_type": "stream", 605 | "text": [ 606 | "episode: 306/5000, game score: 240.0, reward: 240.0, avg reward: 305.849673203, time: 421, total time: 187207\n", 607 | "episode: 307/5000, game score: 230.0, reward: 230.0, avg reward: 305.602605863, time: 506, total time: 187714\n", 608 | "episode: 308/5000, game score: 160.0, reward: 160.0, avg reward: 305.12987013, time: 369, total time: 188084\n", 609 | "episode: 309/5000, game score: 280.0, reward: 280.0, avg reward: 305.048543689, time: 603, total time: 188688\n", 610 | "episode: 310/5000, game score: 260.0, reward: 260.0, avg reward: 304.903225806, time: 579, total time: 189268\n", 611 | "episode: 311/5000, game score: 180.0, reward: 180.0, avg reward: 304.501607717, time: 522, total time: 189791\n", 612 | "episode: 312/5000, game score: 180.0, reward: 180.0, avg reward: 304.102564103, time: 481, total time: 190273\n", 613 | "episode: 313/5000, game score: 190.0, reward: 190.0, avg reward: 303.738019169, time: 607, total time: 190881\n", 614 | "episode: 314/5000, game score: 220.0, reward: 220.0, avg reward: 303.47133758, time: 530, total time: 191412\n", 615 | "episode: 315/5000, game score: 190.0, reward: 190.0, avg reward: 303.111111111, time: 593, total time: 192006\n", 616 | "episode: 316/5000, game score: 150.0, reward: 150.0, avg reward: 302.626582278, time: 528, total time: 192535\n", 617 | "episode: 317/5000, game score: 270.0, reward: 270.0, avg reward: 302.523659306, time: 638, total time: 193174\n", 618 | "episode: 318/5000, game score: 180.0, reward: 180.0, avg reward: 302.13836478, time: 475, total time: 193650\n", 619 | "episode: 319/5000, game score: 520.0, reward: 520.0, avg reward: 302.821316614, time: 1188, total time: 194839\n", 620 | "episode: 320/5000, game score: 320.0, reward: 320.0, avg reward: 302.875, time: 668, total time: 195508\n", 621 | "episode: 321/5000, game score: 120.0, reward: 120.0, avg reward: 302.30529595, time: 462, total time: 195971\n", 622 | "episode: 322/5000, game score: 320.0, reward: 320.0, avg reward: 302.360248447, time: 530, total time: 196502\n", 623 | "episode: 323/5000, game score: 260.0, reward: 260.0, avg reward: 302.229102167, time: 567, total time: 197070\n", 624 | "episode: 324/5000, game score: 220.0, reward: 220.0, avg reward: 301.975308642, time: 448, total time: 197519\n", 625 | "episode: 325/5000, game score: 220.0, reward: 220.0, avg reward: 301.723076923, time: 480, total time: 198000\n", 626 | "episode: 326/5000, game score: 290.0, reward: 290.0, avg reward: 301.687116564, time: 529, total time: 198530\n", 627 | "episode: 327/5000, game score: 250.0, reward: 250.0, avg reward: 301.529051988, time: 664, total time: 199195\n", 628 | "episode: 328/5000, game score: 180.0, reward: 180.0, avg reward: 301.158536585, time: 544, total time: 199740\n", 629 | "episode: 329/5000, game score: 210.0, reward: 210.0, avg reward: 300.881458967, 
time: 488, total time: 200229\n", 630 | "episode: 330/5000, game score: 180.0, reward: 180.0, avg reward: 300.515151515, time: 468, total time: 200698\n", 631 | "episode: 331/5000, game score: 300.0, reward: 300.0, avg reward: 300.513595166, time: 667, total time: 201366\n", 632 | "episode: 332/5000, game score: 150.0, reward: 150.0, avg reward: 300.060240964, time: 595, total time: 201962\n", 633 | "episode: 333/5000, game score: 230.0, reward: 230.0, avg reward: 299.84984985, time: 470, total time: 202433\n", 634 | "episode: 334/5000, game score: 220.0, reward: 220.0, avg reward: 299.610778443, time: 611, total time: 203045\n", 635 | "episode: 335/5000, game score: 370.0, reward: 370.0, avg reward: 299.820895522, time: 731, total time: 203777\n", 636 | "episode: 336/5000, game score: 280.0, reward: 280.0, avg reward: 299.761904762, time: 581, total time: 204359\n", 637 | "episode: 337/5000, game score: 220.0, reward: 220.0, avg reward: 299.525222552, time: 528, total time: 204888\n", 638 | "episode: 338/5000, game score: 210.0, reward: 210.0, avg reward: 299.26035503, time: 573, total time: 205462\n", 639 | "episode: 339/5000, game score: 290.0, reward: 290.0, avg reward: 299.233038348, time: 577, total time: 206040\n", 640 | "episode: 340/5000, game score: 220.0, reward: 220.0, avg reward: 299.0, time: 555, total time: 206596\n", 641 | "episode: 341/5000, game score: 170.0, reward: 170.0, avg reward: 298.62170088, time: 333, total time: 206930\n", 642 | "episode: 342/5000, game score: 510.0, reward: 510.0, avg reward: 299.239766082, time: 1016, total time: 207947\n", 643 | "episode: 343/5000, game score: 370.0, reward: 370.0, avg reward: 299.44606414, time: 830, total time: 208778\n", 644 | "episode: 344/5000, game score: 210.0, reward: 210.0, avg reward: 299.186046512, time: 454, total time: 209233\n", 645 | "episode: 345/5000, game score: 190.0, reward: 190.0, avg reward: 298.869565217, time: 503, total time: 209737\n", 646 | "episode: 346/5000, game score: 240.0, reward: 240.0, avg reward: 298.699421965, time: 521, total time: 210259\n", 647 | "episode: 347/5000, game score: 290.0, reward: 290.0, avg reward: 298.674351585, time: 608, total time: 210868\n", 648 | "episode: 348/5000, game score: 240.0, reward: 240.0, avg reward: 298.505747126, time: 525, total time: 211394\n", 649 | "episode: 349/5000, game score: 270.0, reward: 270.0, avg reward: 298.424068768, time: 577, total time: 211972\n", 650 | "episode: 350/5000, game score: 260.0, reward: 260.0, avg reward: 298.314285714, time: 629, total time: 212602\n", 651 | "episode: 351/5000, game score: 1000.0, reward: 1000.0, avg reward: 300.313390313, time: 1439, total time: 214042\n", 652 | "episode: 352/5000, game score: 230.0, reward: 230.0, avg reward: 300.113636364, time: 510, total time: 214553\n", 653 | "episode: 353/5000, game score: 300.0, reward: 300.0, avg reward: 300.113314448, time: 496, total time: 215050\n", 654 | "episode: 354/5000, game score: 240.0, reward: 240.0, avg reward: 299.943502825, time: 602, total time: 215653\n", 655 | "episode: 355/5000, game score: 290.0, reward: 290.0, avg reward: 299.915492958, time: 445, total time: 216099\n", 656 | "episode: 356/5000, game score: 190.0, reward: 190.0, avg reward: 299.606741573, time: 380, total time: 216480\n", 657 | "episode: 357/5000, game score: 250.0, reward: 250.0, avg reward: 299.467787115, time: 543, total time: 217024\n", 658 | "episode: 358/5000, game score: 210.0, reward: 210.0, avg reward: 299.217877095, time: 667, total time: 217692\n", 659 | "episode: 
359/5000, game score: 210.0, reward: 210.0, avg reward: 298.969359331, time: 519, total time: 218212\n", 660 | "episode: 360/5000, game score: 230.0, reward: 230.0, avg reward: 298.777777778, time: 546, total time: 218759\n", 661 | "episode: 361/5000, game score: 150.0, reward: 150.0, avg reward: 298.36565097, time: 567, total time: 219327\n", 662 | "episode: 362/5000, game score: 190.0, reward: 190.0, avg reward: 298.066298343, time: 505, total time: 219833\n", 663 | "episode: 363/5000, game score: 180.0, reward: 180.0, avg reward: 297.741046832, time: 428, total time: 220262\n", 664 | "episode: 364/5000, game score: 180.0, reward: 180.0, avg reward: 297.417582418, time: 495, total time: 220758\n", 665 | "episode: 365/5000, game score: 180.0, reward: 180.0, avg reward: 297.095890411, time: 469, total time: 221228\n", 666 | "episode: 366/5000, game score: 140.0, reward: 140.0, avg reward: 296.666666667, time: 476, total time: 221705\n", 667 | "episode: 367/5000, game score: 260.0, reward: 260.0, avg reward: 296.566757493, time: 522, total time: 222228\n", 668 | "episode: 368/5000, game score: 210.0, reward: 210.0, avg reward: 296.331521739, time: 401, total time: 222630\n", 669 | "episode: 369/5000, game score: 150.0, reward: 150.0, avg reward: 295.93495935, time: 403, total time: 223034\n", 670 | "episode: 370/5000, game score: 120.0, reward: 120.0, avg reward: 295.459459459, time: 581, total time: 223616\n", 671 | "episode: 371/5000, game score: 210.0, reward: 210.0, avg reward: 295.229110512, time: 529, total time: 224146\n", 672 | "episode: 372/5000, game score: 210.0, reward: 210.0, avg reward: 295.0, time: 496, total time: 224643\n", 673 | "episode: 373/5000, game score: 90.0, reward: 90.0, avg reward: 294.450402145, time: 615, total time: 225259\n", 674 | "episode: 374/5000, game score: 210.0, reward: 210.0, avg reward: 294.22459893, time: 577, total time: 225837\n", 675 | "episode: 375/5000, game score: 430.0, reward: 430.0, avg reward: 294.586666667, time: 891, total time: 226729\n", 676 | "episode: 376/5000, game score: 170.0, reward: 170.0, avg reward: 294.255319149, time: 418, total time: 227148\n", 677 | "episode: 377/5000, game score: 230.0, reward: 230.0, avg reward: 294.084880637, time: 527, total time: 227676\n", 678 | "episode: 378/5000, game score: 300.0, reward: 300.0, avg reward: 294.100529101, time: 636, total time: 228313\n", 679 | "episode: 379/5000, game score: 190.0, reward: 190.0, avg reward: 293.82585752, time: 543, total time: 228857\n", 680 | "episode: 380/5000, game score: 440.0, reward: 440.0, avg reward: 294.210526316, time: 803, total time: 229661\n" 681 | ] 682 | }, 683 | { 684 | "name": "stdout", 685 | "output_type": "stream", 686 | "text": [ 687 | "episode: 381/5000, game score: 170.0, reward: 170.0, avg reward: 293.884514436, time: 421, total time: 230083\n", 688 | "episode: 382/5000, game score: 230.0, reward: 230.0, avg reward: 293.717277487, time: 587, total time: 230671\n", 689 | "episode: 383/5000, game score: 190.0, reward: 190.0, avg reward: 293.446475196, time: 458, total time: 231130\n", 690 | "episode: 384/5000, game score: 220.0, reward: 220.0, avg reward: 293.255208333, time: 534, total time: 231665\n", 691 | "episode: 385/5000, game score: 170.0, reward: 170.0, avg reward: 292.935064935, time: 556, total time: 232222\n", 692 | "episode: 386/5000, game score: 160.0, reward: 160.0, avg reward: 292.590673575, time: 487, total time: 232710\n", 693 | "episode: 387/5000, game score: 130.0, reward: 130.0, avg reward: 292.170542636, time: 389, 
total time: 233100\n", 694 | "episode: 388/5000, game score: 130.0, reward: 130.0, avg reward: 291.75257732, time: 342, total time: 233443\n", 695 | "episode: 389/5000, game score: 230.0, reward: 230.0, avg reward: 291.593830334, time: 616, total time: 234060\n", 696 | "episode: 390/5000, game score: 180.0, reward: 180.0, avg reward: 291.307692308, time: 541, total time: 234602\n", 697 | "episode: 391/5000, game score: 200.0, reward: 200.0, avg reward: 291.074168798, time: 517, total time: 235120\n", 698 | "episode: 392/5000, game score: 170.0, reward: 170.0, avg reward: 290.765306122, time: 499, total time: 235620\n", 699 | "episode: 393/5000, game score: 220.0, reward: 220.0, avg reward: 290.58524173, time: 399, total time: 236020\n", 700 | "episode: 394/5000, game score: 280.0, reward: 280.0, avg reward: 290.558375635, time: 547, total time: 236568\n", 701 | "episode: 395/5000, game score: 270.0, reward: 270.0, avg reward: 290.506329114, time: 606, total time: 237175\n", 702 | "episode: 396/5000, game score: 220.0, reward: 220.0, avg reward: 290.328282828, time: 501, total time: 237677\n", 703 | "episode: 397/5000, game score: 140.0, reward: 140.0, avg reward: 289.949622166, time: 378, total time: 238056\n", 704 | "episode: 398/5000, game score: 210.0, reward: 210.0, avg reward: 289.748743719, time: 513, total time: 238570\n", 705 | "episode: 399/5000, game score: 190.0, reward: 190.0, avg reward: 289.498746867, time: 638, total time: 239209\n", 706 | "episode: 400/5000, game score: 190.0, reward: 190.0, avg reward: 289.25, time: 629, total time: 239839\n", 707 | "episode: 401/5000, game score: 300.0, reward: 300.0, avg reward: 289.27680798, time: 637, total time: 240477\n", 708 | "episode: 402/5000, game score: 200.0, reward: 200.0, avg reward: 289.054726368, time: 533, total time: 241011\n", 709 | "episode: 403/5000, game score: 480.0, reward: 480.0, avg reward: 289.52853598, time: 885, total time: 241897\n", 710 | "episode: 404/5000, game score: 190.0, reward: 190.0, avg reward: 289.282178218, time: 532, total time: 242430\n", 711 | "episode: 405/5000, game score: 270.0, reward: 270.0, avg reward: 289.234567901, time: 602, total time: 243033\n", 712 | "episode: 406/5000, game score: 260.0, reward: 260.0, avg reward: 289.162561576, time: 662, total time: 243696\n", 713 | "episode: 407/5000, game score: 200.0, reward: 200.0, avg reward: 288.943488943, time: 549, total time: 244246\n", 714 | "episode: 408/5000, game score: 310.0, reward: 310.0, avg reward: 288.995098039, time: 685, total time: 244932\n", 715 | "episode: 409/5000, game score: 180.0, reward: 180.0, avg reward: 288.728606357, time: 496, total time: 245429\n", 716 | "episode: 410/5000, game score: 180.0, reward: 180.0, avg reward: 288.463414634, time: 601, total time: 246031\n", 717 | "episode: 411/5000, game score: 250.0, reward: 250.0, avg reward: 288.369829684, time: 576, total time: 246608\n", 718 | "episode: 412/5000, game score: 150.0, reward: 150.0, avg reward: 288.033980583, time: 453, total time: 247062\n", 719 | "episode: 413/5000, game score: 210.0, reward: 210.0, avg reward: 287.84503632, time: 634, total time: 247697\n", 720 | "episode: 414/5000, game score: 260.0, reward: 260.0, avg reward: 287.777777778, time: 616, total time: 248314\n", 721 | "episode: 415/5000, game score: 200.0, reward: 200.0, avg reward: 287.56626506, time: 458, total time: 248773\n", 722 | "episode: 416/5000, game score: 220.0, reward: 220.0, avg reward: 287.403846154, time: 438, total time: 249212\n", 723 | "episode: 417/5000, game 
score: 100.0, reward: 100.0, avg reward: 286.954436451, time: 437, total time: 249650\n", 724 | "episode: 418/5000, game score: 280.0, reward: 280.0, avg reward: 286.937799043, time: 595, total time: 250246\n", 725 | "episode: 419/5000, game score: 110.0, reward: 110.0, avg reward: 286.515513126, time: 482, total time: 250729\n", 726 | "episode: 420/5000, game score: 220.0, reward: 220.0, avg reward: 286.357142857, time: 467, total time: 251197\n", 727 | "episode: 421/5000, game score: 280.0, reward: 280.0, avg reward: 286.342042755, time: 703, total time: 251901\n", 728 | "episode: 422/5000, game score: 160.0, reward: 160.0, avg reward: 286.042654028, time: 522, total time: 252424\n", 729 | "episode: 423/5000, game score: 220.0, reward: 220.0, avg reward: 285.886524823, time: 552, total time: 252977\n", 730 | "episode: 424/5000, game score: 180.0, reward: 180.0, avg reward: 285.636792453, time: 612, total time: 253590\n", 731 | "episode: 425/5000, game score: 410.0, reward: 410.0, avg reward: 285.929411765, time: 776, total time: 254367\n", 732 | "episode: 426/5000, game score: 160.0, reward: 160.0, avg reward: 285.633802817, time: 544, total time: 254912\n", 733 | "episode: 427/5000, game score: 210.0, reward: 210.0, avg reward: 285.456674473, time: 576, total time: 255489\n", 734 | "episode: 428/5000, game score: 160.0, reward: 160.0, avg reward: 285.163551402, time: 492, total time: 255982\n", 735 | "episode: 429/5000, game score: 160.0, reward: 160.0, avg reward: 284.871794872, time: 447, total time: 256430\n", 736 | "episode: 430/5000, game score: 170.0, reward: 170.0, avg reward: 284.604651163, time: 530, total time: 256961\n", 737 | "episode: 431/5000, game score: 250.0, reward: 250.0, avg reward: 284.524361949, time: 553, total time: 257515\n", 738 | "episode: 432/5000, game score: 230.0, reward: 230.0, avg reward: 284.398148148, time: 568, total time: 258084\n", 739 | "episode: 433/5000, game score: 180.0, reward: 180.0, avg reward: 284.15704388, time: 605, total time: 258690\n", 740 | "episode: 434/5000, game score: 120.0, reward: 120.0, avg reward: 283.778801843, time: 374, total time: 259065\n", 741 | "episode: 435/5000, game score: 350.0, reward: 350.0, avg reward: 283.931034483, time: 632, total time: 259698\n", 742 | "episode: 436/5000, game score: 150.0, reward: 150.0, avg reward: 283.623853211, time: 595, total time: 260294\n", 743 | "episode: 437/5000, game score: 190.0, reward: 190.0, avg reward: 283.409610984, time: 632, total time: 260927\n", 744 | "episode: 438/5000, game score: 150.0, reward: 150.0, avg reward: 283.105022831, time: 558, total time: 261486\n", 745 | "episode: 439/5000, game score: 250.0, reward: 250.0, avg reward: 283.029612756, time: 508, total time: 261995\n", 746 | "episode: 440/5000, game score: 510.0, reward: 510.0, avg reward: 283.545454545, time: 944, total time: 262940\n", 747 | "episode: 441/5000, game score: 170.0, reward: 170.0, avg reward: 283.287981859, time: 614, total time: 263555\n", 748 | "episode: 442/5000, game score: 230.0, reward: 230.0, avg reward: 283.167420814, time: 568, total time: 264124\n", 749 | "episode: 443/5000, game score: 130.0, reward: 130.0, avg reward: 282.821670429, time: 491, total time: 264616\n", 750 | "episode: 444/5000, game score: 180.0, reward: 180.0, avg reward: 282.59009009, time: 498, total time: 265115\n", 751 | "episode: 445/5000, game score: 250.0, reward: 250.0, avg reward: 282.516853933, time: 479, total time: 265595\n", 752 | "episode: 446/5000, game score: 250.0, reward: 250.0, avg reward: 
282.443946188, time: 501, total time: 266097\n", 753 | "episode: 447/5000, game score: 360.0, reward: 360.0, avg reward: 282.617449664, time: 560, total time: 266658\n", 754 | "episode: 448/5000, game score: 240.0, reward: 240.0, avg reward: 282.522321429, time: 539, total time: 267198\n", 755 | "episode: 449/5000, game score: 190.0, reward: 190.0, avg reward: 282.316258352, time: 348, total time: 267547\n", 756 | "episode: 450/5000, game score: 280.0, reward: 280.0, avg reward: 282.311111111, time: 606, total time: 268154\n", 757 | "episode: 451/5000, game score: 180.0, reward: 180.0, avg reward: 282.084257206, time: 519, total time: 268674\n", 758 | "episode: 452/5000, game score: 280.0, reward: 280.0, avg reward: 282.079646018, time: 548, total time: 269223\n", 759 | "episode: 453/5000, game score: 300.0, reward: 300.0, avg reward: 282.119205298, time: 555, total time: 269779\n", 760 | "episode: 454/5000, game score: 260.0, reward: 260.0, avg reward: 282.070484581, time: 580, total time: 270360\n", 761 | "episode: 455/5000, game score: 170.0, reward: 170.0, avg reward: 281.824175824, time: 318, total time: 270679\n" 762 | ] 763 | }, 764 | { 765 | "name": "stdout", 766 | "output_type": "stream", 767 | "text": [ 768 | "episode: 456/5000, game score: 150.0, reward: 150.0, avg reward: 281.535087719, time: 588, total time: 271268\n", 769 | "episode: 457/5000, game score: 160.0, reward: 160.0, avg reward: 281.269146608, time: 482, total time: 271751\n", 770 | "episode: 458/5000, game score: 470.0, reward: 470.0, avg reward: 281.681222707, time: 859, total time: 272611\n", 771 | "episode: 459/5000, game score: 260.0, reward: 260.0, avg reward: 281.633986928, time: 599, total time: 273211\n", 772 | "episode: 460/5000, game score: 180.0, reward: 180.0, avg reward: 281.413043478, time: 428, total time: 273640\n", 773 | "episode: 461/5000, game score: 150.0, reward: 150.0, avg reward: 281.127982646, time: 547, total time: 274188\n", 774 | "episode: 462/5000, game score: 260.0, reward: 260.0, avg reward: 281.082251082, time: 524, total time: 274713\n", 775 | "episode: 463/5000, game score: 280.0, reward: 280.0, avg reward: 281.079913607, time: 546, total time: 275260\n", 776 | "episode: 464/5000, game score: 180.0, reward: 180.0, avg reward: 280.862068966, time: 516, total time: 275777\n", 777 | "episode: 465/5000, game score: 180.0, reward: 180.0, avg reward: 280.64516129, time: 486, total time: 276264\n", 778 | "episode: 466/5000, game score: 150.0, reward: 150.0, avg reward: 280.364806867, time: 474, total time: 276739\n", 779 | "episode: 467/5000, game score: 220.0, reward: 220.0, avg reward: 280.235546039, time: 564, total time: 277304\n", 780 | "episode: 468/5000, game score: 310.0, reward: 310.0, avg reward: 280.299145299, time: 557, total time: 277862\n", 781 | "episode: 469/5000, game score: 330.0, reward: 330.0, avg reward: 280.405117271, time: 520, total time: 278383\n", 782 | "episode: 470/5000, game score: 260.0, reward: 260.0, avg reward: 280.361702128, time: 588, total time: 278972\n", 783 | "episode: 471/5000, game score: 270.0, reward: 270.0, avg reward: 280.33970276, time: 600, total time: 279573\n", 784 | "episode: 472/5000, game score: 230.0, reward: 230.0, avg reward: 280.233050847, time: 588, total time: 280162\n", 785 | "episode: 473/5000, game score: 160.0, reward: 160.0, avg reward: 279.978858351, time: 421, total time: 280584\n", 786 | "episode: 474/5000, game score: 210.0, reward: 210.0, avg reward: 279.831223629, time: 431, total time: 281016\n", 787 | "episode: 
475/5000, game score: 250.0, reward: 250.0, avg reward: 279.768421053, time: 598, total time: 281615\n", 788 | "episode: 476/5000, game score: 200.0, reward: 200.0, avg reward: 279.600840336, time: 538, total time: 282154\n", 789 | "episode: 477/5000, game score: 280.0, reward: 280.0, avg reward: 279.601677149, time: 697, total time: 282852\n", 790 | "episode: 478/5000, game score: 280.0, reward: 280.0, avg reward: 279.60251046, time: 544, total time: 283397\n" 791 | ] 792 | } 793 | ], 794 | "source": [ 795 | "for e in range(episodes):\n", 796 | " total_reward = 0\n", 797 | " game_score = 0\n", 798 | " state = process_frame(env.reset())\n", 799 | " images = deque(maxlen=blend) # Array of images to be blended\n", 800 | " images.append(state)\n", 801 | " \n", 802 | " for skip in range(skip_start): # skip the start of each game\n", 803 | " env.step(0)\n", 804 | " \n", 805 | " for time in range(20000):\n", 806 | " env.render()\n", 807 | " total_time += 1\n", 808 | " \n", 809 | " # Every update_rate timesteps we update the target network parameters\n", 810 | " if total_time % agent.update_rate == 0:\n", 811 | " agent.update_target_model()\n", 812 | " \n", 813 | " # Return the avg of the last 4 frames\n", 814 | " state = blend_images(images, blend)\n", 815 | " \n", 816 | " # Transition Dynamics\n", 817 | " action = agent.act(state)\n", 818 | " next_state, reward, done, _ = env.step(action)\n", 819 | " \n", 820 | " game_score += reward\n", 821 | " total_reward += reward\n", 822 | " \n", 823 | " # Return the avg of the last 4 frames\n", 824 | " next_state = process_frame(next_state)\n", 825 | " images.append(next_state)\n", 826 | " next_state = blend_images(images, blend)\n", 827 | " \n", 828 | " # Store sequence in replay memory\n", 829 | " agent.remember(state, action, reward, next_state, done)\n", 830 | " \n", 831 | " state = next_state\n", 832 | " \n", 833 | " if done:\n", 834 | " all_rewards += game_score\n", 835 | " \n", 836 | " print(\"episode: {}/{}, game score: {}, reward: {}, avg reward: {}, time: {}, total time: {}\"\n", 837 | " .format(e+1, episodes, game_score, total_reward, all_rewards/(e+1), time, total_time))\n", 838 | " \n", 839 | " break\n", 840 | " \n", 841 | " if len(agent.memory) > batch_size:\n", 842 | " agent.replay(batch_size)" 843 | ] 844 | }, 845 | { 846 | "cell_type": "code", 847 | "execution_count": 12, 848 | "metadata": { 849 | "collapsed": true 850 | }, 851 | "outputs": [], 852 | "source": [ 853 | "agent.save('models/breakout_ddqn_50k-memory_783-steps')" 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": null, 859 | "metadata": { 860 | "collapsed": true 861 | }, 862 | "outputs": [], 863 | "source": [] 864 | } 865 | ], 866 | "metadata": { 867 | "kernelspec": { 868 | "display_name": "Python 2", 869 | "language": "python", 870 | "name": "python2" 871 | }, 872 | "language_info": { 873 | "codemirror_mode": { 874 | "name": "ipython", 875 | "version": 2 876 | }, 877 | "file_extension": ".py", 878 | "mimetype": "text/x-python", 879 | "name": "python", 880 | "nbconvert_exporter": "python", 881 | "pygments_lexer": "ipython2", 882 | "version": "2.7.14" 883 | } 884 | }, 885 | "nbformat": 4, 886 | "nbformat_minor": 2 887 | } 888 | -------------------------------------------------------------------------------- /DQN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "ImportError", 10 | 
"evalue": "cannot import name np_utils", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 14 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", 15 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mcollections\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdeque\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDense\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mActivation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFlatten\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mConv2D\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mMaxPooling2D\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAdam\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 16 | "\u001b[0;32m/Users/tim/anaconda2/lib/python2.7/site-packages/keras/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mabsolute_import\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mactivations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mapplications\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 17 | "\u001b[0;32m/Users/tim/anaconda2/lib/python2.7/site-packages/keras/utils/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mabsolute_import\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnp_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgeneric_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdata_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mio_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 18 | "\u001b[0;31mImportError\u001b[0m: cannot import name np_utils" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "import random\n", 24 | "import gym\n", 25 | "import numpy as np\n", 26 | "from collections import deque\n", 27 | 
"from keras.models import Sequential\n", 28 | "from keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D\n", 29 | "from keras.optimizers import Adam" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# Agent" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "class DQN_Agent:\n", 48 | " #\n", 49 | " # Initializes attributes and constructs CNN model and target_model\n", 50 | " #\n", 51 | " def __init__(self, state_size, action_size):\n", 52 | " self.state_size = state_size\n", 53 | " self.action_size = action_size\n", 54 | " self.memory = deque(maxlen=5000)\n", 55 | " \n", 56 | " # Hyperparameters\n", 57 | " self.gamma = 1.0 # Discount rate\n", 58 | " self.epsilon = 1.0 # Exploration rate\n", 59 | " self.epsilon_min = 0.1 # Minimal exploration rate (epsilon-greedy)\n", 60 | " self.epsilon_decay = 0.995 # Decay rate for epsilon\n", 61 | " self.update_rate = 1000 # Number of steps until updating the target network\n", 62 | " \n", 63 | " # Construct DQN models\n", 64 | " self.model = self._build_model()\n", 65 | " self.target_model = self._build_model()\n", 66 | " self.target_model.set_weights(self.model.get_weights())\n", 67 | " self.model.summary()\n", 68 | "\n", 69 | " #\n", 70 | " # Constructs CNN\n", 71 | " #\n", 72 | " def _build_model(self):\n", 73 | " model = Sequential()\n", 74 | " \n", 75 | " # Conv Layers\n", 76 | " model.add(Conv2D(32, (8, 8), strides=4, padding='same', input_shape=self.state_size))\n", 77 | " model.add(Activation('relu'))\n", 78 | " \n", 79 | " model.add(Conv2D(64, (4, 4), strides=2, padding='same'))\n", 80 | " model.add(Activation('relu'))\n", 81 | " \n", 82 | " model.add(Conv2D(64, (3, 3), strides=1, padding='same'))\n", 83 | " model.add(Activation('relu'))\n", 84 | " model.add(Flatten())\n", 85 | "\n", 86 | " # FC Layers\n", 87 | " model.add(Dense(512, activation='relu'))\n", 88 | " model.add(Dense(self.action_size, activation='linear'))\n", 89 | " \n", 90 | " model.compile(loss='mse', optimizer=Adam())\n", 91 | " return model\n", 92 | "\n", 93 | " #\n", 94 | " # Stores experience in replay memory\n", 95 | " #\n", 96 | " def remember(self, state, action, reward, next_state, done):\n", 97 | " self.memory.append((state, action, reward, next_state, done))\n", 98 | "\n", 99 | " #\n", 100 | " # Chooses action based on epsilon-greedy policy\n", 101 | " #\n", 102 | " def act(self, state):\n", 103 | " # Random exploration\n", 104 | " if np.random.rand() <= self.epsilon:\n", 105 | " return random.randrange(self.action_size)\n", 106 | " \n", 107 | " act_values = self.model.predict(state)\n", 108 | " \n", 109 | " return np.argmax(act_values[0]) # Returns action using policy\n", 110 | "\n", 111 | " #\n", 112 | " # Trains the model using randomly selected experiences in the replay memory\n", 113 | " #\n", 114 | " def replay(self, batch_size):\n", 115 | " minibatch = random.sample(self.memory, batch_size)\n", 116 | " \n", 117 | " for state, action, reward, next_state, done in minibatch:\n", 118 | " \n", 119 | " if not done:\n", 120 | " target = (reward + self.gamma * np.amax(self.target_model.predict(next_state)))\n", 121 | " else:\n", 122 | " target = reward\n", 123 | " \n", 124 | " # Construct the target vector as follows:\n", 125 | " # 1. Use the current model to output the Q-value predictions\n", 126 | " target_f = self.model.predict(state)\n", 127 | " \n", 128 | " # 2. 
Rewrite the chosen action value with the computed target\n", 129 | " target_f[0][action] = target\n", 130 | " \n", 131 | " # 3. Use vectors in the objective computation\n", 132 | " self.model.fit(state, target_f, epochs=1, verbose=0)\n", 133 | " \n", 134 | " if self.epsilon > self.epsilon_min:\n", 135 | " self.epsilon *= self.epsilon_decay\n", 136 | "\n", 137 | " #\n", 138 | " # Sets the target model parameters to the current model parameters\n", 139 | " #\n", 140 | " def update_target_model(self):\n", 141 | " self.target_model.set_weights(self.model.get_weights())\n", 142 | " \n", 143 | " #\n", 144 | " # Loads a saved model\n", 145 | " #\n", 146 | " def load(self, name):\n", 147 | " self.model.load_weights(name)\n", 148 | "\n", 149 | " #\n", 150 | " # Saves parameters of a trained model\n", 151 | " #\n", 152 | " def save(self, name):\n", 153 | " self.model.save_weights(name)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# Preprocessing" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "# Helpful preprocessing taken from github.com/ageron/tiny-dqn\n", 172 | "def process_frame(frame):\n", 173 | " mspacman_color = np.array([210, 164, 74]).mean()\n", 174 | " img = frame[1:176:2, ::2] # Crop and downsize\n", 175 | " img = img.mean(axis=2) # Convert to greyscale\n", 176 | " img[img==mspacman_color] = 0 # Improve contrast by making pacman white\n", 177 | " img = (img - 128) / 128 - 1 # Normalize from -1 to 1.\n", 178 | " \n", 179 | " return np.expand_dims(img.reshape(88, 80, 1), axis=0)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "def blend_images(images, blend):\n", 191 | " avg_image = np.expand_dims(np.zeros((88, 80, 1), np.float64), axis=0)\n", 192 | "\n", 193 | " for image in images:\n", 194 | " avg_image += image\n", 195 | " \n", 196 | " if len(images) < blend:\n", 197 | " return avg_image / len(images)\n", 198 | " else:\n", 199 | " return avg_image / blend" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "# Environment" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": true 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "env = gym.make('MsPacman-v0')\n", 218 | "state_size = (88, 80, 1)\n", 219 | "action_size = env.action_space.n\n", 220 | "agent = DQN_Agent(state_size, action_size)\n", 221 | "\n", 222 | "episodes = 500\n", 223 | "batch_size = 8\n", 224 | "skip_start = 90 # MsPacman-v0 waits for 90 actions before the episode begins\n", 225 | "total_time = 0 # Counter for total number of steps taken\n", 226 | "all_rewards = 0 # Used to compute avg reward over time\n", 227 | "blend = 4 # Number of images to blend\n", 228 | "done = False" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "collapsed": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "for e in range(episodes):\n", 240 | " total_reward = 0\n", 241 | " game_score = 0\n", 242 | " state = process_frame(env.reset())\n", 243 | " images = deque(maxlen=blend) # Array of images to be blended\n", 244 | " images.append(state)\n", 245 | " \n", 246 | " for skip in range(skip_start): # skip the start of each game\n", 247 | 
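"        # Step the environment with action 0 (the no-op in Gym's Atari action set) so the\n",
"        # agent simply idles through the fixed number of intro frames before play begins\n",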
" env.step(0)\n", 248 | " \n", 249 | " for time in range(20000):\n", 250 | " env.render()\n", 251 | " total_time += 1\n", 252 | " \n", 253 | " # Every update_rate timesteps we update the target network parameters\n", 254 | " if total_time % agent.update_rate == 0:\n", 255 | " agent.update_target_model()\n", 256 | " \n", 257 | " # Return the avg of the last 4 frames\n", 258 | " state = blend_images(images, blend)\n", 259 | " \n", 260 | " # Transition Dynamics\n", 261 | " action = agent.act(state)\n", 262 | " next_state, reward, done, _ = env.step(action)\n", 263 | " \n", 264 | " # Return the avg of the last 4 frames\n", 265 | " next_state = process_frame(next_state)\n", 266 | " images.append(next_state)\n", 267 | " next_state = blend_images(images, blend)\n", 268 | " \n", 269 | " # Store sequence in replay memory\n", 270 | " agent.remember(state, action, reward, next_state, done)\n", 271 | " \n", 272 | " state = next_state\n", 273 | " game_score += reward\n", 274 | " reward -= 1 # Punish behavior which does not accumulate reward\n", 275 | " total_reward += reward\n", 276 | " \n", 277 | " if done:\n", 278 | " all_rewards += game_score\n", 279 | " \n", 280 | " print(\"episode: {}/{}, game score: {}, reward: {}, avg reward: {}, time: {}, total time: {}\"\n", 281 | " .format(e+1, episodes, game_score, total_reward, all_rewards/(e+1), time, total_time))\n", 282 | " \n", 283 | " break\n", 284 | " \n", 285 | " if len(agent.memory) > batch_size:\n", 286 | " agent.replay(batch_size)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "agent.save('models/5k-memory_1k-games')" 298 | ] 299 | } 300 | ], 301 | "metadata": { 302 | "kernelspec": { 303 | "display_name": "Python 2", 304 | "language": "python", 305 | "name": "python2" 306 | }, 307 | "language_info": { 308 | "codemirror_mode": { 309 | "name": "ipython", 310 | "version": 2 311 | }, 312 | "file_extension": ".py", 313 | "mimetype": "text/x-python", 314 | "name": "python", 315 | "nbconvert_exporter": "python", 316 | "pygments_lexer": "ipython2", 317 | "version": "2.7.14" 318 | } 319 | }, 320 | "nbformat": 4, 321 | "nbformat_minor": 2 322 | } 323 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Q-network 2 | Keras implementation of DQN (DQN.ipynb) for MsPacman-v0 from OpenAI Gym. 3 | 4 | Implements Deep Q-network (DQN) in Keras following the architecture proposed in the 2013 paper by V. Mnih et al., "Playing Atari with Deep Reinforcement Learning": arXiv:1312.5602. See: http://www.davidqiu.com:8888/research/nature14236.pdf 5 | 6 | The agent learns to play the MsPacman-v0 Gym environment. 7 | 8 | ![Alt text](mspacman.jpg?raw=true "Title") 9 | 10 | I modified the example found from https://keon.io/deep-q-learning/ by implementing the CNN model, target model logic, and frame averaging (among other things). 11 | 12 | Hyperparameters were chosen according to the original paper as well as from https://github.com/ageron/tiny-dqn which also provided the image preprocessing method. One key trick I found lead to better policies was introducing a fixed penalty of -1 at each action which did not naturally have a reward. 13 | 14 | --- 15 | # Instructions 16 | This is a Python 3 project and you should have a local Python 3 `venv`. 17 | 1. 
Install libraries: `$ pip install -r requirements.txt` 18 | 2. Run notebooks: `$ jupyter notebook` 19 | 20 | --- 21 | # Double Deep Q-network 22 | Double Deep Q-network (DDQN.ipynb) is implemented. See: https://arxiv.org/pdf/1509.06461.pdf 23 | 24 | --- 25 | # Future Work 26 | * Try different Atari environments 27 | * Experiment with hyperparameters 28 | * Classical algorithm problems (learn functions) 29 | -------------------------------------------------------------------------------- /mspacman.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moduIo/Deep-Q-network/e89c8e9c88e69aee02b27c5b0db3d02e70589e3b/mspacman.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.11.0 2 | appnope==0.1.2 3 | argon2-cffi==20.1.0 4 | astunparse==1.6.3 5 | async-generator==1.10 6 | atari-py==0.2.6 7 | attrs==20.3.0 8 | backcall==0.2.0 9 | bleach==3.3.0 10 | cached-property==1.5.2 11 | cachetools==4.2.1 12 | certifi==2020.12.5 13 | cffi==1.14.5 14 | chardet==4.0.0 15 | cloudpickle==1.6.0 16 | decorator==4.4.2 17 | defusedxml==0.6.0 18 | entrypoints==0.3 19 | flatbuffers==1.12 20 | future==0.18.2 21 | gast==0.3.3 22 | google-auth==1.27.0 23 | google-auth-oauthlib==0.4.2 24 | google-pasta==0.2.0 25 | grpcio==1.32.0 26 | gym==0.18.0 27 | h5py==2.10.0 28 | idna==2.10 29 | importlib-metadata==3.7.0 30 | ipykernel==5.5.0 31 | ipython==7.16.1 32 | ipython-genutils==0.2.0 33 | jedi==0.18.0 34 | Jinja2==2.11.3 35 | jsonschema==3.2.0 36 | jupyter-client==6.1.11 37 | jupyter-core==4.7.1 38 | jupyterlab-pygments==0.1.2 39 | Keras==2.4.3 40 | Keras-Preprocessing==1.1.2 41 | Markdown==3.3.4 42 | MarkupSafe==1.1.1 43 | mistune==0.8.4 44 | nbclient==0.5.2 45 | nbconvert==6.0.7 46 | nbformat==5.1.2 47 | nest-asyncio==1.5.1 48 | notebook==6.2.0 49 | np-utils==0.5.12.1 50 | numpy==1.19.5 51 | oauthlib==3.1.0 52 | opencv-python==4.5.1.48 53 | opt-einsum==3.3.0 54 | packaging==20.9 55 | pandocfilters==1.4.3 56 | parso==0.8.1 57 | pexpect==4.8.0 58 | pickleshare==0.7.5 59 | Pillow==7.2.0 60 | prometheus-client==0.9.0 61 | prompt-toolkit==3.0.16 62 | protobuf==3.15.3 63 | ptyprocess==0.7.0 64 | pyasn1==0.4.8 65 | pyasn1-modules==0.2.8 66 | pycparser==2.20 67 | pyglet==1.5.0 68 | Pygments==2.8.0 69 | pyparsing==2.4.7 70 | pyrsistent==0.17.3 71 | python-dateutil==2.8.1 72 | PyYAML==5.4.1 73 | pyzmq==22.0.3 74 | requests==2.25.1 75 | requests-oauthlib==1.3.0 76 | rsa==4.7.2 77 | scipy==1.5.4 78 | Send2Trash==1.5.0 79 | six==1.15.0 80 | tensorboard==2.4.1 81 | tensorboard-plugin-wit==1.8.0 82 | tensorflow==2.4.1 83 | tensorflow-estimator==2.4.0 84 | termcolor==1.1.0 85 | terminado==0.9.2 86 | testpath==0.4.4 87 | tornado==6.1 88 | traitlets==4.3.3 89 | typing-extensions==3.7.4.3 90 | urllib3==1.26.3 91 | wcwidth==0.2.5 92 | webencodings==0.5.1 93 | Werkzeug==1.0.1 94 | wrapt==1.12.1 95 | zipp==3.4.0 96 | --------------------------------------------------------------------------------