├── .gitignore ├── 0 - Introduction to Gym.ipynb ├── 1 - Vanilla Policy Gradient (REINFORCE) [CartPole].ipynb ├── 1_policy_gradient.ipynb ├── 2 - Actor Critic [CartPole].ipynb ├── 2_q_learning.ipynb ├── 3 - Advantage Actor Critic (A2C) [CartPole].ipynb ├── 3_advantage_actor_critic.ipynb ├── 3a - Advantage Actor Critic (A2C) [LunarLander].ipynb ├── 4 - Generalized Advantage Estimation (GAE) [CartPole].ipynb ├── 4a - Generalized Advantage Estimation (GAE) [LunarLander].ipynb ├── 5 - Proximal Policy Optimization (PPO) [CartPole].ipynb ├── 5a - Proximal Policy Optimization (PPO) [LunarLander].ipynb ├── 8 - n step A2C.ipynb ├── LICENSE ├── README.md ├── checkpoint_viz.ipynb ├── dqn_working.ipynb ├── n_step_a2c.py ├── q_learning.py └── runner.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | checkpoints/ -------------------------------------------------------------------------------- /0 - Introduction to Gym.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## introduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "0.15.4\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np\n", 28 | "import gym\n", 29 | "\n", 30 | "print(gym.__version__)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "https://gym.openai.com/envs/" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "There are 859 gym environments. Such as ['Copy-v0', 'RepeatCopy-v0', 'ReversedAddition-v0', 'ReversedAddition3-v0', 'DuplicatedInput-v0', 'Reverse-v0', 'CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v0', 'Acrobot-v1']\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "all_envs = gym.envs.registry.all()\n", 55 | "env_ids = [env.id for env in all_envs]\n", 56 | "\n", 57 | "print(f'There are {len(env_ids)} gym environments. Such as {env_ids[:12]}')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "https://gym.openai.com/envs/CartPole-v1/" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## discrete action space environment" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "env = gym.make('CartPole-v1')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "observation space is: Box(4,)\n", 93 | "is observation space discrete? False\n", 94 | "is observation space continuous? True\n", 95 | "observation space shape: (4,)\n", 96 | "observation space high values? [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n", 97 | "observation space low values? [-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "print('observation space is:', env.observation_space)\n", 103 | "\n", 104 | "print('is observation space discrete?', isinstance(env.observation_space, gym.spaces.Discrete))\n", 105 | "print('is observation space continuous?', isinstance(env.observation_space, gym.spaces.Box))\n", 106 | "\n", 107 | "print('observation space shape:', env.observation_space.shape)\n", 108 | "\n", 109 | "print('observation space high values?', env.observation_space.high)\n", 110 | "print('observation space low values?', env.observation_space.low)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 5, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "action space is: Discrete(2)\n", 123 | "is action space discrete? True\n", 124 | "is action space continuous? False\n", 125 | "action space shape: 2\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "print('action space is:', env.action_space)\n", 131 | "\n", 132 | "print('is action space discrete?', isinstance(env.action_space, gym.spaces.Discrete))\n", 133 | "print('is action space continuous?', isinstance(env.action_space, gym.spaces.Box))\n", 134 | "\n", 135 | "print('action space shape:', env.action_space.n)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 9, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "500\n", 148 | "475.0\n", 149 | "False\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "print(env.spec.max_episode_steps)\n", 155 | "print(env.spec.reward_threshold)\n", 156 | "print(env.spec.nondeterministic)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## continuous action space environment" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 10, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "env = gym.make('Pendulum-v0')" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 11, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "observation space is: Box(3,)\n", 185 | "is observation space discrete? False\n", 186 | "is observation space continuous? True\n", 187 | "observation space shape: (3,)\n", 188 | "observation space high values? [1. 1. 8.]\n", 189 | "observation space low values? [-1. -1. -8.]\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "print('observation space is:', env.observation_space)\n", 195 | "\n", 196 | "print('is observation space discrete?', isinstance(env.observation_space, gym.spaces.Discrete))\n", 197 | "print('is observation space continuous?', isinstance(env.observation_space, gym.spaces.Box))\n", 198 | "\n", 199 | "print('observation space shape:', env.observation_space.shape)\n", 200 | "\n", 201 | "print('observation space high values?', env.observation_space.high)\n", 202 | "print('observation space low values?', env.observation_space.low)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 12, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "action space is: Box(1,)\n", 215 | "is action space discrete? False\n", 216 | "is action space continuous? True\n", 217 | "action space shape: (1,)\n", 218 | "action space high values? [2.]\n", 219 | "action space low values? [-2.]\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "print('action space is:', env.action_space)\n", 225 | "\n", 226 | "print('is action space discrete?', isinstance(env.action_space, gym.spaces.Discrete))\n", 227 | "print('is action space continuous?', isinstance(env.action_space, gym.spaces.Box))\n", 228 | "\n", 229 | "print('action space shape:', env.action_space.shape)\n", 230 | "\n", 231 | "print('action space high values?', env.action_space.high)\n", 232 | "print('action space low values?', env.action_space.low)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 14, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "200\n", 245 | "None\n", 246 | "False\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "print(env.spec.max_episode_steps)\n", 252 | "print(env.spec.reward_threshold)\n", 253 | "print(env.spec.nondeterministic)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## atari environments" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 15, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "env = gym.make('Freeway-v4')" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 16, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "observation space is: Box(210, 160, 3)\n", 282 | "is observation space discrete? False\n", 283 | "is observation space continuous? True\n", 284 | "observation space shape: (210, 160, 3)\n" 285 | ] 286 | } 287 | ], 288 | "source": [ 289 | "print('observation space is:', env.observation_space)\n", 290 | "print('is observation space discrete?', isinstance(env.observation_space, gym.spaces.Discrete))\n", 291 | "print('is observation space continuous?', isinstance(env.observation_space, gym.spaces.Box))\n", 292 | "print('observation space shape:', env.observation_space.shape)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 17, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "name": "stdout", 302 | "output_type": "stream", 303 | "text": [ 304 | "action space is: Discrete(3)\n", 305 | "action space shape: 3\n", 306 | "is action space discrete? True\n", 307 | "is action space continuous? False\n", 308 | "action meanings: ['NOOP', 'UP', 'DOWN']\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "print('action space is:', env.action_space)\n", 314 | "print('action space shape:', env.action_space.n)\n", 315 | "print('is action space discrete?', isinstance(env.action_space, gym.spaces.Discrete))\n", 316 | "print('is action space continuous?', isinstance(env.action_space, gym.spaces.Box))\n", 317 | "print('action meanings:', env.unwrapped.get_action_meanings())" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 19, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "name": "stdout", 327 | "output_type": "stream", 328 | "text": [ 329 | "100000\n", 330 | "None\n", 331 | "False\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "print(env.spec.max_episode_steps)\n", 337 | "print(env.spec.reward_threshold)\n", 338 | "print(env.spec.nondeterministic)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "- Pong-v0 => 10k steps, randomly skips 2-4 frames, repeat action probability of 25%\n", 346 | "- Pong-v4 => 100k steps, randomly skips 2-4 frames\n", 347 | "- PongDeterministic-v0 => 100k steps, always skips 4 frames, repeat action probability of 25%\n", 348 | "- PongDeterministic-v4 => 100k steps, always skips 4 frames\n", 349 | "- PongNoFrameskip-v0 => 100k steps, returns every frame, repeat action probability of 25%\n", 350 | "- PongNoFrameskip-v4 => 100k steps, returns every frame\n", 351 | "\n", 352 | "information about environments: https://github.com/openai/gym/blob/master/gym/envs/__init__.py#L604\n", 353 | "spaceinvadersdeterministic always skips 3 frames instead of 4: https://github.com/openai/gym/blob/master/gym/envs/__init__.py#L620\n", 354 | "default frameskip when one not provided: https://github.com/openai/gym/blob/master/gym/envs/atari/atari_env.py#L30\n", 355 | "when skipping frames, you repeat the last action: https://github.com/openai/gym/blob/master/gym/envs/atari/atari_env.py#L94" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## wrappers\n", 363 | "\n", 364 | "not exclusive to atari, but most commonly used for atari\n", 365 | "\n", 366 | "commonly used atari wrappers: https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 20, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "class ClipRewardEnv(gym.RewardWrapper):\n", 376 | " def __init__(self, env):\n", 377 | " gym.RewardWrapper.__init__(self, env)\n", 378 | "\n", 379 | " def reward(self, reward):\n", 380 | " \"\"\"Bin reward to {+1, 0, -1} by its sign.\"\"\"\n", 381 | " return np.sign(reward)\n", 382 | " \n", 383 | "class ScaledFloatFrame(gym.ObservationWrapper):\n", 384 | " def __init__(self, env):\n", 385 | " gym.ObservationWrapper.__init__(self, env)\n", 386 | " self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)\n", 387 | "\n", 388 | " def observation(self, observation):\n", 389 | " return np.array(observation).astype(np.float32) / 255.0" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 21, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "env = gym.make('Pong-v0')\n", 399 | "\n", 400 | "env = ClipRewardEnv(env)\n", 401 | "\n", 402 | "env = ScaledFloatFrame(env)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "## interacting with an environment" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 22, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "name": "stdout", 419 | "output_type": "stream", 420 | "text": [ 421 | "state type: \n", 422 | "state shape: (4,)\n", 423 | "state: [-0.01315549 -0.04012824 0.04801855 -0.0044493 ]\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "env = gym.make('CartPole-v1')\n", 429 | "\n", 430 | "env.seed(1234)\n", 431 | "\n", 432 | "state = env.reset()\n", 433 | "\n", 434 | "print('state type:', type(state))\n", 435 | "print('state shape:', state.shape)\n", 436 | "print('state:', state)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py#L21\n", 444 | "\n", 445 | "cart pos, cart velocity, pole angle, pole velocity" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 23, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "name": "stdout", 455 | "output_type": "stream", 456 | "text": [ 457 | "selected action: 1\n" 458 | ] 459 | } 460 | ], 461 | "source": [ 462 | "action = env.action_space.sample() #select random action, uniformly between high and low for continuous\n", 463 | "\n", 464 | "print('selected action:', action)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py#L29\n", 472 | "\n", 473 | "0 = left, 1 = right" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 24, 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "name": "stdout", 483 | "output_type": "stream", 484 | "text": [ 485 | "state: [-0.01395805 0.15427334 0.04792957 -0.28160352]\n", 486 | "reward: 1.0\n", 487 | "done: False\n", 488 | "info: {}\n" 489 | ] 490 | } 491 | ], 492 | "source": [ 493 | "state, reward, done, info = env.step(action) #perform action on environment\n", 494 | "\n", 495 | "print('state:', state)\n", 496 | "print('reward:', reward)\n", 497 | "print('done:', done)\n", 498 | "print('info:', info)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "## interacting with the atari environment" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 25, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "state type: \n", 518 | "state shape: (210, 160, 3)\n" 519 | ] 520 | } 521 | ], 522 | "source": [ 523 | "env = gym.make('FreewayNoFrameskip-v4')\n", 524 | "\n", 525 | "env.seed(1234)\n", 526 | "\n", 527 | "state = env.reset()\n", 528 | "\n", 529 | "print('state type:', type(state))\n", 530 | "print('state shape:', state.shape)" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 26, 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "data": { 540 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAANEAAAD8CAYAAADpCEEHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAERFJREFUeJzt3X2sHNV5x/HvrxD8B6EBYuRcAzEGGYSDqLlFJHIBUcVJjIswVOjWVp2QgmpAQBy5DrWD1NBWEQkvjgxpXRlhYRdiuA2hsRA4YJQXVQ4E23GMsWMwxIhrLnYhLdCkIgGe/rGzdFjv+s7es29z7+8jje7umTNzZtb7eGbOznlGEYGZjd4fdHsDzMrOQWSWyEFklshBZJbIQWSWyEFklqhtQSRptqTdkvZIWtqudsy6Te34nUjSYcBzwGeAIeBpYH5E7Gx5Y2Zd1q4j0TnAnoh4MSJ+B9wPzG1TW2ZddXib1ns88HLu/RDwyUaVJfm2CetFr0XEcSNValcQjUjSQmBht9o3K+ClIpXaFUT7gBNz70/Iyt4XEauAVeAjkZVbu66JngamSZoq6QhgHrC+TW2ZdVVbjkQR8Y6k64AfAIcBqyPi2dGub/ny5S3bNrOiFi9eXKhe266JIuIR4JF2rb+XDbz0wVPpwSlTurQl3TVePgffsdBi1S/O4JQp739par9M48F4+hwcRGaJHERmiRxEZokcRGaJHERmiRxELZbvicr3UI034+lz6Nq9c2PZWP2yNGu8fA4+EpklasugvKY3wjegWm/aEhFnj1SpFKdzg4OD3d4EG4cGBgYK1fPpnFkiB5FZIgeRWSIHkVkiB5FZIgeRWaJRB5GkEyX9UNJOSc9KWpSV3yRpn6Rt2TSndZtr1ntG/WOrpD6gLyK2SjoK2AJcAgwA/xMRtzWxLv/Yar2ovT+2RsQwMJy9fkvSLipJG1vOiUqsG4omKmnJNZGkk4CzgKeyouskbZe0WtIxrWjDrFclB5GkDwMPAl+OiDeBlcApwAwqR6rbGyy3UNJmSZtTt8Gsm5LunZP0ISoBdF9EfA8gIvbn5t8FPFxvWWdAtV5V9DSuKqV3TsDdwK6IWJ4r78tVuxTYMdo2zLph3eTJrJs8uXD9lCPRnwCfB56RtC0r+yowX9IMIIC9wFUJbZh1VN9thTuV35fSO/cfgOrMGpdZT21sGV6yBDrZO2c2njmIzBI5iMwSOYjM6mimg8FBZJYzvGRJ08uUIlGJWSfNf+WVpur7SGRWY/ny5U3d9OwgMktUytO52bNnH1S2YcOGQ9Zp9fyx0oY/y8bzOzoUwmw8K0UaYQ/Ks25YvHhxoZGtPhKZJXIQmSUqZceCWTt1bFCe2Vj18PBXeHj4K4XrO4jMck6dP9z0Mg4iszqeW9c3cqVM8jWRpL3AW8C7wDsRcbakY4EHgJOoDBEfiIj/Sm3LrBe16kj0pxExI9envhR4IiKmAU9k783GpHadzs0F1mSv11BJL2w2JrUiiAJ4TNIWSQuzsklZmmGAV4FJLWjHrGOa6WBoRRCdGxH9wIXAtZLOz8+Myn1FB93W4wyo1oua6VCoSu5YiIh92d8Dkh4CzgH2S+qLiOEsmeOBOss5A6r1pIv6bm2qftKRSNKR2WNVkHQk8FkqGU/XA5dn1S4Hvp/SjlknNTsoL/VINAl4qJJRmMOB70TEBklPA4OSrgReovLMIrMxKSmIIuJF4I/qlL8OfDpl3YfigWTtm9+JNsryWRa9h87jicwa8Hgisw5xEJkl8ngisxoeT2SW6JprPsI113ykcH0HkVnO88//fdPLOIjM6pg27WuF6zqIzBI5iMwSOYjMEjmIzBI5iMzqaKaXzkFkltNMr1yV71gwq7Fy5RtN1feRyKyGn5Rn1mGlPJ3zQLL2ze9EG2X5LNs+KE/SaVSynFadDPwdcDTw18B/ZuVfjYhHRliXB+VZzyk6KG/UR6KI2A3MAJB0GLAPeAj4K+BbEXHbaNdtViatuib6NPBCRLzUovWZlUarronmAety76+T9AVgM/A3TmZvZdLxQXmSjgAuBv4tK1oJnELlVG8YuL3Bcs6Aaj1paNEihhYtKly/FadzFwJbI2I/QETsj4h3I+I94C4qGVEPEhGrIuLsIhduZp0y8FLzVyStCKL55E7lsrTBVZdSyYhqViqDU6YUrpt0TZSlDv4McFWu+BZJM6gksd9bM89szEnNgPob4KM1ZZ9P2iKzkvFtP2aJHERmdTTTweAgMstppkOhqpQ3oJq10wkrVjRV30cisxoeT2TWYaU8nfMYmPbN70QbZfks/ZAvs0R+yJdZhziIzBKV8prIrJ38kC+zDnMQmdX42OR1fGzyupErZhxEZjm33NY3cqUaDiKzOm5YMly4roPILJGDyCxRoSCStFrSAUk7cmXHSnpc0vPZ32Oyckm6Q9IeSdsl9bdr4816QdEj0T1A7U1WS4EnImIa8ET2HirZf6Zl00IqKbTMSqWZDoZCQRQRPwF+XVM8F1iTvV4DXJIrXxsVTwJH12QAMutZzXQoVKXcsTApIqotvgpMyl4fD7ycqzeUlX1g6yQtpHKkMuspr74yv6n6LbntJyJipDux6yyzClgFI9/FbdZJ1VEDRW//SQmi/ZL6ImI4O107kJXvA07M1TshK2uZL31p00Fld9wx85B1Wj1/rLThz7Lx/KK30BUeTyTpJODhiDgje38r8HpEfEPSUuDYiLhB0p8B1wFzgE8Cd0RE3VTCuXV7PJH1nJY+n0jSOuACYKKkIeBrwDeAQUlXAi8BA1n1R6gE0B7gt1SeV9RS/t+zffM70UZZPsuWH4nayUci60Ue2WrWIR6UZ1bDg/LMEkzccSdrr5jK2iumFl7GQWRWx2tnXF+4roPIrMZrZ1zPxB13Fq7vIDJL5CAyS+QgMkvkIDKrMXHHne5YMEvVTMeCf2w1y3ntjOv9Y6tZKj/ky6zDHERmiUp5TeSnu7VvfifaKMtn6SflmSXyeCKzDhkxiBpkP71V0i+zDKcPSTo6Kz9J0v9K2pZN/9LOjTfrBUWuie4Bvg2szZU9DiyLiHckfRNYBvxtNu+FiJjR0q0066CW/05UL/tpRDwWEe9kb5+kkhbLrPQ2bZ3NZQt2ctmCnYWXacU10RXAo7n3UyX9XNKPJZ3XaCFJCyVtlrS5Bdtg1lIz+w/u3WskKYgk3Qi8A9yXFQ0DH4+Is4DFwHck/WG9ZSNiVUScXaT3w6yTZvZvYNPWg7v+Gxl1EEn6InAR8JeR9ZNHxNsR8Xr2egvwAnDqaNswK4NRBZGk2cANwMUR8dtc+XGSDsten0zl8SovtmJDzXrViL1zDbKfLgMmAI9LAngyIq4Gzgf+QdLvgfeAqyOi9pEsZmPKiEEUEfWeM3F3g7oPAg+mbpRZN23aOpuZ/Rv47r3F6vuOBbM6mulYKOUNqGbtMrN/gwflmaXyoDyzDivl6ZzHwLRvfifaKMtn6fFEZok8nsisQxxEZolKeU1k1k7u4jZLMP3ee9nQ38+G/v7CyziIzOrYuWBB4boOIrMaOxcsYPq9BW+cw0FklsxBZJbIQWSWyEFklshBZFZj+r33trZ3rkEG1Jsk7ctlOp2Tm7dM0h5JuyV9ruk9MOsBre6duweoN8zvWxExI5seAZA0HZgHfCJb5p+riUvMymDnggXM3rqV2Vu3Fl5mVBlQD2EucH+WOutXwB7gnMJbY9YDOjko77osof1qScdkZccDL+fqDGVlB3EGVBsrRnsD6krgH4HI/t5OJZ1wYRGxClgFI48nquWBZO2b34k2yvJZtnRQnqSTgIcj4oxDzZO0DCAibs7m/QC4KSJ+OsL6PSjPek5bB+VJ6su9vRSo9tytB+ZJmiBpKpUMqD8bTRtmZTHaDKgXSJpB5XRuL3AVQEQ8K2kQ2Ekl0f21EfFuezbdrDe0NANqVv/rwNdTNsqsmzwozyzB26dv4uZHL+PmRy8rvIyDyKyOCbtmFq7rIDKrMWHXTN4+fVPh+g4is0QOIrNEDiKzRA4isxpvn77JHQtmqZrpWHAGVLOcCbtm+sdWs1R+yJdZh5XydM5jYNo3vxNtlOWz9EO+zBL5IV9mHeIgMkvkIDJLVGRk62rgIuBANceCpAeA07IqRwP/HREzsnwLu4Dd2bwnI+LqVm+0WTs1+ztRkd65e4BvA2urBRHxF9XXkm4H3sjVfyEiZjS1FWY9Ytas9WzffgEAZ575o0LLJCVvlCRgAFhXcBvNSmHjxosL1029JjoP2B8Rz+fKpkr6uaQfSzovcf1mHbdx48XMmrW+cP3UH1vn88Gj0DDw8Yh4XdIfA/8u6RMR8WbtgpIWAgsT2zfrulEfiSQdDvw58EC1LMvB/Xr2egvwAnBqveUjYlVEnF3kxyyzXpZyOjcL+GVEDFULJB1XfQqEpJOpJG98MW0TzXpbkecTrQN+CpwmaUjSldmseRzcoXA+sF3SNuC7wNURUfSJEmY9Ydas9U11LIw2eSMR8cU6ZQ8CDxZu3axHNdOx4DsWzHI2bryYM8/8UeHfiMBBZHYQD8oz6zAHkVkiB5FZolKMbDXrkkIjW0uRY2FwcLDbm2Dj0MDAQKF6Pp0zS+QgMkvkIDJL5CAyS+QgMkvkIDJL5CAyS+QgMkvkOxbMGhs7dyy0ytCiRYecf8KKFUn1bXwqMjz8REk/lLRT0rOSFmXlx0p6XNLz2d9jsnJJukPSHknbJfW3eyfMuioiDjkBfUB/9voo4DlgOnALsDQrXwp8M3s9B3gUEPAp4KkCbUS7pqFFiwqV13vfaFlP42baPNJ3NyIKZUAdjoit2eu3qOTaPh6YC6zJqq0BLslezwXWRsWTwNGS+kZqpx2qp2P5v7WnaPVO2apl1dO16nL55WvXWVveaN029jTVO5clrD8LeAqYFBHD2axXgUnZ6+OBl3OLDWVlXVF73TLSdUw+gPJBcMKKFe9PI7Xh4BlfCncsSPowlUw+X46INytpuCsiIprtYetUBtRmvtC1R6BG6ynSoeBAGj8KHYkkfYhKAN0XEd/LivdXT9Oyvwey8n3AibnFT8jKPqBTGVCb7UFrdKRpdBRqRZtWbkV65wTcDeyKiHwKlPXA5dnry4Hv58q/kPXSfQp4I3fa13WHOkI0+vLXXvcUWacDaRwp0HN2LpWeiu3AtmyaA3wUeAJ4HtgIHJvVF/BPVPJwPwOc3c3eOU+eEqZCvXO+Y8GsMT893KwTHERmiRxEZokcRGaJHERmiXplKMRrwG+yv2PFRMbO/oylfYHi+zOlyMp6oosbQNLmsfT81rG0P2NpX6D1++PTObNEDiKzRL0URKu6vQEtNpb2ZyztC7R4f3rmmsisrHrpSGRWSl0PIkmzJe3OEpss7fb2jIakvZKekbRN0uasrG4il14kabWkA5J25MpKm4imwf7cJGlf9m+0TdKc3Lxl2f7slvS5phsscqt3uybgMCpDJk4GjgB+AUzv5jaNcj/2AhNryuomcunFCTgf6Ad2jLT9jCIRTY/sz03Akjp1p2ffuwnA1Oz7eFgz7XX7SHQOsCciXoyI3wH3U0l0MhY0SuTScyLiJ8Cva4p7PhFNIw32p5G5wP0R8XZE/ArYQ+V7WVi3g6inkpokCOAxSVuy3BHQOJFLWZQiEU2TrstOQVfnTq+T96fbQTRWnBsR/cCFwLWSzs/PjMp5Q2m7Qcu+/ZmVwCnADGAYuL1VK+52EBVKatLrImJf9vcA8BCV04FGiVzKIikRTa+JiP0R8W5EvAfcxf+fsiXvT7eD6GlgmqSpko4A5lFJdFIako6UdFT1NfBZYAeNE7mURSkT0TRSc912KZV/I6jszzxJEyRNBaYBP2tq5T3QkzKHSmriF4Abu709o9j+k6n07vwCeLa6DzRI5NKLE7COyinO76lcE1zZaPsZRSKaHtmff822d3sWOH25+jdm+7MbuLDZ9nzHglmibp/OmZWeg8gskYPILJGDyCyRg8gskYPILJGDyCyRg8gs0f8B/uzQ/ucwgfkAAAAASUVORK5CYII=\n", 541 | "text/plain": [ 542 | "
" 543 | ] 544 | }, 545 | "metadata": { 546 | "needs_background": "light" 547 | }, 548 | "output_type": "display_data" 549 | } 550 | ], 551 | "source": [ 552 | "plt.imshow(state);" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 27, 558 | "metadata": {}, 559 | "outputs": [ 560 | { 561 | "name": "stdout", 562 | "output_type": "stream", 563 | "text": [ 564 | "selected action: 2\n", 565 | "action meaning: DOWN\n" 566 | ] 567 | } 568 | ], 569 | "source": [ 570 | "action = env.action_space.sample() #select random action, uniformly between high and low for continuous\n", 571 | "\n", 572 | "print('selected action:', action)\n", 573 | "print('action meaning:', env.unwrapped.get_action_meanings()[action])" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 28, 579 | "metadata": {}, 580 | "outputs": [ 581 | { 582 | "name": "stdout", 583 | "output_type": "stream", 584 | "text": [ 585 | "reward: 0.0\n", 586 | "done: False\n", 587 | "info: {'ale.lives': 0}\n" 588 | ] 589 | } 590 | ], 591 | "source": [ 592 | "state, reward, done, info = env.step(action) #perform action on environment\n", 593 | "\n", 594 | "print('reward:', reward)\n", 595 | "print('done:', done)\n", 596 | "print('info:', info)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 29, 602 | "metadata": {}, 603 | "outputs": [ 604 | { 605 | "data": { 606 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAANEAAAD8CAYAAADpCEEHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEVRJREFUeJzt3X/sVfV9x/Hna1j5w7qqxdCvP4po0EiNw++MbZgal9IWmRE1hkFGa6cZatTSMOuwJqvb0tj6g0bblQUjUYZFWdWVGKWK6Y8sVCtQighF0WIEv8K0m7p2sVXf++Oe646Xe/me+/3cX+fe1yO5+d77OZ/zk/vmfM7nns/7KCIws7H7o25vgFnZOYjMEjmIzBI5iMwSOYjMEjmIzBK1LYgkzZS0Q9JOSYvbtR6zblM7fieSNA54DvgMsBt4GpgXEdtavjKzLmvXmegMYGdEvBgRvwfuA2a3aV1mXXVQm5Z7NPBy7vNu4JONKkvybRPWi16LiCNHq9SuIBqVpAXAgm6t36yAl4pUalcQ7QGOzX0+Jit7X0QsA5aBz0RWbu26JnoamCJpsqSDgbnAmjaty6yr2nImioh3JF0N/BAYByyPiGfHurwlS5a0bNvMilq0aFGhem27JoqIR4BH2rX8XjbnpQ82pVdPmtSlLemuQTkOvmOhxapfnNWTJr3/pan9Mg2CQToODiKzRA4is0QOIrNEDiKzRA4is0QOohbL90Tle6gGzSAdh67dO9fP+vXL0qxBOQ4+E5klasugvKY3wjegWm/aGBGnj1apFM251atXd3sTbADNmTOnUD0358wSOYjMEjmIzBI5iMwSOYjMEjmIzBKNOYgkHSvpR5K2SXpW0sKs/EZJeyRtzl6zWre5Zr1nzD+2ShoChiJik6RDgY3ABcAc4H8i4tYmluUfW60XtffH1ogYAUay929J2k4laWPLOVGJdUPRRCUtuSaSdBxwGvBUVnS1pC2Slks6vBXrMOtVyUEk6cPAA8CXI+JNYClwAjCNypnqtgbzLZC0QdKG1G0w66ake+ckfYhKAN0bEQ8CRMTe3PQ7gYfrzesMqNarijbjqlJ65wTcBWyPiCW58qFctQuBrWNdh1k3rDrqKFYddVTh+ilnoj8DPg88I2lzVvZVYJ6kaUAAu4DLE9Zh1lFDtxbuVH5fSu/cfwCqM2kgs55afxm59lroZO+c2SBzEJklchCZJXIQmdXRTAeDg8gsZ+Taa5uepxSJSsw6ad4rrzRV32cisxpLlixp6qZnB5FZolI252bOnLlf2dq1aw9Yp9XT+2UdPpaNp3d0KITZICtFGmEPyrNuWLRoUaGRrT4TmSVyEJklKmXHglk7dWxQnlm/enjkKzw88pXC9R1EZjknzhtpeh4HkVkdz60aGr1SJvmaSNIu4C3gXeCdiDhd0hHA/cBxVIaIz4mI/0pdl1kvatWZ6M8jYlquT30x8ERETAGeyD6b9aV2NedmA/dk7++hkl7YrC+1IogCeEzSRkkLsrKJWZphgFeBiS1Yj1nHNNPB0IogOjMihoFzgasknZ2fGJX7iva7rccZUK0XNdOhUJXcsRARe7K/+yQ9BJwB7JU0FBEjWTLHfXXmcwZU60nnDd3SVP2kM5GkQ7LHqiDpEOCzVDKergEuyapdAvwgZT1mndTsoLzUM9FE4KFKRmEOAr4XEWslPQ2slnQZ8BKVZxaZ9aWkIIqIF4E/qVP+OvDplGUfiAeStW96J9ZRlmNZ9B46jycya8Djicw6xEFklsjjicxqeDyRWaIrr/wIV175kcL1HURmOc8//w9Nz+MgMqtjypSvFa7rIDJL5CAyS+QgMkvkIDJL5CAyq6OZXjoHkVlOM71yVb5jwazG0qVvNFXfZyKzGn5SnlmHlbI554Fk7ZveiXWU5Vi2fVCepJOoZDmtOh74e+Aw4G+A/8zKvxoRj4yyLA/Ks55TdFDemM9EEbEDmAYgaRywB3gI+GvgWxFx61iXbVYmrbom+jTwQkS81KLlmZVGq66J5gKrcp+vlvQFYAPwt05mb2XS8UF5kg4Gzgf+LStaCpxApak3AtzWYD5nQLWetHvhQnYvXFi4fiuac+cCmyJiL0BE7I2IdyPiPeBOKhlR9xMRyyLi9CIXbmadMuel5q9IWhFE88g15bK0wVUXUsmIalYqqydNKlw36ZooSx38GeDyXPHNkqZRSWK/q2aaWd9JzYD6W+CjNWWfT9ois5LxbT9miRxEZnU008HgIDLLaaZDoaqUN6CatdMxt9/eVH2ficxqeDyRWYeVsjnnMTDtm96JdZTlWPohX2aJ/JAvsw5xEJklKuU1kVk7+SFfZh3mIDKr8bGjVvGxo1aNXjHjIDLLufnWodEr1XAQmdVx3bUjhes6iMwSOYjMEhUKIknLJe2TtDVXdoSkxyU9n/09PCuXpDsk7ZS0RdJwuzberBcUPRPdDdTeZLUYeCIipgBPZJ+hkv1nSvZaQCWFllmpNNPBUCiIIuKnwG9qimcD92Tv7wEuyJWviIongcNqMgCZ9axmOhSqUu5YmBgR1TW+CkzM3h8NvJyrtzsr+8DWSVpA5Uxl1lNefWVeU/VbcttPRMRod2LXmWcZsAxGv4vbrJOqowaK3v6TEkR7JQ1FxEjWXNuXle8Bjs3VOyYra5kvfWn9fmV33DH9gHVaPb1f1uFj2Xh60VvoCo8nknQc8HBEnJJ9vgV4PSK+IWkxcEREXCfpL4CrgVnAJ4E7IqJuKuHcsj2eyHpOS59PJGkVcA4wQdJu4GvAN4DVki4DXgLmZNUfoRJAO4HfUXleUUv5f8/2Te/EOspyLFt+Jmonn4msF3lkq1mHeFCeWQ0PyjNLMGHrt1lx6WRWXDq58DwOIrM6XjvlmsJ1HURmNV475RombP124foOIrNEDiKzRA4is0QOIrMaE7Z+2x0LZqma6Vjwj61mOa+dco1/bDVL5Yd8mXWYg8gsUSmvifx0t/ZN78Q6ynIs/aQ8s0QeT2TWIaMGUYPsp7dI+lWW4fQhSYdl5cdJ+l9Jm7PXv7Rz4816QZFroruB7wArcmWPA9dHxDuSvglcD/xdNu2FiJjW0q0066CW/05UL/tpRDwWEe9kH5+kkhbLrPTWb5rJxfO3cfH8bYXnacU10aXAo7nPkyX9QtJPJJ3VaCZJCyRtkLShBdtg1lLTh/fv3WskKYgk3QC8A9ybFY0AH4+I04BFwPck/XG9eSNiWUScXqT3w6yTpg+vZf2m/bv+GxlzEEn6InAe8FeR9ZNHxNsR8Xr2fiPwAnDiWNdhVgZjCiJJM4HrgPMj4ne58iMljcveH0/l8SovtmJDzXrVqL1zDbKfXg+MBx6XBPBkRFwBnA38o6Q/AO8BV0RE7SNZzPrKqEEUEfWeM3FXg7oPAA+kbpRZN63fNJPpw2v5/spi9X3HglkdzXQslPIGVLN2mT681oPyzFJ5UJ5Zh5WyOecxMO2b3ol1lOVYejyRWSKPJzLrEAeRWaJSXhOZtZO7uM0STF25krXDw6wdHi48j4PIrI5t8+cXrusgMquxbf58pq4seOMcDiKzZA4is0QOIrNEDiKzRA4isxpTV65sbe9cgwyoN0rak8t0Ois37XpJOyXtkPS5pvfArAe0unfubqDeML9vRcS07PUIgKSpwFzgE9k8360mLjErg23z5zNz0yZmbtpUeJ4xZUA9gNnAfVnqrF8DO4EzCm+NWQ/o5KC8q7OE9sslHZ6VHQ28nKuzOyvbjzOgWr8Y6w2oS4F/AiL7exuVdMKFRcQyYBmMPp6olgeStW96J9ZRlmPZ0kF5ko4DHo6IUw40TdL1ABFxUzbth8CNEfGzUZbvQXnWc9o6KE/SUO7jhUC1524NMFfSeEmTqWRA/flY1mFWFmPNgHqOpGlUmnO7gMsBIuJZSauBbVQS3V8VEe+2Z9PNekNLM6Bm9b8OfD1lo8y6yYPyzBK8ffJ6bnr0Ym569OLC8ziIzOoYv3164boOIrMa47dP5+2T1xeu7yAyS+QgMkvkIDJL5CAyq/H2yevdsWCWqpmOBWdANcsZv326f2w1S+WHfJl1WCmbcx4D077pnVhHWY6lH/JllsgP+TLrEAeRWSIHkVmiIiNblwPnAfuqORYk3Q+clFU5DPjviJiW5VvYDuzIpj0ZEVe0eqPN2qnZ34mK9M7dDXwHWFEtiIi/rL6XdBvwRq7+CxExramtMOsRM2asYcuWcwA49dQfF5onKXmjJAFzgFUFt9GsFNatO79w3dRrorOAvRHxfK5ssqRfSPqJpLMSl2/WcevWnc+MGWsK10/9sXUeHzwLjQAfj4jXJf0p8O+SPhERb9bOKGkBsCBx/WZdN+YzkaSDgIuA+6tlWQ7u17P3G4EXgBPrzR8RyyLi9CI/Zpn1spTm3AzgVxGxu1og6cjqUyAkHU8leeOLaZto1tuKPJ9oFfAz4CRJuyVdlk2ay/4dCmcDWyRtBr4PXBERRZ8oYdYTZsxY01THwliTNxIRX6xT9gDwQOG1m/WoZjoWfMeCWc66dedz6qk/LvwbETiIzPbjQXlmHeYgMkvkIDJLVIqRrWZdUmhkaylyLKxevbrbm2ADaM6cOYXquTnXQhdd5GCHwTsODqIWG7QvUCODdBwcRC1S/dI8+GCxJkC/GsTj4CBKUP3CDNL/uvUM+nFwECUa1C9OrUE+Dg6iBIPUZDmQQT8ODqJE+S/QIH+ZBvk4OIhaoPqlGeQmDQzucfAdC2aN9c8dC62ye+HCA04/5vbbk+rbYCoyPPxYST+StE3Ss5IWZuVHSHpc0vPZ38Ozckm6Q9JOSVskDbd7J8y6KiIO+AKGgOHs/aHAc8BU4GZgcVa+GPhm9n4W8Cgg4FPAUwXWEe167V64sFB5vc+N5vVrYF4bRvvuRkShDKgjEbEpe/8WlVzbRwOzgXuyavcAF2TvZwMrouJJ4DBJQ6Otpx2qzbH839omWr0mW7Ws2lyrzpefv3aZteWNlm39p6neuSxh/WnAU8DEiBjJJr0KTMzeHw28nJttd1bWFbXXLaNdx+QDKB8Ex9x++/uv0dbh4BkshTsWJH2YSiafL0fEm5U03BUREc32sHUqA2ozX+jaM1Cj5RTpUHAgDY5CZyJJH6ISQPdGxINZ8d5qMy37uy8r3wMcm5v9mKzsAzqVAbXZHrRGZ5pGZ6FWrNPKrUjvnIC7gO0RkU+Bsga4JHt/CfCDXPkXsl66TwFv5Jp9XXegM0SjL3/tdU+RZTqQBkiBnrMzqfRUbAE2Z69ZwEeBJ4DngXXAEVl9Af9MJQ/3M8Dp3eyd88uvhFeh3jnfsWDWmJ8ebtYJDiKzRA4is0QOIrNEDiKzRL0yFOI14LfZ334xgf7Zn37aFyi+P5OKLKwnurgBJG3op+e39tP+9NO+QOv3x805s0QOIrNEvRREy7q9AS3WT/vTT/sCLd6fnrkmMiurXjoTmZVS14NI0kxJO7LEJou7vT1jIWmXpGckbZa0ISurm8ilF0laLmmfpK25stImommwPzdK2pP9G22WNCs37fpsf3ZI+lzTKyxyq3e7XsA4KkMmjgcOBn4JTO3mNo1xP3YBE2rK6iZy6cUXcDYwDGwdbfsZQyKaHtmfG4Fr69Sdmn3vxgOTs+/juGbW1+0z0RnAzoh4MSJ+D9xHJdFJP2iUyKXnRMRPgd/UFPd8IppGGuxPI7OB+yLi7Yj4NbCTyveysG4HUU8lNUkQwGOSNma5I6BxIpeyKEUimiZdnTVBl+ea18n70+0g6hdnRsQwcC5wlaSz8xOj0m4obTdo2bc/sxQ4AZgGjAC3tWrB3Q6iQklNel1E7Mn+7gMeotIcaJTIpSySEtH0mojYGxHvRsR7wJ38f5MteX+6HURPA1MkTZZ0MDCXSqKT0pB0iKRDq++BzwJbaZzIpSxKmYimkZrrtgup/BtBZX/mShovaTIwBfh5UwvvgZ6UWVRSE78A3NDt7RnD9h9PpXfnl8Cz1X2gQSKXXnwBq6g0cf5A5ZrgskbbzxgS0fTI/vxrtr1bssAZytW/IdufHcC5za7PdyyYJep2c86s9BxEZokcRGaJHERmiRxEZokcRGaJHERmiRxEZon+Dxo19DYHRdd9AAAAAElFTkSuQmCC\n", 607 | "text/plain": [ 608 | "
" 609 | ] 610 | }, 611 | "metadata": { 612 | "needs_background": "light" 613 | }, 614 | "output_type": "display_data" 615 | } 616 | ], 617 | "source": [ 618 | "plt.imshow(state);" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": 30, 624 | "metadata": {}, 625 | "outputs": [ 626 | { 627 | "data": { 628 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAANEAAAD8CAYAAADpCEEHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEu1JREFUeJzt3X+sXGWdx/H3RxT+ULOAuN0rYCmmGkvj1krUdJW4EbV2DRVi7tJs/bGarRj5YZrKFt2sqDG6WDBUd9mtoRG2bPFGQBuCVUrcZTddlFKxltbS8qOh5dJucVdc3VSB7/4xZy7nzp25d+4858zMmfm8ksmd85wzc54znW+f5zxznu9RRGBmnXtRrytgVnUOIrNEDiKzRA4is0QOIrNEDiKzRKUFkaSlkvZJOiBpbVn7Mes1lfE7kaQTgIeBdwGHgPuBFRGxp/CdmfVYWS3Rm4EDEfFoRPwOuBVYXtK+zHrqxSW97+nAE7nlQ8BbWm0syZdNWD86FhGvnGmjsoJoRpJWAat6tX+zNhxsZ6OygugwcGZu+YysbEJEbAA2gFsiq7ayzonuB+ZLmifpROBiYEtJ+zLrqVJaooh4VtKlwA+AE4CNEfFQp+933XXXFVY3s3atXr26re1KOyeKiLuAu8p6/342enByV3ps7twe1aS3huVz8BULBat/ccbmzp340jR+mYbBMH0ODiKzRA4is0QOIrNEDiKzRA4is0QOooLlR6LyI1TDZpg+h55dOzfIBvXLMlvD8jm4JTJLVMqkvFlXwhegWn96ICLOnWmjSnTnxsbGel0FG0Kjo6NtbefunFkiB5FZIgeRWSIHkVkiB5FZIgeRWaKOg0jSmZJ+JGmPpIckXZGVXy3psKQHs8ey4qpr1n86/rFV0ggwEhE7Jb0ceAB4PzAK/G9ErJvFe/nHVutH5f7YGhHjwHj2/NeS9lJL2lg4JyqxXmg3UUkh50SSzgLeCPw4K7pU0i5JGyWdUsQ+zPpVchBJehlwG/CpiHgGuAF4DbCIWkt1bYvXrZK0Q9KO1DqY9VJSEEl6CbUAuiUibgeIiCMR8VxEPA98k1py+ykiYkNEnNtOn9OsCKtXr57URWtc7lTH50SSBNwI7I2I63LlI9n5EsCFwO60KpoVY/OrXlV7sm7dlOXxNWs6ft+Uq7j/BPgg8HNJD2ZlnwFWSFoEBPA48PGEfZgVYmTd5MHietA0lnciZXTuPwA1WTWUWU+tGhqDZ3zNmuRA8hULZokcRGaJHERmiSoxPdysKPlzofxyCrdENhQah7BH1q0rJIDALZENkRVPPgm8cC1m/YfW1GszHUQ2NBqDpagLm92dM0tUyZZo6dKlU8q2bt067TZFrx+UffizbL2+q1MhzIZZJdIIe1Ke9cLq1avbmtnqlsgskYPILFElBxbM2pEfGCj6t6E8B5ENrDvHP51bqs0TvXPFpyeWH948Ush+3J2zgfTaFeNTyh7ePFJY4OS5JbKBlg+aemA9vHmkaZB1KjmIJD0O/Bp4Dng2Is6VdCrwbeAsalPERyPiv1P3ZdaPiurO/WlELMqNqa8F7omI+cA92bLZQCrrnGg5cFP2/CZq6YXNBlIR50QB/DC76uCfImIDMCeXNuspYE4B+zGbtfy5T/38qMjzISgmiN4WEYcl/SFwt6Rf5FdGRDS7rEfSKmBVAfs3m6LZ4EHRwVOXHEQRcTj7e1TSHdQynh6pJ3HM7h5xtMnrNgAbwHeFsHK8b+SrE8/79sdWSS8FXpTdFeKlwLuBLwBbgA8DX8n+fi+1omaz1SxQyriYObUlmgPcUcsozIuBf4mIrZLuB8YkfQw4SO2eRWYDKSmIIuJR4I+blD8NvDPlvafjiWTlre/GPqryWbY7Kc/zicxa8Hwisy5xEJkl8gWoNjAah6+bzScqg4PIBsYnPvEHAOzf//lJy/Wy+fM/V8p+3Z2zgVAPnLr58z9XWtA0cktkA6UeOPWgalwug1sis0QOIrNEDiKzRA4is0QeWLCB0s0BhTq3RDYQGoez9+//fFcCCNwS2QC54YZfAb5iwaxjZd0JbybuzpklqmRL5Ilk5a3vxj6q8lmWPilP0uuoZTmtOxv4W+Bk4K+A/8rKPxMRd83wXp6UZ32n3Ul5HbdEEbEPWAQg6QTgMHAH8JfA1yJiXafvbVYlRZ0TvRN4JCIOFvR+ZpVRVBBdDGzOLV8qaZekjZJOKWgfNuRWr1496TylcblXkhOVSDoReBI4JyKOSJoDHKOWXviLwEhEfLTJ6/IZUN803T58TmQAowdbd3TG5s4tfH/dTFTyXmBnRBwBiIgjEfFcRDwPfJNaRtQpImJDRJzbTiXNGgNobO7cUgKnE0UMca8g15Wrpw/OFi8EdhewDzPghRanHlRjc+dO20J1QxFphN8FfDxXfI2kRdS6c483rDMbOKkZUH8DvKKh7INJNTKrGF/2Y5aokpf92PDKnwvll3vJLZFVQuNI3OjBg30RQOCWyCrkjOuvB8q9YVcnHERWGb2aLzQTd+fMElWyJfIcmPLWd2MfVfksfZMvs0S+yZdZlziIzBI5iCxZv87z6RYHkVmiSo7OWX/5o1fVZsJcs27q8pVrxlu9bGC4JbIk16wbmbR85ZrxoQicPLdEVoh64NSD6so141MCbFC5JTJL5CAyS9RWEGWpr45K2p0rO1XS3ZL2Z39Pycolab2kA1narMVlVd6sH7R7TvQt4BvAzbmytcA9EfEVSWuz5b+mlv1nfvZ4C3BD9tcGWP5cKL88DNpqiSLiXuCXDcXLgZuy5zcB78+V3xw19wEnSxqeT3TINI7EXbNuZKgCCNJG5+bkUmM9BczJnp8OPJHb7lBWNunTbkjeaBX21JMrgP6bLNcthQxxR0TMdCV2k9dsADbAzFdxW3/r18ly3ZISREfqiRqz7trRrPwwcGZuuzOyssJcfvn2KWXr1y+Zdpui1w/KPvxZtl7f7uV/bc8nknQWcGdELMyWvwo8nRtYODUirpT0Z8ClwDJqAwrrI6JpKuHce3s+kfWdQu9PJGkz8A7gNEmHgM8BXwHGJH0MOAiMZpvfRS2ADgC/pXa/okL5f8/y1ndjH1X5LAtvicrklsj6kWe2mnWJg6hAwz45bVj5Ku6CnLb769z80Xm1hd1fB5hYPtarSllXuCUq2LGFl3Fs4WWTyk7LgsoGk1uiAh1beNlEwNQDyQE0+NwSmSVyEJklchCZJfI5UYFO2/11nwsNIQdRwZoFT+NonQ0Wd+cKcmzhZXxo42N8aONjE8Pc9WUbbG6JCjTs82qGlVsis0QOIrNElezO+e5u5a3vxj6q8ln6TnlmiTyfyKxLZgyiFtlPvyrpF1mG0zsknZyVnyXp/yQ9mD3+sczKm/WDdlqibwGNHee7gYUR8QbgYeCq3LpHImJR9rikmGqm8WQ5K9OMAwsRcW+W6Sdf9sPc4n3AB4qtVnG271zKB1YuzZ7XyurLMPXk02y2ijgn+ijw/dzyPEk/lfRvkt7e6kWSVknaIWlHAXWY0ZLFW1myeHLQbN85dWTKbLaShrglfRZ4FrglKxoHXh0RT0t6E/BdSedExDONr+1mBtQli7dOBEw9kBxAVpSOWyJJHwHeB/xFZOPkEXE8Ip7Onj8APAK8toB6mvWtjoJI0lLgSuCCiPhtrvyVkk7Inp9N7fYqjxZRUbN+NWN3rkX206uAk4C7JQHcl43EnQd8QdLvgeeBSyKi8ZYsZgOlndG5FU2Kb2yx7W3AbamVKtr2nUt9LmSlqeS1c51oFjyNo3VmnRj4y36WLN7KdzYt4DubFkwMc9eXzYowFC2RJ8tZmQa+JTIrWyVbIs+BKW99N/ZRlc/S84nMEnk+UY80u/OcDTYHUQkcSMNlIIKoX+YL1YOn2f1CbXBVcmAhb8GmTWxdvLi2sGkTwMTynpL3ffnl21m/fsmklscBNHwGoiUC2LNyJXtWrpxUtiALqjK562YDEUR7Vq5kwaZNLNi0qWkwlcWtjsEAdOf6hQNqeA1ES9RL9eBxt254OYjMErk7VwB35YbbQARRfUCh/tysmzrNgHq1pMO5TKfLcuuuknRA0j5J7ymr4o3qo3N53Rqls+HWaQZUgK/lMp3eBSBpAXAxcE72mn+oJy4py56VK1m6cydLd+6cGN6uL5t1Q0cZUKexHLg1Io4Dj0k6ALwZ+M+Oa9gGT7qzXkoZnbs0S2i/UdIpWdnpwBO5bQ5lZVN0OwOqWVk6HVi4AfgiENnfa6mlE25bSgZUTyQrb3039lGVz7LQSXlZd+7OiFg43TpJVwFExJezdT8Aro6IabtznpRn/ajUSXmSRnKLFwL1kbstwMWSTpI0j1oG1J90sg+zqug0A+o7JC2i1p17HPg4QEQ8JGmM2iyEZ4FPRsRz5VTdrD8UmgE12/5LwJdSKjWdej8138VrVmbWLZW6YuH467fz5e/X7id2nBcu+KyXsbcXtbJhV8kLUE/au2TikXf89b6S2rqvUi0R1AIoHyz1QHIAWa9UsiUy6ycOIrNEDiKzRJU7Jzr++u2TBhR8LmS9VrkggtaB0zhaZ9YNlQqik/YumeHHVgeRdV+lggiaX5XgKxWslzywYJaoci0ReA5Mmeu7sY+qfJa+yZdZIt/ky6xLHERmiRxEZonamdm6EXgfcLSeY0HSt4HXZZucDPxPRCzK8i3sBfZl6+6LiEtSK9n421D+hM/nS9Zr7YzOfQv4BnBzvSAi/rz+XNK1wK9y2z8SEYuKqiDArl3vyJ5taViGbduK3JPZ7M3YnYuIe4FfNlsnScAosLngek2xbdsFbNt2wZTy88/fUvauzaaV+jvR24EjEbE/VzZP0k+BZ4C/iYh/T9wH27ZdMBEs9UBy8Fi/SA2iFUxuhcaBV0fE05LeBHxX0jkR8UzjCyWtAlYl7t+s5zoenZP0YuAi4Nv1sog4HhFPZ88fAB4BXtvs9RGxISLObefHLLN+ljLEfT7wi4g4VC+Q9Mr6XSAknU0teeOjaVU0628dJW+MiBup3UKlcUDhPOALkn4PPA9cEhFNByVm4/zzt/hcyPpWp8kbiYiPNCm7DbgtvVpTtQqeZiN2Zt1Uiau43/CGfwVa/djqILLeqkQQ+SZe1s987ZxZIgeRWSIHkVmiSsxsNeuRtma2VmJgYWxsrNdVsCE0Ojra1nbuzlnhLrpouP7TcxBZKYYpkBxEVqh68Nx+e3tdoUHgILJk9cAZptYnz0FkhRjWAAIHkRVgmLpuzTiIrBD5QBq2oHIQWWHqwTNsXTtfsWDW2uBcsVCUQ1dcMe36M66/Pml7G04zducknSnpR5L2SHpI0hVZ+amS7pa0P/t7SlYuSeslHZC0S9Lisg/CrKciYtoHMAIszp6/HHgYWABcA6zNytcCf5c9XwZ8HxDwVuDHbewjynocuuKKtsqbLbd6rR9D89gx03c3ItrKgDoeETuz57+mlmv7dGA5cFO22U3A+7Pny4Gbo+Y+4GRJIzPtpwz17lj+b2MXrVmXrV5W767VX5d/feN7Npa3em8bPLMancsS1r8R+DEwJyLGs1VPAXOy56cDT+Redigr64nG85aZzmPyAZQPgjOuv37iMdM+HDzDpe2BBUkvo5bJ51MR8UwtDXdNRMRsR9i6lQF1Nl/oxhao1fu0M6DgQBoebbVEkl5CLYBuiYjbs+Ij9W5a9vdoVn4YODP38jOyskm6lQF1tiNorVqaVq1QEfu0amtndE7AjcDeiMin2dkCfDh7/mHge7nyD2WjdG8FfpXr9vXcdC1Eqy9/43lPO+/pQBoibYycvY3aSMUu4MHssQx4BXAPsB/YBpyabS/g76nl4f45cG4vR+f88CPh0dbonK9YMGvNdw836wYHkVkiB5FZIgeRWSIHkVmifpkKcQz4TfZ3UJzG4BzPIB0LtH88c9t5s74Y4gaQtGOQ7t86SMczSMcCxR+Pu3NmiRxEZon6KYg29LoCBRuk4xmkY4GCj6dvzonMqqqfWiKzSup5EElaKmlflthkba/r0wlJj0v6uaQHJe3IypomculHkjZKOippd66ssoloWhzP1ZIOZ/9GD0pallt3VXY8+yS9Z9Y7bOdS77IewAnUpkycDZwI/AxY0Ms6dXgcjwOnNZQ1TeTSjw/gPGAxsHum+tNBIpo+OZ6rgTVNtl2Qfe9OAuZl38cTZrO/XrdEbwYORMSjEfE74FZqiU4GQatELn0nIu4FftlQ3PeJaFppcTytLAdujYjjEfEYcIDa97JtvQ6ivkpqkiCAH0p6IMsdAa0TuVRFJRLRzNKlWRd0Y657nXw8vQ6iQfG2iFgMvBf4pKTz8iuj1m+o7DBo1eufuQF4DbAIGAeuLeqNex1EbSU16XcRcTj7exS4g1p3oFUil6pISkTTbyLiSEQ8FxHPA9/khS5b8vH0OojuB+ZLmifpROBiaolOKkPSSyW9vP4ceDewm9aJXKqikoloWmk4b7uQ2r8R1I7nYkknSZoHzAd+Mqs374ORlGXUUhM/Any21/XpoP5nUxvd+RnwUP0YaJHIpR8fwGZqXZzfUzsn+Fir+tNBIpo+OZ5/zuq7Kwuckdz2n82OZx/w3tnuz1csmCXqdXfOrPIcRGaJHERmiRxEZokcRGaJHERmiRxEZokcRGaJ/h/gIhFlO89QtQAAAABJRU5ErkJggg==\n", 629 | "text/plain": [ 630 | "
" 631 | ] 632 | }, 633 | "metadata": { 634 | "needs_background": "light" 635 | }, 636 | "output_type": "display_data" 637 | } 638 | ], 639 | "source": [ 640 | "up_action = env.unwrapped.get_action_meanings().index('UP')\n", 641 | "\n", 642 | "for i in range(50):\n", 643 | " state, reward, done, info = env.step(up_action) #presses up 10 times\n", 644 | "\n", 645 | "plt.imshow(state);" 646 | ] 647 | }, 648 | { 649 | "cell_type": "markdown", 650 | "metadata": {}, 651 | "source": [ 652 | "## reinforcement learning loop" 653 | ] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "execution_count": 31, 658 | "metadata": {}, 659 | "outputs": [ 660 | { 661 | "name": "stdout", 662 | "output_type": "stream", 663 | "text": [ 664 | "episode: 1, reward: 240.0\n", 665 | "episode: 2, reward: 165.0\n", 666 | "episode: 3, reward: 260.0\n", 667 | "episode: 4, reward: 210.0\n", 668 | "episode: 5, reward: 105.0\n", 669 | "episode: 6, reward: 105.0\n", 670 | "episode: 7, reward: 155.0\n", 671 | "episode: 8, reward: 60.0\n", 672 | "episode: 9, reward: 80.0\n", 673 | "episode: 10, reward: 125.0\n" 674 | ] 675 | } 676 | ], 677 | "source": [ 678 | "env = gym.make('SpaceInvadersNoFrameskip-v4')\n", 679 | "\n", 680 | "env.seed(1234)\n", 681 | "\n", 682 | "n_episodes = 10\n", 683 | "\n", 684 | "for episode in range(n_episodes):\n", 685 | " \n", 686 | " episode_reward = 0\n", 687 | " done = False\n", 688 | " state = env.reset()\n", 689 | " \n", 690 | " while not done:\n", 691 | " \n", 692 | " action = env.action_space.sample()\n", 693 | " \n", 694 | " state, reward, done, _ = env.step(action)\n", 695 | " \n", 696 | " episode_reward += reward\n", 697 | " \n", 698 | " print(f'episode: {episode+1}, reward: {episode_reward}')" 699 | ] 700 | }, 701 | { 702 | "cell_type": "code", 703 | "execution_count": null, 704 | "metadata": {}, 705 | "outputs": [], 706 | "source": [] 707 | } 708 | ], 709 | "metadata": { 710 | "kernelspec": { 711 | "display_name": "Python 3", 712 | "language": "python", 713 | "name": "python3" 714 | }, 715 | "language_info": { 716 | "codemirror_mode": { 717 | "name": "ipython", 718 | "version": 3 719 | }, 720 | "file_extension": ".py", 721 | "mimetype": "text/x-python", 722 | "name": "python", 723 | "nbconvert_exporter": "python", 724 | "pygments_lexer": "ipython3", 725 | "version": "3.7.0" 726 | } 727 | }, 728 | "nbformat": 4, 729 | "nbformat_minor": 2 730 | } 731 | -------------------------------------------------------------------------------- /3_advantage_actor_critic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "3_advantage_actor_critic.ipynb", 7 | "provenance": [], 8 | "machine_shape": "hm" 9 | }, 10 | "kernelspec": { 11 | "display_name": "Python 3", 12 | "name": "python3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "lIYdn1woOS1n" 21 | }, 22 | "source": [ 23 | "import torch\n", 24 | "import torch.nn as nn\n", 25 | "import torch.optim as optim\n", 26 | "import torch.nn.functional as F\n", 27 | "import torch.distributions as distributions\n", 28 | "\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "import numpy as np\n", 31 | "import gym\n", 32 | "import tqdm" 33 | ], 34 | "execution_count": 1, 35 | "outputs": [] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": { 40 | "id": "D1kir0PxunA7" 41 | }, 42 | "source": [ 43 | "train_env = gym.make('CartPole-v1')\n", 44 | "test_env = gym.make('CartPole-v1')" 45 | ], 46 | "execution_count": 2, 47 | "outputs": [] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "metadata": { 52 | "id": "Bh6ySKtk4giJ" 53 | }, 54 | "source": [ 55 | "SEED = 1234\n", 56 | "\n", 57 | "train_env.seed(SEED);\n", 58 | "test_env.seed(SEED+1);\n", 59 | "np.random.seed(SEED);\n", 60 | "torch.manual_seed(SEED);" 61 | ], 62 | "execution_count": 3, 63 | "outputs": [] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "metadata": { 68 | "id": "hOxENZewvGhX" 69 | }, 70 | "source": [ 71 | "class MLP(nn.Module):\n", 72 | " def __init__(self, input_dim, hidden_dim, output_dim):\n", 73 | " super().__init__()\n", 74 | "\n", 75 | " self.fc_1 = nn.Linear(input_dim, hidden_dim)\n", 76 | " self.fc_2 = nn.Linear(hidden_dim, output_dim)\n", 77 | "\n", 78 | " def forward(self, x):\n", 79 | " x = self.fc_1(x)\n", 80 | " x = F.relu(x)\n", 81 | " x = self.fc_2(x)\n", 82 | " return x" 83 | ], 84 | "execution_count": 4, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "metadata": { 90 | "id": "ssN8w9iewdul" 91 | }, 92 | "source": [ 93 | "class ActorCritic(nn.Module):\n", 94 | " def __init__(self, actor, critic):\n", 95 | " super().__init__()\n", 96 | " \n", 97 | " self.actor = actor\n", 98 | " self.critic = critic\n", 99 | " \n", 100 | " def forward(self, state):\n", 101 | " \n", 102 | " action_pred = self.actor(state)\n", 103 | " value_pred = self.critic(state)\n", 104 | " \n", 105 | " return action_pred, value_pred" 106 | ], 107 | "execution_count": 5, 108 | "outputs": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "metadata": { 113 | "id": "C42N0qKUxo0d" 114 | }, 115 | "source": [ 116 | "input_dim = train_env.observation_space.shape[0]\n", 117 | "hidden_dim = 32\n", 118 | "output_dim = train_env.action_space.n" 119 | ], 120 | "execution_count": 6, 121 | "outputs": [] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "metadata": { 126 | "id": "DMalrE2I0iNn" 127 | }, 128 | "source": [ 129 | "def init_weights(m):\n", 130 | " if type(m) == nn.Linear:\n", 131 | " torch.nn.init.kaiming_normal_(m.weight)\n", 132 | " m.bias.data.fill_(0)" 133 | ], 134 | "execution_count": 7, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "metadata": { 140 | "id": "jmDLrSWK_7Ec" 141 | }, 142 | "source": [ 143 | "device = torch.device('cuda')" 144 | ], 145 | "execution_count": 8, 146 | "outputs": [] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "NHAb-opgA7b8" 152 | }, 153 | "source": [ 154 | "def train(env, policy, optimizer, discount_factor, device):\n", 155 | " \n", 156 | " policy.train()\n", 157 | " \n", 158 | " log_prob_actions = []\n", 159 | " entropies = []\n", 160 | " value_preds = []\n", 161 | " rewards = []\n", 162 | " done = False\n", 163 | " episode_reward = 0\n", 164 | "\n", 165 | " state = env.reset()\n", 166 | "\n", 167 | " while not done:\n", 168 | "\n", 169 | " state = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 170 | "\n", 171 | " action_pred, value_pred = policy(state)\n", 172 | " \n", 173 | " action_prob = F.softmax(action_pred, dim = -1)\n", 174 | " \n", 175 | " dist = distributions.Categorical(action_prob)\n", 176 | "\n", 177 | " action = dist.sample()\n", 178 | " log_prob_action = dist.log_prob(action)\n", 179 | " \n", 180 | " entropy = dist.entropy()\n", 181 | "\n", 182 | " state, reward, done, _ = env.step(action.item())\n", 183 | "\n", 184 | " log_prob_actions.append(log_prob_action)\n", 185 | " entropies.append(entropy)\n", 186 | " value_preds.append(value_pred.squeeze(0))\n", 187 | " rewards.append(reward)\n", 188 | "\n", 189 | " episode_reward += reward\n", 190 | "\n", 191 | " log_prob_actions = torch.cat(log_prob_actions)\n", 192 | " entropies = torch.cat(entropies)\n", 193 | " value_preds = torch.cat(value_preds)\n", 194 | " \n", 195 | " returns = calculate_returns(rewards, discount_factor, device)\n", 196 | " advantages = calculate_advantages(returns, value_preds)\n", 197 | "\n", 198 | " loss = update_policy(advantages, log_prob_actions, returns, value_preds, entropies, optimizer)\n", 199 | "\n", 200 | " return loss, episode_reward" 201 | ], 202 | "execution_count": 9, 203 | "outputs": [] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "metadata": { 208 | "id": "uhnGQt3FA-0P" 209 | }, 210 | "source": [ 211 | "def calculate_returns(rewards, discount_factor, device, normalize = True):\n", 212 | " \n", 213 | " returns = []\n", 214 | " R = 0\n", 215 | " \n", 216 | " for r in reversed(rewards):\n", 217 | " R = r + R * discount_factor\n", 218 | " returns.insert(0, R)\n", 219 | " \n", 220 | " returns = torch.tensor(returns).to(device)\n", 221 | " \n", 222 | " if normalize:\n", 223 | " returns = (returns - returns.mean()) / returns.std()\n", 224 | " \n", 225 | " return returns" 226 | ], 227 | "execution_count": 10, 228 | "outputs": [] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "metadata": { 233 | "id": "c_FPx8bR3y9g" 234 | }, 235 | "source": [ 236 | "def calculate_advantages(returns, pred_values, normalize = True):\n", 237 | " \n", 238 | " advantages = returns - pred_values\n", 239 | " \n", 240 | " if normalize:\n", 241 | " \n", 242 | " advantages = (advantages - advantages.mean()) / advantages.std()\n", 243 | " \n", 244 | " return advantages" 245 | ], 246 | "execution_count": 11, 247 | "outputs": [] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "metadata": { 252 | "id": "MxefSt0hCaRM" 253 | }, 254 | "source": [ 255 | "def update_policy(advantages, log_prob_actions, returns, value_preds, entropies, optimizer):\n", 256 | " \n", 257 | " returns = returns.detach()\n", 258 | " \n", 259 | " policy_loss = -(advantages * log_prob_actions).mean()\n", 260 | " value_loss = F.smooth_l1_loss(returns, value_preds)\n", 261 | "\n", 262 | " optimizer.zero_grad()\n", 263 | " \n", 264 | " loss = policy_loss + value_loss * 0.5 - entropies.mean() * 0.01\n", 265 | " \n", 266 | " loss.backward()\n", 267 | " \n", 268 | " optimizer.step()\n", 269 | " \n", 270 | " return loss.item()" 271 | ], 272 | "execution_count": 12, 273 | "outputs": [] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "metadata": { 278 | "id": "dHbV1Ec_CdxK" 279 | }, 280 | "source": [ 281 | "def evaluate(env, policy, device):\n", 282 | " \n", 283 | " policy.eval()\n", 284 | " \n", 285 | " done = False\n", 286 | " episode_reward = 0\n", 287 | "\n", 288 | " state = env.reset()\n", 289 | "\n", 290 | " while not done:\n", 291 | " \n", 292 | " state = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 293 | " \n", 294 | " with torch.no_grad():\n", 295 | " \n", 296 | " action_pred, _ = policy(state)\n", 297 | " \n", 298 | " action_prob = F.softmax(action_pred, dim = -1)\n", 299 | " \n", 300 | " action = torch.argmax(action_prob, dim = -1)\n", 301 | " \n", 302 | " state, reward, done, _ = env.step(action.item())\n", 303 | "\n", 304 | " episode_reward += reward\n", 305 | " \n", 306 | " return episode_reward" 307 | ], 308 | "execution_count": 13, 309 | "outputs": [] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "metadata": { 314 | "id": "-rdcX8lLDAER", 315 | "outputId": "1de6f968-8c30-4707-b7dc-2db02dd4835e", 316 | "colab": { 317 | "base_uri": "https://localhost:8080/", 318 | "height": 104 319 | } 320 | }, 321 | "source": [ 322 | "n_runs = 5\n", 323 | "max_episodes = 300\n", 324 | "discount_factor = 0.99\n", 325 | "\n", 326 | "train_rewards = torch.zeros(n_runs, max_episodes)\n", 327 | "test_rewards = torch.zeros(n_runs, max_episodes)\n", 328 | "device = torch.device('cpu')\n", 329 | "\n", 330 | "for run in range(n_runs):\n", 331 | " \n", 332 | " actor = MLP(input_dim, hidden_dim, output_dim)\n", 333 | " critic = MLP(input_dim, hidden_dim, 1)\n", 334 | " actor_critic = ActorCritic(actor, critic)\n", 335 | " actor_critic = actor_critic.to(device)\n", 336 | " actor_critic.apply(init_weights)\n", 337 | " optimizer = optim.Adam(actor_critic.parameters(), lr=1e-2)\n", 338 | " \n", 339 | " for episode in tqdm.tqdm(range(max_episodes), desc=f'Run: {run}'):\n", 340 | " \n", 341 | " loss, train_reward = train(train_env, actor_critic, optimizer, discount_factor, device)\n", 342 | " \n", 343 | " test_reward = evaluate(test_env, actor_critic, device)\n", 344 | " \n", 345 | " train_rewards[run][episode] = train_reward\n", 346 | " test_rewards[run][episode] = test_reward" 347 | ], 348 | "execution_count": 14, 349 | "outputs": [ 350 | { 351 | "output_type": "stream", 352 | "text": [ 353 | "Run: 0: 100%|██████████| 300/300 [01:33<00:00, 3.21it/s]\n", 354 | "Run: 1: 100%|██████████| 300/300 [01:40<00:00, 2.99it/s]\n", 355 | "Run: 2: 100%|██████████| 300/300 [01:29<00:00, 3.35it/s]\n", 356 | "Run: 3: 100%|██████████| 300/300 [01:23<00:00, 3.60it/s]\n", 357 | "Run: 4: 100%|██████████| 300/300 [01:23<00:00, 3.59it/s]\n" 358 | ], 359 | "name": "stderr" 360 | } 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "metadata": { 366 | "id": "jhRxmYL1FLhz", 367 | "outputId": "acdef1e1-2dd4-4b89-bb50-5c8241aaf9ff", 368 | "colab": { 369 | "base_uri": "https://localhost:8080/", 370 | "height": 388 371 | } 372 | }, 373 | "source": [ 374 | "idxs = range(max_episodes)\n", 375 | "fig, ax = plt.subplots(1, figsize=(10,6))\n", 376 | "ax.plot(idxs, test_rewards.mean(0))\n", 377 | "ax.fill_between(idxs, test_rewards.min(0).values, test_rewards.max(0).values, alpha=0.1)\n", 378 | "ax.set_xlabel('Steps')\n", 379 | "ax.set_ylabel('Rewards');" 380 | ], 381 | "execution_count": 15, 382 | "outputs": [ 383 | { 384 | "output_type": "display_data", 385 | "data": { 386 | "image/png": "\n", 387 | "text/plain": [ 388 | "
" 389 | ] 390 | }, 391 | "metadata": { 392 | "tags": [], 393 | "needs_background": "light" 394 | } 395 | } 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "metadata": { 401 | "id": "SkzpFIC7zttT", 402 | "outputId": "476e8c7e-0f0a-47a6-85c3-6625f267fd9b", 403 | "colab": { 404 | "base_uri": "https://localhost:8080/", 405 | "height": 52 406 | } 407 | }, 408 | "source": [ 409 | "x = torch.randn(2, 10)\n", 410 | "y = torch.randn(2, 10)\n", 411 | "print(F.smooth_l1_loss(x, y))\n", 412 | "print(F.mse_loss(x, y)) " 413 | ], 414 | "execution_count": 16, 415 | "outputs": [ 416 | { 417 | "output_type": "stream", 418 | "text": [ 419 | "tensor(0.5806)\n", 420 | "tensor(1.4047)\n" 421 | ], 422 | "name": "stdout" 423 | } 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": { 429 | "id": "S8NwG5Eiz4NZ" 430 | }, 431 | "source": [ 432 | "\n", 433 | "\n", 434 | "```\n", 435 | "# This is formatted as code\n", 436 | "```\n", 437 | "\n" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": { 443 | "id": "TgQoXeO1itDY" 444 | }, 445 | "source": [ 446 | "\n", 447 | "\n", 448 | "```\n", 449 | "# This is formatted as code\n", 450 | "```\n", 451 | "\n" 452 | ] 453 | } 454 | ] 455 | } -------------------------------------------------------------------------------- /4a - Generalized Advantage Estimation (GAE) [LunarLander].ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "https://datascience.stackexchange.com/questions/32480/how-does-generalised-advantage-estimation-work\n", 8 | "https://lilianweng.github.io/lil-log/2018/02/19/a-long-peek-into-reinforcement-learning.html#combining-td-and-mc-learning\n", 9 | "https://arxiv.org/pdf/1506.02438.pdf\n", 10 | "https://github.com/higgsfield/RL-Adventure-2\n", 11 | "http://www.breloff.com/DeepRL-OnlineGAE/\n", 12 | "https://arxiv.org/pdf/1804.02717.pdf\n", 13 | "https://ewrl.files.wordpress.com/2015/02/ewrl12_2015_submission_18.pdf\n", 14 | "https://github.com/Kaixhin/Dist-A3C\n", 15 | "https://github.com/Kaixhin/Dist-A3C/blob/master/client.py" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import torch\n", 25 | "import torch.nn as nn\n", 26 | "import torch.optim as optim\n", 27 | "import torch.nn.functional as F\n", 28 | "import torch.distributions as distributions\n", 29 | "\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "import numpy as np\n", 32 | "import gym" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "train_env = gym.make('LunarLander-v2')\n", 42 | "test_env = gym.make('LunarLander-v2')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "SEED = 1234\n", 52 | "\n", 53 | "train_env.seed(SEED);\n", 54 | "test_env.seed(SEED+1);\n", 55 | "np.random.seed(SEED);\n", 56 | "torch.manual_seed(SEED);" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "class MLP(nn.Module):\n", 66 | " def __init__(self, input_dim, hidden_dim, output_dim, dropout = 0.1):\n", 67 | " super().__init__()\n", 68 | " \n", 69 | " self.net = nn.Sequential(\n", 70 | " nn.Linear(input_dim, hidden_dim),\n", 71 | " nn.Dropout(dropout),\n", 72 | " nn.PReLU(),\n", 73 | " nn.Linear(hidden_dim, hidden_dim),\n", 74 | " nn.Dropout(dropout),\n", 75 | " nn.PReLU(),\n", 76 | " nn.Linear(hidden_dim, output_dim)\n", 77 | " )\n", 78 | " \n", 79 | " def forward(self, x):\n", 80 | " x = self.net(x)\n", 81 | " return x" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "class ActorCritic(nn.Module):\n", 91 | " def __init__(self, actor, critic):\n", 92 | " super().__init__()\n", 93 | " \n", 94 | " self.actor = actor\n", 95 | " self.critic = critic\n", 96 | " \n", 97 | " def forward(self, state):\n", 98 | " \n", 99 | " action_pred = self.actor(state)\n", 100 | " value_pred = self.critic(state)\n", 101 | " \n", 102 | " return action_pred, value_pred" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "INPUT_DIM = train_env.observation_space.shape[0]\n", 112 | "HIDDEN_DIM = 128\n", 113 | "OUTPUT_DIM = train_env.action_space.n\n", 114 | "\n", 115 | "actor = MLP(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)\n", 116 | "critic = MLP(INPUT_DIM, HIDDEN_DIM, 1)\n", 117 | "\n", 118 | "policy = ActorCritic(actor, critic)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "def init_weights(m):\n", 128 | " if type(m) == nn.Linear:\n", 129 | " torch.nn.init.xavier_normal_(m.weight)\n", 130 | " m.bias.data.fill_(0)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "ActorCritic(\n", 142 | " (actor): MLP(\n", 143 | " (net): Sequential(\n", 144 | " (0): Linear(in_features=8, out_features=128, bias=True)\n", 145 | " (1): Dropout(p=0.1, inplace=False)\n", 146 | " (2): PReLU(num_parameters=1)\n", 147 | " (3): Linear(in_features=128, out_features=128, bias=True)\n", 148 | " (4): Dropout(p=0.1, inplace=False)\n", 149 | " (5): PReLU(num_parameters=1)\n", 150 | " (6): Linear(in_features=128, out_features=4, bias=True)\n", 151 | " )\n", 152 | " )\n", 153 | " (critic): MLP(\n", 154 | " (net): Sequential(\n", 155 | " (0): Linear(in_features=8, out_features=128, bias=True)\n", 156 | " (1): Dropout(p=0.1, inplace=False)\n", 157 | " (2): PReLU(num_parameters=1)\n", 158 | " (3): Linear(in_features=128, out_features=128, bias=True)\n", 159 | " (4): Dropout(p=0.1, inplace=False)\n", 160 | " (5): PReLU(num_parameters=1)\n", 161 | " (6): Linear(in_features=128, out_features=1, bias=True)\n", 162 | " )\n", 163 | " )\n", 164 | ")" 165 | ] 166 | }, 167 | "execution_count": 8, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "policy.apply(init_weights)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "LEARNING_RATE = 0.0005\n", 183 | "\n", 184 | "optimizer = optim.Adam(policy.parameters(), lr = LEARNING_RATE)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "def train(env, policy, optimizer, discount_factor, trace_decay):\n", 194 | " \n", 195 | " policy.train()\n", 196 | " \n", 197 | " log_prob_actions = []\n", 198 | " values = []\n", 199 | " rewards = []\n", 200 | " done = False\n", 201 | " episode_reward = 0\n", 202 | "\n", 203 | " state = env.reset()\n", 204 | "\n", 205 | " while not done:\n", 206 | "\n", 207 | " state = torch.FloatTensor(state).unsqueeze(0)\n", 208 | "\n", 209 | " action_pred, value_pred = policy(state)\n", 210 | " \n", 211 | " action_prob = F.softmax(action_pred, dim = -1)\n", 212 | " \n", 213 | " dist = distributions.Categorical(action_prob)\n", 214 | "\n", 215 | " action = dist.sample()\n", 216 | " \n", 217 | " log_prob_action = dist.log_prob(action)\n", 218 | " \n", 219 | " state, reward, done, _ = env.step(action.item())\n", 220 | "\n", 221 | " log_prob_actions.append(log_prob_action)\n", 222 | " values.append(value_pred)\n", 223 | " rewards.append(reward)\n", 224 | "\n", 225 | " episode_reward += reward\n", 226 | " \n", 227 | " log_prob_actions = torch.cat(log_prob_actions)\n", 228 | " values = torch.cat(values).squeeze(-1)\n", 229 | " \n", 230 | " returns = calculate_returns(rewards, discount_factor)\n", 231 | " #note: calculate_advantages takes in rewards, not returns!\n", 232 | " advantages = calculate_advantages(rewards, values, discount_factor, trace_decay)\n", 233 | " \n", 234 | " policy_loss, value_loss = update_policy(advantages, log_prob_actions, returns, values, optimizer)\n", 235 | "\n", 236 | " return policy_loss, value_loss, episode_reward" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 11, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "def calculate_returns(rewards, discount_factor, normalize = True):\n", 246 | " \n", 247 | " returns = []\n", 248 | " R = 0\n", 249 | " \n", 250 | " for r in reversed(rewards):\n", 251 | " R = r + R * discount_factor\n", 252 | " returns.insert(0, R)\n", 253 | " \n", 254 | " returns = torch.tensor(returns)\n", 255 | " \n", 256 | " if normalize:\n", 257 | " \n", 258 | " returns = (returns - returns.mean()) / returns.std()\n", 259 | " \n", 260 | " return returns" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 12, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "def calculate_advantages(rewards, values, discount_factor, trace_decay, normalize = True):\n", 270 | " \n", 271 | " advantages = []\n", 272 | " advantage = 0\n", 273 | " next_value = 0\n", 274 | " \n", 275 | " for r, v in zip(reversed(rewards), reversed(values)):\n", 276 | " td_error = r + next_value * discount_factor - v\n", 277 | " advantage = td_error + advantage * discount_factor * trace_decay\n", 278 | " next_value = v\n", 279 | " advantages.insert(0, advantage)\n", 280 | " \n", 281 | " advantages = torch.tensor(advantages)\n", 282 | " \n", 283 | " if normalize:\n", 284 | " advantages = (advantages - advantages.mean()) / advantages.std()\n", 285 | " \n", 286 | " return advantages" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 13, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "def update_policy(advantages, log_prob_actions, returns, values, optimizer):\n", 296 | " \n", 297 | " advantages = advantages.detach()\n", 298 | " returns = returns.detach()\n", 299 | " \n", 300 | " policy_loss = - (advantages * log_prob_actions).sum()\n", 301 | " \n", 302 | " value_loss = F.smooth_l1_loss(returns, values).sum()\n", 303 | " \n", 304 | " optimizer.zero_grad()\n", 305 | " \n", 306 | " policy_loss.backward()\n", 307 | " value_loss.backward()\n", 308 | " \n", 309 | " optimizer.step()\n", 310 | " \n", 311 | " return policy_loss.item(), value_loss.item()" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 14, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "def evaluate(env, policy):\n", 321 | " \n", 322 | " policy.eval()\n", 323 | " \n", 324 | " rewards = []\n", 325 | " done = False\n", 326 | " episode_reward = 0\n", 327 | "\n", 328 | " state = env.reset()\n", 329 | "\n", 330 | " while not done:\n", 331 | "\n", 332 | " state = torch.FloatTensor(state).unsqueeze(0)\n", 333 | "\n", 334 | " with torch.no_grad():\n", 335 | " \n", 336 | " action_pred, _ = policy(state)\n", 337 | "\n", 338 | " action_prob = F.softmax(action_pred, dim = -1)\n", 339 | " \n", 340 | " action = torch.argmax(action_prob, dim = -1)\n", 341 | " \n", 342 | " state, reward, done, _ = env.step(action.item())\n", 343 | "\n", 344 | " episode_reward += reward\n", 345 | " \n", 346 | " return episode_reward" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "name": "stdout", 356 | "output_type": "stream", 357 | "text": [ 358 | "| Episode: 10 | Mean Train Rewards: -167.7 | Mean Test Rewards: -476.3 |\n", 359 | "| Episode: 20 | Mean Train Rewards: -209.6 | Mean Test Rewards: -443.5 |\n", 360 | "| Episode: 30 | Mean Train Rewards: -207.5 | Mean Test Rewards: -276.2 |\n", 361 | "| Episode: 40 | Mean Train Rewards: -184.9 | Mean Test Rewards: 9.2 |\n", 362 | "| Episode: 50 | Mean Train Rewards: -147.1 | Mean Test Rewards: -20.4 |\n", 363 | "| Episode: 60 | Mean Train Rewards: -123.5 | Mean Test Rewards: -96.6 |\n", 364 | "| Episode: 70 | Mean Train Rewards: -128.2 | Mean Test Rewards: -45.4 |\n", 365 | "| Episode: 80 | Mean Train Rewards: -128.0 | Mean Test Rewards: -30.5 |\n", 366 | "| Episode: 90 | Mean Train Rewards: -128.4 | Mean Test Rewards: -75.3 |\n", 367 | "| Episode: 100 | Mean Train Rewards: -110.3 | Mean Test Rewards: -208.3 |\n", 368 | "| Episode: 110 | Mean Train Rewards: -115.3 | Mean Test Rewards: -362.8 |\n", 369 | "| Episode: 120 | Mean Train Rewards: -96.6 | Mean Test Rewards: -607.0 |\n", 370 | "| Episode: 130 | Mean Train Rewards: -100.1 | Mean Test Rewards: -985.8 |\n", 371 | "| Episode: 140 | Mean Train Rewards: -98.6 | Mean Test Rewards: -1331.9 |\n", 372 | "| Episode: 150 | Mean Train Rewards: -117.8 | Mean Test Rewards: -1366.6 |\n", 373 | "| Episode: 160 | Mean Train Rewards: -120.6 | Mean Test Rewards: -1356.4 |\n", 374 | "| Episode: 170 | Mean Train Rewards: -118.9 | Mean Test Rewards: -1389.0 |\n", 375 | "| Episode: 180 | Mean Train Rewards: -107.4 | Mean Test Rewards: -1557.8 |\n", 376 | "| Episode: 190 | Mean Train Rewards: -109.4 | Mean Test Rewards: -1638.7 |\n", 377 | "| Episode: 200 | Mean Train Rewards: -115.1 | Mean Test Rewards: -1551.3 |\n", 378 | "| Episode: 210 | Mean Train Rewards: -96.5 | Mean Test Rewards: -1780.7 |\n", 379 | "| Episode: 220 | Mean Train Rewards: -70.0 | Mean Test Rewards: -2215.9 |\n", 380 | "| Episode: 230 | Mean Train Rewards: -75.8 | Mean Test Rewards: -2133.9 |\n", 381 | "| Episode: 240 | Mean Train Rewards: -71.7 | Mean Test Rewards: -2612.4 |\n", 382 | "| Episode: 250 | Mean Train Rewards: -90.6 | Mean Test Rewards: -2863.6 |\n", 383 | "| Episode: 260 | Mean Train Rewards: -68.9 | Mean Test Rewards: -4042.3 |\n", 384 | "| Episode: 270 | Mean Train Rewards: -76.4 | Mean Test Rewards: -3659.9 |\n", 385 | "| Episode: 280 | Mean Train Rewards: -71.2 | Mean Test Rewards: -2684.7 |\n", 386 | "| Episode: 290 | Mean Train Rewards: -85.1 | Mean Test Rewards: -1661.0 |\n", 387 | "| Episode: 300 | Mean Train Rewards: -78.1 | Mean Test Rewards: -1253.0 |\n", 388 | "| Episode: 310 | Mean Train Rewards: -76.8 | Mean Test Rewards: -1201.1 |\n", 389 | "| Episode: 320 | Mean Train Rewards: -57.7 | Mean Test Rewards: -1318.0 |\n", 390 | "| Episode: 330 | Mean Train Rewards: -50.3 | Mean Test Rewards: -1219.0 |\n", 391 | "| Episode: 340 | Mean Train Rewards: -41.7 | Mean Test Rewards: -1110.5 |\n", 392 | "| Episode: 350 | Mean Train Rewards: -33.0 | Mean Test Rewards: -811.2 |\n", 393 | "| Episode: 360 | Mean Train Rewards: -42.7 | Mean Test Rewards: -877.1 |\n", 394 | "| Episode: 370 | Mean Train Rewards: -72.8 | Mean Test Rewards: -1050.2 |\n", 395 | "| Episode: 380 | Mean Train Rewards: -89.9 | Mean Test Rewards: -1137.1 |\n", 396 | "| Episode: 390 | Mean Train Rewards: -78.1 | Mean Test Rewards: -1400.2 |\n", 397 | "| Episode: 400 | Mean Train Rewards: -67.3 | Mean Test Rewards: -1688.1 |\n", 398 | "| Episode: 410 | Mean Train Rewards: -58.8 | Mean Test Rewards: -1565.0 |\n", 399 | "| Episode: 420 | Mean Train Rewards: -50.1 | Mean Test Rewards: -1014.3 |\n", 400 | "| Episode: 430 | Mean Train Rewards: -34.2 | Mean Test Rewards: -797.6 |\n", 401 | "| Episode: 440 | Mean Train Rewards: -19.9 | Mean Test Rewards: -511.5 |\n", 402 | "| Episode: 450 | Mean Train Rewards: -1.5 | Mean Test Rewards: -274.3 |\n", 403 | "| Episode: 460 | Mean Train Rewards: -5.0 | Mean Test Rewards: -201.7 |\n", 404 | "| Episode: 470 | Mean Train Rewards: 10.6 | Mean Test Rewards: -265.2 |\n", 405 | "| Episode: 480 | Mean Train Rewards: 1.5 | Mean Test Rewards: -328.7 |\n", 406 | "| Episode: 490 | Mean Train Rewards: -5.8 | Mean Test Rewards: -342.8 |\n", 407 | "| Episode: 500 | Mean Train Rewards: -15.7 | Mean Test Rewards: -385.6 |\n", 408 | "| Episode: 510 | Mean Train Rewards: -13.3 | Mean Test Rewards: -572.9 |\n", 409 | "| Episode: 520 | Mean Train Rewards: 10.7 | Mean Test Rewards: -766.1 |\n", 410 | "| Episode: 530 | Mean Train Rewards: 9.8 | Mean Test Rewards: -641.0 |\n", 411 | "| Episode: 540 | Mean Train Rewards: 7.0 | Mean Test Rewards: -344.2 |\n", 412 | "| Episode: 550 | Mean Train Rewards: -3.2 | Mean Test Rewards: -240.4 |\n" 413 | ] 414 | } 415 | ], 416 | "source": [ 417 | "MAX_EPISODES = 1_000\n", 418 | "DISCOUNT_FACTOR = 0.99\n", 419 | "TRACE_DECAY = 0.99\n", 420 | "N_TRIALS = 25\n", 421 | "REWARD_THRESHOLD = 200\n", 422 | "PRINT_EVERY = 10\n", 423 | "\n", 424 | "train_rewards = []\n", 425 | "test_rewards = []\n", 426 | "\n", 427 | "for episode in range(1, MAX_EPISODES+1):\n", 428 | " \n", 429 | " policy_loss, value_loss, train_reward = train(train_env, policy, optimizer, DISCOUNT_FACTOR, TRACE_DECAY)\n", 430 | " \n", 431 | " test_reward = evaluate(test_env, policy)\n", 432 | " \n", 433 | " train_rewards.append(train_reward)\n", 434 | " test_rewards.append(test_reward)\n", 435 | " \n", 436 | " mean_train_rewards = np.mean(train_rewards[-N_TRIALS:])\n", 437 | " mean_test_rewards = np.mean(test_rewards[-N_TRIALS:])\n", 438 | " \n", 439 | " if episode % PRINT_EVERY == 0:\n", 440 | " \n", 441 | " print(f'| Episode: {episode:3} | Mean Train Rewards: {mean_train_rewards:7.1f} | Mean Test Rewards: {mean_test_rewards:7.1f} |')\n", 442 | " \n", 443 | " if mean_test_rewards >= REWARD_THRESHOLD:\n", 444 | " \n", 445 | " print(f'Reached reward threshold in {episode} episodes')\n", 446 | " \n", 447 | " break" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "plt.figure(figsize=(12,8))\n", 457 | "plt.plot(test_rewards, label='Test Reward')\n", 458 | "plt.plot(train_rewards, label='Train Reward')\n", 459 | "plt.xlabel('Episode', fontsize=20)\n", 460 | "plt.ylabel('Reward', fontsize=20)\n", 461 | "plt.hlines(REWARD_THRESHOLD, 0, len(test_rewards), color='r')\n", 462 | "plt.legend(loc='lower right')\n", 463 | "plt.grid()" 464 | ] 465 | } 466 | ], 467 | "metadata": { 468 | "kernelspec": { 469 | "display_name": "Python 3", 470 | "language": "python", 471 | "name": "python3" 472 | }, 473 | "language_info": { 474 | "codemirror_mode": { 475 | "name": "ipython", 476 | "version": 3 477 | }, 478 | "file_extension": ".py", 479 | "mimetype": "text/x-python", 480 | "name": "python", 481 | "nbconvert_exporter": "python", 482 | "pygments_lexer": "ipython3", 483 | "version": "3.7.6" 484 | } 485 | }, 486 | "nbformat": 4, 487 | "nbformat_minor": 2 488 | } 489 | -------------------------------------------------------------------------------- /8 - n step A2C.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ben Trevett 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Reinforcement Learning 2 | 3 | This repo contains tutorials covering reinforcement learning using PyTorch 1.3 and Gym 0.15.4 using Python 3.7. 4 | 5 | **If you find any mistakes or disagree with any of the explanations, please do not hesitate to [submit an issue](https://github.com/bentrevett/pytorch-rl/issues/new). I welcome any feedback, positive or negative!** 6 | 7 | ## Getting Started 8 | 9 | To install PyTorch, see installation instructions on the [PyTorch website](pytorch.org). 10 | 11 | To install Gym, see installation instructions on the [Gym GitHub repo](https://github.com/openai/gym). 12 | 13 | ## Tutorials 14 | 15 | All tutorials use Monte Carlo methods to train the CartPole-v1 environment with the goal of reaching a total episode reward of 475 averaged over the last 25 episodes. There are also alternate versions of some algorithms to show how to use those algorithms with other environments. 16 | 17 | * 0 - [Introduction to Gym](https://github.com/bentrevett/pytorch-rl/blob/master/0%20-%20Introduction%20to%20Gym.ipynb) 18 | 19 | * 1 - [Vanilla Policy Gradient (REINFORCE)](https://github.com/bentrevett/pytorch-rl/blob/master/1%20-%20Vanilla%20Policy%20Gradient%20(REINFORCE)%20[CartPole].ipynb) 20 | 21 | This tutorial covers the workflow of a reinforcement learning project. We'll learn how to: create an environment, initialize a model to act as our policy, create a state/action/reward loop and update our policy. We update our policy with the [vanilla policy gradient algorithm](https://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation.pdf), also known as [REINFORCE](http://www-anw.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf). 22 | 23 | * 2 - [Actor Critic](https://github.com/bentrevett/pytorch-rl/blob/master/2%20-%20Actor%20Critic.ipynb) 24 | 25 | This tutorial introduces the family of [actor-critic](https://papers.nips.cc/paper/1786-actor-critic-algorithms.pdf) algorithms, which we will use for the next few tutorials. 26 | 27 | * 3 - [Advantage Actor Critic (A2C)](https://github.com/bentrevett/pytorch-rl/blob/master/3%20-%20Advantage%20Actor%20Critic%20(A2C)%20[CartPole].ipynb) 28 | 29 | We cover an improvement to the actor-critic framework, the [A2C](https://arxiv.org/abs/1602.01783) (advantage actor-critic) algorithm. 30 | 31 | * 4 - [Generalized Advantage Estimation (GAE)](https://github.com/bentrevett/pytorch-rl/blob/master/4%20-%20Generalized%20Advantage%20Estimation%20(GAE)%20[CartPole].ipynb) 32 | 33 | We improve on A2C by adding [GAE](https://arxiv.org/abs/1506.02438) (generalized advantage estimation). 34 | 35 | * 5 - [Proximal Policy Evaluation](https://github.com/bentrevett/pytorch-rl/blob/master/5%20-%20Proximal%20Policy%20Optimization%20(PPO)%20[CartPole].ipynb) 36 | 37 | We cover another improvement on A2C, [PPO](https://arxiv.org/abs/1707.06347) (proximal policy optimization). 38 | 39 | Potential algorithms covered in future tutorials: DQN, ACER, ACKTR. 40 | 41 | ## References 42 | 43 | * 'Reinforcement Learning: An Introduction' - http://incompleteideas.net/sutton/book/the-book-2nd.html 44 | * 'Algorithms for Reinforcement Learning' - https://sites.ualberta.ca/~szepesva/papers/RLAlgsInMDPs.pdf 45 | * List of key papers in deep reinforcement learning - https://spinningup.openai.com/en/latest/spinningup/keypapers.html -------------------------------------------------------------------------------- /checkpoint_viz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 32, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import os\n", 11 | "import matplotlib.pyplot as plt" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 46, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "198\n", 24 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=5.0-hid_dim=128-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=rmsprop-lr=5e-05_train.pt\n", 25 | "116006.0\n", 26 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=5.0-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=rmsprop-lr=5e-05_train.pt\n", 27 | "179124.0\n", 28 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.005_train.pt\n", 29 | "251220.0\n", 30 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=1.0-hid_dim=32-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_test.pt\n", 31 | "187895.0\n", 32 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=128-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=adam-lr=0.0005_train.pt\n", 33 | "104849.0\n", 34 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_train.pt\n", 35 | "113282.0\n", 36 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=5.0-hid_dim=128-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.999-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=rmsprop-lr=5e-05_test.pt\n", 37 | "116807.0\n", 38 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=1.0-hid_dim=128-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=adam-lr=0.0001_train.pt\n", 39 | "137352.0\n", 40 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.005_test.pt\n", 41 | "251220.0\n", 42 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=5.0-hid_dim=256-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=rmsprop-lr=0.0005_test.pt\n", 43 | "138481.0\n", 44 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=5.0-hid_dim=128-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.999-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=rmsprop-lr=5e-05_train.pt\n", 45 | "116807.0\n", 46 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=32-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=adam-lr=1e-05_train.pt\n", 47 | "157157.0\n", 48 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=5.0-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=0.0001_test.pt\n", 49 | "104504.0\n", 50 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=1.0-hid_dim=128-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=adam-lr=0.0001_test.pt\n", 51 | "137352.0\n", 52 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=5.0-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=0.0001_train.pt\n", 53 | "104504.0\n", 54 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.5-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.01_train.pt\n", 55 | "154171.0\n", 56 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=32-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_test.pt\n", 57 | "114305.0\n", 58 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.5-hid_dim=128-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.0005_test.pt\n", 59 | "153088.0\n", 60 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=32-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_train.pt\n", 61 | "114305.0\n", 62 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.999-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_train.pt\n", 63 | "105261.0\n", 64 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=5.0-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=rmsprop-lr=5e-05_test.pt\n", 65 | "179124.0\n", 66 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=5.0-hid_dim=256-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=rmsprop-lr=0.0005_train.pt\n", 67 | "138481.0\n", 68 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.5-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.005_test.pt\n", 69 | "127870.0\n", 70 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=5.0-hid_dim=128-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=rmsprop-lr=5e-05_test.pt\n", 71 | "116006.0\n", 72 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.5-hid_dim=128-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.0005_train.pt\n", 73 | "153088.0\n", 74 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=1.0-hid_dim=32-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_train.pt\n", 75 | "187895.0\n", 76 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=256-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=adam-lr=0.0005_test.pt\n", 77 | "116346.0\n", 78 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=adam-lr=5e-05_test.pt\n", 79 | "113641.0\n", 80 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=adam-lr=5e-05_train.pt\n", 81 | "113641.0\n", 82 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.5-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=adam-lr=0.0001_train.pt\n", 83 | "146711.0\n", 84 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.5-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.005_train.pt\n", 85 | "127870.0\n", 86 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.999-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_test.pt\n", 87 | "105261.0\n", 88 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.1-hid_dim=256-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=adam-lr=0.0005_train.pt\n", 89 | "116346.0\n", 90 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.5-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.01_test.pt\n", 91 | "154171.0\n", 92 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=128-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=adam-lr=0.0005_test.pt\n", 93 | "104849.0\n", 94 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=64-init=kaiming-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.2-optim=rmsprop-lr=1e-05_test.pt\n", 95 | "113282.0\n", 96 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=32-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.99-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.4-optim=adam-lr=1e-05_test.pt\n", 97 | "157157.0\n", 98 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=2-grad_clip=0.5-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.8-optim=adam-lr=0.0001_test.pt\n", 99 | "146711.0\n", 100 | "38\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "checkpoints = os.listdir('checkpoints')\n", 106 | "\n", 107 | "print(len(checkpoints))\n", 108 | "\n", 109 | "all_rewards = []\n", 110 | "\n", 111 | "for checkpoint in checkpoints:\n", 112 | " pth = os.path.join('checkpoints', checkpoint)\n", 113 | " rewards = torch.load(pth)\n", 114 | " if rewards.shape[-1] == 10_000:\n", 115 | " tot = max(rewards.sum(-1)).item()\n", 116 | " print(pth)\n", 117 | " print(tot)\n", 118 | " all_rewards.append((pth, rewards, tot))\n", 119 | " \n", 120 | "print(len(all_rewards))" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 47, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "all_rewards.sort(key=lambda x: x[-1])" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 48, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "checkpoints/env=CartPole-v1-seed=1234-n_layers=1-grad_clip=0.1-hid_dim=256-init=xavier-n_runs=5-n_episodes=10000-discount_factor=0.9-start_epsilon=1.0-end_epsilon=0.01-exploration_time=0.6-optim=rmsprop-lr=0.005_test.pt\n", 142 | "251220.0\n" 143 | ] 144 | }, 145 | { 146 | "data": { 147 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmQAAAK5CAYAAAARsTJNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAACV9UlEQVR4nOzdeXwV5b3H8c8vCWHfQYqAAoILbiio4EYU695ivdpra91q9XZH7b1WahetvXVprUv12lq0davVWre61iphUUDCvkPY9y0kIUDW87t/zOSQkO2E5GQC+b5fr3mdmWeeeeY5Z87k/PLMM8+YuyMiIiIi0UmJugIiIiIiLZ0CMhEREZGIKSATERERiZgCMhEREZGIKSATERERiVha1BVoiB49enj//v2jroaIiIhInWbOnLnd3XtWt+6gDsj69+9PVlZW1NUQERERqZOZralpnS5ZioiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxBSQiYiIiERMAZmIiIhIxJIekJlZqpnNNrN3wuUBZjbdzLLN7BUzSw/TW4fL2eH6/smum4iIiEhz0BQtZGOBxRWWHwQecfdBwE7g5jD9ZmBnmP5ImE9ERETkkJfUgMzM+gKXAePDZQPOB14LszwHXBHOjwmXCdePDvOLiIiIHNKS3UL2KHAnEAuXuwO57l4aLq8H+oTzfYB1AOH6vDB/JWZ2q5llmVnWtm3bklh1ERERkaaRtIDMzC4Htrr7zMYs192fdvfh7j68Z8+ejVm0iIiISCTSklj2WcCXzexSoA3QCXgM6GJmaWErWF9gQ5h/A9APWG9maUBnYEcS6yciIiLSLCSthczdx7l7X3fvD1wDfOLu1wITgKvCbDcAb4Xzb4fLhOs/cXdPVv1EREREmosoxiH7MXCHmWUT9BF7Jkx/Bugept8B3BVB3URERESaXDIvWca5eyaQGc6vBE6vJk8hcHVT1EdERESkOdFI/SIiIiIRU0AmIiIiErGEAjIzG2tmnSzwjJnNMrMLk105ERERkZYg0Rayb7p7PnAh0BW4DnggabUSERERaUESDcjKH2F0KfCCuy+skCYiIiIiDZBoQDbTzP5FEJB9aGYd2fc4JBERERFpgESHvbgZGAqsdPc9ZtYduClptRIRERFpQWoNyMzs1P2SBprpSqWIiIhIY6qrhezh8LUNMAyYR9B37CQgCxiZvKqJiIiItAy19iFz9/Pc/TxgEzDM3Ye7+zDgFPY9FFxEREREGiDRTv3HuPv88gV3XwAcl5wqiYiIiLQsiXbqn29m44EXw+VrCS5fioiIiEgDJRqQ3Qh8BxgbLk8CnkpGhURERERamjoDMjNLBd4P+5I9kvwqiYiIiLQsdfYhc/cyIGZmnZugPiIiIiItTqKXLAsI+pF9BOwuT3T3HyalViIiIiItSKIB2evhJCIiIiKNLKGAzN2fS3ZFRERERFqqhAIyMxsM3A8MIRi1HwB3H5ikeomIiIi0GIkODPtngmEuSoHzgOfZNyaZiIiIiDRAogFZW3f/GDB3X+Pu9wCXJa9aIiIiIi1Hop36i8wsBVhuZt8neI5lh+RVS0RERKTlSLSFbCzQDvghMAz4BnBDsiolIiIi0pIk2kKW4+4FBOOR3ZTE+oiIiIi0OIkGZM+aWV9gBjAZmOTu85NXLREREZGWI9FxyEaZWTpwGpABvGtmHdy9WzIrJyIiItISJDoO2dnAOeHUBXiHoKVMRERERBoo0UuWmcBMgsFh33P34qTVSERERKSFSTQg6wGcBZwL/NDMYsBUd/9Z0momIiIi0kIk2ocs18xWAv2AvsCZQKtkVkxERESkpUi0D9lKYAkwheARSjfpsqWIiIhI40j0kuUgd48ltSYiIiIiLVSiI/UPMrOPzWwBgJmdZGY/rW0DM+tnZhPMbJGZLTSzsWF6NzP7yMyWh69dw3Qzs8fNLNvM5pnZqQ16ZyIiIiIHiUQDsj8B44ASAHefB1xTxzalwI/cfQgwAviemQ0B7gI+dvfBwMfhMsAlwOBwupXg0qiIiIjIIS/RgKydu3++X1ppbRu4+yZ3nxXO7wIWA32AMcBzYbbngCvC+THA8x6YBnQxs94J1k9ERETkoJVoQLbdzI4CHMDMrgI2JboTM+sPnAJMB3q5e/m2m4Fe4XwfYF2FzdaHafuXdauZZZlZ1rZt2xKtgoiIiEizlWin/u8BTwPHmtkGYBVwbSIbmlkH4B/Abe6eb2bxde7uZub1qbC7Px3WheHDh9drWxEREZHmKNFxyFYCF5hZe4JWtT0EfcjW1LadmbUiCMZecvfXw+QtZtbb3TeFlyS3hukbCMY5K9c3TBMRERE5pNV6ydLMOpnZODN7wsy+SBCI3QBkA1+tY1sDngEWu/vvKqx6OyyD8PWtCunXh3dbjgDyKlzaFBERETlk1dVC9gKwE5gK3ALcDRjwFXefU8e2ZwHXAfPNrDzvT4AHgFfN7GaCFrbywO494FKCYG8PcFN93oiIiIjIwaqugGygu58IYGbjCTryH+HuhXUV7O5TCIK36oyuJr8T9FUTERERaVHqusuypHzG3cuA9YkEYyIiIiKSuLpayE42s/xw3oC24bIRNGp1SmrtRERERFqAWgMyd09tqoqIiIiItFSJDgwrIiIiIkmigExEREQkYgrIRERERCKmgExEREQkYgrIRERERCKmgExEREQkYgrIRERERCKmgExEREQkYgrImsi66RvJW5sXdTVERESkGVJA1kSOGHE4Jw/cFXU1REREpBlSQNaE1pT1jboKIiIi0gwpIBMRERGJmAIyERERkYgpIBMRERGJmAKyBnrrJ9OZ/49lUVdDREREDmIKyBroivvP4KSrjo66GtUqyi8iVhqLuhpxz9w4mZ2rcqOuhoiISLOjgOwQVVxQTJvOrblzxKR6b/vQpZmkWzF7tu9ptPrMeWUp33ruHL4wsG2jlXmgYqUxnv7GJIoLiqOuioiICKCALOmuPHwaPz4js8b12xZv5/U7px1Q2Qvfyq6xBWxvzl4AHp6ZUa8y9+bs5cfvZ1BCOmumbTqgelVbbl4Q/BTTGggu9W6et7XRyq+Pl384lf966Vz+97LPItm/iIjI/hSQJdkbm0bw0OcZNa7/8umb+I/fjGDH8px42of/m1VnsPKLUZmccMUg7jpzEmZBq9bGWZv56IGZAFiKHVB9S/aUVEn79Kl5ZH+85oDKq05RfhFX3H8Go0/Pb3BZe3P2Ygb3jc5MeJvc7aUAbM85sM9IRESksSkgq4f1MzZx73mZeMwbrcyVu78AQMne0njaxT8dzlnDghauFZ+s4ej0VTx3yxRGdFhAWXEZJXtK+OWkDADemdsPgKc+Oorhp8GF44YxduhE9uQUVru/D36Vxd/vmFpjfSoGcpd9pRXP3TKFs797EoMvOLJB77Oi8la9FUUNHyg3f2MBAE9MOL7BZTWG3DV56icnIiL1poAsQSsz19Lv9N7ck5nBgjeWM238AjbO2hxf39nyuKpvzYFOubFDJ3LNEZ/xp+sr9+0q2LY33roFsLI0CIAevW01y0sGcOP4s5m++wRyVuxk8lML4vnMguAw5samWBDcPT53FCcOrf7QXvKz4Xz1kZHx5by1eZjBry/MrHITwKrSI7hx/NnxZY8562cElzHvG52J2b7gauOszZQVl9X5/gGe/fbnABTRhpwVOxPapi57vXWjlANBUGUG42+YXO9tu/bvTLeBXRqtLiIi0jIoIKvDRT2yOL5NNkedd0Q8LVbmjLzlBPoM+0I8LZ/O/GPDSP7ruEnMe63mYTAenzuKV9adya0vnFsp/for8rlw3DDWTt0QTzu/62z2FlU+RH8au4DR/31KfDmFICDLK+tYKd927xGfL69PaeG+VrgzOgRB3ad/Dtbd/VEGZ/deQays5ta/P35jMv1O782cV5ZyzyfnANA9PZ8vdp9Jn2Ff4Kejqg9gti3eXqnc7/99VHz+51+Zx4asqn3VCjYXULC5oMa6QNDa99GTQf130anaPNuX7qj0vgGKioK6bNpRfRC3eloQaP/+1cNq3b+IiEijcfeDdho2bJgnG1Sdru0/pdr0itP1Ayf7oFaras0z47mF3oqiSmn//Nn0OsuuOJ3UZklC+WJlsSr12bk6t0q+v37/0xrLOK/LLAf3r/b71I2yKutPbLPU3d1vGjzJB6StcXf3zfO3JlS/q/t+Vu3nXpNF/8yuUkZFuWtyPX9DvoP7d46fWGldKiW1lj/7b0vin+3+inYVef6G/Dq/L8m2c3Wuf/6XhcnfkYiINBogy2uIaSIPqhoyRRWQHYzTjOcWVkm7/dQJjb6f/mlrKwUlc/++NOFtq/vcC/MK4+V8c/AkLysp87x1ebVuX7y72MH9it5THdx72Lb4uk//MK/SNtkfr/Zr+0/xspKyeJ7aArKzO82pNeA6kICseHex7925t1JaYV5h/L1X5/T28x2CQPtQVrCloNKxERE5mNUWkOmSZQtx2g1DqqQ9Miuj0fezurRffN4MNi1N/E7KD36VxQltlnPbKRPjaW06t2bJeyu5+D878ezyc/j8L4s4of+uard/ZexnmMFnf1oIwJubRgD7Lt/OfXUpZ337xErbDBp9JC+tPouXvh8MgZH1/CJKCoO+cNuLO7H8o9WV8k/JPxmAdCuODy3SUKf3WEHbrm0qpbXt3Ip+XWr+7D7ffQJQ/V2xXzl8Glcn0J+xoXJW7GTp+yuTVn5xQTEderXntmH178snInLQqSlSOxgmtZBpSnQaOzTT3/5pzZeDn7g60xe/u8LB/ZS2iyqtK9lb4sW7i/2l71a+VL30g5Xu7r7kvRWety7PV09ZF183//VlvmvTLt80d4vfkzHB+6RsjH+nCvMK/Y/XTvSykjKf+eK+fe3atKvK966u7+Wdp0+ocV1F6z7f6Jvmbqn1uz7vtaX+8W9n1X1ShI5MXVdrHRuqvCW0AzVfIhYROZhQSwuZBesPTsOHD/esrKyklb9jeQ49ju6WtPKl+TjMttElrYBlJQMS3qar7SQn1hVLcDiz0qIyfnLOZFZuaM1rG0Zy2WGf8+7W0yvl6WHbmfCPnZx45WAAMh+dQ9GeMvoM6cwrD69n8JA0li8q5b7JGfFtYmXOqknr2Lw0j5LCMjJuGwrA7L8tpaSwjGXTd/KNp84CglCtnMe80jAn5e/DPRgr7uU7ZnDD02cx47lFrFuUz0U/OpH2h7UP8qZYpfy7Nu7iD7fMZMhp7bnsntOqvPfyoWLqMz5e/vp8OvfrRAd2scs71r2BiEgzZ2Yz3X14tStritQOhinZLWSv3v5Z5C07mjQ15pRGcUL5hrUL+hye2XFupfSfnDmhSl5392uO2HczyMs/+NQvP2y6L/pntv/28gl+cY/PHdzPaD/f/3T9JC8tKo2fY6smr/Psj1f7n66f5CM7zPPd23b7zBcX+Z+un+S3HDsxXuaU/5vrS95b4e7upUWlfkXvqb5y4tpK52thXqFPfHyO//6qTC8r2XfTSXk/u8xHZ/uKCWv89Tunes7KnV5aVOont1nib9w1rV5/F9ZO2+DzX19WpY/figlrvGhXUX3/zIhIC4JayA7MuJGZPDAtI2nli0jyDWu3iJl7qvahrMiIcXan+czN70+3tHxWl/bjwu5ZpKY472+r2uJX0fGtl7OwaHB8uYvlMrjdBh75TRld+7Sj6xEdWfzxRnr2b88//7iRTp2Noed1JTXN+OyfO+jeK43CPTFOGtWVh+7ZQ7+ehbRq5Vx2TScGnNaDx25bxa339qH/yN6069Eu3sroMSdWGiM1PRWAzfO2sn1FHsdeMoC0NmkJfTZrPl3P4af0iuev2IK5atI6Dh96GB5zti/fSd/Tesf3u39egNkvL6FTr7a079GWL5y0b8iY/QfS/uTh2fQ+phNDvjwooTrWZd30jRx2XHdad9o3jM3qKetp1SaV9j3b0eXIznjMWZm5lv5n941/XnXZuSqXWGmMjr07sGnuVo48qy8bZ22mTefWFGzdwxEj+8RbmdfP2ET3o7rQttu+Z/VunLWZLkd0ol2PdrXuo6y4jB7HdD/wDyABFVvD5766FHcY+p/HVMkXK43x6h3TuPq3Z1T5nPbm7CVnZS59hveucx81WfPpetbO3sE53z85fkyOOv9IIBj/sbSwlB7HdOe9e2cw8vrBdB3QpUr5ieynYHMBBVv3VPoebpqzhY5faE+HL3SolLf8exErjSV83jREbS1kzSogM7OLgceAVGC8uz9QW/5kB2STn5jLuT84OWnli4iISPPwP6dl1vqow8ZQW0DWbO6yNLNU4EngEmAI8DUzq/3f2iQ769snMrTtkirpz90yBYBO5FVKH/OF6dw1IpPn/2tKpVHer+g9jaevqzwyf0WJ3BGXQuVR8M/vOovX75zGm+Oms+azDSz712ou7TmjUp6j0tbwhZQtDG61islPzuP5/5rCNUd8xsU9ZvDq7VO55dhJfGPAlGr3t39Z++udsrnW9SIiIgeTs7/Ytu5MSdRsWsjMbCRwj7tfFC6PA3D3+2vaJtktZNL8JNJc3dx5zPGYk5LWbP4fqlV1n3kix6HiJYZy1ZVTcV1tlyUqpu2/fv/LaBWXa7rEtn/99t9/bXUqn4+VxoIbHParS21l1VSv8u9FpRstqqlzTZ9NdfmrU543JS2lyuXE6o5XyZ4SWrVrFU8v/+5WXLYUo6y4jLQ2aXjMKSsui+dJSUuJl1vb8S3fT23Hv2K9yj/76j7P/etWnf3fa/nlX485pYWl1V7GrVjP6r67FZf3r3fFz6D8s6m4z/K67l9WdWr6fMrLq64+5Y+5q/j5lpdR/lmWl1H+tymRc9xjHn8P+++v4j7Ky66YVtO5Vv45VzwGtX3/KtZ1/8+mtLCUVu1axb+TFfPs/71sit+W2lrIkn/BNHF9gHUVltcDZ+yfycxuBW4NFwvMbGmS69UD2J7kfUj96bg0PzomzZOOS/OjY9I8NcVxObKmFc0pIEuIuz8NPN1U+zOzrJqiWYmOjkvzo2PSPOm4ND86Js1T1MelOV0z2QD0q7DcN0wTEREROaQ1p4BsBjDYzAaYWTpwDfB2xHUSERERSbpmc8nS3UvN7PvAhwTDXjzr7gsjrhY04eVRqRcdl+ZHx6R50nFpfnRMmqdIj0uzuctSREREpKVqTpcsRURERFokBWQiIiIiEVNAVgszu9jMlppZtpndFXV9DmVm1s/MJpjZIjNbaGZjw/RuZvaRmS0PX7uG6WZmj4fHZp6ZnVqhrBvC/MvN7Iao3tOhwsxSzWy2mb0TLg8ws+nhZ/9KeBMOZtY6XM4O1/evUMa4MH2pmV0U0Vs5ZJhZFzN7zcyWmNliMxupcyVaZnZ7+LdrgZm9bGZtdK40PTN71sy2mtmCCmmNdm6Y2TAzmx9u87iZNd5osjU9dbylTwQ3FqwABgLpwFxgSNT1OlQnoDdwajjfEVhG8Aith4C7wvS7gAfD+UuB9wEDRgDTw/RuwMrwtWs43zXq93cwT8AdwF+Bd8LlV4Frwvk/AN8J578L/CGcvwZ4JZwfEp4/rYEB4XmVGvX7Opgn4DngW+F8OtBF50qkx6MPsApoGy6/CtyocyWSY3EucCqwoEJao50bwOdhXgu3vaSx6q4WspqdDmS7+0p3Lwb+BoyJuE6HLHff5O6zwvldwGKCP3JjCH58CF+vCOfHAM97YBrQxcx6AxcBH7l7jrvvBD4CLm66d3JoMbO+wGXA+HDZgPOB18Is+x+T8mP1GjA6zD8G+Ju7F7n7KiCb4PySA2BmnQl+dJ4BcPdid89F50rU0oC2ZpYGtAM2oXOlybn7JCBnv+RGOTfCdZ3cfZoH0dnzFcpqMAVkNavuUU59IqpLixI2358CTAd6ufumcNVmoFc4X9Px0XFrXI8CdwKxcLk7kOvupeFyxc83/tmH6/PC/DomjWsAsA34c3gpebyZtUfnSmTcfQPwW2AtQSCWB8xE50pz0VjnRp9wfv/0RqGATJoVM+sA/AO4zd3zK64L/yPROC1NxMwuB7a6+8yo6yKVpBFcknnK3U8BdhNchonTudK0wj5JYwiC5cOB9qi1sVlqzueGArKa6VFOTczMWhEEYy+5++th8pawmZjwdWuYXtPx0XFrPGcBXzaz1QSX7M8HHiNo1i8fVLri5xv/7MP1nYEd6Jg0tvXAenefHi6/RhCg6VyJzgXAKnff5u4lwOsE54/Oleahsc6NDeH8/umNQgFZzfQopyYU9p94Bljs7r+rsOptoPwOlxuAtyqkXx/eJTMCyAubpD8ELjSzruF/rReGaVJP7j7O3fu6e3+C7/8n7n4tMAG4Ksy2/zEpP1ZXhfk9TL8mvLNsADCYoGOsHAB33wysM7NjwqTRwCJ0rkRpLTDCzNqFf8vKj4nOleahUc6NcF2+mY0Ij/P1FcpquKjviGjOE8EdGMsI7nS5O+r6HMoTcDZBM/I8YE44XUrQr+JjYDnwb6BbmN+AJ8NjMx8YXqGsbxJ0hs0Gbor6vR0KE5DBvrssBxL8SGQDfwdah+ltwuXscP3ACtvfHR6rpTTiXUktdQKGAlnh+fImwZ1gOleiPSb3AkuABcALBHdK6lxp+uPwMkE/vhKC1uSbG/PcAIaHx3gF8AThE48aY9Kjk0REREQipkuWIiIiIhFTQCYiIiISMQVkIiIiIhFTQCYiIiISMQVkIiIiIhFLqztL89WjRw/v379/1NUQERERqdPMmTO3u3vP6tYd1AFZ//79ycrKiroaIiIiInUyszU1rdMlSxEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiZgCMhEREZGIKSATERERiVjSAzIzSzWz2Wb2Trg8wMymm1m2mb1iZulheutwOTtc3z/ZdRMRERFpDpqihWwssLjC8oPAI+4+CNgJ3Bym3wzsDNMfCfOJiIiIHPKSGpCZWV/gMmB8uGzA+cBrYZbngCvC+THhMuH60WF+ERERkUNaslvIHgXuBGLhcncg191Lw+X1QJ9wvg+wDiBcnxfmr8TMbjWzLDPL2rZtWxKrLiIiItI0khaQmdnlwFZ3n9mY5br70+4+3N2H9+zZszGLFhEREYlEWhLLPgv4spldCrQBOgGPAV3MLC1sBesLbAjzbwD6AevNLA3oDOxIYv1EREREmoWktZC5+zh37+vu/YFrgE/c/VpgAnBVmO0G4K1w/u1wmXD9J+7uyaqfiIiISHMRxThkPwbuMLNsgj5iz4TpzwDdw/Q7gLsiqJuIiIhIk0vmJcs4d88EMsP5lcDp1eQpBK5uivqIiIiINCcaqV9EREQkYgrIRERERCKWUEBmZmPNrJMFnjGzWWZ2YbIrJyIiItISJNpC9k13zwcuBLoC1wEPJK1WIiIiIi1IogFZ+SOMLgVecPeFFdJEREREpAESDchmmtm/CAKyD82sI/sehyQiIiIiDZDosBc3A0OBle6+x8y6AzclrVYiIiIiLUitAZmZnbpf0kAzXakUERERaUx1tZA9HL62AYYB8wj6jp0EZAEjk1c1ERERkZah1j5k7n6eu58HbAKGuftwdx8GnMK+h4KLiIiISAMk2qn/GHefX77g7guA45JTJREREZGWJdFO/fPNbDzwYrh8LcHlSxERERFpoEQDshuB7wBjw+VJwFPJqJCIiIhIS1NnQGZmqcD7YV+yR5JfJREREZGWpc4+ZO5eBsTMrHMT1EdERESkxUn0kmUBQT+yj4Dd5Ynu/sOk1EpERESkBUk0IHs9nERERESkkSUUkLn7c8muiIiIiEhLlVBAZmaDgfuBIQSj9gPg7gOTVC8RERGRFiPRgWH/TDDMRSlwHvA8+8YkExEREZEGSDQga+vuHwPm7mvc/R7gsuRVS0RERKTlSLRTf5GZpQDLzez7BM+x7JC8aomIiIi0HIm2kI0F2gE/BIYB3wBuSFalRERERFqSRFvIcty9gGA8spuSWB8RERGRFifRgOxZM+sLzAAmA5PcfX7yqiUiIiLSciQ6DtkoM0sHTgMygHfNrIO7d0tm5URERERagkTHITsbOCecugDvELSUiYiIiEgDJXrJMhOYSTA47HvuXpy0GomIiIi0MIkGZD2As4BzgR+aWQyY6u4/S1rNRERERFqIRPuQ5ZrZSqAf0Bc4E2iVzIqJiIiItBSJ9iFbCSwBphA8QukmXbYUERERaRyJXrIc5O6xpNZEREREpIVKdKT+QWb2sZktADCzk8zsp7VtYGb9zGyCmS0ys4VmNjZM72ZmH5nZ8vC1a5huZva4mWWb2TwzO7VB70xERETkIJFoQPYnYBxQAuDu84Br6timFPiRuw8BRgDfM7MhwF3Ax+4+GPg4XAa4BBgcTrcSXBoVEREROeQlGpC1c/fP90srrW0Dd9/k7rPC+V3AYqAPMAZ4Lsz2HHBFOD8GeN4D04AuZtY7wfqJiIiIHLQSDci2m9lRgAOY2VXApkR3Ymb9gVOA6UAvdy/fdjPQK5zvA6yrsNn6MG3/sm41sywzy9q2bVuiVRARERFpthLt1P894GngWDPbAKwCrk1kQzPrAPwDuM3d880svs7d3cy8PhV296fDujB8+PB6bSsiIiLSHCU6DtlK4AIza0/QqraHoA/Zmtq2M7NWBMHYS+7+epi8xcx6u/um8JLk1jB9A8E4Z+X6hmkiIiIih7RaL1maWSczG2dmT5jZFwkCsRuAbOCrdWxrwDPAYnf/XYVVb4dlEL6+VSH9+vBuyxFAXoVLmyIiIiKHrLpayF4AdgJTgVuAuwEDvuLuc+rY9izgOmC+mZXn/QnwAPCqmd1M0MJWHti9B1xKEOztAW6qzxsREREROVjVFZANdPcTAcxsPEFH/iPcvbCugt19CkHwVp3R1eR3gr5qIiIiIi1KXXdZlpTPuHsZsD6RYExEREREEldXC9nJZpYfzhvQNlw2gkatTkmtnYiIiEgLUGtA5u6pTVURERERkZYq0YFhRURERCRJFJCJiIiIREwBmYiIiEjEFJCJiIiIREwBmYiIiEjEFJCJiIiIREwBmYiIiEjEFJCJiIiIREwBmYiIiEjEFJCJiIiIREwBmYiIiEjEFJCJiIiIREwBmYiIiEjEFJCJiIiIREwBmYiIiEjEFJA1wJYF2zCD7580ETPYtnh7k+7/49/Mwgw2ZG1qkv0V5hZSmFvYJPsSERFpSRSQNUDWa6sBeHL+KADmvbM26fuc/fISzCD74zX836PFAEx7ZXXS9wvQtmsb2nZt0yT7EhERaUkUkB2gP3x9Eq//tebWovUzNmEG956X2aj7fe6hLQD88/erGrXcuuzauKte+T/4VRaL3s5u0D5LC0spKy5rUBkiIiIHg7SoK3Cw+s7L59a6/sMns4He3JOZwc9KY6SkNU7s6x68mhlmwYLHGqXoWhXmFQEdE85/yc+GA/vqeyBatU3jlLaLmbXnuAMvpJla/M4KNi/L57w7Tom6KiIi0gyohawOuWvy6JWyjWnjF8TTPnpgZrV533ih+lak1FYpjOiwgKe+NqnG/ZS3Jr0y9jOOSNvAE1dPZOeq3Brzm4GF87UFPXlr81g/I+hj9sGvsnj2psns2riL5R+tZsUna2rcbtOcLeSs2FlzwRV4zPnFqEyWvLeyyrp/3T+TtVM30D0lh2Uf1r9Vb/beIBh79CsTyegyp97bN1dDvnQU5/9IwZiIiAQUkNXh0z8vY6v35L6f7I2nXThuWLV5n5w/ir05e/GYs3xJ5Utt03efwHf/di6fPjUPM/i/aybG171993SOHzOIl3/wGdc8fibryvrwg9dG0W1gF1770dRK5VQXfMXKao7IThq4i36n9waCVqub/3IOpw3YxtEX9mfQ6CP54ckT+cWoTLYv3VFpu8NP6UX3QV1rLLeiHctz+OWkDM77UvsqQd67rxTwyq+Wk+Pd+NNPaw4Aty7cxo/PyKzxEuXtb45iYt7QhOqTTMUFxfz38Exy1+RFXRURETmEKCCrRcmeEi6/9zQA3tt2Gh5zFr5Ve7+o//3ydMbfOJkHp2dUu/7s754EwPdeGRVPWzAjCPa+/sSZVfL/9WWrtJxbkAoELWRLd3QH4Fd/7FljfdaW9a2StrR4YHz+9/NG8ctJGfQ8NigrZ8VO7j4rs8byABa8sTzeordx1maKdgU3F2yO9WLQ6CNZ/M6KeN6KAeTmHTVfIf/uRdk89HkGH94/q8q6n51Te32qM/8fyyr1YVvxyRqynl9Ubd61UzcQK637uu/aqRto3TGdh2dmMO7SufWuk4iISE0UkNVi4+wtlZan/mkBJ1wxqNZt/vfTDO5+MbE+T0PbLuXTp+bVmueNTSPYs30PAJN+P5cXVp4NwNjXRzG/8GgAFhQNZt5ry/jFqExK9pQktO/qPPqViVw3Mptff5YRT8v+eA3ndJ7LnLf2tW6deOVgjh8ziOKCYvoM+wI3XbSxUjnrF+TG57fnpTF5ZlsAXlx1NtPGL2DLgm0A5K/P50/XT+L3V02ksCQINNcv38v+fjUlo0oaQMHmAvLWBi1Ve3P2VrrEetJVR3P8mEHxfQ0afSSn3TCkShnLP1rNkWf24ZcXVL6cfP9FmZhRqcVu9j/Xx+eLSyoHynXJen5Rkw1P0lA7V+WyN6fqcRARkSRy94N2GjZsmCfT2mkbPGjjSe50Rvv5ta4/vvUyf+OuaQmXtyM7J/4eytMqztdn+s1lE2pc9+Al1a87v+vMWss8zLb6juycSmmjK2xzzRGf+n2jqy+7ojbsiacNab280vry/P1S11f5HIp3F3vOyp2V0oe3W1ht2Xt27PG7Rkzwhy6tXJ9vDp5U5fvy74dm+lf7feqxsliVdeDeiqJqj0tDbV20rdp9HihwP6H1skYrT0REAkCW1xDTJC1Yaoop2QHZus83NklAlozp3w/N9MNsa3y5aFdR5HWqbToqbXXCef/yrcnuvm+5ZG9JpQAnVharcdtYWcwv7vF5PG95+sltlrh78LllPjo7HpCtnLi22nLKA7LX75zq4J6zcqenUBqvz02DJ/mx6dn+3RMy/d8PzaxUv4rHYn+7Nu3y31w2wYt3Fyf0HV09ZZ2D+wMXTzjg7/n+GjNYTLadq3OjroKISMIUkB2ggzkgO9Snrx05pdr0CY/M9vE3TEqoDPfKy7s27apXHdzdz+w418H9T9dPigdkG2ZuqnWb/de/9N0p7l45ULtp8CSPlcV8+rML/I27prm7e1lJmT/6lUzfvW13/Ds66fdzHNwPT9noH/xqht9y7ET/58+m+y/PnxDPs+xfq/zPN09OuBWtYl1jZTHftWnXgZ9EB2DV5HW+5rP1deZ76bvBd2DWXxc3Qa1ERBpOAdkBWj9DAdmhPN15+oRKyxldZiV9n49ckVlt+reHTKySVvGS8JrP1vu3jgnyjB2aGf+O3n7qhBr3VbSryKc/uyC+fN/oCT7rr4t9cKuVvvCt5e7uvm3Jdr9x0CRf8t4KXz9jo7vv2/7fD83041svi++/3IXdZ/i3h0x092AfZSVl/uJ3pvimuVuqnEM5K3f6ry+c4Ksmr0v4vCvff12uGzjZYV+LqYhIc6eA7AA9cHHNP3aaNEU1Hd96mb9+51T/y7cmN6icS3t+XiVt/uvLat1m09wt8fmPHqzcV/DENkt90T+zHdyvHzjZN8zc5EZZfP1jV2Z6WUmZv3HXNH/nF5/7OZ3m+N/v+MyPS8/2qX+aHw/uyvN3Jte/0ntqpXNy1eR1/ukf5vnubbsrBWTP3jTJty/b4YfZVr9v9IR4/tl/W+Kxspi/d+/n3s12eMGWAncPLnWunLjWy0rKvKykLJ7/jbumxVskG0NpUWmjlSUiB7/aAjIL1h+chg8f7llZWUkr/+UffFbtUBQicnDqZjnkeLcq6Ue3WsWykgHx5bbsYVSPhXyw/bRqyzkmfSVLiwfy7E2TueS2Y8jbUMCJl/ZlWIel9O+2i9OGFvOjtzMA+ELKFhZlt2b6S9m8+dc93P3nQVx5fi5Ze4bQO2UzH71ewOZl+Uz9MJ9eh6cw7KKe3HILDO6xk6v/0xhz32mU7Clh7fRNHHXeEaycuI6eR3elY+8OrJ22ka5HdiKtdSrrZ20ltVUKfU45jDZdgmfOxkpjbJq7lQ6HtaPrgC54LPh7bymGxzz+BBGPObHSGLHSGGlt0oiVxljwZjbturbm8JN70q5HO2LhE0dK9pSwYdYWeh7dlQ5f6BAvc/WU9XTs1Y4uR3bGUozU9FRKC0vxmLPk/VWc8JXBeMzxmJOaHtxVveS9lRxx+hdo3ak1pYWlpHdIx2OOpVi8XpZi8budy4enKa93YW4hmxfu4MiRh5OSlsLmeVtZNmkzGbcNpay4DEsxUtJSiJXG4mXGKjw5pay4jLQ2afF15SrWoXzbkj0lrJy0nqMy+pGSloLHnLQ2aZQVl8WXy8u0FCOtTVq8rBUT1tJzcBfWzdzKcZcNJDU9lbfvns6pVxzB4af0oii/iL/ekcVldxzDzrW7yJ6+g4t+PJT0DumU7Clh+b/XMOj8IyjKL2Lbsp30O703WxdtJ1bmbMvOo13X1hxz8QA85hQXFJOankpKWgpLP1jFsZcOxFKMovwictfmkzk+m/944DRS01OZ8dwi3GHY14/h49/NpW3HNM685XiWfriaQef1o02XNnjMKS0spVW7Viz7cBVFBSUMPKcP7Xq0w2NOyZ4S0jukU1pYGv9cy79fuzYVkLd+F/3OODz+eZa/fvrUPDp0b83JXz0GjzlF+UVMf24J3Y9oz/FjBsWPe2p6Kh7z+OdcfjzLP9vF7wYDkh97SXD+lh+T8mOXmp4aP4blr6npqRTlF9G6U2vKisvix3L/70EymNlMdx9e7brmFJCZ2cXAY0AqMN7dH6gtf7IDsqBOSS1eREREmoHMR+cwauzQpO6jtoCs2YxDZmapwJPAJcAQ4GtmVnXgqCb29zum8r0TJ3JVn6n89OzMePr3TpzIoFar+cf/TOOcTsEgocemr2Dyk/N47b+ncv3AKVzRexoA/VI3ADBt/AKGtA4GK72054xK+/nw1zO5/6JMbj81k7M7zeW/h2cybmQm53aeA0Anah4Z/ndjgryJeP++LK454rNKaW/cNR2A754wkTtPr76cv99R+YkBNxw1pdb9nN91Fv/82ed88vBsxg6dWGveupzWfmGDtr/91MwGbS8iIoe+JZ/nR7r/ZtNCZmYjgXvc/aJweRyAu99f0zZN0UImciAqXvKoLU3qFuXn1lT7rngpsTYVL7cd6H7KLycl8r4a4/0n+t4OdN8H+pnU5zOA2uufzO9JbWUf6GfbGMr3Xd3+6/o8DvTzSla5Tam2FrKan2XT9PoA6yosrwfO2D+Tmd0K3BouFpjZ0iTXqwewPcn7kPrTcWl+dEyaJx2X5kfHpHlqiuNyZE0rmlNAlhB3fxp4uqn2Z2ZZNUWzEh0dl+ZHx6R50nFpfnRMmqeoj0uz6UMGbAD6VVjuG6aJiIiIHNKaU0A2AxhsZgPMLB24Bng74jqJiIiIJF2zuWTp7qVm9n3gQ4JhL55194bdXtc4muzyqNSLjkvzo2PSPOm4ND86Js1TpMel2dxlKSIiItJSNadLliIiIiItkgIyERERkYgpIKuFmV1sZkvNLNvM7oq6PocyM+tnZhPMbJGZLTSzsWF6NzP7yMyWh69dw3Qzs8fDYzPPzE6tUNYNYf7lZnZDVO/pUGFmqWY228zeCZcHmNn08LN/JbwJBzNrHS5nh+v7VyhjXJi+1MwuiuitHDLMrIuZvWZmS8xssZmN1LkSLTO7PfzbtcDMXjazNjpXmp6ZPWtmW81sQYW0Rjs3zGyYmc0Pt3ncrBEfsFjTU8db+kRwY8EKYCCQDswFhkRdr0N1AnoDp4bzHYFlBI/Qegi4K0y/C3gwnL8UeB8wYAQwPUzvBqwMX7uG812jfn8H8wTcAfwVeCdcfhW4Jpz/A/CdcP67wB/C+WuAV8L5IeH50xoYEJ5XqVG/r4N5Ap4DvhXOpwNddK5Eejz6AKuAtuHyq8CNOlciORbnAqcCCyqkNdq5AXwe5rVw20saq+5qIavZ6UC2u69092Lgb8CYiOt0yHL3Te4+K5zfBSwm+CM3huDHh/D1inB+DPC8B6YBXcysN3AR8JG757j7TuAj4OKmeyeHFjPrC1wGjA+XDTgfeC3Msv8xKT9WrwGjw/xjgL+5e5G7rwKyCc4vOQBm1pngR+cZAHcvdvdcdK5ELQ1oa2ZpQDtgEzpXmpy7TwJy9ktulHMjXNfJ3ad5EJ09X6GsBlNAVrPqHuXUJ6K6tChh8/0pwHSgl7tvCldtBnqF8zUdHx23xvUocCcQC5e7A7nuXhouV/x84599uD4vzK9j0rgGANuAP4eXksebWXt0rkTG3TcAvwXWEgRiecBMdK40F411bvQJ5/dPbxQKyKRZMbMOwD+A29w9v+K68D8SjdPSRMzscmCru8+Mui5SSRrBJZmn3P0UYDfBZZg4nStNK+yTNIYgWD4caI9aG5ul5nxuKCCrmR7l1MTMrBVBMPaSu78eJm8Jm4kJX7eG6TUdHx23xnMW8GUzW01wyf584DGCZv3yQaUrfr7xzz5c3xnYgY5JY1sPrHf36eHyawQBms6V6FwArHL3be5eArxOcP7oXGkeGuvc2BDO75/eKBSQ1UyPcmpCYf+JZ4DF7v67CqveBsrvcLkBeKtC+vXhXTIjgLywSfpD4EIz6xr+13phmCb15O7j3L2vu/cn+P5/4u7XAhOAq8Js+x+T8mN1VZjfw/RrwjvLBgCDCTrGygFw983AOjM7JkwaDSxC50qU1gIjzKxd+Les/JjoXGkeGuXcCNflm9mI8DhfX6Gshov6jojmPBHcgbGM4E6Xu6Ouz6E8AWcTNCPPA+aE06UE/So+BpYD/wa6hfkNeDI8NvOB4RXK+iZBZ9hs4Kao39uhMAEZ7LvLciDBj0Q28HegdZjeJlzODtcPrLD93eGxWkoj3pXUUidgKJAVni9vEtwJpnMl2mNyL7AEWAC8QHCnpM6Vpj8OLxP04yshaE2+uTHPDWB4eIxXAE8QPvGoMSY9OklEREQkYrpkKSIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhKxtLqzNF89evTw/v37R10NERERkTrNnDlzu7v3rG7dQR2Q9e/fn6ysrKirISIiIlInM1tT0zpdshQRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYgpIBMRERGJmAIyERERkYglPSAzs1Qzm21m74TLA8xsupllm9krZpYeprcOl7PD9f2TXTcRERGR5qApWsjGAosrLD8IPOLug4CdwM1h+s3AzjD9kTCfiIiIyCEvqQGZmfUFLgPGh8sGnA+8FmZ5DrginB8TLhOuHx3mFxERETmkJbuF7FHgTiAWLncHct29NFxeD/QJ5/sA6wDC9Xlh/krM7FYzyzKzrG3btiWx6iIiIiJNI2kBmZldDmx195mNWa67P+3uw919eM+ePRuzaBEREZFIpCWx7LOAL5vZpUAboBPwGNDFzNLCVrC+wIYw/wagH7DezNKAzsCOJNZPREREpFlIWguZu49z977u3h+4BvjE3a8FJgBXhdluAN4K598OlwnXf+Lunqz6iYiIiDQXUYxD9mPgDjPLJugj9kyY/gzQPUy/A7grgrqJiIiINLlkXrKMc/dMIDOcXwmcXk2eQuDqpqiPiIiISHOikfpFREREIqaATERERCRiCQVkZjbWzDpZ4Bkzm2VmFya7ciIiIiItQaItZN9093zgQqArcB3wQNJqJSIiItKCJBqQlT/C6FLgBXdfWCFNRERERBog0YBsppn9iyAg+9DMOrLvcUgiIiIi0gCJDntxMzAUWOnue8ysO3BT0molIiIi0oLUGpCZ2an7JQ0005VKERERkcZUVwvZw+FrG2AYMI+g79hJQBYwMnlVExEREWkZau1D5u7nuft5wCZgmLsPd/dhwCnseyi4iIiIiDRAop36j3H3+eUL7r4AOC45VRIRERFpWRLt1D/fzMYDL4bL1xJcvhQRERGRBko0ILsR+A4wNlyeBDyVjAqJiIiItDR1BmRmlgq8H/YleyT5VRIRERFpWersQ+buZUDMzDo3QX1EREREWpxEL1kWEPQj+wjYXZ7o7j9MSq1EREREWpBEA7LXw0lEREREGllCAZm7P5fsioiIiIi0VAkFZGY2GLgfGEIwaj8A7j4wSfUSERERaTESHRj2zwTDXJQC5wHPs29MMhERERFpgEQDsrbu/jFg7r7G3e8BLktetURERERajkQ79ReZWQqw3My+T/Acyw7Jq5aIiIhIy5FoC9lYoB3wQ2AY8A3ghmRVSkRERKQlSbSFLMfdCwjGI7spifURERERaXESDcieNbO+wAxgMjDJ3ecnr1oiIiIiLUei45CNMrN04DQgA3jXzDq4e7dkVk5ERESkJUh0HLKzgXPCqQvwDkFLmYiIiIg0UKKXLDOBmQSDw77n7sVJq5GIiIhIC5NoQNYDOAs4F/ihmcWAqe7+s6TVTERERKSFSLQPWa6ZrQT6AX2BM4FWyayYiIiISEuRaB+ylcASYArBI5Ru0mVLERERkcaR6CXLQe4eS2pNRERERFqoREfqH2RmH5vZAgAzO8nMflrbBmbWz8wmmNkiM1toZmPD9G5m9pGZLQ9fu4bpZmaPm1m2mc0zs1Mb9M5EREREDhKJBmR/AsYBJQDuPg+4po5tSoEfufsQYATwPTMbAtwFfOzug4GPw2WAS4DB4XQrwaVRERERkUNeogFZO3f/fL+00to2cPdN7j4rnN8FLAb6AGOA58JszwFXhPNjgOc9MA3oYma9E6yfiIiIyEEr0YBsu5kdBTiAmV0FbEp0J2bWHzgFmA70cvfybTcDvcL5PsC6CputD9P2L+tWM8sys6xt27YlWgURERGRZivRTv3fA54GjjWzDcAq4NpENjSzDsA/gNvcPd/M4uvc3c3M61Nhd386rAvDhw+v17YiIiIizVGi45CtBC4ws/YErWp7CPqQraltOzNrRRCMveTur4fJW8yst7tvCi9Jbg3TNxCMc1aub5gmIiIickir9ZKlmXUys3Fm9oSZfZEgELsByAa+Wse2BjwDLHb331VY9XZYBuHrWxXSrw/vthwB5FW4tCkiIiJyyKqrhewFYCcwFbgFuBsw4CvuPqeObc8CrgPmm1l53p8ADwCvmtnNBC1s5YHde8ClBMHeHuCm+rwRERERkYNVXQHZQHc/EcDMxhN05D/C3QvrKtjdpxAEb9UZXU1+J+irJiIiItKi1HWXZUn5jLuXAesTCcZEREREJHF1tZCdbGb54bwBbcNlI2jU6pTU2omIiIi0ALUGZO6e2lQVEREREWmpEh0YVkRERESSRAGZiIiISMQUkImIiIhETAGZiIiISMQUkImIiIhETAGZHPLKisu4JyOTnatyo66KiIhItRSQySHvn7/I4t6JGYy9YEHUVREREamWAjI55JUUxQDYW6Rh9UREpHlSQCYiIiISMQVkSeIxZ+zQicx9dWnUVREREZFmTgFZkmxfuoPH547igmu6R12VFs/do66CiIhIrRSQiYiIiERMAZkcsNw1ebzx42kNLidWGuOuEZmsm76xEWolIiJy8FFA1kBmcMuxkxLKm7smj5wVO5Nco6bz9dOWceVDI1jz6foGlZP1wmIenJ7BtRdua6SaiYiIHFwUkNXBY87aqRtqzTN+6bkJldW1f2e6D+raGNWKzJxXlrLikzUArMjtAcDevOIq+Tzm3Dc6ky0LgiBr2YerGNp2abUBaaws6ONVXFb/YSmKC4rZPG9rfHn1lPU8dGlmpTxmVu9yRUREmpICsjq88O1POfLMPkz5v3m15nvwkky2LqzawrPdeySrapE45ZpjGDT6SADMau4sP238An7+SQbXnruO+0ZnctdNW5hbeAz/fGBhjdscSNf71h3T6X3yYXgs2Pr882L8+P0Mpo3fNwisOvWLiEhzp4CsDp9ODgYVXfhpbq357vogg+tHral2XVSP7CkrLiN3TV6T7Ksov4jLe33OorezgX2DsX6881R+/kkGb2waAYA73Dc6k4qNVuXzjrHgjeU8dGkmf/h6YpeBy8VKg/2tKj0CgJG3nMCqSesa8I5ERESajgKyBO0uiFFWXFZrnvyi1hTmFlKUX1QpPXfdroT28c+ffc7sl5cccB3LrZ6ynhe+PYXvn/IpXft3rlKf2lze63O+2H1mvfbnMeez8Yt4d+vpfO+Gut/rzz/JqLRsKUFENmP38Zx45WB+/H4G33n5XH51QWY8T2lhKb+9PLPG91IekFW0dvYOAO56om+C72Sf52+dwm8uy6wzn4iISGNQQJagH72dwQ3H1H1HYduubejbpWpQckG3WfH59TM2Vbvtl391Oqd+/dgq6R5znv7GpIRb2kaMSuf6P57NHxYFfdsuH7CAMb2nM/fVpdx2ysT45b3qvLv1dP6dM4y9OXvr3E+Z7/v6lJUe+GXB8oBsfz/7OCM+//SNn/I/72bQpnNr8tfnV8m7a1NBtWWUFpbGW81e2zCSiY/N4dtDJjGy4/xa63TDn87mzvcyas0jIiLSWBSQ1cJjztNL9nXYf2n1WbUGM+W2ew9Kiyq3pn2889T4fL/TezNt/AJWTVrHjuU5dZY388XF/NdL53LzWUHrWVlxGTNfXFxj/i2xwyot/ztnGG9vPoOh/3kMj80ZRc6KncRKY5QWltZYRrvubeu8ezK7pD8AQ750FF/8cfD+SsKO+c8+Vn1L2bzZVfdZW5/7kj0lAOzete9zf+Kbsxg7dCJptq+sr52xosq2sybkcm7Pyp9Txm1D+ePic5lWcCJ/vHYShbmFNe8c6GT55K7J41vHTCZvbfIv/77/yxm0tb3s2phYq6qIiBwaFJDVorqAJSXVGDt0IgCZj86ptG7lnl7x+cNP2TdfVlL1ctrIW05g4Kh+DD4miEYq3hCQvz6fR78ykU1ztgCwNz8ISrbtbgvAi9+byvDrjuOtn0wHgjsN+6Ru4vU7pyX0Qx4rjfHFw+bQqm1arfmyP9vKvNeW1VleRZ/uOolnbpzMcyvOrnb9I7My4vP/+J+pdZaX3r4Vr97+GakVqnr3Rxk8PncUZexLnLxjSJVt73grg6kFJ9ZY9rf/ei73XFx7q+cuOtG1f2eeWXYOD31tdp31ra9fX5jJuZ3nxpd//mB7CmnLko/U/01EpCVRQHYAHp87ig9+lcV5tw+tlL5/y1S5wRccWWNZO70rH/5vFr1O6BlP+6+zFnD7m6M4/JRemAUd4QE+yz+Bpe+vZMmCIFBcmBVcVty2ZAcbY735wcNHcubAzXXW/8VxC/gkbLGb+NgcnvzqxGrz/fn/9nLy1Ufzr/ur9imr7o7Sct967pw66wBw1W9H0t52c/GNvWrN98Sz7Ump4bJmudgBfpVnLOsEwAMXBzca3DUik1dv/6zavKn1H5WjTnd/lMHk/JPjy7ofVESkZbKDeUiA4cOHe1ZWVtLKLy4opnXH9KSVX5OzO81lSoUf6Yt7zOCD7afFl39yZia//iyDrraTJfNL+fT5FVz50IgG7XProu0s+td6Mm4bGk87ofVyFhQNpm/qRr44cAXPLjun1suLydQ/bR2rS/vVmqdkb2mdrX7VWfav1Qy7sBu76FRrvnvPy6xyQ0JNZr20mHZdW3PspQNrzVf+eZaVxBjZZRGf7z4hvq60qIzU9CREgSIiEgkzm+nuw6tdp4CsZvnr8+ncr/Yf6ai1YzdDO67gs10nNaick9osZV7hMZXSTmyzjPmFRzeo3KZU3XtoTL+6IJO7P8qolBYrjfHLCybx3SeP57Dj97VyxofyqOX08piTkhpkLNiymw692ldaX5hXROtOretdz/svymT7Dng4K6POvCIi0nRqC8h0ybIWf7trTtRVqNMe2jc4GAOqDWQOpmAMqn8PjSk1NQjAhrVbzG8vz6RkTwlv3f05907M4JvnrwLgj9dOYsl7K+ssa9vi7fFgDOCD31QdeHj2q8ur3TZWGuOdn39e4w0mP/lXBr+bmcHKzLU8cXX1l6NFRKR5UUBWi7+92zHqKkgzkpYGH/92NrP2Hsf/vJtBevtW8UvFeYVBS9a3/3oup172hUrbFRcUV7lD85nbF1Ravuq3I6vsb+QtJ7Bt8XZK9pTEB/j97gmTuPQLs/jSfafzt7FTKdhcEL9TdPvSHfz28sz49uddkMIPXhtFwebqhwSpSc6Kncx4blGV9E8ens1HD9RvjLrmKn99Phf1yKrzsWgiIk1FAVktWqfVPCyEtDxprYwLxw2rdt2U/JN5c1xw1+te2sXTn/zqRC7os5guR3bm4S9lsmN5Ds/eNJmXMvsktM/DhvTg8iPm0rV/Z1KtjKcWnsuHO4LW7jffgo69O3BU952UFpbS89ju/M+7GfFtt5V1A4I8ZsFl1Je++ykAz9w4mXXTN1ba16pJ63j+1imMOn47p984hKL8Iib9fi4Tfjeb9+6dwej/PqXS+y/YXFBpsOT37p3Bvx8Kxtub+NgczGoec6+suKxKoFhWXNZkw3387a45/GvHcO67qepwKSIikXD3g3YaNmyYJ9MTV2d60AtIk6ZDc/rWMRN9/A2T/Psn1u+7vuS9FQ7uZ7Sf78v/vdp3ZOfE153SdpH/Z79PHdyHtVvouWtyPWflTi/eXRw/t05ss9TBfe20Df7QpRN85ouL4tuX7C3x7ct2OLgflbba3/nF5770g5WVzs2iXUWeuybXY2Ux37VpV6V1G2ZucnCf8n9zq5zTuzbt8tl/W+L3jZ7g4H7LsRO9YEuBx8piNf4dKCsp893bdjfwr4mIiDuQ5V59TKNO/bVY/tFqjr6wf9LKF5H6S6OEUlo1ermtKObnoz9jfOYg+nfYzsS8oZXWP/zlTJ54b2D8yQ8AF3Sbyb9zglbD5/9rCquXl3LMSen856NncnKbpfz8Bzv5j98El7WnjV/AiG8Fd9FmPjqHy287ij/9YD7tO6dRkFtK2w6pLJ+3l0XLUiktM7blt+HCswrYuAEu+3pnOvZoTefe7Zj93kb27IpxyoU9SUk1ykqdCS9v4ryv9aasJMaJVxxFyZ4S2vVoR2lhKYW5haS1SaNoVzGt2qbRpksbyorLSO+QTlF+EQVbduMxp8uRnSkuKCatTRrpHdLjT+tITU/FY05RfhFlJTHadG5N606tyd+wi1Zt00hrk0ZpYWlwk0paCtuW5rBhwU5S04xh1x5LyZ4S2nRpQ966fHJW5ZGSlsLW5XkMv+448jfsokOv9sRKY6R3SGfnqly6HNmZ3Vt3k5qeyuL3V7N7ZzFDrxxIanoqKyau59hLBlCUX0T7w9qzZ/seUtJSmPHSMk679mjadgvGalzwxnJSUo1B5wfHylIMjzmt2rViy4JtzH5zTTytuDDGxXcNpVW7VpTsKaF1p9ZsX7qDrUt3ktY6lVVZOzj/9pOxFIvXsyi/iLQ2aaSmp5K3No/U9FRK9pbSpnNrYqUx2vVox5xXlnL8lwbSql0r8tbl07F3B0oLS7EUIyUthfVZm2nXtTXrZm/npCsH0apdK4oLiikrLmPZv9dSvLeMASN60XVAF3auyqVj7w607tSasuIydm/dHX//qemptOsRtMoX5hayd2chXQd0YW/OXizFSGuTRv6GXbTp3BqPOe16tGP31t3s2bGXdbO3MzijD8W7S1gxZRMn/8cg2nRpQ2FuYbw+C99ZxXEXHwlASloKlmKkpqdSVlwWfH4FxXQ+onP8nCjMLSS9Qzole0qY9bdlnHTFQDzmpKan0rpTa/Zs31Op/2t6h3SmPruYwWf3oqwkxv/85xqemnAc3Y7qSlF+Eekd0inMLeTjR+fT66gODLnkSFq1a8XubXto171t/DhA0MoeK42Rty6fTQt2cNxlA9mbs5e23dpSmFtIhy90AIIxRkv2lFBWXEbx7hLS27eKr0u2g+YuSzO7GHgMSAXGu/sDteVPdkC2atI6Bo6qfagFEREROfjdeXomD07PSOo+Doq7LM0sFXgSuAQYAnzNzKoOv96EjjyzD8elr+D09vs6YN97XiYAV/etOsr83WdlcvPRk3nqa5P42pGf8rNzMmlFcZV8Z7RfEC/n6FarmPniYu6/KJMNMzcz+cl5fKnXdM7sWPmuu14pW7m05wwAOpPH4/8xkW8MmFJtvf947aT4/JDW2ZXKePeeGRybvoLeKZv547WTuHHQ5Lo/h9TgEUo/PzeTwrwi7snIZM+Ovbx7zwyeu2UKn/1xPlf0nsZhto3HrpzIz87JZPKT83jo0kwyH51TbT0//u1sIGiVqPhZ/vfwTB7+ciZvjpvOsemV+/f88vxMFrwZvJ/RXWfxw5Mn0tOCAWqv6jO10pMTNszczLiRmVzQbSZfO/JTti7azrv3zKCHbeeejEyObx3cwdiO3Tx3yxQ++FUWr99ZddT+sUMncuXhdT/DVEREDm75BdGGRM2mhczMRgL3uPtF4fI4AHe/v6Ztkt1CJiIiItJYamshq/+w5snTB6j4AL/1wBn7ZzKzW4Fbw8UCM1ua5Hr1ALYneR9SfzouzY+OSfOk49L86Jg0T01xXI6saUVzCsgS4u5PA0831f7MLKumaFaio+PS/OiYNE86Ls2PjknzFPVxaTZ9yIANQMUe9H3DNBEREZFDWnMKyGYAg81sgJmlA9cAb0dcJxEREZGkazaXLN291My+D3xIMOzFs+6+MOJqQRNeHpV60XFpfnRMmicdl+ZHx6R5ivS4NJu7LEVERERaquZ0yVJERESkRVJAJiIiIhIxBWS1MLOLzWypmWWb2V1R1+dQZmb9zGyCmS0ys4VmNjZM72ZmH5nZ8vC1a5huZvZ4eGzmmdmpFcq6Icy/3MxuiOo9HSrMLNXMZpvZO+HyADObHn72r4Q34WBmrcPl7HB9/wpljAvTl5rZRRG9lUOGmXUxs9fMbImZLTazkTpXomVmt4d/uxaY2ctm1kbnStMzs2fNbKuZLaiQ1mjnhpkNM7P54TaPm5k1WuVreup4S58IbixYAQwE0oG5wJCo63WoTkBv4NRwviOwjOARWg8Bd4XpdwEPhvOXAu8DBowApofp3YCV4WvXcL5r1O/vYJ6AO4C/Au+Ey68C14TzfwC+E85/F/hDOH8N8Eo4PyQ8f1oDA8LzKjXq93UwT8BzwLfC+XSgi86VSI9HH2AV0DZcfhW4UedKJMfiXOBUYEGFtEY7N4DPw7wWbntJY9VdLWQ1Ox3IdveV7l4M/A0YE3GdDlnuvsndZ4Xzu4DFBH/kxhD8+BC+XhHOjwGe98A0oIuZ9QYuAj5y9xx33wl8BFzcdO/k0GJmfYHLgPHhsgHnA6+FWfY/JuXH6jVgdJh/DPA3dy9y91VANsH5JQfAzDoT/Og8A+Duxe6ei86VqKUBbc0sDWgHbELnSpNz90lAzn7JjXJuhOs6ufs0D6Kz5yuU1WAKyGpW3aOc+kRUlxYlbL4/BZgO9HL3TeGqzUCvcL6m46Pj1rgeBe4EYuFydyDX3UvD5Yqfb/yzD9fnhfl1TBrXAGAb8OfwUvJ4M2uPzpXIuPsG4LfAWoJALA+Yic6V5qKxzo0+4fz+6Y1CAZk0K2bWAfgHcJu751dcF/5HonFamoiZXQ5sdfeZUddFKkkjuCTzlLufAuwmuAwTp3OlaYV9ksYQBMuHA+1Ra2Oz1JzPDQVkNdOjnJqYmbUiCMZecvfXw+QtYTMx4evWML2m46Pj1njOAr5sZqsJLtmfDzxG0KxfPqh0xc83/tmH6zsDO9AxaWzrgfXuPj1cfo0gQNO5Ep0LgFXuvs3dS4DXCc4fnSvNQ2OdGxvC+f3TG4UCsprpUU5NKOw/8Qyw2N1/V2HV20D5HS43AG9VSL8+vEtmBJAXNkl/CFxoZl3D/1ovDNOkntx9nLv3dff+BN//T9z9WmACcFWYbf9jUn6srgrze5h+TXhn2QBgMEHHWDkA7r4ZWGdmx4RJo4FF6FyJ0lpghJm1C/+WlR8TnSvNQ6OcG+G6fDMbER7n6yuU1XBR3xHRnCeCOzCWEdzpcnfU9TmUJ+BsgmbkecCccLqUoF/Fx8By4N9AtzC/AU+Gx2Y+MLxCWd8k6AybDdwU9Xs7FCYgg313WQ4k+JHIBv4OtA7T24TL2eH6gRW2vzs8VktpxLuSWuoEDAWywvPlTYI7wXSuRHtM7gWWAAuAFwjulNS50vTH4WWCfnwlBK3JNzfmuQEMD4/xCuAJwiceNcakRyeJiIiIREyXLEVEREQipoBMREREJGIKyEREREQipoBMREREJGIKyEREREQillZ3luarR48e3r9//6irISIiIlKnmTNnbnf3ntWtO6gDsv79+5OVlRV1NURERETqZGZralqnS5YiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhIxBWQiIiIiEVNAJiIiIhKxpAdkZpZqZrPN7J1weYCZTTezbDN7xczSw/TW4XJ2uL5/susmIiIi0hw0RQvZWGBxheUHgUfcfRCwE7g5TL8Z2BmmPxLmExERETnkJTUgM7O+wGXA+HDZgPOB18IszwFXhPNjwmXC9aPD/CIiIiKHtGS3kD0K3AnEwuXuQK67l4bL64E+4XwfYB1AuD4vzF+Jmd1qZllmlrVt27YkVl1ERESkaSQtIDOzy4Gt7j6zMct196fdfbi7D+/Zs2djFi0iIiISibQkln0W8GUzuxRoA3QCHgO6mFla2ArWF9gQ5t8A9APWm1ka0BnYkcT6iYiIiDQLSWshc/dx7t7X3fsD1wCfuPu1wATgqjDbDcBb4fzb4TLh+k/c3ZNVPxEREZHmIopxyH4M3GFm2QR9xJ4J058BuofpdwB3RVA3ERERkSaXzEuWce6eCWSG8yuB06vJUwhc3RT1EREREWlONFK/iIiISMQUkImIiIhELKGAzMzGmlknCzxjZrPM7MJkV05ERESkJUi0heyb7p4PXAh0Ba4DHkharURERERakEQDsvJHGF0KvODuCyukiYiIiEgDJBqQzTSzfxEEZB+aWUf2PQ5JRERERBog0WEvbgaGAivdfY+ZdQduSlqtRERERFqQWgMyMzt1v6SBZrpSKSIiItKY6mohezh8bQMMA+YR9B07CcgCRiavaiIiIiItQ619yNz9PHc/D9gEDHP34e4+DDiFfQ8FFxEREZEGSLRT/zHuPr98wd0XAMclp0oiIiIiLUuinfrnm9l44MVw+VqCy5ciIiIi0kCJBmQ3At8BxobLk4CnklEhERERkZamzoDMzFKB98O+ZI8kv0oiIiIiLUudfcjcvQyImVnnJqiPiIiISIuT6CXLAoJ+ZB8Bu8sT3f2HSamViIiISAuSaED2ejiJiIiISCNLKCBz9+eSXRERERGRliqhgMzMBgP3A0MIRu0HwN0HJqleIiIiIi1GogPD/plgmItS4DzgefaNSSYiIiIiDZBoQNbW3T8GzN3XuPs9wGXJq5aIiIhIy5Fop/4iM0sBlpvZ9wmeY9khedUSERERaTkSbSEbC7QDfggMA74B3JCsSomIiIi0JIm2kOW4ewHBeGQ3JbE+IiIiIi1OogHZs2bWF5gBTAYmufv85FVLREREpOVIdByyUWaWDpwGZADvmlkHd++WzMqJiIiItASJjkN2NnBOOHUB3iFoKRMRERGRBkr0kmUmMJNgcNj33L04aTUSERERaWESDch6AGcB5wI/NLMYMNXdf5a0momIiIi0EIn2Ics1s5VAP6AvcCbQKpkVExEREWkpEu1DthJYAkwheITSTbpsKSIiItI4Er1kOcjdY0mtiYiIiEgLlehI/YPM7GMzWwBgZieZ2U9r28DM+pnZBDNbZGYLzWxsmN7NzD4ys+Xha9cw3czscTPLNrN5ZnZqg96ZiIiIyEEi0YDsT8A4oATA3ecB19SxTSnwI3cfAowAvmdmQ4C7gI/dfTDwcbgMcAkwOJxuJbg0KiIiInLISzQga+fun++XVlrbBu6+yd1nhfO7gMVAH2AM8FyY7TnginB+DPC8B6YBXcysd4L1ExERETloJRqQbTezowAHMLOrgE2J7sTM+gOnANOBXu5evu1moFc43wdYV2Gz9WHa/mXdamZZZpa1bdu2RKsgIiIi0mwl2qn/e8DTwLFmtgFYBVybyIZm1gH4B3Cbu+ebWXydu7uZeX0q7O5Ph3Vh+PDh9dpWREREpDlKdByylcAFZtaeoFVtD0EfsjW1bWdmrQiCsZfc/fUweYuZ9Xb3TeElya1h+gaCcc7K9Q3TRERERA5ptV6yNLNOZjbOzJ4wsy8SBGI3ANnAV+vY1oBngMXu/rsKq94OyyB8fatC+vXh3ZYjgLwKlzZFREREDll1tZC9AOwEpgK3AHcDBnzF3efUse1ZwHXAfDMrz/sT4AHgVTO7maCFrTywew+4lCDY2wPcVJ83IiIiInKwqisgG+juJwKY2XiCjvxHuHthXQW7+xSC4K06o6vJ7wR91URERERalLrusiwpn3H3MmB9IsGYiIiIiCSurhayk80sP5w3oG24bASNWp2SWjsRERGRFqDWgMzdU5uqIiIiIiItVaIDw4qIiIhIkiggExEREYmYAjIRERGRiCkgExEREYmYArJmLndNHj84aSKFuRptRERE5FClgKyZu/crc3hi/iieuXV61FURERGRJFFA1syVlgavsVi09RAREZHkUUAmIiIiEjEFZCIiIiIRU0AmIiIiEjEFZCIiIiIRU0DWAHlr81jy3sqoqyEiIiIHuVofLi61O/PobSwqGoR71DURERGRg5layBpgUdGgJtuXgj4REZFDlwKyQ0RhbiGv3v5Z1NUQERGRA6CArBHt3rqbd37+eb23K9lTwvoZmxq07x9nTOc/Hz2TCb+b3aByREREpOkpIKungs0F/PzcTEr2lFRZ919nzOFL953Owrey61Xm2NOn0u/03uxclcuTX53Id0+YVO96rd3aGoDcLUUAbMjaxP9+MfOAAkQRERFpWurUX0/3fimL32Zl0O/bk4BzAVj0djZDvjyIlds7AZC3eW+9ynx3yUAA8jcW8P2/jwLg/xLcNmfFTjr27hBfvvKhEey6vYCjT+vEHjLg3+C/rFd1REREpImphawWxQXFXHn4ND7/80I2z9vKik/WsGB1+2Bd4b5e9sePGcRHD8wkxYK0WFn9euA7Vm36Fb2n88T8UbVu231QV75+dFaltJ9flsUe2terDvvbm7OXvTnVB5atrIR7z8tsUPnlZr64mDfH6cHpIiLSsikgq8XMvy7ljU0jOOObx9P75MMYNPpIPth+GgC23yd34bhhBxyQrSvrA0D/s/tWSn9r8xnx+d0FNZf52oaRlZYfmZVRabli/7TpzyygKL8ovuwx54mrJ5K3Nq/SNu26t6Vz99Rq91dKK+7JzKh23bv3zGD2y0tqrGu51++cxqK3sxl+3XF85YEz6swPsGf7HsqKyxLK21xtyNrEX741JepqiIhIM6OA7AAVF1VN27S3MwAFOcWV0suKyyguKK66AWDVN45V8ZN/ZQBBADX9mQUJ1xPgo6eCPm3LPlzFiG+dQJvOrVk3fSOL31lBSqrxg9dG8Z1zF7DgjeUUbC7g06fmAVBCOp0sn2njE9/f5feexqlfPxaA7Ut3sPyj1dXm+4/fjOD4MfUbNqR9z3bceOzUatctfX8lO1fl1rjth/+bxbzXltVrf8lw4dm7uemZs2uta3VK9pRQWlianEqJiEjkFJAdoNvfrHopMbukPwCX3XMaC95Yzi3HTmJvzl4u7j2H1h3T+faQScRKYwxOX817986otfxYaaza9MGt1zLiWyfw1k+mJ9xa9M0/n8Ob46azY01BPO2/vrSBlx5YF19+ec1ZnHjlYDr27sD6Jfvy7aITI285odrLiqO6zCFWGotPHqvcinfscXD0hf0TqmO56oKO7xw/ia8cPg2AF1edXe12x146kOFH51W7btmHq7j4p8M5+eqj61WX+vCYJ9QyuLm4G1Dz8a1JevtWnNh5zQHVTUREmj8FZEly4pWDGb/0XE7tvZF/5wwD4I+LzyW1VQrZJf257J7T+Ov3Pq1x+xFdFldJ656Sw4rSIwG44v4zSGtd/SXF6nzlgTOwlH3NcUbNg81uXFO1NW/Fgr2c03lupRa9SXlDOb3zEs7ttoDUVimkpFZu7tvh3QHIXZOX0LAer4z9jFZt01j24apK6X9YdC5vbhoRX37tR9W3kq0MP5tyC95YDsAxFw+oc98N9cRXJ3Hq149NeNiR/YPXRCwpPqre24iIyMFBAVmS1fYjeu3/nVXjuhm7j6+SluPdasz/0aYT6qzLnrx9Q3W8t+00fv1ZRrX57nir+vQp+SdXSZu5Zwif7jqpSnrFgKNr/870O713rXVb/tFqrnn8TADmfriZSb8Pgr/qLule/bt9feZmvriY7ik5VfL87YefceKVg3njx9Nq3S/AMzdO5pOHg0DqhqOm8MDFmVXeyx+vncSaT9fXWMbsecGptGp+QY15REREaqKArBZlpQfP84p206HOPKP/+5QDLj8/v375928tA/jnzz6nYHMBJ7Wt2per4qXN519KZdQPqwZ/+/vX/TP54XdLKgWq3z9pImYwf2bQynf1Q8Or3Xb+P5Yx55WlmMG3njsn/tk8v/Jsxn2YQa+UbQD86fpJ/OzciXz7r+fS/+y+3HbKxErB5hs/noYZ/GN5EJTe/JdzqtwgUe7BSzLjda3YWvnkVycmdLmzOXjrJ9NrvPsWYM2n6w/6Gy9ERCLh7gftNGzYME+mO0+f4MGFPU3NbbpvdO3Hpi27612me+Xlr/SeWmPe1uytcV3/tLX+8JcnxL9HSz9Y6a/c9mmlPIvfXeGb5m7xXZt2xdPK56854lPftmS7u7vHymK+dtqGSnWc/bclPrLDPN+9bXet398NMzd5aVFppbTpzy7w9TM2+qa5W7x4d7G7u3/y8Cz/+x2fxfNM+v0c37poW5Xysl5Y5OD+rWMmVru/lRPXOrj/7JwJ1a5PhvwN+b5zdW6T7U9EpCGALPfqY5pqEw+WKdkB2d1n1f6jr+nQmq6oJQA70GnFhDUHvO2AtDX+4CUTKqX95VuT4/MTHpkd/65uX7YjHmA9/h+Zfk9G5e3y1uX57m2Vg9RvDp7kWxdtiy+7u29ZsNXBfXCrle7u/tGDMx3cV01e5/dfFJQ5tO1i//QP89zd/Y/XTvQfnzHB3d0nPznXwf3MjnOrPZ+KdhX5B7+a4fdfNMG3L9vh7u6/uSwoc//Asdyfrp/ki/6ZXeM52pG8eN1r88GvZsQ/n4p2ZOdUm95QsbKYv3fv5x4rizV62SJy8FJAdoAUkGlq7tOQ1ssrLR+Xnt1oZV/d9zO/sPuMGtf/+sIJ8fmHLp1QKaD9/omZ/sZd0/xLvab5t4dM9DWfrfcvdsuKrz+r41x/7b8/i7c0Tnx8ji98a7lvWbDVP3pwprsHQSa4G2Xu7p6zcmelwG1Hdk68vFhZzJ++bqK/84vPfefqXC8tKvVF/8z2F749xX9+blDPi3sEAdKO7Jx4GeXbP3vTpHirZH3FymLxALPcMzdOcghea5OzcqcXbCnwgi0FB7RvETm41BaQWbD+4DR8+HDPysqqO+MB+uX5mfxiQkbSyheR5i2FMmKkxl8Ps20Uejr5dOYrvafxRoW7fwFOa7+QMwZur/KEjesGTmHSmv4M7bmBtzafwSNXTCTjmi9wyjXHxPPcfmomJ5yUSuu2KSxbWMK85W14c9MIBqStZWdZJ958ZDXd+raj3/BezHljFefdPpSXf/AZl/z38aS1SaNg6x62Lt1Jn6E9sRSjQ6/2FBcUk5qeSu7afPbmFrFm1g5O/8bRFGzZzbrZ2xl4Vm+6DuhS5X0XbC5g785CZr+5hqFfPoJ23dsy85VsTrj0CNJapxIrc0oLS2nfsx1lxWXs3VlI98HdKNhcwF9um0Ms5px6XheWzsjnwm8PpM+pvfjwgdkM/VI/egzuSutOrXnmpikcPbwTp/zHQHZv30unwzvgMWfvzkLWzNjK7p3FnHDpEXQ7qit7c/ayad421szawdCvDGDh+2tJb5vK8Zf1B6BkbylprVNJSUshJS2FwrwiOvbuwAe/nsXo206kZE8JHQ/vGH9vuet2sS07D0sxiveUsmt7Eef/6JRKfTtzVuxkT04hvY7vQWFuIcW7S9gwdzsnXXU066ZvJH/zHgq2F3LMBf1ISTWmvbCcEy7qQ8neUtLbt2Lay6sYelkfNi7cSev2aXTs2YYjzuhNrDRG+8Pak7c2j8L84nif1Hbd2lCyt5Tug4N+pnu27+FfD89n5LUD6XF0N5b9azW9jutG647ppKSlUFpYSlqbNHLX5tOmc/As464DurDo7Wy6HtGRw4b04N17s7j83tNYN30jq2Zs56QvHUm3o7qSs2Inrdqm0fHwjkz6/Vw692rDF4Z047AhPeKfwbrpG/nXUytYuqiM6358OG89tZG7/zWK3DV57FyTT+c+HVjw3loGndULd3jt18vofUQrRn/3GNp1b0v+xgJ6HN2NzfO30al3e9p0acOSD1bT6QvtOPLMPuxclUu3o7oCkPnoHDav3MO5Nw7k8FO/AATjWBbtCr6/Xft3pqy4jLz1uwBo27UN25bt5IX7VvPNXw9iw/wcTrjsSDof0bnKd3lD1iZ2ritg1vtb2JVbxpd/NBhLMd5+eBmnnN+NLoe348gRvVn8wRpO+PJAWndqncBfhgNnZjPdvdrOzc0qIDOzi4HHgFRgvLs/UFv+ZAdkf79jKl99ZGTdGUVEROSg9qVe03l7c2JPjjlQtQVkzebh4maWCjwJfBFYD8wws7fdfVFUdbrygdP58WeZvDOnD+3SimmXVsJ1V+wid0cZo7/Rmwfv3EHfw4r598Iv0KtdATd+vZhnXmrNhSN2MXV2m/h/zzcfPZme3cr47bSzKKUV3x4yifZtY/xh5nCOb7+ac4/bztDTWjH0ol6YwcqsHPqf0pXP395Mappx8gU9WTM3l7EP9aFNajEri/vy6H9OJTfHSUmBN6f0oE1aKUd0LyB7a2emFpzInadnsn5LKzbmtWNAzwJKSo3rb23Lnb9sz9aizlzQfwVb8tqwLO8wUi1Gdkl/HrtyIl++/ShSUo0PnlzBf710Lj84aSK/nzeKH5+RyZYdqRSXpPDXNfuG67hvdCbjMwexpqxvDZ/iPh3YRQHBf6nfHjKJbl1iNQ69AdDNcirdQTm41SqWlyR/TLGa/PL8TO775ExKSI+sDiIikhxXXl5Sd6YkajYtZGY2ErjH3S8Kl8cBuPv9NW2T7BYyERERkcZyULSQAX2AdRWW1wNV2g7N7Fbg1nCxwMyWJrlePYDtSd6H1J+OS/OjY9I86bg0PzomzVNTHJcja1rRnAKyhLj708DTTbU/M8uqKZqV6Oi4ND86Js2Tjkvzo2PSPEV9XJrTSP0bgH4VlvuGaSIiIiKHtOYUkM0ABpvZADNLB64B3o64TiIiIiJJ12wuWbp7qZl9H/iQYNiLZ919YcTVgia8PCr1ouPS/OiYNE86Ls2PjknzFOlxaTZ3WYqIiIi0VM3pkqWIiIhIi6SATERERCRiCshqYWYXm9lSM8s2s7uirs+hzMz6mdkEM1tkZgvNbGyY3s3MPjKz5eFr1zDdzOzx8NjMM7NTK5R1Q5h/uZndENV7OlSYWaqZzTazd8LlAWY2PfzsXwlvwsHMWofL2eH6/hXKGBemLzWziyJ6K4cMM+tiZq+Z2RIzW2xmI3WuRMvMbg//di0ws5fNrI3OlaZnZs+a2VYzW1AhrdHODTMbZmbzw20eNzOjsdT01PGWPhHcWLACGAikA3OBIVHX61CdgN7AqeF8R2AZMAR4CLgrTL8LeDCcvxR4HzBgBDA9TO8GrAxfu4bzXaN+fwfzBNwB/BV4J1x+FbgmnP8D8J1w/rvAH8L5a4BXwvkh4fnTGhgQnlepUb+vg3kCngO+Fc6nA110rkR6PPoAq4C24fKrwI06VyI5FucCpwILKqQ12rkBfB7mtXDbSxqr7mohq9npQLa7r3T3YuBvwJiI63TIcvdN7j4rnN8FLCb4IzeG4MeH8PWKcH4M8LwHpgFdzKw3cBHwkbvnuPtO4CPg4qZ7J4cWM+sLXAaMD5cNOB94Lcyy/zEpP1avAaPD/GOAv7l7kbuvArIJzi85AGbWmeBH5xkAdy9291x0rkQtDWhrZmlAO2ATOleanLtPAnL2S26UcyNc18ndp3kQnT1foawGU0BWs+oe5dQnorq0KGHz/SnAdKCXu28KV20GeoXzNR0fHbfG9ShwJxALl7sDue5eGi5X/Hzjn324Pi/Mr2PSuAYA24A/h5eSx5tZe3SuRMbdNwC/BdYSBGJ5wEx0rjQXjXVu9Ann909vFArIpFkxsw7AP4Db3D2/4rrwPxKN09JEzOxyYKu7z4y6LlJJGsElmafc/RRgN8FlmDidK00r7JM0hiBYPhxoj1obm6XmfG4oIKuZHuXUxMysFUEw9pK7vx4mbwmbiQlft4bpNR0fHbfGcxbwZTNbTXDJ/nzgMYJm/fJBpSt+vvHPPlzfGdiBjkljWw+sd/fp4fJrBAGazpXoXACscvdt7l4CvE5w/uhcaR4a69zYEM7vn94oFJDVTI9yakJh/4lngMXu/rsKq94Gyu9wuQF4q0L69eFdMiOAvLBJ+kPgQjPrGv7XemGYJvXk7uPcva+79yf4/n/i7tcCE4Crwmz7H5PyY3VVmN/D9GvCO8sGAIMJOsbKAXD3zcA6MzsmTBoNLELnSpTWAiPMrF34t6z8mOhcaR4a5dwI1+Wb2YjwOF9foayGi/qOiOY8EdyBsYzgTpe7o67PoTwBZxM0I88D5oTTpQT9Kj4GlgP/BrqF+Q14Mjw284HhFcr6JkFn2Gzgpqjf26EwARnsu8tyIMGPRDbwd6B1mN4mXM4O1w+ssP3d4bFaSiPeldRSJ2AokBWeL28S3AmmcyXaY3IvsARYALxAcKekzpWmPw4vE/TjKyFoTb65Mc8NYHh4jFcATxA+8agxJj06SURERCRiumQpIiIiEjEFZCIiIiIRU0AmIiIiEjEFZCIiIiIRU0AmIiIiErG0urM0Xz169PD+/ftHXQ0RERGROs2cOXO7u/esbt1BHZD179+frKysqKshIiIiUiczW1PTOl2yFBEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCkgExEREYmYAjIRERGRiCU9IDOzVDObbWbvhMsDzGy6mWWb2Stmlh6mtw6Xs8P1/ZNdNxEREZHmoClayMYCiyssPwg84u6DgJ3AzWH6zcDOMP2RMJ+IiIjIIS+pAZmZ9QUuA8aHywacD7wWZnkOuCKcHxMuE64fHeYXEREROaQlu4XsUeBOIBYudwdy3b00XF4P9Ann+wDrAML1eWH+SszsVjPLMrOsbdu2JbHqIiIiIk0jaQGZmV0ObHX3mY1Zrrs/7e7D3X14z549G7NoERERkUikJbHss4Avm9mlQBugE/AY0MXM0sJWsL7AhjD/BqAfsN7M0oDOwI4k1k9ERESkWUhaC5m7j3P3vu7eH7gG+MTdrwUmAFeF2W4A3grn3w6XCdd/4u6erPqJiIiINBdRjEP2Y+AOM8sm6CP2TJj+DNA9TL8DuCuCuomIiIg0uWResoxz90wgM5xfCZxeTZ5C4OqmqI+IiIhIc6KR+kVEREQillBAZmZjzayTBZ4xs1lmdmGyKyciIiLSEiTaQvZNd88HLgS6AtcBDyStViIiIiItSKIBWfmI+ZcCL7j7wgppIiIiItIAiQZkM83sXwQB2Ydm1pF9o++LiIiISAMkepflzcBQYKW77zGz7sBNSauViIiISAtSa0BmZqfulzRQz/sWERERaVx1tZA9HL62AYYB8wj6jp0EZAEjk1c1ERERkZah1j5k7n6eu58HbAKGhQ/1Hgacwr5nUIqIiIhIAyTaqf8Yd59fvuDuC4DjklMlERERkZYl0U79881sPPBiuHwtweVLEREREWmgRAOyG4HvAGPD5UnAU8mokIiIiEhLU2dAZmapwPthX7JHkl8lERERkZalzj5k7l4GxMyscxPUR0REpFYTfjeb5R+tjroaIo0q0UuWBQT9yD4CdpcnuvsPk1IrERGRGpz/o1MAcI+4IiKNKNGA7PVwEhEREZFGllBA5u7PJbsiIiIiIi1VQgGZmQ0G7geGEIzaD4C7D0xSvURERERajEQHhv0zwTAXpcB5wPPsG5NMRERERBog0YCsrbt/DJi7r3H3e4DLklctERERkZYj0U79RWaWAiw3s+8TPMeyQ/KqJSIiItJyJNpCNhZoB/wQGAZ8A7ghWZUSERERaUkSbSHLcfcCgvHIbkpifURERERanEQDsmfNrC8wA5gMTHL3+cmrloiIiEjLkeg4ZKPMLB04DcgA3jWzDu7eLZmVExEREWkJEh2H7GzgnHDqArxD0FImIiIiIg2U6CXLTGAmweCw77l7cdJqJCIiItLCJHqXZQ/gl8BI4AMz+7eZ3Ze8aomIiNTuk4dnR10FkUaTUEDm7rnASmAVsAk4Cjg3edUSERGp3fjf74m6CiKNJtE+ZCuBJcAUgkco3aTLliIiIiKNI9E+ZIPcPZbUmoiIiNSDu0VdBZFGk2gfskFm9rGZLQAws5PM7KdJrJeIiEit3KOugUjjSTQg+xMwDigBcPd5wDW1bWBm/cxsgpktMrOFZjY2TO9mZh+Z2fLwtWuYbmb2uJllm9k8Mzv1wN+WiIiIyMEj0YCsnbt/vl9aaR3blAI/cvchwAjge2Y2BLgL+NjdBwMfh8sAlwCDw+lWgr5qIiIi1VIDmRxKEg3ItpvZUYTffzO7iuBuyxq5+yZ3nxXO7wIWA32AMcBzYbbngCvC+THA8x6YBnQxs971eC8iItLMzXhuEWunbmiUst5adyoeU1jWmF749hSmjV8QdTVapEQDsu8BfwSONbMNwG3AtxPdiZn1B04BpgO93L08mNsM9Arn+wDrKmy2Pkzbv6xbzSzLzLK2bduWaBVEROQAPHJFJhuyav3/u15Ov3EIR55Z5U/7ASmiDR/+emajlCWB6/94NiNvOSHqarRIiY5DttLdLwB6AscCo4CzE9nWzDoA/wBuc/f8/cp16tnq7O5Pu/twdx/es2fP+mwqIiL1sOKTNdzxVgZXjNoZaT1e+PYUxo3MrHbd1rWFTVuZUGFuIQWbCyLZd3M04XezMYM1n66PuioHrVoDMjPrZGbjzOwJM/sisAe4AcgGvlpX4WbWiiAYe8ndXw+Tt5Rfigxft4bpG4B+FTbvG6aJiEgESgrLAMgvaduk+92bs5eSPSXx5ev/eDYPTMto0jrU5cTDNtOxd4eoq9FsjH88GKR3yktrIq7JwauuFrIXgGOA+cAtwATgauAr7j6mtg3NzIBngMXu/rsKq94mCOoIX9+qkH59eLflCCCvwqVNERFpIdp1b0v79s7OVblRV6VG2SX9673Nysy1bJ63te6M0iLVNTDsQHc/EcDMxhN05D/C3RNpIz4LuA6Yb2ZzwrSfAA8Ar5rZzcAa9rW0vQdcStD6tge4qR7vQ0REDiElpHPGMRtYVtyl1nwH01hkR513BHBw1bm+DuX3lmx1BWTxNmN3LzOz9QkGY7j7FKCmYZRHV5PfCW4eEBGRZqD8DkazaH5ll5cMiGS/IlGoKyA72czKO+Ib0DZcNoIYqlNSayciIiLSAtTah8zdU929Uzh1dPe0CvMKxkREpMnkrsnj6PRVUVejyaybvpEBrdaxekrzv3PR9FjRBkt0HDIREREKcws5o8OCGgcPPantMk5os7zR9rdjeU58fsL/LW5RlzH/8pNlrC7txzPjspO6n7LiskYba87VieyAKSATEanB+7+cwZWHT4u6Gs3K/LdW8vnuE/jB2Op/PuYXHs3CosGNtr+iXcXx+dKSqj/2B/L7f9/ozGbborMycy0L3ggC2qaKbX585mT6ntabrQs12HqU6upDJiLSYl36i9OirkKkducUAWDN5KmRX39keKOU8/NPMhqlnGSoz52YJXtK+O1XPuX2V0bQpkubA97nu/ODIUB3rMonGP/9wFlzjXQPAmohExGpQ0t9XuJpNwxp0v09+pWJjL9hco3rS2nVqPs7kOM66fdziZXGGrUe9XHdwCmc2m4xAH/65mf85F8ZPHBF/Vpxp41fwC3HTmq07/Wit7PZmh8EhLpkeeDUQiYiUgePOZai//zrY9r4BYz4Vv2eiXj7m6OqpBUVlFSTs3FsW7yd3PUFHH1Rzf3S9ubsZcmHazjla8fywa+yuORnw/nNh5lARtLqVZsXVwVPLXz77uns3hUEP7t316+M828ZyF5O4NHtu2l/WPsG1+n4MYMaXIaohUxEpE7NuYXsqr5TuW7glKTuo2IomuhnMfKWE/jk4dkN3vfAUf3qzrSfvTl7+eO1dbcAHXFCR465uPabBG44eQ6nfv1YdizPYe2S4PFAy1bU/NO58K1s9mzfU+86Q/A8yHJdU3IpK6u8PvvjfY8lGvPrMw5oHwAeHtFE/8lYN30jZjDrpcUHvE+pmwIyEZE6NOeA7B8bRsZbTQ7UlgXbeP3O+l32ytqz73Kmxzw+VTT6v09J+sOmqzs2P71wOt/+67ncOmRylSEjKl5uLKLuflefbewPwJ6cwjr7dX34v1mccMUgrjlhfjztlbGfVbpTtDaZb+XF53O9C3sqxHUec159sPKQH/MWN81FrvceD+7y/OOv1Ok/mRSQiYjUoTkGZL8bk8mM5xY1SllfOLEn//GbEeSvz6878362L91BSqqRkmrVfk7LJm9pjCrWy9acIFAZv/Rcjj7nsErr7vvipAMq01IsHpDV1K508U+Dmw4mbjkWgLVTN3DN42fy1TNWJ7SPX07KqHFdSqpx90eV1x9oIF7eQrb/8crdeGAte9I4FJCJSCRWfLKmWQY61WmO9fzR2xmcfmPjdrovLdp3jey5WxK7DLpu1r5Wk/p0dl/8zgrMgqFFkqmE9Pj85nlbuSczo17bV3fkE72RcG9ucJfquoKu9dpnU9n/e33mf51Yfb7yQFTdKJNKAZmINLkJv5vNoNFH8tytnza4rPfuncE/f/Z5I9SqZs0xINtfaWFptenrpm9k+jPVD+JamxvHJ9b6UrEfUn0Csqn/2AjA319M6PHI9eJefeQw9vIDH2DVbF9gsmFHYkNM1NSiVrIneTcqJKK8hexXV2Rx+6kT685fj4DMo7sB9aCngExEmtyiacGlsRnTy+rIWbfL7jmNL//q9AaXU5uDISD72+3Tq03vP6JXve923F/5w8X/df9Mzvjm8fH0H548kVOuOSa+XJ+ALDUt+HX/8/JzGlS36vp17Z9W3go3YX31A9a+9N2a/zEoD162rchnxfLg/b2zNbHvW/n3JsUqfy4L3lrBvNeWsWrSuoTKaeyWqfL39OD0DB6dXfXOVomGArIWYsfyHLKeb5z+JiIHas2n61ny3sp6bbM3Zy+Tfj83STVKTEMCsvz1+RQXFNedsYFKiir/6O/N2cuvL8wkRmqDy15UNIhJv5/Lc3+s3Jr1+3mVf8zbdk18cNK0VvuijMbu+L9sW+VLhJf+4jT+8PVJbPPqBz39xlNn1VnmKdccw8MzM+pVj1hZ8L3ZP5768LnNnHz10Qwc1Y+i/KI6y8nNa1hEVrKnhLy1+24Y8Bp7wVVPY4s1DQVkLcQ5J+xs8kEeRfbX/+y+HHfZwHpt853TZzLqhydXuuW/3N6cvY1VtVpVDMhK9pTwj/+ZmvBlwM79OnFBn+QPF7D/b+aDV06v0gm8IX7/4IF1+L5i3LHxuww//s2seHp5CxlAWUksocCkOtXFChXvAC33nZfPrbWcRW833vMi8+nM0LZLWfH5DmBfC2O5cR9mxOe/d1rNl9tzdwU/0Ztz0mvMk4j09q3ocmTnBpUB+1rqYqUxbho8mc//vLDBZco+Csia2M5VuXztyM8q/bfSFBYXH3VA28VKY032oyctz649qXy9/6e1Dgswf2N3API2VQ0I2nVvW2l5/YxNpFsxZnDz0ZPrbNl69fbPeOSKzDrrWbGcS/rO56rfjmTEt07g8f+YmND5MTn/5GrTd2+t54iewKY5W6q9hLV/YLKroN5FB+XEnD98veqdiKWxA2ul2UN7fn/LPIryi7jgzlPj6RVbyAAmPD5//02bVE2Dm9a3Nanc3MJj+OGDfQAoLKv5CQPPLKv5ku34pUEQ2Ta97kv7u7fuxiz4Ttelvu+pfADacjkrdvKX7HO4+ObD61WO1E4BWRLESmM1drD97Tfm8Le1Z/LEzQ0fMLEp3D58Mu26t01KJ9TSwlK6WB4vfqfhHbvl4HL334cC8MLKs3l5zVnc/7V5VfLMemlxjedRTX4wZk38rrpnl59D7pqq//iUFpZSVhz8wP3no2dyx1sZDGpVtfWtJh/v3BdUjH19FNedNId3fl61laO6vwMV017+wWd06NWen5yZCcCM5xYldGn00xeqv+Qb22/b39Xz8hoEwVjWC4urbU3avrstK3d0qneZAC98OoAtC7dXSktJrRwUrJhX/+AU4NV32tadqQlUd+xiYeCzsvRIti3eXmV9osrqCIafyhrOmmmbAPjF//Wqs171CcimP7OAO9/LqJT28f8tBWCnV717VFc3D5wCslrszdnLhqxN9e7/ceFhc2jVNrqnUsVKY/Xq81LdgI7lxs8NxtWp6TO48/RMLj3swG5bz1uXTx6dGfvH4w5oezl45VH58sn+fXPmvLKUYd84jnsumFKvx1rX9DOTuyaP1VPW8/IPPqNV2zRO7lQ5qFlRemSt5dZ2Pv1jw0i+dF/VTt5nd11Y5e/AFyv8bXjnn0GZ90/N4Plbp3D6jUP48811DzWxfyCzr451blqnf9y7gD251Z/rU/JPZlpB9cMi1GVl6ZGU7K0cnFZs5fOY8/2/H1jn8n/tSOyB471TNh9Q+YkGL49fVbVVscz3/cRuWpjY4LDVeWl17X3cdtOhxnWJBGTVvcfylt9p7+6Ip5Vfel23oubfRAVkB04BWS0+/M08+p7Wm0Xvrqoz7/alO3jrJ8FdThX/g25KM55bxL8fmkVqqxT+cG3ND+jd3zM3TSEl1diQtanGPDX9IP1mRgbvbzut3nWVQ9+S91ZyT0bmAXWI37gkuAtz1rJ9z9kr2l3Kysy1NW7z0nc/rXIpb/2srQAcP3APA87py9efOBOAhUXV321XkwN5D1OrCV4+Cf82vPyDz9hTvK+zffaSIFhZvKDuS1MV+15VFGuEgCxne4yy0uT8olYc4wwSf2xPY0m3+rW2ltsSO6zuTMBtb1QNKA/0cueB+PztIOBcUnwU66Zv5F/3zwzqUOG7Wz7vCfz0f+34qq3W5Wq767P8RgapPwVktajYgbEuXx6+kSvuP6NSX5hxIzPZurDmR01c3XcqvxuT2dBqxp1+4xC++OPgD/5f/tmt2jwzX1zM4ndWsGN5TvxW8Bfe6ghA9qdVR9S2sH0i0R+khW9lYxY8QkRattFfasu9EzMSfmzMvedl0tqKghbbCuM3lf/tv+77nTjqvCMozK1+3KpvPHUW722s3FfrpKuOBmBjrPcBvIN9Hr1uJmYc0Ej25SqeQ19/4kze3DRi37p6/IbV1EJ218vV91Orj5KS5P2glhTuF5A18SCja8r6JqXchW/VfDNArMJPbLKHTvlkwr75k0e25aKfDKuy35p+y6obt+3fm4+vklZcEo7wX+Gt7P++Nq5r+FA2LZUCslqU/+FL5I/lit1fAKjULP/AtAxuvaDmW/xf2zCSH72dUWu5f/nWFKaNr/+gjuX/me3N2VvpD9/w645jyJeO4rJTNnLpL06jYPMB9vytweRXgsEeX38x8Tuycrwb1x+171JNYW4hb99d/ZhKFd1+6kTSrZiNszaTuyaPu8/KrLXP0WNXTmTxOysSrpc0TJEHfbkKtu3luoFT+OnZmbXmvyczg2JaU1pYGv8j/16F1teV4WXFovwiln24inEjq5ZXSNX+RDUFhI9dWXVAzE+fmsfpHRZWuePvyczgrr1N8+vuBzT75SXV3hFa3m+tOnvD+wIS+VtTU0C2/2XgRFX8QS0tTV7gUFpcORg4VEZ9n/p6zVcWYhUCnaa8lFexb1fFIKymgGxZSdUHrFs1nQWKSoKQoWJr7P7flyGnJj70iVSmgKwW5X/4Kv7H+OH/ZvHBr4LWny0LttUZLBWVVv8R13Ry7tq4i10bd8WXb3rmbEbeUv9BHcv/EOSszK12/ZLd/YDKlxG2r9/LkvdWYka8ubumFrKSPSXV/kHNzan+hF81aR3zXltWY31fWLlvVPD/OXc6Y359BlOfrnrX1WNXTsQsuFv10dmjKCGdZ+9cwp0Xz+XXn2Xw9/+p3LnaDH4xKrhsdtsbozj9S3VffjALgr13fv75QTEgaHNV/t355TdX8eKqs/nfTzMS2q6suKzS+TFrb+U+hrEy5+xLOvDAtMTK63F09a3F+19iMoOrv38YM3Yfz3eGf84fr93XJ6j8stWW7F3U5LjWK8joModTv34sgy+o2ift0j7/3955h0dVpX/8+0IAQXpTNiJFKbIoCIgERVBWEGXFgottRRdxLatrXUHdn23FFVnE3hAEV5ClumsFdNJIoaYHCJBQAiShhBBKyuT7++PemUwnZSYzCe/nec4z955z7rnn3nPPve+85z3v8e5LzWZDV5WnzduQpT8oLw+chsx1yLKh0MjHV9RxyDLQ7xJv3xTH8/r6U+BKMVph48IMp35SbnVXUlTHGbDiGxXIfGDraI4P3A0vDcG4vxtGpIMHlFdLWGIFMSNulFv8jl92I2tNDrYs3orW4a3QOryVz3JO5J/Anvhcn3kqvCwd4qlONibOirD7iHJ1AJm0MhvfPFE5nbo4z/OMKJt/nYz9bZzq2HNkVwy4ozfuuCAeix+P8zj0k5dmDO/GZBnaxn+/767ZeGOVoalY+vdK+4aiIqCw2JhWXl5W2VaLHzfq+1r0KPuLqBitcDjriNOLae3Mzbip8wYn7dqcLSPx+9eHYsmT8R6v019YS624PTzBo/DZUDh+snoTXMb3zPDped9aasVxejdirg0HKoxnb37WCDy8yH2m4cgnvA8Lbi29CFHHBnpNX3NkcK3rB3jXkNUUJx9rdTlkWcc2ZIHCV3uUstLdRaBtq7wtFzVlQKX5yMQ+KdUSoFx9V5aZCobOXSrtH13Layiaz2CgApkPbB1t5v8V42h2odODdrrwNHId7FJcHf954uShymG8zRmVat1ev+uG3mO6Y9Ddfe1xCx+K9Wqb0PK8c9FteDi+f8X77MZyGh0mblGOx3Rbfftd7NkZ48b9v4GIIcAAxoforveH29NPHfW9/lxs0QB0G2744LkwrFIwW5Ybgbs/GI42XVu7/WPMTshDSVEJkk8bS7F8nDYCpwtPO/l5snna/vPXlR/LtzeMwtJ9EU5l7V63z27ADQA3X1jpZqRj7/Z4aWTlpIfrnx+EHwquwP2XuA+TrossQ06sf72IO5KXVoAV+4fh9kc8exCvz9g0ZMtyI86Q05kzTYqxllV4HE45W/C3hsxpyNLqn8kBnvA1ZFlSXDu3OoHS0lSl3Ae+uBo9m3iebFLkMIz8/BOB9efYPdzzPXScofl9/lAU5XrX8p6JUqshMlx0ueH+pH+zLHeBrIEI2sFABTIf2B6sFfuHoX3Ptk5p3pYI+XHONqf9nw5dgf2bD+J04Wm8e2+lALXSwaDXE5M/vxr9b6l0VFhSVAIRYGrfymGU8a9e4dXIOPV0bzQWK/7wjueP4UkatjYHK85DtId/9RVeZgd1anQI376QiDcmb/OY7krmdzux1xruMc11GnzE1P7o2rbyeohGCG9/0u78M3JO0hnPV1ZifFy6X+1swOs6E3Tl5gvdjvU0tfyD1JHoMcKzMXDWmhwse6Z2GrRGYaZNRgPsiuUIjOuX8tPlOIUWASk72FTFzqhxk8A9K2VlEjgNmcvSTo6apZG3tK1V2bNvcXc54Q+q6gcvu9z9feJKoGffHyyo2hJZtRFey02BzCbEN2lU7lZe03Ma3rusrtA754M1K6r2T0Kk0sbkT/PdvS6veHMb2rSTWi1jkrLSMEa3eW620aZra3xydzQObTvsdoyvNexK0czn+XaUdfcYf4gdccubV2JlRh+n+I/vivaoqu73e+8rBNw+It8tznWtuSM07H+mR0Ti2qcG+qwzAEz5ckSVbDXCxIp3b4tC17D9TvHdw6quDes7pivumB2Bf94QWeVjXLEJ/VUdYvYHr10Xif7nZAX8PJ6cRvqDX+c23IkZ3oadHAmkhuyygYLPPvNr8Xbe/qezcPP1/EpfVofYsVZlZ26v/XqdnqiOzVWw8eTx39O7sDbXNDrCMFVxFNpdBbJmLQLTFmcDKpD54FSJf158jy8beUYB6EwMvd/7OpSPLL4Gnfp2qFX51cVma2Pj0W98rxPnCU9+mrxRVQNuAHh2qPvsOVfCpAJPrhyJfVbnpT+qMjU+6t0kpC7fbhd4bXZzrCCspdZq/QO1z+StQ39FL1tGVdsPVyjx2WLfNpb1mYOHm2Dp0761rjXVkHn7o+JoEpCSRHyX791+rzb8dMhZS71o95kX9K4qNmNzf1PdlSJCDU8OvV+8LaPG5TV2mehWwUY68cmPqEDmAx0Jr59UZckYm51adTl15BRGPTnQ7t/KkeeGRiGsWWO3D+aSv8Zh+XPOH9myk2VY+FCs/WVWFQ3B5q8zYZm9xacDXwD49V9bvDpQdfx3PO+BGKcXdk1erDNvjMTYjnXrcy7Wy7qQDYFv9gzHH96J8LjMzuo3N2Hu5JgqGfXPGh/pFnd3D8+C3q7YSi3xB6k185YfbBY6zNL2J9ayuplBeDS7MCDltmrj/qx8vrX6f55t2Jbnsglkyaf7YOmLSU551FN/LSBZb8PgwYMZSB75bRSNx0vD2R7yMwqYu+kAP/uj52eCJMNQat+P/zyVAHnz+Qn2uK0/7CRAPnZppD3uXzdbnMq5u1sss9bm0FpmdXseHfPtjtvn9bl1rFN2zF6Wnii1p304KdKpnBljLKywVvCL+6MJkJv+nUGSnD3Bws2LMvn2TRYmL91GkqywVnDltAT2CNvNB/tEuV2/Lc/21dlu9dVQvbB3/X4ufy6exXnFbu2aOC/N63Ebv8rgtW03e033xB97xgT9eqsTHr8sklHvJfm8tpqWbet3uZsOEDD6QV1c0zODLZw2rG7OVZsQ0TKFJwpO8OcZG+1xYztscMqz+s2NXt9NCglgI+lZpvEYWV9CoAWyP1+iApmGmoX+zbbX6vgujQ5wT0Iu8zMKuOatTW6CG0Be0DjXvh05ZwufHeKeByAf7hdFklz/ZbrH9NYotG/3DMvxmOeDOyL5QK9or/W9t0cMSXLuZCOPpw+mhqqHa9psIUAObpHO3E0HSFamJcxNrXG5jswY4/l5CXS4rt2mKuVLWbaNa2du4mvXWTiweabbNbjmP5iaT5L8/XkJ1arPnFsjeVWrZPv+j69v4NqZRh07SX7QnwVbGNzCvf9+fFfdf6Me7BPFBVMrhfjRLu2pAplvVCCrIa5aAA0a6mt4fbQl4OeI/SiZfZoaWkBPHw8NDSfc3S02YGXf2iWey5+Ld3oX52cU2NNtuGp7Adqfv+qET++JYnOcqLN792j/SH50p+9vy4jWSfzq4VjOn1Ip+JDkT/+o1EZN7Wv80Qp0fU8U+L43rvfu5xkqkPnCl0AmRnr9ZMiQIdy4MXD2K38bGom3N4wKWPmKoii1YeaNkZjx40AUsm2dnXNQ80zcdlUeXlo7yu9lt5cjOFzheWUF2yxux09WbZyQPtArBvOzRiBxXjrGTgmvs3sYhjKUsQkKMg+hcz/PtqO2azy07TCuurQIq1ZU4JLxxoz19G93YOGMfXgrcRQA/zli3b46B4X7T9onkBXnnUBx/kmc179Tlc4zsk0Soo4NxKrpiRj1cF9II8Gx3GK0CW+J8hIrig6cwN6kw+g6sAO6DQ+HNBKUny5H4e5jKDtVjpadW6BofzE6XNwO0R+n49q/XoatP2Zj/f/ycH73c3DR0A5ofX4LdOzdHtvX7Eb3iC44vLMQXQZ0xpFdhehwcTskLc1Ci7ZN0eniNrCWVeDcjs1RYSVKi0txbqcW+PD+DRhwVUu0bN8Ugyb1QvP2zZGfXoDW4a1QnHcCHfsEfnKciGwiOcRjojdJLRgBwA0AtgHYAWDamfIHWkNmsyPQoEFDww3DWqYEvQ41DTb8Xe4Hd0SSJJc/F++knQKMIanSE6Uej7upcyL7Nt3hMe2N6y08kJzHd25x12wBZDOcYuScLV7fxzmxe3ny8EmnuJpc283nJ3Bg80xay6zc+sNOkuSehNwalVWTMLxVsr3+xXnFbunvT4ys1nfKH3VKmJtqL+/H1zdwwdSYap/n1i7xdXYPAxX++1Jite59TUB9GLIE0BjATgA9ATQFkAygn69jAi2QkYbdTcLcVD4xINJuyDisZQrHd07kmrc2cURrw1Zm6w87GTlnC18fbeGcWyP58kgLb/tNPJc8uY79mmVxYngc7+key+vbb+SotpuZHbOXr11n4YYF6Vz/ZTrfvS2Sk7qu40tXW9gKx+yG393D9nBPQi7nTo7mgeQ8fnF/ND+9J4qXnrONifPS+Mb1Fr4+2rCxePVaC6dHWJi0ZCtjP0rmlN7RfOzSSL41zsIju47y+SstfPsmC2eMsXDNW8a4f/LSbcxLy+ecWyO5K2oPlz4dx/duN849sHkm5z0QzfGdE52M2Zc8uY47fjFsjf7zVBzHdVrPldMSOD3CwmcGWziu03omLdnKdZ+kcOnTcfaybcNmM8ZYOHuChW9cb+ENHddzal+j7IubZPPeHjH8ZdZm/vDqegLkn3pFc/YEi91wdNZ4i5O9B2DYa93lMoTyj99ZPHY4R5uQK85NYyfJ50VhObS8s8VjHg21CxPOT+D4zokEyMYo499HWHho+2HutOxm/2bb+dGdUbSWWRn7UTK/fSGBRblFJMmCrYe4f8tBVlgrmDA3lU1QQqDSjGBK72hODI/j8ufimbrC3V4vwhSy0r/NYmsUctZ4Y5LCVw/Hcvlz8XZD9heGG5Mayk6V2es5onUSt6/O5pTe0Xz+Sgv/7xrjWUqcl8Z/3mDhQ32j+Mqoyucr/vNU3nnhOgKGfdSpo6fsNjY9w3J4MDWfC6bGMO5To05vjrVwgjnZY1yn9XxmsIW74/axjYMtn7cwMTyOA5tnsuR4idN7KubDZPv9nh5RWbfOks8pvd1t/7o23keAvK9nDJ8aZKG1zMrtq7O5duYmj+/B8pJynjx8kl8+GMMKa4U9vuR4CTO/38njB47z+SuNco4fOM4vH4zhrPEW7oraw7hPU9wEreK8Yq74Wzw3LEhn4rw0pzKrQ4W1ghu/ynAbqryu3Sau+yTFPsGENAz2PU2WsZHxP3dBsl+zLK6ansCOUsBtP+0iabwzv340lsf2HmPh7kLmxO5lXlo+nxls4SO/jeKHkyKZMDeVBVsPce/6/STJBVNj7O/S08dOu5075sNkXt06yWf9vLEnIZdHcwpJ0t7W3728nvGfp3Lzokwu+ss6Fu4u5Oujjffos0OM78GfL4nivg37q3zvd0Xt8flsZq31bH9an8Jjl0ZW+/5XF18CWcgMWYpIBIBXSI4196cDAMk3vR0T6CFLRVEURVEUf+FryDIwa5vUjHAAex329wG40jWTiDwE4CFzt1hEqraGT83pCMDdKZASbLRdQg9tk9BE2yX00DYJTeqiXbp5SwglgaxKkPwMQIAW93BHRDZ6k2aV4KHtEnpom4Qm2i6hh7ZJaBLsdgklT/25ALo67F9gximKoiiKojRoQkkg2wCgl4j0EJGmAO4E8N8g10lRFEVRFCXghMyQJclyEfkLgJ9hzLicRzI9yNUC6nB4VKkW2i6hh7ZJaKLtEnpom4QmQW2XkJllqSiKoiiKcrYSSkOWiqIoiqIoZyUqkCmKoiiKogQZFch8ICI3iMg2EdkhItOCXZ+GjIh0FRGLiGSISLqI/NWMby8ia0Qky/xtZ8aLiLxntk2KiAxyKGuymT9LRCYH65oaCiLSWES2iMh35n4PEUk07/0ScxIORKSZub/DTO/uUMZ0M36biIwN0qU0GESkrYgsE5GtIpIpIhHaV4KLiDxlvrvSRGSxiJyjfaXuEZF5IpIvImkOcX7rGyIyWERSzWPeE/HXaqKAR/f9GgjUYCknDbW6310ADDK3WwHYDqAfgJkw1zUFMA3AW+b2jQB+BCAAhgFINOPbA9hl/rYzt9sF+/rqcwDwNIBFAL4z9/8D4E5z+xMAj5jbjwL4xNy+E8ASc7uf2X+aAehh9qvGwb6u+hwALADwoLndFEBb7StBbY9wANkAmpv7/wFwv/aVoLTFNQAGAUhziPNb3wCw3swr5rHj/FV31ZB5ZyiAHSR3kSwF8A2ACUGuU4OF5AGSm83t4wAyYbzkJsD4+MD8vcXcngBgIQ0SALQVkS4AxgJYQ/IIyaMA1sBYtF6pASJyAYCbAMw19wXAdQCWmVlc28TWVssAjDbzTwDwDckSktkAdsDoX0oNEJE2MD46XwAAyVKShdC+EmzCADQXkTAALQAcgPaVOodkNIAjLtF+6RtmWmuSCTSks4UOZdUaFci842kpp/Ag1eWswlTfXw4gEcB5JA+YSQcBnGdue2sfbTf/MgfA3wBUmPsdABSSLDf3He+v/d6b6cfM/Nom/qUHgAIA882h5Lkici60rwQNkrkAZgHYA0MQOwZgE7SvhAr+6hvh5rZrvF9QgUwJKUSkJYDlAJ4kWeSYZv4jUT8tdYSIjAeQT3JTsOuiOBEGY0jmY5KXAzgBYxjGjvaVusW0SZoAQ1j+DYBzodrGkCSU+4YKZN7RpZzqGBFpAkMY+5rkCjM6z1QTw/zNN+O9tY+2m/+4CsDNIpIDY8j+OgDvwlDr25xKO95f+70309sAOAxtE3+zD8A+konm/jIYApr2leDxOwDZJAtIlgFYAaP/aF8JDfzVN3LNbdd4v6ACmXd0Kac6xLSf+AJAJsnZDkn/BWCb4TIZwLcO8feZs2SGAThmqqR/BjBGRNqZ/1rHmHFKNSE5neQFJLvDeP5/JXkPAAuAiWY21zaxtdVEMz/N+DvNmWU9APSCYRir1ACSBwHsFZE+ZtRoABnQvhJM9gAYJiItzHeZrU20r4QGfukbZlqRiAwz2/k+h7JqT7BnRIRygDEDYzuMmS4vBrs+DTkAuBqGGjkFQJIZboRhV/ELgCwAawG0N/MLgA/NtkkFMMShrD/BMIbdAeCBYF9bQwgARqFylmVPGB+JHQCWAmhmxp9j7u8w03s6HP+i2Vbb4MdZSWdrADAQwEazv6yCMRNM+0pw2+RVAFsBpAH4CsZMSe0rdd8Oi2HY8ZXB0CZP8WffADDEbOOdAD6AueKRP4IunaQoiqIoihJkdMhSURRFURQlyKhApiiKoiiKEmRUIFMURVEURQkyKpApiqIoiqIEGRXIFEVRFEVRgowKZIqi1GtExCoiSQ5h2hnyPywi9/nhvDki0rG25SiKogBQtxeKotRvRKSYZMsgnDcHht+iQ3V9bkVRGh6qIVMUpUFiarBmikiqiKwXkYvN+FdE5Flz+wkRyRCRFBH5xoxrLyKrzLgEEbnMjO8gIqtFJF1E5sJwKmk7173mOZJE5FMRaWyGL0UkzazDU0G4DYqi1BNUIFMUpb7T3GXIcpJD2jGSl8LwqD3Hw7HTAFxO8jIAD5txrwLYYsa9AGChGf8ygFiSvwWwEsCFACAilwCYBOAqkgMBWAHcA8ObfjjJ/mYd5vvrghVFaXiEnTmLoihKSHPKFIQ8sdjh9x0P6SkAvhaRVTCWIAKMZbxuBwCSv5qasdYArgFwmxn/vYgcNfOPBjAYwAZjeTs0h7F48f8A9BSR9wF8D2B1Da9PUZSzANWQKYrSkKGXbRs3wVjLbhAMgaomf1IFwAKSA83Qh+QrJI8CGAAgEob2bW4NylYU5SxBBTJFURoykxx+4x0TRKQRgK4kLQCeB9AGQEsAMTCGHCEiowAcIlkEIBrA3Wb8OBgLegPGosUTRaSzmdZeRLqZMzAbkVwO4CUYQp+iKIpHdMhSUZT6TnMRSXLY/4mkzfVFOxFJAVAC4C6X4xoD+LeItIGh5XqPZKGIvAJgnnncSQCTzfyvAlgsIukA4gDsAQCSGSLyEoDVppBXBuAxAKcAzDfjAGC6365YUZQGh7q9UBSlQaJuKRRFqU/okKWiKIqiKEqQUQ2ZoiiKoihKkFENmaIoiqIoSpBRgUxRFEVRFCXIqECmKIqiKIoSZFQgUxRFURRFCTIqkCmKoiiKogSZ/wcHb8QP685n3AAAAABJRU5ErkJggg==\n", 148 | "text/plain": [ 149 | "
" 150 | ] 151 | }, 152 | "metadata": { 153 | "needs_background": "light" 154 | }, 155 | "output_type": "display_data" 156 | } 157 | ], 158 | "source": [ 159 | "path, rewards, tot = all_rewards[-1]\n", 160 | "print(path)\n", 161 | "print(tot)\n", 162 | "n_runs, n_episodes = rewards.shape\n", 163 | "idxs = range(n_episodes)\n", 164 | "fig, ax = plt.subplots(n_runs, figsize=(10,12))\n", 165 | "for i, _ax in enumerate(ax):\n", 166 | " _ax.plot(idxs, rewards[i], c='red')\n", 167 | " _ax.plot(idxs, rewards[i], c='blue')\n", 168 | " _ax.set_ylim(0, 550)\n", 169 | " _ax.set_ylabel('Rewards');\n", 170 | " if i == n_runs - 1:\n", 171 | " _ax.set_xlabel('Episodes')" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 39, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "-9.994999166249727" 183 | ] 184 | }, 185 | "execution_count": 39, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "min_e = 0.01\n", 192 | "max_e = 1.0\n", 193 | "decay = 0.999\n", 194 | "import numpy as np\n", 195 | "import math\n", 196 | "(min_e/max_e)/np.log(decay)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 6, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "x = (min_e/max_e)/math.log(decay, decay)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 7, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "0.01\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "print(x)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | } 232 | ], 233 | "metadata": { 234 | "kernelspec": { 235 | "display_name": "Python 3", 236 | "language": "python", 237 | "name": "python3" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.8.3" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 4 254 | } 255 | -------------------------------------------------------------------------------- /n_step_a2c.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torch.nn.functional import log_softmax, softmax, mse_loss, normalize 7 | from torch.distributions import Categorical 8 | from torch.nn.utils import clip_grad_value_ 9 | from collections import deque 10 | 11 | 12 | ALPHA = 0.0005 # learning rate for the actor 13 | BETA = 0.0005 # learning rate for the critic 14 | GAMMA = 0.99 # discount rate 15 | HIDDEN_SIZE = 256 # number of hidden nodes we have in our approximation 16 | PSI = 0.1 # the entropy bonus multiplier 17 | 18 | BATCH_SIZE = 25 # number of episodes in a batch 19 | NUM_EPOCHS = 5000 20 | NUM_STEPS = 7 # number of steps to bootstrap after 21 | 22 | RENDER_EVERY = 100 23 | 24 | 25 | # Q-table is replaced by a neural network 26 | class Actor(nn.Module): 27 | def __init__(self, observation_space_size: int, action_space_size: int, hidden_size: int): 28 | super(Actor, self).__init__() 29 | 30 | self.net = nn.Sequential( 31 | nn.Linear(in_features=observation_space_size, out_features=hidden_size, bias=True), 32 | nn.PReLU(), 33 | nn.Linear(in_features=hidden_size, out_features=hidden_size, bias=True), 34 | nn.PReLU(), 35 | nn.Linear(in_features=hidden_size, out_features=action_space_size, bias=True) 36 | ) 37 | 38 | def forward(self, x): 39 | x = normalize(x, dim=1) 40 | x = self.net(x) 41 | return x 42 | 43 | 44 | class Critic(nn.Module): 45 | def __init__(self, observation_space_size: int, hidden_size: int): 46 | super(Critic, self).__init__() 47 | 48 | self.net = nn.Sequential( 49 | nn.Linear(in_features=observation_space_size, out_features=hidden_size, bias=True), 50 | nn.PReLU(), 51 | nn.Linear(in_features=hidden_size, out_features=hidden_size, bias=True), 52 | nn.PReLU(), 53 | nn.Linear(in_features=hidden_size, out_features=1, bias=True) 54 | ) 55 | 56 | def forward(self, x): 57 | x = normalize(x, dim=1) 58 | x = self.net(x) 59 | return x 60 | 61 | 62 | def get_discounted_returns(rewards: torch.Tensor, gamma: float, state_values: torch.Tensor, n: int): 63 | """ 64 | Computes the array of discounted rewards [Gt:t+1] for the episode. See reference on p.143 S&B. 65 | Args: 66 | rewards: the sequence of the rewards obtained from running the episode 67 | gamma: the discounting factor 68 | state_values: teh values of the states calculated by the critic network 69 | n: the horizon of the bootstrapping 70 | Returns: 71 | discounted_rewards: the sequence of the discounted returns from time step t 72 | """ 73 | discounted_rewards = torch.empty_like(rewards) 74 | gamma_array = torch.full(size=(n+1,), fill_value=gamma) if n != 1 else None 75 | power_gamma_array = torch.pow(gamma_array, torch.arange(n+1).float()) if n != 1 else None 76 | 77 | # # turn the state values torch tensor into the numpy array 78 | # state_values = state_values.numpy() 79 | 80 | # define the end of sequence 81 | T = rewards.shape[0] 82 | 83 | # for every time step in the sequence 84 | for t in range(T): 85 | 86 | # special case of 1 step lookahead bootstrapping 87 | if n == 1: 88 | 89 | # check if we can discount 90 | if t < T - 1: 91 | Gt = rewards[t] + gamma * state_values[t+1] 92 | 93 | else: 94 | # the last reward 95 | Gt = rewards[T-1] 96 | 97 | # check if we can bootstrap 98 | elif t + n < T: 99 | # calculate the bootstrapped return 100 | Gt = torch.sum(power_gamma_array[:-1] * rewards[t:(t+n)]) + power_gamma_array[-1] * state_values[t+n] 101 | 102 | # if we can't bootstrap anymore 103 | else: 104 | 105 | # check if we can discount 106 | if t < T - 1: 107 | # compute the monte carlo return 108 | Gt = torch.sum(power_gamma_array[:rewards[t:T].shape[0]] * rewards[t:T]) 109 | 110 | else: 111 | # the last reward 112 | Gt = rewards[T-1] 113 | 114 | discounted_rewards[t] = Gt 115 | 116 | return discounted_rewards 117 | 118 | 119 | def get_entropy_bonus(logits: torch.Tensor) -> (torch.Tensor, torch.Tensor): 120 | """ 121 | Calculates the entropy bonus. 122 | Args: 123 | logits: the logits of the actor network 124 | Returns: 125 | entropy_bonus: entropy bonus 126 | mean_entropy: the mean entropy of the episode 127 | """ 128 | # calculate the probabilities 129 | p = softmax(logits, dim=1) 130 | 131 | # calculate the log probabilities 132 | log_p = log_softmax(logits, dim=1) 133 | 134 | # calculate the entropy 135 | entropy = -1 * torch.sum(p * log_p, dim=1) 136 | 137 | # calculate the mean entropy for the episode 138 | mean_entropy = torch.mean(entropy, dim=0) 139 | 140 | # calculate the entropy bonus 141 | entropy_bonus = -1 * PSI * mean_entropy 142 | 143 | return entropy_bonus, mean_entropy 144 | 145 | 146 | def play_episode(env: gym.Env, actor: nn.Module, critic: nn.Module, epoch: int, episode: int): 147 | """ 148 | Plays an episode of the environment. 149 | Args: 150 | env: the OpenAI environment 151 | actor: the policy network 152 | critic: the state value function 153 | epoch: current epoch 154 | episode: current episode 155 | Returns: 156 | state_values: the values of the states as calculated by the critic network 157 | action_log_probs: log-probabilities of the takes actions in the trajectory 158 | rewards: the sequence of the obtained rewards 159 | logits: the logits of every action taken - needed to compute entropy for entropy bonus 160 | episode_total_reward: sum of the rewards for the episode - needed for the average over 200 episode statistic 161 | """ 162 | # initialize the environment state 163 | current_state = env.reset() 164 | 165 | logits = torch.empty(size=(0, env.action_space.n), dtype=torch.float) 166 | action_log_probs = torch.empty(size=(0,), dtype=torch.float) 167 | state_values = torch.empty(size=(0,), dtype=torch.float) 168 | rewards = torch.empty(size=(0,), dtype=torch.float) 169 | 170 | # set the done flag to false 171 | done = False 172 | 173 | # init the total reward 174 | episode_total_reward = 0 175 | 176 | # accumulate data for 1 episode 177 | while not done: 178 | 179 | # render the episode 180 | if epoch % RENDER_EVERY == 0 and episode == 0: 181 | env.render() 182 | 183 | # get the action logits from the agent - (preferences) 184 | action_logits = actor(torch.tensor(current_state).float().unsqueeze(dim=0)).squeeze() 185 | 186 | # append the logits 187 | logits = torch.cat((logits, action_logits.unsqueeze(dim=0)), dim=0) 188 | 189 | # sample an action according to the action distribution 190 | action = Categorical(logits=action_logits).sample() 191 | 192 | # compute the log-probabilities of the actions 193 | log_probs = log_softmax(action_logits, dim=0) 194 | 195 | # get the log-probability of the chosen action 196 | action_log_probs = torch.cat((action_log_probs, log_probs[action.item()].unsqueeze(dim=0)), dim=0) 197 | 198 | # get the current state value 199 | current_state_value = critic(torch.tensor(current_state).float().unsqueeze(dim=0)) 200 | state_values = torch.cat((state_values, current_state_value), dim=0) 201 | 202 | # take the action 203 | new_state, reward, done, _ = env.step(action.item()) 204 | 205 | episode_total_reward += reward 206 | 207 | # save the reward 208 | rewards = torch.cat((rewards, torch.tensor(reward, dtype=torch.float).unsqueeze(dim=0)), dim=0) 209 | 210 | # if the episode is over 211 | if done: 212 | break 213 | 214 | # update the state 215 | current_state = new_state 216 | 217 | return state_values, action_log_probs, rewards, logits, episode_total_reward 218 | 219 | 220 | def main(): 221 | 222 | # create the environment 223 | env = gym.make('LunarLander-v2') 224 | 225 | # policy network 226 | actor = Actor(observation_space_size=env.observation_space.shape[0], 227 | action_space_size=env.action_space.n, 228 | hidden_size=HIDDEN_SIZE) 229 | 230 | # state-value network 231 | critic = Critic(observation_space_size=env.observation_space.shape[0], 232 | hidden_size=HIDDEN_SIZE) 233 | 234 | # define the optimizers for the policy and state-value networks 235 | adam_actor = optim.Adam(params=actor.parameters(), lr=ALPHA) 236 | adam_critic = optim.Adam(params=critic.parameters(), lr=BETA) 237 | 238 | total_rewards = deque([], maxlen=100) 239 | 240 | # run for N epochs 241 | for epoch in range(NUM_EPOCHS): 242 | 243 | # holder for the weighted log-probs 244 | epoch_weighted_log_probs = torch.empty(size=(0,), dtype=torch.float) 245 | 246 | # holder for the epoch logits 247 | epoch_logits = torch.empty(size=(0, env.action_space.n), dtype=torch.float) 248 | 249 | # holder for the epoch state values 250 | epoch_state_values = torch.empty(size=(0,), dtype=torch.float) 251 | 252 | # holder for the epoch discounted returns 253 | epoch_discounted_returns = torch.empty(size=(0,), dtype=torch.float) 254 | 255 | # collect the data from the episode 256 | for episode in range(BATCH_SIZE): 257 | 258 | # play an episode 259 | (state_values, 260 | action_log_probs, 261 | rewards, 262 | logits, 263 | episode_total_reward) = play_episode(env=env, actor=actor, critic=critic, epoch=epoch, episode=episode) 264 | 265 | # calculate the sequence of the discounted returns Gt 266 | discounted_returns = get_discounted_returns(rewards=rewards, 267 | gamma=GAMMA, 268 | state_values=state_values.detach().squeeze(), 269 | n=NUM_STEPS) 270 | 271 | # calculate the advantage for time t: Q(s,a) - V(s) 272 | advantages = discounted_returns - state_values.detach().squeeze() 273 | 274 | # append sum of logP * A 275 | epoch_weighted_log_probs = torch.cat((epoch_weighted_log_probs, 276 | torch.sum(action_log_probs * advantages).unsqueeze(dim=0)), dim=0) 277 | 278 | # append the logits for the entropy bonus 279 | epoch_logits = torch.cat((epoch_logits, logits), dim=0) 280 | 281 | # append the state values 282 | epoch_state_values = torch.cat((epoch_state_values, state_values), dim=0) 283 | 284 | # append the discounted returns 285 | epoch_discounted_returns = torch.cat((epoch_discounted_returns, discounted_returns), dim=0) 286 | 287 | # append the episodic total rewards 288 | total_rewards.append(episode_total_reward) 289 | 290 | # calculate the policy loss 291 | policy_loss = -1 * torch.mean(epoch_weighted_log_probs) 292 | 293 | # get the entropy bonus 294 | entropy_bonus, mean_entropy = get_entropy_bonus(logits=epoch_logits) 295 | 296 | # add the entropy bonus 297 | policy_loss += (PSI * entropy_bonus) 298 | 299 | # zero the gradient in both actor and the critic networks 300 | actor.zero_grad() 301 | critic.zero_grad() 302 | 303 | # calculate the policy gradient 304 | policy_loss.backward() 305 | 306 | # calculate the critic loss 307 | critic_loss = mse_loss(input=epoch_state_values.squeeze(), target=epoch_discounted_returns) 308 | 309 | # calculate the gradient of the critic loss 310 | critic_loss.backward() 311 | 312 | # clip the gradients in the policy gradients and the critic loss gradients 313 | clip_grad_value_(parameters=actor.parameters(), clip_value=0.1) 314 | clip_grad_value_(parameters=critic.parameters(), clip_value=0.1) 315 | 316 | # update the actor and critic parameters 317 | adam_actor.step() 318 | adam_critic.step() 319 | 320 | print("\r", f"Epoch: {epoch}, Avg Return per Epoch: {np.mean(total_rewards):.3f}", end="", flush=True) 321 | 322 | # check if solved 323 | if np.mean(total_rewards) > 200: 324 | print('\nSolved!') 325 | break 326 | 327 | # close the environment 328 | env.close() 329 | 330 | if __name__ == "__main__": 331 | main() -------------------------------------------------------------------------------- /q_learning.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | 6 | import argparse 7 | import collections 8 | import random 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | import gym 12 | import tqdm 13 | import os 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--env', default='CartPole-v1', type=str) 17 | parser.add_argument('--seed', default=1234, type=int) 18 | parser.add_argument('--n_layers', default=1, type=int) 19 | parser.add_argument('--grad_clip', default=0.5, type=float) 20 | parser.add_argument('--hid_dim', default=32, type=int) 21 | parser.add_argument('--init', default='xavier', type=str) 22 | parser.add_argument('--n_runs', default=5, type=int) 23 | parser.add_argument('--n_episodes', default=1000, type=int) 24 | parser.add_argument('--discount_factor', default=0.99, type=float) 25 | parser.add_argument('--start_epsilon', default=1.0, type=float) 26 | parser.add_argument('--end_epsilon', default=0.01, type=float) 27 | parser.add_argument('--exploration_time', default=0.5, type=float) 28 | parser.add_argument('--optim', default='adam', type=str) 29 | parser.add_argument('--lr', default=1e-3, type=float) 30 | args = parser.parse_args() 31 | 32 | name = '-'.join([f'{k}={v}' for k, v in vars(args).items()]) 33 | print(name) 34 | 35 | import os 36 | assert not os.path.exists('checkpoints/'+name+'_train.pt') 37 | 38 | train_env = gym.make(args.env) 39 | test_env = gym.make(args.env) 40 | 41 | train_env.seed(args.seed) 42 | test_env.seed(args.seed+1) 43 | np.random.seed(args.seed) 44 | random.seed(args.seed) 45 | torch.manual_seed(args.seed) 46 | 47 | if args.n_layers == 1: 48 | class MLP(nn.Module): 49 | def __init__(self, input_dim, hidden_dim, output_dim): 50 | super().__init__() 51 | 52 | self.fc_1 = nn.Linear(input_dim, hidden_dim) 53 | self.fc_2 = nn.Linear(hidden_dim, output_dim) 54 | 55 | def forward(self, x): 56 | x = self.fc_1(x) 57 | x = F.relu(x) 58 | x = self.fc_2(x) 59 | return x 60 | else: 61 | assert args.n_layers == 2 62 | class MLP(nn.Module): 63 | def __init__(self, input_dim, hidden_dim, output_dim): 64 | super().__init__() 65 | 66 | self.fc_1 = nn.Linear(input_dim, hidden_dim) 67 | self.fc_2 = nn.Linear(hidden_dim, hidden_dim) 68 | self.fc_3 = nn.Linear(hidden_dim, output_dim) 69 | 70 | def forward(self, x): 71 | x = self.fc_1(x) 72 | x = F.relu(x) 73 | x = self.fc_2(x) 74 | x = F.relu(x) 75 | x = self.fc_3(x) 76 | return x 77 | 78 | input_dim = train_env.observation_space.shape[0] 79 | hidden_dim = args.hid_dim 80 | output_dim = train_env.action_space.n 81 | 82 | if args.init == 'xavier': 83 | def init_weights(m): 84 | if type(m) == nn.Linear: 85 | torch.nn.init.xavier_normal_(m.weight) 86 | m.bias.data.fill_(0) 87 | else: 88 | assert args.init == 'kaiming' 89 | def init_weights(m): 90 | if type(m) == nn.Linear: 91 | torch.nn.init.kaiming_normal_(m.weight) 92 | m.bias.data.fill_(0) 93 | 94 | def train(env, policy, optimizer, discount_factor, epsilon, device): 95 | 96 | policy.train() 97 | 98 | states = [] 99 | actions = [] 100 | rewards = [] 101 | next_states = [] 102 | done = False 103 | episode_reward = 0 104 | 105 | state = env.reset() 106 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 107 | 108 | while not done: 109 | 110 | if np.random.random() < epsilon: 111 | action = env.action_space.sample() 112 | else: 113 | q_pred = policy(state) 114 | action = torch.argmax(q_pred).item() 115 | 116 | next_state, reward, done, _ = env.step(action) 117 | 118 | next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device) 119 | 120 | loss = update_policy(policy, state, action, reward, next_state, done, discount_factor, optimizer) 121 | 122 | state = next_state 123 | episode_reward += reward 124 | 125 | return loss, episode_reward 126 | 127 | def update_policy(policy, state, action, reward, next_state, done, discount_factor, optimizer): 128 | 129 | q_preds = policy(state) 130 | q_vals = q_preds[:, action] 131 | 132 | with torch.no_grad(): 133 | q_next_preds = policy(next_state) 134 | q_next_vals = q_next_preds.max(1).values 135 | targets = reward + q_next_vals * discount_factor * done 136 | 137 | loss = F.smooth_l1_loss(q_vals, targets.detach()) 138 | optimizer.zero_grad() 139 | loss.backward() 140 | nn.utils.clip_grad_norm_(policy.parameters(), args.grad_clip) 141 | optimizer.step() 142 | 143 | return loss.item() 144 | 145 | def evaluate(env, policy, device): 146 | 147 | policy.eval() 148 | 149 | done = False 150 | episode_reward = 0 151 | 152 | state = env.reset() 153 | 154 | while not done: 155 | 156 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 157 | 158 | with torch.no_grad(): 159 | q_pred = policy(state) 160 | action = torch.argmax(q_pred).item() 161 | 162 | state, reward, done, _ = env.step(action) 163 | episode_reward += reward 164 | 165 | return episode_reward 166 | 167 | n_runs = args.n_runs 168 | n_episodes = args.n_episodes 169 | discount_factor = args.discount_factor 170 | start_epsilon = args.start_epsilon 171 | end_epsilon = args.end_epsilon 172 | exploration_time = int(args.n_episodes * args.exploration_time) 173 | 174 | epsilons = np.linspace(start_epsilon, end_epsilon, exploration_time) 175 | 176 | train_rewards = torch.zeros(n_runs, n_episodes) 177 | test_rewards = torch.zeros(n_runs, n_episodes) 178 | device = torch.device('cpu') 179 | 180 | for run in range(n_runs): 181 | 182 | policy = MLP(input_dim, hidden_dim, output_dim) 183 | policy = policy.to(device) 184 | policy.apply(init_weights) 185 | epsilon = start_epsilon 186 | 187 | if args.optim == 'adam': 188 | optimizer = optim.Adam(policy.parameters(), lr=args.lr) 189 | else: 190 | assert args.optim == 'rmsprop' 191 | optimizer = optim.RMSprop(policy.parameters(), lr=args.lr) 192 | 193 | for episode in tqdm.tqdm(range(n_episodes), desc=f'Run: {run}'): 194 | 195 | loss, train_reward = train(train_env, policy, optimizer, discount_factor, epsilon, device) 196 | 197 | if episode < exploration_time: 198 | epsilon = epsilons[episode] 199 | 200 | test_reward = evaluate(test_env, policy, device) 201 | 202 | train_rewards[run][episode] = train_reward 203 | test_rewards[run][episode] = test_reward 204 | 205 | torch.save(train_rewards, 'checkpoints/'+name+'_train.pt') 206 | torch.save(train_rewards, 'checkpoints/'+name+'_test.pt') -------------------------------------------------------------------------------- /runner.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import subprocess 4 | 5 | while True: 6 | args = {'env': 'CartPole-v1', 7 | 'seed': 1234, 8 | 'n_layers': random.choice([1,2]), 9 | 'grad_clip': random.choice([5, 1, 0.5, 0.1]), 10 | 'hid_dim': random.choice([32, 64, 128, 256]), 11 | 'init': random.choice(['xavier', 'kaiming']), 12 | 'n_runs': 5, 13 | 'n_episodes': random.choice([500, 1000, 2500, 5000, 10_000]), 14 | 'discount_factor': random.choice([0.9, 0.99, 0.999]), 15 | 'start_epsilon': 1.0, 16 | 'end_epsilon': random.choice([0.01, 0.05]), 17 | 'exploration_time': random.choice([0.8, 0.6, 0.4, 0.2]), 18 | 'optim': random.choice(['adam', 'rmsprop']), 19 | 'lr': random.choice([1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5]), 20 | } 21 | 22 | name = '-'.join([f'{k}={v}' for k, v in args.items()]) 23 | 24 | if os.path.exists('checkpoints/'+name+'_train.pt'): 25 | continue 26 | 27 | cmd_args = ['python', 'q_learning.py'] 28 | for k, v in args.items(): 29 | cmd_args.append(f'--{k}') 30 | cmd_args.append(f'{v}') 31 | 32 | print(cmd_args) 33 | 34 | subprocess.run(cmd_args) --------------------------------------------------------------------------------