├── A2C_2M_model.zip ├── A2C_300k_model.zip ├── Main Course.ipynb ├── PPO_2m_Driving_model_2.7z ├── PPO_428k_Driving_model_2.7z ├── Project 1-Breakout.ipynb ├── Project 2 - Self Driving.ipynb └── Project 3 - Custom Environment.ipynb /A2C_2M_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/ReinforcementLearningCourse/c1b974d82ecf131095d68fdbdce5352eee57f1cd/A2C_2M_model.zip -------------------------------------------------------------------------------- /A2C_300k_model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/ReinforcementLearningCourse/c1b974d82ecf131095d68fdbdce5352eee57f1cd/A2C_300k_model.zip -------------------------------------------------------------------------------- /Main Course.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# https://stable-baselines3.readthedocs.io/en/master/guide/rl.html\n", 10 | "# https://spinningup.openai.com/en/latest/spinningup/rl_intro2.html#a-taxonomy-of-rl-algorithms" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# 1. Import dependencies" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "!pip install stable-baselines3[extra]" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import gym \n", 36 | "from stable_baselines3 import PPO\n", 37 | "from stable_baselines3.common.vec_env import DummyVecEnv\n", 38 | "from stable_baselines3.common.evaluation import evaluate_policy" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# 2. Load Environment" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "environment_name = \"CartPole-v0\"" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "env = gym.make(environment_name)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "episodes = 5\n", 73 | "for episode in range(1, episodes+1):\n", 74 | " state = env.reset()\n", 75 | " done = False\n", 76 | " score = 0 \n", 77 | " \n", 78 | " while not done:\n", 79 | " env.render()\n", 80 | " action = env.action_space.sample()\n", 81 | " n_state, reward, done, info = env.step(action)\n", 82 | " score+=reward\n", 83 | " print('Episode:{} Score:{}'.format(episode, score))\n", 84 | "env.close()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Understanding The Environment\n", 92 | "https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# 0-push cart to left, 1-push cart to the right\n", 102 | "env.action_space.sample()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# [cart position, cart velocity, pole angle, pole angular velocity]\n", 112 | "env.observation_space.sample()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 3. Train an RL Model" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "env = gym.make(environment_name)\n", 129 | "env = DummyVecEnv([lambda: env])\n", 130 | "model = PPO('MlpPolicy', env, verbose = 1)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "scrolled": true 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "model.learn(total_timesteps=20000)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "# 4. Save and Reload Model" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "PPO_path = os.path.join('Training', 'Saved Models', 'PPO_model')" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "model.save(PPO_path)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "del model" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "model = PPO.load('PPO_model', env=env)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "# 4. Evaluation" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "from stable_baselines3.common.evaluation import evaluate_policy" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "evaluate_policy(model, env, n_eval_episodes=10, render=True)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "env.close()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "# 5. Test Model" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "obs = env.reset()\n", 235 | "while True:\n", 236 | " action, _states = model.predict(obs)\n", 237 | " obs, rewards, done, info = env.step(action)\n", 238 | " env.render()\n", 239 | " if done: \n", 240 | " print('info', info)\n", 241 | " break" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "env.close()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "# 6. Viewing Logs in Tensorboard" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "training_log_path = os.path.join(log_path, 'PPO_3')" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "!tensorboard --logdir={training_log_path}" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "# 7. Adding a callback to the training Stage" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold\n", 292 | "import os" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "save_path = os.path.join('Training', 'Saved Models')\n", 302 | "log_path = os.path.join('Training', 'Logs')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "env = gym.make(environment_name)\n", 312 | "env = DummyVecEnv([lambda: env])" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "stop_callback = StopTrainingOnRewardThreshold(reward_threshold=190, verbose=1)\n", 322 | "eval_callback = EvalCallback(env, \n", 323 | " callback_on_new_best=stop_callback, \n", 324 | " eval_freq=10000, \n", 325 | " best_model_save_path=save_path, \n", 326 | " verbose=1)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "model = PPO('MlpPolicy', env, verbose = 1, tensorboard_log=log_path)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "scrolled": true 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "model.learn(total_timesteps=20000, callback=eval_callback)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "model_path = os.path.join('Training', 'Saved Models', 'best_model')\n", 356 | "model = PPO.load(model_path, env=env)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "evaluate_policy(model, env, n_eval_episodes=10, render=True)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "env.close()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "# 8. Changing Policies" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "net_arch=[dict(pi=[128, 128, 128, 128], vf=[128, 128, 128, 128])]" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "model = PPO('MlpPolicy', env, verbose = 1, policy_kwargs={'net_arch': net_arch})" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "scrolled": true 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "model.learn(total_timesteps=20000, callback=eval_callback)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "# 9. Using an Alternate Algorithm" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "from stable_baselines3 import DQN" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "model = DQN('MlpPolicy', env, verbose = 1, tensorboard_log=log_path)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": { 442 | "scrolled": true 443 | }, 444 | "outputs": [], 445 | "source": [ 446 | "model.learn(total_timesteps=20000, callback=eval_callback)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "dqn_path = os.path.join('Training', 'Saved Models', 'DQN_model')" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [ 464 | "model.save(dqn_path)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "model = DQN.load(dqn_path, env=env)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "evaluate_policy(model, env, n_eval_episodes=10, render=True)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "env.close()" 492 | ] 493 | } 494 | ], 495 | "metadata": { 496 | "kernelspec": { 497 | "display_name": "rlcourse", 498 | "language": "python", 499 | "name": "rlcourse" 500 | }, 501 | "language_info": { 502 | "codemirror_mode": { 503 | "name": "ipython", 504 | "version": 3 505 | }, 506 | "file_extension": ".py", 507 | "mimetype": "text/x-python", 508 | "name": "python", 509 | "nbconvert_exporter": "python", 510 | "pygments_lexer": "ipython3", 511 | "version": "3.7.3" 512 | } 513 | }, 514 | "nbformat": 4, 515 | "nbformat_minor": 2 516 | } 517 | -------------------------------------------------------------------------------- /PPO_2m_Driving_model_2.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/ReinforcementLearningCourse/c1b974d82ecf131095d68fdbdce5352eee57f1cd/PPO_2m_Driving_model_2.7z -------------------------------------------------------------------------------- /PPO_428k_Driving_model_2.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/ReinforcementLearningCourse/c1b974d82ecf131095d68fdbdce5352eee57f1cd/PPO_428k_Driving_model_2.7z -------------------------------------------------------------------------------- /Project 1-Breakout.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Import Dependencies" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import gym \n", 17 | "from stable_baselines3 import A2C\n", 18 | "from stable_baselines3.common.vec_env import VecFrameStack\n", 19 | "from stable_baselines3.common.evaluation import evaluate_policy\n", 20 | "from stable_baselines3.common.env_util import make_atari_env\n", 21 | "import os" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# 2. Test Environment" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "environment_name = \"Breakout-v0\"" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "env = gym.make(environment_name)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "episodes = 5\n", 56 | "for episode in range(1, episodes+1):\n", 57 | " state = env.reset()\n", 58 | " done = False\n", 59 | " score = 0 \n", 60 | " \n", 61 | " while not done:\n", 62 | " env.render()\n", 63 | " action = env.action_space.sample()\n", 64 | " n_state, reward, done, info = env.step(action)\n", 65 | " score+=reward\n", 66 | " print('Episode:{} Score:{}'.format(episode, score))\n", 67 | "env.close()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "env.action_space.sample()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "scrolled": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "env.observation_space.sample()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "# 3. Vectorise Environment and Train Model" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "env = make_atari_env('Breakout-v0', n_envs=4, seed=0)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "env = VecFrameStack(env, n_stack=4)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "log_path = os.path.join('Training', 'Logs')" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "model = A2C(\"CnnPolicy\", env, verbose=1, tensorboard_log=log_path)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "scrolled": true 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "model.learn(total_timesteps=400000)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "# 4. Save and Reload Model" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "a2c_path = os.path.join('Training', 'Saved Models', 'A2C_model')" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "model.save(a2c_path)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "del model" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "env = make_atari_env('Breakout-v0', n_envs=1, seed=0)\n", 185 | "env = VecFrameStack(env, n_stack=4)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "model = A2C.load(a2c_path, env)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "# 5. Evaluate and Test" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "evaluate_policy(model, env, n_eval_episodes=10, render=True)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "obs = env.reset()\n", 220 | "while True:\n", 221 | " action, _states = model.predict(obs)\n", 222 | " obs, rewards, dones, info = env.step(action)\n", 223 | " env.render()" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "env.close()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "rlcourse", 246 | "language": "python", 247 | "name": "rlcourse" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.7.3" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /Project 2 - Self Driving.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Import Dependencies" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#Install SWIG https://sourceforge.net/projects/swig/files/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip/download?use_mirror=ixpeering" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "!pip install gym[box2d] pyglet==1.3.2" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import gym \n", 35 | "from stable_baselines3 import PPO\n", 36 | "from stable_baselines3.common.vec_env import VecFrameStack\n", 37 | "from stable_baselines3.common.evaluation import evaluate_policy\n", 38 | "import os" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "# 2. Test Environment" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "environment_name = \"CarRacing-v0\"" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "env = gym.make(environment_name)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "episodes = 5\n", 73 | "for episode in range(1, episodes+1):\n", 74 | " state = env.reset()\n", 75 | " done = False\n", 76 | " score = 0 \n", 77 | " \n", 78 | " while not done:\n", 79 | " env.render()\n", 80 | " action = env.action_space.sample()\n", 81 | " n_state, reward, done, info = env.step(action)\n", 82 | " score+=reward\n", 83 | " print('Episode:{} Score:{}'.format(episode, score))\n", 84 | "env.close()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "env.close()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "env.action_space.sample()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "scrolled": true 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "env.observation_space.sample()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "# 3. Train Model" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "log_path = os.path.join('Training', 'Logs')" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "model = PPO(\"CnnPolicy\", env, verbose=1, tensorboard_log=log_path)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "scrolled": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "model.learn(total_timesteps=40000)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "# 4. Save Model " 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "ppo_path = os.path.join('Training', 'Saved Models', 'PPO_Driving_model')" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "model.save(ppo_path)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "# 5. Evaluate and Test" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "evaluate_policy(model, env, n_eval_episodes=10, render=True)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "env.close()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "obs = env.reset()\n", 209 | "while True:\n", 210 | " action, _states = model.predict(obs)\n", 211 | " obs, rewards, dones, info = env.step(action)\n", 212 | " env.render()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "env.close()" 222 | ] 223 | } 224 | ], 225 | "metadata": { 226 | "kernelspec": { 227 | "display_name": "rlcourse", 228 | "language": "python", 229 | "name": "rlcourse" 230 | }, 231 | "language_info": { 232 | "codemirror_mode": { 233 | "name": "ipython", 234 | "version": 3 235 | }, 236 | "file_extension": ".py", 237 | "mimetype": "text/x-python", 238 | "name": "python", 239 | "nbconvert_exporter": "python", 240 | "pygments_lexer": "ipython3", 241 | "version": "3.7.3" 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 2 246 | } 247 | -------------------------------------------------------------------------------- /Project 3 - Custom Environment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#https://sourceforge.net/projects/swig/files/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip/download?use_mirror=ixpeering" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# 1. Import Dependencies" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import gym \n", 26 | "from gym import Env\n", 27 | "from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete \n", 28 | "import numpy as np\n", 29 | "import random\n", 30 | "import os\n", 31 | "from stable_baselines3 import PPO\n", 32 | "from stable_baselines3.common.vec_env import VecFrameStack\n", 33 | "from stable_baselines3.common.evaluation import evaluate_policy" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# 2. Types of Spaces" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "Discrete(3)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "Box(0,1,shape=(3,3)).sample()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "Box(0,255,shape=(3,3), dtype=int).sample()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "Tuple((Discrete(2), Box(0,100, shape=(1,)))).sample()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "Dict({'height':Discrete(2), \"speed\":Box(0,100, shape=(1,))}).sample()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "MultiBinary(4).sample()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "MultiDiscrete([5,2,2]).sample()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# 3. Building an Environment" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "class ShowerEnv(Env):\n", 120 | " def __init__(self):\n", 121 | " # Actions we can take, down, stay, up\n", 122 | " self.action_space = Discrete(3)\n", 123 | " # Temperature array\n", 124 | " self.observation_space = Box(low=np.array([0]), high=np.array([100]))\n", 125 | " # Set start temp\n", 126 | " self.state = 38 + random.randint(-3,3)\n", 127 | " # Set shower length\n", 128 | " self.shower_length = 60\n", 129 | " \n", 130 | " def step(self, action):\n", 131 | " # Apply action\n", 132 | " # 0 -1 = -1 temperature\n", 133 | " # 1 -1 = 0 \n", 134 | " # 2 -1 = 1 temperature \n", 135 | " self.state += action -1 \n", 136 | " # Reduce shower length by 1 second\n", 137 | " self.shower_length -= 1 \n", 138 | " \n", 139 | " # Calculate reward\n", 140 | " if self.state >=37 and self.state <=39: \n", 141 | " reward =1 \n", 142 | " else: \n", 143 | " reward = -1 \n", 144 | " \n", 145 | " # Check if shower is done\n", 146 | " if self.shower_length <= 0: \n", 147 | " done = True\n", 148 | " else:\n", 149 | " done = False\n", 150 | " \n", 151 | " # Apply temperature noise\n", 152 | " #self.state += random.randint(-1,1)\n", 153 | " # Set placeholder for info\n", 154 | " info = {}\n", 155 | " \n", 156 | " # Return step information\n", 157 | " return self.state, reward, done, info\n", 158 | "\n", 159 | " def render(self):\n", 160 | " # Implement viz\n", 161 | " pass\n", 162 | " \n", 163 | " def reset(self):\n", 164 | " # Reset shower temperature\n", 165 | " self.state = np.array([38 + random.randint(-3,3)]).astype(float)\n", 166 | " # Reset shower time\n", 167 | " self.shower_length = 60 \n", 168 | " return self.state" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "env=ShowerEnv()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "env.observation_space.sample()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "env.reset()" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "from stable_baselines3.common.env_checker import check_env" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "check_env(env, warn=True)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "# 4. Test Environment" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "episodes = 5\n", 230 | "for episode in range(1, episodes+1):\n", 231 | " state = env.reset()\n", 232 | " done = False\n", 233 | " score = 0 \n", 234 | " \n", 235 | " while not done:\n", 236 | " env.render()\n", 237 | " action = env.action_space.sample()\n", 238 | " n_state, reward, done, info = env.step(action)\n", 239 | " score+=reward\n", 240 | " print('Episode:{} Score:{}'.format(episode, score))\n", 241 | "env.close()" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "env.close()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "# 5. Train Model" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "log_path = os.path.join('Training', 'Logs')" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "model = PPO(\"MlpPolicy\", env, verbose=1, tensorboard_log=log_path)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "scrolled": true 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "model.learn(total_timesteps=400000)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "# 6. Save Model" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "model.save('PPO')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "evaluate_policy(model, env, n_eval_episodes=10, render=True)" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "rlcourse", 318 | "language": "python", 319 | "name": "rlcourse" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.7.3" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 2 336 | } 337 | --------------------------------------------------------------------------------