└── OpenAI Custom Environment Reinforcement Learning.ipynb /OpenAI Custom Environment Reinforcement Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 0. Install Dependencies" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Requirement already satisfied: tensorflow==2.3.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (2.3.0)\n", 22 | "Requirement already satisfied: opt-einsum>=2.3.2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (3.1.0)\n", 23 | "Requirement already satisfied: numpy<1.19.0,>=1.16.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.17.2)\n", 24 | "Requirement already satisfied: tensorboard<3,>=2.3.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (2.3.0)\n", 25 | "Requirement already satisfied: absl-py>=0.7.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (0.9.0)\n", 26 | "Requirement already satisfied: protobuf>=3.9.2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (3.12.2)\n", 27 | "Requirement already satisfied: tensorflow-estimator<2.4.0,>=2.3.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (2.3.0)\n", 28 | "Requirement already satisfied: termcolor>=1.1.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.1.0)\n", 29 | "Requirement already satisfied: wrapt>=1.11.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.11.2)\n", 30 | "Requirement already 
satisfied: wheel>=0.26 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (0.33.6)\n", 31 | "Requirement already satisfied: grpcio>=1.8.6 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.32.0)\n", 32 | "Requirement already satisfied: google-pasta>=0.1.8 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (0.2.0)\n", 33 | "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (2.10.0)\n", 34 | "Requirement already satisfied: scipy==1.4.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.4.1)\n", 35 | "Requirement already satisfied: keras-preprocessing<1.2,>=1.1.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.1.2)\n", 36 | "Requirement already satisfied: gast==0.3.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (0.3.3)\n", 37 | "Requirement already satisfied: six>=1.12.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.15.0)\n", 38 | "Requirement already satisfied: astunparse==1.6.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow==2.3.0) (1.6.3)\n", 39 | "Requirement already satisfied: requests<3,>=2.21.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (2.24.0)\n", 40 | "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (0.4.1)\n", 41 | "Requirement already satisfied: markdown>=2.6.8 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (3.1.1)\n", 42 | "Requirement 
already satisfied: setuptools>=41.0.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (41.4.0)\n", 43 | "Requirement already satisfied: werkzeug>=0.11.15 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (0.16.0)\n", 44 | "Requirement already satisfied: google-auth<2,>=1.6.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (1.21.2)\n", 45 | "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow==2.3.0) (1.6.0.post3)\n", 46 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (2019.9.11)\n", 47 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (1.24.2)\n", 48 | "Requirement already satisfied: chardet<4,>=3.0.2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (3.0.4)\n", 49 | "Requirement already satisfied: idna<3,>=2.5 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (2.8)\n", 50 | "Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (1.3.0)\n", 51 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow==2.3.0) 
(0.2.8)\n", 52 | "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.5\" in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (4.6)\n", 53 | "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (4.1.0)\n", 54 | "Requirement already satisfied: oauthlib>=3.0.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (3.1.0)\n", 55 | "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow==2.3.0) (0.4.8)\n", 56 | "Requirement already satisfied: gym in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (0.17.2)\n", 57 | "Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from gym) (1.5.0)\n", 58 | "Requirement already satisfied: cloudpickle<1.4.0,>=1.2.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from gym) (1.2.2)\n", 59 | "Requirement already satisfied: scipy in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from gym) (1.4.1)\n", 60 | "Requirement already satisfied: numpy>=1.10.4 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from gym) (1.17.2)\n", 61 | "Requirement already satisfied: future in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from pyglet<=1.5.0,>=1.4.0->gym) (0.18.2)\n", 62 | "Requirement already satisfied: keras in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (2.4.3)\n", 63 | "Requirement already satisfied: numpy>=1.9.1 in 
/Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from keras) (1.17.2)\n", 64 | "Requirement already satisfied: h5py in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from keras) (2.10.0)\n", 65 | "Requirement already satisfied: scipy>=0.14 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from keras) (1.4.1)\n", 66 | "Requirement already satisfied: pyyaml in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from keras) (5.1.2)\n", 67 | "Requirement already satisfied: six in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from h5py->keras) (1.15.0)\n", 68 | "Requirement already satisfied: keras-rl2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (1.0.4)\n", 69 | "Requirement already satisfied: tensorflow>=2.1.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from keras-rl2) (2.3.0)\n", 70 | "Requirement already satisfied: tensorboard<3,>=2.3.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (2.3.0)\n", 71 | "Requirement already satisfied: numpy<1.19.0,>=1.16.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.17.2)\n", 72 | "Requirement already satisfied: gast==0.3.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (0.3.3)\n", 73 | "Requirement already satisfied: tensorflow-estimator<2.4.0,>=2.3.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (2.3.0)\n", 74 | "Requirement already satisfied: protobuf>=3.9.2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (3.12.2)\n", 75 | "Requirement already satisfied: scipy==1.4.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.4.1)\n", 76 | "Requirement already 
satisfied: astunparse==1.6.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.6.3)\n" 77 | ] 78 | }, 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "Requirement already satisfied: keras-preprocessing<1.2,>=1.1.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.1.2)\n", 84 | "Requirement already satisfied: termcolor>=1.1.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.1.0)\n", 85 | "Requirement already satisfied: google-pasta>=0.1.8 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (0.2.0)\n", 86 | "Requirement already satisfied: opt-einsum>=2.3.2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (3.1.0)\n", 87 | "Requirement already satisfied: wheel>=0.26 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (0.33.6)\n", 88 | "Requirement already satisfied: absl-py>=0.7.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (0.9.0)\n", 89 | "Requirement already satisfied: six>=1.12.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.15.0)\n", 90 | "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (2.10.0)\n", 91 | "Requirement already satisfied: grpcio>=1.8.6 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.32.0)\n", 92 | "Requirement already satisfied: wrapt>=1.11.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorflow>=2.1.0->keras-rl2) (1.11.2)\n", 93 | "Requirement already satisfied: 
google-auth-oauthlib<0.5,>=0.4.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (0.4.1)\n", 94 | "Requirement already satisfied: markdown>=2.6.8 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (3.1.1)\n", 95 | "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (1.6.0.post3)\n", 96 | "Requirement already satisfied: setuptools>=41.0.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (41.4.0)\n", 97 | "Requirement already satisfied: google-auth<2,>=1.6.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (1.21.2)\n", 98 | "Requirement already satisfied: requests<3,>=2.21.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (2.24.0)\n", 99 | "Requirement already satisfied: werkzeug>=0.11.15 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (0.16.0)\n", 100 | "Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (1.3.0)\n", 101 | "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.5\" in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (4.6)\n", 102 | "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from 
google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (4.1.0)\n", 103 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (0.2.8)\n", 104 | "Requirement already satisfied: chardet<4,>=3.0.2 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (3.0.4)\n", 105 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (2019.9.11)\n", 106 | "Requirement already satisfied: idna<3,>=2.5 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (2.8)\n", 107 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (1.24.2)\n", 108 | "Requirement already satisfied: oauthlib>=3.0.0 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (3.1.0)\n", 109 | "Requirement already satisfied: pyasn1>=0.1.3 in /Users/nicholasrenotte/opt/anaconda3/lib/python3.7/site-packages (from rsa<5,>=3.1.4; python_version >= \"3.5\"->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow>=2.1.0->keras-rl2) (0.4.8)\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "!pip install tensorflow==2.3.0\n", 115 | "!pip install gym\n", 116 | "!pip install keras\n", 117 | "!pip install keras-rl2" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "# 1. 
class ShowerEnv(Env):
    """Toy shower-temperature control environment.

    Each step the agent turns the temperature down by one, leaves it alone,
    or turns it up by one. A step is rewarded with +1 when the resulting
    temperature lies in the comfortable band (37-39 inclusive) and with -1
    otherwise. An episode lasts ``SHOWER_LENGTH`` steps.

    NOTE(review): observations are returned as a bare scalar, not as a
    length-1 array matching ``observation_space``. The model in this notebook
    is built around that scalar, so the return type is deliberately left
    unchanged here - confirm the downstream input shape before changing it.
    NOTE(review): the temperature is not clipped to the declared [0, 100]
    bounds, so a long run of identical actions can leave the observation space.
    """

    # Inclusive comfort band that earns a positive reward.
    COMFORT_LOW = 37
    COMFORT_HIGH = 39
    # Episodes start at START_TEMP plus a uniform integer offset in
    # [-START_JITTER, START_JITTER].
    START_TEMP = 38
    START_JITTER = 3
    # Number of steps (seconds) in one episode.
    SHOWER_LENGTH = 60

    def __init__(self):
        # Actions: 0 = turn down, 1 = stay, 2 = turn up.
        self.action_space = Discrete(3)
        # Temperature range. The bounds are created as float32 up front so Box
        # does not have to down-cast them (which triggers a precision warning).
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([100], dtype=np.float32),
            dtype=np.float32,
        )
        # Sets self.state and self.shower_length exactly as reset() does.
        self.reset()

    def step(self, action):
        """Apply one action and advance the shower by one second.

        Args:
            action: 0, 1 or 2; mapped to a temperature change of -1, 0 or +1.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the new
            temperature, ``reward`` is +1 or -1, ``done`` is True once the
            shower time has run out, and ``info`` is an empty dict.
        """
        # Shift the action index {0, 1, 2} onto a delta of {-1, 0, +1}.
        self.state += action - 1
        self.shower_length -= 1

        in_comfort_band = self.COMFORT_LOW <= self.state <= self.COMFORT_HIGH
        reward = 1 if in_comfort_band else -1

        done = self.shower_length <= 0

        # No random temperature noise is applied: the dynamics are fully
        # determined by the chosen actions.
        info = {}
        return self.state, reward, done, info

    def render(self, mode='human'):
        """Visualization hook; intentionally draws nothing.

        ``mode`` is accepted (and ignored) so that callers using the standard
        gym ``render(mode=...)`` call form do not fail with a TypeError.
        """
        pass

    def reset(self):
        """Start a new episode and return the initial temperature."""
        self.state = self.START_TEMP + random.randint(-self.START_JITTER, self.START_JITTER)
        self.shower_length = self.SHOWER_LENGTH
        return self.state
def build_model(states, actions):
    """Build the fully connected Q-network used by the agent.

    Args:
        states: Shape of a single observation. Either a tuple such as
            ``env.observation_space.shape`` or a plain integer feature count
            (e.g. ``env.observation_space.shape[0]``).
        actions: Number of discrete actions; this is the width of the linear
            output layer (one value per action).

    Returns:
        An uncompiled ``Sequential`` model: two hidden ReLU layers of 24 units
        followed by a linear layer with ``actions`` outputs.
    """
    # Keras expects input_shape to be a tuple. Accept a bare integer as well,
    # because this notebook calls build_model both with `shape` and `shape[0]`.
    input_shape = (int(states),) if np.isscalar(states) else tuple(states)

    model = Sequential([
        Dense(24, activation='relu', input_shape=input_shape),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ])
    return model
def build_agent(model, actions):
    """Wrap a Q-network in a keras-rl DQN agent.

    Args:
        model: The Keras model that estimates one value per action.
        actions: Number of discrete actions, forwarded as ``nb_actions``.

    Returns:
        A ``DQNAgent`` (not yet compiled) using a Boltzmann Q policy and a
        sequential replay memory of 50000 entries with a window length of 1.
    """
    agent_config = dict(
        model=model,
        nb_actions=actions,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )
    return DQNAgent(**agent_config)
427 | "10000/10000 [==============================] - 50s 5ms/step - reward: -0.6330\n", 428 | "166 episodes - episode_reward: -38.000 [-60.000, 32.000] - loss: 1.235 - mae: 6.439 - mean_q: -8.204\n", 429 | "\n", 430 | "Interval 2 (10000 steps performed)\n", 431 | "10000/10000 [==============================] - 50s 5ms/step - reward: -0.5052\n", 432 | "167 episodes - episode_reward: -30.263 [-60.000, 36.000] - loss: 2.347 - mae: 11.012 - mean_q: -15.812\n", 433 | "\n", 434 | "Interval 3 (20000 steps performed)\n", 435 | "10000/10000 [==============================] - 49s 5ms/step - reward: -0.4650\n", 436 | "167 episodes - episode_reward: -27.964 [-60.000, 36.000] - loss: 2.621 - mae: 11.725 - mean_q: -16.873\n", 437 | "\n", 438 | "Interval 4 (30000 steps performed)\n", 439 | "10000/10000 [==============================] - 49s 5ms/step - reward: -0.4816\n", 440 | "166 episodes - episode_reward: -28.916 [-60.000, 42.000] - loss: 2.326 - mae: 10.960 - mean_q: -15.735\n", 441 | "\n", 442 | "Interval 5 (40000 steps performed)\n", 443 | " 6449/10000 [==================>...........] 
# Build the DQN agent around the Q-network, compile it and train it on the
# shower environment for 50,000 steps without rendering.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
-56.000, steps: 60\n", 494 | "Episode 32: reward: -56.000, steps: 60\n", 495 | "Episode 33: reward: -60.000, steps: 60\n", 496 | "Episode 34: reward: -60.000, steps: 60\n", 497 | "Episode 35: reward: -54.000, steps: 60\n", 498 | "Episode 36: reward: -52.000, steps: 60\n", 499 | "Episode 37: reward: -52.000, steps: 60\n", 500 | "Episode 38: reward: -56.000, steps: 60\n", 501 | "Episode 39: reward: -60.000, steps: 60\n", 502 | "Episode 40: reward: -56.000, steps: 60\n", 503 | "Episode 41: reward: -54.000, steps: 60\n", 504 | "Episode 42: reward: -60.000, steps: 60\n", 505 | "Episode 43: reward: -54.000, steps: 60\n", 506 | "Episode 44: reward: -60.000, steps: 60\n", 507 | "Episode 45: reward: -60.000, steps: 60\n", 508 | "Episode 46: reward: -60.000, steps: 60\n", 509 | "Episode 47: reward: -60.000, steps: 60\n", 510 | "Episode 48: reward: -60.000, steps: 60\n", 511 | "Episode 49: reward: -60.000, steps: 60\n", 512 | "Episode 50: reward: -54.000, steps: 60\n", 513 | "Episode 51: reward: -56.000, steps: 60\n", 514 | "Episode 52: reward: -60.000, steps: 60\n", 515 | "Episode 53: reward: -58.000, steps: 60\n", 516 | "Episode 54: reward: -60.000, steps: 60\n", 517 | "Episode 55: reward: -56.000, steps: 60\n", 518 | "Episode 56: reward: -60.000, steps: 60\n", 519 | "Episode 57: reward: -58.000, steps: 60\n", 520 | "Episode 58: reward: -58.000, steps: 60\n", 521 | "Episode 59: reward: -58.000, steps: 60\n", 522 | "Episode 60: reward: -60.000, steps: 60\n", 523 | "Episode 61: reward: -60.000, steps: 60\n", 524 | "Episode 62: reward: -60.000, steps: 60\n", 525 | "Episode 63: reward: -54.000, steps: 60\n", 526 | "Episode 64: reward: -60.000, steps: 60\n", 527 | "Episode 65: reward: -60.000, steps: 60\n", 528 | "Episode 66: reward: -58.000, steps: 60\n", 529 | "Episode 67: reward: -60.000, steps: 60\n", 530 | "Episode 68: reward: -58.000, steps: 60\n", 531 | "Episode 69: reward: -60.000, steps: 60\n", 532 | "Episode 70: reward: -56.000, steps: 60\n", 533 | "Episode 71: reward: 
# Run 100 evaluation episodes without rendering and report the mean
# total reward per episode.
scores = dqn.test(env, visualize=False, nb_episodes=100)
mean_episode_reward = np.mean(scores.history['episode_reward'])
print(mean_episode_reward)
# Short sanity run with the trained agent.
# visualize=False: ShowerEnv.render() draws nothing, so there is nothing to show.
_ = dqn.test(env, nb_episodes=15, visualize=False)

# 4. Reloading the agent from disk

# Persist the trained weights.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

# Drop the in-memory objects so the reload below really starts from scratch.
del model
del dqn
del env

# Rebuild the SAME environment and network architecture the weights were
# trained on; weights saved for the shower task cannot be loaded into a
# network sized for a different environment.
env = ShowerEnv()
actions = env.action_space.n
# Keep the full shape tuple - it is passed to the first layer as input_shape.
states = env.observation_space.shape
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Restore the trained weights into the freshly built agent.
dqn.load_weights('dqn_weights.h5f')

# Confirm the reloaded agent runs on the shower task.
_ = dqn.test(env, nb_episodes=5, visualize=False)