├── Game-TestHuman.py
├── LICENSE
├── README.md
├── Reinforcement Learning (RL) Using Python.ipynb
├── Train_GridSearch.py
└── images
    ├── EnvExp.jpg
    ├── gifs
    │   ├── EnvPlayed.gif
    │   └── envExp.gif
    └── wall.jpg

/Game-TestHuman.py:
--------------------------------------------------------------------------------
1 | from random import randint
2 | from collections import deque
3 | from time import sleep
4 | import pygame
5 | 
6 | pygame.init()
7 | 
8 | class Field:
9 |     def __init__(self, height=10, width=5):
10 |         self.width = width
11 |         self.height = height
12 |         self.body = [[0] * width for _ in range(height)]
13 | 
14 |     def update_field(self, walls, player):
15 |         self.body = [[0] * self.width for _ in range(self.height)]  # clear the field
16 | 
17 |         for wall in walls:
18 |             if not wall.out_of_range:
19 |                 for i in range(wall.y, min(wall.y + wall.height, self.height)):
20 |                     self.body[i][:] = wall.body[i - wall.y][:]
21 | 
22 |         for i in range(player.y, min(player.y + player.height, self.height)):
23 |             for j in range(player.x, min(player.x + player.width, self.width)):
24 |                 self.body[i][j] = player.body[i - player.y][j - player.x]
25 | 
26 | 
27 | class Wall:
28 |     def __init__(self, height=2, width=5, hole_width=2, y=0, speed=1):  # defaults sized to the default 10x5 Field
29 |         self.height = height
30 |         self.width = width
31 |         self.hole_width = hole_width
32 |         self.y = y
33 |         self.speed = speed
34 |         self.body_unit = 1
35 |         self.body = [[self.body_unit] * width for _ in range(height)]
36 |         self.out_of_range = False
37 |         self.create_hole()
38 | 
39 |     def create_hole(self):
40 |         hole_pos = randint(0, self.width - self.hole_width)
41 |         for row in self.body:  # punch the hole through every row of the wall
42 |             row[hole_pos:hole_pos + self.hole_width] = [0] * self.hole_width
43 | 
44 |     def move(self):
45 |         self.y += self.speed
46 |         self.out_of_range = self.y + self.height > field.height  # `field` is the module-level Field created below
47 | 
48 | 
49 | class Player:
50 |     def __init__(self, height=2, max_width=10, width=2, x=0, y=0, speed=1):
51 |         self.height = height
52 |         self.max_width = max_width
53 |         self.width = width
54 |         self.x = x
55 |         self.y = y
56 |         self.speed = speed
57 |         self.body_unit = 2
58 |         self.body = [[self.body_unit] * width for _ in range(height)]
59 | 
60 |     def move(self, direction=0):
61 |         if direction == 1 and self.x - self.speed >= 0:
62 |             self.x -= self.speed
63 |         elif direction == 2 and self.x + self.width + self.speed <= field.width:
64 |             self.x += self.speed
65 | 
66 | 
67 | class Environment:
68 |     def __init__(self):
69 |         self.BLACK = (25, 25, 25)
70 |         self.WHITE = (255, 255, 255)
71 |         self.RED = (255, 80, 80)
72 |         self.field = self.walls = self.player = None
73 |         self.current_state = self.reset()
74 | 
75 |     def reset(self):
76 |         self.field = Field()
77 |         self.walls = deque([Wall()])
78 |         self.player = Player(x=self.field.width // 2 - 1, y=self.field.height - 3)
79 |         return self.field.body
80 | 
81 |     def step(self, action):
82 |         reward = 0
83 |         if action == 1 or action == 2:
84 |             self.player.move(action)
85 | 
86 |         for wall in self.walls:
87 |             wall.move()
88 | 
89 |         # A wall that left the field has been passed: reward the player and spawn a new wall.
90 |         if self.walls[-1].out_of_range:
91 |             reward += 1
92 |             self.walls[-1] = Wall()
93 | 
94 |         self.field.update_field(self.walls, self.player)
95 |         return self.field.body, reward
96 | 
97 |     def render(self, window):
98 |         window.fill(self.WHITE)
99 | 
100 |         for r in range(field.height):
101 |             for c in range(field.width):
102 |                 color = self.WHITE if self.field.body[r][c] == 0 else self.BLACK
103 |                 pygame.draw.rect(window, color, (c * 40, r * 30, 40, 30))
104 | 
105 |         pygame.display.update()
106 | 
107 | 
108 | env = Environment()
109 | field = env.field
110 | 
111 | WINDOW_WIDTH = field.width * 40
112 | WINDOW_HEIGHT = field.height * 30
113 | WINDOW = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
114 | 
115 | clock = pygame.time.Clock()
116 | game_over = False
117 | 
118 | while not game_over:
119 |     clock.tick(6)  # a low frame rate keeps the falling walls at a human-playable speed
120 |     action = 0
121 |     for event in pygame.event.get():
122 |         if event.type == pygame.QUIT:
123 |             game_over = True
124 |         elif event.type == pygame.KEYDOWN:
125 |             if event.key == pygame.K_LEFT:
126 |                 action = 1
127 |             elif event.key == pygame.K_RIGHT:
128 |                 action = 2
129 |     env.step(action)  # step every frame so the walls keep falling even without key presses
130 |     env.render(WINDOW)
131 | 
132 | pygame.quit()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Mohammed A. AL-Maamari
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Reinforcement Learning (RL) Using Python
2 | 
3 | ![](https://cdn-images-1.medium.com/max/2560/1*a9F8vOTfpDEM52eW5SSXAQ.jpeg)
4 | 
5 | Explanation of the game rules | The game played by a human
6 | :-------------------------:|:-------------------------:
7 | ![](images/gifs/envExp.gif) | ![](images/gifs/EnvPlayed.gif)
8 | 
9 | In this tutorial series, we are going through every step of building an expert Reinforcement Learning (RL) agent that is capable of playing games.
10 | 
11 | This series is divided into three parts:
12 | 
13 | * **Part 1**: Designing and Building the Game Environment. In this part, we will build a game environment and customize it so that the RL agent can train on it.
14 | 
15 | * **Part 2**: Build and Train the Deep Q Neural Network (DQN). In this part, we define and build the different layers of the DQN and train it.
16 | 
17 | * **Part 3**: Test and Play the Game.
18 | 
19 | We might also try making another simple game environment and use Q-Learning to create an agent that can play this simple game.
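
For reference, the tabular Q-Learning update behind that simpler agent fits in a few lines. The sketch below is only illustrative: the state/action counts and hyperparameters are placeholders, not values used anywhere in this repository.

```python
import numpy as np

n_states, n_actions = 16, 4           # illustrative sizes for a tiny grid-style game
Q = np.zeros((n_states, n_actions))   # tabular action-value estimates
alpha, gamma = 0.1, 0.99              # learning rate and discount factor

def q_update(state, action, reward, next_state, done):
    """One Q-Learning step: Q(s,a) <- Q(s,a) + alpha * (target - Q(s,a))."""
    target = reward if done else reward + gamma * np.max(Q[next_state])
    Q[state, action] += alpha * (target - Q[state, action])
```

The DQN built in Part 2 estimates the same action values, but with a neural network instead of the table `Q`.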
20 | 
21 | ## The Motivation:
22 | 
23 | One time I was in a YouTube rabbit hole and [THIS VIDEO](https://www.youtube.com/watch?v=k-rWB1jOt9s) was recommended to me. It was about the **sense of self** in human babies. After watching it, a similar question popped into my mind: *“Can I develop a smart agent that is smart enough to have a sense of its body and has the ability to change its features to accomplish a certain task?”*
24 | 
25 | This series is my way of answering this question.
26 | 
27 | 
--------------------------------------------------------------------------------
/Reinforcement Learning (RL) Using Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Reinforcement Learning With Python | Part 1 | Creating The Environment"
8 |    ]
9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "Explanation of the game rules | The game played by a human\n",
15 |     ":-------------------------:|:-------------------------:\n",
16 |     "![](images/gifs/envExp.gif) | ![](images/gifs/EnvPlayed.gif)"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "markdown",
21 |    "metadata": {},
22 |    "source": [
23 |     "In this tutorial series, we are going through every step of building an expert Reinforcement Learning (RL) agent that is capable of playing games.\n",
24 |     "\n",
25 |     "**This series is divided into three parts:**\n",
26 |     "- **Part 1:** Designing and Building the Game Environment. In this part, we will build a game environment and customize it so that the RL agent can train on it.\n",
27 |     "- **Part 2:** Build and Train the Deep Q Neural Network (DQN). In this part, we define and build the different layers of the DQN and train it.\n",
28 |     "- **Part 3:** Test and Play the Game.\n",
29 |     "\n",
30 |     "We might also try making another simple game environment and use Q-Learning to create an agent that can play this simple game."
31 |    ]
32 |   },
33 |   {
34 |    "cell_type": "markdown",
35 |    "metadata": {},
36 |    "source": [
37 |     "## Designing the Environment:\n",
38 |     "\n",
39 |     "For this environment, we want the agent to develop a sense of its body and how to change its body features to avoid losing the game."
40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### First: The Elements of The Environment:\n", 47 | "The Elements of The Environment | .\n", 48 | ":-------------------------:|:-------------------------:\n", 49 | "" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "#### 1- The Field:\n", 57 | "Contains all the other elements,we represent it in code by class named \"Field\" as follows:" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "class Field:\n", 67 | " def __init__(self, height=10, width=5):\n", 68 | " self.width = width\n", 69 | " self.height = height\n", 70 | " self.body = np.zeros(shape=(self.height, self.width))\n", 71 | " \n", 72 | " def update_field(self,walls, player):\n", 73 | " try:\n", 74 | " # Clear the field:\n", 75 | " self.body = np.zeros(shape=(self.height, self.width))\n", 76 | " # Put the walls on the field:\n", 77 | " for wall in walls:\n", 78 | " if not wall.out_of_range :\n", 79 | " self.body[wall.y:min(wall.y+wall.height,self.height),:] = wall.body\n", 80 | "\n", 81 | " # Put the player on the field:\n", 82 | " self.body[player.y:player.y+player.height,\n", 83 | " player.x:player.x+player.width] += player.body \n", 84 | " except :\n", 85 | " pass" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "**Field attributes:**\n", 93 | "\n", 94 | "* ***width (int)*** : the width of the field (not in pixels)\n", 95 | "\n", 96 | "* ***height (int)*** the height of the field (not in pixels)\n", 97 | "\n", 98 | "* ***body (np.array)*** : holds the array representation of the game elements (player and walls) \n", 99 | "\n", 100 | "This array is passed to the DQN, and also used to draw the interface using pygame.\n", 101 | "

\n", 102 | "**Field methods:**\n", 103 | "\n", 104 | "* ***update_field***(self,walls, player) : updates the field." 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "#### 2- The Walls:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "class Wall: \n", 121 | " def __init__(self, height = 5, width=100, hole_width = 20,\n", 122 | " y = 0, speed = 1, field = None):\n", 123 | " self.height = height\n", 124 | " self.width = width\n", 125 | " self.hole_width = hole_width\n", 126 | " self.y = y\n", 127 | " self.speed = speed\n", 128 | " self.field = field\n", 129 | " self.body_unit = 1\n", 130 | " self.body = np.ones(shape = (self.height, self.width))*self.body_unit\n", 131 | " self.out_of_range = False\n", 132 | " self.create_hole()\n", 133 | " def create_hole(self):\n", 134 | " hole = np.zeros(shape = (self.height, self.hole_width))\n", 135 | " hole_pos = randint(0,self.width-self.hole_width)\n", 136 | " self.body[ : , hole_pos:hole_pos+self.hole_width] = 0\n", 137 | " def move(self):\n", 138 | " self.y += self.speed\n", 139 | " self.out_of_range = True if ((self.y + self.height) > self.field.height) else False" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "The Wall | .\n", 147 | ":-------------------------:|:-------------------------:\n", 148 | "" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "**Wall attributes:**\n", 156 | "\n", 157 | "|Attribute |Type |Description |\n", 158 | "|------------|-----------|------------------------------------------------------------------------------------|\n", 159 | "|height |int |the wall's height |\n", 160 | "|width |int |the wall's width ( the same value as the field's width) |\n", 161 | "|hole_width |int |the hole's width (max value of hole_width should be field.width or wall.width) |\n", 162 | "|y |int |the vertical coordinate of the wall (y axis) (max value of y should be field.height)|\n", 163 | "|speed |int |speed of the wall (raw/step) |\n", 164 | "|field |Field |the field that contains the wall |\n", 165 | "|body_unit |int ; float|the number used to represent the wall in the array representation (in field.body) |\n", 166 | "|body |np.array |the wall's body |\n", 167 | "|out_of_range|bool |A flag used to delete the wall when it moves out of the field range. 
|" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "**Wall methods:**\n", 175 | "\n", 176 | "* ***create_hole***(self): Creates a hole in the wall that its width = self.hole_width.\n", 177 | "* ***move***(self): Moves the wall vertically (every time it get called the wall moves n steps from downward (n = self.speed))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "#### 3- The Player :" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "class Player:\n", 194 | " def __init__(self, height = 5, max_width = 10 , width=2,\n", 195 | " x = 0, y = 0, speed = 2):\n", 196 | " self.height = height\n", 197 | " self.max_width = max_width\n", 198 | " self.width = width\n", 199 | " self.x = x\n", 200 | " self.y = y\n", 201 | " self.speed = speed\n", 202 | " self.body_unit = 2\n", 203 | " self.body = np.ones(shape = (self.height, self.width))*self.body_unit\n", 204 | " self.stamina = 20\n", 205 | " self.max_stamina = 20\n", 206 | " def move(self, field, direction = 0 ):\n", 207 | " '''\n", 208 | " Moves the player :\n", 209 | " - No change = 0\n", 210 | " - left, if direction = 1\n", 211 | " - right, if direction = 2\n", 212 | " '''\n", 213 | " val2dir = {0:0 , 1:-1 , 2:1}\n", 214 | " direction = val2dir[direction]\n", 215 | " next_x = (self.x + self.speed*direction)\n", 216 | " if not (next_x + self.width > field.width or next_x < 0):\n", 217 | " self.x += self.speed*direction\n", 218 | " self.stamina -= 1 \n", 219 | " def change_width(self, action = 0):\n", 220 | " '''\n", 221 | " Change the player's width:\n", 222 | " - No change = 0\n", 223 | " - narrow by one unit = 3\n", 224 | " - widen by one unit = 4\n", 225 | " '''\n", 226 | " val2act = {0:0 , 3:-1 , 4:1}\n", 227 | " action = val2act[action]\n", 228 | " new_width = self.width+action\n", 229 | " player_end = self.x + new_width\n", 230 | " if new_width <= self.max_width and new_width > 0 and player_end <= self.max_width:\n", 231 | " self.width = new_width\n", 232 | " self.body = np.ones(shape = (self.height, self.width))*self.body_unit" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "**Player attributes:**\n", 240 | "\n", 241 | "|Attribute |Type |Description |\n", 242 | "|------------|-----------|------------------------------------------------------------------------------------|\n", 243 | "|height |int |player's height |\n", 244 | "|max_width |int |player's maximum width (must be less than field.width) |\n", 245 | "|width |int |player's width (must be less than or equal to max_width and begger than 0) |\n", 246 | "|x |int |player's x coordinate in the field |\n", 247 | "|y |int |player's y coordinate in the field |\n", 248 | "|speed |int |player's speed (how many horizontal units it moves per step) |\n", 249 | "|body_unit |int ; float|the number used to represent the player in the array representation (in field.body) |\n", 250 | "|body |np.array |the player's body |\n", 251 | "|stamina |int ; float|player's energy (stamina) (when a player's energy hits zero the player dies) |\n", 252 | "|max_stamina |int ; float|maximum value for player's stamina |" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "**Player methods:**\n", 260 | "* ***move***(self, field, direction = 0 ): Moves the player :\n", 261 | " - direction = 0 -> No change \n", 
262 | " - direction = 1 -> left \n", 263 | " - direction = 2 -> right\n", 264 | "* ***change_width***(self, action = 0):\n", 265 | " - action = 0 -> No change\n", 266 | " - action = 3 -> narrow by one unit\n", 267 | " - action = 4 -> widen by one unit" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "---" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## The \"Environment\" Class :\n", 282 | "This class facilitates the communication between the environment and the agent, it is designed to work either with an RL agent or with a human player." 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "### Main Components Needed by the RL Agent:\n", 290 | "- ***ENVIRONMENT_SHAPE*** attribute : used by the DQN to set the shape of the input layer.\n", 291 | "- ***ACTION_SPACE*** attribute : used by the DQN to set the shape of the output layer.\n", 292 | "- ***PUNISHMENT*** and ***REWARD*** : set the values of both punishment and reward, used to train the agent (we use these values to tell the agent if its previous actions were good or bad).\n", 293 | "- ***reset*** method : to reset the environment.\n", 294 | "- ***step*** method: takes an action as an argument and returns next state, reward, a boolean variable named game_over that is used to tell us if the game is over (the player lost) or not.\n", 295 | "\n", 296 | "It is clear that this environment is not different, it subsumes all the required components and more." 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "class Environment:\n", 306 | " P_HEIGHT = 2 # Height of the player\n", 307 | " F_HEIGHT = 20 # Height of the field\n", 308 | " W_HEIGHT = 2 # Height of the walls\n", 309 | " WIDTH = 10 # Width of the field and the walls\n", 310 | " MIN_H_WIDTH = 2 # Minimum width of the holes\n", 311 | " MAX_H_WIDTH = 6 # Maximum width of the holes\n", 312 | " MIN_P_WIDTH = 2 # Minimum Width of the player\n", 313 | " MAX_P_WIDTH = 6 # Maximum Width of the player\n", 314 | " HEIGHT_MUL = 30 # Height Multiplier (used to draw np.array as blocks in pygame )\n", 315 | " WIDTH_MUL = 40 # Width Multiplier (used to draw np.array as blocks in pygame )\n", 316 | " WINDOW_HEIGHT = (F_HEIGHT+1) * HEIGHT_MUL # Height of the pygame window\n", 317 | " WINDOW_WIDTH = (WIDTH) * WIDTH_MUL # Widh of the pygame window\n", 318 | " \n", 319 | " ENVIRONMENT_SHAPE = (F_HEIGHT,WIDTH,1)\n", 320 | " ACTION_SPACE = [0,1,2,3,4]\n", 321 | " ACTION_SPACE_SIZE = len(ACTION_SPACE)\n", 322 | " PUNISHMENT = -100 # Punishment increment\n", 323 | " REWARD = 10 # Reward increment\n", 324 | " score = 0 # Initial Score\n", 325 | " \n", 326 | " MOVE_WALL_EVERY = 4 # Every how many frames the wall moves.\n", 327 | " MOVE_PLAYER_EVERY = 1 # Every how many frames the player moves.\n", 328 | " frames_counter = 0\n", 329 | "\n", 330 | " def __init__(self):\n", 331 | " # Colors:\n", 332 | " self.BLACK = (25,25,25)\n", 333 | " self.WHITE = (255,255,255)\n", 334 | " self.RED = (255, 80, 80)\n", 335 | " self.BLUE = (80, 80, 255)\n", 336 | " self.field = self.walls = self.player = None\n", 337 | " self.current_state = self.reset()\n", 338 | " self.val2color = {0:self.WHITE, self.walls[0].body_unit:self.BLACK,\n", 339 | " self.player.body_unit:self.BLACK, self.MAX_VAL:self.RED}\n", 340 | " def reset(self):\n", 341 | " self.score = 0\n", 342 | " 
self.frames_counter = 0\n", 343 | " self.game_over = False\n", 344 | " \n", 345 | " self.field = Field(height=self.F_HEIGHT, width=self.WIDTH )\n", 346 | " w1 = Wall( height = self.W_HEIGHT, width=self.WIDTH,\n", 347 | " hole_width = randint(self.MIN_H_WIDTH,self.MAX_H_WIDTH),\n", 348 | " field = self.field)\n", 349 | " self.walls = deque([w1])\n", 350 | " p_width = randint(self.MIN_P_WIDTH,self.MAX_P_WIDTH)\n", 351 | " self.player = Player( height = self.P_HEIGHT, max_width = self.WIDTH,\n", 352 | " width = p_width,\n", 353 | " x = randint(0,self.field.width-p_width),\n", 354 | " y = int(self.field.height*0.7), speed = 1)\n", 355 | " self.MAX_VAL = self.player.body_unit + w1.body_unit\n", 356 | " # Update the field :\n", 357 | " self.field.update_field(self.walls, self.player)\n", 358 | " \n", 359 | " observation = self.field.body/self.MAX_VAL\n", 360 | " return observation\n", 361 | " def print_text(self, WINDOW = None, text_cords = (0,0), center = False,\n", 362 | " text = \"\", color = (0,0,0), size = 32):\n", 363 | " pygame.init()\n", 364 | " font = pygame.font.Font('freesansbold.ttf', size) \n", 365 | " text_to_print = font.render(text, True, color) \n", 366 | " textRect = text_to_print.get_rect()\n", 367 | " if center:\n", 368 | " textRect.center = text_cords\n", 369 | " else:\n", 370 | " textRect.x = text_cords[0]\n", 371 | " textRect.y = text_cords[1]\n", 372 | " WINDOW.blit(text_to_print, textRect)\n", 373 | " \n", 374 | " def step(self, action):\n", 375 | " global score_increased\n", 376 | "\n", 377 | " self.frames_counter += 1\n", 378 | " reward = 0\n", 379 | "\n", 380 | " # If the performed action is (move) then player.move method is called:\n", 381 | " if action in [1,2]:\n", 382 | " self.player.move(direction = action, field = self.field)\n", 383 | " # If the performed action is (change_width) then player.change_width method is called:\n", 384 | " if action in [3,4]:\n", 385 | " self.player.change_width(action = action) \n", 386 | " \n", 387 | " # Move the wall one step (one step every WALL_SPEED frames):\n", 388 | " if self.frames_counter % self.WALL_SPEED == 0:\n", 389 | " # move the wall one step\n", 390 | " self.walls[-1].move()\n", 391 | " # reset the frames counter\n", 392 | " self.frames_counter = 0\n", 393 | " \n", 394 | " # Update the field :\n", 395 | " self.field.update_field(self.walls, self.player)\n", 396 | "\n", 397 | " # If the player passed a wall successfully increase the reward +1\n", 398 | " if ((self.walls[-1].y) == (self.player.y + self.player.height)) and not score_increased :\n", 399 | " reward += self.REWARD\n", 400 | " self.score += self.REWARD\n", 401 | " \n", 402 | " # Increase player's stamina every time it passed a wall successfully \n", 403 | " self.player.stamina = min(self.player.max_stamina, self.player.stamina+10)\n", 404 | " # score_increased : a flag to make sure that reward increases once per wall \n", 405 | " score_increased = True\n", 406 | " \n", 407 | " \n", 408 | " # Lose Conditions : \n", 409 | " # C1 : The player hits a wall\n", 410 | " # C2 : Player's width was far thinner than hole's width\n", 411 | " # C3 : Player fully consumed its stamina (energy)\n", 412 | " lose_conds = [self.MAX_VAL in self.field.body,\n", 413 | " ((self.player.y == self.walls[-1].y) and (self.player.width < (self.walls[-1].hole_width-1))),\n", 414 | " self.player.stamina <=0]\n", 415 | " \n", 416 | "\n", 417 | " # If one lose condition or more happend, the game ends:\n", 418 | " if True in lose_conds:\n", 419 | " self.game_over = True\n", 420 | " reward = 
self.PUNISHMENT\n", 421 | " return self.field.body/self.MAX_VAL, reward, self.game_over\n", 422 | "\n", 423 | " # Check if a wall moved out of the scene:\n", 424 | " if self.walls[-1].out_of_range:\n", 425 | " # Create a new wall\n", 426 | " self.walls[-1] = Wall( height = self.W_HEIGHT, width = self.WIDTH,\n", 427 | " hole_width = randint(self.MIN_H_WIDTH,self.MAX_H_WIDTH),\n", 428 | " field = self.field)\n", 429 | "\n", 430 | " score_increased = False\n", 431 | "\n", 432 | " \n", 433 | " # Return New Observation , reward, game_over(bool)\n", 434 | " return self.field.body/self.MAX_VAL, reward, self.game_over\n", 435 | " \n", 436 | " def render(self, WINDOW = None, human=False):\n", 437 | " if human:\n", 438 | " ################ Check Actions #####################\n", 439 | " action = 0\n", 440 | " events = pygame.event.get()\n", 441 | " for event in events:\n", 442 | " if event.type == pygame.QUIT:\n", 443 | " self.game_over = True\n", 444 | " if event.type == pygame.KEYDOWN:\n", 445 | " if event.key == pygame.K_LEFT:\n", 446 | " action = 1\n", 447 | " if event.key == pygame.K_RIGHT:\n", 448 | " action = 2\n", 449 | "\n", 450 | " if event.key == pygame.K_UP:\n", 451 | " action = 4\n", 452 | " if event.key == pygame.K_DOWN:\n", 453 | " action = 3\n", 454 | " ################## Step ############################ \n", 455 | " _,reward, self.game_over = self.step(action)\n", 456 | " ################ Draw Environment ###################\n", 457 | " WINDOW.fill(self.WHITE)\n", 458 | " self.field.update_field(self.walls, self.player)\n", 459 | " for r in range(self.field.body.shape[0]):\n", 460 | " for c in range(self.field.body.shape[1]):\n", 461 | " pygame.draw.rect(WINDOW,\n", 462 | " self.val2color[self.field.body[r][c]],\n", 463 | " (c*self.WIDTH_MUL, r*self.HEIGHT_MUL, self.WIDTH_MUL, self.HEIGHT_MUL))\n", 464 | "\n", 465 | " self.print_text(WINDOW = WINDOW, text_cords = (self.WINDOW_WIDTH // 2, int(self.WINDOW_HEIGHT*0.1)),\n", 466 | " text = str(self.score), color = self.RED, center = True)\n", 467 | " self.print_text(WINDOW = WINDOW, text_cords = (0, int(self.WINDOW_HEIGHT*0.9)),\n", 468 | " text = str(self.player.stamina), color = self.RED)\n", 469 | " \n", 470 | " pygame.display.update()" 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": {}, 476 | "source": [ 477 | "**Environment attributes:**\n", 478 | "\n", 479 | "|Attribute |Type |Description |\n", 480 | "|------------|-----------|------------------------------------------------------------------------------------|\n", 481 | "|P_HEIGHT |int |Height of the player |\n", 482 | "|F_HEIGHT |int |Height of the field |\n", 483 | "|W_HEIGHT |int |Height of the walls |\n", 484 | "|WIDTH |int |Width of the field and the walls |\n", 485 | "|MIN_H_WIDTH |int |Minimum width of the holes |\n", 486 | "|MAX_H_WIDTH |int |Maximum width of the holes |\n", 487 | "|MIN_P_WIDTH |int |Minimum Width of the player |\n", 488 | "|MAX_P_WIDTH |int |Maximum Width of the player |\n", 489 | "|HEIGHT_MUL |int |Height Multiplier (used to draw np.array as blocks in pygame ) |\n", 490 | "|WIDTH_MUL |int |Width Multiplier (used to draw np.array as blocks in pygame ) |\n", 491 | "|WINDOW_HEIGHT|int |Height of the pygame window |\n", 492 | "|WINDOW_WIDTH|int |Width of the pygame window |\n", 493 | "|ENVIRONMENT_SHAPE|tuple |(field height ; field width ; 1) |\n", 494 | "|ACTION_SPACE|list |list of actions an agent can perform |\n", 495 | "|ACTION_SPACE_SIZE|int |number of actions an agent can perform |\n", 496 | "|PUNISHMENT |int ; 
float|Punishment increment |\n", 497 | "|REWARD |int ; float|Reward increment |\n", 498 | "|score |int ; float|Initial Score |\n", 499 | "|MOVE_WALL_EVERY|int |Every how many frames the wall moves. |\n", 500 | "|MOVE_PLAYER_EVERY|int |Every how many frames the player moves. |\n", 501 | "|frames_counter|int |used to handle the wall speed |\n", 502 | "|field |Field | the field object that holds walls and players |\n", 503 | "|walls |double ended queue of Wall objects|a que of walls |\n", 504 | "|player |Player |the player object |\n", 505 | "|current_state|np.array |holds the current state of the field (the array representation of the game field) |\n", 506 | "|val2color |dictionary |used to color the blocks depending on their values (ex: if you want to color the player RED you will put 'self.player.body_unit:RED' in val2color dictionary)|\n", 507 | "|MAX_VAL |int ; float| used to detect collisions between walls and players (MAX_VAL = self.player.body_unit + self.wall.body_unit) |\n" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "***Environment methods:***\n", 515 | "* \\__***init***__(self) : initializes the environment by initializing some attributes and calling the reset method.\n", 516 | "* ***reset***(self) : resets the environment and returns the state of the game field after resetting it.\n", 517 | "* ***print_text***(self, WINDOW = None, text_cords = (0,0), center = False, text = \"\", color = (0,0,0), size = 32): prints a text in a given pygame.display (WINDOW) with the given features.\n", 518 | "---\n", 519 | "**+ step(self, action):**\n", 520 | "\n", 521 | "1. Call the player's move method to move the player.\n", 522 | "2. Call the player's change_width method to move the player.\n", 523 | "3. Move the wall one step.\n", 524 | "4. Update the field.\n", 525 | "5. Check if the player passed a wall successfully. If so, gives the player a reward and increase its stamina.\n", 526 | "6. Check the three losing conditions: the player loses the game if at least one of these three conditions met.\n", 527 | "\n", 528 | "**Losing Conditions**:\n", 529 | "\n", 530 | "|Condition |Explanation|Code |\n", 531 | "|------------|-----------|------------------------------------------------------------------------------------|\n", 532 | "|C1 |The player hits a wall|self.MAX_VAL in self.field.body |\n", 533 | "|C2 |Player's width was far thinner than hole's width|((self.player.y == self.walls[-1].y) and (self.player.width < (self.walls[-1].hole_width-1)))|\n", 534 | "|C3 |Player fully consumed its stamina (energy)|self.player.stamina <=0 |\n", 535 | "\n", 536 | "\n", 537 | "when a player loses, the value of returned reward will equal PUNISHMENT, and the indicator of the game state (game_over) changes from false to true.\n", 538 | "\n", 539 | "7. Check if the current wall hits the bottom of the field, when that happens, the out of range wall is replaced by a new wall.\n", 540 | "8. Return next_state normalized, reward, game_over\n", 541 | "---\n", 542 | "**+render**(self, WINDOW = None, human=False):\n", 543 | "\n", 544 | "**Arguments:**\n", 545 | "* ***WINDOW*** (pygame.display): the pygame.display that the game will be rendered on.\n", 546 | "* ***human*** (bool): If a human will play the game, this argument is set to True, in this case pygame catch pressed keyboard keys to get the action that will be performed.\n", 547 | "\n", 548 | "**Explanation of render method line by line:**\n", 549 | "1. Check if the player is a human. 
If so, get the pressed key and translate it to the corresponding action (ex: if the right arrow is pressed then set action = 2, that means move the player on step to the right), then call step method to perform the chosen action.\n", 550 | "2. Update the field then start drawing the walls and the player as blocks.\n", 551 | "3. Print the score and the player's stamina.\n", 552 | "4. Finally, update the display to show the rendered screen." 553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "metadata": {}, 558 | "source": [ 559 | "## Finally : Put it all together\n", 560 | "Now we are going to use everything we explained and play the game:\n", 561 | "\n", 562 | "The following code repeats the game until the player wins by getting a score higher than or equals winning_score, or quits the game." 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "# Make an environment object\n", 572 | "env = Environment()\n", 573 | "# Change wall speed to 3 (one step every 3 frames)\n", 574 | "env.WALL_SPEED = 3\n", 575 | "\n", 576 | "# Initialize some variables \n", 577 | "WINDOW = pygame.display.set_mode((env.WINDOW_WIDTH, env.WINDOW_HEIGHT))\n", 578 | "clock = pygame.time.Clock()\n", 579 | "win = False\n", 580 | "winning_score = 100\n", 581 | "\n", 582 | "# Repeaat the game untill the player win (got a score of winning_score) or quits the game.\n", 583 | "while not win:\n", 584 | " score_increased = False\n", 585 | " game_over = False\n", 586 | " _ = env.reset()\n", 587 | " pygame.display.set_caption(\"Game\")\n", 588 | " while not game_over:\n", 589 | " clock.tick(27)\n", 590 | " env.render(WINDOW = WINDOW, human=True)\n", 591 | " game_over = env.game_over\n", 592 | " #####################################################\n", 593 | " sleep(0.5)\n", 594 | " WINDOW.fill(env.WHITE)\n", 595 | " if env.score >= winning_score:\n", 596 | " win = True\n", 597 | " env.print_text(WINDOW = WINDOW, text_cords = (env.WINDOW_WIDTH // 2, env.WINDOW_HEIGHT// 2),\n", 598 | " text = f\"You Win - Score : {env.score}\", color = env.RED, center = True)\n", 599 | " else:\n", 600 | " env.print_text(WINDOW = WINDOW, text_cords = (env.WINDOW_WIDTH // 2, env.WINDOW_HEIGHT// 2),\n", 601 | " text = f\"Game Over - Score : {env.score}\", color = env.RED, center = True)\n", 602 | " pygame.display.update()" 603 | ] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "You can get the full code [HERE](https://github.com/ModMaamari/reinforcement-learning-using-python)" 610 | ] 611 | } 612 | ], 613 | "metadata": { 614 | "kernelspec": { 615 | "display_name": "Python 3", 616 | "language": "python", 617 | "name": "python3" 618 | }, 619 | "language_info": { 620 | "codemirror_mode": { 621 | "name": "ipython", 622 | "version": 3 623 | }, 624 | "file_extension": ".py", 625 | "mimetype": "text/x-python", 626 | "name": "python", 627 | "nbconvert_exporter": "python", 628 | "pygments_lexer": "ipython3", 629 | "version": "3.7.5" 630 | } 631 | }, 632 | "nbformat": 4, 633 | "nbformat_minor": 2 634 | } 635 | -------------------------------------------------------------------------------- /Train_GridSearch.py: -------------------------------------------------------------------------------- 1 | from random import randint, choice 2 | from collections import deque 3 | from time import sleep 4 | import pygame, time 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from keras.layers import Dense, Dropout, 
Conv2D, MaxPooling2D, Activation, Flatten 9 | from keras.layers import Input, BatchNormalization, GlobalMaxPooling2D 10 | from keras.callbacks import TensorBoard, ModelCheckpoint 11 | import keras.backend.tensorflow_backend as backend 12 | from keras.models import Sequential, Model 13 | from keras.models import load_model 14 | from keras.optimizers import Adam 15 | import tensorflow as tf 16 | from tqdm import tqdm 17 | import random 18 | import os 19 | 20 | # For more repetitive results 21 | random.seed(1) 22 | np.random.seed(1) 23 | tf.random.set_seed(1) 24 | 25 | PATH = "" 26 | # Create models folder 27 | if not os.path.isdir(f'{PATH}models'): 28 | os.makedirs(f'{PATH}models') 29 | # Create results folder 30 | if not os.path.isdir(f'{PATH}results'): 31 | os.makedirs(f'{PATH}results') 32 | 33 | pygame.init() 34 | 35 | TstartTime = time.time() 36 | 37 | 38 | ###################################################################################### 39 | class Field: 40 | def __init__(self, height=10, width=5): 41 | self.width = width 42 | self.height = height 43 | self.body = np.zeros(shape=(self.height, self.width)) 44 | def update_field(self,walls, player): 45 | try: 46 | # Clear the field: 47 | self.body = np.zeros(shape=(self.height, self.width)) 48 | # Put the walls on the field: 49 | for wall in walls: 50 | if not wall.out_of_range : 51 | self.body[wall.y:min(wall.y+wall.height,self.height),:] = wall.body 52 | 53 | # Put the player on the field: 54 | self.body[player.y:player.y+player.height, 55 | player.x:player.x+player.width] += player.body 56 | except : 57 | pass 58 | ###################################################################################### 59 | class Wall: 60 | def __init__(self, height = 5, width=100, hole_width = 20, 61 | y = 0, speed = 1, field = None): 62 | self.height = height 63 | self.width = width 64 | self.hole_width = hole_width 65 | self.y = y 66 | self.speed = speed 67 | self.field = field 68 | self.body_unit = 1 69 | self.body = np.ones(shape = (self.height, self.width))*self.body_unit 70 | self.out_of_range = False 71 | self.create_hole() 72 | def create_hole(self): 73 | hole = np.zeros(shape = (self.height, self.hole_width)) 74 | hole_pos = randint(0,self.width-self.hole_width) 75 | self.body[ : , hole_pos:hole_pos+self.hole_width] = 0 76 | def move(self): 77 | self.y += self.speed 78 | self.out_of_range = True if ((self.y + self.height) > self.field.height) else False 79 | ###################################################################################### 80 | class Player: 81 | def __init__(self, height = 5, max_width = 10 , width=2, 82 | x = 0, y = 0, speed = 2): 83 | self.height = height 84 | self.max_width = max_width 85 | self.width = width 86 | self.x = x 87 | self.y = y 88 | self.speed = speed 89 | self.body_unit = 2 90 | self.body = np.ones(shape = (self.height, self.width))*self.body_unit 91 | self.stamina = 20 92 | self.max_stamina = 20 93 | def move(self, field, direction = 0 ): 94 | ''' 95 | Moves the player : 96 | - No change = 0 97 | - left, if direction = 1 98 | - right, if direction = 2 99 | ''' 100 | val2dir = {0:0 , 1:-1 , 2:1} 101 | direction = val2dir[direction] 102 | next_x = (self.x + self.speed*direction) 103 | if not (next_x + self.width > field.width or next_x < 0): 104 | self.x += self.speed*direction 105 | self.stamina -= 1 106 | def change_width(self, action = 0): 107 | ''' 108 | Change the player's width: 109 | - No change = 0 110 | - narrow by one unit = 3 111 | - widen by one unit = 4 112 | ''' 113 | val2act = {0:0 , 
3:-1 , 4:1} 114 | action = val2act[action] 115 | new_width = self.width+action 116 | player_end = self.x + new_width 117 | if new_width <= self.max_width and new_width > 0 and player_end <= self.max_width: 118 | self.width = new_width 119 | self.body = np.ones(shape = (self.height, self.width))*self.body_unit 120 | ###################################################################################### 121 | class Environment: 122 | P_HEIGHT = 2 # Height of the player 123 | F_HEIGHT = 20 # Height of the field 124 | W_HEIGHT = 2 # Height of the walls 125 | WIDTH = 10 # Width of the field and the walls 126 | MIN_H_WIDTH = 2 # Minimum width of the holes 127 | MAX_H_WIDTH = 6 # Maximum width of the holes 128 | MIN_P_WIDTH = 2 # Minimum Width of the player 129 | MAX_P_WIDTH = 6 # Maximum Width of the player 130 | HEIGHT_MUL = 30 # Height Multiplier (used to draw np.array as blocks in pygame ) 131 | WIDTH_MUL = 40 # Width Multiplier (used to draw np.array as blocks in pygame ) 132 | WINDOW_HEIGHT = (F_HEIGHT+1) * HEIGHT_MUL # Height of the pygame window 133 | WINDOW_WIDTH = (WIDTH) * WIDTH_MUL # Widh of the pygame window 134 | 135 | ENVIRONMENT_SHAPE = (F_HEIGHT,WIDTH,1) 136 | ACTION_SPACE = [0,1,2,3,4] 137 | ACTION_SPACE_SIZE = len(ACTION_SPACE) 138 | PUNISHMENT = -100 # Punishment increment 139 | REWARD = 10 # Reward increment 140 | score = 0 # Initial Score 141 | 142 | MOVE_WALL_EVERY = 4 # Every how many frames the wall moves. 143 | MOVE_PLAYER_EVERY = 1 # Every how many frames the player moves. 144 | frames_counter = 0 145 | 146 | def __init__(self): 147 | # Colors: 148 | self.BLACK = (25,25,25) 149 | self.WHITE = (255,255,255) 150 | self.RED = (255, 80, 80) 151 | self.BLUE = (80, 80, 255) 152 | self.field = self.walls = self.player = None 153 | self.current_state = self.reset() 154 | self.val2color = {0:self.WHITE, self.walls[0].body_unit:self.BLACK, 155 | self.player.body_unit:self.BLACK, self.MAX_VAL:self.RED} 156 | def reset(self): 157 | self.score = 0 158 | self.frames_counter = 0 159 | self.game_over = False 160 | 161 | self.field = Field(height=self.F_HEIGHT, width=self.WIDTH ) 162 | w1 = Wall( height = self.W_HEIGHT, width=self.WIDTH, 163 | hole_width = randint(self.MIN_H_WIDTH,self.MAX_H_WIDTH), 164 | field = self.field) 165 | self.walls = deque([w1]) 166 | p_width = randint(self.MIN_P_WIDTH,self.MAX_P_WIDTH) 167 | self.player = Player( height = self.P_HEIGHT, max_width = self.WIDTH, 168 | width = p_width, 169 | x = randint(0,self.field.width-p_width), 170 | y = int(self.field.height*0.7), speed = 1) 171 | self.MAX_VAL = self.player.body_unit + w1.body_unit 172 | # Update the field : 173 | self.field.update_field(self.walls, self.player) 174 | 175 | observation = self.field.body/self.MAX_VAL 176 | return observation 177 | def print_text(self, WINDOW = None, text_cords = (0,0), center = False, 178 | text = "", color = (0,0,0), size = 32): 179 | pygame.init() 180 | font = pygame.font.Font('freesansbold.ttf', size) 181 | text_to_print = font.render(text, True, color) 182 | textRect = text_to_print.get_rect() 183 | if center: 184 | textRect.center = text_cords 185 | else: 186 | textRect.x = text_cords[0] 187 | textRect.y = text_cords[1] 188 | WINDOW.blit(text_to_print, textRect) 189 | 190 | def step(self, action): 191 | global score_increased 192 | 193 | self.frames_counter += 1 194 | reward = 0 195 | 196 | # If the performed action is (move) then player.move method is called: 197 | if action in [1,2]: 198 | self.player.move(direction = action, field = self.field) 199 | # If the 
performed action is (change_width) then player.change_width method is called: 200 | if action in [3,4]: 201 | self.player.change_width(action = action) 202 | 203 | # Move the wall one step (one step every MOVE_WALL_EVERY frames): 204 | if self.frames_counter % self.MOVE_WALL_EVERY == 0: 205 | # move the wall one step 206 | self.walls[-1].move() 207 | # reset the frames counter 208 | self.frames_counter = 0 209 | 210 | # Update the field : 211 | self.field.update_field(self.walls, self.player) 212 | 213 | # If the player passed a wall successfully increase the reward +1 214 | if ((self.walls[-1].y) == (self.player.y + self.player.height)) and not score_increased : 215 | reward += self.REWARD 216 | self.score += self.REWARD 217 | 218 | # Increase player's stamina every time it passed a wall successfully 219 | self.player.stamina = min(self.player.max_stamina, self.player.stamina+10) 220 | # score_increased : a flag to make sure that reward increases once per wall 221 | score_increased = True 222 | 223 | 224 | # Lose Conditions : 225 | # C1 : The player hits a wall 226 | # C2 : Player's width was far thinner than hole's width 227 | # C3 : Player fully consumed its stamina (energy) 228 | lose_conds = [self.MAX_VAL in self.field.body, 229 | ((self.player.y == self.walls[-1].y) and (self.player.width < (self.walls[-1].hole_width-1))), 230 | self.player.stamina <=0] 231 | 232 | 233 | # If one lose condition or more happend, the game ends: 234 | if True in lose_conds: 235 | self.game_over = True 236 | reward = self.PUNISHMENT 237 | return self.field.body/self.MAX_VAL, reward, self.game_over 238 | 239 | # Check if a wall moved out of the scene: 240 | if self.walls[-1].out_of_range: 241 | # Create a new wall 242 | self.walls[-1] = Wall( height = self.W_HEIGHT, width = self.WIDTH, 243 | hole_width = randint(self.MIN_H_WIDTH,self.MAX_H_WIDTH), 244 | field = self.field) 245 | 246 | score_increased = False 247 | 248 | 249 | # Return New Observation , reward, game_over(bool) 250 | return self.field.body/self.MAX_VAL, reward, self.game_over 251 | 252 | def render(self, WINDOW = None, human=False): 253 | if human: 254 | ################ Check Actions ##################### 255 | action = 0 256 | events = pygame.event.get() 257 | for event in events: 258 | if event.type == pygame.QUIT: 259 | self.game_over = True 260 | if event.type == pygame.KEYDOWN: 261 | if event.key == pygame.K_LEFT: 262 | action = 1 263 | if event.key == pygame.K_RIGHT: 264 | action = 2 265 | 266 | if event.key == pygame.K_UP: 267 | action = 4 268 | if event.key == pygame.K_DOWN: 269 | action = 3 270 | ################## Step ############################ 271 | _,reward, self.game_over = self.step(action) 272 | ################ Draw Environment ################### 273 | WINDOW.fill(self.WHITE) 274 | self.field.update_field(self.walls, self.player) 275 | for r in range(self.field.body.shape[0]): 276 | for c in range(self.field.body.shape[1]): 277 | pygame.draw.rect(WINDOW, 278 | self.val2color[self.field.body[r][c]], 279 | (c*self.WIDTH_MUL, r*self.HEIGHT_MUL, self.WIDTH_MUL, self.HEIGHT_MUL)) 280 | 281 | self.print_text(WINDOW = WINDOW, text_cords = (self.WINDOW_WIDTH // 2, int(self.WINDOW_HEIGHT*0.1)), 282 | text = str(self.score), color = self.RED, center = True) 283 | self.print_text(WINDOW = WINDOW, text_cords = (0, int(self.WINDOW_HEIGHT*0.9)), 284 | text = str(self.player.stamina), color = self.RED) 285 | 286 | pygame.display.update() 287 | ###################################################################################### 288 | 
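# ----------------------------------------------------------------------------------
# Optional sanity check (not part of the original script): a minimal sketch showing
# how the Environment API above is consumed. reset() returns the first (normalized)
# observation and step(action) returns (observation, reward, game_over). Note that
# Environment.step() reads the module-level flag `score_increased`, so it must be
# initialized before stepping. Set RUN_ENV_SANITY_CHECK = True to roll a few random
# episodes without any agent (and without rendering).
RUN_ENV_SANITY_CHECK = False
if RUN_ENV_SANITY_CHECK:
    _check_env = Environment()
    for _ in range(3):
        _obs = _check_env.reset()
        score_increased = False        # module-level flag expected by Environment.step()
        _done = False
        while not _done:
            _obs, _rew, _done = _check_env.step(choice(_check_env.ACTION_SPACE))
    print("Environment sanity check passed, observation shape:", _obs.shape)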
class ModifiedTensorBoard(TensorBoard): 289 | # Overriding init to set initial step and writer (we want one log file for all .fit() calls) 290 | def __init__(self, name, **kwargs): 291 | super().__init__(**kwargs) 292 | self.step = 1 293 | self.writer = tf.summary.create_file_writer(self.log_dir) 294 | self._log_write_dir = os.path.join(self.log_dir, name) 295 | 296 | # Overriding this method to stop creating default log writer 297 | def set_model(self, model): 298 | pass 299 | 300 | # Overrided, saves logs with our step number 301 | # (otherwise every .fit() will start writing from 0th step) 302 | def on_epoch_end(self, epoch, logs=None): 303 | self.update_stats(**logs) 304 | 305 | # Overrided 306 | # We train for one batch only, no need to save anything at epoch end 307 | def on_batch_end(self, batch, logs=None): 308 | pass 309 | 310 | # Overrided, so won't close writer 311 | def on_train_end(self, _): 312 | pass 313 | 314 | def on_train_batch_end(self, batch, logs=None): 315 | pass 316 | 317 | # Custom method for saving own metrics 318 | # Creates writer, writes custom metrics and closes writer 319 | def update_stats(self, **stats): 320 | self._write_logs(stats, self.step) 321 | 322 | def _write_logs(self, logs, index): 323 | with self.writer.as_default(): 324 | for name, value in logs.items(): 325 | tf.summary.scalar(name, value, step=index) 326 | self.step += 1 327 | self.writer.flush() 328 | ###################################################################################### 329 | # Agent class 330 | class DQNAgent: 331 | def __init__(self, name, env, conv_list, dense_list, util_list): 332 | self.env = env 333 | self.conv_list = conv_list 334 | self.dense_list = dense_list 335 | self.name = [str(name) +" | " + "".join(str(c)+"C | " for c in conv_list) + "".join(str(d) + "D | " for d in dense_list) + "".join(u + " | " for u in util_list) ][0] 336 | 337 | # Main model 338 | self.model = self.create_model(self.conv_list, self.dense_list) 339 | 340 | # Target network 341 | self.target_model = self.create_model(self.conv_list, self.dense_list) 342 | self.target_model.set_weights(self.model.get_weights()) 343 | 344 | # An array with last n steps for training 345 | self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE) 346 | 347 | # Custom tensorboard object 348 | self.tensorboard = ModifiedTensorBoard(name, log_dir="{}logs/{}-{}".format(PATH, name, int(time.time()))) 349 | 350 | # Used to count when to update target network with main network's weights 351 | self.target_update_counter = 0 352 | 353 | 354 | # Creates a convolutional block given (filters) number of filters, (dropout) dropout rate, 355 | # (bn) a boolean variable indecating the use of BatchNormalization, 356 | # (pool) a boolean variable indecating the use of MaxPooling2D 357 | def conv_block(self, inp, filters=64, bn=True, pool=True, dropout = 0.2): 358 | _ = Conv2D(filters=filters, kernel_size=3, activation='relu')(inp) 359 | if bn: 360 | _ = BatchNormalization()(_) 361 | if pool: 362 | _ = MaxPooling2D(pool_size=(2, 2))(_) 363 | if dropout > 0: 364 | _ = Dropout(0.2)(_) 365 | return _ 366 | # Creates the model with the given specifications: 367 | def create_model(self, conv_list, dense_list): 368 | # Defines the input layer with shape = ENVIRONMENT_SHAPE 369 | input_layer = Input(shape=self.env.ENVIRONMENT_SHAPE) 370 | # Defines the first convolutional block: 371 | _ = self.conv_block(input_layer, filters=conv_list[0], bn=False, pool=False) 372 | # If number of convolutional layers is 2 or more, use a loop to create them. 
373 | if len(conv_list)>1: 374 | for c in conv_list[1:]: 375 | _ = self.conv_block(_, filters=c) 376 | # Flatten the output of the last convolutional layer. 377 | _ = Flatten()(_) 378 | 379 | # Creating the dense layers: 380 | for d in dense_list: 381 | _ = Dense(units=d, activation='relu')(_) 382 | # The output layer has 5 nodes (one node per action) 383 | output = Dense(units=self.env.ACTION_SPACE_SIZE, 384 | activation='linear', name='output')(_) 385 | 386 | # Put it all together: 387 | model = Model(inputs=input_layer, outputs=[output]) 388 | model.compile(optimizer=Adam(lr=0.001), 389 | loss={'output': 'mse'}, 390 | metrics={'output': 'accuracy'}) 391 | 392 | return model 393 | 394 | # Adds step's data to a memory replay array 395 | # (observation space, action, reward, new observation space, done) 396 | def update_replay_memory(self, transition): 397 | self.replay_memory.append(transition) 398 | 399 | # Trains main network every step during episode 400 | def train(self, terminal_state, step): 401 | # Start training only if certain number of samples is already saved 402 | if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE: 403 | return 404 | 405 | # Get a minibatch of random samples from memory replay table 406 | minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE) 407 | 408 | # Get current states from minibatch, then query NN model for Q values 409 | current_states = np.array([transition[0] for transition in minibatch]) 410 | current_qs_list = self.model.predict(current_states.reshape(-1, *env.ENVIRONMENT_SHAPE)) 411 | 412 | 413 | # Get future states from minibatch, then query NN model for Q values 414 | # When using target network, query it, otherwise main network should be queried 415 | new_current_states = np.array([transition[3] for transition in minibatch]) 416 | future_qs_list = self.target_model.predict(new_current_states.reshape(-1, *env.ENVIRONMENT_SHAPE)) 417 | 418 | X = [] 419 | y = [] 420 | 421 | # Now we need to enumerate our batches 422 | for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch): 423 | 424 | # If not a terminal state, get new q from future states, otherwise set it to 0 425 | # almost like with Q Learning, but we use just part of equation here 426 | if not done: 427 | max_future_q = np.max(future_qs_list[index]) 428 | new_q = reward + DISCOUNT * max_future_q 429 | else: 430 | new_q = reward 431 | 432 | # Update Q value for given state 433 | current_qs = current_qs_list[index] 434 | current_qs[action] = new_q 435 | 436 | # And append to our training data 437 | X.append(current_state) 438 | y.append(current_qs) 439 | 440 | 441 | # Fit on all samples as one batch, log only on terminal state 442 | self.model.fit(x = np.array(X).reshape(-1, *env.ENVIRONMENT_SHAPE), 443 | y = np.array(y), 444 | batch_size = MINIBATCH_SIZE, verbose = 0, 445 | shuffle=False, callbacks=[self.tensorboard] if terminal_state else None) 446 | 447 | # Update target network counter every episode 448 | if terminal_state: 449 | self.target_update_counter += 1 450 | 451 | # If counter reaches set value, update target network with weights of main network 452 | if self.target_update_counter > UPDATE_TARGET_EVERY: 453 | self.target_model.set_weights(self.model.get_weights()) 454 | self.target_update_counter = 0 455 | 456 | # Queries main network for Q values given current observation space (environment state) 457 | def get_qs(self, state): 458 | return self.model.predict(state.reshape(-1, *env.ENVIRONMENT_SHAPE)) 459 | 
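# ----------------------------------------------------------------------------------
# Illustration (not part of the original script): the Q-target assembled inside
# DQNAgent.train() follows the standard DQN update,
#     target = reward                                         if the episode ended
#     target = reward + DISCOUNT * max_a' Q_target(s', a')    otherwise,
# where Q_target is the periodically synced target network. The tiny helper below
# reproduces that arithmetic on made-up numbers; the name `_dqn_target` is ours.
def _dqn_target(reward, done, future_qs, discount):
    """Scalar Q-target used when fitting the main network on one transition."""
    return reward if done else reward + discount * np.max(future_qs)

# e.g. passing a wall (reward +10) vs. losing (reward -100, terminal state):
assert np.isclose(_dqn_target(10, False, np.array([1.0, 2.0, 0.5]), 0.99), 10 + 0.99 * 2.0)
assert np.isclose(_dqn_target(-100, True, np.array([1.0, 2.0, 0.5]), 0.99), -100)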
###################################################################################### 460 | def save_model_and_weights(agent, model_name, episode, max_reward, average_reward, min_reward): 461 | checkpoint_name = f"{model_name}| Eps({episode}) | max({max_reward:_>7.2f}) | avg({average_reward:_>7.2f}) | min({min_reward:_>7.2f}).model" 462 | agent.model.save(f'{PATH}models/{checkpoint_name}') 463 | best_weights = agent.model.get_weights() 464 | return best_weights 465 | ###################################################################################### 466 | # ## Constants: 467 | # RL Constants: 468 | DISCOUNT = 0.99 469 | REPLAY_MEMORY_SIZE = 3_000 # How many last steps to keep for model training 470 | MIN_REPLAY_MEMORY_SIZE = 1_000 # Minimum number of steps in a memory to start training 471 | UPDATE_TARGET_EVERY = 20 # Terminal states (end of episodes) 472 | MIN_REWARD = 1000 # For model save 473 | SAVE_MODEL_EVERY = 1000 # Episodes 474 | SHOW_EVERY = 20 # Episodes 475 | EPISODES = 100 # Number of episodes 476 | # Stats settings 477 | AGGREGATE_STATS_EVERY = 20 # episodes 478 | SHOW_PREVIEW = False 479 | ###################################################################################### 480 | # Models Arch : 481 | # [{[conv_list], [dense_list], [util_list], MINIBATCH_SIZE, {EF_Settings}, {ECC_Settings}} ] 482 | 483 | models_arch = [ {"conv_list":[32], "dense_list":[32,32], "util_list":["ECC2", "1A-5Ac"], 484 | "MINIBATCH_SIZE":128, "best_only":False, 485 | "EF_Settings":{"EF_Enabled":False}, "ECC_Settings":{"ECC_Enabled":False}}, 486 | 487 | {"conv_list":[32], "dense_list":[32,32,32], "util_list":["ECC2", "1A-5Ac"], 488 | "MINIBATCH_SIZE":128, "best_only":False, 489 | "EF_Settings":{"EF_Enabled":False}, "ECC_Settings":{"ECC_Enabled":False}}, 490 | 491 | {"conv_list":[32], "dense_list":[32,32], "util_list":["ECC2", "1A-5Ac"], 492 | "MINIBATCH_SIZE":128, "best_only":False, 493 | "EF_Settings":{"EF_Enabled":True, "FLUCTUATIONS":2}, 494 | "ECC_Settings":{"ECC_Enabled":True, "MAX_EPS_NO_INC":int(EPISODES*0.2)}}] 495 | 496 | # A dataframe used to store grid search results 497 | res = pd.DataFrame(columns = ["Model Name","Convolution Layers", "Dense Layers", "Batch Size", "ECC", "EF", 498 | "Best Only" , "Average Reward", "Best Average", "Epsilon 4 Best Average", 499 | "Best Average On", "Max Reward", "Epsilon 4 Max Reward", "Max Reward On", 500 | "Total Training Time (min)", "Time Per Episode (sec)"]) 501 | ###################################################################################### 502 | # Grid Search: 503 | for i, m in enumerate(models_arch): 504 | startTime = time.time() # Used to count episode training time 505 | MINIBATCH_SIZE = m["MINIBATCH_SIZE"] 506 | 507 | # Exploration settings : 508 | # Epsilon Fluctuation (EF): 509 | EF_Enabled = m["EF_Settings"]["EF_Enabled"] # Enable Epsilon Fluctuation 510 | MAX_EPSILON = 1 # Maximum epsilon value 511 | MIN_EPSILON = 0.001 # Minimum epsilon value 512 | if EF_Enabled: 513 | FLUCTUATIONS = m["EF_Settings"]["FLUCTUATIONS"] # How many times epsilon will fluctuate 514 | FLUCTUATE_EVERY = int(EPISODES/FLUCTUATIONS) # Episodes 515 | EPSILON_DECAY = MAX_EPSILON - (MAX_EPSILON/FLUCTUATE_EVERY) 516 | epsilon = 1 # not a constant, going to be decayed 517 | else: 518 | EPSILON_DECAY = MAX_EPSILON - (MAX_EPSILON/(0.8*EPISODES)) 519 | epsilon = 1 # not a constant, going to be decayed 520 | 521 | # Initialize some variables: 522 | best_average = -100 523 | best_score = -100 524 | 525 | # Epsilon Conditional Constantation (ECC): 526 | ECC_Enabled 
= m["ECC_Settings"]["ECC_Enabled"] 527 | avg_reward_info = [[1, best_average, epsilon]] # [[episode1, reward1 , epsilon1] ... [episode_n, reward_n , epsilon_n]] 528 | max_reward_info = [[1, best_score , epsilon]] 529 | if ECC_Enabled : MAX_EPS_NO_INC = m["ECC_Settings"]["MAX_EPS_NO_INC"] # Maximum number of episodes without any increment in reward average 530 | eps_no_inc_counter = 0 # Counts episodes with no increment in reward 531 | 532 | 533 | # For stats 534 | ep_rewards = [best_average] 535 | 536 | 537 | 538 | env = Environment() 539 | env.MOVE_WALL_EVERY = 1 # Every how many frames the wall moves. 540 | 541 | 542 | agent = DQNAgent(f"M{i}", env, m["conv_list"], m["dense_list"], m["util_list"]) 543 | MODEL_NAME = agent.name 544 | 545 | 546 | best_weights = [agent.model.get_weights()] 547 | 548 | # Uncomment these two lines if you want to show preview on your screen 549 | # WINDOW = pygame.display.set_mode((env.WINDOW_WIDTH, env.WINDOW_HEIGHT)) 550 | # clock = pygame.time.Clock() 551 | 552 | # Iterate over episodes 553 | for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'): 554 | if m["best_only"]: agent.model.set_weights(best_weights[0]) 555 | # agent.target_model.set_weights(best_weights[0]) 556 | 557 | score_increased = False 558 | # Update tensorboard step every episode 559 | agent.tensorboard.step = episode 560 | 561 | # Restarting episode - reset episode reward and step number 562 | episode_reward = 0 563 | step = 1 564 | action = 0 565 | # Reset environment and get initial state 566 | current_state = env.reset() 567 | game_over = env.game_over 568 | while not game_over: 569 | # This part stays mostly the same, the change is to query a model for Q values 570 | if np.random.random() > epsilon: 571 | # Get action from Q table 572 | action = np.argmax(agent.get_qs(current_state)) 573 | 574 | else: 575 | # Get random action 576 | action = choice(env.ACTION_SPACE) 577 | 578 | new_state, reward, game_over = env.step(action) 579 | 580 | # Transform new continuous state to new discrete state and count reward 581 | episode_reward += reward 582 | 583 | # Uncomment the next block if you want to show preview on your screen 584 | # if SHOW_PREVIEW and not episode % SHOW_EVERY: 585 | # clock.tick(27) 586 | # env.render(WINDOW) 587 | 588 | # Every step we update replay memory and train main network 589 | agent.update_replay_memory((current_state, action, reward, new_state, game_over)) 590 | agent.train(game_over, step) 591 | 592 | current_state = new_state 593 | step += 1 594 | 595 | if ECC_Enabled : eps_no_inc_counter += 1 596 | # Append episode reward to a list and log stats (every given number of episodes) 597 | ep_rewards.append(episode_reward) 598 | 599 | if not episode % AGGREGATE_STATS_EVERY: 600 | average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:]) 601 | min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:]) 602 | max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:]) 603 | agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon) 604 | 605 | # Save models, but only when avg reward is greater or equal a set value 606 | if not episode % SAVE_MODEL_EVERY: 607 | # Save Agent : 608 | _ = save_model_and_weights(agent, MODEL_NAME, episode, max_reward, average_reward, min_reward) 609 | 610 | 611 | if average_reward > best_average: 612 | best_average = average_reward 613 | # update ECC variables: 614 | avg_reward_info.append([episode, best_average, epsilon]) 615 | 
eps_no_inc_counter = 0 616 | # Save Agent : 617 | best_weights[0] = save_model_and_weights(agent, MODEL_NAME, episode, max_reward, average_reward, min_reward) 618 | 619 | if ECC_Enabled and eps_no_inc_counter >= MAX_EPS_NO_INC: 620 | epsilon = avg_reward_info[-1][2] # Get epsilon value of the last best reward 621 | eps_no_inc_counter = 0 622 | 623 | if episode_reward > best_score: 624 | try: 625 | best_score = episode_reward 626 | max_reward_info.append([episode, best_score, epsilon]) 627 | 628 | # Save Agent : 629 | best_weights[0] = save_model_and_weights(agent, MODEL_NAME, episode, max_reward, average_reward, min_reward) 630 | 631 | except: 632 | pass 633 | 634 | # Decay epsilon 635 | if epsilon > MIN_EPSILON: 636 | epsilon *= EPSILON_DECAY 637 | epsilon = max(MIN_EPSILON, epsilon) 638 | 639 | # Epsilon Fluctuation: 640 | if EF_Enabled: 641 | if not episode % FLUCTUATE_EVERY: 642 | epsilon = MAX_EPSILON 643 | 644 | endTime = time.time() 645 | total_train_time_sec = round((endTime - startTime)) 646 | total_train_time_min = round((endTime - startTime)/60,2) 647 | time_per_episode_sec = round((total_train_time_sec)/EPISODES,3) 648 | 649 | # Get Average reward: 650 | average_reward = round(sum(ep_rewards)/len(ep_rewards), 2) 651 | 652 | # Update Results DataFrames: 653 | res = res.append({"Model Name":MODEL_NAME, "Convolution Layers":m["conv_list"], "Dense Layers":m["dense_list"], 654 | "Batch Size":m["MINIBATCH_SIZE"], "ECC":m["ECC_Settings"], "EF":m["EF_Settings"], 655 | "Best Only":m["best_only"], "Average Reward":average_reward, 656 | "Best Average":avg_reward_info[-1][1], "Epsilon 4 Best Average":avg_reward_info[-1][2], 657 | "Best Average On":avg_reward_info[-1][0], "Max Reward":max_reward_info[-1][1], 658 | "Epsilon 4 Max Reward":max_reward_info[-1][2], "Max Reward On":max_reward_info[-1][0], 659 | "Total Training Time (min)":total_train_time_min, "Time Per Episode (sec)":time_per_episode_sec} 660 | , ignore_index=True) 661 | res = res.sort_values(by = 'Best Average') 662 | avg_df = pd.DataFrame(data = avg_reward_info, columns=["Episode", "Average Reward", "Epsilon"]) 663 | max_df = pd.DataFrame(data = max_reward_info, columns=["Episode", "Max Reward", "Epsilon"]) 664 | 665 | # Save dataFrames 666 | res.to_csv(f"{PATH}results/Results.csv") 667 | avg_df.to_csv(f"{PATH}results/{MODEL_NAME}-Results-Avg.csv") 668 | max_df.to_csv(f"{PATH}results/{MODEL_NAME}-Results-Max.csv") 669 | 670 | TendTime = time.time() 671 | ###################################################################################### 672 | print( f"Training took {round((TendTime - TstartTime)/60) } Minutes ") 673 | print( f"Training took {round((TendTime - TstartTime)/3600) } Hours ") 674 | ###################################################################################### 675 | -------------------------------------------------------------------------------- /images/EnvExp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModMaamari/reinforcement-learning-using-python/fd535079d7ca95be856af9a505b327d4350cc0f0/images/EnvExp.jpg -------------------------------------------------------------------------------- /images/gifs/EnvPlayed.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModMaamari/reinforcement-learning-using-python/fd535079d7ca95be856af9a505b327d4350cc0f0/images/gifs/EnvPlayed.gif -------------------------------------------------------------------------------- 
/images/gifs/envExp.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModMaamari/reinforcement-learning-using-python/fd535079d7ca95be856af9a505b327d4350cc0f0/images/gifs/envExp.gif -------------------------------------------------------------------------------- /images/wall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModMaamari/reinforcement-learning-using-python/fd535079d7ca95be856af9a505b327d4350cc0f0/images/wall.jpg --------------------------------------------------------------------------------