├── Section 5 └── Doom │ ├── image_preprocessing.py │ ├── experience_replay.py │ └── ai.py ├── LICENSE ├── Section 3 └── Self_Driving_Car │ ├── car.kv │ ├── ai.py │ ├── map.py │ └── map_commented.py └── README.md /Section 5/Doom/image_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Image Preprocessing 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | from scipy.misc import imresize 6 | from gym.core import ObservationWrapper 7 | from gym.spaces.box import Box 8 | 9 | # Preprocessing the Images 10 | 11 | class PreprocessImage(ObservationWrapper): 12 | 13 | def __init__(self, env, height = 64, width = 64, grayscale = True, crop = lambda img: img): 14 | super(PreprocessImage, self).__init__(env) 15 | self.img_size = (height, width) 16 | self.grayscale = grayscale 17 | self.crop = crop 18 | n_colors = 1 if self.grayscale else 3 19 | self.observation_space = Box(0.0, 1.0, [n_colors, height, width]) 20 | 21 | def _observation(self, img): 22 | img = self.crop(img) 23 | img = imresize(img, self.img_size) 24 | if self.grayscale: 25 | img = img.mean(-1, keepdims = True) 26 | img = np.transpose(img, (2, 0, 1)) 27 | img = img.astype('float32') / 255. 28 | return img 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Section 3/Self_Driving_Car/car.kv: -------------------------------------------------------------------------------- 1 | #:kivy 1.0.9 2 | # ref: https://kivy.org/docs/tutorials/pong.html 3 | 4 | <Car>: 5 | size: 20, 10 6 | canvas: 7 | PushMatrix 8 | Rotate: 9 | angle: self.angle 10 | origin: self.center 11 | Rectangle: 12 | pos: self.pos 13 | size: self.size 14 | PopMatrix 15 | 16 | <Ball1>: 17 | size: 10,10 18 | canvas: 19 | Color: 20 | rgba: 1,0,0,1 21 | Ellipse: 22 | pos: self.pos 23 | size: self.size 24 | <Ball2>: 25 | size: 10,10 26 | canvas: 27 | Color: 28 | rgba: 0,1,1,1 29 | Ellipse: 30 | pos: self.pos 31 | size: self.size 32 | 33 | <Ball3>: 34 | size: 10,10 35 | canvas: 36 | Color: 37 | rgba: 1,1,0,1 38 | Ellipse: 39 | pos: self.pos 40 | size: self.size 41 | 42 | <Game>: 43 | car: game_car 44 | ball1: game_ball1 45 | ball2: game_ball2 46 | ball3: game_ball3 47 | 48 | Car: 49 | id: game_car 50 | center: self.parent.center 51 | Ball1: 52 | id: game_ball1 53 | center: self.parent.center 54 | Ball2: 55 | id: game_ball2 56 | center: self.parent.center 57 | Ball3: 58 | id: game_ball3 59 | center: self.parent.center 60 | -------------------------------------------------------------------------------- /Section 5/Doom/experience_replay.py: -------------------------------------------------------------------------------- 1 | # Experience Replay 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | from collections import namedtuple, deque 6 | 7 | # Defining one Step 8 | Step = namedtuple('Step', ['state', 'action', 'reward', 'done']) 9 | 10 | # Making the AI progress on several (n_step) steps 11 | 12 | class NStepProgress: 13 | 14 | def __init__(self, env, ai, n_step): 15 | self.ai = ai 16 | self.rewards = [] 17 | self.env = env 18 | self.n_step = n_step 19 | 20 | def __iter__(self): 21 | state = self.env.reset() 22 | history = deque() 23 | reward = 0.0 24 | while True: 25 | action = self.ai(np.array([state]))[0][0] 26 | next_state, r, is_done, _ = self.env.step(action) 27 | reward += r 28 | history.append(Step(state = state, action = action, reward = r, done = is_done)) 29 | while len(history) > self.n_step + 1: 30 | history.popleft() 31 | if len(history) == self.n_step + 1: 32 | yield tuple(history) 33 | state = next_state 34 | if is_done: 35 | if len(history) > self.n_step + 1: 36 | history.popleft() 37 | while len(history) >= 1: 38 | yield tuple(history) 39 | history.popleft() 40 | self.rewards.append(reward) 41 | reward = 0.0 42 | state = self.env.reset() 43 | history.clear() 44 | 45 | def rewards_steps(self): 46 | rewards_steps = self.rewards 47 | self.rewards = [] 48 | return rewards_steps 49 | 50 | # Implementing Experience Replay 51 | 52 | class ReplayMemory: 53 | 54 | def __init__(self, n_steps, capacity = 10000): 55 | self.capacity = capacity 56 | self.n_steps = n_steps 57 | self.n_steps_iter = iter(n_steps) 58 | self.buffer = deque() 59 | 60 | def sample_batch(self, batch_size): # creates an iterator that returns random batches 61 | ofs = 0 62 | vals = list(self.buffer) 63 | np.random.shuffle(vals) 64 | while (ofs+1)*batch_size <= len(self.buffer): 65 | yield vals[ofs*batch_size:(ofs+1)*batch_size] 66 | ofs += 1 67 | 68 | def run_steps(self, samples): 69 | while samples > 0: 70 | entry = next(self.n_steps_iter) # 10 consecutive steps 71 | self.buffer.append(entry) # we put 200 for the current episode 72 | samples -= 1 73 | while len(self.buffer) > self.capacity: # we accumulate no more than the capacity (10000) 74 | 
self.buffer.popleft() 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hands-On Deep Q-Learning [Video] 2 | This is the code repository for [Hands-On Deep Q-Learning [Video]](https://www.packtpub.com/big-data-and-business-intelligence/hands-deep-q-learning-video?utm_source=github&utm_medium=repository&utm_campaign=9781789957549), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the video course from start to finish. 3 | ## About the Video Course 4 | Do you want to build a virtual self-driving car AI application using a cutting-edge Reinforcement Learning algorithm: Deep Q-Learning? Do you want to create an intelligence that can win the famous '90s game—DOOM—using Deep Convolutional Q-Learning? Deep Q-Learning is one of the most robust and powerful Reinforcement Learning techniques for solving complex real-world problems. Artificial Intelligence is making our lives easier day by day and reducing human effort everywhere: in social media, websites, online stores, and even business. With a less-talk, more-action approach, this course leads you through various implementations of Reinforcement Learning techniques by building a virtual self-driving car application and an AI that beats the monsters in DOOM. 5 | You may be wondering why we create artificial intelligence in a game environment. Once we have created our artificial intelligence in a gaming environment with the help of OpenAI Gym, we can apply the same principles to complex real-world problems just by changing and tweaking algorithm parameters. Get your hands on this course to learn one of the most fascinating technologies in the field of Artificial Intelligence and leverage the power of Reinforcement Learning right away! 6 | 7 |

## What You Will Learn
* Get to grips with various Reinforcement Learning techniques while building Artificial Intelligence using PyTorch, Kivy, and OpenAI Gym
* Gain a solid understanding of Deep Q-Learning intuitions and how it works (see the sketch below)
* Optimize performance and efficiency by implementing Deep Q-Learning
* Create a virtual Self-Driving Car application with Deep Q-Learning
* Build an AI that wins the game DOOM using Deep Convolutional Q-Learning
* Understand the workings behind Artificial Intelligence
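
The course code implements Deep Q-Learning with PyTorch in `Section 3/Self_Driving_Car/ai.py` (a small dense network over five sensor signals) and Deep Convolutional Q-Learning in `Section 5/Doom/ai.py`. As a quick orientation, here is a minimal, self-contained sketch of the core Q-learning update those files are built around. It is illustrative only (it is not one of the course files), it assumes PyTorch 0.4 or later, and the mini-batch of random transitions is made up purely for the example.

```python
# Minimal Deep Q-Learning update sketch (illustrative, not part of the course files)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class QNetwork(nn.Module):
    """Small dense Q-network: 5 input signals -> Q-values for 3 actions."""
    def __init__(self, input_size=5, nb_action=3):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 30)
        self.fc2 = nn.Linear(30, nb_action)

    def forward(self, state):
        return self.fc2(F.relu(self.fc1(state)))

model = QNetwork()
optimizer = optim.Adam(model.parameters(), lr=0.001)
gamma = 0.9  # discount factor

# A made-up mini-batch of transitions (state, action, reward, next_state)
batch_state = torch.rand(32, 5)
batch_action = torch.randint(0, 3, (32,))
batch_reward = torch.rand(32)
batch_next_state = torch.rand(32, 5)

# Q(s, a) for the actions that were actually taken
q_values = model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)

# Bellman target: r + gamma * max_a' Q(s', a'), with no gradient through the target
with torch.no_grad():
    next_q = model(batch_next_state).max(1)[0]
target = batch_reward + gamma * next_q

# Temporal-difference loss and one optimization step
loss = F.smooth_l1_loss(q_values, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
```

The self-driving car project wraps this same update in its `Dqn` class, adding experience replay and softmax action selection, while the Doom project swaps the dense network for a convolutional one and trains on n-step eligibility traces.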
16 | 17 | ## Instructions and Navigation 18 | ### Assumed Knowledge 19 | To fully benefit from the coverage included in this course, you will need:
20 | This course is designed for AI engineers, Machine Learning engineers, or aspiring Reinforcement Learning and Data Science professionals keen to extend their skill set to Reinforcement Learning using PyTorch, Kivy, and OpenAI Gym. Basic familiarity with Python programming is required. 21 | ### Technical Requirements 22 | This course has the following software requirements:
23 | If a CUDA-enabled GPU is available: 24 | CUDA Toolkit 9.0 25 | https://developer.nvidia.com/cuda-90-download-archive 26 | cuDNN 7.0 for CUDA 9.0 27 | https://developer.nvidia.com/cudnn 28 | Python installation with pip (Python 3.x recommended https://www.python.org/downloads/) 29 | Text editor or IDE (Anaconda distribution Community recommended(Jupyter Notebook/Spyder ) https://www.anaconda.com/download/) 30 | git (Version Control System https://git-scm.com/downloads) 31 | 32 | 33 | 34 | ## Related Products 35 | * [Microservices Development on Azure with Java [Video]](https://www.packtpub.com/virtualization-and-cloud/microservices-development-azure-java-video?utm_source=github&utm_medium=repository&utm_campaign=9781789808858) 36 | 37 | * [Data Wrangling with Python 3.x [Video]](https://www.packtpub.com/application-development/data-wrangling-python-3x-video?utm_source=github&utm_medium=repository&utm_campaign=9781789956597) 38 | 39 | * [Troubleshooting Vue.js [Video]](https://www.packtpub.com/application-development/troubleshooting-vuejs-video?utm_source=github&utm_medium=repository&utm_campaign=9781788993531) 40 | 41 | -------------------------------------------------------------------------------- /Section 3/Self_Driving_Car/ai.py: -------------------------------------------------------------------------------- 1 | # AI for Self Driving Car 2 | 3 | # Importing the libraries 4 | 5 | import numpy as np 6 | import random 7 | import os 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | import torch.autograd as autograd 13 | from torch.autograd import Variable 14 | 15 | # Creating the architecture of the Neural Network 16 | 17 | class Network(nn.Module): 18 | 19 | def __init__(self, input_size, nb_action): 20 | super(Network, self).__init__() 21 | self.input_size = input_size 22 | self.nb_action = nb_action 23 | self.fc1 = nn.Linear(input_size, 30) 24 | self.fc2 = nn.Linear(30, nb_action) 25 | 26 | def forward(self, state): 27 | x = F.relu(self.fc1(state)) 28 | q_values = self.fc2(x) 29 | return q_values 30 | 31 | # Implementing Experience Replay 32 | 33 | class ReplayMemory(object): 34 | 35 | def __init__(self, capacity): 36 | self.capacity = capacity 37 | self.memory = [] 38 | 39 | def push(self, event): 40 | self.memory.append(event) 41 | if len(self.memory) > self.capacity: 42 | del self.memory[0] 43 | 44 | def sample(self, batch_size): 45 | samples = zip(*random.sample(self.memory, batch_size)) 46 | return map(lambda x: Variable(torch.cat(x, 0)), samples) 47 | 48 | # Implementing Deep Q Learning 49 | 50 | class Dqn(): 51 | 52 | def __init__(self, input_size, nb_action, gamma): 53 | self.gamma = gamma 54 | self.reward_window = [] 55 | self.model = Network(input_size, nb_action) 56 | self.memory = ReplayMemory(100000) 57 | self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001) 58 | self.last_state = torch.Tensor(input_size).unsqueeze(0) 59 | self.last_action = 0 60 | self.last_reward = 0 61 | 62 | def select_action(self, state): 63 | probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100 64 | action = probs.multinomial() 65 | return action.data[0,0] 66 | 67 | def learn(self, batch_state, batch_next_state, batch_reward, batch_action): 68 | outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1) 69 | next_outputs = self.model(batch_next_state).detach().max(1)[0] 70 | target = self.gamma*next_outputs + batch_reward 71 | td_loss = F.smooth_l1_loss(outputs, target) 72 
| self.optimizer.zero_grad() 73 | td_loss.backward(retain_variables = True) 74 | self.optimizer.step() 75 | 76 | def update(self, reward, new_signal): 77 | new_state = torch.Tensor(new_signal).float().unsqueeze(0) 78 | self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward]))) 79 | action = self.select_action(new_state) 80 | if len(self.memory.memory) > 100: 81 | batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100) 82 | self.learn(batch_state, batch_next_state, batch_reward, batch_action) 83 | self.last_action = action 84 | self.last_state = new_state 85 | self.last_reward = reward 86 | self.reward_window.append(reward) 87 | if len(self.reward_window) > 1000: 88 | del self.reward_window[0] 89 | return action 90 | 91 | def score(self): 92 | return sum(self.reward_window)/(len(self.reward_window)+1.) 93 | 94 | def save(self): 95 | torch.save({'state_dict': self.model.state_dict(), 96 | 'optimizer' : self.optimizer.state_dict(), 97 | }, 'last_brain.pth') 98 | 99 | def load(self): 100 | if os.path.isfile('last_brain.pth'): 101 | print("=> loading checkpoint... ") 102 | checkpoint = torch.load('last_brain.pth') 103 | self.model.load_state_dict(checkpoint['state_dict']) 104 | self.optimizer.load_state_dict(checkpoint['optimizer']) 105 | print("done !") 106 | else: 107 | print("no checkpoint found...") -------------------------------------------------------------------------------- /Section 5/Doom/ai.py: -------------------------------------------------------------------------------- 1 | # AI for Doom 2 | 3 | 4 | 5 | # Importing the libraries 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.autograd import Variable 12 | 13 | # Importing the packages for OpenAI and Doom 14 | import gym 15 | from gym.wrappers import SkipWrapper 16 | from ppaquette_gym_doom.wrappers.action_space import ToDiscrete 17 | 18 | # Importing the other Python files 19 | import experience_replay, image_preprocessing 20 | 21 | 22 | 23 | # Part 1 - Building the AI 24 | 25 | # Making the brain 26 | 27 | class CNN(nn.Module): 28 | 29 | def __init__(self, number_actions): 30 | super(CNN, self).__init__() 31 | self.convolution1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 5) 32 | self.convolution2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 3) 33 | self.convolution3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 2) 34 | self.fc1 = nn.Linear(in_features = self.count_neurons((1, 80, 80)), out_features = 40) 35 | self.fc2 = nn.Linear(in_features = 40, out_features = number_actions) 36 | 37 | def count_neurons(self, image_dim): 38 | x = Variable(torch.rand(1, *image_dim)) 39 | x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2)) 40 | x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2)) 41 | x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2)) 42 | return x.data.view(1, -1).size(1) 43 | 44 | def forward(self, x): 45 | x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2)) 46 | x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2)) 47 | x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2)) 48 | x = x.view(x.size(0), -1) 49 | x = F.relu(self.fc1(x)) 50 | x = self.fc2(x) 51 | return x 52 | 53 | # Making the body 54 | 55 | class SoftmaxBody(nn.Module): 56 | 57 | def __init__(self, T): 58 | super(SoftmaxBody, self).__init__() 59 | self.T = T 60 | 61 | def forward(self, outputs): 62 | probs = 
F.softmax(outputs * self.T) 63 | actions = probs.multinomial() 64 | return actions 65 | 66 | # Making the AI 67 | 68 | class AI: 69 | 70 | def __init__(self, brain, body): 71 | self.brain = brain 72 | self.body = body 73 | 74 | def __call__(self, inputs): 75 | input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32))) 76 | output = self.brain(input) 77 | actions = self.body(output) 78 | return actions.data.numpy() 79 | 80 | 81 | 82 | # Part 2 - Training the AI with Deep Convolutional Q-Learning 83 | 84 | # Getting the Doom environment 85 | doom_env = image_preprocessing.PreprocessImage(SkipWrapper(4)(ToDiscrete("minimal")(gym.make("ppaquette/DoomCorridor-v0"))), width = 80, height = 80, grayscale = True) 86 | doom_env = gym.wrappers.Monitor(doom_env, "videos", force = True) 87 | number_actions = doom_env.action_space.n 88 | 89 | # Building an AI 90 | cnn = CNN(number_actions) 91 | softmax_body = SoftmaxBody(T = 1.0) 92 | ai = AI(brain = cnn, body = softmax_body) 93 | 94 | # Setting up Experience Replay 95 | n_steps = experience_replay.NStepProgress(env = doom_env, ai = ai, n_step = 10) 96 | memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000) 97 | 98 | # Implementing Eligibility Trace 99 | def eligibility_trace(batch): 100 | gamma = 0.99 101 | inputs = [] 102 | targets = [] 103 | for series in batch: 104 | input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32))) 105 | output = cnn(input) 106 | cumul_reward = 0.0 if series[-1].done else output[1].data.max() 107 | for step in reversed(series[:-1]): 108 | cumul_reward = step.reward + gamma * cumul_reward 109 | state = series[0].state 110 | target = output[0].data 111 | target[series[0].action] = cumul_reward 112 | inputs.append(state) 113 | targets.append(target) 114 | return torch.from_numpy(np.array(inputs, dtype = np.float32)), torch.stack(targets) 115 | 116 | # Making the moving average on 100 steps 117 | class MA: 118 | def __init__(self, size): 119 | self.list_of_rewards = [] 120 | self.size = size 121 | def add(self, rewards): 122 | if isinstance(rewards, list): 123 | self.list_of_rewards += rewards 124 | else: 125 | self.list_of_rewards.append(rewards) 126 | while len(self.list_of_rewards) > self.size: 127 | del self.list_of_rewards[0] 128 | def average(self): 129 | return np.mean(self.list_of_rewards) 130 | ma = MA(100) 131 | 132 | # Training the AI 133 | loss = nn.MSELoss() 134 | optimizer = optim.Adam(cnn.parameters(), lr = 0.001) 135 | nb_epochs = 100 136 | for epoch in range(1, nb_epochs + 1): 137 | memory.run_steps(200) 138 | for batch in memory.sample_batch(128): 139 | inputs, targets = eligibility_trace(batch) 140 | inputs, targets = Variable(inputs), Variable(targets) 141 | predictions = cnn(inputs) 142 | loss_error = loss(predictions, targets) 143 | optimizer.zero_grad() 144 | loss_error.backward() 145 | optimizer.step() 146 | rewards_steps = n_steps.rewards_steps() 147 | ma.add(rewards_steps) 148 | avg_reward = ma.average() 149 | print("Epoch: %s, Average Reward: %s" % (str(epoch), str(avg_reward))) 150 | if avg_reward >= 1500: 151 | print("Congratulations, your AI wins") 152 | break 153 | 154 | # Closing the Doom environment 155 | doom_env.close() 156 | -------------------------------------------------------------------------------- /Section 3/Self_Driving_Car/map.py: -------------------------------------------------------------------------------- 1 | # Self Driving Car 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | from random 
import random, randint 6 | import matplotlib.pyplot as plt 7 | import time 8 | 9 | # Importing the Kivy packages 10 | from kivy.app import App 11 | from kivy.uix.widget import Widget 12 | from kivy.uix.button import Button 13 | from kivy.graphics import Color, Ellipse, Line 14 | from kivy.config import Config 15 | from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty 16 | from kivy.vector import Vector 17 | from kivy.clock import Clock 18 | 19 | # Importing the Dqn object from our AI in ai.py 20 | from ai import Dqn 21 | 22 | # Adding this line if we don't want the right click to put a red point 23 | Config.set('input', 'mouse', 'mouse,multitouch_on_demand') 24 | 25 | # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map 26 | last_x = 0 27 | last_y = 0 28 | n_points = 0 29 | length = 0 30 | 31 | # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function 32 | brain = Dqn(5,3,0.9) 33 | action2rotation = [0,20,-20] 34 | last_reward = 0 35 | scores = [] 36 | 37 | # Initializing the map 38 | first_update = True 39 | def init(): 40 | global sand 41 | global goal_x 42 | global goal_y 43 | global first_update 44 | sand = np.zeros((longueur,largeur)) 45 | goal_x = 20 46 | goal_y = largeur - 20 47 | first_update = False 48 | 49 | # Initializing the last distance 50 | last_distance = 0 51 | 52 | # Creating the car class 53 | 54 | class Car(Widget): 55 | 56 | angle = NumericProperty(0) 57 | rotation = NumericProperty(0) 58 | velocity_x = NumericProperty(0) 59 | velocity_y = NumericProperty(0) 60 | velocity = ReferenceListProperty(velocity_x, velocity_y) 61 | sensor1_x = NumericProperty(0) 62 | sensor1_y = NumericProperty(0) 63 | sensor1 = ReferenceListProperty(sensor1_x, sensor1_y) 64 | sensor2_x = NumericProperty(0) 65 | sensor2_y = NumericProperty(0) 66 | sensor2 = ReferenceListProperty(sensor2_x, sensor2_y) 67 | sensor3_x = NumericProperty(0) 68 | sensor3_y = NumericProperty(0) 69 | sensor3 = ReferenceListProperty(sensor3_x, sensor3_y) 70 | signal1 = NumericProperty(0) 71 | signal2 = NumericProperty(0) 72 | signal3 = NumericProperty(0) 73 | 74 | def move(self, rotation): 75 | self.pos = Vector(*self.velocity) + self.pos 76 | self.rotation = rotation 77 | self.angle = self.angle + self.rotation 78 | self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos 79 | self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos 80 | self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos 81 | self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400. 82 | self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400. 83 | self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400. 84 | if self.sensor1_x>longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10: 85 | self.signal1 = 1. 86 | if self.sensor2_x>longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10: 87 | self.signal2 = 1. 88 | if self.sensor3_x>longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10: 89 | self.signal3 = 1. 
90 | 91 | class Ball1(Widget): 92 | pass 93 | class Ball2(Widget): 94 | pass 95 | class Ball3(Widget): 96 | pass 97 | 98 | # Creating the game class 99 | 100 | class Game(Widget): 101 | 102 | car = ObjectProperty(None) 103 | ball1 = ObjectProperty(None) 104 | ball2 = ObjectProperty(None) 105 | ball3 = ObjectProperty(None) 106 | 107 | def serve_car(self): 108 | self.car.center = self.center 109 | self.car.velocity = Vector(6, 0) 110 | 111 | def update(self, dt): 112 | 113 | global brain 114 | global last_reward 115 | global scores 116 | global last_distance 117 | global goal_x 118 | global goal_y 119 | global longueur 120 | global largeur 121 | 122 | longueur = self.width 123 | largeur = self.height 124 | if first_update: 125 | init() 126 | 127 | xx = goal_x - self.car.x 128 | yy = goal_y - self.car.y 129 | orientation = Vector(*self.car.velocity).angle((xx,yy))/180. 130 | last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation] 131 | action = brain.update(last_reward, last_signal) 132 | scores.append(brain.score()) 133 | rotation = action2rotation[action] 134 | self.car.move(rotation) 135 | distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2) 136 | self.ball1.pos = self.car.sensor1 137 | self.ball2.pos = self.car.sensor2 138 | self.ball3.pos = self.car.sensor3 139 | 140 | if sand[int(self.car.x),int(self.car.y)] > 0: 141 | self.car.velocity = Vector(1, 0).rotate(self.car.angle) 142 | last_reward = -1 143 | else: # otherwise 144 | self.car.velocity = Vector(6, 0).rotate(self.car.angle) 145 | last_reward = -0.2 146 | if distance < last_distance: 147 | last_reward = 0.1 148 | 149 | if self.car.x < 10: 150 | self.car.x = 10 151 | last_reward = -1 152 | if self.car.x > self.width - 10: 153 | self.car.x = self.width - 10 154 | last_reward = -1 155 | if self.car.y < 10: 156 | self.car.y = 10 157 | last_reward = -1 158 | if self.car.y > self.height - 10: 159 | self.car.y = self.height - 10 160 | last_reward = -1 161 | 162 | if distance < 100: 163 | goal_x = self.width-goal_x 164 | goal_y = self.height-goal_y 165 | last_distance = distance 166 | 167 | # Adding the painting tools 168 | 169 | class MyPaintWidget(Widget): 170 | 171 | def on_touch_down(self, touch): 172 | global length, n_points, last_x, last_y 173 | with self.canvas: 174 | Color(0.8,0.7,0) 175 | d = 10. 176 | touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10) 177 | last_x = int(touch.x) 178 | last_y = int(touch.y) 179 | n_points = 0 180 | length = 0 181 | sand[int(touch.x),int(touch.y)] = 1 182 | 183 | def on_touch_move(self, touch): 184 | global length, n_points, last_x, last_y 185 | if touch.button == 'left': 186 | touch.ud['line'].points += [touch.x, touch.y] 187 | x = int(touch.x) 188 | y = int(touch.y) 189 | length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2)) 190 | n_points += 1. 
191 | density = n_points/(length) 192 | touch.ud['line'].width = int(20 * density + 1) 193 | sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1 194 | last_x = x 195 | last_y = y 196 | 197 | # Adding the API Buttons (clear, save and load) 198 | 199 | class CarApp(App): 200 | 201 | def build(self): 202 | parent = Game() 203 | parent.serve_car() 204 | Clock.schedule_interval(parent.update, 1.0/60.0) 205 | self.painter = MyPaintWidget() 206 | clearbtn = Button(text = 'clear') 207 | savebtn = Button(text = 'save', pos = (parent.width, 0)) 208 | loadbtn = Button(text = 'load', pos = (2 * parent.width, 0)) 209 | clearbtn.bind(on_release = self.clear_canvas) 210 | savebtn.bind(on_release = self.save) 211 | loadbtn.bind(on_release = self.load) 212 | parent.add_widget(self.painter) 213 | parent.add_widget(clearbtn) 214 | parent.add_widget(savebtn) 215 | parent.add_widget(loadbtn) 216 | return parent 217 | 218 | def clear_canvas(self, obj): 219 | global sand 220 | self.painter.canvas.clear() 221 | sand = np.zeros((longueur,largeur)) 222 | 223 | def save(self, obj): 224 | print("saving brain...") 225 | brain.save() 226 | plt.plot(scores) 227 | plt.show() 228 | 229 | def load(self, obj): 230 | print("loading last saved brain...") 231 | brain.load() 232 | 233 | # Running the whole thing 234 | if __name__ == '__main__': 235 | CarApp().run() 236 | -------------------------------------------------------------------------------- /Section 3/Self_Driving_Car/map_commented.py: -------------------------------------------------------------------------------- 1 | # Self Driving Car 2 | 3 | # Importing the libraries 4 | import numpy as np 5 | from random import random, randint 6 | import matplotlib.pyplot as plt 7 | import time 8 | 9 | # Importing the Kivy packages 10 | from kivy.app import App 11 | from kivy.uix.widget import Widget 12 | from kivy.uix.button import Button 13 | from kivy.graphics import Color, Ellipse, Line 14 | from kivy.config import Config 15 | from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty 16 | from kivy.vector import Vector 17 | from kivy.clock import Clock 18 | 19 | # Importing the Dqn object from our AI in ia.py 20 | from ai import Dqn 21 | 22 | # Adding this line if we don't want the right click to put a red point 23 | Config.set('input', 'mouse', 'mouse,multitouch_on_demand') 24 | 25 | # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map 26 | last_x = 0 27 | last_y = 0 28 | n_points = 0 # the total number of points in the last drawing 29 | length = 0 # the length of the last drawing 30 | 31 | # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function 32 | brain = Dqn(5,3,0.9) # 5 sensors, 3 actions, gama = 0.9 33 | action2rotation = [0,20,-20] # action = 0 => no rotation, action = 1 => rotate 20 degres, action = 2 => rotate -20 degres 34 | last_reward = 0 # initializing the last reward 35 | scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time 36 | 37 | # Initializing the map 38 | first_update = True # using this trick to initialize the map only once 39 | def init(): 40 | global sand # sand is an array that has as many cells as our graphic interface has pixels. Each cell has a one if there is sand, 0 otherwise. 
41 | global goal_x # x-coordinate of the goal (where the car has to go, that is the airport or the downtown) 42 | global goal_y # y-coordinate of the goal (where the car has to go, that is the airport or the downtown) 43 | sand = np.zeros((longueur,largeur)) # initializing the sand array with only zeros 44 | goal_x = 20 # the goal to reach is at the upper left of the map (the x-coordinate is 20 and not 0 because the car gets bad reward if it touches the wall) 45 | goal_y = largeur - 20 # the goal to reach is at the upper left of the map (y-coordinate) 46 | first_update = False # trick to initialize the map only once 47 | 48 | # Initializing the last distance 49 | last_distance = 0 50 | 51 | # Creating the car class (to understand "NumericProperty" and "ReferenceListProperty", see kivy tutorials: https://kivy.org/docs/tutorials/pong.html) 52 | 53 | class Car(Widget): 54 | 55 | angle = NumericProperty(0) # initializing the angle of the car (angle between the x-axis of the map and the axis of the car) 56 | rotation = NumericProperty(0) # initializing the last rotation of the car (after playing the action, the car does a rotation of 0, 20 or -20 degrees) 57 | velocity_x = NumericProperty(0) # initializing the x-coordinate of the velocity vector 58 | velocity_y = NumericProperty(0) # initializing the y-coordinate of the velocity vector 59 | velocity = ReferenceListProperty(velocity_x, velocity_y) # velocity vector 60 | sensor1_x = NumericProperty(0) # initializing the x-coordinate of the first sensor (the one that looks forward) 61 | sensor1_y = NumericProperty(0) # initializing the y-coordinate of the first sensor (the one that looks forward) 62 | sensor1 = ReferenceListProperty(sensor1_x, sensor1_y) # first sensor vector 63 | sensor2_x = NumericProperty(0) # initializing the x-coordinate of the second sensor (the one that looks 30 degrees to the left) 64 | sensor2_y = NumericProperty(0) # initializing the y-coordinate of the second sensor (the one that looks 30 degrees to the left) 65 | sensor2 = ReferenceListProperty(sensor2_x, sensor2_y) # second sensor vector 66 | sensor3_x = NumericProperty(0) # initializing the x-coordinate of the third sensor (the one that looks 30 degrees to the right) 67 | sensor3_y = NumericProperty(0) # initializing the y-coordinate of the third sensor (the one that looks 30 degrees to the right) 68 | sensor3 = ReferenceListProperty(sensor3_x, sensor3_y) # third sensor vector 69 | signal1 = NumericProperty(0) # initializing the signal received by sensor 1 70 | signal2 = NumericProperty(0) # initializing the signal received by sensor 2 71 | signal3 = NumericProperty(0) # initializing the signal received by sensor 3 72 | 73 | def move(self, rotation): 74 | self.pos = Vector(*self.velocity) + self.pos # updating the position of the car according to its last position and velocity 75 | self.rotation = rotation # getting the rotation of the car 76 | self.angle = self.angle + self.rotation # updating the angle 77 | self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos # updating the position of sensor 1 78 | self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos # updating the position of sensor 2 79 | self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos # updating the position of sensor 3 80 | self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400. 
# getting the signal received by sensor 1 (density of sand around sensor 1) 81 | self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400. # getting the signal received by sensor 2 (density of sand around sensor 2) 82 | self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400. # getting the signal received by sensor 3 (density of sand around sensor 3) 83 | if self.sensor1_x > longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10: # if sensor 1 is out of the map (the car is facing one edge of the map) 84 | self.signal1 = 1. # sensor 1 detects full sand 85 | if self.sensor2_x > longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10: # if sensor 2 is out of the map (the car is facing one edge of the map) 86 | self.signal2 = 1. # sensor 2 detects full sand 87 | if self.sensor3_x > longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10: # if sensor 3 is out of the map (the car is facing one edge of the map) 88 | self.signal3 = 1. # sensor 3 detects full sand 89 | 90 | class Ball1(Widget): # sensor 1 (see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html) 91 | pass 92 | class Ball2(Widget): # sensor 2 (see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html) 93 | pass 94 | class Ball3(Widget): # sensor 3 (see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html) 95 | pass 96 | 97 | # Creating the game class (to understand "ObjectProperty", see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html) 98 | 99 | class Game(Widget): 100 | 101 | car = ObjectProperty(None) # getting the car object from our kivy file 102 | ball1 = ObjectProperty(None) # getting the sensor 1 object from our kivy file 103 | ball2 = ObjectProperty(None) # getting the sensor 2 object from our kivy file 104 | ball3 = ObjectProperty(None) # getting the sensor 3 object from our kivy file 105 | 106 | def serve_car(self): # starting the car when we launch the application 107 | self.car.center = self.center # the car will start at the center of the map 108 | self.car.velocity = Vector(6, 0) # the car will start to go horizontally to the right with a speed of 6 109 | 110 | def update(self, dt): # the big update function that updates everything that needs to be updated at each discrete time t when reaching a new state (getting new signals from the sensors) 111 | 112 | global brain # specifying the global variables (the brain of the car, that is our AI) 113 | global last_reward # specifying the global variables (the last reward) 114 | global scores # specifying the global variables (the means of the rewards) 115 | global last_distance # specifying the global variables (the last distance from the car to the goal) 116 | global goal_x # specifying the global variables (x-coordinate of the goal) 117 | global goal_y # specifying the global variables (y-coordinate of the goal) 118 | global longueur # specifying the global variables (width of the map) 119 | global largeur # specifying the global variables (height of the map) 120 | 121 | longueur = self.width # width of the map (horizontal edge) 122 | largeur = self.height # height of the map (vertical edge) 123 | if first_update: # trick to initialize the map only once 124 | init() 125 | 126 | xx = goal_x - self.car.x # difference of x-coordinates between the goal and the car 127 | yy = goal_y - self.car.y # difference 
of y-coordinates between the goal and the car 128 | orientation = Vector(*self.car.velocity).angle((xx,yy))/180. # direction of the car with respect to the goal (if the car is heading perfectly towards the goal, then orientation = 0) 129 | last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation] # our input state vector, composed of the three signals received by the three sensors, plus the orientation and -orientation 130 | action = brain.update(last_reward, last_signal) # playing the action from our ai (the object brain of the dqn class) 131 | scores.append(brain.score()) # appending the score (mean of the last 100 rewards to the reward window) 132 | rotation = action2rotation[action] # converting the action played (0, 1 or 2) into the rotation angle (0°, 20° or -20°) 133 | self.car.move(rotation) # moving the car according to this last rotation angle 134 | distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2) # getting the new distance between the car and the goal right after the car moved 135 | self.ball1.pos = self.car.sensor1 # updating the position of the first sensor (ball1) right after the car moved 136 | self.ball2.pos = self.car.sensor2 # updating the position of the second sensor (ball2) right after the car moved 137 | self.ball3.pos = self.car.sensor3 # updating the position of the third sensor (ball3) right after the car moved 138 | 139 | if sand[int(self.car.x),int(self.car.y)] > 0: # if the car is on the sand 140 | self.car.velocity = Vector(1, 0).rotate(self.car.angle) # it is slowed down (speed = 1) 141 | last_reward = -1 # and reward = -1 142 | else: # otherwise 143 | self.car.velocity = Vector(6, 0).rotate(self.car.angle) # it goes to a normal speed (speed = 6) 144 | last_reward = -0.2 # and it gets bad reward (-0.2) 145 | if distance < last_distance: # however if it getting close to the goal 146 | last_reward = 0.1 # it still gets slightly positive reward 0.1 147 | 148 | if self.car.x < 10: # if the car is in the left edge of the frame 149 | self.car.x = 10 # it is not slowed down 150 | last_reward = -1 # but it gets bad reward -1 151 | if self.car.x > self.width-10: # if the car is in the right edge of the frame 152 | self.car.x = self.width-10 # it is not slowed down 153 | last_reward = -1 # but it gets bad reward -1 154 | if self.car.y < 10: # if the car is in the bottom edge of the frame 155 | self.car.y = 10 # it is not slowed down 156 | last_reward = -1 # but it gets bad reward -1 157 | if self.car.y > self.height-10: # if the car is in the upper edge of the frame 158 | self.car.y = self.height-10 # it is not slowed down 159 | last_reward = -1 # but it gets bad reward -1 160 | 161 | if distance < 100: # when the car reaches its goal 162 | goal_x = self.width - goal_x # the goal becomes the bottom right corner of the map (the downtown), and vice versa (updating of the x-coordinate of the goal) 163 | goal_y = self.height - goal_y # the goal becomes the bottom right corner of the map (the downtown), and vice versa (updating of the y-coordinate of the goal) 164 | 165 | # Updating the last distance from the car to the goal 166 | last_distance = distance 167 | 168 | # Painting for graphic interface (see kivy tutorials: https://kivy.org/docs/tutorials/firstwidget.html) 169 | 170 | class MyPaintWidget(Widget): 171 | 172 | def on_touch_down(self, touch): # putting some sand when we do a left click 173 | global length,n_points,last_x,last_y 174 | with self.canvas: 175 | Color(0.8,0.7,0) 176 | d=10. 
177 | touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10) 178 | last_x = int(touch.x) 179 | last_y = int(touch.y) 180 | n_points = 0 181 | length = 0 182 | sand[int(touch.x),int(touch.y)] = 1 183 | 184 | def on_touch_move(self, touch): # putting some sand when we move the mouse while pressing left 185 | global length,n_points,last_x,last_y 186 | if touch.button=='left': 187 | touch.ud['line'].points += [touch.x, touch.y] 188 | x = int(touch.x) 189 | y = int(touch.y) 190 | length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2)) 191 | n_points += 1. 192 | density = n_points/(length) 193 | touch.ud['line'].width = int(20*density + 1) 194 | sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1 195 | last_x = x 196 | last_y = y 197 | 198 | # API and switches interface (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html) 199 | 200 | class CarApp(App): 201 | 202 | def build(self): # building the app 203 | parent = Game() 204 | parent.serve_car() 205 | Clock.schedule_interval(parent.update, 1.0 / 60.0) 206 | self.painter = MyPaintWidget() 207 | clearbtn = Button(text='clear') 208 | savebtn = Button(text='save',pos=(parent.width,0)) 209 | loadbtn = Button(text='load',pos=(2*parent.width,0)) 210 | clearbtn.bind(on_release=self.clear_canvas) 211 | savebtn.bind(on_release=self.save) 212 | loadbtn.bind(on_release=self.load) 213 | parent.add_widget(self.painter) 214 | parent.add_widget(clearbtn) 215 | parent.add_widget(savebtn) 216 | parent.add_widget(loadbtn) 217 | return parent 218 | 219 | def clear_canvas(self, obj): # clear button 220 | global sand 221 | self.painter.canvas.clear() 222 | sand = np.zeros((longueur,largeur)) 223 | 224 | def save(self, obj): # save button 225 | print("saving brain...") 226 | brain.save() 227 | plt.plot(scores) 228 | plt.show() 229 | 230 | def load(self, obj): # load button 231 | print("loading last saved brain...") 232 | brain.load() 233 | 234 | # Running the app 235 | if __name__ == '__main__': 236 | CarApp().run() 237 | --------------------------------------------------------------------------------