├── README.md
├── ai.py
├── car.kv
├── gui.py
├── last_brain.pth
└── output.jpg
/README.md:
--------------------------------------------------------------------------------
1 | # Optimal Path Planning: Deep Reinforcement Learning
2 | Training a small simulated car to find an optimal path with Deep Reinforcement Learning.
3 |
4 | Basic concepts of the Q-learning algorithm, Markov Decision Processes, Temporal Difference learning, and Deep Q-Networks are used
5 | to train a tiny car to find the optimal path from the top-left corner to the bottom-right corner of the map.
6 |
7 |
8 |
9 |
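10 | Under the hood, the network in `ai.py` is trained toward the standard temporal-difference (Q-learning) target (a brief sketch, not a full specification of this repository's hyperparameters):
11 |
12 | $$Q_\text{target}(s, a) = r + \gamma \, \max_{a'} Q(s', a')$$
13 |
14 | with discount factor $\gamma = 0.9$ (set in `gui.py`). The smooth L1 (Huber) loss between the predicted Q-value and this target is minimized with the Adam optimizer in `Dqn.learn`. The Kivy simulation is launched by running `gui.py`; drawing on the window with the left mouse button lays down sand (obstacles) that the car is penalized for driving through.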
--------------------------------------------------------------------------------
/ai.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mon May 7 20:12:40 2018
3 |
4 | @author: nader
5 | """
6 |
7 | import numpy as np
8 | import random
9 | #for loading the brain
10 | import os
11 | import torch
12 | #module for implementing neural network
13 | import torch.nn as nn
14 | #contains functions for implementing neural network
15 | import torch.nn.functional as F
16 | #optimizers implementing variants of stochastic gradient descent (Adam is used below)
17 | import torch.optim as optim
18 | #Variable wraps a tensor and records the gradient associated with it
19 | #import torch.autograd as autograd
20 | from torch.autograd import Variable
21 |
22 | # Neural Network
23 | class Network(nn.Module):
24 |
25 | def __init__(self, input_size, nb_action):
26 | super(Network, self).__init__()
27 | self.input_size = input_size
28 | self.nb_action = nb_action
29 |         # nn.Linear => every neuron of the input layer is connected to the hidden layer
30 |         # fc1, fc2 => fully connected layers
31 | self.fc1 = nn.Linear(input_size, 30)
32 | self.fc2 = nn.Linear(30, nb_action)
33 |
34 |     def forward(self, state):# forward propagation
35 |         # apply the rectifier (ReLU) activation to the hidden neurons, feeding them the input state
36 | x = F.relu(self.fc1(state))
37 | q_values = self.fc2(x)
38 | return q_values
39 |
40 | #Experience Replay
41 | class ReplayMemory(object):
42 |
43 | def __init__(self, capacity):
44 |         #maximum number of transitions kept in memory
45 |         self.capacity = capacity
46 |         #the list of stored events (at most `capacity` of them)
47 | self.memory = []
48 |
49 | def push(self, event):
50 | self.memory.append(event)
51 | if len(self.memory) > self.capacity:
52 | del self.memory[0]
53 |
54 | def sample(self, batch_size):
55 |         #(state1,action1,reward1),(state2,action2,reward2) => (state1,state2),(action1,action2),(reward1,reward2)
56 |         #draw a random sample of batch_size transitions from memory
57 | samples = zip(*random.sample(self.memory, batch_size))
58 |         #finally wrap each batch in a pytorch Variable so that each one can receive a gradient
59 |         #concatenate each batch along the first dimension so that in each row the state, action and reward correspond to the same time t
60 | return map(lambda x: Variable(torch.cat(x, 0)), samples)
61 |
62 |
63 | class Dqn():
64 |
65 | def __init__(self, input_size, nb_action, gamma):
66 | self.gamma = gamma
67 | self.reward_window = []
68 | self.model = Network(input_size, nb_action)
69 | self.memory = ReplayMemory(100000)
70 | #parameters of our model and the learning rate
71 | self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
72 |         #pytorch expects a torch tensor with an extra (fake) batch dimension, hence unsqueeze(0)
73 | self.last_state = torch.Tensor(input_size).unsqueeze(0)
74 | #go straight
75 | self.last_action = 0
76 | self.last_reward = 0
77 |
78 | def select_action(self, state):
79 |         #we feed the input state to the network and get the Q-value of each action as output
80 |         #and use softmax to pick the final action
81 |         # softmax => favor the best action while still exploring the others (one probability per Q-value)
82 |         # the state is a tensor, so we wrap it in a torch Variable
83 |         # with volatile=True the gradient associated with this state is not included in the graph
84 | probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
85 | action = probs.multinomial()
86 |         #because multinomial returns a pytorch variable with a fake batch dimension, we select index [0,0]
87 |
88 | # m = torch.distributions.Categorical(probs)
89 | # action=m.sample()
90 | return action.data[0,0]
91 |
92 | def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
93 |         #gather the Q-values of the actions that were actually played
94 | outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
95 |         #max Q-value of the next state (detached) over all actions (dim 1); [0] selects the max values
96 | next_outputs = self.model(batch_next_state).detach().max(1)[0]
97 | target = self.gamma*next_outputs + batch_reward
98 |         #we use the Huber loss (smooth L1)
99 | td_loss = F.smooth_l1_loss(outputs, target)
100 |         # zero_grad clears the gradients accumulated in the previous iteration of the loop
101 | self.optimizer.zero_grad()
102 |         #backpropagate the loss; retain_variables keeps the intermediate variables after the backward pass
103 |         td_loss.backward(retain_variables = True)
104 |         #.step updates the weights according to the gradients computed by backward
105 | self.optimizer.step()
106 |
107 | def update(self, reward, new_signal):
108 |         #convert the signal to a float Tensor (the input of the neural network) and add the batch dimension
109 | new_state = torch.Tensor(new_signal).float().unsqueeze(0)
110 |         #torch.LongTensor stores the action index as a long integer tensor
111 | self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
112 | action = self.select_action(new_state)
113 | if len(self.memory.memory) > 100:
114 |             #sample a batch of 100 transitions from memory
115 | batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
116 | self.learn(batch_state, batch_next_state, batch_reward, batch_action)
117 | self.last_action = action
118 | self.last_state = new_state
119 | self.last_reward = reward
120 | self.reward_window.append(reward)
121 | if len(self.reward_window) > 1000:
122 | del self.reward_window[0]
123 | return action
124 |
125 | def score(self):
126 | return sum(self.reward_window)/(len(self.reward_window)+1.)
127 |
128 | def save(self):
129 | torch.save({'state_dict': self.model.state_dict(),
130 | 'optimizer' : self.optimizer.state_dict(),
131 | }, 'last_brain.pth')
132 |
133 | def load(self):
134 | if os.path.isfile('last_brain.pth'):
135 | print("=> loading checkpoint... ")
136 | checkpoint = torch.load('last_brain.pth')
137 | self.model.load_state_dict(checkpoint['state_dict'])
138 | self.optimizer.load_state_dict(checkpoint['optimizer'])
139 | print("done !")
140 | else:
141 | print("no checkpoint found...")
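142 |
143 | if __name__ == '__main__':
144 |     # Minimal smoke-test sketch (not part of the original project): build a brain
145 |     # with 5 inputs, 3 actions and a 0.9 discount factor, exactly as gui.py does,
146 |     # and run a single update step on a dummy reward and sensor signal. This
147 |     # assumes the same PyTorch 0.3-era API (Variable, volatile, ...) that the
148 |     # rest of this file targets.
149 |     dqn = Dqn(5, 3, 0.9)
150 |     action = dqn.update(0.0, [0.0, 0.0, 0.0, 0.0, 0.0])
151 |     print('selected action:', action)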
--------------------------------------------------------------------------------
/car.kv:
--------------------------------------------------------------------------------
1 | #:kivy 1.0.9
2 | # ref: https://kivy.org/docs/tutorials/pong.html
3 |
4 | <Car>:
5 | size: 20, 10
6 | canvas:
7 | PushMatrix
8 | Rotate:
9 | angle: self.angle
10 | origin: self.center
11 | Rectangle:
12 | pos: self.pos
13 | size: self.size
14 | PopMatrix
15 |
16 | <Ball1>:
17 | size: 10,10
18 | canvas:
19 | Color:
20 | rgba: 1,1,0,1
21 | Ellipse:
22 | pos: self.pos
23 | size: self.size
24 | <Ball2>:
25 | size: 10,10
26 | canvas:
27 | Color:
28 | rgba: 1,1,0,1
29 | Ellipse:
30 | pos: self.pos
31 | size: self.size
32 |
33 | <Ball3>:
34 | size: 10,10
35 | canvas:
36 | Color:
37 | rgba: 1,1,0,1
38 | Ellipse:
39 | pos: self.pos
40 | size: self.size
41 |
42 | <Game>:
43 | car: game_car
44 | ball1: game_ball1
45 | ball2: game_ball2
46 | ball3: game_ball3
47 |
48 | Car:
49 | id: game_car
50 | center: self.parent.center
51 | Ball1:
52 | id: game_ball1
53 | center: self.parent.center
54 | Ball2:
55 | id: game_ball2
56 | center: self.parent.center
57 | Ball3:
58 | id: game_ball3
59 | center: self.parent.center
60 |
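61 | # The <Game> rule above wires together the widgets defined in gui.py: the
62 | # rectangular car and three yellow balls that visualize its front, left and
63 | # right sensors, all starting at the center of the window.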
--------------------------------------------------------------------------------
/gui.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mon May 7 20:13:21 2018
3 |
4 | @author: nader
5 | """
6 |
7 | import numpy as np
8 | from random import random, randint
9 | import matplotlib.pyplot as plt
10 | #import time
11 |
12 | from kivy.app import App
13 | from kivy.uix.widget import Widget
14 | from kivy.uix.button import Button
15 | from kivy.graphics import Color, Ellipse, Line
16 | from kivy.config import Config
17 | from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
18 | from kivy.vector import Vector
19 | from kivy.clock import Clock
20 |
21 | # Importing the AI
22 | from ai import Dqn
23 |
24 | # We use the line below to stop Kivy from adding red circles on right click
25 | Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
26 |
27 | # last_x and last_y keep track of the last point drawn on the canvas
28 | last_x = 0
29 | last_y = 0
30 | n_points = 0
31 | length = 0
32 |
33 | # Initializing the brain: 5 inputs (3 sensor signals, orientation and -orientation), 3 actions, 0.9 discount factor
34 | brain = Dqn(5,3,0.9)
35 | action2rotation = [0,20,-20]
36 | last_reward = 0
37 | scores = []
38 |
39 | # map initialization
40 | first_update = True
41 | def init():
42 |     #sand is an array with one cell per pixel of the screen (1 = sand/obstacle, 0 = free)
43 | global sand
44 | global goal_x
45 | global goal_y
46 | global first_update
47 |     # length = window width, width = window height (both set in Game.update below)
48 | #sand = np.zeros((length,width))
49 | # initializing sand array with 0
50 | sand=np.zeros((length,width))
51 | goal_x = 20
52 | goal_y = width - 20
53 | first_update = False
54 |
55 | last_distance = 0
56 |
57 |
58 | class Car(Widget):
59 |
60 | # initializing the angle of the car
61 | angle = NumericProperty(0)
62 | # initializing rotation of the car
63 | rotation = NumericProperty(0)
64 | # initializing speed in x-vector
65 | velocity_x = NumericProperty(0)
66 | # initializing speed in y-vector
67 | velocity_y = NumericProperty(0)
68 | # speed vector
69 | velocity = ReferenceListProperty(velocity_x, velocity_y)
70 | # initializing x of forward sensor
71 | sensor1_x = NumericProperty(0)
72 | # initializing y of forward sensor
73 | sensor1_y = NumericProperty(0)
74 | # forward sensor vector
75 | sensor1 = ReferenceListProperty(sensor1_x, sensor1_y)
76 | # initializing x of left sensor
77 | sensor2_x = NumericProperty(0)
78 | # initializing y of left sensor
79 | sensor2_y = NumericProperty(0)
80 | # left sensor vector
81 | sensor2 = ReferenceListProperty(sensor2_x, sensor2_y)
82 | # initializing x of right sensor
83 | sensor3_x = NumericProperty(0)
84 | # initializing y of right sensor
85 | sensor3_y = NumericProperty(0)
86 | # right sensor vector
87 | sensor3 = ReferenceListProperty(sensor3_x, sensor3_y)
88 |     # initializing the signal received from the front sensor
89 | signal1 = NumericProperty(0)
90 |     # initializing the signal received from the left sensor
91 | signal2 = NumericProperty(0)
92 |     # initializing the signal received from the right sensor
93 | signal3 = NumericProperty(0)
94 |
95 | def move(self, rotation):
96 | # update position of car according to its last position and speed
97 | self.pos = Vector(*self.velocity) + self.pos
98 | self.rotation = rotation
99 | self.angle = self.angle + self.rotation
100 |
101 | # updating position of sensors
102 | self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos
103 | self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos
104 | self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos
105 |
106 |         # getting the signal received by each sensor => density of sand in a 20x20 window around it (sum / 400)
107 | self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400.
108 | self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400.
109 | self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400.
110 |
111 |         # if a sensor reaches beyond the edge of the map, it reads full density (a wall)
112 | if self.sensor1_x>length-10 or self.sensor1_x<10 or self.sensor1_y>width-10 or self.sensor1_y<10:
113 | self.signal1 = 1.
114 | if self.sensor2_x>length-10 or self.sensor2_x<10 or self.sensor2_y>width-10 or self.sensor2_y<10:
115 | self.signal2 = 1.
116 | if self.sensor3_x>length-10 or self.sensor3_x<10 or self.sensor3_y>width-10 or self.sensor3_y<10:
117 | self.signal3 = 1.
118 |
119 | # sensors
120 | class Ball1(Widget):
121 | pass
122 | class Ball2(Widget):
123 | pass
124 | class Ball3(Widget):
125 | pass
126 |
127 | # the main class
128 | class Game(Widget):
129 |
130 | # getting objects from kivy file
131 | car = ObjectProperty(None)
132 | ball1 = ObjectProperty(None)
133 | ball2 = ObjectProperty(None)
134 | ball3 = ObjectProperty(None)
135 |
136 | def serve_car(self):
137 | # car starts in the center of screen going right with speed of 6
138 | self.car.center = self.center
139 | self.car.velocity = Vector(6, 0)
140 |
141 |     def update(self, dt):# update function, called by the Kivy clock on every frame
142 |
143 | global brain
144 | global last_reward
145 | global scores
146 | global last_distance
147 | global goal_x
148 | global goal_y
149 | global length
150 | global width
151 |
152 | length = self.width
153 | width = self.height
154 |
155 | # to initialize map only once
156 | if first_update:
157 | init()
158 |
159 |         difference_x = goal_x - self.car.x
160 |         difference_y = goal_y - self.car.y
161 |         # orientation of the car with respect to the goal direction, scaled to [-1, 1]
162 |         orientation = Vector(*self.car.velocity).angle((difference_x,difference_y))/180.
163 |         # the input state: the three sensor signals plus the orientation and its negation
164 |         last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation]
165 | # getting action from ai
166 | action = brain.update(last_reward, last_signal)
167 |         # appending the current mean reward (score) to the score history
168 | scores.append(brain.score())
169 | rotation = action2rotation[action]
170 | # moving car according to rotation
171 | self.car.move(rotation)
172 | # setting new distance
173 | distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
174 | # updating sensors new position
175 | self.ball1.pos = self.car.sensor1
176 | self.ball2.pos = self.car.sensor2
177 | self.ball3.pos = self.car.sensor3
178 |
179 |         if sand[int(self.car.x),int(self.car.y)] > 0:# slow down when driving onto sand
180 | self.car.velocity = Vector(1, 0).rotate(self.car.angle)
181 |             # driving on sand => bad reward of -0.5
182 |             last_reward = -0.5
183 | else:
184 | self.car.velocity = Vector(6, 0).rotate(self.car.angle)
185 |             # normal move => small living penalty of -0.2
186 | last_reward = -0.2
187 | if distance < last_distance:
188 | # if in correct direction get a little positive reward
189 | last_reward = 0.3
190 |
191 | if self.car.x < 10:# if car goes to left border of screen
192 | self.car.x = 10
193 | last_reward = -1
194 | if self.car.x > self.width - 10:# if car goes to right side border of screen
195 | self.car.x = self.width - 10
196 | last_reward = -1
197 |         if self.car.y < 10:# if the car reaches the bottom border of the screen
198 | self.car.y = 10
199 | last_reward = -1
200 |         if self.car.y > self.height - 10:# if the car reaches the top border of the screen
201 | self.car.y = self.height - 10
202 | last_reward = -1
203 |
204 | if distance < 100:
205 | goal_x = self.width-goal_x
206 | goal_y = self.height-goal_y
207 | # updating last distance to goal
208 | last_distance = distance
209 |
210 |
211 | class MyPaintWidget(Widget):
212 |
213 | def on_touch_down(self, touch):
214 | global length, n_points, last_x, last_y
215 | with self.canvas:
216 | Color(0.6,0.5,0.1)
217 | d = 10.
218 | touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
219 | last_x = int(touch.x)
220 | last_y = int(touch.y)
221 | n_points = 0
222 | length = 0
223 | sand[int(touch.x),int(touch.y)] = 1
224 |
225 | def on_touch_move(self, touch):
226 | global length, n_points, last_x, last_y
227 | if touch.button == 'left':
228 | touch.ud['line'].points += [touch.x, touch.y]
229 | x = int(touch.x)
230 | y = int(touch.y)
231 | length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
232 | n_points += 1.
233 | density = n_points/(length)
234 | touch.ud['line'].width = int(20 * density + 1)
235 | sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
236 | last_x = x
237 | last_y = y
238 |
239 |
240 | class CarApp(App):
241 |
242 | def build(self):
243 | parent = Game()
244 | parent.serve_car()
245 | Clock.schedule_interval(parent.update, 1.0/60.0)
246 | self.painter = MyPaintWidget()
247 | clearbtn = Button(text = 'clear')
248 | savebtn = Button(text = 'save', pos = (parent.width, 0))
249 | loadbtn = Button(text = 'load', pos = (2 * parent.width, 0))
250 | clearbtn.bind(on_release = self.clear_canvas)
251 | savebtn.bind(on_release = self.save)
252 | loadbtn.bind(on_release = self.load)
253 | parent.add_widget(self.painter)
254 | parent.add_widget(clearbtn)
255 | parent.add_widget(savebtn)
256 | parent.add_widget(loadbtn)
257 | return parent
258 |
259 | def clear_canvas(self, obj):
260 | global sand
261 | self.painter.canvas.clear()
262 | sand = np.zeros((length,width))
263 |
264 | def save(self, obj):
265 | print("saving brain...")
266 | brain.save()
267 | plt.plot(scores)
268 | plt.show()
269 |
270 | def load(self, obj):
271 | print("loading last saved brain...")
272 | brain.load()
273 |
274 | if __name__ == '__main__':
275 | CarApp().run()
276 |
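277 | # How the pieces above fit together:
278 | #  * Clock.schedule_interval drives Game.update roughly 60 times per second.
279 | #  * Each frame, the three sensor signals plus the goal orientation (and its
280 | #    negation) form the 5-value state passed to brain.update(last_reward, state),
281 | #    which returns the next action (go straight, rotate +20 or -20 degrees).
282 | #  * Drawing with the left mouse button lays down sand (penalized terrain); the
283 | #    'clear', 'save' and 'load' buttons reset the map, save the network and
284 | #    optimizer to last_brain.pth (also plotting the score history), and reload it.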
--------------------------------------------------------------------------------
/last_brain.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/naderAsadi/Optimal-Path-Planning-Deep-Reinforcement-Learning/e38452c00f2f9e15bd8f6762c6e4e7a3111c85d7/last_brain.pth
--------------------------------------------------------------------------------
/output.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/naderAsadi/Optimal-Path-Planning-Deep-Reinforcement-Learning/e38452c00f2f9e15bd8f6762c6e4e7a3111c85d7/output.jpg
--------------------------------------------------------------------------------