├── README.md
├── LICENSE
├── .gitignore
├── potential field2.py
├── GSA.py
├── QL_path.py
└── DQN_PATH.py

/README.md:
--------------------------------------------------------------------------------
 1 | # Robot_path_planning
 2 | Robot path planning in static and dynamic environments (GSA, artificial potential field, Q-learning, DDQN)
 3 | 1) GSA.py
 4 | Uses the Gravitational Search Algorithm for path planning with static obstacles.
 5 | 2) potential field2.py
 6 | Example of path planning with the artificial potential field algorithm in a dynamic environment.
 7 | 3) QL_path.py
 8 | Uses Q-learning for path planning in a dynamic environment.
 9 | Observation: target and two dynamic obstacles.
10 | 4) DQN_PATH.py
11 | Creates a DDQN for path planning in a dynamic environment.
12 | WARNING: training can be unstable.
13 | Stabilization techniques used:
14 | - two Q networks (online and target),
15 | - Huber loss function,
16 | - reward values limited to the range [0, 1]
17 | 
18 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 mn270
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /potential field2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example how use algorithm potential field for robotic path planning in dynamic environment. 
3 | """ 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import random 7 | 8 | # Parameters 9 | 10 | 11 | KP = 2.0 # attractive potential gain 12 | ETA = 100.0 # repulsive potential gain 13 | AREA_WIDTH = 30.0 # potential area width [m] 14 | agents = 10 15 | 16 | 17 | # show_animation = True 18 | 19 | def calc_attractive_potential(x, y, gx, gy): 20 | return 0.5 * KP * np.hypot(x - gx, y - gy) 21 | 22 | 23 | def calc_repulsive_potential(x, y, ox, oy, rr): 24 | # search nearest obstacle 25 | minid = -1 26 | dmin = float("inf") 27 | for i, _ in enumerate(ox): 28 | d = np.hypot(x - ox[i], y - oy[i]) 29 | if dmin >= d: 30 | dmin = d 31 | minid = i 32 | 33 | # calc repulsive potential 34 | dq = np.hypot(x - ox[minid], y - oy[minid]) 35 | 36 | if dq <= rr: 37 | if dq <= 0.1: 38 | dq = 0.1 39 | 40 | return 0.5 * ETA * (1.0 / dq - 1.0 / rr) ** 2 41 | else: 42 | return 0.0 43 | 44 | 45 | def get_motion_model(): 46 | motion = [[1, 0], 47 | [0, 1], 48 | [-1, 0], 49 | [0, -1], 50 | [-1, -1], 51 | [-1, 1], 52 | [1, -1], 53 | [1, 1]] 54 | 55 | return motion 56 | 57 | 58 | def draw_heatmap(data): 59 | data = np.array(data).T 60 | plt.pcolor(data, vmax=100.0, cmap=plt.cm.Blues) 61 | 62 | 63 | class people(): 64 | def __init__(self, sx, sy, gx, gy, reso, rr, str): 65 | self.ix = sx 66 | self.iy = sy 67 | self.sx = sx 68 | self.sy = sy 69 | self.gx = gx 70 | self.gy = gy 71 | self.reso = reso 72 | self.rr = rr 73 | self.d = np.hypot(self.sx - self.gx, self.sy - self.gy) 74 | self.str = str 75 | 76 | def calc_potential_field(self, ox, oy, ): 77 | minx = - AREA_WIDTH 78 | miny = - AREA_WIDTH 79 | maxx = AREA_WIDTH 80 | maxy = AREA_WIDTH 81 | xw = int(round((maxx - minx) / self.reso)) 82 | yw = int(round(np.divide(np.subtract(maxy, miny), self.reso))) 83 | 84 | # calc each potential 85 | pmap = [[0.0 for i in range(yw)] for i in range(xw)] 86 | 87 | for ix in range(xw): 88 | x = np.add(np.multiply(ix, self.reso), minx) 89 | 90 | for iy in range(yw): 91 | y = np.add(np.multiply(iy, self.reso), miny) 92 | ug = calc_attractive_potential(x, y, self.gx, self.gy) 93 | uo = calc_repulsive_potential(x, y, ox, oy, self.rr) 94 | uf = np.add(ug, uo) 95 | pmap[ix][iy] = uf 96 | 97 | return pmap, minx, miny 98 | 99 | def potential_field_planning(self, ox, oy, show_animation): 100 | 101 | # calc potential field 102 | pmap, minx, miny = self.calc_potential_field(ox, oy) 103 | 104 | # search path 105 | 106 | self.ix = round((self.sx - minx) / self.reso) 107 | self.iy = round((self.sy - miny) / self.reso) 108 | gix = round((self.gx - minx) / self.reso) 109 | giy = round((self.gy - miny) / self.reso) 110 | 111 | if show_animation: 112 | draw_heatmap(pmap) 113 | plt.plot(self.ix, self.iy, "*k") 114 | plt.plot(gix, giy, "*m") 115 | 116 | rx, ry = [self.sx], [self.sy] 117 | self.potential(show_animation, ox, oy) 118 | 119 | return rx, ry 120 | 121 | def potential(self, show_animation, ox, oy): 122 | motion = get_motion_model() 123 | pmap, minx, miny = self.calc_potential_field(ox, oy) 124 | if show_animation: 125 | draw_heatmap(pmap) 126 | if self.d >= self.reso: 127 | minp = float("inf") 128 | minix, miniy = -1, -1 129 | for i, _ in enumerate(motion): 130 | inx = int(self.ix + motion[i][0] + random.random()) 131 | iny = int(self.iy + motion[i][1] + random.random()) 132 | if inx >= len(pmap) or iny >= len(pmap[0]): 133 | p = float("inf") # outside area 134 | else: 135 | p = pmap[inx][iny] 136 | if minp > p: 137 | minp = p 138 | minix = inx 139 | miniy = iny 140 | self.ix = minix 141 | self.iy = miniy 142 | xp = 
self.ix * self.reso + minx 143 | yp = self.iy * self.reso + miny 144 | self.d = np.hypot(self.gx - xp, self.gy - yp) 145 | self.sx = xp 146 | self.sy = yp 147 | 148 | # if show_animation: 149 | plt.plot(self.ix, self.iy, self.str) 150 | plt.pause(0.000000001) 151 | 152 | return 0 153 | 154 | 155 | def main(): 156 | print("potential_field_planning start") 157 | 158 | sx = -28.0 # start x position [m] 159 | sy = -28.0 # start y positon [m] 160 | gx = 28.0 # goal x position [m] 161 | gy = 28.0 # goal y position [m] 162 | grid_size = 0.5 # potential grid size [m] 163 | robot_radius = 5.0 # robot radius [m] 164 | viel = random.randint(20, 40) 165 | show = True 166 | ox = [] 167 | oy = [] 168 | ox_ = [] 169 | oy_ = [] 170 | agent = [] 171 | str = [".g", ".c", ".m", ".y", ".k"] 172 | 173 | robot = people(sx, sy, gx, gy, grid_size, robot_radius, ".r") 174 | for _ in range(agents): 175 | agent.append( 176 | people(random.randint(-26, 26), random.randint(-26, 26), random.randint(-26, 26), random.randint(-26, 26), 177 | grid_size, robot_radius, str[random.randint(0, 4)])) 178 | ox.append(robot.ix) 179 | oy.append(robot.iy) 180 | for i in range(agents): 181 | ox.append(agent[i].ix) 182 | oy.append(agent[i].iy) 183 | 184 | for _ in range(viel): 185 | ox.append(random.randint(-26, 26)) 186 | oy.append(random.randint(-26, 26)) 187 | 188 | for i in range(len(ox)): 189 | ox_.append(ox[i]) 190 | oy_.append(oy[i]) 191 | 192 | # if show_animation: 193 | plt.grid(True) 194 | plt.axis("equal") 195 | 196 | # path generation 197 | ox_[0] = 0 198 | oy_[0] = 0 199 | robot.potential_field_planning(ox_, oy_, show) 200 | ox[0] = robot.sx 201 | oy[0] = robot.sy 202 | ox_[0] = ox[0] 203 | oy_[0] = oy[0] 204 | for i in range(agents): 205 | ox_[i + 1] = 0 206 | oy_[i + 1] = 0 207 | agent[i].potential_field_planning(ox_, oy_, False) 208 | ox[i + 1] = agent[i].sx 209 | oy[i + 1] = agent[i].sy 210 | ox_[i + 1] = ox[i + 1] 211 | oy_[i + 1] = oy[i + 1] 212 | while robot.d >= robot.reso: 213 | ox_[0] = 0 214 | oy_[0] = 0 215 | ox[0] = robot.sx 216 | oy[0] = robot.sy 217 | robot.potential(show, ox_, oy_) 218 | ox_[0] = ox[0] 219 | oy_[0] = oy[0] 220 | for i in range(agents): 221 | ox[i + 1] = agent[i].sx 222 | oy[i + 1] = agent[i].sy 223 | ox_[i + 1] = 0 224 | oy_[i + 1] = 0 225 | agent[i].potential(False, ox_, oy_) 226 | ox_[i + 1] = ox[i + 1] 227 | oy_[i + 1] = oy[i + 1] 228 | # if show_animation: 229 | print("Goal!!") 230 | plt.show() 231 | 232 | 233 | if __name__ == '__main__': 234 | print(__file__ + " start!!") 235 | main() 236 | print(__file__ + " Done!!") 237 | -------------------------------------------------------------------------------- /GSA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example how to use a Gravitational Search Algorithm to path planning 3 | """ 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import random 8 | import math 9 | 10 | # Parameters 11 | N = 30 # number of agents 12 | G0 = 20 # G0 gravity 50 13 | G = G0 14 | Eps = 0.001 # small constant ??? 
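# The classes below implement the standard GSA update: an agent's fitness is its
# straight-line distance to the goal (plus the `alfa` penalty on collision),
# masses are the normalized fitnesses m_i = (fit_i - worst) / (best - worst) and
# M_i = m_i / sum(m), the randomly weighted pairwise force is
# F_ij = G * M_i * M_j * (x_j - x_i) / (R_ij + Eps), where Eps avoids division
# by zero, and G decays over time as G = G0 * exp(-zet * t / Episodes).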
15 | Xs = -45 # START position 16 | Ys = -45 17 | Xg = 45 # GOAL position 18 | Yg = 45 19 | alfa = 1000 # penalty 20 | Area = 100 # size area 21 | robot_radius = 4.0 22 | Episodes = 600.0 # simulation time 23 | zet = 0.1 # zeta parameter exp to change gravity 24 | SHOW = 3000 25 | len = Area / 2 26 | xmax = ymax = len 27 | xmin = ymin = -len 28 | 29 | 30 | class obtacle(): 31 | """ 32 | Create static obstacles 33 | """ 34 | def __init__(self, x, y): 35 | self.x = x 36 | self.y = y 37 | 38 | 39 | class agent(): 40 | """ 41 | Create agent (one planet, object, mass) 42 | """ 43 | def __init__(self, x, y): 44 | self.x = [] 45 | self.y = [] 46 | self.fit_mat = [] 47 | self.m = 0.0 48 | self.M = 0.0 49 | self.x.append(Xs) 50 | self.y.append(Ys) 51 | self.x.append(Xs + random.randint(-3, 3)) 52 | self.y.append(Ys + random.randint(-3, 3)) 53 | self.fit = math.sqrt(pow(Xg - Xs, 2) + pow(Yg - Ys, 2)) 54 | self.Vx = random.randint(-3, 3) 55 | self.Vy = random.randint(-3, 3) 56 | self.ax = 0.0 57 | self.ay = 0.0 58 | self.Fx = 0.0 59 | self.Fy = 0.0 60 | self.Succes = 0 61 | self.colision = False 62 | 63 | def calc_position(self, t): 64 | """ 65 | Calculate actual position 66 | :param t: 67 | :return: 68 | """ 69 | x = math.ceil(self.x[t] + self.Vx) 70 | y = math.ceil(self.y[t] + self.Vy) 71 | x += random.randint(-1, 1) 72 | y += random.randint(-1, 1) 73 | if (self.fit < 5): 74 | self.y.append(Yg) 75 | self.x.append(Xg) 76 | self.Succes = 1 77 | 78 | else: 79 | self.y.append(y) 80 | self.x.append(x) 81 | 82 | def calc_fit(self, obtacles, t): 83 | colision = False 84 | if ((self.x[t] - self.x[t - 1]) == 0): 85 | A = 1 86 | B = self.x[t] 87 | C = 0 88 | else: 89 | A = (self.y[t] - self.y[t - 1]) / (self.x[t] - self.x[t - 1]) 90 | C = self.y[t - 1] - self.x[t - 1] * (self.y[t] - self.y[t - 1]) / (self.x[t] - self.x[t - 1]) 91 | B = -1 92 | d = math.sqrt(pow(A, 2) + pow(B, 2)) 93 | if (self.x[t - 1] > self.x[t]): 94 | Xup = self.x[t - 1] 95 | Xdown = self.x[t] 96 | else: 97 | Xup = self.x[t] 98 | Xdown = self.x[t - 1] 99 | if (self.y[t - 1] > self.y[t]): 100 | Yup = self.y[t - 1] 101 | Ydown = self.x[t] 102 | else: 103 | Yup = self.y[t] 104 | Ydown = self.y[t - 1] 105 | for i, _ in enumerate(obtacles): 106 | if ((abs(A * obtacles[i].x + B * obtacles[i].y + C) / d < robot_radius / 2) and ( 107 | obtacles[i].x < Xup + robot_radius / 2) and (obtacles[i].x > Xdown - robot_radius / 2) and ( 108 | obtacles[i].y < Yup + robot_radius / 2) and (obtacles[i].y > Ydown - robot_radius / 2)): 109 | colision = True 110 | if (colision): 111 | self.fit = math.sqrt(pow(Xg - self.x[t], 2) + pow(Yg - self.y[t], 2)) + alfa 112 | self.colision = True 113 | else: 114 | self.fit = math.sqrt(pow(Xg - self.x[t], 2) + pow(Yg - self.y[t], 2)) 115 | self.fit_mat.append(self.fit) 116 | 117 | def calc_m(self, worst, bw): 118 | k = self.fit - worst 119 | if (k == 0): 120 | k = -random.random() 121 | self.m = k / bw 122 | # self.m = k / N 123 | 124 | def calc_M(self, sum_m): 125 | self.M = self.m / sum_m 126 | 127 | def calc_V(self): 128 | self.Vx = random.random() * self.Vx + self.ax 129 | self.Vy = random.random() * self.Vy + self.ay 130 | 131 | def calc_a(self): 132 | self.ax = self.Fx / self.M 133 | self.ay = self.Fy / self.M 134 | 135 | def calc_F(self, agents, i, t, limit_k): 136 | self.Fx = 0 137 | self.Fy = 0 138 | for n in range(N): 139 | if n != i: 140 | if (agents[n].fit < limit_k): 141 | dx = agents[n].x[t] - self.x[t] 142 | dy = agents[n].y[t] - self.y[t] 143 | E = math.sqrt(pow(dx, 2) + pow(dy, 2)) 144 | GME = G * self.M * 
agents[n].M / (E + Eps) 145 | Fx = GME * dx 146 | self.Fx = random.random() * Fx + self.Fx 147 | Fy = GME * dy 148 | self.Fy = random.random() * Fy + self.Fy 149 | 150 | 151 | def path_planning(agents, obtacles, G): 152 | SUCCES = 0 153 | plt.plot(Xs, Ys, "*k") 154 | plt.plot(Xg, Yg, "*m") 155 | t = 0 156 | while ((t < Episodes) and (N / 2 > SUCCES)): 157 | t += 1 158 | SUCCES = 0 159 | worst = agents[0].fit 160 | best = agents[0].fit 161 | b = 0 162 | for i in range(N): 163 | agents[i].calc_fit(obtacles, t) 164 | if (agents[i].fit > worst): 165 | worst = agents[i].fit 166 | if (agents[i].fit < best): 167 | best = agents[i].fit 168 | b = i 169 | SUM_m = 0 170 | bw = best - worst 171 | for i in range(N): 172 | agents[i].calc_m(worst, bw) 173 | SUM_m += agents[i].m 174 | 175 | for i in range(N): 176 | agents[i].calc_M(SUM_m) 177 | agents[i].calc_F(agents, i, t, worst + 3 * bw / 4) 178 | agents[i].calc_a() 179 | agents[i].calc_V() 180 | 181 | for i in range(N): 182 | agents[i].calc_position(t) 183 | SUCCES += agents[i].Succes 184 | if (t > SHOW): 185 | plt.plot(agents[i].x[t - 1], agents[i].y[t - 1], ".w") 186 | plt.plot(agents[i].x[t], agents[i].y[t], ".b") 187 | plt.pause(0.1) 188 | 189 | G = G0 * math.exp(zet * -t / Episodes) 190 | 191 | D = [] 192 | index = [] 193 | fit_data = [] 194 | L = 0 195 | for i in range(N): 196 | d = 0 197 | if (not agents[i].colision): 198 | for n in range(t - 1): 199 | d += math.sqrt( 200 | pow(agents[i].x[n + 1] - agents[i].x[n], 2) + pow(agents[i].y[n + 1] - agents[i].y[n], 2)) 201 | D.append(math.ceil(d)) 202 | index.append(i) 203 | L += 1 204 | id = D.index(min(D)) 205 | id = index[id] 206 | 207 | for n in range(t): 208 | s = 0 209 | for i in range(L): 210 | s += agents[index[i]].fit_mat[n] 211 | fit_data.append(s / L) 212 | 213 | return id, t, fit_data, min(D) 214 | 215 | 216 | def main(): 217 | print("potential_field_planning start") 218 | obtacles = [] 219 | agents = [] 220 | viel = random.randint(20, 30) 221 | plt.grid(True) 222 | plt.axis("equal") 223 | 224 | for i in range(viel): 225 | obtacles.append(obtacle(random.randint(-len, len), random.randint(-len, len))) 226 | plt.plot(obtacles[i].x, obtacles[i].y, "o") 227 | 228 | plt.grid(True) 229 | plt.axis("equal") 230 | for _ in range(N): 231 | agents.append(agent(Xs, Ys)) 232 | 233 | id, t, data, path = path_planning(agents, obtacles, G) 234 | 235 | plt.cla() 236 | fig1 = plt.figure() 237 | ax1 = fig1.add_subplot(1, 1, 1) 238 | k = 9 239 | for i in range(viel): 240 | ax1.plot(obtacles[i].x, obtacles[i].y, "o") 241 | ax1.plot(Xs, Ys, "*k") 242 | ax1.plot(Xg, Yg, "*m") 243 | 244 | ax1.plot(agents[id].x, agents[id].y, linewidth=1) 245 | ax1.set_title('path') 246 | 247 | fig = plt.figure() 248 | ax = fig.add_subplot(1, 1, 1) 249 | ax.plot(agents[id].fit_mat, color='tab:blue', label='best_fit') 250 | ax.plot(data, color='tab:orange', label='average_fit') 251 | ax.set_title('fit function') 252 | fig.legend(loc='up right') 253 | 254 | print(t) 255 | print(path) 256 | 257 | while (1): 258 | plt.show() 259 | 260 | 261 | if __name__ == '__main__': 262 | print(__file__ + " start!!") 263 | main() 264 | 265 | print(__file__ + " Done!!") 266 | -------------------------------------------------------------------------------- /QL_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create path planner with Q-learning. 
3 | Precise description environment function, can be found in DQN_PATH.py 4 | """ 5 | import numpy as np 6 | from scipy.spatial.distance import pdist, squareform 7 | import matplotlib.pyplot as plt 8 | import scipy.integrate as integrate 9 | import matplotlib.animation as animation 10 | import random 11 | from random import uniform 12 | import math 13 | import pickle 14 | from matplotlib import style 15 | import time 16 | 17 | style.use("ggplot") 18 | 19 | MOVE_PENALTY = 1 20 | PEOPLE_PENALTY = 300 21 | TARGET_REWARD = 40 22 | epsilon = 2 23 | EPS_DECAY = 0.9998 # Every episode will be epsilon*EPS_DECAY 24 | SHOW_EVERY = 3000 # how often to play through env visually. 25 | 26 | start_q_table = None # None or Filename 27 | 28 | LEARNING_RATE = 0.1 29 | DISCOUNT = 0.95 30 | 31 | AREA_WIDTH = 2 32 | PEOPLES = 2 33 | MAX_FRAMES = 50 34 | RADIUS = 1 35 | START_X = -1 36 | START_Y = -1 37 | EPISODE = 0 38 | peoples_x = [] 39 | peoples_y = [] 40 | peoples = [] 41 | points_rx = [] 42 | points_ry = [] 43 | points_tx = [] 44 | points_ty = [] 45 | 46 | 47 | class robot(): 48 | def __init__(self): 49 | self.x = 0 50 | self.y = 0 51 | self.tx = 0 52 | self.ty = 0 53 | self.peoples_array = [] 54 | self.map = [] 55 | self.ep = 0 56 | self.eps = epsilon 57 | 58 | def sub(self): 59 | observe = [(int(self.x - self.tx), int(self.y - self.ty))] 60 | for i in range(PEOPLES): 61 | observe1 = (int(self.x - self.peoples_array[i].x), int(self.y - self.peoples_array[i].y)) 62 | observe.insert(i + 1, observe1) 63 | return tuple(observe) 64 | 65 | def get_action_model(self, id): 66 | action = [[1, 0], 67 | [0, 1], 68 | [-1, 0], 69 | [0, -1], 70 | [-1, -1], 71 | [-1, 1], 72 | [1, -1], 73 | [1, 1]] 74 | return action[id], id 75 | 76 | def calc_reward(self): 77 | d = [] 78 | for i in range(PEOPLES): 79 | d.append(math.sqrt(pow(self.peoples_array[i].x - self.x, 2) + pow(self.peoples_array[i].y - self.y, 2))) 80 | D = min(d) 81 | 82 | if (D < RADIUS / 2): 83 | reward = -PEOPLE_PENALTY 84 | elif (math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) < RADIUS / 2): 85 | reward = TARGET_REWARD 86 | else: 87 | reward = -MOVE_PENALTY 88 | self.distance = math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) 89 | return reward 90 | 91 | def get_state(self): 92 | pmap = [[0.0 for i in range(int(2 * AREA_WIDTH))] for i in range(int(2 * AREA_WIDTH))] 93 | self.map = pmap 94 | self.map[int((AREA_WIDTH + self.x) * 31 / 32)][int((AREA_WIDTH + self.y) * 31 / 32)] = 0.5 95 | self.map[int((AREA_WIDTH + self.tx) * 31 / 32)][int((AREA_WIDTH + self.ty) * 31 / 32)] = 1 96 | for i in range(PEOPLES): 97 | self.map[int((AREA_WIDTH + self.peoples_array[i].x) * 31 / 32)][ 98 | int((AREA_WIDTH + self.peoples_array[i].y) * 31 / 32)] = -1 99 | return self.map 100 | 101 | def reset(self): 102 | self.x = START_X 103 | self.y = START_Y 104 | self.tx = random.uniform(-AREA_WIDTH, AREA_WIDTH) 105 | self.ty = random.uniform(-AREA_WIDTH, AREA_WIDTH) 106 | self.ep = 0 107 | peoples = [] 108 | for _ in range(PEOPLES): 109 | peoples.append(people()) 110 | self.peoples_array = peoples 111 | state = self.get_state() 112 | return state 113 | 114 | def step(self, action): 115 | self.x = self.x + action[0] * 0.2 116 | self.y = self.y + action[1] * 0.2 117 | if (self.x > AREA_WIDTH): 118 | self.x = AREA_WIDTH 119 | if (self.x < -AREA_WIDTH): 120 | self.x = -AREA_WIDTH 121 | if (self.y > AREA_WIDTH): 122 | self.y = AREA_WIDTH 123 | if (self.y < -AREA_WIDTH): 124 | self.y = -AREA_WIDTH 125 | 126 | for i in range(PEOPLES): 127 | 
self.peoples_array[i].calculate_position() 128 | state = self.get_state() 129 | reward = self.calc_reward() 130 | if ((reward == -PEOPLE_PENALTY) or (reward == TARGET_REWARD)): 131 | terminated = True 132 | else: 133 | terminated = False 134 | 135 | return state, reward, terminated 136 | 137 | 138 | class people(): 139 | def __init__(self): 140 | self.x = random.uniform(-AREA_WIDTH, AREA_WIDTH) 141 | self.y = random.uniform(-AREA_WIDTH, AREA_WIDTH) 142 | self.vx = random.uniform(-0.2, 0.2) 143 | self.vy = random.uniform(-0.2, 0.2) 144 | 145 | def calculate_position(self): 146 | x = self.x + self.vx 147 | y = self.y + self.vy 148 | if (x > AREA_WIDTH): 149 | x = AREA_WIDTH 150 | self.vx = random.uniform(-0.2, -0.0001) 151 | self.vy = random.uniform(-0.2, 0.2) 152 | if (x < -AREA_WIDTH): 153 | x = -AREA_WIDTH 154 | self.vx = random.uniform(0.0001, 0.2) 155 | self.vy = random.uniform(-0.2, 0.2) 156 | if (y > AREA_WIDTH): 157 | y = AREA_WIDTH 158 | self.vx = random.uniform(-0.2, 0.2) 159 | self.vy = random.uniform(-0.2, -0.0001) 160 | if (y < -AREA_WIDTH): 161 | y = -AREA_WIDTH 162 | self.vx = random.uniform(-0.2, 0.2) 163 | self.vy = random.uniform(0.0001, 0.2) 164 | self.x = x 165 | self.y = y 166 | 167 | 168 | if start_q_table is None: 169 | # initialize the q-table# 170 | q_table = {} 171 | for i in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 172 | for ii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 173 | for iii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 174 | for iiii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 175 | for iiiii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 176 | for iiiiii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 177 | q_table[((i, ii), (iii, iiii), (iiiii, iiiiii))] = [np.random.uniform(-10, 0) for i in 178 | range(8)] 179 | 180 | else: 181 | with open(start_q_table, "rb") as f: 182 | q_table = pickle.load(f) 183 | 184 | episode_rewards = [] 185 | 186 | 187 | def simulation(Simulation): 188 | info = False 189 | obs = env.sub() 190 | # print(obs) 191 | if (not Simulation): 192 | if np.random.random() > env.eps: 193 | # GET THE ACTION 194 | action, action_id = env.get_action_model(np.argmax(q_table[obs])) 195 | else: 196 | action, action_id = env.get_action_model(np.random.randint(0, 8)) 197 | # Take the action! 
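        # The block above is an epsilon-greedy choice: exploit the Q-table when a
        # uniform draw exceeds env.eps, otherwise pick one of the 8 moves at random.
        # After env.step() the code applies the standard tabular Q-learning update
        #   Q(s, a) <- (1 - LEARNING_RATE) * Q(s, a)
        #              + LEARNING_RATE * (reward + DISCOUNT * max_a' Q(s', a'))
        # collapsing the target to the bare reward on terminal transitions.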
198 | _, reward, terminated = env.step(action) 199 | 200 | new_obs = env.sub() 201 | max_future_q = np.max(q_table[new_obs]) 202 | current_q = q_table[obs][action_id] 203 | 204 | if terminated == True: 205 | new_q = reward 206 | else: 207 | new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q) 208 | q_table[obs][action_id] = new_q 209 | 210 | env.ep += reward 211 | else: 212 | action, action_id = env.get_action_model(np.argmax(q_table[obs])) 213 | _, reward, terminated = env.step(action) 214 | env.ep += reward 215 | if (terminated): 216 | print(env.ep) 217 | if terminated == True: 218 | episode_rewards.append(env.ep) 219 | env.eps *= EPS_DECAY 220 | info = True 221 | env.reset() 222 | 223 | return info 224 | 225 | 226 | def update_data(frame_number): 227 | # action = env.get_action_model(random.randint(0,7)) 228 | # env.step(action) 229 | simulation(True) 230 | for i in range(PEOPLES): 231 | peoples_x[i] = env.peoples_array[i].x 232 | peoples_y[i] = env.peoples_array[i].y 233 | 234 | obj1.set_data(env.x, env.y) 235 | obj2.set_data(env.tx, env.ty) 236 | # new data to plot 237 | obj.set_data(peoples_x, peoples_y) 238 | return None 239 | 240 | 241 | def anim(): 242 | for i in range(PEOPLES): 243 | peoples_x.append(env.peoples_array[i].x) 244 | peoples_y.append(env.peoples_array[i].y) 245 | 246 | points_rx.append(env.x) 247 | points_ry.append(env.y) 248 | points_tx.append(env.tx) 249 | points_ty.append(env.ty) 250 | # set view limits 251 | ax = plt.axes() 252 | ax.set_ylim(-AREA_WIDTH, AREA_WIDTH) 253 | ax.set_xlim(-AREA_WIDTH, AREA_WIDTH) 254 | 255 | # animation controller - have to be assigned to variable 256 | anim = animation.FuncAnimation(fig, update_data, MAX_FRAMES, interval=50) # , repeat=False) 257 | plt.show() 258 | 259 | 260 | env = robot() 261 | env.reset() 262 | for i in range(1000000): 263 | for ii in range(20): 264 | while (1): 265 | if (simulation(False)): 266 | break 267 | print(i) 268 | 269 | moving_avg = np.convolve(episode_rewards, np.ones((SHOW_EVERY,)) / SHOW_EVERY, mode='valid') 270 | plt.plot([i for i in range(len(moving_avg))], moving_avg) 271 | # plt.plot(episode_rewards) 272 | plt.ylabel(f"Reward ma") 273 | plt.xlabel("episode #") 274 | plt.show() 275 | 276 | env.reset() 277 | fig = plt.figure() 278 | obj, = plt.plot([], [], 'ro') 279 | obj1, = plt.plot([], [], 'yo') 280 | obj2, = plt.plot([], [], 'bo') 281 | anim() 282 | -------------------------------------------------------------------------------- /DQN_PATH.py: -------------------------------------------------------------------------------- 1 | """ 2 | Realization DDQN robot path planning in dynamic environment 3 | """ 4 | import numpy as np 5 | from scipy.spatial.distance import pdist, squareform 6 | import matplotlib.pyplot as plt 7 | import scipy.integrate as integrate 8 | import matplotlib.animation as animation 9 | import random 10 | from random import uniform 11 | import math 12 | # import keras.backend.tensorflow_backend as backend 13 | from tensorflow.keras.models import Sequential 14 | from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten 15 | from tensorflow.keras.optimizers import Adam 16 | from tensorflow.keras.callbacks import TensorBoard 17 | from tensorflow.keras.callbacks import History 18 | import tensorflow as tf 19 | from collections import deque 20 | from tqdm import tqdm 21 | from tensorflow.keras.callbacks import TensorBoard 22 | import pickle 23 | from matplotlib import style 24 | from time import time 25 | 
import os 26 | 27 | # turn off warnings 28 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 29 | 30 | # For more repetitive results 31 | random.seed(1) 32 | np.random.seed(1) 33 | tf.random.set_seed(1) 34 | 35 | style.use("ggplot") 36 | 37 | MOVE_PENALTY = 0.01 38 | PEOPLE_PENALTY = 1 39 | TARGET_REWARD = 1 40 | epsilon = 1 41 | EPSILON_DECAY = 0.99978 42 | MIN_EPSILON = 0.001 43 | SHOW_EVERY = 3000 # how often to play through env visually. 44 | REPLAY_MEMORY_SIZE = 100_000 # How many last steps to keep for model training 45 | MIN_REPLAY_MEMORY_SIZE = 1_000 # Minimum number of steps in a memory to start training 46 | MINIBATCH_SIZE = 64 47 | # How many steps (samples) to use for training 48 | UPDATE_TARGET_EVERY = 5 49 | MIN_REWARD = -200 # For model save 50 | MEMORY_FRACTION = 0.20 51 | 52 | DISCOUNT = 0.85 53 | 54 | AREA_WIDTH = 5 55 | PEOPLES = 2 56 | MAX_FRAMES = 50 57 | RADIUS = 1 58 | START_X = -2 59 | START_Y = -2 60 | EPISODE = 0 61 | peoples_x = [] 62 | peoples_y = [] 63 | peoples = [] 64 | points_rx = [] 65 | points_ry = [] 66 | points_tx = [] 67 | points_ty = [] 68 | 69 | history = History() 70 | 71 | 72 | class robot(): 73 | def __init__(self): 74 | self.x = 0 75 | self.y = 0 76 | self.tx = 0 77 | self.ty = 0 78 | self.peoples_array = [] 79 | self.map = [] 80 | self.ep = 0 81 | self.eps = epsilon 82 | self.steps = 0 83 | 84 | def sub(self): 85 | """ 86 | Create vector of observation, distance between robot, obstacles and target. 87 | :return: (tuple []) observation 88 | """ 89 | observe = [(int(self.x - self.tx), int(self.y - self.ty))] 90 | for i in range(PEOPLES): 91 | observe1 = (int(self.x - self.peoples_array[i].x), int(self.y - self.peoples_array[i].y)) 92 | observe.insert(i + 1, observe1) 93 | return tuple(observe) 94 | 95 | def get_action_model(self, id): 96 | """ 97 | Encode action vector (movement). 98 | :param id: (int) action_id 99 | :return: (int []) action, (int) action_id 100 | """ 101 | action = [[1, 0], 102 | [0, 1], 103 | [-1, 0], 104 | [0, -1], 105 | [-1, -1], 106 | [-1, 1], 107 | [1, -1], 108 | [1, 1]] 109 | return action[id], id 110 | 111 | def calc_reward(self): 112 | """ 113 | Caculate reward, collision detect. 
114 | :return: (float) reward 115 | """ 116 | collision = False 117 | for i in range(PEOPLES): 118 | if (self.peoples_array[i].x == self.x and self.peoples_array[i].y == self.y): 119 | collision = True 120 | 121 | if (collision): 122 | reward = -PEOPLE_PENALTY 123 | elif (self.x == int(self.tx) and self.y == int( 124 | self.ty)): 125 | reward = TARGET_REWARD 126 | else: 127 | reward = -MOVE_PENALTY 128 | self.distance = math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) 129 | return reward 130 | 131 | def get_state(self): 132 | """ 133 | Create a map - image with 3 channels 134 | 1st channel - robot 135 | 2sd channel - target 136 | 3rd channel - obstacles 137 | 0 - empty position 138 | 1 - busy position 139 | :return: (float []) map 140 | """ 141 | pmap = np.zeros((2 * AREA_WIDTH + 1, 2 * AREA_WIDTH + 1, 3), dtype=float) 142 | pmap[int(AREA_WIDTH + self.x)][int(AREA_WIDTH + self.y)] += [1, 0, 0] 143 | pmap[int(AREA_WIDTH + self.tx)][int(AREA_WIDTH + self.ty)] += [0, 1, 0] 144 | for i in range(PEOPLES): 145 | pmap[int(AREA_WIDTH + self.peoples_array[i].x)][int(AREA_WIDTH + self.peoples_array[i].y)] += [0, 0, 1] 146 | 147 | self.map = pmap 148 | return self.map 149 | 150 | def reset(self): 151 | """ 152 | Reset environment 153 | :return: (float []) map 154 | """ 155 | self.x = random.randint(-AREA_WIDTH, AREA_WIDTH) 156 | self.y = random.randint(-AREA_WIDTH, AREA_WIDTH) 157 | self.tx = random.randint(-AREA_WIDTH, AREA_WIDTH) 158 | self.ty = random.randint(-AREA_WIDTH, AREA_WIDTH) 159 | if (math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) < 2): 160 | self.tx = -self.x 161 | self.ty = -self.y 162 | self.ep = 0 163 | peoples = [] 164 | for i in range(PEOPLES): 165 | peoples.append(people()) 166 | if (math.sqrt(pow(peoples[i].x - self.x, 2) + pow(peoples[i].y - self.y, 2)) < 2): 167 | peoples[i].x = -self.x 168 | peoples[i].y = -self.y 169 | self.peoples_array = peoples 170 | state = self.get_state() 171 | self.steps = 0 172 | return state 173 | 174 | def step(self, action): 175 | """ 176 | 1) Change robot position. 177 | 2) Change obstacles position. 178 | 3) Calculate reward. 179 | 4) Check ending condition. 
180 | :param action: (int []) 181 | :return: (float []) state, (float) reward, (bool) terminated 182 | """ 183 | self.steps += 1 184 | self.x = self.x + action[0] 185 | self.y = self.y + action[1] 186 | if (self.x > AREA_WIDTH): 187 | self.x = AREA_WIDTH 188 | if (self.x < -AREA_WIDTH): 189 | self.x = -AREA_WIDTH 190 | if (self.y > AREA_WIDTH): 191 | self.y = AREA_WIDTH 192 | if (self.y < -AREA_WIDTH): 193 | self.y = -AREA_WIDTH 194 | 195 | for i in range(PEOPLES): 196 | self.peoples_array[i].calculate_position() 197 | state = self.get_state() 198 | reward = self.calc_reward() 199 | if ((reward == -PEOPLE_PENALTY) or (reward == TARGET_REWARD) or (self.steps >= 200)): 200 | terminated = True 201 | else: 202 | terminated = False 203 | 204 | return state, reward, terminated 205 | 206 | 207 | class people(): 208 | """ 209 | Dynamic obstacles class (people) 210 | """ 211 | def __init__(self): 212 | self.x = random.randint(-AREA_WIDTH, AREA_WIDTH) 213 | self.y = random.randint(-AREA_WIDTH, AREA_WIDTH) 214 | self.vx = random.randint(-1, 1) # random.uniform(-0.2,0.2) 215 | self.vy = random.randint(-1, 1) # random.uniform(-0.2,0.2) 216 | 217 | def calculate_position(self): 218 | """ 219 | Calculate obstacles position 220 | :return: NULL 221 | """ 222 | x = self.x + self.vx 223 | y = self.y + self.vy 224 | if (x > AREA_WIDTH): 225 | x = AREA_WIDTH 226 | self.vx = -1 # random.uniform(-0.2,-0.0001) 227 | self.vy = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 228 | if (x < -AREA_WIDTH): 229 | x = -AREA_WIDTH 230 | self.vx = 1 # random.uniform(0.0001,0.2) 231 | self.vy = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 232 | if (y > AREA_WIDTH): 233 | y = AREA_WIDTH 234 | self.vx = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 235 | self.vy = -1 # random.uniform(-0.2,-0.0001) 236 | if (y < -AREA_WIDTH): 237 | y = -AREA_WIDTH 238 | self.vx = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 239 | self.vy = 1 # random.uniform(0.0001,0.2) 240 | self.x = x 241 | self.y = y 242 | 243 | 244 | class DQNAgent: 245 | """ 246 | Create DQN model and save history. 
247 | """ 248 | def __init__(self): 249 | 250 | # Main model 251 | self.model = self.create_model() 252 | self.history = None 253 | # Target network 254 | self.target_model = self.create_model() 255 | self.target_model.set_weights(self.model.get_weights()) 256 | 257 | # An array with last n steps for training 258 | self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE) 259 | 260 | # Used to count when to update target network with main network's weights 261 | self.target_update_counter = 0 262 | self.tensorboard = TensorBoard(log_dir="logs/{}".format(time())) 263 | 264 | def create_model(self): 265 | """ 266 | Create model 267 | :return: model 268 | """ 269 | model = Sequential() 270 | model.add(Conv2D(32, (5, 5), input_shape=(2 * AREA_WIDTH + 1, 2 * AREA_WIDTH + 1, 3), padding='same')) 271 | model.add(Activation('relu')) 272 | model.add(Conv2D(64, (3, 3), input_shape=(2 * AREA_WIDTH + 1, 2 * AREA_WIDTH + 1, 3), padding='same')) 273 | model.add(Activation('relu')) 274 | model.add(Conv2D(64, (2, 2), padding='same')) 275 | model.add(Activation('relu')) 276 | model.add(Flatten()) 277 | model.add(Dense(512)) 278 | model.add(Dense(8, activation='linear')) 279 | model.compile(loss="huber_loss", optimizer='Adam', metrics=['accuracy']) 280 | return model 281 | 282 | # Adds step's data to a memory replay array 283 | # (observation space, action, reward, new observation space, done) 284 | def update_replay_memory(self, transition): 285 | self.replay_memory.append(transition) 286 | 287 | # Trains main network every step during episode 288 | def train(self, terminal_state, step): 289 | 290 | # Start training only if certain number of samples is already saved 291 | if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE: 292 | return 293 | 294 | # Get a minibatch of random samples from memory replay table 295 | minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE) 296 | 297 | # Get current states from minibatch, then query NN model for Q values 298 | current_states = np.array([transition[0] for transition in minibatch]) 299 | current_qs_list = self.model.predict(current_states) 300 | 301 | # Get future states from minibatch, then query NN model for Q values 302 | # When using target network, query it, otherwise main network should be queried 303 | new_current_states = np.array([transition[3] for transition in minibatch]) 304 | future_qs_list = self.target_model.predict(new_current_states) 305 | 306 | X = [] 307 | y = [] 308 | 309 | # Now we need to enumerate our batches 310 | for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch): 311 | 312 | # If not a terminal state, get new q from future states, otherwise set it to 0 313 | # almost like with Q Learning, but we use just part of equation here 314 | if not done: 315 | max_future_q = np.max(future_qs_list[index]) 316 | new_q = reward + DISCOUNT * max_future_q 317 | else: 318 | new_q = reward 319 | 320 | # Update Q value for given state 321 | current_qs = current_qs_list[index] 322 | current_qs[action] = new_q 323 | 324 | # And append to our training data 325 | X.append(current_state) 326 | y.append(current_qs) 327 | 328 | # Fit on all samples as one batch, log only on terminal state 329 | self.history = self.model.fit(np.array(X), np.array(y), batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False, 330 | callbacks=[self.tensorboard]) 331 | 332 | # Update target network counter every episode 333 | if terminal_state: 334 | self.target_update_counter += 1 335 | 336 | # If counter reaches set value, update target network 
with weights of main network 337 | if self.target_update_counter > UPDATE_TARGET_EVERY: 338 | self.target_model.set_weights(self.model.get_weights()) 339 | self.target_update_counter = 0 340 | accurancy.append(self.history.history['accuracy'][0]) 341 | loss.append(self.history.history['loss'][0]) 342 | 343 | # Queries main network for Q values given current observation space (environment state) 344 | def get_qs(self, state): 345 | return self.model.predict(np.array(state).reshape(-1, *state.shape))[0] 346 | 347 | 348 | episode_rewards = [] 349 | agent = DQNAgent() 350 | accurancy = [] 351 | loss = [] 352 | 353 | 354 | def simulation(Simulation): 355 | info = False 356 | obs = env.sub() 357 | # print(obs) 358 | if (not Simulation): 359 | episode_reward = 0 360 | step = 1 361 | current_state = env.reset() 362 | 363 | # Reset flag and start iterating until episode ends 364 | done = False 365 | while not done: 366 | 367 | # This part stays mostly the same, the change is to query a model for Q values 368 | if np.random.random() > env.eps: 369 | # Get action from Q table 370 | id = np.argmax(agent.get_qs(current_state)) 371 | action, _ = env.get_action_model(id) 372 | else: 373 | # Get random action 374 | id = np.random.randint(0, 8) 375 | action, _ = env.get_action_model(id) 376 | 377 | new_state, reward, done = env.step(action) 378 | 379 | # Transform new continous state to new discrete state and count reward 380 | env.ep += reward 381 | 382 | # Every step we update replay memory and train main network 383 | agent.update_replay_memory((current_state, id, reward, new_state, done)) 384 | agent.train(done, step) 385 | 386 | current_state = new_state 387 | step += 1 388 | 389 | # Append episode reward to a list and log stats (every given number of episodes) 390 | 391 | # Decay epsilon 392 | if env.eps > MIN_EPSILON: 393 | env.eps *= EPSILON_DECAY 394 | env.eps = max(MIN_EPSILON, env.eps) 395 | else: 396 | action, _ = env.get_action_model(np.argmax(agent.get_qs(env.map))) 397 | new_state, reward, done = env.step(action) 398 | 399 | if done == True: 400 | episode_rewards.append(env.ep) 401 | env.eps *= EPSILON_DECAY 402 | info = True 403 | env.reset() 404 | 405 | return info 406 | 407 | 408 | def update_data(frame_number): 409 | simulation(True) 410 | for i in range(PEOPLES): 411 | peoples_x[i] = env.peoples_array[i].x 412 | peoples_y[i] = env.peoples_array[i].y 413 | 414 | obj1.set_data(env.x, env.y) 415 | obj2.set_data(env.tx, env.ty) 416 | # new data to plot 417 | obj.set_data(peoples_x, peoples_y) 418 | return None 419 | 420 | 421 | def anim(plt): 422 | for i in range(PEOPLES): 423 | peoples_x.append(env.peoples_array[i].x) 424 | peoples_y.append(env.peoples_array[i].y) 425 | 426 | points_rx.append(env.x) 427 | points_ry.append(env.y) 428 | points_tx.append(env.tx) 429 | points_ty.append(env.ty) 430 | # set view limits 431 | 432 | ax = plt.axes() 433 | ax.set_ylim(-AREA_WIDTH, AREA_WIDTH) 434 | ax.set_xlim(-AREA_WIDTH, AREA_WIDTH) 435 | 436 | # animation controller - have to be assigned to variable 437 | anim = animation.FuncAnimation(fig, update_data, MAX_FRAMES, interval=50) # , repeat=False) 438 | plt.show() 439 | 440 | 441 | env = robot() 442 | env.reset() 443 | eps_history = [] 444 | k = 16 445 | for i in tqdm(range(1000), ascii=True, unit='episodes'): 446 | for ii in tqdm(range(k), ascii=True, unit='step'): 447 | while (1): 448 | if (simulation(False)): 449 | break 450 | print() 451 | print('Epoch:', i) 452 | print('Reward:', episode_rewards[i * k + ii]) 453 | print('Eps:', env.eps) 454 | 
        eps_history.append(env.eps)
455 |         if (len(accurancy) > 0):
456 |             print('accuracy:', accurancy[len(accurancy) - 1])
457 |             print('loss:', loss[len(loss) - 1])
458 | 
459 | scale = 100
460 | scale1 = 2000
461 | moving_avg = np.convolve(episode_rewards, np.ones((scale,)) / scale, mode='valid')
462 | plt.plot([i for i in range(len(moving_avg))], moving_avg)
463 | # plt.plot(episode_rewards)
464 | plt.ylabel("Reward (moving average)")
465 | plt.xlabel("episode #")
466 | plt.show()
467 | 
468 | 
469 | # Plot results
470 | accurancy = np.array(accurancy) * 100
471 | accurancy = accurancy.tolist()
472 | moving_avg1 = np.convolve(accurancy, np.ones((scale1,)) / scale1, mode='valid')
473 | plt.plot([i for i in range(len(moving_avg1))], moving_avg1)
474 | # plt.plot(episode_rewards)
475 | plt.ylabel("Accuracy [%]")
476 | plt.xlabel("episode #")
477 | plt.show()
478 | 
479 | moving_avg2 = np.convolve(loss, np.ones((scale1,)) / scale1, mode='valid')
480 | plt.plot([i for i in range(len(moving_avg2))], moving_avg2)
481 | # plt.plot(episode_rewards)
482 | plt.ylabel("Loss")
483 | plt.xlabel("episode #")
484 | plt.show()
485 | 
486 | moving_avg3 = np.convolve(eps_history, np.ones((scale,)) / scale, mode='valid')
487 | plt.plot([i for i in range(len(moving_avg3))], moving_avg3)
488 | # plt.plot(episode_rewards)
489 | plt.ylabel("Epsilon")
490 | plt.xlabel("episode #")
491 | plt.show()
492 | 
493 | env.reset()
494 | fig = plt.figure()
495 | obj, = plt.plot([], [], 'ro')
496 | obj1, = plt.plot([], [], 'yo')
497 | obj2, = plt.plot([], [], 'bo')
498 | anim(plt)
499 | 
--------------------------------------------------------------------------------
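The README's stabilization notes for DQN_PATH.py (two Q networks, the Huber loss, small bounded rewards) correspond to what DQNAgent.train() actually does: bootstrap targets are computed from target_model and the online model is compiled with loss="huber_loss". The short NumPy sketch below isolates those two ingredients for illustration only; the helper names huber and td_target, the toy batch, and the printed values are invented here and are not part of the repository.

import numpy as np

DISCOUNT = 0.85  # same discount factor as DQN_PATH.py


def huber(residual, delta=1.0):
    # Quadratic near zero, linear in the tails: large TD errors give bounded gradients.
    a = np.abs(residual)
    return np.where(a <= delta, 0.5 * a ** 2, delta * (a - 0.5 * delta))


def td_target(reward, done, target_q_next, gamma=DISCOUNT):
    # Bootstrap from the *target* network's Q-values for s', as DQNAgent.train() does;
    # terminal transitions keep only the raw reward.
    return reward + gamma * np.max(target_q_next) * (1.0 - done)


q_next = np.random.uniform(-1, 1, 8)      # one Q-value per action of get_action_model()
print(td_target(-0.01, 0.0, q_next))      # ordinary move: small penalty, non-terminal
print(td_target(1.0, 1.0, q_next))        # reaching the target ends the episode
print(huber(np.array([0.2, 3.0])))        # small vs. large TD error

Keeping rewards in a small range means most TD errors stay in the quadratic region of the Huber loss, so the two measures reinforce each other.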