├── README.md
├── LICENSE
├── .gitignore
├── potential field2.py
├── GSA.py
├── QL_path.py
└── DQN_PATH.py

/README.md:
--------------------------------------------------------------------------------
 1 | # Robot_path_planning
 2 | Robot path planning in static and dynamic environments (GSA, artificial potential field, Q-learning, DDQN)
 3 | 1) GSA.py
 4 | Uses the Gravitational Search Algorithm for path planning with static obstacles.
 5 | 2) potential field2.py
 6 | Example of path planning with the artificial potential field algorithm in a dynamic environment.
 7 | 3) QL_path.py
 8 | Uses Q-learning for path planning in a dynamic environment.
 9 | Observation: target and two dynamic obstacles.
10 | 4) DQN_PATH.py
11 | Creates a DDQN for path planning in a dynamic environment.
12 | WARNING: training can be unstable.
13 | Stabilization techniques used:
14 | - two Q networks (online and target),
15 | - Huber loss function,
16 | - reward values limited to the range [0, 1]
17 | 
18 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 mn270
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /potential field2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example how use algorithm potential field for robotic path planning in dynamic environment. 
3 | """ 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import random 7 | 8 | # Parameters 9 | 10 | 11 | KP = 2.0 # attractive potential gain 12 | ETA = 100.0 # repulsive potential gain 13 | AREA_WIDTH = 30.0 # potential area width [m] 14 | agents = 10 15 | 16 | 17 | # show_animation = True 18 | 19 | def calc_attractive_potential(x, y, gx, gy): 20 | return 0.5 * KP * np.hypot(x - gx, y - gy) 21 | 22 | 23 | def calc_repulsive_potential(x, y, ox, oy, rr): 24 | # search nearest obstacle 25 | minid = -1 26 | dmin = float("inf") 27 | for i, _ in enumerate(ox): 28 | d = np.hypot(x - ox[i], y - oy[i]) 29 | if dmin >= d: 30 | dmin = d 31 | minid = i 32 | 33 | # calc repulsive potential 34 | dq = np.hypot(x - ox[minid], y - oy[minid]) 35 | 36 | if dq <= rr: 37 | if dq <= 0.1: 38 | dq = 0.1 39 | 40 | return 0.5 * ETA * (1.0 / dq - 1.0 / rr) ** 2 41 | else: 42 | return 0.0 43 | 44 | 45 | def get_motion_model(): 46 | motion = [[1, 0], 47 | [0, 1], 48 | [-1, 0], 49 | [0, -1], 50 | [-1, -1], 51 | [-1, 1], 52 | [1, -1], 53 | [1, 1]] 54 | 55 | return motion 56 | 57 | 58 | def draw_heatmap(data): 59 | data = np.array(data).T 60 | plt.pcolor(data, vmax=100.0, cmap=plt.cm.Blues) 61 | 62 | 63 | class people(): 64 | def __init__(self, sx, sy, gx, gy, reso, rr, str): 65 | self.ix = sx 66 | self.iy = sy 67 | self.sx = sx 68 | self.sy = sy 69 | self.gx = gx 70 | self.gy = gy 71 | self.reso = reso 72 | self.rr = rr 73 | self.d = np.hypot(self.sx - self.gx, self.sy - self.gy) 74 | self.str = str 75 | 76 | def calc_potential_field(self, ox, oy, ): 77 | minx = - AREA_WIDTH 78 | miny = - AREA_WIDTH 79 | maxx = AREA_WIDTH 80 | maxy = AREA_WIDTH 81 | xw = int(round((maxx - minx) / self.reso)) 82 | yw = int(round(np.divide(np.subtract(maxy, miny), self.reso))) 83 | 84 | # calc each potential 85 | pmap = [[0.0 for i in range(yw)] for i in range(xw)] 86 | 87 | for ix in range(xw): 88 | x = np.add(np.multiply(ix, self.reso), minx) 89 | 90 | for iy in range(yw): 91 | y = np.add(np.multiply(iy, self.reso), miny) 92 | ug = calc_attractive_potential(x, y, self.gx, self.gy) 93 | uo = calc_repulsive_potential(x, y, ox, oy, self.rr) 94 | uf = np.add(ug, uo) 95 | pmap[ix][iy] = uf 96 | 97 | return pmap, minx, miny 98 | 99 | def potential_field_planning(self, ox, oy, show_animation): 100 | 101 | # calc potential field 102 | pmap, minx, miny = self.calc_potential_field(ox, oy) 103 | 104 | # search path 105 | 106 | self.ix = round((self.sx - minx) / self.reso) 107 | self.iy = round((self.sy - miny) / self.reso) 108 | gix = round((self.gx - minx) / self.reso) 109 | giy = round((self.gy - miny) / self.reso) 110 | 111 | if show_animation: 112 | draw_heatmap(pmap) 113 | plt.plot(self.ix, self.iy, "*k") 114 | plt.plot(gix, giy, "*m") 115 | 116 | rx, ry = [self.sx], [self.sy] 117 | self.potential(show_animation, ox, oy) 118 | 119 | return rx, ry 120 | 121 | def potential(self, show_animation, ox, oy): 122 | motion = get_motion_model() 123 | pmap, minx, miny = self.calc_potential_field(ox, oy) 124 | if show_animation: 125 | draw_heatmap(pmap) 126 | if self.d >= self.reso: 127 | minp = float("inf") 128 | minix, miniy = -1, -1 129 | for i, _ in enumerate(motion): 130 | inx = int(self.ix + motion[i][0] + random.random()) 131 | iny = int(self.iy + motion[i][1] + random.random()) 132 | if inx >= len(pmap) or iny >= len(pmap[0]): 133 | p = float("inf") # outside area 134 | else: 135 | p = pmap[inx][iny] 136 | if minp > p: 137 | minp = p 138 | minix = inx 139 | miniy = iny 140 | self.ix = minix 141 | self.iy = miniy 142 | xp = 
self.ix * self.reso + minx 143 | yp = self.iy * self.reso + miny 144 | self.d = np.hypot(self.gx - xp, self.gy - yp) 145 | self.sx = xp 146 | self.sy = yp 147 | 148 | # if show_animation: 149 | plt.plot(self.ix, self.iy, self.str) 150 | plt.pause(0.000000001) 151 | 152 | return 0 153 | 154 | 155 | def main(): 156 | print("potential_field_planning start") 157 | 158 | sx = -28.0 # start x position [m] 159 | sy = -28.0 # start y positon [m] 160 | gx = 28.0 # goal x position [m] 161 | gy = 28.0 # goal y position [m] 162 | grid_size = 0.5 # potential grid size [m] 163 | robot_radius = 5.0 # robot radius [m] 164 | viel = random.randint(20, 40) 165 | show = True 166 | ox = [] 167 | oy = [] 168 | ox_ = [] 169 | oy_ = [] 170 | agent = [] 171 | str = [".g", ".c", ".m", ".y", ".k"] 172 | 173 | robot = people(sx, sy, gx, gy, grid_size, robot_radius, ".r") 174 | for _ in range(agents): 175 | agent.append( 176 | people(random.randint(-26, 26), random.randint(-26, 26), random.randint(-26, 26), random.randint(-26, 26), 177 | grid_size, robot_radius, str[random.randint(0, 4)])) 178 | ox.append(robot.ix) 179 | oy.append(robot.iy) 180 | for i in range(agents): 181 | ox.append(agent[i].ix) 182 | oy.append(agent[i].iy) 183 | 184 | for _ in range(viel): 185 | ox.append(random.randint(-26, 26)) 186 | oy.append(random.randint(-26, 26)) 187 | 188 | for i in range(len(ox)): 189 | ox_.append(ox[i]) 190 | oy_.append(oy[i]) 191 | 192 | # if show_animation: 193 | plt.grid(True) 194 | plt.axis("equal") 195 | 196 | # path generation 197 | ox_[0] = 0 198 | oy_[0] = 0 199 | robot.potential_field_planning(ox_, oy_, show) 200 | ox[0] = robot.sx 201 | oy[0] = robot.sy 202 | ox_[0] = ox[0] 203 | oy_[0] = oy[0] 204 | for i in range(agents): 205 | ox_[i + 1] = 0 206 | oy_[i + 1] = 0 207 | agent[i].potential_field_planning(ox_, oy_, False) 208 | ox[i + 1] = agent[i].sx 209 | oy[i + 1] = agent[i].sy 210 | ox_[i + 1] = ox[i + 1] 211 | oy_[i + 1] = oy[i + 1] 212 | while robot.d >= robot.reso: 213 | ox_[0] = 0 214 | oy_[0] = 0 215 | ox[0] = robot.sx 216 | oy[0] = robot.sy 217 | robot.potential(show, ox_, oy_) 218 | ox_[0] = ox[0] 219 | oy_[0] = oy[0] 220 | for i in range(agents): 221 | ox[i + 1] = agent[i].sx 222 | oy[i + 1] = agent[i].sy 223 | ox_[i + 1] = 0 224 | oy_[i + 1] = 0 225 | agent[i].potential(False, ox_, oy_) 226 | ox_[i + 1] = ox[i + 1] 227 | oy_[i + 1] = oy[i + 1] 228 | # if show_animation: 229 | print("Goal!!") 230 | plt.show() 231 | 232 | 233 | if __name__ == '__main__': 234 | print(__file__ + " start!!") 235 | main() 236 | print(__file__ + " Done!!") 237 | -------------------------------------------------------------------------------- /GSA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example how to use a Gravitational Search Algorithm to path planning 3 | """ 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import random 8 | import math 9 | 10 | # Parameters 11 | N = 30 # number of agents 12 | G0 = 20 # G0 gravity 50 13 | G = G0 14 | Eps = 0.001 # small constant ??? 
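# The classes below implement the standard GSA update: an agent's fitness is its
# straight-line distance to the goal (plus the `alfa` penalty on collision),
# masses are the normalized fitnesses m_i = (fit_i - worst) / (best - worst) and
# M_i = m_i / sum(m), the randomly weighted pairwise force is
# F_ij = G * M_i * M_j * (x_j - x_i) / (R_ij + Eps), where Eps avoids division
# by zero, and G decays over time as G = G0 * exp(-zet * t / Episodes).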
15 | Xs = -45 # START position 16 | Ys = -45 17 | Xg = 45 # GOAL position 18 | Yg = 45 19 | alfa = 1000 # penalty 20 | Area = 100 # size area 21 | robot_radius = 4.0 22 | Episodes = 600.0 # simulation time 23 | zet = 0.1 # zeta parameter exp to change gravity 24 | SHOW = 3000 25 | len = Area / 2 26 | xmax = ymax = len 27 | xmin = ymin = -len 28 | 29 | 30 | class obtacle(): 31 | """ 32 | Create static obstacles 33 | """ 34 | def __init__(self, x, y): 35 | self.x = x 36 | self.y = y 37 | 38 | 39 | class agent(): 40 | """ 41 | Create agent (one planet, object, mass) 42 | """ 43 | def __init__(self, x, y): 44 | self.x = [] 45 | self.y = [] 46 | self.fit_mat = [] 47 | self.m = 0.0 48 | self.M = 0.0 49 | self.x.append(Xs) 50 | self.y.append(Ys) 51 | self.x.append(Xs + random.randint(-3, 3)) 52 | self.y.append(Ys + random.randint(-3, 3)) 53 | self.fit = math.sqrt(pow(Xg - Xs, 2) + pow(Yg - Ys, 2)) 54 | self.Vx = random.randint(-3, 3) 55 | self.Vy = random.randint(-3, 3) 56 | self.ax = 0.0 57 | self.ay = 0.0 58 | self.Fx = 0.0 59 | self.Fy = 0.0 60 | self.Succes = 0 61 | self.colision = False 62 | 63 | def calc_position(self, t): 64 | """ 65 | Calculate actual position 66 | :param t: 67 | :return: 68 | """ 69 | x = math.ceil(self.x[t] + self.Vx) 70 | y = math.ceil(self.y[t] + self.Vy) 71 | x += random.randint(-1, 1) 72 | y += random.randint(-1, 1) 73 | if (self.fit < 5): 74 | self.y.append(Yg) 75 | self.x.append(Xg) 76 | self.Succes = 1 77 | 78 | else: 79 | self.y.append(y) 80 | self.x.append(x) 81 | 82 | def calc_fit(self, obtacles, t): 83 | colision = False 84 | if ((self.x[t] - self.x[t - 1]) == 0): 85 | A = 1 86 | B = self.x[t] 87 | C = 0 88 | else: 89 | A = (self.y[t] - self.y[t - 1]) / (self.x[t] - self.x[t - 1]) 90 | C = self.y[t - 1] - self.x[t - 1] * (self.y[t] - self.y[t - 1]) / (self.x[t] - self.x[t - 1]) 91 | B = -1 92 | d = math.sqrt(pow(A, 2) + pow(B, 2)) 93 | if (self.x[t - 1] > self.x[t]): 94 | Xup = self.x[t - 1] 95 | Xdown = self.x[t] 96 | else: 97 | Xup = self.x[t] 98 | Xdown = self.x[t - 1] 99 | if (self.y[t - 1] > self.y[t]): 100 | Yup = self.y[t - 1] 101 | Ydown = self.x[t] 102 | else: 103 | Yup = self.y[t] 104 | Ydown = self.y[t - 1] 105 | for i, _ in enumerate(obtacles): 106 | if ((abs(A * obtacles[i].x + B * obtacles[i].y + C) / d < robot_radius / 2) and ( 107 | obtacles[i].x < Xup + robot_radius / 2) and (obtacles[i].x > Xdown - robot_radius / 2) and ( 108 | obtacles[i].y < Yup + robot_radius / 2) and (obtacles[i].y > Ydown - robot_radius / 2)): 109 | colision = True 110 | if (colision): 111 | self.fit = math.sqrt(pow(Xg - self.x[t], 2) + pow(Yg - self.y[t], 2)) + alfa 112 | self.colision = True 113 | else: 114 | self.fit = math.sqrt(pow(Xg - self.x[t], 2) + pow(Yg - self.y[t], 2)) 115 | self.fit_mat.append(self.fit) 116 | 117 | def calc_m(self, worst, bw): 118 | k = self.fit - worst 119 | if (k == 0): 120 | k = -random.random() 121 | self.m = k / bw 122 | # self.m = k / N 123 | 124 | def calc_M(self, sum_m): 125 | self.M = self.m / sum_m 126 | 127 | def calc_V(self): 128 | self.Vx = random.random() * self.Vx + self.ax 129 | self.Vy = random.random() * self.Vy + self.ay 130 | 131 | def calc_a(self): 132 | self.ax = self.Fx / self.M 133 | self.ay = self.Fy / self.M 134 | 135 | def calc_F(self, agents, i, t, limit_k): 136 | self.Fx = 0 137 | self.Fy = 0 138 | for n in range(N): 139 | if n != i: 140 | if (agents[n].fit < limit_k): 141 | dx = agents[n].x[t] - self.x[t] 142 | dy = agents[n].y[t] - self.y[t] 143 | E = math.sqrt(pow(dx, 2) + pow(dy, 2)) 144 | GME = G * self.M * 
agents[n].M / (E + Eps) 145 | Fx = GME * dx 146 | self.Fx = random.random() * Fx + self.Fx 147 | Fy = GME * dy 148 | self.Fy = random.random() * Fy + self.Fy 149 | 150 | 151 | def path_planning(agents, obtacles, G): 152 | SUCCES = 0 153 | plt.plot(Xs, Ys, "*k") 154 | plt.plot(Xg, Yg, "*m") 155 | t = 0 156 | while ((t < Episodes) and (N / 2 > SUCCES)): 157 | t += 1 158 | SUCCES = 0 159 | worst = agents[0].fit 160 | best = agents[0].fit 161 | b = 0 162 | for i in range(N): 163 | agents[i].calc_fit(obtacles, t) 164 | if (agents[i].fit > worst): 165 | worst = agents[i].fit 166 | if (agents[i].fit < best): 167 | best = agents[i].fit 168 | b = i 169 | SUM_m = 0 170 | bw = best - worst 171 | for i in range(N): 172 | agents[i].calc_m(worst, bw) 173 | SUM_m += agents[i].m 174 | 175 | for i in range(N): 176 | agents[i].calc_M(SUM_m) 177 | agents[i].calc_F(agents, i, t, worst + 3 * bw / 4) 178 | agents[i].calc_a() 179 | agents[i].calc_V() 180 | 181 | for i in range(N): 182 | agents[i].calc_position(t) 183 | SUCCES += agents[i].Succes 184 | if (t > SHOW): 185 | plt.plot(agents[i].x[t - 1], agents[i].y[t - 1], ".w") 186 | plt.plot(agents[i].x[t], agents[i].y[t], ".b") 187 | plt.pause(0.1) 188 | 189 | G = G0 * math.exp(zet * -t / Episodes) 190 | 191 | D = [] 192 | index = [] 193 | fit_data = [] 194 | L = 0 195 | for i in range(N): 196 | d = 0 197 | if (not agents[i].colision): 198 | for n in range(t - 1): 199 | d += math.sqrt( 200 | pow(agents[i].x[n + 1] - agents[i].x[n], 2) + pow(agents[i].y[n + 1] - agents[i].y[n], 2)) 201 | D.append(math.ceil(d)) 202 | index.append(i) 203 | L += 1 204 | id = D.index(min(D)) 205 | id = index[id] 206 | 207 | for n in range(t): 208 | s = 0 209 | for i in range(L): 210 | s += agents[index[i]].fit_mat[n] 211 | fit_data.append(s / L) 212 | 213 | return id, t, fit_data, min(D) 214 | 215 | 216 | def main(): 217 | print("potential_field_planning start") 218 | obtacles = [] 219 | agents = [] 220 | viel = random.randint(20, 30) 221 | plt.grid(True) 222 | plt.axis("equal") 223 | 224 | for i in range(viel): 225 | obtacles.append(obtacle(random.randint(-len, len), random.randint(-len, len))) 226 | plt.plot(obtacles[i].x, obtacles[i].y, "o") 227 | 228 | plt.grid(True) 229 | plt.axis("equal") 230 | for _ in range(N): 231 | agents.append(agent(Xs, Ys)) 232 | 233 | id, t, data, path = path_planning(agents, obtacles, G) 234 | 235 | plt.cla() 236 | fig1 = plt.figure() 237 | ax1 = fig1.add_subplot(1, 1, 1) 238 | k = 9 239 | for i in range(viel): 240 | ax1.plot(obtacles[i].x, obtacles[i].y, "o") 241 | ax1.plot(Xs, Ys, "*k") 242 | ax1.plot(Xg, Yg, "*m") 243 | 244 | ax1.plot(agents[id].x, agents[id].y, linewidth=1) 245 | ax1.set_title('path') 246 | 247 | fig = plt.figure() 248 | ax = fig.add_subplot(1, 1, 1) 249 | ax.plot(agents[id].fit_mat, color='tab:blue', label='best_fit') 250 | ax.plot(data, color='tab:orange', label='average_fit') 251 | ax.set_title('fit function') 252 | fig.legend(loc='up right') 253 | 254 | print(t) 255 | print(path) 256 | 257 | while (1): 258 | plt.show() 259 | 260 | 261 | if __name__ == '__main__': 262 | print(__file__ + " start!!") 263 | main() 264 | 265 | print(__file__ + " Done!!") 266 | -------------------------------------------------------------------------------- /QL_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create path planner with Q-learning. 
3 | Precise description environment function, can be found in DQN_PATH.py 4 | """ 5 | import numpy as np 6 | from scipy.spatial.distance import pdist, squareform 7 | import matplotlib.pyplot as plt 8 | import scipy.integrate as integrate 9 | import matplotlib.animation as animation 10 | import random 11 | from random import uniform 12 | import math 13 | import pickle 14 | from matplotlib import style 15 | import time 16 | 17 | style.use("ggplot") 18 | 19 | MOVE_PENALTY = 1 20 | PEOPLE_PENALTY = 300 21 | TARGET_REWARD = 40 22 | epsilon = 2 23 | EPS_DECAY = 0.9998 # Every episode will be epsilon*EPS_DECAY 24 | SHOW_EVERY = 3000 # how often to play through env visually. 25 | 26 | start_q_table = None # None or Filename 27 | 28 | LEARNING_RATE = 0.1 29 | DISCOUNT = 0.95 30 | 31 | AREA_WIDTH = 2 32 | PEOPLES = 2 33 | MAX_FRAMES = 50 34 | RADIUS = 1 35 | START_X = -1 36 | START_Y = -1 37 | EPISODE = 0 38 | peoples_x = [] 39 | peoples_y = [] 40 | peoples = [] 41 | points_rx = [] 42 | points_ry = [] 43 | points_tx = [] 44 | points_ty = [] 45 | 46 | 47 | class robot(): 48 | def __init__(self): 49 | self.x = 0 50 | self.y = 0 51 | self.tx = 0 52 | self.ty = 0 53 | self.peoples_array = [] 54 | self.map = [] 55 | self.ep = 0 56 | self.eps = epsilon 57 | 58 | def sub(self): 59 | observe = [(int(self.x - self.tx), int(self.y - self.ty))] 60 | for i in range(PEOPLES): 61 | observe1 = (int(self.x - self.peoples_array[i].x), int(self.y - self.peoples_array[i].y)) 62 | observe.insert(i + 1, observe1) 63 | return tuple(observe) 64 | 65 | def get_action_model(self, id): 66 | action = [[1, 0], 67 | [0, 1], 68 | [-1, 0], 69 | [0, -1], 70 | [-1, -1], 71 | [-1, 1], 72 | [1, -1], 73 | [1, 1]] 74 | return action[id], id 75 | 76 | def calc_reward(self): 77 | d = [] 78 | for i in range(PEOPLES): 79 | d.append(math.sqrt(pow(self.peoples_array[i].x - self.x, 2) + pow(self.peoples_array[i].y - self.y, 2))) 80 | D = min(d) 81 | 82 | if (D < RADIUS / 2): 83 | reward = -PEOPLE_PENALTY 84 | elif (math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) < RADIUS / 2): 85 | reward = TARGET_REWARD 86 | else: 87 | reward = -MOVE_PENALTY 88 | self.distance = math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) 89 | return reward 90 | 91 | def get_state(self): 92 | pmap = [[0.0 for i in range(int(2 * AREA_WIDTH))] for i in range(int(2 * AREA_WIDTH))] 93 | self.map = pmap 94 | self.map[int((AREA_WIDTH + self.x) * 31 / 32)][int((AREA_WIDTH + self.y) * 31 / 32)] = 0.5 95 | self.map[int((AREA_WIDTH + self.tx) * 31 / 32)][int((AREA_WIDTH + self.ty) * 31 / 32)] = 1 96 | for i in range(PEOPLES): 97 | self.map[int((AREA_WIDTH + self.peoples_array[i].x) * 31 / 32)][ 98 | int((AREA_WIDTH + self.peoples_array[i].y) * 31 / 32)] = -1 99 | return self.map 100 | 101 | def reset(self): 102 | self.x = START_X 103 | self.y = START_Y 104 | self.tx = random.uniform(-AREA_WIDTH, AREA_WIDTH) 105 | self.ty = random.uniform(-AREA_WIDTH, AREA_WIDTH) 106 | self.ep = 0 107 | peoples = [] 108 | for _ in range(PEOPLES): 109 | peoples.append(people()) 110 | self.peoples_array = peoples 111 | state = self.get_state() 112 | return state 113 | 114 | def step(self, action): 115 | self.x = self.x + action[0] * 0.2 116 | self.y = self.y + action[1] * 0.2 117 | if (self.x > AREA_WIDTH): 118 | self.x = AREA_WIDTH 119 | if (self.x < -AREA_WIDTH): 120 | self.x = -AREA_WIDTH 121 | if (self.y > AREA_WIDTH): 122 | self.y = AREA_WIDTH 123 | if (self.y < -AREA_WIDTH): 124 | self.y = -AREA_WIDTH 125 | 126 | for i in range(PEOPLES): 127 | 
self.peoples_array[i].calculate_position() 128 | state = self.get_state() 129 | reward = self.calc_reward() 130 | if ((reward == -PEOPLE_PENALTY) or (reward == TARGET_REWARD)): 131 | terminated = True 132 | else: 133 | terminated = False 134 | 135 | return state, reward, terminated 136 | 137 | 138 | class people(): 139 | def __init__(self): 140 | self.x = random.uniform(-AREA_WIDTH, AREA_WIDTH) 141 | self.y = random.uniform(-AREA_WIDTH, AREA_WIDTH) 142 | self.vx = random.uniform(-0.2, 0.2) 143 | self.vy = random.uniform(-0.2, 0.2) 144 | 145 | def calculate_position(self): 146 | x = self.x + self.vx 147 | y = self.y + self.vy 148 | if (x > AREA_WIDTH): 149 | x = AREA_WIDTH 150 | self.vx = random.uniform(-0.2, -0.0001) 151 | self.vy = random.uniform(-0.2, 0.2) 152 | if (x < -AREA_WIDTH): 153 | x = -AREA_WIDTH 154 | self.vx = random.uniform(0.0001, 0.2) 155 | self.vy = random.uniform(-0.2, 0.2) 156 | if (y > AREA_WIDTH): 157 | y = AREA_WIDTH 158 | self.vx = random.uniform(-0.2, 0.2) 159 | self.vy = random.uniform(-0.2, -0.0001) 160 | if (y < -AREA_WIDTH): 161 | y = -AREA_WIDTH 162 | self.vx = random.uniform(-0.2, 0.2) 163 | self.vy = random.uniform(0.0001, 0.2) 164 | self.x = x 165 | self.y = y 166 | 167 | 168 | if start_q_table is None: 169 | # initialize the q-table# 170 | q_table = {} 171 | for i in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 172 | for ii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 173 | for iii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 174 | for iiii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 175 | for iiiii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 176 | for iiiiii in range(-2 * AREA_WIDTH, 2 * AREA_WIDTH + 1): 177 | q_table[((i, ii), (iii, iiii), (iiiii, iiiiii))] = [np.random.uniform(-10, 0) for i in 178 | range(8)] 179 | 180 | else: 181 | with open(start_q_table, "rb") as f: 182 | q_table = pickle.load(f) 183 | 184 | episode_rewards = [] 185 | 186 | 187 | def simulation(Simulation): 188 | info = False 189 | obs = env.sub() 190 | # print(obs) 191 | if (not Simulation): 192 | if np.random.random() > env.eps: 193 | # GET THE ACTION 194 | action, action_id = env.get_action_model(np.argmax(q_table[obs])) 195 | else: 196 | action, action_id = env.get_action_model(np.random.randint(0, 8)) 197 | # Take the action! 
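        # The block above is an epsilon-greedy choice: exploit the Q-table when a
        # uniform draw exceeds env.eps, otherwise pick one of the 8 moves at random.
        # After env.step() the code applies the standard tabular Q-learning update
        #   Q(s, a) <- (1 - LEARNING_RATE) * Q(s, a)
        #              + LEARNING_RATE * (reward + DISCOUNT * max_a' Q(s', a'))
        # collapsing the target to the bare reward on terminal transitions.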
198 | _, reward, terminated = env.step(action) 199 | 200 | new_obs = env.sub() 201 | max_future_q = np.max(q_table[new_obs]) 202 | current_q = q_table[obs][action_id] 203 | 204 | if terminated == True: 205 | new_q = reward 206 | else: 207 | new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q) 208 | q_table[obs][action_id] = new_q 209 | 210 | env.ep += reward 211 | else: 212 | action, action_id = env.get_action_model(np.argmax(q_table[obs])) 213 | _, reward, terminated = env.step(action) 214 | env.ep += reward 215 | if (terminated): 216 | print(env.ep) 217 | if terminated == True: 218 | episode_rewards.append(env.ep) 219 | env.eps *= EPS_DECAY 220 | info = True 221 | env.reset() 222 | 223 | return info 224 | 225 | 226 | def update_data(frame_number): 227 | # action = env.get_action_model(random.randint(0,7)) 228 | # env.step(action) 229 | simulation(True) 230 | for i in range(PEOPLES): 231 | peoples_x[i] = env.peoples_array[i].x 232 | peoples_y[i] = env.peoples_array[i].y 233 | 234 | obj1.set_data(env.x, env.y) 235 | obj2.set_data(env.tx, env.ty) 236 | # new data to plot 237 | obj.set_data(peoples_x, peoples_y) 238 | return None 239 | 240 | 241 | def anim(): 242 | for i in range(PEOPLES): 243 | peoples_x.append(env.peoples_array[i].x) 244 | peoples_y.append(env.peoples_array[i].y) 245 | 246 | points_rx.append(env.x) 247 | points_ry.append(env.y) 248 | points_tx.append(env.tx) 249 | points_ty.append(env.ty) 250 | # set view limits 251 | ax = plt.axes() 252 | ax.set_ylim(-AREA_WIDTH, AREA_WIDTH) 253 | ax.set_xlim(-AREA_WIDTH, AREA_WIDTH) 254 | 255 | # animation controller - have to be assigned to variable 256 | anim = animation.FuncAnimation(fig, update_data, MAX_FRAMES, interval=50) # , repeat=False) 257 | plt.show() 258 | 259 | 260 | env = robot() 261 | env.reset() 262 | for i in range(1000000): 263 | for ii in range(20): 264 | while (1): 265 | if (simulation(False)): 266 | break 267 | print(i) 268 | 269 | moving_avg = np.convolve(episode_rewards, np.ones((SHOW_EVERY,)) / SHOW_EVERY, mode='valid') 270 | plt.plot([i for i in range(len(moving_avg))], moving_avg) 271 | # plt.plot(episode_rewards) 272 | plt.ylabel(f"Reward ma") 273 | plt.xlabel("episode #") 274 | plt.show() 275 | 276 | env.reset() 277 | fig = plt.figure() 278 | obj, = plt.plot([], [], 'ro') 279 | obj1, = plt.plot([], [], 'yo') 280 | obj2, = plt.plot([], [], 'bo') 281 | anim() 282 | -------------------------------------------------------------------------------- /DQN_PATH.py: -------------------------------------------------------------------------------- 1 | """ 2 | Realization DDQN robot path planning in dynamic environment 3 | """ 4 | import numpy as np 5 | from scipy.spatial.distance import pdist, squareform 6 | import matplotlib.pyplot as plt 7 | import scipy.integrate as integrate 8 | import matplotlib.animation as animation 9 | import random 10 | from random import uniform 11 | import math 12 | # import keras.backend.tensorflow_backend as backend 13 | from tensorflow.keras.models import Sequential 14 | from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten 15 | from tensorflow.keras.optimizers import Adam 16 | from tensorflow.keras.callbacks import TensorBoard 17 | from tensorflow.keras.callbacks import History 18 | import tensorflow as tf 19 | from collections import deque 20 | from tqdm import tqdm 21 | from tensorflow.keras.callbacks import TensorBoard 22 | import pickle 23 | from matplotlib import style 24 | from time import time 25 | 
import os 26 | 27 | # turn off warnings 28 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 29 | 30 | # For more repetitive results 31 | random.seed(1) 32 | np.random.seed(1) 33 | tf.random.set_seed(1) 34 | 35 | style.use("ggplot") 36 | 37 | MOVE_PENALTY = 0.01 38 | PEOPLE_PENALTY = 1 39 | TARGET_REWARD = 1 40 | epsilon = 1 41 | EPSILON_DECAY = 0.99978 42 | MIN_EPSILON = 0.001 43 | SHOW_EVERY = 3000 # how often to play through env visually. 44 | REPLAY_MEMORY_SIZE = 100_000 # How many last steps to keep for model training 45 | MIN_REPLAY_MEMORY_SIZE = 1_000 # Minimum number of steps in a memory to start training 46 | MINIBATCH_SIZE = 64 47 | # How many steps (samples) to use for training 48 | UPDATE_TARGET_EVERY = 5 49 | MIN_REWARD = -200 # For model save 50 | MEMORY_FRACTION = 0.20 51 | 52 | DISCOUNT = 0.85 53 | 54 | AREA_WIDTH = 5 55 | PEOPLES = 2 56 | MAX_FRAMES = 50 57 | RADIUS = 1 58 | START_X = -2 59 | START_Y = -2 60 | EPISODE = 0 61 | peoples_x = [] 62 | peoples_y = [] 63 | peoples = [] 64 | points_rx = [] 65 | points_ry = [] 66 | points_tx = [] 67 | points_ty = [] 68 | 69 | history = History() 70 | 71 | 72 | class robot(): 73 | def __init__(self): 74 | self.x = 0 75 | self.y = 0 76 | self.tx = 0 77 | self.ty = 0 78 | self.peoples_array = [] 79 | self.map = [] 80 | self.ep = 0 81 | self.eps = epsilon 82 | self.steps = 0 83 | 84 | def sub(self): 85 | """ 86 | Create vector of observation, distance between robot, obstacles and target. 87 | :return: (tuple []) observation 88 | """ 89 | observe = [(int(self.x - self.tx), int(self.y - self.ty))] 90 | for i in range(PEOPLES): 91 | observe1 = (int(self.x - self.peoples_array[i].x), int(self.y - self.peoples_array[i].y)) 92 | observe.insert(i + 1, observe1) 93 | return tuple(observe) 94 | 95 | def get_action_model(self, id): 96 | """ 97 | Encode action vector (movement). 98 | :param id: (int) action_id 99 | :return: (int []) action, (int) action_id 100 | """ 101 | action = [[1, 0], 102 | [0, 1], 103 | [-1, 0], 104 | [0, -1], 105 | [-1, -1], 106 | [-1, 1], 107 | [1, -1], 108 | [1, 1]] 109 | return action[id], id 110 | 111 | def calc_reward(self): 112 | """ 113 | Caculate reward, collision detect. 
114 | :return: (float) reward 115 | """ 116 | collision = False 117 | for i in range(PEOPLES): 118 | if (self.peoples_array[i].x == self.x and self.peoples_array[i].y == self.y): 119 | collision = True 120 | 121 | if (collision): 122 | reward = -PEOPLE_PENALTY 123 | elif (self.x == int(self.tx) and self.y == int( 124 | self.ty)): 125 | reward = TARGET_REWARD 126 | else: 127 | reward = -MOVE_PENALTY 128 | self.distance = math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) 129 | return reward 130 | 131 | def get_state(self): 132 | """ 133 | Create a map - image with 3 channels 134 | 1st channel - robot 135 | 2sd channel - target 136 | 3rd channel - obstacles 137 | 0 - empty position 138 | 1 - busy position 139 | :return: (float []) map 140 | """ 141 | pmap = np.zeros((2 * AREA_WIDTH + 1, 2 * AREA_WIDTH + 1, 3), dtype=float) 142 | pmap[int(AREA_WIDTH + self.x)][int(AREA_WIDTH + self.y)] += [1, 0, 0] 143 | pmap[int(AREA_WIDTH + self.tx)][int(AREA_WIDTH + self.ty)] += [0, 1, 0] 144 | for i in range(PEOPLES): 145 | pmap[int(AREA_WIDTH + self.peoples_array[i].x)][int(AREA_WIDTH + self.peoples_array[i].y)] += [0, 0, 1] 146 | 147 | self.map = pmap 148 | return self.map 149 | 150 | def reset(self): 151 | """ 152 | Reset environment 153 | :return: (float []) map 154 | """ 155 | self.x = random.randint(-AREA_WIDTH, AREA_WIDTH) 156 | self.y = random.randint(-AREA_WIDTH, AREA_WIDTH) 157 | self.tx = random.randint(-AREA_WIDTH, AREA_WIDTH) 158 | self.ty = random.randint(-AREA_WIDTH, AREA_WIDTH) 159 | if (math.sqrt(pow(self.tx - self.x, 2) + pow(self.ty - self.y, 2)) < 2): 160 | self.tx = -self.x 161 | self.ty = -self.y 162 | self.ep = 0 163 | peoples = [] 164 | for i in range(PEOPLES): 165 | peoples.append(people()) 166 | if (math.sqrt(pow(peoples[i].x - self.x, 2) + pow(peoples[i].y - self.y, 2)) < 2): 167 | peoples[i].x = -self.x 168 | peoples[i].y = -self.y 169 | self.peoples_array = peoples 170 | state = self.get_state() 171 | self.steps = 0 172 | return state 173 | 174 | def step(self, action): 175 | """ 176 | 1) Change robot position. 177 | 2) Change obstacles position. 178 | 3) Calculate reward. 179 | 4) Check ending condition. 
180 | :param action: (int []) 181 | :return: (float []) state, (float) reward, (bool) terminated 182 | """ 183 | self.steps += 1 184 | self.x = self.x + action[0] 185 | self.y = self.y + action[1] 186 | if (self.x > AREA_WIDTH): 187 | self.x = AREA_WIDTH 188 | if (self.x < -AREA_WIDTH): 189 | self.x = -AREA_WIDTH 190 | if (self.y > AREA_WIDTH): 191 | self.y = AREA_WIDTH 192 | if (self.y < -AREA_WIDTH): 193 | self.y = -AREA_WIDTH 194 | 195 | for i in range(PEOPLES): 196 | self.peoples_array[i].calculate_position() 197 | state = self.get_state() 198 | reward = self.calc_reward() 199 | if ((reward == -PEOPLE_PENALTY) or (reward == TARGET_REWARD) or (self.steps >= 200)): 200 | terminated = True 201 | else: 202 | terminated = False 203 | 204 | return state, reward, terminated 205 | 206 | 207 | class people(): 208 | """ 209 | Dynamic obstacles class (people) 210 | """ 211 | def __init__(self): 212 | self.x = random.randint(-AREA_WIDTH, AREA_WIDTH) 213 | self.y = random.randint(-AREA_WIDTH, AREA_WIDTH) 214 | self.vx = random.randint(-1, 1) # random.uniform(-0.2,0.2) 215 | self.vy = random.randint(-1, 1) # random.uniform(-0.2,0.2) 216 | 217 | def calculate_position(self): 218 | """ 219 | Calculate obstacles position 220 | :return: NULL 221 | """ 222 | x = self.x + self.vx 223 | y = self.y + self.vy 224 | if (x > AREA_WIDTH): 225 | x = AREA_WIDTH 226 | self.vx = -1 # random.uniform(-0.2,-0.0001) 227 | self.vy = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 228 | if (x < -AREA_WIDTH): 229 | x = -AREA_WIDTH 230 | self.vx = 1 # random.uniform(0.0001,0.2) 231 | self.vy = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 232 | if (y > AREA_WIDTH): 233 | y = AREA_WIDTH 234 | self.vx = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 235 | self.vy = -1 # random.uniform(-0.2,-0.0001) 236 | if (y < -AREA_WIDTH): 237 | y = -AREA_WIDTH 238 | self.vx = random.randint(-1, 1) # random.uniform(-0.2, 0.2) 239 | self.vy = 1 # random.uniform(0.0001,0.2) 240 | self.x = x 241 | self.y = y 242 | 243 | 244 | class DQNAgent: 245 | """ 246 | Create DQN model and save history. 
247 | """ 248 | def __init__(self): 249 | 250 | # Main model 251 | self.model = self.create_model() 252 | self.history = None 253 | # Target network 254 | self.target_model = self.create_model() 255 | self.target_model.set_weights(self.model.get_weights()) 256 | 257 | # An array with last n steps for training 258 | self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE) 259 | 260 | # Used to count when to update target network with main network's weights 261 | self.target_update_counter = 0 262 | self.tensorboard = TensorBoard(log_dir="logs/{}".format(time())) 263 | 264 | def create_model(self): 265 | """ 266 | Create model 267 | :return: model 268 | """ 269 | model = Sequential() 270 | model.add(Conv2D(32, (5, 5), input_shape=(2 * AREA_WIDTH + 1, 2 * AREA_WIDTH + 1, 3), padding='same')) 271 | model.add(Activation('relu')) 272 | model.add(Conv2D(64, (3, 3), input_shape=(2 * AREA_WIDTH + 1, 2 * AREA_WIDTH + 1, 3), padding='same')) 273 | model.add(Activation('relu')) 274 | model.add(Conv2D(64, (2, 2), padding='same')) 275 | model.add(Activation('relu')) 276 | model.add(Flatten()) 277 | model.add(Dense(512)) 278 | model.add(Dense(8, activation='linear')) 279 | model.compile(loss="huber_loss", optimizer='Adam', metrics=['accuracy']) 280 | return model 281 | 282 | # Adds step's data to a memory replay array 283 | # (observation space, action, reward, new observation space, done) 284 | def update_replay_memory(self, transition): 285 | self.replay_memory.append(transition) 286 | 287 | # Trains main network every step during episode 288 | def train(self, terminal_state, step): 289 | 290 | # Start training only if certain number of samples is already saved 291 | if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE: 292 | return 293 | 294 | # Get a minibatch of random samples from memory replay table 295 | minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE) 296 | 297 | # Get current states from minibatch, then query NN model for Q values 298 | current_states = np.array([transition[0] for transition in minibatch]) 299 | current_qs_list = self.model.predict(current_states) 300 | 301 | # Get future states from minibatch, then query NN model for Q values 302 | # When using target network, query it, otherwise main network should be queried 303 | new_current_states = np.array([transition[3] for transition in minibatch]) 304 | future_qs_list = self.target_model.predict(new_current_states) 305 | 306 | X = [] 307 | y = [] 308 | 309 | # Now we need to enumerate our batches 310 | for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch): 311 | 312 | # If not a terminal state, get new q from future states, otherwise set it to 0 313 | # almost like with Q Learning, but we use just part of equation here 314 | if not done: 315 | max_future_q = np.max(future_qs_list[index]) 316 | new_q = reward + DISCOUNT * max_future_q 317 | else: 318 | new_q = reward 319 | 320 | # Update Q value for given state 321 | current_qs = current_qs_list[index] 322 | current_qs[action] = new_q 323 | 324 | # And append to our training data 325 | X.append(current_state) 326 | y.append(current_qs) 327 | 328 | # Fit on all samples as one batch, log only on terminal state 329 | self.history = self.model.fit(np.array(X), np.array(y), batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False, 330 | callbacks=[self.tensorboard]) 331 | 332 | # Update target network counter every episode 333 | if terminal_state: 334 | self.target_update_counter += 1 335 | 336 | # If counter reaches set value, update target network 
with weights of main network 337 | if self.target_update_counter > UPDATE_TARGET_EVERY: 338 | self.target_model.set_weights(self.model.get_weights()) 339 | self.target_update_counter = 0 340 | accurancy.append(self.history.history['accuracy'][0]) 341 | loss.append(self.history.history['loss'][0]) 342 | 343 | # Queries main network for Q values given current observation space (environment state) 344 | def get_qs(self, state): 345 | return self.model.predict(np.array(state).reshape(-1, *state.shape))[0] 346 | 347 | 348 | episode_rewards = [] 349 | agent = DQNAgent() 350 | accurancy = [] 351 | loss = [] 352 | 353 | 354 | def simulation(Simulation): 355 | info = False 356 | obs = env.sub() 357 | # print(obs) 358 | if (not Simulation): 359 | episode_reward = 0 360 | step = 1 361 | current_state = env.reset() 362 | 363 | # Reset flag and start iterating until episode ends 364 | done = False 365 | while not done: 366 | 367 | # This part stays mostly the same, the change is to query a model for Q values 368 | if np.random.random() > env.eps: 369 | # Get action from Q table 370 | id = np.argmax(agent.get_qs(current_state)) 371 | action, _ = env.get_action_model(id) 372 | else: 373 | # Get random action 374 | id = np.random.randint(0, 8) 375 | action, _ = env.get_action_model(id) 376 | 377 | new_state, reward, done = env.step(action) 378 | 379 | # Transform new continous state to new discrete state and count reward 380 | env.ep += reward 381 | 382 | # Every step we update replay memory and train main network 383 | agent.update_replay_memory((current_state, id, reward, new_state, done)) 384 | agent.train(done, step) 385 | 386 | current_state = new_state 387 | step += 1 388 | 389 | # Append episode reward to a list and log stats (every given number of episodes) 390 | 391 | # Decay epsilon 392 | if env.eps > MIN_EPSILON: 393 | env.eps *= EPSILON_DECAY 394 | env.eps = max(MIN_EPSILON, env.eps) 395 | else: 396 | action, _ = env.get_action_model(np.argmax(agent.get_qs(env.map))) 397 | new_state, reward, done = env.step(action) 398 | 399 | if done == True: 400 | episode_rewards.append(env.ep) 401 | env.eps *= EPSILON_DECAY 402 | info = True 403 | env.reset() 404 | 405 | return info 406 | 407 | 408 | def update_data(frame_number): 409 | simulation(True) 410 | for i in range(PEOPLES): 411 | peoples_x[i] = env.peoples_array[i].x 412 | peoples_y[i] = env.peoples_array[i].y 413 | 414 | obj1.set_data(env.x, env.y) 415 | obj2.set_data(env.tx, env.ty) 416 | # new data to plot 417 | obj.set_data(peoples_x, peoples_y) 418 | return None 419 | 420 | 421 | def anim(plt): 422 | for i in range(PEOPLES): 423 | peoples_x.append(env.peoples_array[i].x) 424 | peoples_y.append(env.peoples_array[i].y) 425 | 426 | points_rx.append(env.x) 427 | points_ry.append(env.y) 428 | points_tx.append(env.tx) 429 | points_ty.append(env.ty) 430 | # set view limits 431 | 432 | ax = plt.axes() 433 | ax.set_ylim(-AREA_WIDTH, AREA_WIDTH) 434 | ax.set_xlim(-AREA_WIDTH, AREA_WIDTH) 435 | 436 | # animation controller - have to be assigned to variable 437 | anim = animation.FuncAnimation(fig, update_data, MAX_FRAMES, interval=50) # , repeat=False) 438 | plt.show() 439 | 440 | 441 | env = robot() 442 | env.reset() 443 | eps_history = [] 444 | k = 16 445 | for i in tqdm(range(1000), ascii=True, unit='episodes'): 446 | for ii in tqdm(range(k), ascii=True, unit='step'): 447 | while (1): 448 | if (simulation(False)): 449 | break 450 | print() 451 | print('Epoch:', i) 452 | print('Reward:', episode_rewards[i * k + ii]) 453 | print('Eps:', env.eps) 454 | 
        eps_history.append(env.eps)
455 |         if (len(accurancy) > 0):
456 |             print('accuracy:', accurancy[len(accurancy) - 1])
457 |             print('loss:', loss[len(loss) - 1])
458 | 
459 | scale = 100
460 | scale1 = 2000
461 | moving_avg = np.convolve(episode_rewards, np.ones((scale,)) / scale, mode='valid')
462 | plt.plot([i for i in range(len(moving_avg))], moving_avg)
463 | # plt.plot(episode_rewards)
464 | plt.ylabel("Reward (moving average)")
465 | plt.xlabel("episode #")
466 | plt.show()
467 | 
468 | 
469 | # Plot results
470 | accurancy = np.array(accurancy) * 100
471 | accurancy = accurancy.tolist()
472 | moving_avg1 = np.convolve(accurancy, np.ones((scale1,)) / scale1, mode='valid')
473 | plt.plot([i for i in range(len(moving_avg1))], moving_avg1)
474 | # plt.plot(episode_rewards)
475 | plt.ylabel("Accuracy [%]")
476 | plt.xlabel("episode #")
477 | plt.show()
478 | 
479 | moving_avg2 = np.convolve(loss, np.ones((scale1,)) / scale1, mode='valid')
480 | plt.plot([i for i in range(len(moving_avg2))], moving_avg2)
481 | # plt.plot(episode_rewards)
482 | plt.ylabel("Loss")
483 | plt.xlabel("episode #")
484 | plt.show()
485 | 
486 | moving_avg3 = np.convolve(eps_history, np.ones((scale,)) / scale, mode='valid')
487 | plt.plot([i for i in range(len(moving_avg3))], moving_avg3)
488 | # plt.plot(episode_rewards)
489 | plt.ylabel("Epsilon")
490 | plt.xlabel("episode #")
491 | plt.show()
492 | 
493 | env.reset()
494 | fig = plt.figure()
495 | obj, = plt.plot([], [], 'ro')
496 | obj1, = plt.plot([], [], 'yo')
497 | obj2, = plt.plot([], [], 'bo')
498 | anim(plt)
499 | 
--------------------------------------------------------------------------------
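The README's stabilization notes for DQN_PATH.py (two Q networks, the Huber loss, small bounded rewards) correspond to what DQNAgent.train() actually does: bootstrap targets are computed from target_model and the online model is compiled with loss="huber_loss". The short NumPy sketch below isolates those two ingredients for illustration only; the helper names huber and td_target, the toy batch, and the printed values are invented here and are not part of the repository.

import numpy as np

DISCOUNT = 0.85  # same discount factor as DQN_PATH.py


def huber(residual, delta=1.0):
    # Quadratic near zero, linear in the tails: large TD errors give bounded gradients.
    a = np.abs(residual)
    return np.where(a <= delta, 0.5 * a ** 2, delta * (a - 0.5 * delta))


def td_target(reward, done, target_q_next, gamma=DISCOUNT):
    # Bootstrap from the *target* network's Q-values for s', as DQNAgent.train() does;
    # terminal transitions keep only the raw reward.
    return reward + gamma * np.max(target_q_next) * (1.0 - done)


q_next = np.random.uniform(-1, 1, 8)      # one Q-value per action of get_action_model()
print(td_target(-0.01, 0.0, q_next))      # ordinary move: small penalty, non-terminal
print(td_target(1.0, 1.0, q_next))        # reaching the target ends the episode
print(huber(np.array([0.2, 3.0])))        # small vs. large TD error

Keeping rewards in a small range means most TD errors stay in the quadratic region of the Huber loss, so the two measures reinforce each other.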