├── .gitignore
├── README.md
├── gridworld_env_revisit.py
├── gridwrold_env.py
├── gridwrold_env_2.py
├── gridwrold_env_board.py
├── gridwrold_env_multi.py
├── gridwrold_env_multi_circle.py
├── gridwrold_env_multi_circle_2.py
├── gridwrold_env_multi_circle_3.py
├── gridwrold_env_random.py
├── src
│   ├── coverage_animation.gif
│   └── training_plot.png
├── train.py
└── train_2.py
/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/*
--------------------------------------------------------------------------------
/README.md: --------------------------------------------------------------------------------
1 | # UAV Coverage Path Planning for Single/Multi Drones 2 | 3 | This program plans coverage paths for a single drone or multiple drones on a discrete gridworld. 4 | 5 | ![2 Drones Coverage Path on 10x10 Field](src/coverage_animation.gif) 6 | 7 | *2 Drones Coverage Path Animation* 8 | 9 | ![Training Plot over Episodes of 2 Drones on 10x10 Field](src/training_plot.png) 10 | 11 | *Training Plot over Episodes of 2 Drones on a 10x10 Field* 12 | 13 | ## Installation 14 | 15 | - Install [stable-baselines](https://stable-baselines.readthedocs.io/en/master/index.html) 16 | - Stable-Baselines requires Python 3 (>=3.5) with the development headers. You'll also need the system packages CMake, OpenMPI, and zlib; see the Stable-Baselines documentation for how to install them. 17 | - Stable-Baselines supports Tensorflow versions from 1.8.0 to 1.15.0, and does not work on Tensorflow versions 2.0.0 and above. 18 | - `pip install stable-baselines` 19 | - Install tabulate 20 | - `pip install tabulate` 21 | 22 | 23 | ## Training 24 | 25 | - Import the specific environment in train.py 26 | - `python3 train.py` 27 |
--------------------------------------------------------------------------------
/gridworld_env_revisit.py: --------------------------------------------------------------------------------
1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited count of the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Get the visited count of the grid 40 | :param i: row 41 | :param j: column 42 | :return: the visited count 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if every grid has been visited at least once 55 | """ 56 | return self.total_grids == self.visited_grids 57 | 58 | def __str__(self): 59 | return str(self.data) 60 | 61 | 62 | class GridworldEnv(gym.Env): 63 | """ 64 | Gridworld Environment that represents a rectangular world 65 | """ 66 | 67 | metadata = {'render.modes': ['human']} 68 | 69 | class Actions(IntEnum): 70 | left = 0 71
| right = 1 72 | up = 2 73 | down = 3 74 | 75 | def __init__(self, width, height, seed=1337): 76 | super(GridworldEnv, self).__init__() 77 | # self.world = np.array((width, height), dtype=int) 78 | 79 | # Environment configuration 80 | self.width = width 81 | self.height = height 82 | self.size = width * height 83 | 84 | # Information for the agent 85 | self.agent_pos = (0, 0) 86 | self.steps = [(0, 0)] 87 | self.board = Board(width, height) 88 | 89 | # For gym 90 | # Actions are discrete integer values 91 | self.action_space = spaces.Discrete(4) 92 | # Observations are the agent's (row, column) position 93 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 94 | shape=(2, ), dtype=np.int) 95 | 96 | # Initialize the state 97 | self.reset() 98 | 99 | # Initialize the RNG 100 | self.seed(seed=seed) 101 | 102 | # Action enumeration for this environment 103 | self.actions = GridworldEnv.Actions 104 | 105 | def reset(self): 106 | # Reset the agent to the top-left corner 107 | h = 0 108 | w = 0 109 | self.agent_pos = (h, w) 110 | 111 | self.board = Board(self.width, self.height) 112 | self.board.set(h, w) 113 | 114 | board = deepcopy(self.board.data)  # (unused: leftover from the board-observation variant) 115 | board[0, 0] = -1 116 | board = board.flatten() 117 | 118 | # Step count since episode start 119 | self.steps = [(0, 0)] 120 | 121 | # Return first observation 122 | return np.array([0, 0]) 123 | 124 | def seed(self, seed=1337): 125 | """ 126 | Seed the random number generator 127 | """ 128 | self.np_random, _ = seeding.np_random(seed) 129 | return [seed] 130 | 131 | def step(self, action): 132 | done = False 133 | 134 | # Get the coordinate for the new position 135 | prev_i, prev_j = self.agent_pos 136 | i, j = self.agent_pos 137 | 138 | if action == self.actions.left: 139 | j -= 1 140 | elif action == self.actions.right: 141 | j += 1 142 | elif action == self.actions.up: 143 | i -= 1 144 | elif action == self.actions.down: 145 | i += 1 146 | 147 | if not self.board.is_valid(i, j): # New position out of bound 148 | return np.array([prev_i, prev_j]), -1, False, {} 149 | 150 | self.agent_pos = (i, j) 151 | # Update the step information 152 | self.steps.append((i, j)) 153 | self.board.set(i, j) 154 | 155 | pos = np.array([i, j]) 156 | 157 | if self.board.get(i, j) > 5: # The grid has been visited more than 5 times 158 | return pos, 0, True, {} 159 | elif self.board.is_filled(): # All grids have been visited at least once 160 | return pos, 100000, True, {} 161 | else: # Keep exploring; small reward for each step 162 | return pos, 1, False, {} 163 | 164 | def render(self, mode='human', close=False): 165 | print("board:") 166 | print(self.board.data) 167 | print("path:", self.get_path()) 168 | print("pos:", self.agent_pos) 169 | print("") 170 | 171 | def get_path(self): 172 | """ 173 | Get the path on the field 174 | :return: a table showing the visit order of each grid 175 | """ 176 | board = np.zeros((self.height, self.width), dtype=np.int) 177 | for index, pos in enumerate(self.steps): 178 | i, j = pos 179 | board[i, j] = index 180 | 181 | table = tabulate(board) 182 | return table 183 | 184 |
--------------------------------------------------------------------------------
/gridwrold_env.py: --------------------------------------------------------------------------------
1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 |
self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | return self.total_grids == self.visited_grids 54 | 55 | def __str__(self): 56 | return str(self.data) 57 | 58 | 59 | class GridworldEnv(gym.Env): 60 | """ 61 | Gridworld Environment that represents a rectangle world 62 | """ 63 | 64 | metadata = {'render.modes': ['human']} 65 | 66 | class Actions(IntEnum): 67 | left = 0 68 | right = 1 69 | up = 2 70 | down = 3 71 | 72 | def __init__(self, width, height, seed=1337): 73 | super(GridworldEnv, self).__init__() 74 | # self.world = np.array((width, height), dtype=int) 75 | 76 | # Environment configuration 77 | self.width = width 78 | self.height = height 79 | self.size = width * height 80 | 81 | # Information for the agent 82 | self.agent_pos = (0, 0) 83 | self.steps = [(0, 0)] 84 | self.board = Board(width, height) 85 | 86 | # For gym 87 | # Actions are discrete integer values 88 | self.action_space = spaces.Discrete(4) 89 | # Observations are number of cells 90 | self.observation_space = spaces.Box(low=-1, high=2, 91 | shape=(self.size, ), dtype=np.int) 92 | 93 | # Initialize the state 94 | self.reset() 95 | 96 | # Initialize the RNG 97 | self.seed(seed=seed) 98 | 99 | # Action enumeration for this environment 100 | self.actions = GridworldEnv.Actions 101 | 102 | def reset(self): 103 | # Current position and direction of the agent 104 | h = 0 105 | w = 0 106 | self.agent_pos = (h, w) 107 | 108 | self.board = Board(self.width, self.height) 109 | self.board.set(h, w) 110 | 111 | board = deepcopy(self.board.data) 112 | board[0, 0] = -1 113 | board = board.flatten() 114 | 115 | # Step count since episode start 116 | self.steps = [(0, 0)] 117 | 118 | # Return first observation 119 | return board.flatten() 120 | 121 | def seed(self, seed=1337): 122 | """ 123 | Seed the random number generator 124 | """ 125 | self.np_random, _ = seeding.np_random(seed) 126 | return [seed] 127 | 128 | def step(self, action): 129 | done = False 130 | 131 | # Get the coordinate for the new position 132 | prev_i, prev_j = self.agent_pos 133 | i, j = self.agent_pos 134 | 135 | if action == self.actions.left: 136 | j -= 1 137 | elif action == self.actions.right: 138 | j += 1 139 | elif action == self.actions.up: 140 | i -= 1 141 | elif action == self.actions.down: 142 | i += 1 143 | 144 | if not self.board.is_valid(i, j): # New position out of bound 145 | board = deepcopy(self.board.data) 146 | board[prev_i, prev_j] = -1 147 | board = board.flatten() 148 | return board, -1, False, {} 149 | 150 | self.agent_pos = (i, j) 151 | # Update the step information 152 | self.steps.append((i, j)) 153 | self.board.set(i, j) 154 | board = 
deepcopy(self.board.data) 155 | board[i, j] = -1 156 | board = board.flatten() 157 | 158 | if self.board.get(i, j) > 1: # The grid has been visited 159 | return board, 0, True, {} 160 | elif len(self.steps) >= self.size: # All grids has been visited once 161 | return board, 100000, True, {} 162 | else: # The grid has not been visited 163 | return board, 1, False, {} 164 | 165 | def render(self, mode='human', close=False): 166 | print("board:") 167 | print(self.board.data) 168 | print("path:", self.get_path()) 169 | print("pos:", self.agent_pos) 170 | print("") 171 | 172 | def get_path(self): 173 | """ 174 | Get the path on the field 175 | :return: 176 | """ 177 | board = np.zeros((self.height, self.width), dtype=np.int) 178 | for index, pos in enumerate(self.steps): 179 | i, j = pos 180 | board[i, j] = index 181 | 182 | table = tabulate(board) 183 | return table 184 | 185 | -------------------------------------------------------------------------------- /gridwrold_env_2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | return self.total_grids == self.visited_grids 54 | 55 | def __str__(self): 56 | return str(self.data) 57 | 58 | 59 | class GridworldEnv(gym.Env): 60 | """ 61 | Gridworld Environment that represents a rectangle world 62 | """ 63 | 64 | metadata = {'render.modes': ['human']} 65 | 66 | class Actions(IntEnum): 67 | left = 0 68 | right = 1 69 | up = 2 70 | down = 3 71 | 72 | def __init__(self, width, height, seed=1337): 73 | super(GridworldEnv, self).__init__() 74 | # self.world = np.array((width, height), dtype=int) 75 | 76 | # Environment configuration 77 | self.width = width 78 | self.height = height 79 | self.size = width * height 80 | 81 | # Information for the agent 82 | self.agent_pos = (0, 0) 83 | self.steps = [(0, 0)] 84 | self.board = Board(width, height) 85 | 86 | # For gym 87 | # Actions are discrete integer values 88 | self.action_space = spaces.Discrete(4) 89 | # Observations are number of cells 90 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 91 | shape=(2, ), dtype=np.int) 92 | 93 | # Initialize the state 94 | self.reset() 95 | 96 | # Initialize the RNG 97 | self.seed(seed=seed) 98 | 99 | # Action enumeration for this environment 100 | self.actions 
= GridworldEnv.Actions 101 | 102 | def reset(self): 103 | # Current position and direction of the agent 104 | h = 0 105 | w = 0 106 | self.agent_pos = (h, w) 107 | 108 | self.board = Board(self.width, self.height) 109 | self.board.set(h, w) 110 | 111 | board = deepcopy(self.board.data) 112 | board[0, 0] = -1 113 | board = board.flatten() 114 | 115 | # Step count since episode start 116 | self.steps = [(0, 0)] 117 | 118 | # Return first observation 119 | return np.array([0, 0]) 120 | 121 | def seed(self, seed=1337): 122 | """ 123 | Seed the random number generator 124 | """ 125 | self.np_random, _ = seeding.np_random(seed) 126 | return [seed] 127 | 128 | def step(self, action): 129 | done = False 130 | 131 | # Get the coordinate for the new position 132 | prev_i, prev_j = self.agent_pos 133 | i, j = self.agent_pos 134 | 135 | if action == self.actions.left: 136 | j -= 1 137 | elif action == self.actions.right: 138 | j += 1 139 | elif action == self.actions.up: 140 | i -= 1 141 | elif action == self.actions.down: 142 | i += 1 143 | 144 | if not self.board.is_valid(i, j): # New position out of bound 145 | return np.array([prev_i, prev_j]), -1, False, {} 146 | 147 | self.agent_pos = (i, j) 148 | # Update the step information 149 | self.steps.append((i, j)) 150 | self.board.set(i, j) 151 | 152 | pos = np.array([i, j]) 153 | 154 | if self.board.get(i, j) > 1: # The grid has been visited 155 | return pos, 0, True, {} 156 | elif len(self.steps) == self.size: # All grids has been visited once 157 | return pos, 100000, True, {} 158 | else: # The grid has not been visited 159 | return pos, 1, False, {} 160 | 161 | def render(self, mode='human', close=False): 162 | print("board:") 163 | print(self.board.data) 164 | print("path:", self.get_path()) 165 | print("pos:", self.agent_pos) 166 | print("") 167 | 168 | def get_path(self): 169 | """ 170 | Get the path on the field 171 | :return: 172 | """ 173 | board = np.zeros((self.height, self.width), dtype=np.int) 174 | for index, pos in enumerate(self.steps): 175 | i, j = pos 176 | board[i, j] = index 177 | 178 | table = tabulate(board) 179 | return table 180 | 181 | -------------------------------------------------------------------------------- /gridwrold_env_board.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j 
< self.width 51 | 52 | def is_filled(self): 53 | return self.total_grids == self.visited_grids 54 | 55 | def __str__(self): 56 | return str(self.data) 57 | 58 | 59 | class GridworldEnv(gym.Env): 60 | """ 61 | Gridworld Environment that represents a rectangle world 62 | """ 63 | 64 | metadata = {'render.modes': ['human']} 65 | 66 | class Actions(IntEnum): 67 | left = 0 68 | right = 1 69 | up = 2 70 | down = 3 71 | 72 | def __init__(self, width, height, seed=1337): 73 | super(GridworldEnv, self).__init__() 74 | # self.world = np.array((width, height), dtype=int) 75 | 76 | # Environment configuration 77 | self.width = width 78 | self.height = height 79 | self.size = width * height 80 | 81 | # Information for the agent 82 | self.agent_pos = (0, 0) 83 | self.steps = [(0, 0)] 84 | self.board = Board(width, height) 85 | 86 | # For gym 87 | # Actions are discrete integer values 88 | self.action_space = spaces.Discrete(4) 89 | # Observations are number of cells 90 | self.observation_space = spaces.Box(low=0, high=2, 91 | shape=(self.size, ), dtype=np.int) 92 | 93 | # Initialize the state 94 | self.reset() 95 | 96 | # Initialize the RNG 97 | self.seed(seed=seed) 98 | 99 | # Action enumeration for this environment 100 | self.actions = GridworldEnv.Actions 101 | 102 | def reset(self): 103 | # Current position and direction of the agent 104 | h = 0 105 | w = 0 106 | self.agent_pos = (h, w) 107 | 108 | self.board = Board(self.width, self.height) 109 | self.board.set(h, w) 110 | 111 | board = deepcopy(self.board.data) 112 | board[0, 0] = -1 113 | board = board.flatten() 114 | 115 | # Step count since episode start 116 | self.steps = [(0, 0)] 117 | 118 | # Return first observation 119 | return self.board.data.flatten() 120 | 121 | def seed(self, seed=1337): 122 | """ 123 | Seed the random number generator 124 | """ 125 | self.np_random, _ = seeding.np_random(seed) 126 | return [seed] 127 | 128 | def step(self, action): 129 | done = False 130 | 131 | # Get the coordinate for the new position 132 | prev_i, prev_j = self.agent_pos 133 | i, j = self.agent_pos 134 | 135 | if action == self.actions.left: 136 | j -= 1 137 | elif action == self.actions.right: 138 | j += 1 139 | elif action == self.actions.up: 140 | i -= 1 141 | elif action == self.actions.down: 142 | i += 1 143 | 144 | if not self.board.is_valid(i, j): # New position out of bound 145 | return self.board.data.flatten(), -1, False, {} 146 | 147 | self.agent_pos = (i, j) 148 | # Update the step information 149 | self.steps.append((i, j)) 150 | self.board.set(i, j) 151 | 152 | board = self.board.data.flatten() 153 | 154 | if self.board.get(i, j) > 1: # The grid has been visited 155 | return board, 0, True, {} 156 | elif len(self.steps) >= self.size: # All grids has been visited once 157 | return board, 100000, True, {} 158 | else: # The grid has not been visited 159 | return board, 1, False, {} 160 | 161 | def render(self, mode='human', close=False): 162 | print("board:") 163 | print(self.board.data) 164 | print("path:") 165 | print(self.get_path()) 166 | print("pos:", self.agent_pos) 167 | print("") 168 | 169 | def get_path(self): 170 | """ 171 | Get the path on the field 172 | :return: 173 | """ 174 | board = np.zeros((self.height, self.width), dtype=np.int) 175 | for index, pos in enumerate(self.steps): 176 | i, j = pos 177 | board[i, j] = index 178 | 179 | table = tabulate(board) 180 | return table 181 | 182 | -------------------------------------------------------------------------------- /gridwrold_env_multi.py: 
-------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.all_steps = None 91 | self.board = Board(width, height) 92 | 93 | # For gym 94 | # Actions are discrete integer values 95 | self.action_space = spaces.Discrete(16) 96 | # Observations are number of cells 97 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 98 | shape=(4, ), dtype=np.int) 99 | 100 | # Initialize the state 101 | self.reset() 102 | 103 | # Initialize the RNG 104 | self.seed(seed=seed) 105 | 106 | # Action enumeration for this environment 107 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 108 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 109 | 110 | def reset(self): 111 | # Current position and direction of the agent 112 | self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 113 | 114 | self.board = Board(self.width, self.height) 115 | self.board.set(0, 0) 116 | self.board.set(self.height - 1, self.width - 1) 117 | 118 | # Step count since episode start 119 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 120 | 121 | # Return first observation 122 | return np.array([0, 0, self.height - 1, self.width - 1]) 123 | 124 | def seed(self, seed=1337): 125 | """ 126 | Seed the random number generator 127 | """ 128 | self.np_random, _ = seeding.np_random(seed) 129 | return [seed] 130 | 131 | def get_obs_space(self): 
132 | """ 133 | :return: The agent's positions as a 1D np array 134 | """ 135 | obs = [] 136 | for pos in self.agent_positions: 137 | obs.extend(pos) 138 | return np.array(obs) 139 | 140 | def step(self, action): 141 | action_comb = self.action_combs[action] 142 | agent_positions = [] 143 | is_visited = False 144 | 145 | for index, a in enumerate(action_comb): 146 | prev_i, prev_j = self.agent_positions[index] 147 | i, j = prev_i + a[0], prev_j + a[1] 148 | if not self.board.is_valid(i, j): # New position out of bound 149 | # Skip this step, has a -1 reward 150 | return self.get_obs_space(), -1, False, {} 151 | elif self.board.get(i, j) > 0: # Revisit a grid 152 | # Terminate the episode, has no reward 153 | is_visited = True 154 | agent_positions.append((i, j)) 155 | 156 | # Update positions 157 | self.agent_positions = agent_positions 158 | 159 | for index, pos in enumerate(self.agent_positions): 160 | i, j = pos 161 | self.board.set(i, j) # Update pos 162 | self.all_steps[index].append(pos) # Update steps 163 | 164 | if is_visited: # Is visited 165 | return self.get_obs_space(), 0, True, {} 166 | elif self.board.is_filled(): # All grids has been visited once 167 | self._write_path() 168 | return self.get_obs_space(), 100000, True, {} 169 | else: # The grid has not been visited 170 | return self.get_obs_space(), 1, False, {} 171 | 172 | def _write_path(self): 173 | with open("path.txt", "w") as fhand: 174 | path = self.get_path() 175 | fhand.write(path) 176 | 177 | def render(self, mode='human', close=False): 178 | print("board:") 179 | print(self.board.data) 180 | print("steps:", self.all_steps) 181 | print("path:", self.get_path()) 182 | print("pos:", self.agent_positions) 183 | print("") 184 | 185 | def get_path(self): 186 | """ 187 | Get the path on the field 188 | :return: 189 | """ 190 | tables = "" 191 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 192 | steps = self.all_steps[0] 193 | for index, pos in enumerate(steps): 194 | i, j = pos 195 | board[i][j] = f"({index})" 196 | 197 | steps = self.all_steps[1] 198 | for index, pos in enumerate(steps): 199 | i, j = pos 200 | board[i][j] = f"[{index}]" 201 | return tabulate(board) 202 | 203 | -------------------------------------------------------------------------------- /gridwrold_env_multi_circle.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | 
"""Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.orignal_positions = None 91 | self.all_steps = None 92 | self.board = Board(width, height) 93 | 94 | # For gym 95 | # Actions are discrete integer values 96 | self.action_space = spaces.Discrete(16) 97 | # Observations are number of cells 98 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 99 | shape=(4, ), dtype=np.int) 100 | 101 | # Initialize the state 102 | self.reset() 103 | 104 | # Initialize the RNG 105 | self.seed(seed=seed) 106 | 107 | # Action enumeration for this environment 108 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 109 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 110 | 111 | def reset(self): 112 | # Current position and direction of the agent 113 | self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 114 | self.orignal_positions = [(0, 0), (self.height - 1, self.width - 1)] 115 | 116 | self.board = Board(self.width, self.height) 117 | self.board.set(0, 0) 118 | self.board.set(self.height - 1, self.width - 1) 119 | 120 | # Step count since episode start 121 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 122 | 123 | # Return first observation 124 | return np.array([0, 0, self.height - 1, self.width - 1]) 125 | 126 | def seed(self, seed=1337): 127 | """ 128 | Seed the random number generator 129 | """ 130 | self.np_random, _ = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def get_obs_space(self): 134 | """ 135 | :return: The agent's positions as a 1D np array 136 | """ 137 | obs = [] 138 | for pos in self.agent_positions: 139 | obs.extend(pos) 140 | return np.array(obs) 141 | 142 | def step(self, action): 143 | action_comb = self.action_combs[action] 144 | agent_positions = [] 145 | is_visited = False 146 | 147 | for index, a in enumerate(action_comb): 148 | prev_i, prev_j = self.agent_positions[index] 149 | i, j = prev_i + a[0], prev_j + a[1] 150 | if not self.board.is_valid(i, j): # New position out of bound 151 | # Skip this step, has a -1 reward 152 | return self.get_obs_space(), -1, False, {} 153 | elif self.board.get(i, j) > 0: # Revisit a grid 154 | # Terminate the episode, has no reward 155 | is_visited = True 156 | agent_positions.append((i, j)) 157 | 158 | # Update positions 159 | self.agent_positions = agent_positions 160 | 161 | for index, pos in enumerate(self.agent_positions): 162 | i, j = pos 163 | self.board.set(i, j) # Update pos 164 | self.all_steps[index].append(pos) # Update steps 165 | 166 | if is_visited: # Is visited 167 | return self.get_obs_space(), 0, True, {} 168 | 
elif self.board.is_filled(): # All grids has been visited once 169 | # self._write_path() 170 | reward = (self.width + self.height) * 20 171 | for index, pos in enumerate(self.agent_positions): 172 | i, j = pos 173 | org_i, org_j = self.orignal_positions[index] 174 | reward += ((- abs(i - org_i) - abs(j - org_j)) * 10) 175 | self._write_path() 176 | return self.get_obs_space(), reward, True, {} 177 | else: # The grid has not been visited 178 | return self.get_obs_space(), 1, False, {} 179 | 180 | def _write_path(self): 181 | with open("path.txt", "w") as fhand: 182 | path = self.get_path() 183 | fhand.write(path) 184 | 185 | def render(self, mode='human', close=False): 186 | print("board:") 187 | print(self.board.data) 188 | print("steps:", self.all_steps) 189 | print("path:", self.get_path()) 190 | print("pos:", self.agent_positions) 191 | print("") 192 | 193 | def get_path(self): 194 | """ 195 | Get the path on the field 196 | :return: 197 | """ 198 | tables = "" 199 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 200 | steps = self.all_steps[0] 201 | for index, pos in enumerate(steps): 202 | i, j = pos 203 | board[i][j] = f"({index})" 204 | 205 | steps = self.all_steps[1] 206 | for index, pos in enumerate(steps): 207 | i, j = pos 208 | board[i][j] = f"[{index}]" 209 | return tabulate(board) 210 | 211 | -------------------------------------------------------------------------------- /gridwrold_env_multi_circle_2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | 
self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.orignal_positions = None 91 | self.all_steps = None 92 | self.board = Board(width, height) 93 | 94 | # For gym 95 | # Actions are discrete integer values 96 | self.action_space = spaces.Discrete(16) 97 | # Observations are number of cells 98 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 99 | shape=(4, ), dtype=np.int) 100 | 101 | # Initialize the state 102 | self.reset() 103 | 104 | # Initialize the RNG 105 | self.seed(seed=seed) 106 | 107 | # Action enumeration for this environment 108 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 109 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 110 | 111 | def reset(self): 112 | # Current position and direction of the agent 113 | self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 114 | self.orignal_positions = [(0, 0), (self.height - 1, self.width - 1)] 115 | 116 | self.board = Board(self.width, self.height) 117 | self.board.set(0, 0) 118 | self.board.set(self.height - 1, self.width - 1) 119 | 120 | # Step count since episode start 121 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 122 | 123 | # Return first observation 124 | return np.array([0, 0, self.height - 1, self.width - 1]) 125 | 126 | def seed(self, seed=1337): 127 | """ 128 | Seed the random number generator 129 | """ 130 | self.np_random, _ = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def get_obs_space(self): 134 | """ 135 | :return: The agent's positions as a 1D np array 136 | """ 137 | obs = [] 138 | for pos in self.agent_positions: 139 | obs.extend(pos) 140 | return np.array(obs) 141 | 142 | def step(self, action): 143 | action_comb = self.action_combs[action] 144 | agent_positions = [] 145 | is_visited = False 146 | 147 | for index, a in enumerate(action_comb): 148 | prev_i, prev_j = self.agent_positions[index] 149 | i, j = prev_i + a[0], prev_j + a[1] 150 | if not self.board.is_valid(i, j): # New position out of bound 151 | # Skip this step, has a -1 reward 152 | return self.get_obs_space(), -1, False, {} 153 | elif self.board.get(i, j) > 0: # Revisit a grid 154 | # Terminate the episode, has no reward 155 | is_visited = True 156 | agent_positions.append((i, j)) 157 | 158 | # Update positions 159 | self.agent_positions = agent_positions 160 | 161 | for index, pos in enumerate(self.agent_positions): 162 | i, j = pos 163 | self.board.set(i, j) # Update pos 164 | self.all_steps[index].append(pos) # Update steps 165 | 166 | reward = 0 167 | for index, pos in enumerate(self.agent_positions): 168 | i, j = pos 169 | org_i, org_j = self.orignal_positions[index] 170 | reward += (- abs(i - org_i) - abs(j - org_j)) 171 | 172 | if is_visited: # Is visited 173 | return self.get_obs_space(), reward, True, {} 174 | elif self.board.is_filled(): # All grids has been visited once 175 | # self._write_path() 176 | self._write_path() 177 | return self.get_obs_space(), reward + 1, True, {} 178 | else: # The grid has not been visited 179 | return self.get_obs_space(), 1, False, {} 180 | 181 | def _write_path(self): 182 | with open("path.txt", "w") as fhand: 183 | path = self.get_path() 184 | fhand.write(path) 185 | 186 | def render(self, mode='human', close=False): 187 | print("board:") 188 | print(self.board.data) 189 | print("steps:", self.all_steps) 190 | print("path:", self.get_path()) 191 | print("pos:", self.agent_positions) 192 | print("") 193 | 194 | def 
get_path(self): 195 | """ 196 | Get the path on the field 197 | :return: 198 | """ 199 | tables = "" 200 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 201 | steps = self.all_steps[0] 202 | for index, pos in enumerate(steps): 203 | i, j = pos 204 | board[i][j] = f"({index})" 205 | 206 | steps = self.all_steps[1] 207 | for index, pos in enumerate(steps): 208 | i, j = pos 209 | board[i][j] = f"[{index}]" 210 | return tabulate(board) 211 | 212 | -------------------------------------------------------------------------------- /gridwrold_env_multi_circle_3.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.orignal_positions = None 91 | self.all_steps = None 92 | self.board = Board(width, height) 93 | 94 | # For gym 95 | # Actions are discrete integer values 96 | self.action_space = spaces.Discrete(16) 97 | # Observations are number of cells 98 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 99 | shape=(4, ), dtype=np.int) 100 | 101 | # Initialize the state 102 | self.reset() 103 | 104 | # Initialize the RNG 105 | self.seed(seed=seed) 106 | 107 | # Action enumeration for this environment 108 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 109 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 110 | 111 | def reset(self): 112 | # Current position and direction of the agent 113 | 
self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 114 | self.orignal_positions = [(0, 0), (self.height - 1, self.width - 1)] 115 | 116 | self.board = Board(self.width, self.height) 117 | self.board.set(0, 0) 118 | self.board.set(self.height - 1, self.width - 1) 119 | 120 | # Step count since episode start 121 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 122 | 123 | # Return first observation 124 | return np.array([0, 0, self.height - 1, self.width - 1]) 125 | 126 | def seed(self, seed=1337): 127 | """ 128 | Seed the random number generator 129 | """ 130 | self.np_random, _ = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def get_obs_space(self): 134 | """ 135 | :return: The agent's positions as a 1D np array 136 | """ 137 | obs = [] 138 | for pos in self.agent_positions: 139 | obs.extend(pos) 140 | return np.array(obs) 141 | 142 | def step(self, action): 143 | action_comb = self.action_combs[action] 144 | agent_positions = [] 145 | is_visited = False 146 | 147 | for index, a in enumerate(action_comb): 148 | prev_i, prev_j = self.agent_positions[index] 149 | i, j = prev_i + a[0], prev_j + a[1] 150 | if not self.board.is_valid(i, j): # New position out of bound 151 | # Skip this step, has a -1 reward 152 | return self.get_obs_space(), -1, False, {} 153 | elif self.board.get(i, j) > 0: # Revisit a grid 154 | # Terminate the episode, has no reward 155 | is_visited = True 156 | agent_positions.append((i, j)) 157 | 158 | # Update positions 159 | self.agent_positions = agent_positions 160 | 161 | for index, pos in enumerate(self.agent_positions): 162 | i, j = pos 163 | self.board.set(i, j) # Update pos 164 | self.all_steps[index].append(pos) # Update steps 165 | 166 | if is_visited: # Is visited 167 | return self.get_obs_space(), 0, True, {} 168 | elif self.board.is_filled(): # All grids has been visited once 169 | # self._write_path() 170 | reward = (self.width + self.height) * 2 171 | for index, pos in enumerate(self.agent_positions): 172 | i, j = pos 173 | org_i, org_j = self.orignal_positions[index] 174 | reward += (- abs(i - org_i) - abs(j - org_j)) 175 | self._write_path() 176 | return self.get_obs_space(), reward, True, {} 177 | else: # The grid has not been visited 178 | return self.get_obs_space(), 1, False, {} 179 | 180 | def _write_path(self): 181 | with open("path.txt", "w") as fhand: 182 | path = self.get_path() 183 | fhand.write(path) 184 | 185 | def render(self, mode='human', close=False): 186 | print("board:") 187 | print(self.board.data) 188 | print("steps:", self.all_steps) 189 | print("path:", self.get_path()) 190 | print("pos:", self.agent_positions) 191 | print("") 192 | 193 | def get_path(self): 194 | """ 195 | Get the path on the field 196 | :return: 197 | """ 198 | tables = "" 199 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 200 | steps = self.all_steps[0] 201 | for index, pos in enumerate(steps): 202 | i, j = pos 203 | board[i][j] = f"({index})" 204 | 205 | steps = self.all_steps[1] 206 | for index, pos in enumerate(steps): 207 | i, j = pos 208 | board[i][j] = f"[{index}]" 209 | return tabulate(board) 210 | 211 | -------------------------------------------------------------------------------- /gridwrold_env_random.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | import 
random 9 | 10 | 11 | class Board: 12 | """Represent a grid and operations on it""" 13 | def __init__(self, width, height): 14 | self.width = width 15 | self.height = height 16 | 17 | self.data = np.zeros((height, width), dtype=np.int) 18 | 19 | self.total_grids = width * height 20 | self.visited_grids = 0 21 | 22 | def set(self, i, j): 23 | """ 24 | Increment the visited count of the grid 25 | :param i: row 26 | :param j: column 27 | """ 28 | assert i >= 0 and i < self.height 29 | assert j >= 0 and j < self.width 30 | 31 | if self.data[i, j] == 0: # Not visited 32 | self.visited_grids += 1 33 | 34 | self.data[i, j] += 1 35 | 36 | return self.data[i, j] 37 | 38 | def get(self, i, j): 39 | """ 40 | Get the visited count of the grid 41 | :param i: row 42 | :param j: column 43 | :return: the visited count 44 | """ 45 | assert i >= 0 and i < self.height 46 | assert j >= 0 and j < self.width 47 | return self.data[i, j] 48 | 49 | def is_valid(self, i, j): 50 | """Check if a position is in the boundary""" 51 | return 0 <= i < self.height and 0 <= j < self.width 52 | 53 | def is_filled(self): 54 | return self.total_grids == self.visited_grids 55 | 56 | def __str__(self): 57 | return str(self.data) 58 | 59 | 60 | class GridworldEnv(gym.Env): 61 | """ 62 | Gridworld Environment that represents a rectangular world 63 | """ 64 | 65 | metadata = {'render.modes': ['human']} 66 | 67 | class Actions(IntEnum): 68 | left = 0 69 | right = 1 70 | up = 2 71 | down = 3 72 | 73 | def __init__(self, width, height, seed=1337): 74 | super(GridworldEnv, self).__init__() 75 | # self.world = np.array((width, height), dtype=int) 76 | 77 | # Environment configuration 78 | self.width = width 79 | self.height = height 80 | self.size = width * height 81 | 82 | # Information for the agent 83 | self.agent_pos = (0, 0) 84 | self.steps = [(0, 0)] 85 | self.board = Board(width, height) 86 | 87 | # For gym 88 | # Actions are discrete integer values 89 | self.action_space = spaces.Discrete(4) 90 | # Observations are the agent's (row, column) position 91 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 92 | shape=(2, ), dtype=np.int) 93 | 94 | # Initialize the state 95 | self.reset() 96 | 97 | # Initialize the RNG 98 | self.seed(seed=seed) 99 | 100 | # Action enumeration for this environment 101 | self.actions = GridworldEnv.Actions 102 | 103 | def reset(self): 104 | # Start the agent at a random grid 105 | h = random.randint(0, self.height - 1) 106 | w = random.randint(0, self.width - 1) 107 | self.agent_pos = (h, w) 108 | 109 | self.board = Board(self.width, self.height) 110 | self.board.set(h, w) 111 | 112 | board = deepcopy(self.board.data)  # (unused: leftover from the board-observation variant) 113 | board[h, w] = -1 114 | board = board.flatten() 115 | 116 | # Step count since episode start 117 | self.steps = [(h, w)] 118 | 119 | # Return first observation 120 | return np.array([h, w]) 121 | 122 | def seed(self, seed=1337): 123 | """ 124 | Seed the random number generator 125 | """ 126 | self.np_random, _ = seeding.np_random(seed) 127 | return [seed] 128 | 129 | def step(self, action): 130 | done = False 131 | 132 | # Get the coordinate for the new position 133 | prev_i, prev_j = self.agent_pos 134 | i, j = self.agent_pos 135 | 136 | if action == self.actions.left: 137 | j -= 1 138 | elif action == self.actions.right: 139 | j += 1 140 | elif action == self.actions.up: 141 | i -= 1 142 | elif action == self.actions.down: 143 | i += 1 144 | 145 | if not self.board.is_valid(i, j): # New position out of bound 146 | return np.array([prev_i, prev_j]), -1, False, {} 147 | 148 |
self.agent_pos = (i, j) 149 | # Update the step information 150 | self.steps.append((i, j)) 151 | self.board.set(i, j) 152 | 153 | pos = np.array([i, j]) 154 | 155 | if self.board.get(i, j) > 1: # The grid has been visited 156 | return pos, 0, True, {} 157 | elif len(self.steps) == self.size: # All grids have been visited once 158 | return pos, 100000, True, {} 159 | else: # First visit to this grid 160 | return pos, 1, False, {} 161 | 162 | def render(self, mode='human', close=False): 163 | print("board:") 164 | print(self.board.data) 165 | print("path:", self.get_path()) 166 | print("pos:", self.agent_pos) 167 | print("") 168 | 169 | def get_path(self): 170 | """ 171 | Get the path on the field 172 | :return: a table showing the visit order of each grid 173 | """ 174 | board = np.zeros((self.height, self.width), dtype=np.int) 175 | for index, pos in enumerate(self.steps): 176 | i, j = pos 177 | board[i, j] = index 178 | 179 | table = tabulate(board) 180 | return table 181 | 182 |
--------------------------------------------------------------------------------
/src/coverage_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanzhangg/uav-path-coverage/7fcb068cf38b9c8b7ce1f15906ba4f77344b1e2d/src/coverage_animation.gif
--------------------------------------------------------------------------------
/src/training_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanzhangg/uav-path-coverage/7fcb068cf38b9c8b7ce1f15906ba4f77344b1e2d/src/training_plot.png
--------------------------------------------------------------------------------
/train.py: --------------------------------------------------------------------------------
1 | from gridwrold_env_multi_circle_3 import GridworldEnv 2 | from stable_baselines.common.env_checker import check_env 3 | from stable_baselines import DQN, PPO2, A2C, ACKTR 4 | from stable_baselines.common.cmd_util import make_vec_env 5 | import matplotlib.pyplot as plt 6 | 7 | import os 8 | import numpy as np 9 | from stable_baselines.bench import Monitor 10 | from stable_baselines.results_plotter import load_results, ts2xy 11 | from stable_baselines.common.callbacks import BaseCallback 12 | 13 | 14 | class SaveOnBestTrainingRewardCallback(BaseCallback): 15 | """ 16 | Callback for saving a model (the check is done every ``check_freq`` steps) 17 | based on the training reward (in practice, we recommend using ``EvalCallback``). 18 | 19 | :param check_freq: (int) 20 | :param log_dir: (str) Path to the folder where the model will be saved. 21 | It must contain the file created by the ``Monitor`` wrapper.
22 | :param verbose: (int) 23 | """ 24 | 25 | def __init__(self, check_freq: int, log_dir: str, verbose=1): 26 | super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) 27 | self.check_freq = check_freq 28 | self.log_dir = log_dir 29 | self.save_path = os.path.join(log_dir, 'best_model') 30 | self.best_mean_reward = -np.inf 31 | 32 | def _init_callback(self) -> None: 33 | # Create folder if needed 34 | if self.save_path is not None: 35 | os.makedirs(self.save_path, exist_ok=True) 36 | 37 | def _on_step(self) -> bool: 38 | if self.n_calls % self.check_freq == 0: 39 | 40 | # Retrieve training reward 41 | x, y = ts2xy(load_results(self.log_dir), 'timesteps') 42 | if len(x) > 0: 43 | # Mean training reward over the last 100 episodes 44 | mean_reward = np.mean(y[-100:]) 45 | if self.verbose > 0: 46 | print("Num timesteps: {}".format(self.num_timesteps)) 47 | print( 48 | "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, 49 | mean_reward)) 50 | 51 | # New best model, you could save the agent here 52 | if mean_reward > self.best_mean_reward: 53 | self.best_mean_reward = mean_reward 54 | # Example for saving best model 55 | if self.verbose > 0: 56 | print("Saving new best model to {}".format(self.save_path)) 57 | self.model.save(self.save_path) 58 | 59 | return True 60 | 61 | # configurations 62 | w = 6 63 | h = 6 64 | 65 | # Create log dir 66 | log_dir = "/tmp/gym/" 67 | os.makedirs(log_dir, exist_ok=True) 68 | 69 | # Create the callback: check every 1000 steps 70 | callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir) 71 | # Init Env 72 | env = GridworldEnv(w, h) 73 | env = Monitor(env, log_dir) 74 | check_env(env, warn=True) 75 | # Wrap it 76 | env = make_vec_env(lambda: env, n_envs=1) 77 | 78 | # Train the agent 79 | model = ACKTR('MlpPolicy', env, verbose=1).learn(100000, callback=callback) 80 | 81 | # Test the trained agent 82 | obs = env.reset() 83 | n_steps = w * h // 2 84 | 85 | for step in range(n_steps): 86 | action, _ = model.predict(obs, deterministic=True) 87 | print("Step {}".format(step + 1)) 88 | print("Action: ", action) 89 | obs, reward, done, info = env.step(action) 90 | print('obs=', obs, 'reward=', reward, 'done=', done) 91 | env.render() 92 | if done: 93 | # Note that the VecEnv resets automatically 94 | # when a done signal is encountered 95 | print("Goal reached!", "reward=", reward) 96 | break 97 | 98 | 99 | def moving_average(values, window): 100 | """ 101 | Smooth values by doing a moving average 102 | :param values: (numpy array) 103 | :param window: (int) 104 | :return: (numpy array) 105 | """ 106 | weights = np.repeat(1.0, window) / window 107 | return np.convolve(values, weights, 'valid') 108 | 109 | 110 | def plot_results(log_folder, title='Learning Curve'): 111 | """ 112 | Plot the results 113 | 114 | :param log_folder: (str) the save location of the results to plot 115 | :param title: (str) the title of the task to plot 116 | """ 117 | x, y = ts2xy(load_results(log_folder), 'timesteps') 118 | y = moving_average(y, window=50) 119 | # Truncate x 120 | x = x[len(x) - len(y):] 121 | 122 | fig = plt.figure(title) 123 | plt.plot(x, y) 124 | plt.xlabel('Number of Timesteps') 125 | plt.ylabel('Rewards') 126 | plt.title(title + " Smoothed") 127 | plt.show() 128 | 129 | 130 | from stable_baselines import results_plotter 131 | 132 | # Helper from the library 133 | results_plotter.plot_results([log_dir], 1e5, results_plotter.X_TIMESTEPS, "Gridworld Coverage") 134 | 135 | plot_results(log_dir)
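After training, the callback's best checkpoint can be reloaded for evaluation. The snippet below is a minimal sketch and not part of the repo: it assumes the `best_model.zip` written to `log_dir` by `SaveOnBestTrainingRewardCallback` above, and the same 6x6 environment used in train.py.

```python
# Hypothetical evaluation snippet: reload the best checkpoint saved by the
# callback in train.py and replay one episode with the learned policy.
import os
from stable_baselines import ACKTR
from gridwrold_env_multi_circle_3 import GridworldEnv

log_dir = "/tmp/gym/"
env = GridworldEnv(6, 6)
model = ACKTR.load(os.path.join(log_dir, "best_model"))  # resolves best_model.zip

obs = env.reset()
for _ in range(6 * 6):  # a coverage episode cannot usefully exceed the board size
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        break
env.render()
```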
--------------------------------------------------------------------------------
/train_2.py: --------------------------------------------------------------------------------
1 | from gridwrold_env_2 import GridworldEnv 2 | from stable_baselines.common.env_checker import check_env 3 | from stable_baselines import DQN, PPO2, A2C, ACKTR 4 | from stable_baselines.common.cmd_util import make_vec_env 5 | import matplotlib.pyplot as plt 6 | 7 | import os 8 | import numpy as np 9 | from stable_baselines.bench import Monitor 10 | from stable_baselines.results_plotter import load_results, ts2xy 11 | from stable_baselines.common.callbacks import BaseCallback 12 | 13 | 14 | class SaveOnBestTrainingRewardCallback(BaseCallback): 15 | """ 16 | Callback for saving a model (the check is done every ``check_freq`` steps) 17 | based on the training reward (in practice, we recommend using ``EvalCallback``). 18 | 19 | :param check_freq: (int) 20 | :param log_dir: (str) Path to the folder where the model will be saved. 21 | It must contain the file created by the ``Monitor`` wrapper. 22 | :param verbose: (int) 23 | """ 24 | 25 | def __init__(self, check_freq: int, log_dir: str, verbose=1): 26 | super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) 27 | self.check_freq = check_freq 28 | self.log_dir = log_dir 29 | self.save_path = os.path.join(log_dir, 'best_model') 30 | self.best_mean_reward = -np.inf 31 | 32 | def _init_callback(self) -> None: 33 | # Create folder if needed 34 | if self.save_path is not None: 35 | os.makedirs(self.save_path, exist_ok=True) 36 | 37 | def _on_step(self) -> bool: 38 | if self.n_calls % self.check_freq == 0: 39 | 40 | # Retrieve training reward 41 | x, y = ts2xy(load_results(self.log_dir), 'timesteps') 42 | if len(x) > 0: 43 | # Mean training reward over the last 100 episodes 44 | mean_reward = np.mean(y[-100:]) 45 | if self.verbose > 0: 46 | print("Num timesteps: {}".format(self.num_timesteps)) 47 | print( 48 | "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, 49 | mean_reward)) 50 | 51 | # New best model, you could save the agent here 52 | if mean_reward > self.best_mean_reward: 53 | self.best_mean_reward = mean_reward 54 | # Example for saving best model 55 | if self.verbose > 0: 56 | print("Saving new best model to {}".format(self.save_path)) 57 | self.model.save(self.save_path) 58 | 59 | return True 60 | 61 | 62 | # configurations 63 | w = 6 64 | h = 6 65 | 66 | # Create log dir 67 | log_dir = "/tmp/gym/" 68 | os.makedirs(log_dir, exist_ok=True) 69 | 70 | # Create the callback: check every 1000 steps 71 | callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir) 72 | # Init Env 73 | env = GridworldEnv(w, h) 74 | env = Monitor(env, log_dir) 75 | check_env(env, warn=True) 76 | # Wrap it 77 | env = make_vec_env(lambda: env, n_envs=1) 78 | 79 | # Train the agent 80 | model = ACKTR('MlpPolicy', env, verbose=1).learn(100000, callback=callback) 81 | 82 | # Test the trained agent 83 | obs = env.reset() 84 | n_steps = w * h 85 | for step in range(n_steps): 86 | action, _ = model.predict(obs, deterministic=True) 87 | print("Step {}".format(step + 1)) 88 | print("Action: ", action) 89 | obs, reward, done, info = env.step(action) 90 | print('obs=', obs, 'reward=', reward, 'done=', done) 91 | env.render() 92 | if done: 93 | # Note that the VecEnv resets automatically 94 | # when a done signal is encountered 95 | print("Goal reached!", "reward=", reward) 96 | break 97 | 98 | 99 | def moving_average(values,
window): 100 | """ 101 | Smooth values by doing a moving average 102 | :param values: (numpy array) 103 | :param window: (int) 104 | :return: (numpy array) 105 | """ 106 | weights = np.repeat(1.0, window) / window 107 | return np.convolve(values, weights, 'valid') 108 | 109 | 110 | def plot_results(log_folder, title='Learning Curve'): 111 | """ 112 | Plot the results 113 | 114 | :param log_folder: (str) the save location of the results to plot 115 | :param title: (str) the title of the task to plot 116 | """ 117 | x, y = ts2xy(load_results(log_folder), 'timesteps') 118 | y = moving_average(y, window=50) 119 | # Truncate x 120 | x = x[len(x) - len(y):] 121 | 122 | fig = plt.figure(title) 123 | plt.plot(x, y) 124 | plt.xlabel('Number of Timesteps') 125 | plt.ylabel('Rewards') 126 | plt.title(title + " Smoothed") 127 | plt.show() 128 | 129 | 130 | from stable_baselines import results_plotter 131 | 132 | # Helper from the library 133 | results_plotter.plot_results([log_dir], 1e5, results_plotter.X_TIMESTEPS, "Gridworld Coverage") 134 | 135 | plot_results(log_dir) --------------------------------------------------------------------------------
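For reference, the multi-drone environments above encode the joint action of two drones as a single `Discrete(16)` index into `itertools.product(actions, repeat=2)`. The standalone illustration below (not part of the repo) mirrors how `self.action_combs` is built and shows how one index decodes into a move per drone.

```python
# Decode a Discrete(16) action index into one (di, dj) move per drone,
# mirroring the action_combs construction in the multi-drone envs.
from itertools import product

actions = [(1, 0), (-1, 0), (0, 1), (0, -1)]  # down, up, right, left as (row, col) deltas
action_combs = list(product(actions, repeat=2))  # 4 moves x 4 moves = 16 joint actions

assert len(action_combs) == 16
drone0_move, drone1_move = action_combs[6]
print(drone0_move, drone1_move)  # (-1, 0) (0, 1): drone 0 moves up, drone 1 moves right
```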