├── .gitignore
├── README.md
├── gridworld_env_revisit.py
├── gridwrold_env.py
├── gridwrold_env_2.py
├── gridwrold_env_board.py
├── gridwrold_env_multi.py
├── gridwrold_env_multi_circle.py
├── gridwrold_env_multi_circle_2.py
├── gridwrold_env_multi_circle_3.py
├── gridwrold_env_random.py
├── src
│   ├── coverage_animation.gif
│   └── training_plot.png
├── train.py
└── train_2.py
/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/*
--------------------------------------------------------------------------------
/README.md: --------------------------------------------------------------------------------
1 | # UAV Coverage Path Planning for Single/Multi Drones 2 | 3 | This program plans coverage paths for a single drone or multiple drones on a discrete gridworld. 4 | 5 | ![2 Drones Coverage Path on 10x10 Field](src/coverage_animation.gif) 6 | 7 | *2 Drones Coverage Path Animation* 8 | 9 | ![Training Plot over Episodes of 2 Drones on 10x10 Field](src/training_plot.png) 10 | 11 | *Training Plot over Episodes of 2 Drones on a 10x10 Field* 12 | 13 | ## Installation 14 | 15 | - Install [stable-baselines](https://stable-baselines.readthedocs.io/en/master/index.html) 16 | - Stable-Baselines requires Python 3 (>=3.5) with the development headers. You'll also need the system packages CMake, OpenMPI, and zlib; see the Stable-Baselines documentation for how to install them. 17 | - Stable-Baselines supports Tensorflow versions from 1.8.0 to 1.15.0, and does not work on Tensorflow versions 2.0.0 and above. 18 | - `pip install stable-baselines` 19 | - Install tabulate 20 | - `pip install tabulate` 21 | 22 | 23 | ## Training 24 | 25 | - Import the specific environment in train.py 26 | - `python3 train.py` 27 |
--------------------------------------------------------------------------------
/gridworld_env_revisit.py: --------------------------------------------------------------------------------
1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited count of the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Get the visited count of the grid 40 | :param i: row 41 | :param j: column 42 | :return: the visited count 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if every grid has been visited at least once 55 | """ 56 | return self.total_grids == self.visited_grids 57 | 58 | def __str__(self): 59 | return str(self.data) 60 | 61 | 62 | class GridworldEnv(gym.Env): 63 | """ 64 | Gridworld Environment that represents a rectangular world 65 | """ 66 | 67 | metadata = {'render.modes': ['human']} 68 | 69 | class Actions(IntEnum): 70 | left = 0 71
| right = 1 72 | up = 2 73 | down = 3 74 | 75 | def __init__(self, width, height, seed=1337): 76 | super(GridworldEnv, self).__init__() 77 | # self.world = np.array((width, height), dtype=int) 78 | 79 | # Environment configuration 80 | self.width = width 81 | self.height = height 82 | self.size = width * height 83 | 84 | # Information for the agent 85 | self.agent_pos = (0, 0) 86 | self.steps = [(0, 0)] 87 | self.board = Board(width, height) 88 | 89 | # For gym 90 | # Actions are discrete integer values 91 | self.action_space = spaces.Discrete(4) 92 | # Observations are the agent's (row, column) position 93 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 94 | shape=(2, ), dtype=np.int) 95 | 96 | # Initialize the state 97 | self.reset() 98 | 99 | # Initialize the RNG 100 | self.seed(seed=seed) 101 | 102 | # Action enumeration for this environment 103 | self.actions = GridworldEnv.Actions 104 | 105 | def reset(self): 106 | # Reset the agent to the top-left corner 107 | h = 0 108 | w = 0 109 | self.agent_pos = (h, w) 110 | 111 | self.board = Board(self.width, self.height) 112 | self.board.set(h, w) 113 | 114 | board = deepcopy(self.board.data)  # (unused: leftover from the board-observation variant) 115 | board[0, 0] = -1 116 | board = board.flatten() 117 | 118 | # Step count since episode start 119 | self.steps = [(0, 0)] 120 | 121 | # Return first observation 122 | return np.array([0, 0]) 123 | 124 | def seed(self, seed=1337): 125 | """ 126 | Seed the random number generator 127 | """ 128 | self.np_random, _ = seeding.np_random(seed) 129 | return [seed] 130 | 131 | def step(self, action): 132 | done = False 133 | 134 | # Get the coordinate for the new position 135 | prev_i, prev_j = self.agent_pos 136 | i, j = self.agent_pos 137 | 138 | if action == self.actions.left: 139 | j -= 1 140 | elif action == self.actions.right: 141 | j += 1 142 | elif action == self.actions.up: 143 | i -= 1 144 | elif action == self.actions.down: 145 | i += 1 146 | 147 | if not self.board.is_valid(i, j): # New position out of bound 148 | return np.array([prev_i, prev_j]), -1, False, {} 149 | 150 | self.agent_pos = (i, j) 151 | # Update the step information 152 | self.steps.append((i, j)) 153 | self.board.set(i, j) 154 | 155 | pos = np.array([i, j]) 156 | 157 | if self.board.get(i, j) > 5: # The grid has been visited more than 5 times 158 | return pos, 0, True, {} 159 | elif self.board.is_filled(): # All grids have been visited at least once 160 | return pos, 100000, True, {} 161 | else: # Keep exploring; small reward for each step 162 | return pos, 1, False, {} 163 | 164 | def render(self, mode='human', close=False): 165 | print("board:") 166 | print(self.board.data) 167 | print("path:", self.get_path()) 168 | print("pos:", self.agent_pos) 169 | print("") 170 | 171 | def get_path(self): 172 | """ 173 | Get the path on the field 174 | :return: a table showing the visit order of each grid 175 | """ 176 | board = np.zeros((self.height, self.width), dtype=np.int) 177 | for index, pos in enumerate(self.steps): 178 | i, j = pos 179 | board[i, j] = index 180 | 181 | table = tabulate(board) 182 | return table 183 | 184 |
--------------------------------------------------------------------------------
/gridwrold_env.py: --------------------------------------------------------------------------------
1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 |
self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | return self.total_grids == self.visited_grids 54 | 55 | def __str__(self): 56 | return str(self.data) 57 | 58 | 59 | class GridworldEnv(gym.Env): 60 | """ 61 | Gridworld Environment that represents a rectangle world 62 | """ 63 | 64 | metadata = {'render.modes': ['human']} 65 | 66 | class Actions(IntEnum): 67 | left = 0 68 | right = 1 69 | up = 2 70 | down = 3 71 | 72 | def __init__(self, width, height, seed=1337): 73 | super(GridworldEnv, self).__init__() 74 | # self.world = np.array((width, height), dtype=int) 75 | 76 | # Environment configuration 77 | self.width = width 78 | self.height = height 79 | self.size = width * height 80 | 81 | # Information for the agent 82 | self.agent_pos = (0, 0) 83 | self.steps = [(0, 0)] 84 | self.board = Board(width, height) 85 | 86 | # For gym 87 | # Actions are discrete integer values 88 | self.action_space = spaces.Discrete(4) 89 | # Observations are number of cells 90 | self.observation_space = spaces.Box(low=-1, high=2, 91 | shape=(self.size, ), dtype=np.int) 92 | 93 | # Initialize the state 94 | self.reset() 95 | 96 | # Initialize the RNG 97 | self.seed(seed=seed) 98 | 99 | # Action enumeration for this environment 100 | self.actions = GridworldEnv.Actions 101 | 102 | def reset(self): 103 | # Current position and direction of the agent 104 | h = 0 105 | w = 0 106 | self.agent_pos = (h, w) 107 | 108 | self.board = Board(self.width, self.height) 109 | self.board.set(h, w) 110 | 111 | board = deepcopy(self.board.data) 112 | board[0, 0] = -1 113 | board = board.flatten() 114 | 115 | # Step count since episode start 116 | self.steps = [(0, 0)] 117 | 118 | # Return first observation 119 | return board.flatten() 120 | 121 | def seed(self, seed=1337): 122 | """ 123 | Seed the random number generator 124 | """ 125 | self.np_random, _ = seeding.np_random(seed) 126 | return [seed] 127 | 128 | def step(self, action): 129 | done = False 130 | 131 | # Get the coordinate for the new position 132 | prev_i, prev_j = self.agent_pos 133 | i, j = self.agent_pos 134 | 135 | if action == self.actions.left: 136 | j -= 1 137 | elif action == self.actions.right: 138 | j += 1 139 | elif action == self.actions.up: 140 | i -= 1 141 | elif action == self.actions.down: 142 | i += 1 143 | 144 | if not self.board.is_valid(i, j): # New position out of bound 145 | board = deepcopy(self.board.data) 146 | board[prev_i, prev_j] = -1 147 | board = board.flatten() 148 | return board, -1, False, {} 149 | 150 | self.agent_pos = (i, j) 151 | # Update the step information 152 | self.steps.append((i, j)) 153 | self.board.set(i, j) 154 | board = 
deepcopy(self.board.data) 155 | board[i, j] = -1 156 | board = board.flatten() 157 | 158 | if self.board.get(i, j) > 1: # The grid has been visited 159 | return board, 0, True, {} 160 | elif len(self.steps) >= self.size: # All grids has been visited once 161 | return board, 100000, True, {} 162 | else: # The grid has not been visited 163 | return board, 1, False, {} 164 | 165 | def render(self, mode='human', close=False): 166 | print("board:") 167 | print(self.board.data) 168 | print("path:", self.get_path()) 169 | print("pos:", self.agent_pos) 170 | print("") 171 | 172 | def get_path(self): 173 | """ 174 | Get the path on the field 175 | :return: 176 | """ 177 | board = np.zeros((self.height, self.width), dtype=np.int) 178 | for index, pos in enumerate(self.steps): 179 | i, j = pos 180 | board[i, j] = index 181 | 182 | table = tabulate(board) 183 | return table 184 | 185 | -------------------------------------------------------------------------------- /gridwrold_env_2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | return self.total_grids == self.visited_grids 54 | 55 | def __str__(self): 56 | return str(self.data) 57 | 58 | 59 | class GridworldEnv(gym.Env): 60 | """ 61 | Gridworld Environment that represents a rectangle world 62 | """ 63 | 64 | metadata = {'render.modes': ['human']} 65 | 66 | class Actions(IntEnum): 67 | left = 0 68 | right = 1 69 | up = 2 70 | down = 3 71 | 72 | def __init__(self, width, height, seed=1337): 73 | super(GridworldEnv, self).__init__() 74 | # self.world = np.array((width, height), dtype=int) 75 | 76 | # Environment configuration 77 | self.width = width 78 | self.height = height 79 | self.size = width * height 80 | 81 | # Information for the agent 82 | self.agent_pos = (0, 0) 83 | self.steps = [(0, 0)] 84 | self.board = Board(width, height) 85 | 86 | # For gym 87 | # Actions are discrete integer values 88 | self.action_space = spaces.Discrete(4) 89 | # Observations are number of cells 90 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 91 | shape=(2, ), dtype=np.int) 92 | 93 | # Initialize the state 94 | self.reset() 95 | 96 | # Initialize the RNG 97 | self.seed(seed=seed) 98 | 99 | # Action enumeration for this environment 100 | self.actions 
= GridworldEnv.Actions 101 | 102 | def reset(self): 103 | # Current position and direction of the agent 104 | h = 0 105 | w = 0 106 | self.agent_pos = (h, w) 107 | 108 | self.board = Board(self.width, self.height) 109 | self.board.set(h, w) 110 | 111 | board = deepcopy(self.board.data) 112 | board[0, 0] = -1 113 | board = board.flatten() 114 | 115 | # Step count since episode start 116 | self.steps = [(0, 0)] 117 | 118 | # Return first observation 119 | return np.array([0, 0]) 120 | 121 | def seed(self, seed=1337): 122 | """ 123 | Seed the random number generator 124 | """ 125 | self.np_random, _ = seeding.np_random(seed) 126 | return [seed] 127 | 128 | def step(self, action): 129 | done = False 130 | 131 | # Get the coordinate for the new position 132 | prev_i, prev_j = self.agent_pos 133 | i, j = self.agent_pos 134 | 135 | if action == self.actions.left: 136 | j -= 1 137 | elif action == self.actions.right: 138 | j += 1 139 | elif action == self.actions.up: 140 | i -= 1 141 | elif action == self.actions.down: 142 | i += 1 143 | 144 | if not self.board.is_valid(i, j): # New position out of bound 145 | return np.array([prev_i, prev_j]), -1, False, {} 146 | 147 | self.agent_pos = (i, j) 148 | # Update the step information 149 | self.steps.append((i, j)) 150 | self.board.set(i, j) 151 | 152 | pos = np.array([i, j]) 153 | 154 | if self.board.get(i, j) > 1: # The grid has been visited 155 | return pos, 0, True, {} 156 | elif len(self.steps) == self.size: # All grids has been visited once 157 | return pos, 100000, True, {} 158 | else: # The grid has not been visited 159 | return pos, 1, False, {} 160 | 161 | def render(self, mode='human', close=False): 162 | print("board:") 163 | print(self.board.data) 164 | print("path:", self.get_path()) 165 | print("pos:", self.agent_pos) 166 | print("") 167 | 168 | def get_path(self): 169 | """ 170 | Get the path on the field 171 | :return: 172 | """ 173 | board = np.zeros((self.height, self.width), dtype=np.int) 174 | for index, pos in enumerate(self.steps): 175 | i, j = pos 176 | board[i, j] = index 177 | 178 | table = tabulate(board) 179 | return table 180 | 181 | -------------------------------------------------------------------------------- /gridwrold_env_board.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j 
< self.width 51 | 52 | def is_filled(self): 53 | return self.total_grids == self.visited_grids 54 | 55 | def __str__(self): 56 | return str(self.data) 57 | 58 | 59 | class GridworldEnv(gym.Env): 60 | """ 61 | Gridworld Environment that represents a rectangle world 62 | """ 63 | 64 | metadata = {'render.modes': ['human']} 65 | 66 | class Actions(IntEnum): 67 | left = 0 68 | right = 1 69 | up = 2 70 | down = 3 71 | 72 | def __init__(self, width, height, seed=1337): 73 | super(GridworldEnv, self).__init__() 74 | # self.world = np.array((width, height), dtype=int) 75 | 76 | # Environment configuration 77 | self.width = width 78 | self.height = height 79 | self.size = width * height 80 | 81 | # Information for the agent 82 | self.agent_pos = (0, 0) 83 | self.steps = [(0, 0)] 84 | self.board = Board(width, height) 85 | 86 | # For gym 87 | # Actions are discrete integer values 88 | self.action_space = spaces.Discrete(4) 89 | # Observations are number of cells 90 | self.observation_space = spaces.Box(low=0, high=2, 91 | shape=(self.size, ), dtype=np.int) 92 | 93 | # Initialize the state 94 | self.reset() 95 | 96 | # Initialize the RNG 97 | self.seed(seed=seed) 98 | 99 | # Action enumeration for this environment 100 | self.actions = GridworldEnv.Actions 101 | 102 | def reset(self): 103 | # Current position and direction of the agent 104 | h = 0 105 | w = 0 106 | self.agent_pos = (h, w) 107 | 108 | self.board = Board(self.width, self.height) 109 | self.board.set(h, w) 110 | 111 | board = deepcopy(self.board.data) 112 | board[0, 0] = -1 113 | board = board.flatten() 114 | 115 | # Step count since episode start 116 | self.steps = [(0, 0)] 117 | 118 | # Return first observation 119 | return self.board.data.flatten() 120 | 121 | def seed(self, seed=1337): 122 | """ 123 | Seed the random number generator 124 | """ 125 | self.np_random, _ = seeding.np_random(seed) 126 | return [seed] 127 | 128 | def step(self, action): 129 | done = False 130 | 131 | # Get the coordinate for the new position 132 | prev_i, prev_j = self.agent_pos 133 | i, j = self.agent_pos 134 | 135 | if action == self.actions.left: 136 | j -= 1 137 | elif action == self.actions.right: 138 | j += 1 139 | elif action == self.actions.up: 140 | i -= 1 141 | elif action == self.actions.down: 142 | i += 1 143 | 144 | if not self.board.is_valid(i, j): # New position out of bound 145 | return self.board.data.flatten(), -1, False, {} 146 | 147 | self.agent_pos = (i, j) 148 | # Update the step information 149 | self.steps.append((i, j)) 150 | self.board.set(i, j) 151 | 152 | board = self.board.data.flatten() 153 | 154 | if self.board.get(i, j) > 1: # The grid has been visited 155 | return board, 0, True, {} 156 | elif len(self.steps) >= self.size: # All grids has been visited once 157 | return board, 100000, True, {} 158 | else: # The grid has not been visited 159 | return board, 1, False, {} 160 | 161 | def render(self, mode='human', close=False): 162 | print("board:") 163 | print(self.board.data) 164 | print("path:") 165 | print(self.get_path()) 166 | print("pos:", self.agent_pos) 167 | print("") 168 | 169 | def get_path(self): 170 | """ 171 | Get the path on the field 172 | :return: 173 | """ 174 | board = np.zeros((self.height, self.width), dtype=np.int) 175 | for index, pos in enumerate(self.steps): 176 | i, j = pos 177 | board[i, j] = index 178 | 179 | table = tabulate(board) 180 | return table 181 | 182 | -------------------------------------------------------------------------------- /gridwrold_env_multi.py: 
-------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.all_steps = None 91 | self.board = Board(width, height) 92 | 93 | # For gym 94 | # Actions are discrete integer values 95 | self.action_space = spaces.Discrete(16) 96 | # Observations are number of cells 97 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 98 | shape=(4, ), dtype=np.int) 99 | 100 | # Initialize the state 101 | self.reset() 102 | 103 | # Initialize the RNG 104 | self.seed(seed=seed) 105 | 106 | # Action enumeration for this environment 107 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 108 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 109 | 110 | def reset(self): 111 | # Current position and direction of the agent 112 | self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 113 | 114 | self.board = Board(self.width, self.height) 115 | self.board.set(0, 0) 116 | self.board.set(self.height - 1, self.width - 1) 117 | 118 | # Step count since episode start 119 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 120 | 121 | # Return first observation 122 | return np.array([0, 0, self.height - 1, self.width - 1]) 123 | 124 | def seed(self, seed=1337): 125 | """ 126 | Seed the random number generator 127 | """ 128 | self.np_random, _ = seeding.np_random(seed) 129 | return [seed] 130 | 131 | def get_obs_space(self): 
132 | """ 133 | :return: The agent's positions as a 1D np array 134 | """ 135 | obs = [] 136 | for pos in self.agent_positions: 137 | obs.extend(pos) 138 | return np.array(obs) 139 | 140 | def step(self, action): 141 | action_comb = self.action_combs[action] 142 | agent_positions = [] 143 | is_visited = False 144 | 145 | for index, a in enumerate(action_comb): 146 | prev_i, prev_j = self.agent_positions[index] 147 | i, j = prev_i + a[0], prev_j + a[1] 148 | if not self.board.is_valid(i, j): # New position out of bound 149 | # Skip this step, has a -1 reward 150 | return self.get_obs_space(), -1, False, {} 151 | elif self.board.get(i, j) > 0: # Revisit a grid 152 | # Terminate the episode, has no reward 153 | is_visited = True 154 | agent_positions.append((i, j)) 155 | 156 | # Update positions 157 | self.agent_positions = agent_positions 158 | 159 | for index, pos in enumerate(self.agent_positions): 160 | i, j = pos 161 | self.board.set(i, j) # Update pos 162 | self.all_steps[index].append(pos) # Update steps 163 | 164 | if is_visited: # Is visited 165 | return self.get_obs_space(), 0, True, {} 166 | elif self.board.is_filled(): # All grids has been visited once 167 | self._write_path() 168 | return self.get_obs_space(), 100000, True, {} 169 | else: # The grid has not been visited 170 | return self.get_obs_space(), 1, False, {} 171 | 172 | def _write_path(self): 173 | with open("path.txt", "w") as fhand: 174 | path = self.get_path() 175 | fhand.write(path) 176 | 177 | def render(self, mode='human', close=False): 178 | print("board:") 179 | print(self.board.data) 180 | print("steps:", self.all_steps) 181 | print("path:", self.get_path()) 182 | print("pos:", self.agent_positions) 183 | print("") 184 | 185 | def get_path(self): 186 | """ 187 | Get the path on the field 188 | :return: 189 | """ 190 | tables = "" 191 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 192 | steps = self.all_steps[0] 193 | for index, pos in enumerate(steps): 194 | i, j = pos 195 | board[i][j] = f"({index})" 196 | 197 | steps = self.all_steps[1] 198 | for index, pos in enumerate(steps): 199 | i, j = pos 200 | board[i][j] = f"[{index}]" 201 | return tabulate(board) 202 | 203 | -------------------------------------------------------------------------------- /gridwrold_env_multi_circle.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | 
"""Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.orignal_positions = None 91 | self.all_steps = None 92 | self.board = Board(width, height) 93 | 94 | # For gym 95 | # Actions are discrete integer values 96 | self.action_space = spaces.Discrete(16) 97 | # Observations are number of cells 98 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 99 | shape=(4, ), dtype=np.int) 100 | 101 | # Initialize the state 102 | self.reset() 103 | 104 | # Initialize the RNG 105 | self.seed(seed=seed) 106 | 107 | # Action enumeration for this environment 108 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 109 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 110 | 111 | def reset(self): 112 | # Current position and direction of the agent 113 | self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 114 | self.orignal_positions = [(0, 0), (self.height - 1, self.width - 1)] 115 | 116 | self.board = Board(self.width, self.height) 117 | self.board.set(0, 0) 118 | self.board.set(self.height - 1, self.width - 1) 119 | 120 | # Step count since episode start 121 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 122 | 123 | # Return first observation 124 | return np.array([0, 0, self.height - 1, self.width - 1]) 125 | 126 | def seed(self, seed=1337): 127 | """ 128 | Seed the random number generator 129 | """ 130 | self.np_random, _ = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def get_obs_space(self): 134 | """ 135 | :return: The agent's positions as a 1D np array 136 | """ 137 | obs = [] 138 | for pos in self.agent_positions: 139 | obs.extend(pos) 140 | return np.array(obs) 141 | 142 | def step(self, action): 143 | action_comb = self.action_combs[action] 144 | agent_positions = [] 145 | is_visited = False 146 | 147 | for index, a in enumerate(action_comb): 148 | prev_i, prev_j = self.agent_positions[index] 149 | i, j = prev_i + a[0], prev_j + a[1] 150 | if not self.board.is_valid(i, j): # New position out of bound 151 | # Skip this step, has a -1 reward 152 | return self.get_obs_space(), -1, False, {} 153 | elif self.board.get(i, j) > 0: # Revisit a grid 154 | # Terminate the episode, has no reward 155 | is_visited = True 156 | agent_positions.append((i, j)) 157 | 158 | # Update positions 159 | self.agent_positions = agent_positions 160 | 161 | for index, pos in enumerate(self.agent_positions): 162 | i, j = pos 163 | self.board.set(i, j) # Update pos 164 | self.all_steps[index].append(pos) # Update steps 165 | 166 | if is_visited: # Is visited 167 | return self.get_obs_space(), 0, True, {} 168 | 
elif self.board.is_filled(): # All grids has been visited once 169 | # self._write_path() 170 | reward = (self.width + self.height) * 20 171 | for index, pos in enumerate(self.agent_positions): 172 | i, j = pos 173 | org_i, org_j = self.orignal_positions[index] 174 | reward += ((- abs(i - org_i) - abs(j - org_j)) * 10) 175 | self._write_path() 176 | return self.get_obs_space(), reward, True, {} 177 | else: # The grid has not been visited 178 | return self.get_obs_space(), 1, False, {} 179 | 180 | def _write_path(self): 181 | with open("path.txt", "w") as fhand: 182 | path = self.get_path() 183 | fhand.write(path) 184 | 185 | def render(self, mode='human', close=False): 186 | print("board:") 187 | print(self.board.data) 188 | print("steps:", self.all_steps) 189 | print("path:", self.get_path()) 190 | print("pos:", self.agent_positions) 191 | print("") 192 | 193 | def get_path(self): 194 | """ 195 | Get the path on the field 196 | :return: 197 | """ 198 | tables = "" 199 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 200 | steps = self.all_steps[0] 201 | for index, pos in enumerate(steps): 202 | i, j = pos 203 | board[i][j] = f"({index})" 204 | 205 | steps = self.all_steps[1] 206 | for index, pos in enumerate(steps): 207 | i, j = pos 208 | board[i][j] = f"[{index}]" 209 | return tabulate(board) 210 | 211 | -------------------------------------------------------------------------------- /gridwrold_env_multi_circle_2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | 
self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.orignal_positions = None 91 | self.all_steps = None 92 | self.board = Board(width, height) 93 | 94 | # For gym 95 | # Actions are discrete integer values 96 | self.action_space = spaces.Discrete(16) 97 | # Observations are number of cells 98 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 99 | shape=(4, ), dtype=np.int) 100 | 101 | # Initialize the state 102 | self.reset() 103 | 104 | # Initialize the RNG 105 | self.seed(seed=seed) 106 | 107 | # Action enumeration for this environment 108 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 109 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 110 | 111 | def reset(self): 112 | # Current position and direction of the agent 113 | self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 114 | self.orignal_positions = [(0, 0), (self.height - 1, self.width - 1)] 115 | 116 | self.board = Board(self.width, self.height) 117 | self.board.set(0, 0) 118 | self.board.set(self.height - 1, self.width - 1) 119 | 120 | # Step count since episode start 121 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 122 | 123 | # Return first observation 124 | return np.array([0, 0, self.height - 1, self.width - 1]) 125 | 126 | def seed(self, seed=1337): 127 | """ 128 | Seed the random number generator 129 | """ 130 | self.np_random, _ = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def get_obs_space(self): 134 | """ 135 | :return: The agent's positions as a 1D np array 136 | """ 137 | obs = [] 138 | for pos in self.agent_positions: 139 | obs.extend(pos) 140 | return np.array(obs) 141 | 142 | def step(self, action): 143 | action_comb = self.action_combs[action] 144 | agent_positions = [] 145 | is_visited = False 146 | 147 | for index, a in enumerate(action_comb): 148 | prev_i, prev_j = self.agent_positions[index] 149 | i, j = prev_i + a[0], prev_j + a[1] 150 | if not self.board.is_valid(i, j): # New position out of bound 151 | # Skip this step, has a -1 reward 152 | return self.get_obs_space(), -1, False, {} 153 | elif self.board.get(i, j) > 0: # Revisit a grid 154 | # Terminate the episode, has no reward 155 | is_visited = True 156 | agent_positions.append((i, j)) 157 | 158 | # Update positions 159 | self.agent_positions = agent_positions 160 | 161 | for index, pos in enumerate(self.agent_positions): 162 | i, j = pos 163 | self.board.set(i, j) # Update pos 164 | self.all_steps[index].append(pos) # Update steps 165 | 166 | reward = 0 167 | for index, pos in enumerate(self.agent_positions): 168 | i, j = pos 169 | org_i, org_j = self.orignal_positions[index] 170 | reward += (- abs(i - org_i) - abs(j - org_j)) 171 | 172 | if is_visited: # Is visited 173 | return self.get_obs_space(), reward, True, {} 174 | elif self.board.is_filled(): # All grids has been visited once 175 | # self._write_path() 176 | self._write_path() 177 | return self.get_obs_space(), reward + 1, True, {} 178 | else: # The grid has not been visited 179 | return self.get_obs_space(), 1, False, {} 180 | 181 | def _write_path(self): 182 | with open("path.txt", "w") as fhand: 183 | path = self.get_path() 184 | fhand.write(path) 185 | 186 | def render(self, mode='human', close=False): 187 | print("board:") 188 | print(self.board.data) 189 | print("steps:", self.all_steps) 190 | print("path:", self.get_path()) 191 | print("pos:", self.agent_positions) 192 | print("") 193 | 194 | def 
get_path(self): 195 | """ 196 | Get the path on the field 197 | :return: 198 | """ 199 | tables = "" 200 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 201 | steps = self.all_steps[0] 202 | for index, pos in enumerate(steps): 203 | i, j = pos 204 | board[i][j] = f"({index})" 205 | 206 | steps = self.all_steps[1] 207 | for index, pos in enumerate(steps): 208 | i, j = pos 209 | board[i][j] = f"[{index}]" 210 | return tabulate(board) 211 | 212 | -------------------------------------------------------------------------------- /gridwrold_env_multi_circle_3.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from itertools import product 7 | from tabulate import tabulate 8 | 9 | 10 | class Board: 11 | """Represent a grid and operations on it""" 12 | def __init__(self, width, height): 13 | self.width = width 14 | self.height = height 15 | 16 | self.data = np.zeros((height, width), dtype=np.int) 17 | 18 | self.total_grids = width * height 19 | self.visited_grids = 0 20 | 21 | def set(self, i, j): 22 | """ 23 | Increment the visited counts in the grid 24 | :param i: row 25 | :param j: column 26 | """ 27 | assert i >= 0 and i < self.height 28 | assert j >= 0 and j < self.width 29 | 30 | if self.data[i, j] == 0: # Not visited 31 | self.visited_grids += 1 32 | 33 | self.data[i, j] += 1 34 | 35 | return self.data[i, j] 36 | 37 | def get(self, i, j): 38 | """ 39 | Increment the visited counts in the grid 40 | :param i: row 41 | :param j: column 42 | :return: 43 | """ 44 | assert i >= 0 and i < self.height 45 | assert j >= 0 and j < self.width 46 | return self.data[i, j] 47 | 48 | def is_valid(self, i, j): 49 | """Check if a position is in the boundary""" 50 | return 0 <= i < self.height and 0 <= j < self.width 51 | 52 | def is_filled(self): 53 | """ 54 | :return: True if the board is filled, otherwise false 55 | """ 56 | for row in self.data: 57 | for i in row: 58 | if i == 0: 59 | return False # Not filled 60 | return True 61 | 62 | def __str__(self): 63 | return str(self.data) 64 | 65 | 66 | class GridworldEnv(gym.Env): 67 | """ 68 | Gridworld Environment that represents a rectangle world 69 | """ 70 | 71 | metadata = {'render.modes': ['human']} 72 | 73 | class Actions(IntEnum): 74 | left = 0 75 | right = 1 76 | up = 2 77 | down = 3 78 | 79 | def __init__(self, width, height, seed=1337): 80 | super(GridworldEnv, self).__init__() 81 | # self.world = np.array((width, height), dtype=int) 82 | 83 | # Environment configuration 84 | self.width = width 85 | self.height = height 86 | self.size = width * height 87 | 88 | # Information for the agent 89 | self.agent_positions = None 90 | self.orignal_positions = None 91 | self.all_steps = None 92 | self.board = Board(width, height) 93 | 94 | # For gym 95 | # Actions are discrete integer values 96 | self.action_space = spaces.Discrete(16) 97 | # Observations are number of cells 98 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 99 | shape=(4, ), dtype=np.int) 100 | 101 | # Initialize the state 102 | self.reset() 103 | 104 | # Initialize the RNG 105 | self.seed(seed=seed) 106 | 107 | # Action enumeration for this environment 108 | actions = [(1, 0), (-1, 0), (0, 1), (0, -1)] 109 | self.action_combs = [p for p in product(actions, repeat=2)] # All possible actions 110 | 111 | def reset(self): 112 | # Current position and direction of the agent 113 | 
self.agent_positions = [(0, 0), (self.height - 1, self.width - 1)] 114 | self.orignal_positions = [(0, 0), (self.height - 1, self.width - 1)] 115 | 116 | self.board = Board(self.width, self.height) 117 | self.board.set(0, 0) 118 | self.board.set(self.height - 1, self.width - 1) 119 | 120 | # Step count since episode start 121 | self.all_steps = [[(0, 0)], [(self.height - 1, self.width - 1)]] 122 | 123 | # Return first observation 124 | return np.array([0, 0, self.height - 1, self.width - 1]) 125 | 126 | def seed(self, seed=1337): 127 | """ 128 | Seed the random number generator 129 | """ 130 | self.np_random, _ = seeding.np_random(seed) 131 | return [seed] 132 | 133 | def get_obs_space(self): 134 | """ 135 | :return: The agent's positions as a 1D np array 136 | """ 137 | obs = [] 138 | for pos in self.agent_positions: 139 | obs.extend(pos) 140 | return np.array(obs) 141 | 142 | def step(self, action): 143 | action_comb = self.action_combs[action] 144 | agent_positions = [] 145 | is_visited = False 146 | 147 | for index, a in enumerate(action_comb): 148 | prev_i, prev_j = self.agent_positions[index] 149 | i, j = prev_i + a[0], prev_j + a[1] 150 | if not self.board.is_valid(i, j): # New position out of bound 151 | # Skip this step, has a -1 reward 152 | return self.get_obs_space(), -1, False, {} 153 | elif self.board.get(i, j) > 0: # Revisit a grid 154 | # Terminate the episode, has no reward 155 | is_visited = True 156 | agent_positions.append((i, j)) 157 | 158 | # Update positions 159 | self.agent_positions = agent_positions 160 | 161 | for index, pos in enumerate(self.agent_positions): 162 | i, j = pos 163 | self.board.set(i, j) # Update pos 164 | self.all_steps[index].append(pos) # Update steps 165 | 166 | if is_visited: # Is visited 167 | return self.get_obs_space(), 0, True, {} 168 | elif self.board.is_filled(): # All grids has been visited once 169 | # self._write_path() 170 | reward = (self.width + self.height) * 2 171 | for index, pos in enumerate(self.agent_positions): 172 | i, j = pos 173 | org_i, org_j = self.orignal_positions[index] 174 | reward += (- abs(i - org_i) - abs(j - org_j)) 175 | self._write_path() 176 | return self.get_obs_space(), reward, True, {} 177 | else: # The grid has not been visited 178 | return self.get_obs_space(), 1, False, {} 179 | 180 | def _write_path(self): 181 | with open("path.txt", "w") as fhand: 182 | path = self.get_path() 183 | fhand.write(path) 184 | 185 | def render(self, mode='human', close=False): 186 | print("board:") 187 | print(self.board.data) 188 | print("steps:", self.all_steps) 189 | print("path:", self.get_path()) 190 | print("pos:", self.agent_positions) 191 | print("") 192 | 193 | def get_path(self): 194 | """ 195 | Get the path on the field 196 | :return: 197 | """ 198 | tables = "" 199 | board = [[None for _ in range(self.width)] for _ in range(self.height)] 200 | steps = self.all_steps[0] 201 | for index, pos in enumerate(steps): 202 | i, j = pos 203 | board[i][j] = f"({index})" 204 | 205 | steps = self.all_steps[1] 206 | for index, pos in enumerate(steps): 207 | i, j = pos 208 | board[i][j] = f"[{index}]" 209 | return tabulate(board) 210 | 211 | -------------------------------------------------------------------------------- /gridwrold_env_random.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.utils import seeding 3 | from gym import spaces 4 | import numpy as np 5 | from enum import IntEnum 6 | from copy import deepcopy 7 | from tabulate import tabulate 8 | import 
random 9 | 10 | 11 | class Board: 12 | """Represent a grid and operations on it""" 13 | def __init__(self, width, height): 14 | self.width = width 15 | self.height = height 16 | 17 | self.data = np.zeros((height, width), dtype=np.int) 18 | 19 | self.total_grids = width * height 20 | self.visited_grids = 0 21 | 22 | def set(self, i, j): 23 | """ 24 | Increment the visited count of the grid 25 | :param i: row 26 | :param j: column 27 | """ 28 | assert i >= 0 and i < self.height 29 | assert j >= 0 and j < self.width 30 | 31 | if self.data[i, j] == 0: # Not visited 32 | self.visited_grids += 1 33 | 34 | self.data[i, j] += 1 35 | 36 | return self.data[i, j] 37 | 38 | def get(self, i, j): 39 | """ 40 | Get the visited count of the grid 41 | :param i: row 42 | :param j: column 43 | :return: the visited count 44 | """ 45 | assert i >= 0 and i < self.height 46 | assert j >= 0 and j < self.width 47 | return self.data[i, j] 48 | 49 | def is_valid(self, i, j): 50 | """Check if a position is in the boundary""" 51 | return 0 <= i < self.height and 0 <= j < self.width 52 | 53 | def is_filled(self): 54 | return self.total_grids == self.visited_grids 55 | 56 | def __str__(self): 57 | return str(self.data) 58 | 59 | 60 | class GridworldEnv(gym.Env): 61 | """ 62 | Gridworld Environment that represents a rectangular world 63 | """ 64 | 65 | metadata = {'render.modes': ['human']} 66 | 67 | class Actions(IntEnum): 68 | left = 0 69 | right = 1 70 | up = 2 71 | down = 3 72 | 73 | def __init__(self, width, height, seed=1337): 74 | super(GridworldEnv, self).__init__() 75 | # self.world = np.array((width, height), dtype=int) 76 | 77 | # Environment configuration 78 | self.width = width 79 | self.height = height 80 | self.size = width * height 81 | 82 | # Information for the agent 83 | self.agent_pos = (0, 0) 84 | self.steps = [(0, 0)] 85 | self.board = Board(width, height) 86 | 87 | # For gym 88 | # Actions are discrete integer values 89 | self.action_space = spaces.Discrete(4) 90 | # Observations are the agent's (row, column) position 91 | self.observation_space = spaces.Box(low=0, high=max((width, height)), 92 | shape=(2, ), dtype=np.int) 93 | 94 | # Initialize the state 95 | self.reset() 96 | 97 | # Initialize the RNG 98 | self.seed(seed=seed) 99 | 100 | # Action enumeration for this environment 101 | self.actions = GridworldEnv.Actions 102 | 103 | def reset(self): 104 | # Start the agent at a random grid 105 | h = random.randint(0, self.height - 1) 106 | w = random.randint(0, self.width - 1) 107 | self.agent_pos = (h, w) 108 | 109 | self.board = Board(self.width, self.height) 110 | self.board.set(h, w) 111 | 112 | board = deepcopy(self.board.data)  # (unused: leftover from the board-observation variant) 113 | board[h, w] = -1 114 | board = board.flatten() 115 | 116 | # Step count since episode start 117 | self.steps = [(h, w)] 118 | 119 | # Return first observation 120 | return np.array([h, w]) 121 | 122 | def seed(self, seed=1337): 123 | """ 124 | Seed the random number generator 125 | """ 126 | self.np_random, _ = seeding.np_random(seed) 127 | return [seed] 128 | 129 | def step(self, action): 130 | done = False 131 | 132 | # Get the coordinate for the new position 133 | prev_i, prev_j = self.agent_pos 134 | i, j = self.agent_pos 135 | 136 | if action == self.actions.left: 137 | j -= 1 138 | elif action == self.actions.right: 139 | j += 1 140 | elif action == self.actions.up: 141 | i -= 1 142 | elif action == self.actions.down: 143 | i += 1 144 | 145 | if not self.board.is_valid(i, j): # New position out of bound 146 | return np.array([prev_i, prev_j]), -1, False, {} 147 | 148 |
self.agent_pos = (i, j) 149 | # Update the step information 150 | self.steps.append((i, j)) 151 | self.board.set(i, j) 152 | 153 | pos = np.array([i, j]) 154 | 155 | if self.board.get(i, j) > 1: # The grid has been visited 156 | return pos, 0, True, {} 157 | elif len(self.steps) == self.size: # All grids have been visited once 158 | return pos, 100000, True, {} 159 | else: # First visit to this grid 160 | return pos, 1, False, {} 161 | 162 | def render(self, mode='human', close=False): 163 | print("board:") 164 | print(self.board.data) 165 | print("path:", self.get_path()) 166 | print("pos:", self.agent_pos) 167 | print("") 168 | 169 | def get_path(self): 170 | """ 171 | Get the path on the field 172 | :return: a table showing the visit order of each grid 173 | """ 174 | board = np.zeros((self.height, self.width), dtype=np.int) 175 | for index, pos in enumerate(self.steps): 176 | i, j = pos 177 | board[i, j] = index 178 | 179 | table = tabulate(board) 180 | return table 181 | 182 |
--------------------------------------------------------------------------------
/src/coverage_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanzhangg/uav-path-coverage/7fcb068cf38b9c8b7ce1f15906ba4f77344b1e2d/src/coverage_animation.gif
--------------------------------------------------------------------------------
/src/training_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanzhangg/uav-path-coverage/7fcb068cf38b9c8b7ce1f15906ba4f77344b1e2d/src/training_plot.png
--------------------------------------------------------------------------------
/train.py: --------------------------------------------------------------------------------
1 | from gridwrold_env_multi_circle_3 import GridworldEnv 2 | from stable_baselines.common.env_checker import check_env 3 | from stable_baselines import DQN, PPO2, A2C, ACKTR 4 | from stable_baselines.common.cmd_util import make_vec_env 5 | import matplotlib.pyplot as plt 6 | 7 | import os 8 | import numpy as np 9 | from stable_baselines.bench import Monitor 10 | from stable_baselines.results_plotter import load_results, ts2xy 11 | from stable_baselines.common.callbacks import BaseCallback 12 | 13 | 14 | class SaveOnBestTrainingRewardCallback(BaseCallback): 15 | """ 16 | Callback for saving a model (the check is done every ``check_freq`` steps) 17 | based on the training reward (in practice, we recommend using ``EvalCallback``). 18 | 19 | :param check_freq: (int) 20 | :param log_dir: (str) Path to the folder where the model will be saved. 21 | It must contain the file created by the ``Monitor`` wrapper.
22 | :param verbose: (int) 23 | """ 24 | 25 | def __init__(self, check_freq: int, log_dir: str, verbose=1): 26 | super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) 27 | self.check_freq = check_freq 28 | self.log_dir = log_dir 29 | self.save_path = os.path.join(log_dir, 'best_model') 30 | self.best_mean_reward = -np.inf 31 | 32 | def _init_callback(self) -> None: 33 | # Create folder if needed 34 | if self.save_path is not None: 35 | os.makedirs(self.save_path, exist_ok=True) 36 | 37 | def _on_step(self) -> bool: 38 | if self.n_calls % self.check_freq == 0: 39 | 40 | # Retrieve training reward 41 | x, y = ts2xy(load_results(self.log_dir), 'timesteps') 42 | if len(x) > 0: 43 | # Mean training reward over the last 100 episodes 44 | mean_reward = np.mean(y[-100:]) 45 | if self.verbose > 0: 46 | print("Num timesteps: {}".format(self.num_timesteps)) 47 | print( 48 | "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, 49 | mean_reward)) 50 | 51 | # New best model, you could save the agent here 52 | if mean_reward > self.best_mean_reward: 53 | self.best_mean_reward = mean_reward 54 | # Example for saving best model 55 | if self.verbose > 0: 56 | print("Saving new best model to {}".format(self.save_path)) 57 | self.model.save(self.save_path) 58 | 59 | return True 60 | 61 | # configurations 62 | w = 6 63 | h = 6 64 | 65 | # Create log dir 66 | log_dir = "/tmp/gym/" 67 | os.makedirs(log_dir, exist_ok=True) 68 | 69 | # Create the callback: check every 1000 steps 70 | callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir) 71 | # Init Env 72 | env = GridworldEnv(w, h) 73 | env = Monitor(env, log_dir) 74 | check_env(env, warn=True) 75 | # Wrap it 76 | env = make_vec_env(lambda: env, n_envs=1) 77 | 78 | # Train the agent 79 | model = ACKTR('MlpPolicy', env, verbose=1).learn(100000, callback=callback) 80 | 81 | # Test the trained agent 82 | obs = env.reset() 83 | n_steps = w * h // 2 84 | 85 | for step in range(n_steps): 86 | action, _ = model.predict(obs, deterministic=True) 87 | print("Step {}".format(step + 1)) 88 | print("Action: ", action) 89 | obs, reward, done, info = env.step(action) 90 | print('obs=', obs, 'reward=', reward, 'done=', done) 91 | env.render() 92 | if done: 93 | # Note that the VecEnv resets automatically 94 | # when a done signal is encountered 95 | print("Goal reached!", "reward=", reward) 96 | break 97 | 98 | 99 | def moving_average(values, window): 100 | """ 101 | Smooth values by doing a moving average 102 | :param values: (numpy array) 103 | :param window: (int) 104 | :return: (numpy array) 105 | """ 106 | weights = np.repeat(1.0, window) / window 107 | return np.convolve(values, weights, 'valid') 108 | 109 | 110 | def plot_results(log_folder, title='Learning Curve'): 111 | """ 112 | Plot the results 113 | 114 | :param log_folder: (str) the save location of the results to plot 115 | :param title: (str) the title of the task to plot 116 | """ 117 | x, y = ts2xy(load_results(log_folder), 'timesteps') 118 | y = moving_average(y, window=50) 119 | # Truncate x 120 | x = x[len(x) - len(y):] 121 | 122 | fig = plt.figure(title) 123 | plt.plot(x, y) 124 | plt.xlabel('Number of Timesteps') 125 | plt.ylabel('Rewards') 126 | plt.title(title + " Smoothed") 127 | plt.show() 128 | 129 | 130 | from stable_baselines import results_plotter 131 | 132 | # Helper from the library 133 | results_plotter.plot_results([log_dir], 1e5, results_plotter.X_TIMESTEPS, "Gridworld Coverage") 134 | 135 | plot_results(log_dir)
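After training, the callback's best checkpoint can be reloaded for evaluation. The snippet below is a minimal sketch and not part of the repo: it assumes the `best_model.zip` written to `log_dir` by `SaveOnBestTrainingRewardCallback` above, and the same 6x6 environment used in train.py.

```python
# Hypothetical evaluation snippet: reload the best checkpoint saved by the
# callback in train.py and replay one episode with the learned policy.
import os
from stable_baselines import ACKTR
from gridwrold_env_multi_circle_3 import GridworldEnv

log_dir = "/tmp/gym/"
env = GridworldEnv(6, 6)
model = ACKTR.load(os.path.join(log_dir, "best_model"))  # resolves best_model.zip

obs = env.reset()
for _ in range(6 * 6):  # a coverage episode cannot usefully exceed the board size
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        break
env.render()
```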
--------------------------------------------------------------------------------
/train_2.py: --------------------------------------------------------------------------------
1 | from gridwrold_env_2 import GridworldEnv 2 | from stable_baselines.common.env_checker import check_env 3 | from stable_baselines import DQN, PPO2, A2C, ACKTR 4 | from stable_baselines.common.cmd_util import make_vec_env 5 | import matplotlib.pyplot as plt 6 | 7 | import os 8 | import numpy as np 9 | from stable_baselines.bench import Monitor 10 | from stable_baselines.results_plotter import load_results, ts2xy 11 | from stable_baselines.common.callbacks import BaseCallback 12 | 13 | 14 | class SaveOnBestTrainingRewardCallback(BaseCallback): 15 | """ 16 | Callback for saving a model (the check is done every ``check_freq`` steps) 17 | based on the training reward (in practice, we recommend using ``EvalCallback``). 18 | 19 | :param check_freq: (int) 20 | :param log_dir: (str) Path to the folder where the model will be saved. 21 | It must contain the file created by the ``Monitor`` wrapper. 22 | :param verbose: (int) 23 | """ 24 | 25 | def __init__(self, check_freq: int, log_dir: str, verbose=1): 26 | super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) 27 | self.check_freq = check_freq 28 | self.log_dir = log_dir 29 | self.save_path = os.path.join(log_dir, 'best_model') 30 | self.best_mean_reward = -np.inf 31 | 32 | def _init_callback(self) -> None: 33 | # Create folder if needed 34 | if self.save_path is not None: 35 | os.makedirs(self.save_path, exist_ok=True) 36 | 37 | def _on_step(self) -> bool: 38 | if self.n_calls % self.check_freq == 0: 39 | 40 | # Retrieve training reward 41 | x, y = ts2xy(load_results(self.log_dir), 'timesteps') 42 | if len(x) > 0: 43 | # Mean training reward over the last 100 episodes 44 | mean_reward = np.mean(y[-100:]) 45 | if self.verbose > 0: 46 | print("Num timesteps: {}".format(self.num_timesteps)) 47 | print( 48 | "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, 49 | mean_reward)) 50 | 51 | # New best model, you could save the agent here 52 | if mean_reward > self.best_mean_reward: 53 | self.best_mean_reward = mean_reward 54 | # Example for saving best model 55 | if self.verbose > 0: 56 | print("Saving new best model to {}".format(self.save_path)) 57 | self.model.save(self.save_path) 58 | 59 | return True 60 | 61 | 62 | # configurations 63 | w = 6 64 | h = 6 65 | 66 | # Create log dir 67 | log_dir = "/tmp/gym/" 68 | os.makedirs(log_dir, exist_ok=True) 69 | 70 | # Create the callback: check every 1000 steps 71 | callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir) 72 | # Init Env 73 | env = GridworldEnv(w, h) 74 | env = Monitor(env, log_dir) 75 | check_env(env, warn=True) 76 | # Wrap it 77 | env = make_vec_env(lambda: env, n_envs=1) 78 | 79 | # Train the agent 80 | model = ACKTR('MlpPolicy', env, verbose=1).learn(100000, callback=callback) 81 | 82 | # Test the trained agent 83 | obs = env.reset() 84 | n_steps = w * h 85 | for step in range(n_steps): 86 | action, _ = model.predict(obs, deterministic=True) 87 | print("Step {}".format(step + 1)) 88 | print("Action: ", action) 89 | obs, reward, done, info = env.step(action) 90 | print('obs=', obs, 'reward=', reward, 'done=', done) 91 | env.render() 92 | if done: 93 | # Note that the VecEnv resets automatically 94 | # when a done signal is encountered 95 | print("Goal reached!", "reward=", reward) 96 | break 97 | 98 | 99 | def moving_average(values,
window): 100 | """ 101 | Smooth values by doing a moving average 102 | :param values: (numpy array) 103 | :param window: (int) 104 | :return: (numpy array) 105 | """ 106 | weights = np.repeat(1.0, window) / window 107 | return np.convolve(values, weights, 'valid') 108 | 109 | 110 | def plot_results(log_folder, title='Learning Curve'): 111 | """ 112 | Plot the results 113 | 114 | :param log_folder: (str) the save location of the results to plot 115 | :param title: (str) the title of the task to plot 116 | """ 117 | x, y = ts2xy(load_results(log_folder), 'timesteps') 118 | y = moving_average(y, window=50) 119 | # Truncate x 120 | x = x[len(x) - len(y):] 121 | 122 | fig = plt.figure(title) 123 | plt.plot(x, y) 124 | plt.xlabel('Number of Timesteps') 125 | plt.ylabel('Rewards') 126 | plt.title(title + " Smoothed") 127 | plt.show() 128 | 129 | 130 | from stable_baselines import results_plotter 131 | 132 | # Helper from the library 133 | results_plotter.plot_results([log_dir], 1e5, results_plotter.X_TIMESTEPS, "Gridworld Coverage") 134 | 135 | plot_results(log_dir) --------------------------------------------------------------------------------
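For reference, the multi-drone environments above encode the joint action of two drones as a single `Discrete(16)` index into `itertools.product(actions, repeat=2)`. The standalone illustration below (not part of the repo) mirrors how `self.action_combs` is built and shows how one index decodes into a move per drone.

```python
# Decode a Discrete(16) action index into one (di, dj) move per drone,
# mirroring the action_combs construction in the multi-drone envs.
from itertools import product

actions = [(1, 0), (-1, 0), (0, 1), (0, -1)]  # down, up, right, left as (row, col) deltas
action_combs = list(product(actions, repeat=2))  # 4 moves x 4 moves = 16 joint actions

assert len(action_combs) == 16
drone0_move, drone1_move = action_combs[6]
print(drone0_move, drone1_move)  # (-1, 0) (0, 1): drone 0 moves up, drone 1 moves right
```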