├── .gitignore ├── LICENSE ├── README.md ├── Thesis.pdf ├── config.yaml ├── models ├── 310000_nocomm.pth └── 337500.pth ├── pathfinding ├── environment.py ├── main.py ├── models │ └── dhc │ │ ├── __init__.py │ │ ├── buffer.py │ │ ├── evaluate.py │ │ ├── model.py │ │ ├── train.py │ │ ├── visualize.py │ │ └── worker.py ├── movingai.py ├── settings.py ├── test_cases │ ├── 10length_16agents_0.1density.pkl │ ├── 10length_16agents_0.3density.pkl │ ├── 10length_32agents_0.1density.pkl │ ├── 10length_4agents_0.1density.pkl │ ├── 10length_4agents_0.3density.pkl │ ├── 10length_8agents_0.1density.pkl │ ├── 10length_8agents_0.3density.pkl │ ├── 128length_32agents_0.3372density.pkl │ ├── 20length_16agents_0.1density.pkl │ ├── 20length_16agents_0.3density.pkl │ ├── 20length_32agents_0.1density.pkl │ ├── 20length_32agents_0.3density.pkl │ ├── 20length_4agents_0.1density.pkl │ ├── 20length_4agents_0.3density.pkl │ ├── 20length_8agents_0.1density.pkl │ ├── 20length_8agents_0.3density.pkl │ ├── 256length_16agents_0.2654density.pkl │ ├── 256length_64agents_0.2654density.pkl │ ├── 30length_16agents_0.1density.pkl │ ├── 30length_16agents_0.3density.pkl │ ├── 30length_32agents_0.1density.pkl │ ├── 30length_32agents_0.3density.pkl │ ├── 30length_4agents_0.1density.pkl │ ├── 30length_4agents_0.3density.pkl │ ├── 30length_8agents_0.1density.pkl │ ├── 30length_8agents_0.3density.pkl │ ├── 32length_32agents_0.334density.pkl │ ├── 40length_16agents_0.1density.pkl │ ├── 40length_16agents_0.3density.pkl │ ├── 40length_32agents_0.1density.pkl │ ├── 40length_32agents_0.3density.pkl │ ├── 40length_4agents_0.1density.pkl │ ├── 40length_4agents_0.3density.pkl │ ├── 40length_64agents_0.3density.pkl │ ├── 40length_8agents_0.1density.pkl │ ├── 40length_8agents_0.3density.pkl │ ├── 80length_16agents_0.1density.pkl │ ├── 80length_16agents_0.3density.pkl │ ├── 80length_32agents_0.1density.pkl │ ├── 80length_32agents_0.3density.pkl │ ├── 80length_4agents_0.1density.pkl │ ├── 80length_4agents_0.3density.pkl 
│ ├── 80length_64agents_0.3density.pkl │ ├── 80length_8agents_0.1density.pkl │ ├── 80length_8agents_0.3density.pkl │ ├── Berlin_0_256.map_Berlin_0_256.map.scen_16agents_.pkl │ ├── Berlin_0_256.map_Berlin_0_256.map.scen_1agents_.pkl │ ├── Berlin_0_256.map_Berlin_0_256.map.scen_32agents_.pkl │ ├── Berlin_0_256.map_Berlin_0_256.map.scen_4agents_.pkl │ ├── Berlin_0_256.map_Berlin_0_256.map.scen_64agents_.pkl │ ├── Berlin_0_256.map_Berlin_0_256.map.scen_8agents_.pkl │ ├── den520d.map_den520d-even-1.scen_1agents_.pkl │ ├── den520d.map_den520d-even-1.scen_32agents_.pkl │ ├── den520d.map_den520d-even-1.scen_4agents_.pkl │ ├── den520d.map_den520d-even-1.scen_64agents_.pkl │ ├── ht_chantry.map_ht_chantry-even-1.scen_1agents_.pkl │ ├── ht_chantry.map_ht_chantry-even-1.scen_32agents_.pkl │ ├── ht_chantry.map_ht_chantry-even-1.scen_64agents_.pkl │ ├── ht_chantry.map_ht_chantry-even-1.scen_8agents_.pkl │ ├── lak303d.map_lak303d-even-1.scen_1agents_.pkl │ ├── lak303d.map_lak303d-even-1.scen_32agents_.pkl │ ├── lak303d.map_lak303d-even-1.scen_64agents_.pkl │ ├── lak303d.map_lak303d-even-1.scen_8agents_.pkl │ ├── maze-128-128-2.map_maze-128-128-2-random-1.scen_32agents_.pkl │ ├── room-32-32-4.map_room-32-32-4-random-1.scen_32agents_.pkl │ ├── small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl │ ├── small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl │ ├── small-3-rooms.map_small-12.scen_12agents_.pkl │ ├── small.map_small-4.scen_4agents_.pkl │ ├── small.map_small-8-reversed.scen_8agents_.pkl │ └── small.map_small-8.scen_8agents_.pkl └── utils.py ├── poetry.lock ├── pyproject.toml ├── static ├── DHC_10x10_4_good.gif ├── DHC_40x40_16_dense.gif ├── DHC_40x40_16_good.gif ├── DHC_40x40_4_good.gif ├── DHC_architecture.png ├── DHC_training.png ├── chart_40x40.png └── chart_80x80.png ├── tests └── test_imports.py └── videos ├── 337500_small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl_0.gif ├── 337500_small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl_0.gif ├── 
337500_small-3-rooms.map_small-12.scen_12agents_.pkl_0.gif ├── 337500_small.map_small-4.scen_4agents_.pkl_0.gif ├── 337500_small.map_small-8-reversed.scen_8agents_.pkl_0.gif └── 337500_small.map_small-8.scen_8agents_.pkl_0.gif /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | .idea/ 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vlad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learnable Decentralized MAPF using reinforcement learning with local communication 2 | 3 | ![PyTorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=flat&logo=PyTorch&logoColor=white) 4 | ![Poetry](https://img.shields.io/badge/Poetry-%2300C4CC.svg?style=flat&logo=Poetry&logoColor=white) 5 | ![Black](https://img.shields.io/badge/code%20style-black-000000.svg) 6 | ![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v1.json) 7 | 8 | ## Description 9 | 10 | We perform extensive empirical evaluation of one of the state-of-the-art decentralized PO-MAPF algorithms which leverages communication between agents, Distributed Heuristic Communication (DHC). Through comprehensive experiments, the performance of DHC is observed to degrade when agents are faced with complete packet loss during communication. To mitigate this issue, we propose a novel algorithm called DHC-R (DHC-robust). Open-sourced model weights and the codebase are provided. 11 | 12 | ## Requirements 13 | In order for `models.dhc.train` to be successfully run, you have to have a machine equipped with 1 GPU and several CPUs. 14 | Consider having `num_cpus - 2` actors configured through the `dhc.train.num_actors` in `config.yaml` 15 | 16 | **Attention: We do not guarantee the desired performance on a non-GPU machine.** 17 | 18 | While we aim at supporting MacOS, Linux and Windows platforms, the successful training is not guaranteed on a Windows-based machine. 
19 | The benchmarking script should work there, though. Please report it [here](https://github.com/acforvs/po-mapf-thesis/issues) if it doesn't. 20 | 21 | ## Setting up 22 | 1. Install [Poetry](https://python-poetry.org) 23 | 2. Run [poetry install](https://python-poetry.org/docs/cli/#install) to install the dependencies 24 | 25 | If you see the ``Failed to create the collection: Prompt dismissed..`` error when running `poetry install`, [consider](https://github.com/python-poetry/poetry/issues/1917#issuecomment-1251667047) executing this line first: 26 | ```shell 27 | export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring 28 | ``` 29 | 30 | ## Repository description & Usage 31 | 1. `models` dir contains the weights of the trained models 32 | 2. `config.yaml` - training & model params, environmental settings etc. 33 | 3. `pathfinding/models` contains the implementations of the different models 34 | 35 | 36 | ## Cite 37 | 38 | ``` 39 | @InProceedings{10.1007/978-3-031-43111-1_14, 40 | author="Savinov, Vladislav 41 | and Yakovlev, Konstantin", 42 | editor="Ronzhin, Andrey 43 | and Sadigov, Aminagha 44 | and Meshcheryakov, Roman", 45 | title="DHC-R: Evaluating ``Distributed Heuristic Communication'' and Improving Robustness for Learnable Decentralized PO-MAPF", 46 | booktitle="Interactive Collaborative Robotics", 47 | year="2023", 48 | publisher="Springer Nature Switzerland", 49 | address="Cham", 50 | pages="151--163", 51 | abstract="Multi-agent pathfinding (MAPF) is a problem of coordinating the movements of multiple agents operating a shared environment that has numerous industrial and research applications. In many practical cases the agents (robots) have limited visibility of the environment and must rely on local observations to make decisions. This scenario, known as partially observable MAPF (PO-MAPF), can be solved through decentralized approaches. In recent years, several learnable algorithms have been proposed for solving PO-MAPF. 
However, their performance is oftentimes not validated out-of-distribution (OOD), and the code is often not properly open-sourced. In this study, we conduct a comprehensive empirical evaluation of one of the state-of-the-art decentralized PO-MAPF algorithms, Distributed Heuristic Communication (DHC), Ma, Z., Luo, Y., Ma, H.: Distributed heuristic multi-agent path finding with communication. In: 2021 International Conference on Robotics and Automation (ICRA), pp. 8699--8705. IEEE, Xi'an, China (2021), which incorporates communication between agents. Our experiments reveal that the performance of DHC deteriorates when agents encounter complete packet loss during communication. To address this issue, we propose a novel algorithm called DHC-R that employs a similar architecture to the original DHC but introduces randomness into the graph neural network-based communication block, preventing the passage of some data packets during training. Empirical evaluation confirms that DHC-R outperforms DHC in scenarios with packet loss. Open-sourced model weights and the codebase are provided: https://github.com/acforvs/dhc-robust-mapf.", 52 | isbn="978-3-031-43111-1" 53 | } 54 | ``` 55 | 56 | ## Contributing 57 |
58 | See the detailed contribution guide 59 | 60 | 1. Install [black](https://github.com/psf/black); you can typically run 61 | ```shell 62 | pip3 install black 63 | ``` 64 | 65 | 2. Use [black](https://github.com/psf/black) to keep the code style consistent 66 | ```shell 67 | poetry run black . 68 | ``` 69 | 3. Use [ruff](https://github.com/charliermarsh/ruff) to lint all the files 70 | ```shell 71 | poetry run ruff . 72 | ``` 73 | 4. Make sure the tests pass 74 | ```shell 75 | poetry run pytest 76 | ``` 77 | 5. Create a PR with new features 78 |
79 | 80 | ## References 81 | 82 | [1] 83 | Ma, Ziyuan and Luo, Yudong and Ma, Hang, 2021. Distributed Heuristic Multi-Agent Path Finding with Communication. 84 | 85 | ## License 86 | 87 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/acforvs/po-mapf-thesis/blob/main/LICENSE) 88 | 89 | 90 | -------------------------------------------------------------------------------- /Thesis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/Thesis.pdf -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | dhc: 2 | cnn_channels: 128 3 | fov: !!python/tuple [9, 9] 4 | observation_radius: 4 # since the FOV is 9x9 5 | observation_shape: !!python/tuple [6, 9, 9] 6 | hidden_dim: 256 7 | max_comm_agents: 3 # includes the agent itself 8 | batch_size: 192 9 | max_num_agents: 16 10 | latent_dim: 784 # 16 * 7 * 7, do not forget to change if the observation_shape is changed 11 | max_episode_length: 256 12 | 13 | communication: 14 | disable_communication: 1 15 | comm_enabled_prob: 0.7 16 | num_comm_layers: 2 17 | num_comm_heads: 2 18 | 19 | buffer: 20 | action_dim: 5 21 | forward_steps: 2 22 | 23 | worker: 24 | episode_capacity: 2048 25 | init_env_settings: !!python/tuple [ 1, 10 ] 26 | max_comm_agents: 3 27 | prioritized_replay_alpha: 0.6 28 | prioritized_replay_beta: 0.4 29 | forward_steps: 2 30 | seq_len: 20 31 | max_map_length: 40 32 | pass_rate: 0.9 33 | learning_starts: 100000 34 | training_times: 600000 35 | target_network_update_freq: 2000 36 | save_interval: 2000 37 | actor_update_steps: 400 38 | 39 | train: 40 | num_actors: 16 41 | log_interval: 10 42 | 43 | 44 | environment: 45 | map_length: 50 46 | num_agents: 2 47 | observation_radius: 4 48 | reward_fn: 49 | move: -0.075 50 | 
stay_on_goal: 0 51 | stay_off_goal: -0.075 52 | collision: -0.5 53 | finish: 3 54 | 55 | init_env_settings: !!python/tuple [1, 10] 56 | observation_shape: !!python/tuple [6, 9, 9] 57 | action_dim: 5 58 | -------------------------------------------------------------------------------- /models/310000_nocomm.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/models/310000_nocomm.pth -------------------------------------------------------------------------------- /models/337500.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/models/337500.pth -------------------------------------------------------------------------------- /pathfinding/environment.py: -------------------------------------------------------------------------------- 1 | from pathfinding.settings import yaml_data as settings 2 | from pathfinding import movingai 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import random 7 | from typing import List 8 | 9 | plt.ion() 10 | 11 | ENV_CONFIG = settings["environment"] 12 | 13 | action_list = np.array([[0, 0], [-1, 0], [1, 0], [0, -1], [0, 1]], dtype=np.int) 14 | 15 | color_map = np.array( 16 | [ 17 | [255, 255, 255], # white 18 | [190, 190, 190], # gray 19 | [0, 191, 255], # blue 20 | [255, 165, 0], # orange 21 | [0, 250, 154], 22 | ] 23 | ) # green 24 | 25 | 26 | def map_partition(map): 27 | """Partitioning map into сomponents.""" 28 | empty_list = np.argwhere(map == 0).tolist() 29 | empty_pos = set([tuple(pos) for pos in empty_list]) 30 | 31 | if not empty_pos: 32 | raise RuntimeError("There are no empty positions found") 33 | 34 | partition_list = [] 35 | while empty_pos: 36 | start_pos = empty_pos.pop() 37 | open_list = [start_pos] 38 | close_list = [] 39 | 40 | while 
open_list: 41 | x, y = open_list.pop(0) 42 | for dx, dy in ( 43 | (-1, 0), 44 | (1, 0), 45 | (0, -1), 46 | (0, 1), 47 | ): 48 | pos = x + dx, y + dy 49 | if pos in empty_pos: 50 | empty_pos.remove(pos) 51 | open_list.append(pos) 52 | 53 | close_list.append((x, y)) 54 | 55 | if len(close_list) >= 2: 56 | partition_list.append(close_list) 57 | 58 | return partition_list 59 | 60 | 61 | class Environment: 62 | def __init__( 63 | self, 64 | num_agents: int = ENV_CONFIG["init_env_settings"][0], 65 | map_length: int = ENV_CONFIG["init_env_settings"][1], 66 | obs_radius: int = ENV_CONFIG["observation_radius"], 67 | reward_fn: dict = ENV_CONFIG["reward_fn"], 68 | fix_density=None, 69 | curriculum=False, 70 | init_env_settings_set=ENV_CONFIG["init_env_settings"], 71 | should_init: bool = True, 72 | ): 73 | self.curriculum = curriculum 74 | if curriculum: 75 | self.env_set = [init_env_settings_set] 76 | self.num_agents = init_env_settings_set[0] 77 | self.map_size = (init_env_settings_set[1], init_env_settings_set[1]) 78 | else: 79 | self.num_agents = num_agents 80 | self.map_size = (map_length, map_length) 81 | 82 | # set as same as in PRIMAL 83 | if fix_density is None: 84 | self.fix_density = False 85 | self.obstacle_density = np.random.triangular(0, 0.33, 0.5) 86 | else: 87 | self.fix_density = True 88 | self.obstacle_density = fix_density 89 | 90 | self.map = np.random.choice( 91 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density] 92 | ).astype(np.int) 93 | 94 | partition_list = map_partition(self.map) 95 | self._part = partition_list 96 | 97 | while len(partition_list) == 0: 98 | self.map = np.random.choice( 99 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density] 100 | ).astype(np.int) 101 | partition_list = map_partition(self.map) 102 | 103 | self.agents_pos = np.empty((self.num_agents, 2), dtype=np.int) 104 | self.goals_pos = np.empty((self.num_agents, 2), dtype=np.int) 105 | 106 | pos_num = sum([len(partition) for partition in 
partition_list]) 107 | 108 | # loop to assign agent original position and goal position for each agent 109 | for i in range(self.num_agents): 110 | pos_idx = random.randint(0, pos_num - 1) 111 | partition_idx = 0 112 | for partition in partition_list: 113 | if pos_idx >= len(partition): 114 | pos_idx -= len(partition) 115 | partition_idx += 1 116 | else: 117 | break 118 | 119 | pos = random.choice(partition_list[partition_idx]) 120 | partition_list[partition_idx].remove(pos) 121 | self.agents_pos[i] = np.asarray(pos, dtype=np.int) 122 | 123 | pos = random.choice(partition_list[partition_idx]) 124 | partition_list[partition_idx].remove(pos) 125 | self.goals_pos[i] = np.asarray(pos, dtype=np.int) 126 | 127 | partition_list = [ 128 | partition for partition in partition_list if len(partition) >= 2 129 | ] 130 | pos_num = sum([len(partition) for partition in partition_list]) 131 | 132 | self.obs_radius = obs_radius 133 | 134 | self.reward_fn = reward_fn 135 | self.get_heuri_map() 136 | self.steps = 0 137 | 138 | self.last_actions = np.zeros( 139 | (self.num_agents, 5, 2 * obs_radius + 1, 2 * obs_radius + 1), dtype=np.bool 140 | ) 141 | 142 | def update_env_settings_set(self, new_env_settings_set): 143 | self.env_set = new_env_settings_set 144 | 145 | def reset(self, num_agents=None, map_length=None): 146 | if self.curriculum: 147 | rand = random.choice(self.env_set) 148 | self.num_agents = rand[0] 149 | self.map_size = (rand[1], rand[1]) 150 | 151 | elif num_agents is not None and map_length is not None: 152 | self.num_agents = num_agents 153 | self.map_size = (map_length, map_length) 154 | 155 | if not self.fix_density: 156 | self.obstacle_density = np.random.triangular(0, 0.33, 0.5) 157 | 158 | self.map = np.random.choice( 159 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density] 160 | ).astype(np.float32) 161 | 162 | partition_list = map_partition(self.map) 163 | 164 | while len(partition_list) == 0: 165 | self.map = np.random.choice( 166 | 2, 
self.map_size, p=[1 - self.obstacle_density, self.obstacle_density] 167 | ).astype(np.float32) 168 | partition_list = map_partition(self.map) 169 | 170 | self.agents_pos = np.empty((self.num_agents, 2), dtype=np.int) 171 | self.goals_pos = np.empty((self.num_agents, 2), dtype=np.int) 172 | 173 | pos_num = sum([len(partition) for partition in partition_list]) 174 | 175 | for i in range(self.num_agents): 176 | pos_idx = random.randint(0, pos_num - 1) 177 | partition_idx = 0 178 | for partition in partition_list: 179 | if pos_idx >= len(partition): 180 | pos_idx -= len(partition) 181 | partition_idx += 1 182 | else: 183 | break 184 | 185 | pos = random.choice(partition_list[partition_idx]) 186 | partition_list[partition_idx].remove(pos) 187 | self.agents_pos[i] = np.asarray(pos, dtype=np.int) 188 | 189 | pos = random.choice(partition_list[partition_idx]) 190 | partition_list[partition_idx].remove(pos) 191 | self.goals_pos[i] = np.asarray(pos, dtype=np.int) 192 | 193 | partition_list = [ 194 | partition for partition in partition_list if len(partition) >= 2 195 | ] 196 | pos_num = sum([len(partition) for partition in partition_list]) 197 | 198 | self.steps = 0 199 | self.get_heuri_map() 200 | 201 | self.last_actions = np.zeros( 202 | (self.num_agents, 5, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1), 203 | dtype=np.bool, 204 | ) 205 | 206 | return self.observe() 207 | 208 | def load(self, map: np.ndarray, agents_pos: np.ndarray, goals_pos: np.ndarray): 209 | self.map = np.copy(map) 210 | self.agents_pos = np.copy(agents_pos) 211 | self.goals_pos = np.copy(goals_pos) 212 | 213 | self.num_agents = agents_pos.shape[0] 214 | self.map_size = (self.map.shape[0], self.map.shape[1]) 215 | 216 | self.steps = 0 217 | 218 | self.imgs = [] 219 | 220 | self.get_heuri_map() 221 | 222 | self.last_actions = np.zeros( 223 | (self.num_agents, 5, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1), 224 | dtype=np.bool, 225 | ) 226 | 227 | def get_heuri_map(self): 228 | dist_map = ( 229 
| np.ones((self.num_agents, *self.map_size), dtype=np.int32) * 2147483647 230 | ) 231 | for i in range(self.num_agents): 232 | open_list = list() 233 | x, y = tuple(self.goals_pos[i]) 234 | open_list.append((x, y)) 235 | dist_map[i, x, y] = 0 236 | 237 | while open_list: 238 | x, y = open_list.pop(0) 239 | dist = dist_map[i, x, y] 240 | 241 | up = x - 1, y 242 | if ( 243 | up[0] >= 0 244 | and self.map[up] == 0 245 | and dist_map[i, x - 1, y] > dist + 1 246 | ): 247 | dist_map[i, x - 1, y] = dist + 1 248 | if up not in open_list: 249 | open_list.append(up) 250 | 251 | down = x + 1, y 252 | if ( 253 | down[0] < self.map_size[0] 254 | and self.map[down] == 0 255 | and dist_map[i, x + 1, y] > dist + 1 256 | ): 257 | dist_map[i, x + 1, y] = dist + 1 258 | if down not in open_list: 259 | open_list.append(down) 260 | 261 | left = x, y - 1 262 | if ( 263 | left[1] >= 0 264 | and self.map[left] == 0 265 | and dist_map[i, x, y - 1] > dist + 1 266 | ): 267 | dist_map[i, x, y - 1] = dist + 1 268 | if left not in open_list: 269 | open_list.append(left) 270 | 271 | right = x, y + 1 272 | if ( 273 | right[1] < self.map_size[1] 274 | and self.map[right] == 0 275 | and dist_map[i, x, y + 1] > dist + 1 276 | ): 277 | dist_map[i, x, y + 1] = dist + 1 278 | if right not in open_list: 279 | open_list.append(right) 280 | 281 | self.heuri_map = np.zeros((self.num_agents, 4, *self.map_size), dtype=np.bool) 282 | 283 | for x in range(self.map_size[0]): 284 | for y in range(self.map_size[1]): 285 | if self.map[x, y] == 0: 286 | for i in range(self.num_agents): 287 | if x > 0 and dist_map[i, x - 1, y] < dist_map[i, x, y]: 288 | assert dist_map[i, x - 1, y] == dist_map[i, x, y] - 1 289 | self.heuri_map[i, 0, x, y] = 1 290 | 291 | if ( 292 | x < self.map_size[0] - 1 293 | and dist_map[i, x + 1, y] < dist_map[i, x, y] 294 | ): 295 | assert dist_map[i, x + 1, y] == dist_map[i, x, y] - 1 296 | self.heuri_map[i, 1, x, y] = 1 297 | 298 | if y > 0 and dist_map[i, x, y - 1] < dist_map[i, x, y]: 299 
| assert dist_map[i, x, y - 1] == dist_map[i, x, y] - 1 300 | self.heuri_map[i, 2, x, y] = 1 301 | 302 | if ( 303 | y < self.map_size[1] - 1 304 | and dist_map[i, x, y + 1] < dist_map[i, x, y] 305 | ): 306 | assert dist_map[i, x, y + 1] == dist_map[i, x, y] - 1 307 | self.heuri_map[i, 3, x, y] = 1 308 | 309 | self.heuri_map = np.pad( 310 | self.heuri_map, 311 | ( 312 | (0, 0), 313 | (0, 0), 314 | (self.obs_radius, self.obs_radius), 315 | (self.obs_radius, self.obs_radius), 316 | ), 317 | ) 318 | 319 | def step(self, actions: List[int]): 320 | """ 321 | actions: 322 | list of indices 323 | 0 stay 324 | 1 up 325 | 2 down 326 | 3 left 327 | 4 right 328 | """ 329 | 330 | assert ( 331 | len(actions) == self.num_agents 332 | ), "only {} actions as input while {} agents in environment".format( 333 | len(actions), self.num_agents 334 | ) 335 | assert all( 336 | [action_idx < 5 and action_idx >= 0 for action_idx in actions] 337 | ), "action index out of range" 338 | 339 | checking_list = [i for i in range(self.num_agents)] 340 | 341 | rewards = [] 342 | next_pos = np.copy(self.agents_pos) 343 | 344 | # remove unmoving agent id 345 | for agent_id in checking_list.copy(): 346 | if actions[agent_id] == 0: 347 | # unmoving 348 | 349 | if np.array_equal(self.agents_pos[agent_id], self.goals_pos[agent_id]): 350 | rewards.append(self.reward_fn["stay_on_goal"]) 351 | else: 352 | rewards.append(self.reward_fn["stay_off_goal"]) 353 | 354 | checking_list.remove(agent_id) 355 | else: 356 | # move 357 | next_pos[agent_id] += action_list[actions[agent_id]] 358 | rewards.append(self.reward_fn["move"]) 359 | 360 | # first round check, these two conflicts have the heightest priority 361 | for agent_id in checking_list.copy(): 362 | if np.any(next_pos[agent_id] < 0) or np.any( 363 | next_pos[agent_id] >= self.map_size[0] 364 | ): 365 | # agent out of map range 366 | rewards[agent_id] = self.reward_fn["collision"] 367 | next_pos[agent_id] = self.agents_pos[agent_id] 368 | 
checking_list.remove(agent_id) 369 | 370 | elif self.map[tuple(next_pos[agent_id])] == 1: 371 | # collide obstacle 372 | rewards[agent_id] = self.reward_fn["collision"] 373 | next_pos[agent_id] = self.agents_pos[agent_id] 374 | checking_list.remove(agent_id) 375 | 376 | # second round check, agent swapping conflict 377 | no_conflict = False 378 | while not no_conflict: 379 | no_conflict = True 380 | for agent_id in checking_list: 381 | target_agent_id = np.where( 382 | np.all(next_pos[agent_id] == self.agents_pos, axis=1) 383 | )[0] 384 | 385 | if target_agent_id: 386 | target_agent_id = target_agent_id.item() 387 | assert target_agent_id != agent_id, "logic bug" 388 | 389 | if np.array_equal( 390 | next_pos[target_agent_id], self.agents_pos[agent_id] 391 | ): 392 | assert ( 393 | target_agent_id in checking_list 394 | ), "target_agent_id should be in checking list" 395 | 396 | next_pos[agent_id] = self.agents_pos[agent_id] 397 | rewards[agent_id] = self.reward_fn["collision"] 398 | 399 | next_pos[target_agent_id] = self.agents_pos[target_agent_id] 400 | rewards[target_agent_id] = self.reward_fn["collision"] 401 | 402 | checking_list.remove(agent_id) 403 | checking_list.remove(target_agent_id) 404 | 405 | no_conflict = False 406 | break 407 | 408 | # third round check, agent collision conflict 409 | no_conflict = False 410 | while not no_conflict: 411 | no_conflict = True 412 | for agent_id in checking_list: 413 | collide_agent_id = np.where( 414 | np.all(next_pos == next_pos[agent_id], axis=1) 415 | )[0].tolist() 416 | if len(collide_agent_id) > 1: 417 | # collide agent 418 | 419 | # if all agents in collide agent are in checking list 420 | all_in_checking = True 421 | for id in collide_agent_id.copy(): 422 | if id not in checking_list: 423 | all_in_checking = False 424 | collide_agent_id.remove(id) 425 | 426 | if all_in_checking: 427 | collide_agent_pos = next_pos[collide_agent_id].tolist() 428 | for pos, id in zip(collide_agent_pos, collide_agent_id): 429 | 
pos.append(id) 430 | collide_agent_pos.sort( 431 | key=lambda x: x[0] * self.map_size[0] + x[1] 432 | ) 433 | 434 | collide_agent_id.remove(collide_agent_pos[0][2]) 435 | 436 | # checking_list.remove(collide_agent_pos[0][2]) 437 | 438 | next_pos[collide_agent_id] = self.agents_pos[collide_agent_id] 439 | for id in collide_agent_id: 440 | rewards[id] = self.reward_fn["collision"] 441 | 442 | for id in collide_agent_id: 443 | checking_list.remove(id) 444 | 445 | no_conflict = False 446 | break 447 | 448 | # self.history.append(np.copy(next_pos)) 449 | self.agents_pos = np.copy(next_pos) 450 | 451 | self.steps += 1 452 | 453 | # check done 454 | if np.array_equal(self.agents_pos, self.goals_pos): 455 | done = True 456 | rewards = [self.reward_fn["finish"] for _ in range(self.num_agents)] 457 | else: 458 | done = False 459 | 460 | info = {"step": self.steps - 1} 461 | 462 | # make sure no overlapping agents 463 | if np.unique(self.agents_pos, axis=0).shape[0] < self.num_agents: 464 | print(self.steps) 465 | print(self.map) 466 | print(self.agents_pos) 467 | raise RuntimeError("unique") 468 | 469 | # update last actions 470 | self.last_actions = np.zeros( 471 | (self.num_agents, 5, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1), 472 | dtype=np.bool, 473 | ) 474 | self.last_actions[np.arange(self.num_agents), np.array(actions)] = 1 475 | 476 | return self.observe(), rewards, done, info 477 | 478 | def observe(self): 479 | """ 480 | return observation and position for each agent 481 | 482 | obs: shape (num_agents, 11, 2*obs_radius+1, 2*obs_radius+1) 483 | layer 1: agent map 484 | layer 2: obstacle map 485 | layer 3-6: heuristic map 486 | layer 7-11: one-hot representation of agent's last action 487 | 488 | pos: used for caculating communication mask 489 | 490 | """ 491 | obs = np.zeros( 492 | (self.num_agents, 6, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1), 493 | dtype=np.bool, 494 | ) 495 | 496 | # 0 represents obstacle to match 0 padding in CNN 497 | 
obstacle_map = np.pad(self.map, self.obs_radius, "constant", constant_values=0) 498 | 499 | agent_map = np.zeros((self.map_size), dtype=np.bool) 500 | agent_map[self.agents_pos[:, 0], self.agents_pos[:, 1]] = 1 501 | agent_map = np.pad(agent_map, self.obs_radius, "constant", constant_values=0) 502 | 503 | for i, agent_pos in enumerate(self.agents_pos): 504 | x, y = agent_pos 505 | 506 | obs[i, 0] = agent_map[ 507 | x : x + 2 * self.obs_radius + 1, y : y + 2 * self.obs_radius + 1 508 | ] 509 | obs[i, 0, self.obs_radius, self.obs_radius] = 0 510 | obs[i, 1] = obstacle_map[ 511 | x : x + 2 * self.obs_radius + 1, y : y + 2 * self.obs_radius + 1 512 | ] 513 | obs[i, 2:] = self.heuri_map[ 514 | i, :, x : x + 2 * self.obs_radius + 1, y : y + 2 * self.obs_radius + 1 515 | ] 516 | 517 | # obs = np.concatenate((obs, self.last_actions), axis=1) 518 | 519 | return obs, np.copy(self.agents_pos) 520 | 521 | def render(self): 522 | if not hasattr(self, "fig"): 523 | self.fig = plt.figure() 524 | 525 | map = np.copy(self.map) 526 | for agent_id in range(self.num_agents): 527 | if np.array_equal(self.agents_pos[agent_id], self.goals_pos[agent_id]): 528 | map[tuple(self.agents_pos[agent_id])] = 4 529 | else: 530 | map[tuple(self.agents_pos[agent_id])] = 2 531 | map[tuple(self.goals_pos[agent_id])] = 3 532 | 533 | map = map.astype(np.uint8) 534 | # plt.xlabel('step: {}'.format(self.steps)) 535 | 536 | # add text in plot 537 | self.imgs.append([]) 538 | if hasattr(self, "texts"): 539 | for i, ((agent_x, agent_y), (goal_x, goal_y)) in enumerate( 540 | zip(self.agents_pos, self.goals_pos) 541 | ): 542 | self.texts[i].set_position((agent_y, agent_x)) 543 | self.texts[i].set_text(i) 544 | else: 545 | self.texts = [] 546 | for i, ((agent_x, agent_y), (goal_x, goal_y)) in enumerate( 547 | zip(self.agents_pos, self.goals_pos) 548 | ): 549 | text = plt.text( 550 | agent_y, agent_x, i, color="black", ha="center", va="center" 551 | ) 552 | plt.text(goal_y, goal_x, i, color="black", ha="center", 
va="center") 553 | self.texts.append(text) 554 | 555 | plt.imshow(color_map[map], animated=True) 556 | 557 | plt.show() 558 | # plt.ion() 559 | plt.pause(0.5) 560 | 561 | def close(self, save=False): 562 | plt.close() 563 | del self.fig 564 | 565 | 566 | class MovingAIBenchmarkingEnvironment(Environment): 567 | def __init__( 568 | self, 569 | num_agents: int = 5, 570 | map_filename: str = None, 571 | test_descriptions: list[movingai.TestDescription] = None, 572 | obs_radius: int = ENV_CONFIG["observation_radius"], 573 | reward_fn: dict = ENV_CONFIG["reward_fn"], 574 | should_init: bool = True, 575 | ): 576 | if test_descriptions is not None and len(test_descriptions) != num_agents: 577 | raise ValueError("Number of tests must be equal to num_agents") 578 | 579 | self.num_agents = num_agents 580 | self.obs_radius = obs_radius 581 | self.reward_fn = reward_fn 582 | self.steps = 0 583 | 584 | if should_init: 585 | self.map = movingai.read_map(map_filename) 586 | self.map_size = (len(self.map), len(self.map)) 587 | self.agents_pos = np.array( 588 | [[descr.x0, descr.y0] for descr in test_descriptions] 589 | ) 590 | self.goals_pos = np.array( 591 | [[descr.x1, descr.y1] for descr in test_descriptions] 592 | ) 593 | self.get_heuri_map() 594 | self.last_actions = np.zeros( 595 | (self.num_agents, 5, 2 * obs_radius + 1, 2 * obs_radius + 1), 596 | dtype=np.bool, 597 | ) 598 | -------------------------------------------------------------------------------- /pathfinding/main.py: -------------------------------------------------------------------------------- 1 | from pathfinding.models.dhc import DHCNetwork 2 | 3 | if __name__ == "__main__": 4 | DHCNetwork() 5 | -------------------------------------------------------------------------------- /pathfinding/models/dhc/__init__.py: -------------------------------------------------------------------------------- 1 | from pathfinding.models.dhc.model import Network as DHCNetwork # noqa 2 | from pathfinding.models.dhc.buffer import 
# [dump text preserved verbatim] tail of pathfinding/models/dhc/__init__.py;
# the import statement it completes begins on the previous dump line:
# LocalBuffer # noqa 3 | from pathfinding.models.dhc.worker import GlobalBuffer, Learner, Actor # noqa 4 |
# --------------------------------------------------------------------------------
# /pathfinding/models/dhc/buffer.py:
# --------------------------------------------------------------------------------
import numpy as np

from pathfinding.settings import yaml_data as settings

BUF_CONFIG = settings["dhc"]["buffer"]
GENERAL_CONFIG = settings["dhc"]


class SumTree:
    """Array-backed binary sum tree used for prioritized experience replay.

    The tree is stored in a flat array of ``2**layer - 1`` nodes: node ``k`` has
    children ``2k + 1`` and ``2k + 2``, the last ``capacity`` entries are the
    leaves (one priority each) and ``tree[0]`` holds the sum of all leaves.
    """

    def __init__(self, capacity: int):
        """Create an all-zero tree with ``capacity`` leaves.

        ``capacity`` must be a power of two so that the leaves fill exactly
        one complete layer.
        """
        layer = 1
        while 2 ** (layer - 1) < capacity:
            layer += 1
        assert 2 ** (layer - 1) == capacity, "capacity must be a power of two (2**n)"
        self.layer = layer
        self.tree = np.zeros(2**layer - 1, dtype=np.float64)
        self.capacity = capacity
        self.size = 0

    def _assert_consistent(self) -> None:
        """Sanity check: the root must equal the sum of the leaves (within 0.1)."""
        leaf_sum = np.sum(self.tree[-self.capacity :])
        # Symmetric check: a root that is too large is as wrong as one too small.
        assert (
            abs(leaf_sum - self.tree[0]) < 0.1
        ), f"sum is {leaf_sum} but root is {self.tree[0]}"

    def sum(self):
        """Return the total priority mass (the root node)."""
        self._assert_consistent()
        return self.tree[0]

    def __getitem__(self, idx: int):
        """Return the priority stored in leaf ``idx`` (0-based)."""
        assert 0 <= idx < self.capacity

        return self.tree[self.capacity - 1 + idx]

    def batch_sample(self, batch_size: int):
        """Draw ``batch_size`` leaves by stratified sampling.

        The total mass is cut into ``batch_size`` equal intervals and one
        point is drawn uniformly inside each; every point is then routed from
        the root down to the leaf whose cumulative range contains it.

        Returns:
            ``(idxes, priorities)``: leaf indices in ``[0, capacity)`` and the
            priorities stored at those leaves.
        """
        p_sum = self.tree[0]
        interval = p_sum / batch_size

        # ``arange(batch_size) * interval`` always yields exactly batch_size
        # strata; ``arange(0, p_sum, interval)`` may yield one extra element
        # because of floating-point rounding.
        prefixsums = np.arange(batch_size, dtype=np.float64) * interval
        prefixsums = prefixsums + np.random.uniform(0, interval, batch_size)

        # ``np.int`` was removed in NumPy 1.24; use an explicit integer width.
        idxes = np.zeros(batch_size, dtype=np.int64)
        for _ in range(self.layer - 1):
            nodes = self.tree[idxes * 2 + 1]
            idxes = np.where(prefixsums < nodes, idxes * 2 + 1, idxes * 2 + 2)
            # Right children have even indices: when we descended to the right,
            # drop the mass of the left sibling that was skipped.
            prefixsums = np.where(
                idxes % 2 == 0, prefixsums - self.tree[idxes - 1], prefixsums
            )

        priorities = self.tree[idxes]
        idxes -= self.capacity - 1  # tree position -> leaf index

        assert np.all(priorities > 0), f"idx: {idxes}, priority: {priorities}"
        assert np.all(idxes >= 0) and np.all(idxes < self.capacity)

        return idxes, priorities

    def batch_update(self, idxes: np.ndarray, priorities: np.ndarray):
        """Write ``priorities`` into leaves ``idxes`` and refresh all ancestors.

        ``idxes`` are leaf indices in ``[0, capacity)``; the caller's array is
        left unmodified.
        """
        # Build a new array instead of ``+=`` so the caller's indices are not
        # silently shifted.
        nodes = idxes + (self.capacity - 1)
        self.tree[nodes] = priorities

        for _ in range(self.layer - 1):
            # Move one level up; each parent only needs recomputing once.
            nodes = np.unique((nodes - 1) // 2)
            self.tree[nodes] = self.tree[2 * nodes + 1] + self.tree[2 * nodes + 2]

        self._assert_consistent()


class LocalBuffer:
    """Per-episode transition buffer filled by an actor.

    Buffers holding observations, communication masks and Q-values have one
    more slot than ``capacity`` so that the state following the last action
    can be stored as well.
    """

    __slots__ = (
        "actor_id",
        "map_len",
        "num_agents",
        "obs_buf",
        "act_buf",
        "rew_buf",
        "hid_buf",
        "comm_mask_buf",
        "q_buf",
        "capacity",
        "size",
        "done",
    )

    def __init__(
        self,
        actor_id: int,
        num_agents: int,
        map_len: int,
        init_obs: np.ndarray,
        capacity: int = GENERAL_CONFIG["max_episode_length"],
        obs_shape=GENERAL_CONFIG["observation_shape"],
        hidden_dim=GENERAL_CONFIG["hidden_dim"],
        action_dim=BUF_CONFIG["action_dim"],
    ):
        """
        buffer for each episode

        ``init_obs`` is stored as the observation at step 0; all other slots
        start zero-filled.
        """
        self.actor_id = actor_id
        self.num_agents = num_agents
        self.map_len = map_len

        # ``np.bool`` was removed in NumPy 1.24; ``np.bool_`` is the stable name.
        self.obs_buf = np.zeros((capacity + 1, num_agents, *obs_shape), dtype=np.bool_)
        self.act_buf = np.zeros((capacity), dtype=np.uint8)
        self.rew_buf = np.zeros((capacity), dtype=np.float16)
        self.hid_buf = np.zeros((capacity, num_agents, hidden_dim), dtype=np.float16)
        self.comm_mask_buf = np.zeros(
            (capacity + 1, num_agents, num_agents), dtype=np.bool_
        )
        self.q_buf = np.zeros((capacity + 1, action_dim), dtype=np.float32)

        self.capacity = capacity
        self.size = 0

        self.obs_buf[0] = init_obs

    def __len__(self):
        """Number of transitions stored so far."""
        return self.size

    def add(
        self,
        q_val: np.ndarray,
        action: int,
        reward: float,
        next_obs: np.ndarray,
        hidden: np.ndarray,
        comm_mask: np.ndarray,
    ):
        """Append one transition.

        ``q_val``, ``hidden`` and ``comm_mask`` are written at the current
        step; ``next_obs`` is written one slot ahead.
        """
        assert self.size < self.capacity

        step = self.size
        self.act_buf[step] = action
        self.rew_buf[step] = reward
        self.obs_buf[step + 1] = next_obs
        self.q_buf[step] = q_val
        self.hid_buf[step] = hidden
        self.comm_mask_buf[step] = comm_mask

        self.size += 1

    def finish(self, last_q_val=None, last_comm_mask=None):
        """Close the episode, truncate the buffers and compute initial priorities.

        Args:
            last_q_val: Q-values of the state after the last stored action, or
                ``None`` when the episode terminated (the bootstrap value then
                stays at the zero the buffer was initialised with).
            last_comm_mask: communication mask of that state; only read when
                ``last_q_val`` is given.

        Returns:
            ``(actor_id, num_agents, map_len, obs_buf, act_buf, rew_buf,
            hid_buf, td_errors, done, size, comm_mask_buf)``. ``td_errors`` has
            length ``capacity``; entries past ``size`` are zero.

        The buffers are sliced in place, so ``add`` must not be called again
        afterwards.
        """
        # last q value is None if done
        if last_q_val is None:
            done = True
        else:
            done = False
            self.q_buf[self.size] = last_q_val
            self.comm_mask_buf[self.size] = last_comm_mask

        self.obs_buf = self.obs_buf[: self.size + 1]
        self.act_buf = self.act_buf[: self.size]
        self.rew_buf = self.rew_buf[: self.size]
        self.hid_buf = self.hid_buf[: self.size]
        self.q_buf = self.q_buf[: self.size + 1]
        self.comm_mask_buf = self.comm_mask_buf[: self.size + 1]

        # calculate td errors for prioritized experience replay
        td_errors = np.zeros(self.capacity, dtype=np.float32)

        fwd_steps = BUF_CONFIG["forward_steps"]
        discount = 0.99

        # For step i the bootstrap state is min(i + fwd_steps, size) and is
        # discounted by the number of steps actually looked ahead.
        steps = np.arange(self.size)
        q_max_idx = np.minimum(steps + fwd_steps, self.size)
        gamma = discount ** np.minimum(fwd_steps, self.size - steps)
        q_max = np.max(self.q_buf[q_max_idx], axis=1) * gamma

        # n-step discounted reward sum: pad with zeros past the episode end and
        # convolve with [discount**(n-1), ..., discount, 1] (np.convolve flips
        # the kernel, so reward k + j is weighted by discount**j).
        ret = self.rew_buf.tolist() + [0] * (fwd_steps - 1)
        kernel = [discount ** (fwd_steps - 1 - i) for i in range(fwd_steps)]
        reward = np.convolve(ret, kernel, "valid") + q_max

        q_val = self.q_buf[steps, self.act_buf]
        # Clip from below so that no transition gets a zero priority.
        td_errors[: self.size] = np.abs(reward - q_val).clip(1e-4)

        return (
            self.actor_id,
            self.num_agents,
            self.map_len,
            self.obs_buf,
            self.act_buf,
            self.rew_buf,
            self.hid_buf,
            td_errors,
            done,
            self.size,
            self.comm_mask_buf,
        )
# --------------------------------------------------------------------------------
# /pathfinding/models/dhc/evaluate.py:
# --------------------------------------------------------------------------------
import fire
from collections import defaultdict
import numpy as np
import os
import torch

from pathfinding.environment import Environment, MovingAIBenchmarkingEnvironment
from pathfinding.models.dhc import DHCNetwork
from pathfinding.settings import yaml_data as settings
from pathfinding.utils import test_group, calculate_metrics

GENERAL_CONFIG = settings["dhc"]

# Default (map_size, num_agents, density) groups evaluated by ``test_model``.
# Kept as an immutable tuple so the default can never be mutated between calls.
_DEFAULT_TEST_GROUPS = (
    (40, 4, 0.3),
    (40, 8, 0.3),
    (40, 16, 0.3),
    (40, 32, 0.3),
    (40, 64, 0.3),
    (80, 4, 0.3),
    (80, 8, 0.3),
    (80, 16, 0.3),
    (80, 32, 0.3),
    (80, 64, 0.3),
)

# LaTeX row terminator ("\\") followed by a newline in the generated source.
_ROW_END = " \\\\ \n"


def _test_one_case(args):
    """Roll out one test case and return its metrics.

    ``args`` is ``(map, agents_pos, goals_pos, network, env_cls)``. The
    environment is created without initialisation and loaded from the case,
    then stepped with the network's actions until it reports done or the
    configured ``max_episode_length`` is reached.
    """
    grid, agents_pos, goals_pos, network, env_cls = args
    env = env_cls(should_init=False)
    env.load(grid, agents_pos, goals_pos)
    obs, pos = env.observe()

    done, steps = False, 0
    network.reset()

    while not done and env.steps < GENERAL_CONFIG["max_episode_length"]:
        actions, _, _, _ = network.step(
            torch.as_tensor(obs.astype(np.float32)),
            torch.as_tensor(pos.astype(np.float32)),
        )
        (obs, pos), _, done, _ = env.step(actions)
        steps += 1

    return calculate_metrics(env, steps)


def _test_generation_fn_random(tests, network):
    """Attach ``network`` and the random-map ``Environment`` class to each test."""
    return [(*test, network, Environment) for test in tests]


def _test_generation_fn_moving_ai(tests, network):
    """Attach ``network`` and ``MovingAIBenchmarkingEnvironment`` to each test."""
    return [(*test, network, MovingAIBenchmarkingEnvironment) for test in tests]


def test_model(
    test_groups=_DEFAULT_TEST_GROUPS,
    model_number="60000",
    is_random_maps: bool = True,
):
    """Evaluate a saved checkpoint on every group in ``test_groups``.

    Loads ``./models/{model_number}.pth`` on the CPU, puts the network in eval
    mode and shares its memory, then lazily yields
    ``(test_group(...) result, group)`` for each group.
    NOTE(review): the result format is defined by ``pathfinding.utils.test_group``,
    which is not visible here.
    """
    network = DHCNetwork()
    device = torch.device("cpu")
    network.to(device)
    state_dict = torch.load(
        os.path.join(".", "models", f"{model_number}.pth"), map_location=device
    )
    network.load_state_dict(state_dict)
    network.eval()
    network.share_memory()

    generation_fn = (
        _test_generation_fn_random if is_random_maps else _test_generation_fn_moving_ai
    )

    def func(x):
        return generation_fn(x, network)

    for group in test_groups:
        yield test_group(group, func, _test_one_case, is_random=is_random_maps), group


def _latex_table_header(density):
    """Return the ``longtable`` preamble (first head, head, foot, last foot).

    A raw f-string is used so LaTeX backslashes are written literally:
    ``\\`` in the template below is the LaTeX row terminator.
    """
    return rf"""\newpage
\begin{{longtable}}[htb!]{{cc|ccc}}
\caption{{TODO CAPTION}} \label{{table:TODO-TABLE-LABEL}}\\

\toprule
\multicolumn{{2}}{{c}}{{Map configuration}} & \multirow{{2}}{{*}}{{Metrics}} & \multicolumn{{2}}{{c}}{{Density {density}}} \\
\# Agents & Size & & DHC (ours) & DHC (original training) \\
\midrule
\endfirsthead

\multicolumn{{5}}{{c}}%
{{{{Table \thetable\ continued from previous page}}}} \\
\toprule
\multicolumn{{2}}{{c}}{{Map configuration}} & \multirow{{2}}{{*}}{{Metrics}} & \multicolumn{{2}}{{c}}{{Density {density}}} \\
\# Agents & Size & & DHC (ours) & DHC (original training) \\
\midrule
\endhead

\midrule
\multicolumn{{5}}{{r}}{{Continued on next page}} \\
\endfoot

\bottomrule
\endlastfoot"""


def _format_metric(mean, std, *, bold, with_std):
    """Format one table cell as ``mean`` or ``mean ± std``, optionally in bold."""
    if not with_std:
        return rf"\textbf{{{mean:.2f}}}" if bold else f"{mean:.2f}"
    if bold:
        return rf"\textbf{{{mean:.2f}}} $\pm$ \textbf{{{std:.2f}}}"
    return rf"${mean:.2f} \pm {std:.2f}$"


def latex_communication_table(
    model_communication_id: str = "337500",
    model_nocommunication_id: str = "310000_nocomm",
    density=0.1,
    agents=(4, 8, 16, 32),
    maps=(10, 20, 30, 40),
):
    """Print a LaTeX ``longtable`` comparing two checkpoints.

    Both checkpoints are evaluated with ``test_model`` on every
    (map size, number of agents) combination at the given ``density``; the
    combination 10x10 / 32 agents / density 0.3 is skipped. For each metric
    the better value is set in bold (higher is better, except for makespan).
    In every row the no-communication model's value is written first and the
    communication model's second.
    NOTE(review): confirm that this order matches the "DHC (ours)" /
    "DHC (original training)" column titles.
    """
    test_groups = []
    for num_agents in agents:
        for map_size in maps:
            if density == 0.3 and map_size == 10 and num_agents == 32:
                continue
            test_groups.append((map_size, num_agents, density))
    header = _latex_table_header(density)

    comm = defaultdict(lambda: defaultdict(list))
    nocomm = defaultdict(lambda: defaultdict(list))
    for res, (size, num_agents, _) in test_model(test_groups, model_nocommunication_id):
        nocomm[num_agents][size] = res
    for res, (size, num_agents, _) in test_model(test_groups, model_communication_id):
        comm[num_agents][size] = res

    num_maps = len(maps)
    table = []
    metrics = [r"CSR, \%", r"ISR, \%", "Makespan"]
    for i, num_agents in enumerate(agents):
        sector = rf"\multirow{{{num_maps * 3}}}{{*}}{{{num_agents}}} "
        for map_id, map_size in enumerate(maps):
            row = ["", rf" \multirow{{3}}{{*}}{{${map_size} \times {map_size}$}} "]
            nocomm_list = nocomm[num_agents][map_size]
            comm_list = comm[num_agents][map_size]
            for metric_id, ((a_mean, a_std), (b_mean, b_std)) in enumerate(
                zip(nocomm_list, comm_list)
            ):
                if metric_id != 0:
                    row.append("")
                row.append(metrics[metric_id])
                if metric_id == 2:  # makespan: the less, the better
                    a_wins = a_mean <= b_mean
                else:
                    a_wins = a_mean >= b_mean
                with_std = metric_id != 0  # std for CSR doesn't make sense
                row.append(
                    _format_metric(a_mean, a_std, bold=a_wins, with_std=with_std)
                )
                row.append(
                    _format_metric(b_mean, b_std, bold=not a_wins, with_std=with_std)
                    + _ROW_END
                )
            sector += " & ".join(row)
            if map_id != num_maps - 1:
                sector += "\\cline{2 - 5} \n"
            else:
                sector += "\n"
        if i == len(agents) - 1:
            sector += "\\bottomrule\n"
        else:
            sector += "\\midrule\n"
        table.append(sector)
    footer = "\\end{longtable}\n"
    print(header + "\n".join(table) + footer)


if __name__ == "__main__":
    fire.Fire()
# --------------------------------------------------------------------------------
# /pathfinding/models/dhc/model.py:
# --------------------------------------------------------------------------------
# credits to https://github.com/ZiyuanMa/DHC/blob/master/model.py
import random
import torch
import torch.nn as nn
import torch.nn.functional as F

if torch.cuda.is_available():
    from torch.cuda.amp import autocast
else:
    from torch.cpu.amp import autocast

from pathfinding.settings import yaml_data as settings

DHC_CONFIG = settings["dhc"]


class ResBlock(nn.Module):
    """Two 3x3 convolutions with a skip connection; channel count is preserved."""

    def __init__(self, channels):
        super().__init__()
        self.block1 = nn.Conv2d(channels, channels, 3, 1, 1)
        self.block2 = nn.Conv2d(channels, channels, 3, 1, 1)

    def forward(self, x):
        identity = x

        x = F.relu(self.block1(x))
        x = self.block2(x)

        return F.relu(x + identity)


class MultiHeadAttention(nn.Module):
    """Masked multi-head self-attention across the agents of one batch entry."""

    def __init__(self, input_dim, output_dim, num_heads):
        super().__init__()
        self.num_heads = num_heads
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.W_Q = nn.Linear(input_dim, output_dim * num_heads)
        self.W_K = nn.Linear(input_dim, output_dim * num_heads)
        self.W_V = nn.Linear(input_dim, output_dim * num_heads)
        self.W_O = nn.Linear(output_dim * num_heads, output_dim, bias=False)

    def _split_heads(self, projected, batch_size, num_agents):
        """(B, S, H*W) -> (B, H, S, W)."""
        return projected.view(batch_size, num_agents, self.num_heads, -1).transpose(
            1, 2
        )

    def forward(self, input, attn_mask):
        """Attend over agents.

        Args:
            input: [batch_size x num_agents x input_dim]
            attn_mask: [num_agents x num_agents] or
                [batch_size x num_agents x num_agents]; entries that are True
                are excluded from attention.

        Returns:
            [batch_size x num_agents x output_dim]
        """
        batch_size, num_agents, input_dim = input.size()
        assert input_dim == self.input_dim

        # q_s, k_s, v_s: [batch_size x num_heads x num_agents x output_dim]
        q_s = self._split_heads(self.W_Q(input), batch_size, num_agents)
        k_s = self._split_heads(self.W_K(input), batch_size, num_agents)
        v_s = self._split_heads(self.W_V(input), batch_size, num_agents)

        if attn_mask.dim() == 2:
            attn_mask = attn_mask.unsqueeze(0)
        assert (
            attn_mask.size(0) == batch_size
        ), f"mask dim {attn_mask.size(0)} while batch size {batch_size}"

        attn_mask = attn_mask.unsqueeze(1).repeat_interleave(
            self.num_heads, 1
        )  # attn_mask : [batch_size x num_heads x num_agents x num_agents]
        assert attn_mask.size() == (batch_size, self.num_heads, num_agents, num_agents)

        # Scores and softmax are computed in float32 with autocast switched
        # off so that the -1e9 mask fill is representable.
        with autocast(enabled=False):
            scores = torch.matmul(q_s.float(), k_s.float().transpose(-1, -2)) / (
                self.output_dim**0.5
            )  # scores : [batch_size x n_heads x num_agents x num_agents]
            scores.masked_fill_(
                attn_mask, -1e9
            )  # Fills elements of self tensor with value where mask is one.
            attn = F.softmax(scores, dim=-1)

        context = torch.matmul(attn, v_s)
        context = (
            context.transpose(1, 2)
            .contiguous()
            .view(batch_size, num_agents, self.num_heads * self.output_dim)
        )  # context: [batch_size x num_agents x n_heads * output_dim]
        output = self.W_O(context)

        return output  # output: [batch_size x num_agents x output_dim]


class CommBlock(nn.Module):
    """Communication block: attention messages followed by a GRU state update."""

    def __init__(
        self,
        input_dim,
        output_dim=64,
        num_heads=DHC_CONFIG["communication"]["num_comm_heads"],
        num_layers=DHC_CONFIG["communication"]["num_comm_layers"],
    ):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.self_attn = MultiHeadAttention(input_dim, output_dim, num_heads)

        self.update_cell = nn.GRUCell(output_dim, input_dim)

    def forward(self, latent, comm_mask):
        """Run ``num_layers`` rounds of communication.

        Args:
            latent: batch_size x num_agents x latent_dim
            comm_mask: [num_agents x num_agents] (single environment; the
                matching rows of ``latent`` are updated in place) or
                [batch_size x num_agents x num_agents]; True where two agents
                may communicate.

        Only agents whose mask row contains more than one True entry are
        updated; if there are none, ``latent`` is returned unchanged.
        """
        batch_size, num_agents = latent.size(0), latent.size(1)

        # agent indices of agent that use communication
        update_mask = comm_mask.sum(dim=-1) > 1
        comm_idx = update_mask.nonzero(as_tuple=True)

        # no agent use communication, return
        if len(comm_idx[0]) == 0:
            return latent

        if len(comm_idx) > 1:
            update_mask = update_mask.unsqueeze(2)

        attn_mask = comm_mask == False  # noqa

        for _ in range(self.num_layers):
            info = self.self_attn(latent, attn_mask=attn_mask)
            if len(comm_idx) == 1:
                # Unbatched mask: everything lives in batch entry 0. Create the
                # index on latent's device so this also works off-CPU.
                batch_idx = torch.zeros(
                    len(comm_idx[0]), dtype=torch.long, device=latent.device
                )
                latent[batch_idx, comm_idx[0]] = self.update_cell(
                    info[batch_idx, comm_idx[0]], latent[batch_idx, comm_idx[0]]
                )
            else:
                # Reshape with the actual batch size rather than the configured
                # one, so any batch size is accepted.
                update_info = self.update_cell(
                    info.view(-1, self.output_dim), latent.view(-1, self.input_dim)
                ).view(batch_size, num_agents, self.input_dim)
                latent = torch.where(update_mask, update_info, latent)

        return latent


class Network(nn.Module):
    """DHC Q-network: CNN encoder -> GRU -> communication block -> dueling head."""

    def __init__(
        self,
        input_shape=DHC_CONFIG["observation_shape"],
        cnn_channels=DHC_CONFIG["cnn_channels"],
        hidden_dim=DHC_CONFIG["hidden_dim"],
        max_comm_agents=DHC_CONFIG["max_comm_agents"],
        latent_dim=DHC_CONFIG["latent_dim"],
    ):
        super().__init__()

        self._batch_size = DHC_CONFIG["batch_size"]
        self.input_shape = input_shape
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.max_comm_agents = max_comm_agents

        self.obs_encoder = nn.Sequential(
            nn.Conv2d(self.input_shape[0], cnn_channels, 3, 1),
            nn.ReLU(inplace=True),
            ResBlock(cnn_channels),
            ResBlock(cnn_channels),
            ResBlock(cnn_channels),
            nn.Conv2d(cnn_channels, 16, 1, 1),  # differs from the paper
            # see https://arxiv.org/pdf/2106.11365.pdf, Figure 1
            nn.ReLU(inplace=True),
            nn.Flatten(),
        )

        self.recurrent = nn.GRUCell(self.latent_dim, self.hidden_dim)

        self.comm = CommBlock(hidden_dim)

        # dueling q structure
        self.adv = nn.Linear(hidden_dim, 5)
        self.state = nn.Linear(hidden_dim, 1)
        self.hidden = None

        self._xavier_init()

    def _xavier_init(self):
        """Xavier-uniform weights and zero biases for all Linear/Conv2d layers."""
        for _, m in self.named_modules():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    @torch.no_grad()
    def step(self, obs, pos):
        """Act for one environment step (inference only).

        Args:
            obs: per-agent observations; the first dimension is the number of
                agents.
            pos: per-agent positions, indexed as ``pos[:, 0]`` / ``pos[:, 1]``.

        Returns:
            ``(actions, q_val, hidden, comm_mask)``: a list with the greedy
            action of every agent, followed by the Q-values, the updated
            recurrent state and the communication mask as NumPy arrays.

        The recurrent state is kept in ``self.hidden`` between calls; call
        ``reset()`` before a new episode.
        """
        num_agents = obs.size(0)

        latent = self.obs_encoder(obs)

        if self.hidden is None:
            self.hidden = self.recurrent(latent)
        else:
            self.hidden = self.recurrent(latent, self.hidden)  # e''_i^{t - 1}

        # from num_agents x hidden_dim to 1 x num_agents x hidden_dim
        self.hidden = self.hidden.unsqueeze(0)

        # masks for communication block
        agents_pos = pos
        pos_mat = (agents_pos.unsqueeze(1) - agents_pos.unsqueeze(0)).abs()
        dist_mat = (pos_mat[:, :, 0] ** 2 + pos_mat[:, :, 1] ** 2).sqrt()
        # mask out agents that out of range of FOV
        in_obs_mask = (pos_mat <= DHC_CONFIG["observation_radius"]).all(2)
        # mask out agents that are far away
        _, ranking = dist_mat.topk(
            min(self.max_comm_agents, num_agents), dim=1, largest=False
        )
        # Allocate on the same device as the positions so the mask ops below
        # do not mix devices.
        dist_mask = torch.zeros(
            (num_agents, num_agents), dtype=torch.bool, device=pos.device
        )
        dist_mask.scatter_(1, ranking, True)

        comm_mask = torch.bitwise_and(in_obs_mask, dist_mask)

        # [1, num_agents, hidden_dim]
        self.hidden = self.comm(self.hidden, comm_mask)
        self.hidden = self.hidden.squeeze(0)  # [num_agents, hidden_dim]

        adv_val = self.adv(self.hidden)  # [num_agents, 5]
        state_val = self.state(self.hidden)  # [num_agents, 1]

        q_val = state_val + adv_val - adv_val.mean(1, keepdim=True)

        actions = torch.argmax(q_val, 1).tolist()

        # .cpu() is a no-op for CPU tensors and makes .numpy() valid elsewhere.
        return (
            actions,
            q_val.cpu().numpy(),
            self.hidden.cpu().numpy(),
            comm_mask.cpu().numpy(),
        )

    def reset(self):
        """Forget the recurrent state kept by ``step``."""
        self.hidden = None

    @autocast()
    def forward(self, obs, steps, hidden, comm_mask):
        """Training forward pass over a batch of sequences.

        Args:
            obs: batch_size x seq_len x num_agents x *input_shape
            steps: per-sample sequence lengths; the output is read at
                ``steps - 1``.
            hidden: initial recurrent state,
                (batch_size * num_agents) x hidden_dim
            comm_mask: batch_size x seq_len x max_num_agents x max_num_agents

        Returns:
            Q-values of agent 0 at the last valid step of every sample.
        """
        batch_size = obs.size(0)
        max_steps = obs.size(1)
        num_agents = comm_mask.size(2)

        assert comm_mask.size(2) == DHC_CONFIG["max_num_agents"]

        obs = obs.transpose(1, 2)

        obs = obs.contiguous().view(-1, *self.input_shape)

        latent = self.obs_encoder(obs)

        latent = latent.view(
            batch_size * num_agents, max_steps, self.latent_dim
        ).transpose(0, 1)

        comm_config = DHC_CONFIG["communication"]

        hidden_buffer = []
        for i in range(max_steps):
            # hidden size: batch_size*num_agents x self.hidden_dim
            hidden = self.recurrent(latent[i], hidden)
            hidden = hidden.view(batch_size, num_agents, self.hidden_dim)

            if comm_config["disable_communication"]:
                # With communication "disabled" it is still applied at random
                # with probability comm_enabled_prob.
                if random.random() < comm_config["comm_enabled_prob"]:
                    hidden = self.comm(hidden, comm_mask[:, i])
            else:
                hidden = self.comm(hidden, comm_mask[:, i])
            # only hidden from agent 0
            hidden_buffer.append(hidden[:, 0])
            hidden = hidden.view(batch_size * num_agents, self.hidden_dim)

        # hidden buffer size: batch_size x seq_len x self.hidden_dim
        hidden_buffer = torch.stack(hidden_buffer).transpose(0, 1)

        # hidden size: batch_size x self.hidden_dim
        hidden = hidden_buffer[torch.arange(batch_size), steps - 1]

        adv_val = self.adv(hidden)
        state_val = self.state(hidden)

        q_val = state_val + adv_val - adv_val.mean(1, keepdim=True)

        return q_val


# [dump text preserved verbatim] start of the next dumped file; its `main` continues on the following dump line:
# -------------------------------------------------------------------------------- /pathfinding/models/dhc/train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import ray 4 | import time 5 | import torch 6 | 7 | from pathfinding.models.dhc import GlobalBuffer, Learner, Actor 8 | from pathfinding.settings import yaml_data as settings 9 | 10 | TRAIN_CONFIG = settings["dhc"]["train"] 11 | 12 | torch.manual_seed(0) 13 | np.random.seed(0) 14 | random.seed(0) 15 | 16 | 17 | def main( 18 | num_actors=TRAIN_CONFIG["num_actors"], log_interval=TRAIN_CONFIG["log_interval"] 19 | ): 20 | ray.init() 21 | ray_node = ray.nodes()[0] 22 | 23 | # GlobalBuffer + Learner + 1 * num_actors 24 | assert ( 25 | ray_node["Resources"]["CPU"] >= 2 + num_actors 26 | ), "insufficient amount of CPU cores available" 27 | 28 | buffer =
GlobalBuffer.remote() 29 | learner = Learner.remote(buffer) 30 | time.sleep(1) 31 | actors = [ 32 | Actor.remote(i, 0.4 ** (1 + (i / (num_actors - 1)) * 7), learner, buffer) 33 | for i in range(num_actors) 34 | ] 35 | 36 | for actor in actors: 37 | actor.run.remote() 38 | 39 | print("Actors were successfully created") 40 | 41 | while not ray.get(buffer.ready.remote()): 42 | time.sleep(5) 43 | ray.get(learner.stats.remote(5)) 44 | ray.get(buffer.stats.remote(5)) 45 | 46 | print("Start training") 47 | buffer.run.remote() 48 | learner.run.remote() 49 | 50 | done = False 51 | while not done: 52 | time.sleep(log_interval) 53 | done = ray.get(learner.stats.remote(log_interval)) 54 | ray.get(buffer.stats.remote(log_interval)) 55 | print() 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /pathfinding/models/dhc/visualize.py: -------------------------------------------------------------------------------- 1 | import fire 2 | import matplotlib.animation as animation 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import os 6 | import pickle 7 | import random 8 | import torch 9 | 10 | from pathfinding.environment import Environment, MovingAIBenchmarkingEnvironment 11 | from pathfinding.models.dhc import DHCNetwork 12 | from pathfinding.utils import tests_dir_path 13 | 14 | torch.manual_seed(239) 15 | np.random.seed(239) 16 | random.seed(239) 17 | device = torch.device("cpu") 18 | torch.set_num_threads(1) 19 | 20 | 21 | def get_cmap(n, name="hsv"): 22 | return plt.cm.get_cmap(name, n) 23 | 24 | 25 | def frametamer(imgs, env, init_img): 26 | imgs.append([]) 27 | imgs[-1].append(init_img) 28 | 29 | num_agents = len(env.agents_pos) 30 | eq = env.agents_pos == env.goals_pos 31 | total_positioned = (eq[:, 0] * eq[:, 1]).sum() 32 | 33 | for i, ((agent_x, agent_y), (goal_x, goal_y)) in enumerate( 34 | zip(env.agents_pos, env.goals_pos) 35 | ): 36 | imgs[-1].append( 37 | 
plt.text(0.02, 0.02, s=f"{total_positioned} / {num_agents}", fontsize=8) 38 | ) 39 | imgs[-1].append( 40 | plt.text( 41 | agent_y, agent_x, i, color="black", ha="center", va="center", fontsize=8 42 | ) 43 | ) 44 | imgs[-1].append( 45 | plt.text( 46 | goal_y, goal_x, i, color="black", ha="center", va="center", fontsize=8 47 | ) 48 | ) 49 | 50 | 51 | def fill_map(env): 52 | map = np.copy(env.map) 53 | for agent_id in range(env.num_agents): 54 | x, y = env.agents_pos[agent_id], env.goals_pos[agent_id] 55 | if np.array_equal(x, y): 56 | map[tuple(x)] = 4 57 | else: 58 | map[tuple(x)] = 2 59 | map[tuple(y)] = 3 60 | map = map.astype(np.uint8) 61 | return map 62 | 63 | 64 | def make_animation_single_text( 65 | model_id: int, test_name: str, test_case_idx: int = 0, steps: int = 256 66 | ): 67 | test_case_idx = int(test_case_idx) 68 | color_map = np.array( 69 | [ 70 | [255, 255, 255], # white 71 | [190, 190, 190], # gray 72 | [0, 191, 255], # blue 73 | [255, 165, 0], # orange 74 | [0, 250, 154], # green 75 | ] 76 | ) 77 | 78 | network = DHCNetwork() 79 | network.eval() 80 | network.to(device) 81 | state_dict = torch.load( 82 | os.path.join(".", "models", f"{model_id}.pth"), map_location=device 83 | ) 84 | network.load_state_dict(state_dict) 85 | 86 | with open(os.path.join(tests_dir_path(), test_name), "rb") as f: 87 | tests = pickle.load(f) 88 | 89 | env = Environment() 90 | env.load(tests[test_case_idx][0], tests[test_case_idx][1], tests[test_case_idx][2]) 91 | 92 | fig = plt.figure(figsize=(4.8, 4.8)) 93 | plt.gca().set_xticks(range(0, len(env.map) + 1, 5)) 94 | plt.gca().set_yticks(range(0, len(env.map) + 1, 5)) 95 | 96 | done = False 97 | obs, pos = env.observe() 98 | 99 | imgs = [] 100 | while not done and env.steps < steps: 101 | map = fill_map(env) 102 | img = plt.imshow(color_map[map], animated=True) 103 | 104 | frametamer(imgs, env, img) 105 | 106 | actions, _, _, _ = network.step( 107 | torch.from_numpy(obs.astype(np.float32)).to(device), 108 | 
torch.from_numpy(pos.astype(np.float32)).to(device), 109 | ) 110 | (obs, pos), _, done, _ = env.step(actions) 111 | 112 | if done and env.steps < steps: 113 | map = fill_map(env) 114 | 115 | img = plt.imshow(color_map[map], animated=True) 116 | for _ in range(steps - env.steps): 117 | frametamer(imgs, env, img) 118 | 119 | ani = animation.ArtistAnimation( 120 | fig, imgs, interval=600, blit=True, repeat_delay=1000 121 | ) 122 | 123 | video_writer = animation.PillowWriter(fps=10) 124 | 125 | videos_dir = os.path.join(".", "videos") 126 | os.makedirs(videos_dir, exist_ok=True) 127 | ani.save( 128 | os.path.join(videos_dir, f"test_{model_id}_{test_name}_{test_case_idx}.gif"), 129 | writer=video_writer, 130 | ) 131 | 132 | 133 | def make_animation_movingai( 134 | model_id: int, test_name: str, test_case_idx: int = 0, steps: int = 256 135 | ): 136 | test_case_idx = int(test_case_idx) 137 | color_map = np.array( 138 | [ 139 | [255, 255, 255], # white 140 | [190, 190, 190], # gray 141 | [0, 191, 255], # blue 142 | [255, 165, 0], # orange 143 | [0, 250, 154], # green 144 | ] 145 | ) 146 | 147 | network = DHCNetwork() 148 | network.eval() 149 | network.to(device) 150 | state_dict = torch.load( 151 | os.path.join(".", "models", f"{model_id}.pth"), map_location=device 152 | ) 153 | network.load_state_dict(state_dict) 154 | 155 | with open(os.path.join(tests_dir_path(), test_name), "rb") as f: 156 | tests = pickle.load(f) 157 | 158 | env = MovingAIBenchmarkingEnvironment(should_init=False) 159 | env.load(tests[test_case_idx][0], tests[test_case_idx][1], tests[test_case_idx][2]) 160 | 161 | fig = plt.figure(figsize=(4.8, 4.8)) 162 | 163 | done = False 164 | obs, pos = env.observe() 165 | 166 | imgs = [] 167 | while not done and env.steps < steps: 168 | map = fill_map(env) 169 | img = plt.imshow(color_map[map], animated=True) 170 | 171 | frametamer(imgs, env, img) 172 | 173 | actions, _, _, _ = network.step( 174 | torch.from_numpy(obs.astype(np.float32)).to(device), 175 | 
torch.from_numpy(pos.astype(np.float32)).to(device), 176 | ) 177 | (obs, pos), _, done, _ = env.step(actions) 178 | 179 | if done and env.steps < steps: 180 | map = fill_map(env) 181 | 182 | img = plt.imshow(color_map[map], animated=True) 183 | for _ in range(steps - env.steps): 184 | frametamer(imgs, env, img) 185 | 186 | ani = animation.ArtistAnimation( 187 | fig, imgs, interval=600, blit=True, repeat_delay=1000 188 | ) 189 | 190 | video_writer = animation.PillowWriter(fps=10) 191 | 192 | videos_dir = os.path.join(".", "videos") 193 | os.makedirs(videos_dir, exist_ok=True) 194 | ani.save( 195 | os.path.join(videos_dir, f"{model_id}_{test_name}_{test_case_idx}.gif"), 196 | writer=video_writer, 197 | ) 198 | 199 | 200 | def _make_single_map_image_for_report(): 201 | env = Environment(num_agents=8, map_length=40, fix_density=0.4) 202 | parts = env._part 203 | num_comp = len(parts) 204 | print(num_comp) 205 | 206 | fig = plt.figure(figsize=(4.8, 4.8)) # noqa 207 | 208 | cmap = [ 209 | [255, 255, 255], 210 | [233, 150, 122], 211 | [238, 232, 170], 212 | [152, 251, 152], 213 | [102, 205, 170], 214 | [135, 206, 235], 215 | [255, 182, 193], 216 | [222, 184, 135], 217 | [255, 239, 213], 218 | [240, 255, 240], 219 | [192, 192, 192], 220 | [100, 149, 237], 221 | [72, 61, 139], 222 | [240, 230, 140], 223 | [0, 100, 0], 224 | [143, 188, 143], 225 | [95, 158, 160], 226 | [221, 160, 221], 227 | [250, 235, 215], 228 | [160, 82, 45], 229 | [255, 240, 245], 230 | [245, 255, 250], 231 | [112, 128, 144], 232 | [220, 220, 220], 233 | [255, 127, 80], 234 | [255, 140, 0], 235 | [128, 128, 0], 236 | [124, 252, 0], 237 | [47, 79, 79], 238 | ] 239 | 240 | map = np.copy(env.map) 241 | for agent_id in range(env.num_agents): 242 | x, y = env.agents_pos[agent_id], env.goals_pos[agent_id] 243 | if np.array_equal(x, y): 244 | map[tuple(x)] = 4 245 | else: 246 | map[tuple(x)] = 0 247 | map[tuple(y)] = 0 248 | 249 | plt.plot( 250 | x[1], 251 | x[0], 252 | marker="o", 253 | markersize=8, 254 | 
def __init__(
    self,
    episode_capacity=WRK_CONFIG["episode_capacity"],
    local_buffer_capacity=GENERAL_CONFIG["max_episode_length"],
    init_env_settings=WRK_CONFIG["init_env_settings"],
    max_comm_agents=WRK_CONFIG["max_comm_agents"],
    alpha=WRK_CONFIG["prioritized_replay_alpha"],
    beta=WRK_CONFIG["prioritized_replay_beta"],
    max_num_agents=GENERAL_CONFIG["max_num_agents"],
):
    """Allocate the replay storage and the prioritized-sampling state.

    Args:
        episode_capacity: Number of episode slots in the ring buffer.
        local_buffer_capacity: Maximum number of transitions per episode slot.
        init_env_settings: Initial environment setting, used as the first key
            of ``stat_dict``; the rest of the class indexes it as
            ``(num_agents, map_length)``.
        max_comm_agents: Accepted for interface compatibility; not used here.
        alpha: Exponent applied to priorities before they enter the sum tree.
        beta: Exponent used for the importance-sampling weights.
        max_num_agents: Size of the agent axis of the per-step buffers.
    """
    self.capacity = episode_capacity
    self.local_buffer_capacity = local_buffer_capacity
    self.size = 0
    self.ptr = 0

    # prioritized experience replay
    self.priority_tree = SumTree(episode_capacity * local_buffer_capacity)
    self.alpha = alpha
    self.beta = beta

    self.counter = 0
    self.batched_data = []
    # Dict keys must be hashable; a plain YAML list would raise TypeError, so
    # normalise to a tuple (a no-op when the setting already is one).
    init_env_settings = tuple(init_env_settings)
    self.stat_dict = {init_env_settings: []}
    self.lock = threading.Lock()
    self.env_settings_set = ray.put([init_env_settings])

    # Observation and comm-mask buffers hold one extra row per episode
    # (local_buffer_capacity + 1) compared with the action/reward buffers.
    # The builtin `bool` replaces `np.bool`, which was removed in NumPy 1.24.
    self.obs_buf = np.zeros(
        (
            (local_buffer_capacity + 1) * episode_capacity,
            max_num_agents,
            *GENERAL_CONFIG["observation_shape"],
        ),
        dtype=bool,
    )
    self.act_buf = np.zeros(
        (local_buffer_capacity * episode_capacity), dtype=np.uint8
    )
    self.rew_buf = np.zeros(
        (local_buffer_capacity * episode_capacity), dtype=np.float16
    )
    self.hid_buf = np.zeros(
        (
            local_buffer_capacity * episode_capacity,
            max_num_agents,
            GENERAL_CONFIG["hidden_dim"],
        ),
        dtype=np.float16,
    )
    self.done_buf = np.zeros(episode_capacity, dtype=bool)
    self.size_buf = np.zeros(episode_capacity, dtype=np.uint)
    self.comm_mask_buf = np.zeros(
        (
            (local_buffer_capacity + 1) * episode_capacity,
            max_num_agents,
            max_num_agents,
        ),
        dtype=bool,
    )
def add(self, data: Tuple):
    """Store one finished episode in the slot currently pointed to by ``ptr``.

    ``data`` layout by index:
        0 actor_id, 1 num_agents, 2 map_len, 3 obs_buf, 4 act_buf,
        5 rew_buf, 6 hid_buf, 7 td_errors, 8 done, 9 size, 10 comm_mask
    """
    actor_id, num_agents, map_len = data[0], data[1], data[2]

    # Only episodes from actors with id >= 12 feed the success statistics,
    # and only for settings that are already tracked.
    if actor_id >= 12:
        history = self.stat_dict.get((num_agents, map_len))
        if history is not None:
            history.append(data[8])
            # keep a sliding window of the latest 200 results
            if len(history) == 201:
                history.pop(0)

    with self.lock:
        slot = self.ptr
        size = data[9]
        base = slot * self.local_buffer_capacity

        # replace the transition count of the episode being overwritten
        self.size -= self.size_buf[slot].item()
        self.size += size
        self.counter += size

        tree_idxes = np.arange(base, base + self.local_buffer_capacity)
        self.priority_tree.batch_update(tree_idxes, data[7] ** self.alpha)

        # obs / comm-mask rows are offset by `slot` and span size + 1 rows
        wide_rows = slice(base + slot, base + slot + size + 1)
        step_rows = slice(base, base + size)

        self.obs_buf[wide_rows, :num_agents] = data[3]
        self.act_buf[step_rows] = data[4]
        self.rew_buf[step_rows] = data[5]
        self.hid_buf[step_rows, :num_agents] = data[6]
        self.done_buf[slot] = data[8]
        self.size_buf[slot] = size

        # clear the stale mask first, then write the agents actually present
        self.comm_mask_buf[wide_rows] = 0
        self.comm_mask_buf[wide_rows, :num_agents, :num_agents] = data[10]

        self.ptr = (slot + 1) % self.capacity
b_reward, b_done, b_steps, b_seq_len, b_comm_mask = ( 159 | [], 160 | [], 161 | [], 162 | [], 163 | [], 164 | [], 165 | [], 166 | ) 167 | idxes, priorities = [], [] 168 | b_hidden = [] 169 | 170 | with self.lock: 171 | idxes, priorities = self.priority_tree.batch_sample(batch_size) 172 | global_idxes = idxes // self.local_buffer_capacity 173 | local_idxes = idxes % self.local_buffer_capacity 174 | 175 | for idx, global_idx, local_idx in zip( 176 | idxes.tolist(), global_idxes.tolist(), local_idxes.tolist() 177 | ): 178 | assert ( 179 | local_idx < self.size_buf[global_idx] 180 | ), f"index is {local_idx} but size is {self.size_buf[global_idx]}" 181 | 182 | conf_seq_len = WRK_CONFIG["seq_len"] 183 | fwd_steps = WRK_CONFIG["forward_steps"] 184 | 185 | steps = min(fwd_steps, (self.size_buf[global_idx].item() - local_idx)) 186 | seq_len = min(local_idx + 1, conf_seq_len) 187 | 188 | if local_idx < conf_seq_len - 1: 189 | obs = self.obs_buf[ 190 | global_idx * (self.local_buffer_capacity + 1) : idx 191 | + global_idx 192 | + 1 193 | + steps 194 | ] 195 | comm_mask = self.comm_mask_buf[ 196 | global_idx * (self.local_buffer_capacity + 1) : idx 197 | + global_idx 198 | + 1 199 | + steps 200 | ] 201 | hidden = np.zeros( 202 | ( 203 | GENERAL_CONFIG["max_num_agents"], 204 | GENERAL_CONFIG["hidden_dim"], 205 | ), 206 | dtype=np.float16, 207 | ) 208 | elif local_idx == conf_seq_len - 1: 209 | obs = self.obs_buf[ 210 | idx 211 | + global_idx 212 | + 1 213 | - conf_seq_len : idx 214 | + global_idx 215 | + 1 216 | + steps 217 | ] 218 | comm_mask = self.comm_mask_buf[ 219 | global_idx * (self.local_buffer_capacity + 1) : idx 220 | + global_idx 221 | + 1 222 | + steps 223 | ] 224 | hidden = np.zeros( 225 | ( 226 | GENERAL_CONFIG["max_num_agents"], 227 | GENERAL_CONFIG["hidden_dim"], 228 | ), 229 | dtype=np.float16, 230 | ) 231 | else: 232 | obs = self.obs_buf[ 233 | idx 234 | + global_idx 235 | + 1 236 | - conf_seq_len : idx 237 | + global_idx 238 | + 1 239 | + steps 240 | ] 241 | 
def update_priorities(
    self, idxes: np.ndarray, priorities: np.ndarray, old_ptr: int
):
    """Write new priorities for sampled transitions that are still stored.

    Args:
        idxes: Flat transition indices returned with the sampled batch.
        priorities: New priority for each entry of ``idxes``.
        old_ptr: Value of ``self.ptr`` at the moment the batch was sampled.
    """
    with self.lock:
        old_start = old_ptr * self.local_buffer_capacity
        cur_start = self.ptr * self.local_buffer_capacity

        # Episode slots written since sampling no longer hold the sampled
        # transitions, so their indices are dropped.
        if self.ptr > old_ptr:
            # overwritten slots: [old_ptr, self.ptr)
            still_valid = (idxes < old_start) | (idxes >= cur_start)
        elif self.ptr < old_ptr:
            # pointer wrapped; overwritten: [0, self.ptr) and [old_ptr, self.capacity)
            still_valid = (idxes >= cur_start) & (idxes < old_start)
        else:
            still_valid = None

        if still_valid is not None:
            idxes = idxes[still_valid]
            priorities = priorities[still_valid]

        fresh_priorities = np.copy(priorities) ** self.alpha
        self.priority_tree.batch_update(np.copy(idxes), fresh_priorities)
def check_done(self):
    """Return True once every agent count passes at the largest map length.

    For each agent count from 1 to ``max_num_agents`` the setting
    ``(num_agents, max_map_length)`` must be tracked in ``stat_dict``, hold
    at least 200 recorded results, and have a success total of at least
    ``200 * pass_rate``.
    """
    for num_agents in range(1, GENERAL_CONFIG["max_num_agents"] + 1):
        results = self.stat_dict.get((num_agents, WRK_CONFIG["max_map_length"]))

        # setting not unlocked yet
        if results is None:
            return False

        # not enough episodes, or too few of them succeeded
        if len(results) < 200 or sum(results) < 200 * WRK_CONFIG["pass_rate"]:
            return False

    return True
self.counter < WRK_CONFIG["training_times"] 438 | ): 439 | for i in range(1, self.steps + 1): 440 | data_id = ray.get(self.buffer.get_data.remote()) 441 | data = ray.get(data_id) 442 | 443 | ( 444 | b_obs, 445 | b_action, 446 | b_reward, 447 | b_done, 448 | b_steps, 449 | b_seq_len, 450 | b_hidden, 451 | b_comm_mask, 452 | idxes, 453 | weights, 454 | old_ptr, 455 | ) = data 456 | b_obs, b_action, b_reward = ( 457 | b_obs.to(self.device), 458 | b_action.to(self.device), 459 | b_reward.to(self.device), 460 | ) 461 | b_done, b_steps, weights = ( 462 | b_done.to(self.device), 463 | b_steps.to(self.device), 464 | weights.to(self.device), 465 | ) 466 | b_hidden = b_hidden.to(self.device) 467 | b_comm_mask = b_comm_mask.to(self.device) 468 | 469 | b_next_seq_len = [ 470 | (seq_len + forward_steps).item() 471 | for seq_len, forward_steps in zip(b_seq_len, b_steps) 472 | ] 473 | b_next_seq_len = torch.LongTensor(b_next_seq_len) 474 | 475 | with torch.no_grad(): 476 | b_q_ = (1 - b_done) * self.tar_model( 477 | b_obs, b_next_seq_len, b_hidden, b_comm_mask 478 | ).max(1, keepdim=True)[0] 479 | 480 | b_q = self.model( 481 | b_obs[:, : -WRK_CONFIG["forward_steps"]], 482 | b_seq_len, 483 | b_hidden, 484 | b_comm_mask[:, : -WRK_CONFIG["forward_steps"]], 485 | ).gather(1, b_action) 486 | 487 | td_error = b_q - (b_reward + (0.99**b_steps) * b_q_) 488 | 489 | priorities = td_error.detach().squeeze().abs().clamp(1e-4).cpu().numpy() 490 | 491 | loss = (weights * self.huber_loss(td_error)).mean() 492 | self.loss += loss.item() 493 | 494 | self.optimizer.zero_grad() 495 | scaler.scale(loss).backward() 496 | 497 | scaler.unscale_(self.optimizer) 498 | nn.utils.clip_grad_norm_(self.model.parameters(), 40) 499 | 500 | scaler.step(self.optimizer) 501 | scaler.update() 502 | 503 | self.scheduler.step() 504 | 505 | # store new weights in shared memory 506 | if i % 5 == 0: 507 | self.store_weights() 508 | 509 | self.buffer.update_priorities.remote(idxes, priorities, old_ptr) 510 | 511 | 
def huber_loss(self, td_error, kappa=1.0):
    """Element-wise Huber loss of ``td_error`` with threshold ``kappa``.

    Args:
        td_error: Tensor of TD errors.
        kappa: Switch point between the quadratic and the linear regime.

    Returns:
        A tensor with ``0.5 * x**2`` where ``|x| < kappa`` and
        ``kappa * (|x| - 0.5 * kappa)`` elsewhere.
    """
    abs_td_error = td_error.abs()
    # 1.0 inside the quadratic zone, 0.0 outside; multiplying by a float mask
    # (instead of torch.where) keeps the original dtype promotion.
    flag = (abs_td_error < kappa).float()
    quadratic = abs_td_error.pow(2) * 0.5
    # The linear branch used to be `|x| - 0.5`, which is only the Huber loss
    # for kappa == 1; this form is identical there and continuous for any kappa.
    linear = kappa * (abs_td_error - 0.5 * kappa)
    return flag * quadratic + (1 - flag) * linear
def update_weights(self):
    """Pull the latest weights from the learner and refresh env settings."""
    # The learner hands back an object reference; a second ray.get resolves
    # it to the actual state dict.
    weights_ref = ray.get(self.learner.get_weights.remote())
    self.model.load_state_dict(ray.get(weights_ref))

    # Same two-step fetch for the set of environment settings
    # (number of agents and map size) kept by the global buffer.
    settings_ref = ray.get(self.global_buffer.get_env_settings.remote())
    self.env.update_env_settings_set(ray.get(settings_ref))
TestDescription: 10 | x0: int 11 | y0: int 12 | x1: int 13 | y1: int 14 | expected_dist: float 15 | mapfile: Optional[str] 16 | 17 | 18 | def _transform(line: str) -> list[int]: 19 | return [ 20 | int(ch) for ch in line.replace("@", "1").replace(".", "0").replace("T", "1") 21 | ] 22 | 23 | 24 | def read_map(mapfile: str) -> np.array: 25 | with open(mapfile, "r") as map_file: 26 | map_file.readline() 27 | _, h = map_file.readline().split(" ") 28 | _, w = map_file.readline().split(" ") 29 | h, w = int(h), int(w) 30 | map_file.readline() 31 | lines = map_file.read().split("\n") 32 | map = [_transform(line) for line in lines if line] 33 | 34 | h_read, w_read = len(map), len(map[0]) 35 | if h_read != h or w_read != w: 36 | raise ValueError( 37 | "Size of the map read is not equal to the expected size from MovingAI," 38 | f"({h_read}, {w_read}) != ({h}, {w})" 39 | ) 40 | 41 | return np.array(map) 42 | 43 | 44 | def get_map_density(mapfile: str) -> float: 45 | map = read_map(mapfile) 46 | return (map == 1).sum() / (map.shape[0] * map.shape[1]) 47 | 48 | 49 | def read_scenario_from_file(scenfile: str): 50 | tests = [] 51 | 52 | with open(scenfile, "r") as scen_file: 53 | scen_file.readline() 54 | for line in scen_file: 55 | test_no, map_no, h, w, y0, x0, y1, x1, exp_dst = line.split("\t") 56 | x0, y0, x1, y1, exp_dst = int(x0), int(y0), int(x1), int(y1), float(exp_dst) 57 | tests.append(TestDescription(x0, y0, x1, y1, exp_dst, map_no)) 58 | return tests 59 | 60 | 61 | def get_tests_for_multiple_agents( 62 | scenfile: str = None, num_agents: int = 5, max_num_tests: int = 200 63 | ) -> list[list[TestDescription]]: 64 | scens = read_scenario_from_file(scenfile) 65 | tests = [] 66 | num_tests = min(max_num_tests * num_agents - 1, len(scens) - num_agents + 1) 67 | for i in range(0, num_tests, num_agents): 68 | tests.append(scens[i : i + num_agents]) 69 | return tests 70 | 71 | 72 | if __name__ == "__main__": 73 | fire.Fire() 74 | 75 | # success rate: 0.00% 76 | # soft-success 
# Load the project configuration once at import time. The path is relative,
# so the process must be started from the directory that holds config.yaml.
_config_path = os.path.join(".", "config.yaml")
yaml_file = open(_config_path, "r")
try:
    yaml_data = yaml.load(yaml_file, Loader=yaml.FullLoader)
finally:
    yaml_file.close()


__all__ = ["yaml_data"]
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_4agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/10length_4agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_4agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/10length_8agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_8agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/10length_8agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_8agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/128length_32agents_0.3372density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/128length_32agents_0.3372density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_16agents_0.1density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_16agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_16agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_16agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_32agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_32agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_32agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_32agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_4agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_4agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_4agents_0.3density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_4agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_8agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_8agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/20length_8agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_8agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/256length_16agents_0.2654density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/256length_16agents_0.2654density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/256length_64agents_0.2654density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/256length_64agents_0.2654density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_16agents_0.1density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_16agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_16agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_16agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_32agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_32agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_32agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_32agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_4agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_4agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_4agents_0.3density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_4agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_8agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_8agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/30length_8agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_8agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/32length_32agents_0.334density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/32length_32agents_0.334density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_16agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_16agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_16agents_0.3density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_16agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_32agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_32agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_32agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_32agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_4agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_4agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_4agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_4agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_64agents_0.3density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_64agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_8agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_8agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/40length_8agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_8agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_16agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_16agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_16agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_16agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_32agents_0.1density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_32agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_32agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_32agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_4agents_0.1density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_4agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_4agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_4agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_64agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_64agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_8agents_0.1density.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_8agents_0.1density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/80length_8agents_0.3density.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_8agents_0.3density.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_16agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_16agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_1agents_.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8d8f765f53025c1042db66cf24ccccfd42d913f77a81dc3c95174e32fb2fbeb9 3 | size 335112169 4 | -------------------------------------------------------------------------------- /pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_32agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_32agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_4agents_.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_4agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_64agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_64agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_8agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_8agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/den520d.map_den520d-even-1.scen_1agents_.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0a2deb1672b72689d84b76bcdfae0bd8c1d3a1efda8dd745e60b77393ce26ffd 3 | size 335112169 4 | -------------------------------------------------------------------------------- /pathfinding/test_cases/den520d.map_den520d-even-1.scen_32agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/den520d.map_den520d-even-1.scen_32agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/den520d.map_den520d-even-1.scen_4agents_.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/den520d.map_den520d-even-1.scen_4agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/den520d.map_den520d-even-1.scen_64agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/den520d.map_den520d-even-1.scen_64agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_1agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_1agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_32agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_32agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_64agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_64agents_.pkl -------------------------------------------------------------------------------- 
/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_8agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_8agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_1agents_.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:677d8b80b14819c82d7c827be4f68abc06e2d6cdb1ee34abf73d0c69ac3c1c41 3 | size 192486091 4 | -------------------------------------------------------------------------------- /pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_32agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_32agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_64agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_64agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_8agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_8agents_.pkl -------------------------------------------------------------------------------- 
/pathfinding/test_cases/maze-128-128-2.map_maze-128-128-2-random-1.scen_32agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/maze-128-128-2.map_maze-128-128-2-random-1.scen_32agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/room-32-32-4.map_room-32-32-4-random-1.scen_32agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/room-32-32-4.map_room-32-32-4-random-1.scen_32agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/small-3-rooms.map_small-12.scen_12agents_.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small-3-rooms.map_small-12.scen_12agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/small.map_small-4.scen_4agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small.map_small-4.scen_4agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/small.map_small-8-reversed.scen_8agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small.map_small-8-reversed.scen_8agents_.pkl -------------------------------------------------------------------------------- /pathfinding/test_cases/small.map_small-8.scen_8agents_.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small.map_small-8.scen_8agents_.pkl -------------------------------------------------------------------------------- /pathfinding/utils.py: -------------------------------------------------------------------------------- 1 | import fire 2 | import multiprocessing as mp 3 | import numpy as np 4 | import os 5 | import pickle 6 | 7 | from pathfinding.environment import Environment, MovingAIBenchmarkingEnvironment 8 | from pathfinding.movingai import ( 9 | get_tests_for_multiple_agents, 10 | read_map, 11 | TestDescription, 12 | ) 13 | 14 | 15 | def generate_test_filename(length: int, num_agents: int, density: float, ext="pkl"): 16 | return f"{length}length_{num_agents}agents_{density}density.{ext}" 17 
| 18 | 19 | def generate_moving_ai_test_filename( 20 | map_filename: str, scenary_filename: str, num_agents: int, ext="pkl" 21 | ): 22 | map_filename = map_filename.split(os.sep)[-1] 23 | scenary_filename = scenary_filename.split(os.sep)[-1] 24 | return f"{map_filename}_{scenary_filename}_{num_agents}agents_.{ext}" 25 | 26 | 27 | def tests_dir_path(): 28 | return os.path.join(".", "pathfinding", "test_cases") 29 | 30 | 31 | def tests_moving_ai_dir_path(): 32 | return os.path.join("data", "movingai") 33 | 34 | 35 | def generate_test_suits(tests_config, repeat_for: int): 36 | os.makedirs(tests_dir_path(), exist_ok=True) 37 | for map_length, num_agents, density in tests_config: 38 | env = Environment( 39 | num_agents=num_agents, map_length=map_length, fix_density=density 40 | ) 41 | tests = [] 42 | for generated, _ in enumerate(range(repeat_for)): 43 | tests.append( 44 | (np.copy(env.map), np.copy(env.agents_pos), np.copy(env.goals_pos)) 45 | ) 46 | print(generated) 47 | env.reset(num_agents=num_agents, map_length=map_length) 48 | 49 | filename = generate_test_filename(map_length, num_agents, density) 50 | with open(os.path.join(tests_dir_path(), filename), "wb") as file: 51 | pickle.dump(tests, file) 52 | 53 | 54 | def generate_test_suits_moving_ai( 55 | tests_config: list[int], map_filename: str, scenary_filename: str, repeat_for: int 56 | ): 57 | os.makedirs(tests_dir_path(), exist_ok=True) 58 | map_filename = os.path.join(tests_moving_ai_dir_path(), map_filename) 59 | scenary_filename = os.path.join(tests_moving_ai_dir_path(), scenary_filename) 60 | 61 | for num_agents in tests_config: 62 | pkl_tests = [] 63 | tests = get_tests_for_multiple_agents(scenary_filename, num_agents, repeat_for) 64 | for test_set in tests: 65 | env = MovingAIBenchmarkingEnvironment( 66 | num_agents=num_agents, 67 | map_filename=map_filename, 68 | test_descriptions=test_set, 69 | ) 70 | pkl_tests.append( 71 | (np.copy(env.map), np.copy(env.agents_pos), np.copy(env.goals_pos)) 72 | ) 73 | 
filename = generate_moving_ai_test_filename( 74 | map_filename, 75 | scenary_filename, 76 | num_agents, 77 | ) 78 | with open(os.path.join(tests_dir_path(), filename), "wb") as file: 79 | pickle.dump(pkl_tests, file) 80 | 81 | 82 | def _run_tests(tests_pkl_filename: str, test_generation_fn, singe_test_fn): 83 | pool = mp.Pool(mp.cpu_count()) 84 | 85 | with open(tests_pkl_filename, "rb") as f: 86 | tests = pickle.load(f) 87 | 88 | tests = test_generation_fn(tests) 89 | ret = pool.map(singe_test_fn, tests) 90 | 91 | csr, isr, makespan = zip(*ret) 92 | csr = 100 * np.array(csr) 93 | isr = 100 * np.array(isr) 94 | csr_mean, csr_std = np.mean(csr), np.std(csr) 95 | isr_mean, isr_std = np.mean(isr), np.std(isr) 96 | makespan_mean, makespan_std = np.mean(makespan), np.std(makespan) 97 | 98 | print(f"CSR: {csr_mean} +- {csr_std}%") 99 | print(f"ISR: {isr_mean} +- {isr_std}%") 100 | print(f"Makespan: {makespan_mean} +- {makespan_std}") 101 | print() 102 | 103 | return (csr_mean, csr_std), (isr_mean, isr_std), (makespan_mean, makespan_std) 104 | 105 | 106 | def test_group(test_group, test_generation_fn, singe_test_fn, is_random=True): 107 | if is_random: 108 | length, num_agents, density = test_group 109 | print(f"test group: {length} length {num_agents} agents {density} density") 110 | return _run_tests( 111 | os.path.join( 112 | tests_dir_path(), 113 | generate_test_filename(length, num_agents, density), 114 | ), 115 | test_generation_fn, 116 | singe_test_fn, 117 | ) 118 | else: 119 | num_agents, map_filename, scenary_filename = test_group 120 | print( 121 | f"test group: {map_filename} map " 122 | f"{scenary_filename} scen {num_agents} agents" 123 | ) 124 | return _run_tests( 125 | os.path.join( 126 | tests_dir_path(), 127 | generate_moving_ai_test_filename( 128 | map_filename, scenary_filename, num_agents 129 | ), 130 | ), 131 | test_generation_fn, 132 | singe_test_fn, 133 | ) 134 | 135 | 136 | def calculate_metrics(env: Environment, makespan: int): 137 | pos_equality = 
env.agents_pos == env.goals_pos 138 | isr = (pos_equality[:, 0] * pos_equality[:, 1]).sum() / env.agents_pos.shape[0] 139 | csr = np.array_equal(env.agents_pos, env.goals_pos) 140 | return csr, isr, makespan 141 | 142 | 143 | def _dump_to_scen_file( 144 | scenfile: str, 145 | tests: list[TestDescription], 146 | map_h: int, 147 | map_w: int, 148 | ): 149 | with open(scenfile, "w") as scen: 150 | print("version 1", file=scen) 151 | for line_no, test in enumerate(tests): 152 | test_str = "\t".join( 153 | map( 154 | str, 155 | [ 156 | line_no, 157 | test.mapfile, 158 | map_h, 159 | map_w, 160 | test.x0, 161 | test.y0, 162 | test.x1, 163 | test.y1, 164 | test.expected_dist, 165 | ], 166 | ) 167 | ) 168 | print(test_str, file=scen) 169 | 170 | 171 | def generate_scen_for_custom_maps( 172 | map_filename: str, 173 | num_agents: int = 8, 174 | num_tests: int = 10, 175 | ): 176 | path_parts = map_filename.split(os.sep) 177 | dirpath, mapfile = f"{os.sep}".join(path_parts[:-1]), path_parts[-1] 178 | scenfile = f"{dirpath}{os.sep}generated_{mapfile.split('.')[-2]}.scen" 179 | custom_map = read_map(map_filename) 180 | 181 | h, w = custom_map.shape 182 | 183 | pos = np.argwhere(custom_map == 0) 184 | 185 | tests = [] 186 | rng = np.random.default_rng() 187 | for _ in range(num_tests): 188 | agents = rng.choice(pos, 2 * num_agents, replace=False) 189 | start, finish = agents[:num_agents], agents[num_agents:] 190 | for s, f in zip(start, finish): 191 | tests.append( 192 | TestDescription( 193 | x0=s[1], 194 | y0=s[0], 195 | x1=f[1], 196 | y1=f[0], 197 | expected_dist=-1, 198 | mapfile=mapfile, 199 | ) 200 | ) 201 | _dump_to_scen_file(scenfile, tests, h, w) 202 | 203 | 204 | if __name__ == "__main__": 205 | fire.Fire() 206 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "po-mapf-thesis" 3 | version = "0.1.0" 4 | 
description = "Partially observable / decentralized multi-agent pathfinding in Grid Environments using Reinforcement Learning" 5 | authors = ["Vlad Savinov "] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [{include = "pathfinding"}] 9 | include = [{path = "tests"}] 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.9" 13 | torch = "^1.12.1" 14 | pyyaml = "^6.0" 15 | matplotlib = "^3.6.2" 16 | ray = "^2.2.0" 17 | ruff = "^0.0.241" 18 | numpy = "1.23.1" 19 | pyproject-toml = "^0.0.10" 20 | 21 | [tool.poetry.group.dev.dependencies] 22 | torch = "^1.12.1" 23 | black = "^22.10.0" 24 | fire = "^0.4.0" 25 | 26 | 27 | [tool.poetry.group.test.dependencies] 28 | pytest = "^7.2.0" 29 | 30 | [tool.pytest.ini_options] 31 | testpaths = [ 32 | "tests" 33 | ] 34 | 35 | [build-system] 36 | requires = ["poetry-core"] 37 | build-backend = "poetry.core.masonry.api" 38 | 39 | [tool.ruff] 40 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. 41 | select = ["E", "F"] 42 | ignore = [] 43 | 44 | # Allow autofix for all enabled rules (when `--fix` is provided). 45 | unfixable = [] 46 | 47 | # Exclude a variety of commonly ignored directories. 48 | exclude = [ 49 | ".bzr", 50 | ".direnv", 51 | ".eggs", 52 | ".git", 53 | ".hg", 54 | ".mypy_cache", 55 | ".nox", 56 | ".pants.d", 57 | ".ruff_cache", 58 | ".svn", 59 | ".tox", 60 | ".venv", 61 | "__pypackages__", 62 | "_build", 63 | "buck-out", 64 | "build", 65 | "dist", 66 | "node_modules", 67 | "venv", 68 | ] 69 | per-file-ignores = {} 70 | 71 | # Same as Black. 72 | line-length = 88 73 | 74 | # Allow unused variables when underscore-prefixed. 75 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 76 | 77 | # Assume Python 3.10. 78 | target-version = "py310" 79 | 80 | [tool.ruff.mccabe] 81 | # Unlike Flake8, default to a complexity level of 10.
82 | max-complexity = 10 83 | -------------------------------------------------------------------------------- /static/DHC_10x10_4_good.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_10x10_4_good.gif -------------------------------------------------------------------------------- /static/DHC_40x40_16_dense.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_40x40_16_dense.gif -------------------------------------------------------------------------------- /static/DHC_40x40_16_good.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_40x40_16_good.gif -------------------------------------------------------------------------------- /static/DHC_40x40_4_good.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_40x40_4_good.gif -------------------------------------------------------------------------------- /static/DHC_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_architecture.png -------------------------------------------------------------------------------- /static/DHC_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_training.png 
-------------------------------------------------------------------------------- /static/chart_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/chart_40x40.png -------------------------------------------------------------------------------- /static/chart_80x80.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/chart_80x80.png -------------------------------------------------------------------------------- /tests/test_imports.py: -------------------------------------------------------------------------------- 1 | def test_import_dhc(): 2 | from pathfinding.models.dhc import DHCNetwork # noqa 3 | 4 | 5 | def test_import_env(): 6 | from pathfinding.environment import Environment # noqa 7 | 8 | 9 | def test_import_buffer(): 10 | from pathfinding.models.dhc import LocalBuffer # noqa 11 | 12 | 13 | def test_import_worker(): 14 | from pathfinding.models.dhc import Actor, Learner, GlobalBuffer # noqa 15 | -------------------------------------------------------------------------------- /videos/337500_small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl_0.gif -------------------------------------------------------------------------------- /videos/337500_small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl_0.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl_0.gif -------------------------------------------------------------------------------- /videos/337500_small-3-rooms.map_small-12.scen_12agents_.pkl_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small-3-rooms.map_small-12.scen_12agents_.pkl_0.gif -------------------------------------------------------------------------------- /videos/337500_small.map_small-4.scen_4agents_.pkl_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small.map_small-4.scen_4agents_.pkl_0.gif -------------------------------------------------------------------------------- /videos/337500_small.map_small-8-reversed.scen_8agents_.pkl_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small.map_small-8-reversed.scen_8agents_.pkl_0.gif -------------------------------------------------------------------------------- /videos/337500_small.map_small-8.scen_8agents_.pkl_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small.map_small-8.scen_8agents_.pkl_0.gif --------------------------------------------------------------------------------