├── .gitignore
├── LICENSE
├── README.md
├── Thesis.pdf
├── config.yaml
├── models
├── 310000_nocomm.pth
└── 337500.pth
├── pathfinding
├── environment.py
├── main.py
├── models
│ └── dhc
│ │ ├── __init__.py
│ │ ├── buffer.py
│ │ ├── evaluate.py
│ │ ├── model.py
│ │ ├── train.py
│ │ ├── visualize.py
│ │ └── worker.py
├── movingai.py
├── settings.py
├── test_cases
│ ├── 10length_16agents_0.1density.pkl
│ ├── 10length_16agents_0.3density.pkl
│ ├── 10length_32agents_0.1density.pkl
│ ├── 10length_4agents_0.1density.pkl
│ ├── 10length_4agents_0.3density.pkl
│ ├── 10length_8agents_0.1density.pkl
│ ├── 10length_8agents_0.3density.pkl
│ ├── 128length_32agents_0.3372density.pkl
│ ├── 20length_16agents_0.1density.pkl
│ ├── 20length_16agents_0.3density.pkl
│ ├── 20length_32agents_0.1density.pkl
│ ├── 20length_32agents_0.3density.pkl
│ ├── 20length_4agents_0.1density.pkl
│ ├── 20length_4agents_0.3density.pkl
│ ├── 20length_8agents_0.1density.pkl
│ ├── 20length_8agents_0.3density.pkl
│ ├── 256length_16agents_0.2654density.pkl
│ ├── 256length_64agents_0.2654density.pkl
│ ├── 30length_16agents_0.1density.pkl
│ ├── 30length_16agents_0.3density.pkl
│ ├── 30length_32agents_0.1density.pkl
│ ├── 30length_32agents_0.3density.pkl
│ ├── 30length_4agents_0.1density.pkl
│ ├── 30length_4agents_0.3density.pkl
│ ├── 30length_8agents_0.1density.pkl
│ ├── 30length_8agents_0.3density.pkl
│ ├── 32length_32agents_0.334density.pkl
│ ├── 40length_16agents_0.1density.pkl
│ ├── 40length_16agents_0.3density.pkl
│ ├── 40length_32agents_0.1density.pkl
│ ├── 40length_32agents_0.3density.pkl
│ ├── 40length_4agents_0.1density.pkl
│ ├── 40length_4agents_0.3density.pkl
│ ├── 40length_64agents_0.3density.pkl
│ ├── 40length_8agents_0.1density.pkl
│ ├── 40length_8agents_0.3density.pkl
│ ├── 80length_16agents_0.1density.pkl
│ ├── 80length_16agents_0.3density.pkl
│ ├── 80length_32agents_0.1density.pkl
│ ├── 80length_32agents_0.3density.pkl
│ ├── 80length_4agents_0.1density.pkl
│ ├── 80length_4agents_0.3density.pkl
│ ├── 80length_64agents_0.3density.pkl
│ ├── 80length_8agents_0.1density.pkl
│ ├── 80length_8agents_0.3density.pkl
│ ├── Berlin_0_256.map_Berlin_0_256.map.scen_16agents_.pkl
│ ├── Berlin_0_256.map_Berlin_0_256.map.scen_1agents_.pkl
│ ├── Berlin_0_256.map_Berlin_0_256.map.scen_32agents_.pkl
│ ├── Berlin_0_256.map_Berlin_0_256.map.scen_4agents_.pkl
│ ├── Berlin_0_256.map_Berlin_0_256.map.scen_64agents_.pkl
│ ├── Berlin_0_256.map_Berlin_0_256.map.scen_8agents_.pkl
│ ├── den520d.map_den520d-even-1.scen_1agents_.pkl
│ ├── den520d.map_den520d-even-1.scen_32agents_.pkl
│ ├── den520d.map_den520d-even-1.scen_4agents_.pkl
│ ├── den520d.map_den520d-even-1.scen_64agents_.pkl
│ ├── ht_chantry.map_ht_chantry-even-1.scen_1agents_.pkl
│ ├── ht_chantry.map_ht_chantry-even-1.scen_32agents_.pkl
│ ├── ht_chantry.map_ht_chantry-even-1.scen_64agents_.pkl
│ ├── ht_chantry.map_ht_chantry-even-1.scen_8agents_.pkl
│ ├── lak303d.map_lak303d-even-1.scen_1agents_.pkl
│ ├── lak303d.map_lak303d-even-1.scen_32agents_.pkl
│ ├── lak303d.map_lak303d-even-1.scen_64agents_.pkl
│ ├── lak303d.map_lak303d-even-1.scen_8agents_.pkl
│ ├── maze-128-128-2.map_maze-128-128-2-random-1.scen_32agents_.pkl
│ ├── room-32-32-4.map_room-32-32-4-random-1.scen_32agents_.pkl
│ ├── small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl
│ ├── small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl
│ ├── small-3-rooms.map_small-12.scen_12agents_.pkl
│ ├── small.map_small-4.scen_4agents_.pkl
│ ├── small.map_small-8-reversed.scen_8agents_.pkl
│ └── small.map_small-8.scen_8agents_.pkl
└── utils.py
├── poetry.lock
├── pyproject.toml
├── static
├── DHC_10x10_4_good.gif
├── DHC_40x40_16_dense.gif
├── DHC_40x40_16_good.gif
├── DHC_40x40_4_good.gif
├── DHC_architecture.png
├── DHC_training.png
├── chart_40x40.png
└── chart_80x80.png
├── tests
└── test_imports.py
└── videos
├── 337500_small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl_0.gif
├── 337500_small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl_0.gif
├── 337500_small-3-rooms.map_small-12.scen_12agents_.pkl_0.gif
├── 337500_small.map_small-4.scen_4agents_.pkl_0.gif
├── 337500_small.map_small-8-reversed.scen_8agents_.pkl_0.gif
└── 337500_small.map_small-8.scen_8agents_.pkl_0.gif
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | .idea/
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Vlad
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learnable Decentralized MAPF using reinforcement learning with local communication
2 |
3 | 
4 | 
5 | 
6 | 
7 |
8 | ## Description
9 |
10 | We perform extensive empirical evaluation of one of the state-of-the-art decentralized PO-MAPF algorithms which leverages communication between agents, Distributed Heuristic Communication (DHC). Through comprehensive experiments, the performance of DHC is observed to degrade when agents are faced with complete packet loss during communication. To mitigate this issue, we propose a novel algorithm called DHC-R (DHC-robust). Open-sourced model weights and the codebase are provided.
11 |
12 | ## Requirements
13 | To run `models.dhc.train` successfully, you need a machine equipped with 1 GPU and several CPUs.
14 | Consider configuring `num_cpus - 2` actors through `dhc.train.num_actors` in `config.yaml`.
15 |
16 | **Attention: We do not guarantee the desired performance on a non-GPU machine.**
17 |
18 | While we aim to support macOS, Linux, and Windows, successful training is not guaranteed on a Windows-based machine.
19 | The benchmarking script should work there, though. Please report it [here](https://github.com/acforvs/po-mapf-thesis/issues) if it doesn't.
20 |
21 | ## Setting up
22 | 1. Install [Poetry](https://python-poetry.org)
23 | 2. Run [poetry install](https://python-poetry.org/docs/cli/#install) to install the dependencies
24 |
25 | If you see the error ``Failed to create the collection: Prompt dismissed..`` when trying to run `poetry install`, [consider](https://github.com/python-poetry/poetry/issues/1917#issuecomment-1251667047) executing this line first:
26 | ```shell
27 | export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
28 | ```
29 |
30 | ## Repository description & Usage
31 | 1. `models` dir contains the weights of the trained models
32 | 2. `config.yaml` - training & model params, environmental settings etc.
33 | 3. `pathfinding/models` provides one with the implementation of different models
34 |
35 |
36 | ## Cite
37 |
38 | ```
39 | @InProceedings{10.1007/978-3-031-43111-1_14,
40 | author="Savinov, Vladislav
41 | and Yakovlev, Konstantin",
42 | editor="Ronzhin, Andrey
43 | and Sadigov, Aminagha
44 | and Meshcheryakov, Roman",
45 | title="DHC-R: Evaluating ``Distributed Heuristic Communication'' and Improving Robustness for Learnable Decentralized PO-MAPF",
46 | booktitle="Interactive Collaborative Robotics",
47 | year="2023",
48 | publisher="Springer Nature Switzerland",
49 | address="Cham",
50 | pages="151--163",
51 | abstract="Multi-agent pathfinding (MAPF) is a problem of coordinating the movements of multiple agents operating a shared environment that has numerous industrial and research applications. In many practical cases the agents (robots) have limited visibility of the environment and must rely on local observations to make decisions. This scenario, known as partially observable MAPF (PO-MAPF), can be solved through decentralized approaches. In recent years, several learnable algorithms have been proposed for solving PO-MAPF. However, their performance is oftentimes not validated out-of-distribution (OOD), and the code is often not properly open-sourced. In this study, we conduct a comprehensive empirical evaluation of one of the state-of-the-art decentralized PO-MAPF algorithms, Distributed Heuristic Communication (DHC), Ma, Z., Luo, Y., Ma, H.: Distributed heuristic multi-agent path finding with communication. In: 2021 International Conference on Robotics and Automation (ICRA), pp. 8699--8705. IEEE, Xi'an, China (2021), which incorporates communication between agents. Our experiments reveal that the performance of DHC deteriorates when agents encounter complete packet loss during communication. To address this issue, we propose a novel algorithm called DHC-R that employs a similar architecture to the original DHC but introduces randomness into the graph neural network-based communication block, preventing the passage of some data packets during training. Empirical evaluation confirms that DHC-R outperforms DHC in scenarios with packet loss. Open-sourced model weights and the codebase are provided: https://github.com/acforvs/dhc-robust-mapf.",
52 | isbn="978-3-031-43111-1"
53 | }
54 | ```
55 |
56 | ## Contributing
57 |
58 | See the detailed contribution guide
59 |
60 | 1. Install [black](https://github.com/psf/black), you can likely run
61 | ```shell
62 | pip3 install black
63 | ```
64 |
65 | 2. Use [black](https://github.com/psf/black) to ensure that the codestyle remains great
66 | ```shell
67 | poetry run black .
68 | ```
69 | 3. Use [ruff](https://github.com/charliermarsh/ruff) to lint all the files
70 | ```shell
71 | poetry run ruff .
72 | ```
73 | 4. Make sure tests are OK
74 | ```shell
75 | poetry run pytest
76 | ```
77 | 5. Create a PR with new features
78 |
79 |
80 | ## References
81 |
82 | [1]
83 | Ma, Ziyuan and Luo, Yudong and Ma, Hang, 2021. Distributed Heuristic Multi-Agent Path Finding with Communication.
84 |
85 | ## License
86 |
87 | [](https://github.com/acforvs/po-mapf-thesis/blob/main/LICENSE)
88 |
89 |
90 |
--------------------------------------------------------------------------------
/Thesis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/Thesis.pdf
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | dhc:
2 | cnn_channels: 128
3 | fov: !!python/tuple [9, 9]
4 | observation_radius: 4 # since the FOV is 9x9
5 | observation_shape: !!python/tuple [6, 9, 9]
6 | hidden_dim: 256
7 | max_comm_agents: 3 # includes the agent itself
8 | batch_size: 192
9 | max_num_agents: 16
10 | latent_dim: 784 # 16 * 7 * 7, do not forget to change if the observation_shape is changed
11 | max_episode_length: 256
12 |
13 | communication:
14 | disable_communication: 1
15 | comm_enabled_prob: 0.7
16 | num_comm_layers: 2
17 | num_comm_heads: 2
18 |
19 | buffer:
20 | action_dim: 5
21 | forward_steps: 2
22 |
23 | worker:
24 | episode_capacity: 2048
25 | init_env_settings: !!python/tuple [ 1, 10 ]
26 | max_comm_agents: 3
27 | prioritized_replay_alpha: 0.6
28 | prioritized_replay_beta: 0.4
29 | forward_steps: 2
30 | seq_len: 20
31 | max_map_length: 40
32 | pass_rate: 0.9
33 | learning_starts: 100000
34 | training_times: 600000
35 | target_network_update_freq: 2000
36 | save_interval: 2000
37 | actor_update_steps: 400
38 |
39 | train:
40 | num_actors: 16
41 | log_interval: 10
42 |
43 |
44 | environment:
45 | map_length: 50
46 | num_agents: 2
47 | observation_radius: 4
48 | reward_fn:
49 | move: -0.075
50 | stay_on_goal: 0
51 | stay_off_goal: -0.075
52 | collision: -0.5
53 | finish: 3
54 |
55 | init_env_settings: !!python/tuple [1, 10]
56 | observation_shape: !!python/tuple [6, 9, 9]
57 | action_dim: 5
58 |
--------------------------------------------------------------------------------
/models/310000_nocomm.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/models/310000_nocomm.pth
--------------------------------------------------------------------------------
/models/337500.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/models/337500.pth
--------------------------------------------------------------------------------
/pathfinding/environment.py:
--------------------------------------------------------------------------------
1 | from pathfinding.settings import yaml_data as settings
2 | from pathfinding import movingai
3 |
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import random
7 | from typing import List
8 |
# Enable matplotlib's interactive mode for the whole module.
plt.ion()

# "environment" section of the project settings loaded from YAML.
ENV_CONFIG = settings["environment"]

# (row, column) displacement for each action index:
# 0 stay, 1 up, 2 down, 3 left, 4 right.
# The builtin ``int`` is used because the ``np.int`` alias was removed in
# NumPy 1.24; it selects exactly the same dtype the alias used to.
action_list = np.array([[0, 0], [-1, 0], [1, 0], [0, -1], [0, 1]], dtype=int)

# RGB colour for each cell code drawn by Environment.render():
# 0 free cell, 1 obstacle, 2 agent, 3 goal, 4 agent standing on its goal.
color_map = np.array(
    [
        [255, 255, 255],  # white
        [190, 190, 190],  # gray
        [0, 191, 255],  # blue
        [255, 165, 0],  # orange
        [0, 250, 154],  # green
    ]
)
24 |
25 |
def map_partition(map):
    """Partition the free cells of a grid into 4-connected components.

    Args:
        map: 2-D numpy array; cells equal to 0 are treated as free.

    Returns:
        A list of components. Each component is a list of ``(row, col)``
        tuples in breadth-first visiting order. Components made of a single
        cell are dropped.

    Raises:
        RuntimeError: if the grid contains no free cell at all.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    empty_pos = {tuple(pos) for pos in np.argwhere(map == 0).tolist()}

    if not empty_pos:
        raise RuntimeError("There are no empty positions found")

    partition_list = []
    while empty_pos:
        # Flood-fill one component starting from an arbitrary free cell.
        # A deque gives O(1) pops from the left (list.pop(0) is O(n)).
        frontier = deque([empty_pos.pop()])
        component = []

        while frontier:
            x, y = frontier.popleft()
            for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                pos = (x + dx, y + dy)
                if pos in empty_pos:
                    # Removing the cell from the pool marks it as visited.
                    empty_pos.remove(pos)
                    frontier.append(pos)

            component.append((x, y))

        if len(component) >= 2:
            partition_list.append(component)

    return partition_list
59 |
60 |
61 | class Environment:
62 | def __init__(
63 | self,
64 | num_agents: int = ENV_CONFIG["init_env_settings"][0],
65 | map_length: int = ENV_CONFIG["init_env_settings"][1],
66 | obs_radius: int = ENV_CONFIG["observation_radius"],
67 | reward_fn: dict = ENV_CONFIG["reward_fn"],
68 | fix_density=None,
69 | curriculum=False,
70 | init_env_settings_set=ENV_CONFIG["init_env_settings"],
71 | should_init: bool = True,
72 | ):
73 | self.curriculum = curriculum
74 | if curriculum:
75 | self.env_set = [init_env_settings_set]
76 | self.num_agents = init_env_settings_set[0]
77 | self.map_size = (init_env_settings_set[1], init_env_settings_set[1])
78 | else:
79 | self.num_agents = num_agents
80 | self.map_size = (map_length, map_length)
81 |
82 | # set as same as in PRIMAL
83 | if fix_density is None:
84 | self.fix_density = False
85 | self.obstacle_density = np.random.triangular(0, 0.33, 0.5)
86 | else:
87 | self.fix_density = True
88 | self.obstacle_density = fix_density
89 |
90 | self.map = np.random.choice(
91 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density]
92 | ).astype(np.int)
93 |
94 | partition_list = map_partition(self.map)
95 | self._part = partition_list
96 |
97 | while len(partition_list) == 0:
98 | self.map = np.random.choice(
99 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density]
100 | ).astype(np.int)
101 | partition_list = map_partition(self.map)
102 |
103 | self.agents_pos = np.empty((self.num_agents, 2), dtype=np.int)
104 | self.goals_pos = np.empty((self.num_agents, 2), dtype=np.int)
105 |
106 | pos_num = sum([len(partition) for partition in partition_list])
107 |
108 | # loop to assign agent original position and goal position for each agent
109 | for i in range(self.num_agents):
110 | pos_idx = random.randint(0, pos_num - 1)
111 | partition_idx = 0
112 | for partition in partition_list:
113 | if pos_idx >= len(partition):
114 | pos_idx -= len(partition)
115 | partition_idx += 1
116 | else:
117 | break
118 |
119 | pos = random.choice(partition_list[partition_idx])
120 | partition_list[partition_idx].remove(pos)
121 | self.agents_pos[i] = np.asarray(pos, dtype=np.int)
122 |
123 | pos = random.choice(partition_list[partition_idx])
124 | partition_list[partition_idx].remove(pos)
125 | self.goals_pos[i] = np.asarray(pos, dtype=np.int)
126 |
127 | partition_list = [
128 | partition for partition in partition_list if len(partition) >= 2
129 | ]
130 | pos_num = sum([len(partition) for partition in partition_list])
131 |
132 | self.obs_radius = obs_radius
133 |
134 | self.reward_fn = reward_fn
135 | self.get_heuri_map()
136 | self.steps = 0
137 |
138 | self.last_actions = np.zeros(
139 | (self.num_agents, 5, 2 * obs_radius + 1, 2 * obs_radius + 1), dtype=np.bool
140 | )
141 |
142 | def update_env_settings_set(self, new_env_settings_set):
143 | self.env_set = new_env_settings_set
144 |
145 | def reset(self, num_agents=None, map_length=None):
146 | if self.curriculum:
147 | rand = random.choice(self.env_set)
148 | self.num_agents = rand[0]
149 | self.map_size = (rand[1], rand[1])
150 |
151 | elif num_agents is not None and map_length is not None:
152 | self.num_agents = num_agents
153 | self.map_size = (map_length, map_length)
154 |
155 | if not self.fix_density:
156 | self.obstacle_density = np.random.triangular(0, 0.33, 0.5)
157 |
158 | self.map = np.random.choice(
159 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density]
160 | ).astype(np.float32)
161 |
162 | partition_list = map_partition(self.map)
163 |
164 | while len(partition_list) == 0:
165 | self.map = np.random.choice(
166 | 2, self.map_size, p=[1 - self.obstacle_density, self.obstacle_density]
167 | ).astype(np.float32)
168 | partition_list = map_partition(self.map)
169 |
170 | self.agents_pos = np.empty((self.num_agents, 2), dtype=np.int)
171 | self.goals_pos = np.empty((self.num_agents, 2), dtype=np.int)
172 |
173 | pos_num = sum([len(partition) for partition in partition_list])
174 |
175 | for i in range(self.num_agents):
176 | pos_idx = random.randint(0, pos_num - 1)
177 | partition_idx = 0
178 | for partition in partition_list:
179 | if pos_idx >= len(partition):
180 | pos_idx -= len(partition)
181 | partition_idx += 1
182 | else:
183 | break
184 |
185 | pos = random.choice(partition_list[partition_idx])
186 | partition_list[partition_idx].remove(pos)
187 | self.agents_pos[i] = np.asarray(pos, dtype=np.int)
188 |
189 | pos = random.choice(partition_list[partition_idx])
190 | partition_list[partition_idx].remove(pos)
191 | self.goals_pos[i] = np.asarray(pos, dtype=np.int)
192 |
193 | partition_list = [
194 | partition for partition in partition_list if len(partition) >= 2
195 | ]
196 | pos_num = sum([len(partition) for partition in partition_list])
197 |
198 | self.steps = 0
199 | self.get_heuri_map()
200 |
201 | self.last_actions = np.zeros(
202 | (self.num_agents, 5, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1),
203 | dtype=np.bool,
204 | )
205 |
206 | return self.observe()
207 |
208 | def load(self, map: np.ndarray, agents_pos: np.ndarray, goals_pos: np.ndarray):
209 | self.map = np.copy(map)
210 | self.agents_pos = np.copy(agents_pos)
211 | self.goals_pos = np.copy(goals_pos)
212 |
213 | self.num_agents = agents_pos.shape[0]
214 | self.map_size = (self.map.shape[0], self.map.shape[1])
215 |
216 | self.steps = 0
217 |
218 | self.imgs = []
219 |
220 | self.get_heuri_map()
221 |
222 | self.last_actions = np.zeros(
223 | (self.num_agents, 5, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1),
224 | dtype=np.bool,
225 | )
226 |
227 | def get_heuri_map(self):
228 | dist_map = (
229 | np.ones((self.num_agents, *self.map_size), dtype=np.int32) * 2147483647
230 | )
231 | for i in range(self.num_agents):
232 | open_list = list()
233 | x, y = tuple(self.goals_pos[i])
234 | open_list.append((x, y))
235 | dist_map[i, x, y] = 0
236 |
237 | while open_list:
238 | x, y = open_list.pop(0)
239 | dist = dist_map[i, x, y]
240 |
241 | up = x - 1, y
242 | if (
243 | up[0] >= 0
244 | and self.map[up] == 0
245 | and dist_map[i, x - 1, y] > dist + 1
246 | ):
247 | dist_map[i, x - 1, y] = dist + 1
248 | if up not in open_list:
249 | open_list.append(up)
250 |
251 | down = x + 1, y
252 | if (
253 | down[0] < self.map_size[0]
254 | and self.map[down] == 0
255 | and dist_map[i, x + 1, y] > dist + 1
256 | ):
257 | dist_map[i, x + 1, y] = dist + 1
258 | if down not in open_list:
259 | open_list.append(down)
260 |
261 | left = x, y - 1
262 | if (
263 | left[1] >= 0
264 | and self.map[left] == 0
265 | and dist_map[i, x, y - 1] > dist + 1
266 | ):
267 | dist_map[i, x, y - 1] = dist + 1
268 | if left not in open_list:
269 | open_list.append(left)
270 |
271 | right = x, y + 1
272 | if (
273 | right[1] < self.map_size[1]
274 | and self.map[right] == 0
275 | and dist_map[i, x, y + 1] > dist + 1
276 | ):
277 | dist_map[i, x, y + 1] = dist + 1
278 | if right not in open_list:
279 | open_list.append(right)
280 |
281 | self.heuri_map = np.zeros((self.num_agents, 4, *self.map_size), dtype=np.bool)
282 |
283 | for x in range(self.map_size[0]):
284 | for y in range(self.map_size[1]):
285 | if self.map[x, y] == 0:
286 | for i in range(self.num_agents):
287 | if x > 0 and dist_map[i, x - 1, y] < dist_map[i, x, y]:
288 | assert dist_map[i, x - 1, y] == dist_map[i, x, y] - 1
289 | self.heuri_map[i, 0, x, y] = 1
290 |
291 | if (
292 | x < self.map_size[0] - 1
293 | and dist_map[i, x + 1, y] < dist_map[i, x, y]
294 | ):
295 | assert dist_map[i, x + 1, y] == dist_map[i, x, y] - 1
296 | self.heuri_map[i, 1, x, y] = 1
297 |
298 | if y > 0 and dist_map[i, x, y - 1] < dist_map[i, x, y]:
299 | assert dist_map[i, x, y - 1] == dist_map[i, x, y] - 1
300 | self.heuri_map[i, 2, x, y] = 1
301 |
302 | if (
303 | y < self.map_size[1] - 1
304 | and dist_map[i, x, y + 1] < dist_map[i, x, y]
305 | ):
306 | assert dist_map[i, x, y + 1] == dist_map[i, x, y] - 1
307 | self.heuri_map[i, 3, x, y] = 1
308 |
309 | self.heuri_map = np.pad(
310 | self.heuri_map,
311 | (
312 | (0, 0),
313 | (0, 0),
314 | (self.obs_radius, self.obs_radius),
315 | (self.obs_radius, self.obs_radius),
316 | ),
317 | )
318 |
319 | def step(self, actions: List[int]):
320 | """
321 | actions:
322 | list of indices
323 | 0 stay
324 | 1 up
325 | 2 down
326 | 3 left
327 | 4 right
328 | """
329 |
330 | assert (
331 | len(actions) == self.num_agents
332 | ), "only {} actions as input while {} agents in environment".format(
333 | len(actions), self.num_agents
334 | )
335 | assert all(
336 | [action_idx < 5 and action_idx >= 0 for action_idx in actions]
337 | ), "action index out of range"
338 |
339 | checking_list = [i for i in range(self.num_agents)]
340 |
341 | rewards = []
342 | next_pos = np.copy(self.agents_pos)
343 |
344 | # remove unmoving agent id
345 | for agent_id in checking_list.copy():
346 | if actions[agent_id] == 0:
347 | # unmoving
348 |
349 | if np.array_equal(self.agents_pos[agent_id], self.goals_pos[agent_id]):
350 | rewards.append(self.reward_fn["stay_on_goal"])
351 | else:
352 | rewards.append(self.reward_fn["stay_off_goal"])
353 |
354 | checking_list.remove(agent_id)
355 | else:
356 | # move
357 | next_pos[agent_id] += action_list[actions[agent_id]]
358 | rewards.append(self.reward_fn["move"])
359 |
360 | # first round check, these two conflicts have the heightest priority
361 | for agent_id in checking_list.copy():
362 | if np.any(next_pos[agent_id] < 0) or np.any(
363 | next_pos[agent_id] >= self.map_size[0]
364 | ):
365 | # agent out of map range
366 | rewards[agent_id] = self.reward_fn["collision"]
367 | next_pos[agent_id] = self.agents_pos[agent_id]
368 | checking_list.remove(agent_id)
369 |
370 | elif self.map[tuple(next_pos[agent_id])] == 1:
371 | # collide obstacle
372 | rewards[agent_id] = self.reward_fn["collision"]
373 | next_pos[agent_id] = self.agents_pos[agent_id]
374 | checking_list.remove(agent_id)
375 |
376 | # second round check, agent swapping conflict
377 | no_conflict = False
378 | while not no_conflict:
379 | no_conflict = True
380 | for agent_id in checking_list:
381 | target_agent_id = np.where(
382 | np.all(next_pos[agent_id] == self.agents_pos, axis=1)
383 | )[0]
384 |
385 | if target_agent_id:
386 | target_agent_id = target_agent_id.item()
387 | assert target_agent_id != agent_id, "logic bug"
388 |
389 | if np.array_equal(
390 | next_pos[target_agent_id], self.agents_pos[agent_id]
391 | ):
392 | assert (
393 | target_agent_id in checking_list
394 | ), "target_agent_id should be in checking list"
395 |
396 | next_pos[agent_id] = self.agents_pos[agent_id]
397 | rewards[agent_id] = self.reward_fn["collision"]
398 |
399 | next_pos[target_agent_id] = self.agents_pos[target_agent_id]
400 | rewards[target_agent_id] = self.reward_fn["collision"]
401 |
402 | checking_list.remove(agent_id)
403 | checking_list.remove(target_agent_id)
404 |
405 | no_conflict = False
406 | break
407 |
408 | # third round check, agent collision conflict
409 | no_conflict = False
410 | while not no_conflict:
411 | no_conflict = True
412 | for agent_id in checking_list:
413 | collide_agent_id = np.where(
414 | np.all(next_pos == next_pos[agent_id], axis=1)
415 | )[0].tolist()
416 | if len(collide_agent_id) > 1:
417 | # collide agent
418 |
419 | # if all agents in collide agent are in checking list
420 | all_in_checking = True
421 | for id in collide_agent_id.copy():
422 | if id not in checking_list:
423 | all_in_checking = False
424 | collide_agent_id.remove(id)
425 |
426 | if all_in_checking:
427 | collide_agent_pos = next_pos[collide_agent_id].tolist()
428 | for pos, id in zip(collide_agent_pos, collide_agent_id):
429 | pos.append(id)
430 | collide_agent_pos.sort(
431 | key=lambda x: x[0] * self.map_size[0] + x[1]
432 | )
433 |
434 | collide_agent_id.remove(collide_agent_pos[0][2])
435 |
436 | # checking_list.remove(collide_agent_pos[0][2])
437 |
438 | next_pos[collide_agent_id] = self.agents_pos[collide_agent_id]
439 | for id in collide_agent_id:
440 | rewards[id] = self.reward_fn["collision"]
441 |
442 | for id in collide_agent_id:
443 | checking_list.remove(id)
444 |
445 | no_conflict = False
446 | break
447 |
448 | # self.history.append(np.copy(next_pos))
449 | self.agents_pos = np.copy(next_pos)
450 |
451 | self.steps += 1
452 |
453 | # check done
454 | if np.array_equal(self.agents_pos, self.goals_pos):
455 | done = True
456 | rewards = [self.reward_fn["finish"] for _ in range(self.num_agents)]
457 | else:
458 | done = False
459 |
460 | info = {"step": self.steps - 1}
461 |
462 | # make sure no overlapping agents
463 | if np.unique(self.agents_pos, axis=0).shape[0] < self.num_agents:
464 | print(self.steps)
465 | print(self.map)
466 | print(self.agents_pos)
467 | raise RuntimeError("unique")
468 |
469 | # update last actions
470 | self.last_actions = np.zeros(
471 | (self.num_agents, 5, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1),
472 | dtype=np.bool,
473 | )
474 | self.last_actions[np.arange(self.num_agents), np.array(actions)] = 1
475 |
476 | return self.observe(), rewards, done, info
477 |
478 | def observe(self):
479 | """
480 | return observation and position for each agent
481 |
482 | obs: shape (num_agents, 11, 2*obs_radius+1, 2*obs_radius+1)
483 | layer 1: agent map
484 | layer 2: obstacle map
485 | layer 3-6: heuristic map
486 | layer 7-11: one-hot representation of agent's last action
487 |
488 | pos: used for caculating communication mask
489 |
490 | """
491 | obs = np.zeros(
492 | (self.num_agents, 6, 2 * self.obs_radius + 1, 2 * self.obs_radius + 1),
493 | dtype=np.bool,
494 | )
495 |
496 | # 0 represents obstacle to match 0 padding in CNN
497 | obstacle_map = np.pad(self.map, self.obs_radius, "constant", constant_values=0)
498 |
499 | agent_map = np.zeros((self.map_size), dtype=np.bool)
500 | agent_map[self.agents_pos[:, 0], self.agents_pos[:, 1]] = 1
501 | agent_map = np.pad(agent_map, self.obs_radius, "constant", constant_values=0)
502 |
503 | for i, agent_pos in enumerate(self.agents_pos):
504 | x, y = agent_pos
505 |
506 | obs[i, 0] = agent_map[
507 | x : x + 2 * self.obs_radius + 1, y : y + 2 * self.obs_radius + 1
508 | ]
509 | obs[i, 0, self.obs_radius, self.obs_radius] = 0
510 | obs[i, 1] = obstacle_map[
511 | x : x + 2 * self.obs_radius + 1, y : y + 2 * self.obs_radius + 1
512 | ]
513 | obs[i, 2:] = self.heuri_map[
514 | i, :, x : x + 2 * self.obs_radius + 1, y : y + 2 * self.obs_radius + 1
515 | ]
516 |
517 | # obs = np.concatenate((obs, self.last_actions), axis=1)
518 |
519 | return obs, np.copy(self.agents_pos)
520 |
def render(self):
    """
    Draw the current state with matplotlib.

    Cells are coloured through ``color_map``: an agent standing on its own
    goal is marked 4, any other agent 2 and its goal 3. Agent index labels
    are created on the first call and only repositioned afterwards.
    """
    if not hasattr(self, "fig"):
        self.fig = plt.figure()

    grid = np.copy(self.map)
    for idx in range(self.num_agents):
        agent_cell = self.agents_pos[idx]
        goal_cell = self.goals_pos[idx]
        if not np.array_equal(agent_cell, goal_cell):
            grid[tuple(agent_cell)] = 2
            grid[tuple(goal_cell)] = 3
        else:
            grid[tuple(agent_cell)] = 4
    grid = grid.astype(np.uint8)

    # one (empty) frame slot per rendered step
    self.imgs.append([])

    pairs = zip(self.agents_pos, self.goals_pos)
    if not hasattr(self, "texts"):
        # first call: create agent and goal labels, remember the agent ones
        self.texts = []
        for i, ((agent_x, agent_y), (goal_x, goal_y)) in enumerate(pairs):
            label = plt.text(
                agent_y, agent_x, i, color="black", ha="center", va="center"
            )
            plt.text(goal_y, goal_x, i, color="black", ha="center", va="center")
            self.texts.append(label)
    else:
        # later calls: move the existing agent labels to the new positions
        for i, ((agent_x, agent_y), _goal) in enumerate(pairs):
            label = self.texts[i]
            label.set_position((agent_y, agent_x))
            label.set_text(i)

    plt.imshow(color_map[grid], animated=True)

    plt.show()
    plt.pause(0.5)
560 |
def close(self, save=False):
    """
    Close the current matplotlib figure and forget the cached handle.

    Safe to call even if ``render`` never created ``self.fig``.

    Args:
        save: accepted for interface compatibility; not used here.
    """
    plt.close()
    # ``fig`` only exists after the first render(); an unconditional ``del``
    # would raise AttributeError when closing an environment never rendered.
    if hasattr(self, "fig"):
        del self.fig
564 |
565 |
class MovingAIBenchmarkingEnvironment(Environment):
    """Environment whose map and agent tasks come from a MovingAI scenario.

    Unlike the base class constructor call chain, this ``__init__`` does not
    invoke ``Environment.__init__``; it fills the attributes it needs itself.
    """

    def __init__(
        self,
        num_agents: int = 5,
        map_filename: str = None,
        test_descriptions: list[movingai.TestDescription] = None,
        obs_radius: int = ENV_CONFIG["observation_radius"],
        reward_fn: dict = ENV_CONFIG["reward_fn"],
        should_init: bool = True,
    ):
        """
        Args:
            num_agents: number of agents in the scenario.
            map_filename: map file passed to ``movingai.read_map``.
            test_descriptions: one description per agent; ``x0/y0`` become
                the start position and ``x1/y1`` the goal position.
            obs_radius: radius of the square field of view.
            reward_fn: reward configuration.
            should_init: when False only the scalar settings are stored and
                map/positions are left unset for the caller to provide later.

        Raises:
            ValueError: if the number of descriptions differs from
                ``num_agents``, or if ``should_init`` is True but no
                descriptions were given.
        """
        if test_descriptions is not None and len(test_descriptions) != num_agents:
            raise ValueError("Number of tests must be equal to num_agents")
        if should_init and test_descriptions is None:
            # Previously this surfaced as a TypeError while iterating None.
            raise ValueError("test_descriptions is required when should_init is True")

        self.num_agents = num_agents
        self.obs_radius = obs_radius
        self.reward_fn = reward_fn
        self.steps = 0

        if should_init:
            self.map = movingai.read_map(map_filename)
            # NOTE(review): assumes a square map - confirm against read_map
            self.map_size = (len(self.map), len(self.map))
            self.agents_pos = np.array(
                [[descr.x0, descr.y0] for descr in test_descriptions]
            )
            self.goals_pos = np.array(
                [[descr.x1, descr.y1] for descr in test_descriptions]
            )
            self.get_heuri_map()
            # ``np.bool`` was removed in NumPy 1.24; builtin ``bool`` is the
            # same dtype.
            self.last_actions = np.zeros(
                (self.num_agents, 5, 2 * obs_radius + 1, 2 * obs_radius + 1),
                dtype=bool,
            )
598 |
--------------------------------------------------------------------------------
/pathfinding/main.py:
--------------------------------------------------------------------------------
1 | from pathfinding.models.dhc import DHCNetwork
2 |
# Script entry point: builds a DHCNetwork with its default arguments.
# The instance is not stored or used afterwards.
if __name__ == "__main__":
    DHCNetwork()
5 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/__init__.py:
--------------------------------------------------------------------------------
1 | from pathfinding.models.dhc.model import Network as DHCNetwork # noqa
2 | from pathfinding.models.dhc.buffer import LocalBuffer # noqa
3 | from pathfinding.models.dhc.worker import GlobalBuffer, Learner, Actor # noqa
4 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from pathfinding.settings import yaml_data as settings
4 |
# Replay-buffer specific settings (e.g. "action_dim", "forward_steps").
BUF_CONFIG = settings["dhc"]["buffer"]
# Whole "dhc" settings section (e.g. "max_episode_length", "hidden_dim").
GENERAL_CONFIG = settings["dhc"]
7 |
8 |
class SumTree:
    """Array-backed binary sum tree used for prioritized experience replay.

    The ``capacity`` leaves hold the priorities; every inner node stores the
    sum of its two children, so ``tree[0]`` is the total priority. Node ``i``
    has children ``2*i + 1`` and ``2*i + 2``; leaf ``k`` lives at
    ``capacity - 1 + k``.
    """

    def __init__(self, capacity: int):
        """Create an all-zero tree; ``capacity`` must be a power of two."""
        layer = 1
        while 2 ** (layer - 1) < capacity:
            layer += 1
        assert 2 ** (layer - 1) == capacity, "capacity must be a power of two"
        self.layer = layer
        self.tree = np.zeros(2**layer - 1, dtype=np.float64)
        self.capacity = capacity
        self.size = 0

    def _check_consistency(self):
        """Assert that the root agrees with the sum of all leaves."""
        leaf_sum = np.sum(self.tree[-self.capacity :])
        # two-sided check: a root that is too large is just as wrong as one
        # that is too small
        assert (
            abs(leaf_sum - self.tree[0]) < 0.1
        ), f"sum is {leaf_sum} but root is {self.tree[0]}"

    def sum(self):
        """Return the total priority stored in the tree."""
        self._check_consistency()
        return self.tree[0]

    def __getitem__(self, idx: int):
        """Return the priority of leaf ``idx`` (0-based)."""
        assert 0 <= idx < self.capacity

        return self.tree[self.capacity - 1 + idx]

    def batch_sample(self, batch_size: int):
        """Draw ``batch_size`` leaves by stratified proportional sampling.

        The total priority is cut into ``batch_size`` equal intervals and one
        point is drawn uniformly from each of them.

        Returns:
            (idxes, priorities): leaf indices in ``[0, capacity)`` and the
            priorities stored at those leaves.
        """
        p_sum = self.tree[0]
        interval = p_sum / batch_size

        # ``arange(batch_size) * interval`` always yields exactly batch_size
        # strata, whereas a float-stepped ``arange(0, p_sum, interval)`` can
        # be off by one element due to rounding.
        prefixsums = np.arange(batch_size, dtype=np.float64) * interval
        prefixsums = prefixsums + np.random.uniform(0, interval, batch_size)

        # ``np.int`` was removed in NumPy 1.24
        idxes = np.zeros(batch_size, dtype=np.int64)
        for _ in range(self.layer - 1):
            nodes = self.tree[idxes * 2 + 1]
            # go left if the remaining prefix fits into the left subtree
            idxes = np.where(prefixsums < nodes, idxes * 2 + 1, idxes * 2 + 2)
            # right children have even indices: skip the left sibling's mass
            prefixsums = np.where(
                idxes % 2 == 0, prefixsums - self.tree[idxes - 1], prefixsums
            )

        priorities = self.tree[idxes]
        idxes -= self.capacity - 1

        assert np.all(priorities > 0), f"idx: {idxes}, priority: {priorities}"
        assert np.all(idxes >= 0) and np.all(idxes < self.capacity)

        return idxes, priorities

    def batch_update(self, idxes: np.ndarray, priorities: np.ndarray):
        """Set the priorities of leaves ``idxes`` and refresh all ancestors.

        ``idxes`` is left untouched; the original implementation shifted the
        caller's array in place.
        """
        nodes = np.asarray(idxes) + (self.capacity - 1)
        self.tree[nodes] = priorities

        for _ in range(self.layer - 1):
            # move one level up; several leaves may share a parent
            nodes = np.unique((nodes - 1) // 2)
            self.tree[nodes] = self.tree[2 * nodes + 1] + self.tree[2 * nodes + 2]

        self._check_consistency()
70 |
71 |
class LocalBuffer:
    """Per-episode transition buffer filled by an actor.

    Observations, Q-values and communication masks have ``capacity + 1``
    slots because they also hold the entry for the state following the last
    stored transition.
    """

    __slots__ = (
        "actor_id",
        "map_len",
        "num_agents",
        "obs_buf",
        "act_buf",
        "rew_buf",
        "hid_buf",
        "comm_mask_buf",
        "q_buf",
        "capacity",
        "size",
        "done",
    )

    def __init__(
        self,
        actor_id: int,
        num_agents: int,
        map_len: int,
        init_obs: np.ndarray,
        capacity: int = GENERAL_CONFIG["max_episode_length"],
        obs_shape=GENERAL_CONFIG["observation_shape"],
        hidden_dim=GENERAL_CONFIG["hidden_dim"],
        action_dim=BUF_CONFIG["action_dim"],
    ):
        """
        buffer for each episode

        Args:
            actor_id: id of the actor producing the episode.
            num_agents: number of agents in the episode.
            map_len: side length of the map.
            init_obs: initial observation, stored in slot 0.
            capacity: maximum number of transitions.
            obs_shape: shape of a single agent's observation.
            hidden_dim: size of the recurrent hidden state.
            action_dim: number of discrete actions.
        """
        self.actor_id = actor_id
        self.num_agents = num_agents
        self.map_len = map_len

        # ``np.bool`` was removed in NumPy 1.24; builtin ``bool`` is the same
        # dtype on every NumPy version.
        self.obs_buf = np.zeros((capacity + 1, num_agents, *obs_shape), dtype=bool)
        self.act_buf = np.zeros((capacity), dtype=np.uint8)
        self.rew_buf = np.zeros((capacity), dtype=np.float16)
        self.hid_buf = np.zeros((capacity, num_agents, hidden_dim), dtype=np.float16)
        self.comm_mask_buf = np.zeros(
            (capacity + 1, num_agents, num_agents), dtype=bool
        )
        self.q_buf = np.zeros((capacity + 1, action_dim), dtype=np.float32)

        self.capacity = capacity
        self.size = 0

        self.obs_buf[0] = init_obs

    def __len__(self):
        """Number of transitions stored so far."""
        return self.size

    def add(
        self,
        q_val: np.ndarray,
        action: int,
        reward: float,
        next_obs: np.ndarray,
        hidden: np.ndarray,
        comm_mask: np.ndarray,
    ):
        """Append one transition; the buffer must not be full."""
        assert self.size < self.capacity

        step = self.size
        self.act_buf[step] = action
        self.rew_buf[step] = reward
        self.obs_buf[step + 1] = next_obs
        self.q_buf[step] = q_val
        self.hid_buf[step] = hidden
        self.comm_mask_buf[step] = comm_mask

        self.size += 1

    def finish(self, last_q_val=None, last_comm_mask=None):
        """Trim the buffers and compute initial priorities.

        Args:
            last_q_val: Q-values of the state after the last transition, or
                None if the episode terminated (``done``).
            last_comm_mask: communication mask of that state; only used when
                ``last_q_val`` is given.

        Returns:
            Tuple ``(actor_id, num_agents, map_len, obs_buf, act_buf,
            rew_buf, hid_buf, td_errors, done, size, comm_mask_buf)`` where
            ``td_errors`` has length ``capacity`` and is zero past ``size``.
        """
        # last q value is None if done
        if last_q_val is None:
            done = True
        else:
            done = False
            self.q_buf[self.size] = last_q_val
            self.comm_mask_buf[self.size] = last_comm_mask

        self.obs_buf = self.obs_buf[: self.size + 1]
        self.act_buf = self.act_buf[: self.size]
        self.rew_buf = self.rew_buf[: self.size]
        self.hid_buf = self.hid_buf[: self.size]
        self.q_buf = self.q_buf[: self.size + 1]
        self.comm_mask_buf = self.comm_mask_buf[: self.size + 1]

        # calculate td errors for prioritized experience replay
        td_errors = np.zeros(self.capacity, dtype=np.float32)

        fwd_steps = BUF_CONFIG["forward_steps"]

        # bootstrap from the state fwd_steps ahead, clamped to the last one
        q_max_idx = np.array([min(i + fwd_steps, self.size) for i in range(self.size)])
        gamma = np.array(
            [0.99 ** min(fwd_steps, self.size - i) for i in range(self.size)]
        )
        q_max = np.max(self.q_buf[q_max_idx], axis=1) * gamma

        # discounted n-step reward: zero-pad the tail so that a "valid"
        # convolution yields exactly ``size`` values
        ret = self.rew_buf.tolist() + [0 for _ in range(fwd_steps - 1)]
        reward = (
            np.convolve(
                ret, [0.99 ** (fwd_steps - 1 - i) for i in range(fwd_steps)], "valid"
            )
            + q_max
        )
        q_val = self.q_buf[np.arange(self.size), self.act_buf]
        # clip from below so that every stored transition keeps a positive
        # priority
        td_errors[: self.size] = np.abs(reward - q_val).clip(1e-4)

        return (
            self.actor_id,
            self.num_agents,
            self.map_len,
            self.obs_buf,
            self.act_buf,
            self.rew_buf,
            self.hid_buf,
            td_errors,
            done,
            self.size,
            self.comm_mask_buf,
        )
193 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/evaluate.py:
--------------------------------------------------------------------------------
1 | import fire
2 | from collections import defaultdict
3 | import numpy as np
4 | import os
5 | import torch
6 |
7 | from pathfinding.environment import Environment, MovingAIBenchmarkingEnvironment
8 | from pathfinding.models.dhc import DHCNetwork
9 | from pathfinding.settings import yaml_data as settings
10 | from pathfinding.utils import test_group, calculate_metrics
11 |
# "dhc" settings section; "max_episode_length" is read from it during evaluation.
GENERAL_CONFIG = settings["dhc"]
13 |
14 |
def _test_one_case(args):
    """Roll out one test case and return its metrics.

    ``args`` is a 5-tuple ``(map, agents_pos, goals_pos, network, env_cls)``.
    The episode runs until the environment reports ``done`` or
    ``max_episode_length`` environment steps have elapsed.
    """
    grid, starts, goals, network, env_cls = args

    env = env_cls(should_init=False)
    env.load(grid, starts, goals)
    obs, pos = env.observe()

    network.reset()
    finished = False
    steps = 0

    while True:
        if finished or env.steps >= GENERAL_CONFIG["max_episode_length"]:
            break
        obs_tensor = torch.as_tensor(obs.astype(np.float32))
        pos_tensor = torch.as_tensor(pos.astype(np.float32))
        actions = network.step(obs_tensor, pos_tensor)[0]
        (obs, pos), _, finished, _ = env.step(actions)
        steps += 1

    return calculate_metrics(env, steps)
33 |
34 |
def _test_generation_fn_random(tests, network):
    """Extend every test tuple with the network and the random-map Environment class."""
    cases = []
    for test in tests:
        cases.append((*test, network, Environment))
    return cases
37 |
38 |
def _test_generation_fn_moving_ai(tests, network):
    """Extend every test tuple with the network and the MovingAI environment class."""
    cases = []
    for test in tests:
        cases.append((*test, network, MovingAIBenchmarkingEnvironment))
    return cases
41 |
42 |
def test_model(
    test_groups=(
        (40, 4, 0.3),
        (40, 8, 0.3),
        (40, 16, 0.3),
        (40, 32, 0.3),
        (40, 64, 0.3),
        (80, 4, 0.3),
        (80, 8, 0.3),
        (80, 16, 0.3),
        (80, 32, 0.3),
        (80, 64, 0.3),
    ),
    model_number="60000",
    is_random_maps: bool = True,
):
    """Evaluate a saved model on several test groups (generator).

    Args:
        test_groups: iterable of group descriptors, each passed unchanged to
            ``test_group``. The default is an immutable tuple so it cannot be
            modified between calls.
        model_number: checkpoint name; ``./models/{model_number}.pth`` is
            loaded onto the CPU.
        is_random_maps: selects the random-map environment (True) or the
            MovingAI benchmarking environment (False).

    Yields:
        ``(result, group)`` for every entry of ``test_groups``, where
        ``result`` is whatever ``test_group`` returns.
    """
    device = torch.device("cpu")
    network = DHCNetwork()
    network.to(device)
    state_dict = torch.load(
        os.path.join(".", "models", f"{model_number}.pth"), map_location=device
    )
    network.load_state_dict(state_dict)
    # a single eval() after loading is enough; loading weights does not
    # change the train/eval mode
    network.eval()
    network.share_memory()

    if is_random_maps:
        generation_fn = _test_generation_fn_random
    else:
        generation_fn = _test_generation_fn_moving_ai

    def func(x):
        return generation_fn(x, network)

    for group in test_groups:
        yield test_group(group, func, _test_one_case, is_random=is_random_maps), group
82 |
83 |
def latex_communication_table(
    model_communication_id: str = "337500",
    model_nocommunication_id: str = "310000_nocomm",
    density=0.1,
    agents=(4, 8, 16, 32),
    maps=(10, 20, 30, 40),
):
    """Evaluate two checkpoints and print a LaTeX ``longtable`` comparing them.

    For every (agents, map size) pair the table lists the metrics returned by
    ``test_model`` (labelled CSR, ISR, Makespan) for both checkpoints and
    typesets the better value in bold; for makespan lower is better.

    Args:
        model_communication_id: checkpoint shown in the second value column.
        model_nocommunication_id: checkpoint shown in the first value column.
        density: obstacle density of the test groups.
        agents: agent counts to evaluate.
        maps: map side lengths to evaluate.
    """
    # The 10x10 / 32 agents / 0.3 density combination is skipped.
    test_groups = [
        (map_size, num_agents, density)
        for num_agents in agents
        for map_size in maps
        if not (density == 0.3 and map_size == 10 and num_agents == 32)
    ]

    # Every LaTeX backslash is written as an explicit "\\" so that no string
    # relies on invalid escape sequences (a SyntaxWarning on Python 3.12+).
    # The row before \endfoot now ends with the LaTeX row terminator "\\";
    # the original emitted a single backslash there.
    header = f"""\\newpage
\\begin{{longtable}}[htb!]{{cc|ccc}}
\\caption{{TODO CAPTION}} \\label{{table:TODO-TABLE-LABEL}}\\\\

\\toprule
\\multicolumn{{2}}{{c}}{{Map configuration}} & \\multirow{{2}}{{*}}{{Metrics}} & \\multicolumn{{2}}{{c}}{{Density {density}}} \\\\
\\# Agents & Size & & DHC (ours) & DHC (original training) \\\\
\\midrule
\\endfirsthead

\\multicolumn{{5}}{{c}}%
{{{{Table \\thetable\\ continued from previous page}}}} \\\\
\\toprule
\\multicolumn{{2}}{{c}}{{Map configuration}} & \\multirow{{2}}{{*}}{{Metrics}} & \\multicolumn{{2}}{{c}}{{Density {density}}} \\\\
\\# Agents & Size & & DHC (ours) & DHC (original training) \\\\
\\midrule
\\endhead

\\midrule
\\multicolumn{{5}}{{r}}{{Continued on next page}} \\\\
\\endfoot

\\bottomrule
\\endlastfoot"""

    comm = defaultdict(lambda: defaultdict(list))
    nocomm = defaultdict(lambda: defaultdict(list))
    # the density element of each group is ignored so that it does not shadow
    # the ``density`` parameter
    for res, (size, num_agents, _) in test_model(
        test_groups, model_nocommunication_id
    ):
        nocomm[num_agents][size] = res
    for res, (size, num_agents, _) in test_model(
        test_groups, model_communication_id
    ):
        comm[num_agents][size] = res

    def _cell(mean, std, bold, with_std):
        # Format one table value, optionally bold and optionally with "± std".
        if bold:
            if with_std:
                return f"\\textbf{{{mean:.2f}}} $\\pm$ \\textbf{{{std:.2f}}}"
            return f"\\textbf{{{mean:.2f}}}"
        if with_std:
            return f"${mean:.2f} \\pm {std:.2f}$"
        return f"{mean:.2f}"

    num_maps = len(maps)
    table = []
    metrics = ["CSR, \\%", "ISR, \\%", "Makespan"]
    for i, num_agents in enumerate(agents):
        sector = f"\\multirow{{{num_maps * 3}}}{{*}}{{{num_agents}}} "
        for map_id, map_size in enumerate(maps):
            row = ["", f" \\multirow{{3}}{{*}}{{${map_size} \\times {map_size}$}} "]
            nocomm_list = nocomm[num_agents][map_size]
            comm_list = comm[num_agents][map_size]
            for metric_id, ((a_mean, a_std), (b_mean, b_std)) in enumerate(
                zip(nocomm_list, comm_list)
            ):
                if metric_id != 0:
                    row.append("")
                row.append(metrics[metric_id])

                if metric_id == 2:
                    # makespan: the less, the better
                    a_is_best = a_mean <= b_mean
                else:
                    a_is_best = a_mean >= b_mean
                # std for CSR doesn't make sense
                with_std = metric_id != 0

                row.append(_cell(a_mean, a_std, a_is_best, with_std))
                row.append(
                    _cell(b_mean, b_std, not a_is_best, with_std) + " \\\\ \n"
                )
            sector += " & ".join(row)
            if map_id != num_maps - 1:
                sector += "\\cline{2 - 5} \n"
            else:
                sector += "\n"
        if i == len(agents) - 1:
            sector += "\\bottomrule\n"
        else:
            sector += "\\midrule\n"
        table.append(sector)
    footer = "\\end{longtable}\n"
    print(header + "\n".join(table) + footer)
180 |
181 |
# When run as a script, hand control to python-fire's command-line dispatcher.
if __name__ == "__main__":
    fire.Fire()
184 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/model.py:
--------------------------------------------------------------------------------
1 | # credits to https://github.com/ZiyuanMa/DHC/blob/master/model.py
2 | import random
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | if torch.cuda.is_available():
8 | from torch.cuda.amp import autocast
9 | else:
10 | from torch.cpu.amp import autocast
11 |
12 | from pathfinding.settings import yaml_data as settings
13 |
# "dhc" settings section: supplies network sizes, batch size and communication options below.
DHC_CONFIG = settings["dhc"]
15 |
16 |
class ResBlock(nn.Module):
    """Residual block: two 3x3, stride-1, padding-1 convolutions plus a skip connection."""

    def __init__(self, channels):
        super().__init__()
        conv_kwargs = {"kernel_size": 3, "stride": 1, "padding": 1}
        self.block1 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.block2 = nn.Conv2d(channels, channels, **conv_kwargs)

    def forward(self, x):
        """Return ``relu(block2(relu(block1(x))) + x)``; the shape is unchanged."""
        branch = self.block2(F.relu(self.block1(x)))
        return F.relu(branch + x)
30 |
31 |
class MultiHeadAttention(nn.Module):
    """Masked multi-head self-attention over the agents of each batch element."""

    def __init__(self, input_dim, output_dim, num_heads):
        super().__init__()
        self.num_heads = num_heads
        self.input_dim = input_dim
        self.output_dim = output_dim
        inner_dim = output_dim * num_heads
        self.W_Q = nn.Linear(input_dim, inner_dim)
        self.W_K = nn.Linear(input_dim, inner_dim)
        self.W_V = nn.Linear(input_dim, inner_dim)
        self.W_O = nn.Linear(inner_dim, output_dim, bias=False)

    def _split_heads(self, projected, batch_size, num_agents):
        # (B, S, H*W) -> (B, S, H, W) -> (B, H, S, W)
        return projected.view(batch_size, num_agents, self.num_heads, -1).transpose(
            1, 2
        )

    def forward(self, input, attn_mask):
        """
        input: [batch_size x num_agents x input_dim]
        attn_mask: [num_agents x num_agents] or
            [batch_size x num_agents x num_agents]; entries that are True
            are filled with -1e9 before the softmax.

        returns: [batch_size x num_agents x output_dim]
        """
        batch_size, num_agents, input_dim = input.size()
        assert input_dim == self.input_dim

        # each of q_s / k_s / v_s: [batch_size x num_heads x num_agents x output_dim]
        q_s = self._split_heads(self.W_Q(input), batch_size, num_agents)
        k_s = self._split_heads(self.W_K(input), batch_size, num_agents)
        v_s = self._split_heads(self.W_V(input), batch_size, num_agents)

        if attn_mask.dim() == 2:
            attn_mask = attn_mask.unsqueeze(0)
        assert (
            attn_mask.size(0) == batch_size
        ), f"mask dim {attn_mask.size(0)} while batch size {batch_size}"

        # one copy of the mask per head:
        # [batch_size x num_heads x num_agents x num_agents]
        attn_mask = attn_mask.unsqueeze(1).repeat_interleave(self.num_heads, 1)
        assert attn_mask.size() == (batch_size, self.num_heads, num_agents, num_agents)

        scale = self.output_dim**0.5
        # scores and softmax are evaluated in float32 with autocast disabled
        with autocast(enabled=False):
            scores = torch.matmul(q_s.float(), k_s.float().transpose(-1, -2)) / (scale)
            scores.masked_fill_(attn_mask, -1e9)
            attn = F.softmax(scores, dim=-1)

        # context: [batch_size x num_heads x num_agents x output_dim]
        context = torch.matmul(attn, v_s)
        # merge the heads back: [batch_size x num_agents x num_heads * output_dim]
        merged = (
            context.transpose(1, 2)
            .contiguous()
            .view(batch_size, num_agents, self.num_heads * self.output_dim)
        )

        return self.W_O(merged)
95 |
96 |
class CommBlock(nn.Module):
    """Communication block: attention over neighbours followed by a GRU update."""

    def __init__(
        self,
        input_dim,
        output_dim=64,
        num_heads=DHC_CONFIG["communication"]["num_comm_heads"],
        num_layers=DHC_CONFIG["communication"]["num_comm_layers"],
    ):
        """
        Args:
            input_dim: size of each agent's latent vector.
            output_dim: size of the attention output (the message).
            num_heads: number of attention heads.
            num_layers: how many times attention + update are applied.
        """
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.self_attn = MultiHeadAttention(input_dim, output_dim, num_heads)

        self.update_cell = nn.GRUCell(output_dim, input_dim)

    def forward(self, latent, comm_mask):
        """
        latent shape: batch_size x num_agents x latent_dim
        comm_mask: num_agents x num_agents (single, unbatched case) or
            batch_size x num_agents x num_agents

        Only agents whose mask row sums to more than 1 are updated. In the
        unbatched case ``latent`` is modified in place.
        """
        # Use the real batch dimension instead of the configured batch size so
        # the batched branch also works for other batch sizes.
        batch_size, num_agents = latent.size(0), latent.size(1)

        # agent indices of agent that use communication
        update_mask = comm_mask.sum(dim=-1) > 1
        comm_idx = update_mask.nonzero(as_tuple=True)

        # no agent use communication, return
        if len(comm_idx[0]) == 0:
            return latent

        # a 2-tuple of indices means comm_mask carried a batch dimension
        batched = len(comm_idx) > 1
        if batched:
            update_mask = update_mask.unsqueeze(2)

        # attention masks out the pairs that do NOT communicate
        attn_mask = comm_mask == False  # noqa

        for _ in range(self.num_layers):
            info = self.self_attn(latent, attn_mask=attn_mask)
            if not batched:
                # single batch element: update only the communicating agents
                batch_idx = torch.zeros(
                    len(comm_idx[0]), dtype=torch.long, device=latent.device
                )
                latent[batch_idx, comm_idx[0]] = self.update_cell(
                    info[batch_idx, comm_idx[0]], latent[batch_idx, comm_idx[0]]
                )
            else:
                # update every agent, then keep the result only where masked
                update_info = self.update_cell(
                    info.view(-1, self.output_dim), latent.view(-1, self.input_dim)
                ).view(batch_size, num_agents, self.input_dim)
                latent = torch.where(update_mask, update_info, latent)

        return latent
147 |
148 |
class Network(nn.Module):
    """DHC Q-network: CNN observation encoder, GRU memory, communication block
    and a dueling head with 5 action outputs.

    ``step`` is the inference path that keeps the hidden state on the
    instance; ``forward`` is the batched sequence path.
    """

    def __init__(
        self,
        input_shape=DHC_CONFIG["observation_shape"],
        cnn_channels=DHC_CONFIG["cnn_channels"],
        hidden_dim=DHC_CONFIG["hidden_dim"],
        max_comm_agents=DHC_CONFIG["max_comm_agents"],
        latent_dim=DHC_CONFIG["latent_dim"],
    ):
        """
        Args:
            input_shape: shape of one agent's observation; its first entry is
                the number of input channels of the encoder.
            cnn_channels: channel count of the first convolution and of the
                residual blocks.
            hidden_dim: size of the recurrent hidden state.
            max_comm_agents: upper bound on the nearest agents kept in the
                communication mask computed by ``step``.
            latent_dim: input size of the recurrent cell.
                NOTE(review): presumably must equal the flattened encoder
                output size - confirm against the configuration.
        """
        super().__init__()

        # forward() reshapes with this fixed batch size taken from the config
        self._batch_size = DHC_CONFIG["batch_size"]
        self.input_shape = input_shape
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.max_comm_agents = max_comm_agents

        self.obs_encoder = nn.Sequential(
            nn.Conv2d(self.input_shape[0], cnn_channels, 3, 1),
            nn.ReLU(inplace=True),
            ResBlock(cnn_channels),
            ResBlock(cnn_channels),
            ResBlock(cnn_channels),
            nn.Conv2d(cnn_channels, 16, 1, 1),  # differs from the paper
            # see https://arxiv.org/pdf/2106.11365.pdf, Figure 1
            nn.ReLU(inplace=True),
            nn.Flatten(),
        )

        self.recurrent = nn.GRUCell(self.latent_dim, self.hidden_dim)

        self.comm = CommBlock(hidden_dim)

        # dueling q structure
        self.adv = nn.Linear(hidden_dim, 5)
        self.state = nn.Linear(hidden_dim, 1)
        # hidden state kept between step() calls; None until the first step
        self.hidden = None

        self._xavier_init()

    def _xavier_init(self):
        """Xavier-uniform weights and zero biases for all Linear/Conv2d layers."""
        for _, m in self.named_modules():
            if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    @torch.no_grad()
    def step(self, obs, pos):
        """Advance all agents by one step and pick greedy actions.

        Args:
            obs: observations; the first dimension is the number of agents.
            pos: agent positions; columns 0 and 1 are used as coordinates.

        Returns:
            (actions, q_val, hidden, comm_mask): a list with the argmax
            action per agent, and the Q-values, the updated hidden state and
            the communication mask as NumPy arrays.
        """
        num_agents = obs.size(0)

        latent = self.obs_encoder(obs)

        if self.hidden is None:
            self.hidden = self.recurrent(latent)
        else:
            self.hidden = self.recurrent(latent, self.hidden)  # e''_i^{t - 1}

        # from num_agents x hidden_dim to 1 x num_agents x hidden_dim
        self.hidden = self.hidden.unsqueeze(0)

        # masks for communication block
        agents_pos = pos
        # pairwise absolute coordinate differences and Euclidean distances
        pos_mat = (agents_pos.unsqueeze(1) - agents_pos.unsqueeze(0)).abs()
        dist_mat = (pos_mat[:, :, 0] ** 2 + pos_mat[:, :, 1] ** 2).sqrt()
        # mask out agents that out of range of FOV
        in_obs_mask = (pos_mat <= DHC_CONFIG["observation_radius"]).all(2)
        # mask out agents that are far away
        _, ranking = dist_mat.topk(
            min(self.max_comm_agents, num_agents), dim=1, largest=False
        )
        dist_mask = torch.zeros((num_agents, num_agents), dtype=torch.bool)
        dist_mask.scatter_(1, ranking, True)

        # communicate only with agents that are both visible and among the nearest
        comm_mask = torch.bitwise_and(in_obs_mask, dist_mask)

        self.hidden = self.comm(self.hidden, comm_mask)  # 1 x num_agents x hidden_dim

        self.hidden = self.hidden.squeeze(0)  # num_agents x hidden_dim

        adv_val = self.adv(self.hidden)  # num_agents x 5
        state_val = self.state(self.hidden)  # num_agents x 1

        # dueling aggregation: value plus mean-centred advantage
        q_val = state_val + adv_val - adv_val.mean(1, keepdim=True)

        actions = torch.argmax(q_val, 1).tolist()

        return actions, q_val.numpy(), self.hidden.numpy(), comm_mask.numpy()

    def reset(self):
        """Forget the hidden state kept by ``step``."""
        self.hidden = None

    @autocast()
    def forward(self, obs, steps, hidden, comm_mask):
        """Compute Q-values for agent 0 of every batch element.

        Args:
            obs: batched observation sequences.
                NOTE(review): assumed layout is
                batch_size x seq_len x num_agents x *input_shape - confirm
                against the caller.
            steps: per-sample sequence lengths; the hidden state at index
                ``steps - 1`` is used for the Q-values.
            hidden: initial hidden state, batch_size*num_agents x hidden_dim.
            comm_mask: batch_size x seq_len x max_num_agents x max_num_agents.

        Returns:
            Q-values with one row per batch element.
        """
        # comm_mask shape: batch_size x seq_len x max_num_agents x max_num_agents
        max_steps = obs.size(1)
        num_agents = comm_mask.size(2)

        assert comm_mask.size(2) == DHC_CONFIG["max_num_agents"]

        obs = obs.transpose(1, 2)

        # fold batch, agent and time dimensions together for the encoder
        obs = obs.contiguous().view(-1, *self.input_shape)

        latent = self.obs_encoder(obs)

        # time-major layout: seq_len x batch_size*num_agents x latent_dim
        latent = latent.view(
            self._batch_size * num_agents, max_steps, self.latent_dim
        ).transpose(0, 1)

        hidden_buffer = []
        for i in range(max_steps):
            # hidden size: batch_size*num_agents x self.hidden_dim
            hidden = self.recurrent(latent[i], hidden)
            hidden = hidden.view(self._batch_size, num_agents, self.hidden_dim)

            # NOTE(review): the `else` is read as belonging to the outer `if`:
            # with "disable_communication" set, communication is applied only
            # with probability "comm_enabled_prob"; otherwise it is always
            # applied. Confirm against the original file's indentation.
            if DHC_CONFIG["communication"]["disable_communication"]:
                if random.random() < DHC_CONFIG["communication"]["comm_enabled_prob"]:
                    hidden = self.comm(hidden, comm_mask[:, i])
            else:
                hidden = self.comm(hidden, comm_mask[:, i])
            # only hidden from agent 0
            hidden_buffer.append(hidden[:, 0])
            hidden = hidden.view(self._batch_size * num_agents, self.hidden_dim)

        # hidden buffer size: batch_size x seq_len x self.hidden_dim
        hidden_buffer = torch.stack(hidden_buffer).transpose(0, 1)

        # hidden size: batch_size x self.hidden_dim
        hidden = hidden_buffer[torch.arange(self._batch_size), steps - 1]

        adv_val = self.adv(hidden)
        state_val = self.state(hidden)

        # dueling aggregation: value plus mean-centred advantage
        q_val = state_val + adv_val - adv_val.mean(1, keepdim=True)

        return q_val
293 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/train.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import ray
4 | import time
5 | import torch
6 |
7 | from pathfinding.models.dhc import GlobalBuffer, Learner, Actor
8 | from pathfinding.settings import yaml_data as settings
9 |
# Training settings ("num_actors" and "log_interval" are read below).
TRAIN_CONFIG = settings["dhc"]["train"]

# Seed torch, NumPy and the stdlib RNG at import time.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)
15 |
16 |
def main(
    num_actors=TRAIN_CONFIG["num_actors"], log_interval=TRAIN_CONFIG["log_interval"]
):
    """Start the distributed training run on Ray.

    Creates one GlobalBuffer, one Learner and ``num_actors`` actors, waits
    until the buffer reports it is ready, then starts the buffer and learner
    loops and prints their stats every ``log_interval`` seconds until the
    learner reports completion.

    Args:
        num_actors: number of actor processes; a single actor is supported.
        log_interval: seconds between stats reports while training.
    """
    ray.init()
    ray_node = ray.nodes()[0]

    # GlobalBuffer + Learner + 1 * num_actors
    assert (
        ray_node["Resources"]["CPU"] >= 2 + num_actors
    ), "insufficient amount of CPU cores available"

    buffer = GlobalBuffer.remote()
    learner = Learner.remote(buffer)
    time.sleep(1)

    actors = []
    for i in range(num_actors):
        # Per-actor constant 0.4 ** (1 + 7 * i / (num_actors - 1)), spread
        # from 0.4 (first actor) to 0.4 ** 8 (last actor). With a single
        # actor the fraction is defined as 0 instead of dividing by zero.
        fraction = i / (num_actors - 1) if num_actors > 1 else 0.0
        actors.append(Actor.remote(i, 0.4 ** (1 + fraction * 7), learner, buffer))

    for actor in actors:
        actor.run.remote()

    print("Actors were successfully created")

    # wait for the buffer to report readiness, printing stats meanwhile
    while not ray.get(buffer.ready.remote()):
        time.sleep(5)
        ray.get(learner.stats.remote(5))
        ray.get(buffer.stats.remote(5))

    print("Start training")
    buffer.run.remote()
    learner.run.remote()

    done = False
    while not done:
        time.sleep(log_interval)
        done = ray.get(learner.stats.remote(log_interval))
        ray.get(buffer.stats.remote(log_interval))
        print()
56 |
57 |
# Run training with the configured defaults when executed as a script.
if __name__ == "__main__":
    main()
60 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/visualize.py:
--------------------------------------------------------------------------------
1 | import fire
2 | import matplotlib.animation as animation
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import os
6 | import pickle
7 | import random
8 | import torch
9 |
10 | from pathfinding.environment import Environment, MovingAIBenchmarkingEnvironment
11 | from pathfinding.models.dhc import DHCNetwork
12 | from pathfinding.utils import tests_dir_path
13 |
# Seed torch, NumPy and the stdlib RNG at import time.
torch.manual_seed(239)
np.random.seed(239)
random.seed(239)
# Visualisation runs on the CPU with a single torch thread.
device = torch.device("cpu")
torch.set_num_threads(1)
19 |
20 |
def get_cmap(n, name="hsv"):
    """Return the colormap ``name`` resampled to ``n`` entries.

    Uses ``plt.get_cmap`` because ``matplotlib.cm.get_cmap`` (reached via
    ``plt.cm.get_cmap``) was deprecated in Matplotlib 3.7 and removed in 3.9.
    """
    return plt.get_cmap(name, n)
23 |
24 |
def frametamer(imgs, env, init_img):
    """Append one animation frame (a list of artists) to ``imgs``.

    The frame consists of ``init_img``, one progress text
    ``"<agents on goal> / <agents>"`` and, for every agent, its index drawn
    at the agent position and at the goal position.

    Args:
        imgs: list of frames; a new frame is appended in place.
        env: environment providing ``agents_pos`` and ``goals_pos``.
        init_img: image artist of the current map.
    """
    frame = [init_img]
    imgs.append(frame)

    num_agents = len(env.agents_pos)
    # an agent is positioned when both coordinates equal those of its goal
    eq = env.agents_pos == env.goals_pos
    total_positioned = (eq[:, 0] * eq[:, 1]).sum()

    # The progress text is identical for all agents, so it is created once
    # per frame rather than once per agent.
    frame.append(
        plt.text(0.02, 0.02, s=f"{total_positioned} / {num_agents}", fontsize=8)
    )

    for i, ((agent_x, agent_y), (goal_x, goal_y)) in enumerate(
        zip(env.agents_pos, env.goals_pos)
    ):
        # text takes (x, y) = (column, row), hence the swapped coordinates
        frame.append(
            plt.text(
                agent_y, agent_x, i, color="black", ha="center", va="center", fontsize=8
            )
        )
        frame.append(
            plt.text(
                goal_y, goal_x, i, color="black", ha="center", va="center", fontsize=8
            )
        )
49 |
50 |
def fill_map(env):
    """Return a uint8 copy of ``env.map`` with agents and goals painted in.

    An agent standing on its own goal is marked 4; otherwise the agent cell
    is marked 2 and its goal cell 3. ``env.map`` itself is not modified.
    """
    canvas = np.copy(env.map)
    for idx in range(env.num_agents):
        agent_cell = env.agents_pos[idx]
        goal_cell = env.goals_pos[idx]
        if not np.array_equal(agent_cell, goal_cell):
            canvas[tuple(agent_cell)] = 2
            canvas[tuple(goal_cell)] = 3
        else:
            canvas[tuple(agent_cell)] = 4
    return canvas.astype(np.uint8)
62 |
63 |
def make_animation_single_text(
    model_id: int, test_name: str, test_case_idx: int = 0, steps: int = 256
):
    """Render one random-map test case as a GIF under ``./videos``.

    Loads ``./models/{model_id}.pth``, replays test case ``test_case_idx``
    from the pickle ``test_name`` for at most ``steps`` environment steps and
    saves one frame per step. If the episode finishes early, the final frame
    is repeated for the remaining steps.
    """
    test_case_idx = int(test_case_idx)
    palette = np.array(
        [
            [255, 255, 255],  # white
            [190, 190, 190],  # gray
            [0, 191, 255],  # blue
            [255, 165, 0],  # orange
            [0, 250, 154],  # green
        ]
    )

    network = DHCNetwork()
    network.eval()
    network.to(device)
    checkpoint_path = os.path.join(".", "models", f"{model_id}.pth")
    network.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # NOTE: pickle must only be used on trusted test files
    tests_path = os.path.join(tests_dir_path(), test_name)
    with open(tests_path, "rb") as f:
        tests = pickle.load(f)
    case = tests[test_case_idx]

    env = Environment()
    env.load(case[0], case[1], case[2])

    fig = plt.figure(figsize=(4.8, 4.8))
    tick_positions = range(0, len(env.map) + 1, 5)
    plt.gca().set_xticks(tick_positions)
    plt.gca().set_yticks(tick_positions)

    obs, pos = env.observe()
    finished = False
    frames = []

    while not finished and env.steps < steps:
        # capture the state before acting
        picture = plt.imshow(palette[fill_map(env)], animated=True)
        frametamer(frames, env, picture)

        obs_tensor = torch.from_numpy(obs.astype(np.float32)).to(device)
        pos_tensor = torch.from_numpy(pos.astype(np.float32)).to(device)
        actions = network.step(obs_tensor, pos_tensor)[0]
        (obs, pos), _, finished, _ = env.step(actions)

    if finished and env.steps < steps:
        # pad the animation with the final state
        picture = plt.imshow(palette[fill_map(env)], animated=True)
        remaining = steps - env.steps
        for _ in range(remaining):
            frametamer(frames, env, picture)

    ani = animation.ArtistAnimation(
        fig, frames, interval=600, blit=True, repeat_delay=1000
    )

    videos_dir = os.path.join(".", "videos")
    os.makedirs(videos_dir, exist_ok=True)
    target = os.path.join(
        videos_dir, f"test_{model_id}_{test_name}_{test_case_idx}.gif"
    )
    ani.save(target, writer=animation.PillowWriter(fps=10))
131 |
132 |
def make_animation_movingai(
    model_id: int, test_name: str, test_case_idx: int = 0, steps: int = 256
):
    """Render one MovingAI test case as a GIF under ``./videos``.

    Loads ``./models/{model_id}.pth``, replays test case ``test_case_idx``
    from the pickle ``test_name`` for at most ``steps`` environment steps and
    saves one frame per step. If the episode finishes early, the final frame
    is repeated for the remaining steps.
    """
    test_case_idx = int(test_case_idx)
    palette = np.array(
        [
            [255, 255, 255],  # white
            [190, 190, 190],  # gray
            [0, 191, 255],  # blue
            [255, 165, 0],  # orange
            [0, 250, 154],  # green
        ]
    )

    network = DHCNetwork()
    network.eval()
    network.to(device)
    checkpoint_path = os.path.join(".", "models", f"{model_id}.pth")
    network.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # NOTE: pickle must only be used on trusted test files
    tests_path = os.path.join(tests_dir_path(), test_name)
    with open(tests_path, "rb") as f:
        tests = pickle.load(f)
    case = tests[test_case_idx]

    env = MovingAIBenchmarkingEnvironment(should_init=False)
    env.load(case[0], case[1], case[2])

    fig = plt.figure(figsize=(4.8, 4.8))

    obs, pos = env.observe()
    finished = False
    frames = []

    while not finished and env.steps < steps:
        # capture the state before acting
        picture = plt.imshow(palette[fill_map(env)], animated=True)
        frametamer(frames, env, picture)

        obs_tensor = torch.from_numpy(obs.astype(np.float32)).to(device)
        pos_tensor = torch.from_numpy(pos.astype(np.float32)).to(device)
        actions = network.step(obs_tensor, pos_tensor)[0]
        (obs, pos), _, finished, _ = env.step(actions)

    if finished and env.steps < steps:
        # pad the animation with the final state
        picture = plt.imshow(palette[fill_map(env)], animated=True)
        remaining = steps - env.steps
        for _ in range(remaining):
            frametamer(frames, env, picture)

    ani = animation.ArtistAnimation(
        fig, frames, interval=600, blit=True, repeat_delay=1000
    )

    videos_dir = os.path.join(".", "videos")
    os.makedirs(videos_dir, exist_ok=True)
    target = os.path.join(videos_dir, f"{model_id}_{test_name}_{test_case_idx}.gif")
    ani.save(target, writer=animation.PillowWriter(fps=10))
198 |
199 |
def _make_single_map_image_for_report():
    """Render one random 40x40 map with 8 agents and save it as ``agents.png``.

    Every entry of ``env._part`` is painted with its own colour from a fixed
    palette, and each agent's position and goal are drawn as blue and orange
    markers labelled with the agent id. The number of entries in
    ``env._part`` is printed to stdout.
    """
    env = Environment(num_agents=8, map_length=40, fix_density=0.4)
    parts = env._part
    num_comp = len(parts)
    print(num_comp)

    fig = plt.figure(figsize=(4.8, 4.8))  # noqa

    # Palette used to tell the entries of ``parts`` apart.
    cmap = [
        [255, 255, 255],
        [233, 150, 122],
        [238, 232, 170],
        [152, 251, 152],
        [102, 205, 170],
        [135, 206, 235],
        [255, 182, 193],
        [222, 184, 135],
        [255, 239, 213],
        [240, 255, 240],
        [192, 192, 192],
        [100, 149, 237],
        [72, 61, 139],
        [240, 230, 140],
        [0, 100, 0],
        [143, 188, 143],
        [95, 158, 160],
        [221, 160, 221],
        [250, 235, 215],
        [160, 82, 45],
        [255, 240, 245],
        [245, 255, 250],
        [112, 128, 144],
        [220, 220, 220],
        [255, 127, 80],
        [255, 140, 0],
        [128, 128, 0],
        [124, 252, 0],
        [47, 79, 79],
    ]

    # ``grid`` rather than ``map`` to avoid shadowing the builtin.
    grid = np.copy(env.map)
    for agent_id in range(env.num_agents):
        start, goal = env.agents_pos[agent_id], env.goals_pos[agent_id]
        if np.array_equal(start, goal):
            grid[tuple(start)] = 4
        else:
            grid[tuple(start)] = 0
            grid[tuple(goal)] = 0

        # NOTE(review): the markers are assumed to be drawn for every agent,
        # not only in the ``else`` branch above -- confirm against the
        # original indentation.
        # Positions are indexed [0], [1] into the image array; matplotlib
        # takes x then y, hence the swapped order.
        plt.plot(
            start[1],
            start[0],
            marker="o",
            markersize=8,
            markerfacecolor="blue",
            markeredgecolor="blue",
        )
        plt.text(start[1] - 0.5, start[0] + 0.5, agent_id, fontsize=8, color="white")
        plt.plot(
            goal[1],
            goal[0],
            marker="o",
            markersize=8,
            markerfacecolor="orange",
            markeredgecolor="orange",
        )
        plt.text(goal[1] - 0.5, goal[0] + 0.5, agent_id, fontsize=8)

    grid = grid.astype(np.uint8)

    color_map = np.array(
        [
            [224, 255, 255],
            [190, 190, 190],  # gray
            [0, 191, 255],  # blue
            [255, 165, 0],  # orange
            [0, 250, 154],  # green
        ]
    )

    image = color_map[grid]

    for i, component in enumerate(parts):
        # Wrap around the palette so that a map with more entries than
        # colours reuses colours instead of raising IndexError.
        color = np.array(cmap[i % len(cmap)])
        for row, col in component:
            image[row, col] = color

    plt.imshow(image)
    plt.savefig("agents.png")
289 |
290 |
# When run as a script, hand the module over to python-fire so that its
# functions can be invoked from the command line.
if __name__ == "__main__":
    fire.Fire()
293 |
--------------------------------------------------------------------------------
/pathfinding/models/dhc/worker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import random
4 | import ray
5 | import threading
6 | import time
7 | import torch
8 | import torch.nn as nn
9 | from copy import deepcopy
10 | from torch.cuda.amp import GradScaler
11 | from torch.optim import Adam
12 | from torch.optim.lr_scheduler import MultiStepLR
13 | from typing import Tuple
14 |
15 | from pathfinding.environment import Environment
16 | from pathfinding.models.dhc.buffer import SumTree, LocalBuffer
17 | from pathfinding.models.dhc.model import Network
18 | from pathfinding.settings import yaml_data as settings
19 |
# Worker-specific settings: the "worker" subsection of the "dhc" section.
WRK_CONFIG = settings["dhc"]["worker"]
# The whole "dhc" section, for settings that are not specific to the workers.
GENERAL_CONFIG = settings["dhc"]
22 |
23 |
@ray.remote(num_cpus=1)
class GlobalBuffer:
    """Ray actor holding the shared prioritized replay buffer.

    Episodes are written into ``episode_capacity`` fixed-size slots that are
    reused in ring order (``self.ptr``). Actions, rewards, hidden states and
    priorities use ``local_buffer_capacity`` entries per slot; observations
    and communication masks use ``local_buffer_capacity + 1`` rows per slot.
    The actor also keeps per-environment-setting outcome statistics
    that drive the curriculum (see ``stats``).
    """

    def __init__(
        self,
        episode_capacity=WRK_CONFIG["episode_capacity"],
        local_buffer_capacity=GENERAL_CONFIG["max_episode_length"],
        init_env_settings=WRK_CONFIG["init_env_settings"],
        max_comm_agents=WRK_CONFIG["max_comm_agents"],
        alpha=WRK_CONFIG["prioritized_replay_alpha"],
        beta=WRK_CONFIG["prioritized_replay_beta"],
        max_num_agents=GENERAL_CONFIG["max_num_agents"],
    ):
        """Allocate all storage arrays.

        Args:
            episode_capacity: number of episode slots in the ring.
            local_buffer_capacity: maximum number of transitions per episode.
            init_env_settings: first curriculum setting; it is used as a
                dictionary key, so it has to be hashable.
            max_comm_agents: accepted for interface compatibility; not used
                inside this class.
            alpha: exponent applied to priorities before they are stored.
            beta: exponent of the importance-sampling weights.
            max_num_agents: size of the agent axis of the storage arrays.
        """
        self.capacity = episode_capacity
        self.local_buffer_capacity = local_buffer_capacity
        self.size = 0  # number of transitions currently stored
        self.ptr = 0  # index of the episode slot that is written next

        # prioritized experience replay
        self.priority_tree = SumTree(episode_capacity * local_buffer_capacity)
        self.alpha = alpha
        self.beta = beta

        self.counter = 0  # transitions added since the last ``stats`` call
        self.batched_data = []  # object refs of pre-sampled batches
        self.stat_dict = {init_env_settings: []}
        self.lock = threading.Lock()
        self.env_settings_set = ray.put([init_env_settings])

        # ``np.bool_`` instead of the ``np.bool`` alias, which was removed in
        # NumPy 1.24 and raises AttributeError there.
        self.obs_buf = np.zeros(
            (
                (local_buffer_capacity + 1) * episode_capacity,
                max_num_agents,
                *GENERAL_CONFIG["observation_shape"],
            ),
            dtype=np.bool_,
        )
        self.act_buf = np.zeros(
            (local_buffer_capacity * episode_capacity), dtype=np.uint8
        )
        self.rew_buf = np.zeros(
            (local_buffer_capacity * episode_capacity), dtype=np.float16
        )
        self.hid_buf = np.zeros(
            (
                local_buffer_capacity * episode_capacity,
                max_num_agents,
                GENERAL_CONFIG["hidden_dim"],
            ),
            dtype=np.float16,
        )
        self.done_buf = np.zeros(episode_capacity, dtype=np.bool_)
        self.size_buf = np.zeros(episode_capacity, dtype=np.uint)
        self.comm_mask_buf = np.zeros(
            (
                (local_buffer_capacity + 1) * episode_capacity,
                max_num_agents,
                max_num_agents,
            ),
            dtype=np.bool_,
        )

    def __len__(self):
        """Return the number of transitions currently stored."""
        return self.size

    def run(self):
        """Start the daemon thread that pre-samples batches."""
        self.background_thread = threading.Thread(target=self.prepare_data, daemon=True)
        self.background_thread.start()

    def prepare_data(self):
        """Keep a few sampled batches ready in the object store (runs forever)."""
        while True:
            if len(self.batched_data) <= 4:
                data = self.sample_batch(GENERAL_CONFIG["batch_size"])
                data_id = ray.put(data)
                self.batched_data.append(data_id)
            else:
                time.sleep(0.1)

    def get_data(self):
        """Return the object ref of a batch, sampling one now if none is queued."""
        if len(self.batched_data) == 0:
            print("no prepared data")
            data = self.sample_batch(GENERAL_CONFIG["batch_size"])
            data_id = ray.put(data)
            return data_id
        else:
            return self.batched_data.pop(0)

    def add(self, data: Tuple):
        """
        Store one finished episode in the slot at ``self.ptr``.

        data: actor_id 0, num_agents 1,
        map_len 2, obs_buf 3, act_buf 4,
        rew_buf 5, hid_buf 6, td_errors 7,
        done 8, size 9, comm_mask 10
        """
        # Only episodes from actors with id >= 12 feed the statistics.
        if data[0] >= 12:
            stat_key = (data[1], data[2])

            if stat_key in self.stat_dict:
                self.stat_dict[stat_key].append(data[8])
                # keep a sliding window of the last 200 outcomes
                if len(self.stat_dict[stat_key]) == 201:
                    self.stat_dict[stat_key].pop(0)

        with self.lock:
            idxes = np.arange(
                self.ptr * self.local_buffer_capacity,
                (self.ptr + 1) * self.local_buffer_capacity,
            )
            start_idx = self.ptr * self.local_buffer_capacity
            # Observation and mask rows of a slot start at ptr * (capacity + 1)
            # because those buffers hold one extra row per episode.
            ext_start = start_idx + self.ptr
            ext_end = ext_start + data[9] + 1

            # update buffer size
            self.size -= self.size_buf[self.ptr].item()
            self.size += data[9]
            self.counter += data[9]

            self.priority_tree.batch_update(idxes, data[7] ** self.alpha)

            self.obs_buf[ext_start:ext_end, : data[1]] = data[3]
            self.act_buf[start_idx : start_idx + data[9]] = data[4]
            self.rew_buf[start_idx : start_idx + data[9]] = data[5]
            self.hid_buf[start_idx : start_idx + data[9], : data[1]] = data[6]
            self.done_buf[self.ptr] = data[8]
            self.size_buf[self.ptr] = data[9]
            # clear the rows first: the new episode may have fewer agents
            # than the one previously stored in this slot
            self.comm_mask_buf[ext_start:ext_end] = 0
            self.comm_mask_buf[ext_start:ext_end, : data[1], : data[1]] = data[10]

            self.ptr = (self.ptr + 1) % self.capacity

    def sample_batch(self, batch_size: int) -> Tuple:
        """Sample ``batch_size`` transitions according to their priorities.

        Returns:
            A tuple ``(obs, action, reward, done, steps, seq_len, hidden,
            comm_mask, idxes, weights, ptr)``. ``idxes`` are the sampled
            flat transition indices and ``ptr`` is the write pointer at
            sampling time; both are meant to be handed back to
            ``update_priorities``.
        """
        b_obs, b_action, b_reward, b_done, b_steps, b_seq_len, b_comm_mask = (
            [],
            [],
            [],
            [],
            [],
            [],
            [],
        )
        idxes, priorities = [], []
        b_hidden = []

        conf_seq_len = WRK_CONFIG["seq_len"]
        fwd_steps = WRK_CONFIG["forward_steps"]

        with self.lock:
            idxes, priorities = self.priority_tree.batch_sample(batch_size)
            # episode slot and position inside the episode of every sample
            global_idxes = idxes // self.local_buffer_capacity
            local_idxes = idxes % self.local_buffer_capacity

            for idx, global_idx, local_idx in zip(
                idxes.tolist(), global_idxes.tolist(), local_idxes.tolist()
            ):
                assert (
                    local_idx < self.size_buf[global_idx]
                ), f"index is {local_idx} but size is {self.size_buf[global_idx]}"

                # number of forward steps available before the episode ends
                steps = min(fwd_steps, (self.size_buf[global_idx].item() - local_idx))
                seq_len = min(local_idx + 1, conf_seq_len)

                # Row ranges in the observation / mask buffers, which hold
                # ``local_buffer_capacity + 1`` rows per episode slot.
                slot_start = global_idx * (self.local_buffer_capacity + 1)
                window_start = idx + global_idx + 1 - conf_seq_len
                window_end = idx + global_idx + 1 + steps

                if local_idx < conf_seq_len - 1:
                    # not enough history: start at the episode beginning
                    obs = self.obs_buf[slot_start:window_end]
                    comm_mask = self.comm_mask_buf[slot_start:window_end]
                    hidden = np.zeros(
                        (
                            GENERAL_CONFIG["max_num_agents"],
                            GENERAL_CONFIG["hidden_dim"],
                        ),
                        dtype=np.float16,
                    )
                elif local_idx == conf_seq_len - 1:
                    # history is exactly ``conf_seq_len`` long, so
                    # ``window_start`` equals ``slot_start`` here
                    obs = self.obs_buf[window_start:window_end]
                    comm_mask = self.comm_mask_buf[slot_start:window_end]
                    hidden = np.zeros(
                        (
                            GENERAL_CONFIG["max_num_agents"],
                            GENERAL_CONFIG["hidden_dim"],
                        ),
                        dtype=np.float16,
                    )
                else:
                    obs = self.obs_buf[window_start:window_end]
                    comm_mask = self.comm_mask_buf[window_start:window_end]
                    hidden = self.hid_buf[idx - conf_seq_len]

                # pad every sample to the same length so they can be stacked
                if obs.shape[0] < conf_seq_len + fwd_steps:
                    pad_len = conf_seq_len + fwd_steps - obs.shape[0]
                    obs = np.pad(obs, ((0, pad_len), (0, 0), (0, 0), (0, 0), (0, 0)))
                    comm_mask = np.pad(comm_mask, ((0, pad_len), (0, 0), (0, 0)))

                action = self.act_buf[idx]
                # reward summed over ``steps`` steps, discounted by 0.99
                reward = 0
                for i in range(steps):
                    reward += self.rew_buf[idx + i] * 0.99**i

                if (
                    self.done_buf[global_idx]
                    and local_idx >= self.size_buf[global_idx] - fwd_steps
                ):
                    done = True
                else:
                    done = False

                b_obs.append(obs)
                b_action.append(action)
                b_reward.append(reward)
                b_done.append(done)
                b_steps.append(steps)
                b_seq_len.append(seq_len)
                b_hidden.append(hidden)
                b_comm_mask.append(comm_mask)

            # importance sampling weight
            min_p = np.min(priorities)
            weights = np.power(priorities / min_p, -self.beta)

            data = (
                torch.from_numpy(np.stack(b_obs).astype(np.float16)),
                torch.LongTensor(b_action).unsqueeze(1),
                torch.HalfTensor(b_reward).unsqueeze(1),
                torch.HalfTensor(b_done).unsqueeze(1),
                torch.HalfTensor(b_steps).unsqueeze(1),
                torch.LongTensor(b_seq_len),
                torch.from_numpy(np.concatenate(b_hidden)),
                torch.from_numpy(np.stack(b_comm_mask)),
                idxes,
                torch.from_numpy(weights).unsqueeze(1),
                self.ptr,
            )

            return data

    def update_priorities(
        self, idxes: np.ndarray, priorities: np.ndarray, old_ptr: int
    ):
        """Update priorities of sampled transitions

        ``old_ptr`` is the write pointer at the time the batch was sampled;
        transitions whose episode slot has been overwritten since then are
        dropped from the update.
        """
        with self.lock:
            # discard the indices that already been discarded
            # in replay buffer during training
            if self.ptr > old_ptr:
                # slots [old_ptr, self.ptr) were overwritten: keep the rest
                mask = (idxes < old_ptr * self.local_buffer_capacity) | (
                    idxes >= self.ptr * self.local_buffer_capacity
                )
                idxes = idxes[mask]
                priorities = priorities[mask]
            elif self.ptr < old_ptr:
                # the pointer wrapped around, so slots [0, self.ptr) and
                # [old_ptr, self.capacity) were overwritten: keep only
                # indices in [self.ptr, old_ptr)
                mask = (idxes < old_ptr * self.local_buffer_capacity) & (
                    idxes >= self.ptr * self.local_buffer_capacity
                )
                idxes = idxes[mask]
                priorities = priorities[mask]

            self.priority_tree.batch_update(
                np.copy(idxes), np.copy(priorities) ** self.alpha
            )

    def stats(self, interval: int):
        """Print buffer statistics and extend the curriculum.

        Args:
            interval: seconds since the previous call, used to turn the
                transition counter into a rate.
        """
        print(f"buffer update speed: {self.counter / interval}/s")
        print(f"buffer size: {self.size}")

        # header row: one column per map length
        print(" ", end="")
        for i in range(
            WRK_CONFIG["init_env_settings"][1], WRK_CONFIG["max_map_length"] + 1, 5
        ):
            print(" {:2d} ".format(i), end="")
        print()

        # one row per number of agents: "<sum of outcomes>/<recorded episodes>"
        for num_agents in range(
            WRK_CONFIG["init_env_settings"][0], GENERAL_CONFIG["max_num_agents"] + 1
        ):
            print("{:2d}".format(num_agents), end="")
            for map_len in range(
                WRK_CONFIG["init_env_settings"][1], WRK_CONFIG["max_map_length"] + 1, 5
            ):
                if (num_agents, map_len) in self.stat_dict:
                    print(
                        "{:4d}/{:<3d}".format(
                            sum(self.stat_dict[(num_agents, map_len)]),
                            len(self.stat_dict[(num_agents, map_len)]),
                        ),
                        end="",
                    )
                else:
                    print(" N/A ", end="")
            print()

        # Iterate over a copy because new settings are inserted in the loop.
        for key, val in self.stat_dict.copy().items():
            if len(val) == 200 and sum(val) >= 200 * WRK_CONFIG["pass_rate"]:
                # add number of agents
                add_agent_key = (key[0] + 1, key[1])
                if (
                    add_agent_key[0] <= GENERAL_CONFIG["max_num_agents"]
                    and add_agent_key not in self.stat_dict
                ):
                    self.stat_dict[add_agent_key] = []

                # add map length
                if key[1] < WRK_CONFIG["max_map_length"]:
                    add_map_key = (key[0], key[1] + 10)
                    if add_map_key not in self.stat_dict:
                        self.stat_dict[add_map_key] = []

        self.env_settings_set = ray.put(list(self.stat_dict.keys()))

        self.counter = 0

    def ready(self):
        """Return True once enough transitions are stored to start learning."""
        if len(self) >= WRK_CONFIG["learning_starts"]:
            return True
        else:
            return False

    def get_env_settings(self):
        """Return the object ref of the current list of environment settings."""
        return self.env_settings_set

    def check_done(self):
        """Return True when every agent count passes on the largest map.

        A setting passes when 200 outcomes are recorded for it and their
        sum reaches ``200 * pass_rate``.
        """
        for i in range(GENERAL_CONFIG["max_num_agents"]):
            if (i + 1, WRK_CONFIG["max_map_length"]) not in self.stat_dict:
                return False

            max_map_len = self.stat_dict[(i + 1, WRK_CONFIG["max_map_length"])]

            if len(max_map_len) < 200:
                return False
            elif sum(max_map_len) < 200 * WRK_CONFIG["pass_rate"]:
                return False

        return True
396 |
397 |
@ray.remote(num_cpus=1, num_gpus=1)
class Learner:
    """Ray actor that trains the network on batches from the global buffer."""

    def __init__(self, buffer: GlobalBuffer, training_steps=10000):
        """Build the online and target networks, optimizer and LR schedule.

        Args:
            buffer: handle of the global replay buffer actor.
            training_steps: number of updates per pass of the inner
                training loop.
        """
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        self.model = Network()
        self.model.to(self.device)
        self.tar_model = deepcopy(self.model)
        self.optimizer = Adam(self.model.parameters(), lr=1e-4)
        self.scheduler = MultiStepLR(
            self.optimizer, milestones=[200000, 400000], gamma=0.5
        )
        self.buffer = buffer
        self.counter = 0
        self.last_counter = 0
        self.done = False
        self.loss = 0

        self.steps = training_steps

        self.store_weights()

    def get_weights(self):
        """Return the object ref of the most recently published weights."""
        return self.weights_id

    def store_weights(self):
        """Publish a CPU copy of the current weights to the object store."""
        cpu_state = self.model.state_dict()
        for name, tensor in cpu_state.items():
            cpu_state[name] = tensor.cpu()
        self.weights_id = ray.put(cpu_state)

    def run(self):
        """Start training in a background daemon thread."""
        self.learning_thread = threading.Thread(target=self.train, daemon=True)
        self.learning_thread.start()

    def train(self):
        """Run updates until the curriculum is done or the update budget is spent."""
        scaler = GradScaler()

        while (
            not ray.get(self.buffer.check_done.remote())
            and self.counter < WRK_CONFIG["training_times"]
        ):
            for step in range(1, self.steps + 1):
                batch_ref = ray.get(self.buffer.get_data.remote())
                batch = ray.get(batch_ref)

                (
                    b_obs,
                    b_action,
                    b_reward,
                    b_done,
                    b_steps,
                    b_seq_len,
                    b_hidden,
                    b_comm_mask,
                    idxes,
                    weights,
                    old_ptr,
                ) = batch

                # move everything the networks and the loss need to the device
                b_obs = b_obs.to(self.device)
                b_action = b_action.to(self.device)
                b_reward = b_reward.to(self.device)
                b_done = b_done.to(self.device)
                b_steps = b_steps.to(self.device)
                weights = weights.to(self.device)
                b_hidden = b_hidden.to(self.device)
                b_comm_mask = b_comm_mask.to(self.device)

                # sequence length seen by the target network: history plus
                # the forward steps of each sample
                next_lengths = []
                for seq_len, forward_steps in zip(b_seq_len, b_steps):
                    next_lengths.append((seq_len + forward_steps).item())
                b_next_seq_len = torch.LongTensor(next_lengths)

                with torch.no_grad():
                    target_q = self.tar_model(
                        b_obs, b_next_seq_len, b_hidden, b_comm_mask
                    )
                    b_q_ = (1 - b_done) * target_q.max(1, keepdim=True)[0]

                # the online network only sees the history part of the window
                online_q = self.model(
                    b_obs[:, : -WRK_CONFIG["forward_steps"]],
                    b_seq_len,
                    b_hidden,
                    b_comm_mask[:, : -WRK_CONFIG["forward_steps"]],
                )
                b_q = online_q.gather(1, b_action)

                td_error = b_q - (b_reward + (0.99**b_steps) * b_q_)

                # new priorities: absolute TD error, clamped from below
                abs_error = td_error.detach().squeeze().abs()
                priorities = abs_error.clamp(1e-4).cpu().numpy()

                loss = (weights * self.huber_loss(td_error)).mean()
                self.loss += loss.item()

                self.optimizer.zero_grad()
                scaler.scale(loss).backward()

                # unscale before clipping so the threshold applies to the
                # true gradient norm
                scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), 40)

                scaler.step(self.optimizer)
                scaler.update()

                self.scheduler.step()

                # store new weights in shared memory
                if step % 5 == 0:
                    self.store_weights()

                self.buffer.update_priorities.remote(idxes, priorities, old_ptr)

                self.counter += 1

                # update target net, save model
                if step % WRK_CONFIG["target_network_update_freq"] == 0:
                    self.tar_model.load_state_dict(self.model.state_dict())

                if step % WRK_CONFIG["save_interval"] == 0:
                    os.makedirs(os.path.join(".", "models"), exist_ok=True)
                    checkpoint_path = os.path.join(
                        ".", "models", f"{self.counter}.pth"
                    )
                    torch.save(self.model.state_dict(), checkpoint_path)

        self.done = True

    def huber_loss(self, td_error, kappa=1.0):
        """Element-wise loss: quadratic below ``kappa``, linear above."""
        magnitude = td_error.abs()
        is_small = (magnitude < kappa).float()
        quadratic_part = is_small * magnitude.pow(2) * 0.5
        linear_part = (1 - is_small) * (magnitude - 0.5)
        return quadratic_part + linear_part

    def stats(self, interval: int):
        """Print progress since the previous call and return the done flag.

        Args:
            interval: seconds since the previous call.
        """
        new_updates = self.counter - self.last_counter
        print(f"number of updates: {self.counter}")
        print(f"update speed: {new_updates / interval}/s")
        if new_updates != 0:
            print("loss: {:.4f}".format(self.loss / new_updates))

        self.last_counter = self.counter
        self.loss = 0
        return self.done
540 |
541 |
@ray.remote(num_cpus=1)
class Actor:
    """Ray actor that plays episodes and ships them to the global buffer."""

    def __init__(
        self, worker_id: int, epsilon: float, learner: Learner, buffer: GlobalBuffer
    ):
        """Create the local network copy and the curriculum environment.

        Args:
            worker_id: id of this actor; it is stored in every local buffer.
            epsilon: probability of replacing the first agent's action with
                a random one.
            learner: handle of the learner actor (source of new weights).
            buffer: handle of the global replay buffer actor.
        """
        self.id = worker_id
        self.model = Network()
        self.model.eval()
        self.env = Environment(curriculum=True)
        self.epsilon = epsilon
        self.learner = learner
        self.global_buffer = buffer
        self.max_episode_length = GENERAL_CONFIG["max_episode_length"]
        self.counter = 0

    def run(self):
        """Play episodes forever, refreshing the weights periodically."""
        done = False
        obs, pos, local_buffer = self.reset()

        while True:
            # sample action
            actions, q_val, hidden, comm_mask = self.model.step(
                torch.from_numpy(obs.astype(np.float32)),
                torch.from_numpy(pos.astype(np.float32)),
            )

            if random.random() < self.epsilon:
                # Note: only one agent do random action
                # in order to keep the environment stable
                actions[0] = np.random.randint(0, 5)
            # take action in env
            (next_obs, next_pos), rewards, done, _ = self.env.step(actions)
            # return data and update observation
            local_buffer.add(
                q_val[0], actions[0], rewards[0], next_obs, hidden, comm_mask
            )

            # ``not done`` instead of ``done is False``: the identity test is
            # never true for a ``numpy.bool_`` flag, which would end the
            # episode after every single step.
            if not done and self.env.steps < self.max_episode_length:
                obs, pos = next_obs, next_pos
            else:
                # finish and send buffer
                if done:
                    data = local_buffer.finish()
                else:
                    # episode cut off by the length limit: evaluate the last
                    # state once more and pass the result to ``finish``
                    _, q_val, hidden, comm_mask = self.model.step(
                        torch.from_numpy(next_obs.astype(np.float32)),
                        torch.from_numpy(next_pos.astype(np.float32)),
                    )
                    data = local_buffer.finish(q_val[0], comm_mask)

                self.global_buffer.add.remote(data)
                done = False
                obs, pos, local_buffer = self.reset()

            self.counter += 1
            if self.counter == WRK_CONFIG["actor_update_steps"]:
                self.update_weights()
                self.counter = 0

    def update_weights(self):
        """load weights from learner"""
        # update network parameters
        weights_id = ray.get(self.learner.get_weights.remote())
        weights = ray.get(weights_id)
        self.model.load_state_dict(weights)
        # update environment settings set (number of agents and map size)
        new_env_settings_set = ray.get(self.global_buffer.get_env_settings.remote())
        self.env.update_env_settings_set(ray.get(new_env_settings_set))

    def reset(self):
        """Reset model and environment; return ``(obs, pos, local_buffer)``."""
        self.model.reset()
        obs, pos = self.env.reset()
        local_buffer = LocalBuffer(
            self.id, self.env.num_agents, self.env.map_size[0], obs
        )
        return obs, pos, local_buffer
618 |
--------------------------------------------------------------------------------
/pathfinding/movingai.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import Optional
3 |
4 | import fire
5 | from dataclasses import dataclass
6 |
7 |
@dataclass
class TestDescription:
    """One start/goal query, as built by ``read_scenario_from_file``."""

    # start coordinates
    x0: int
    y0: int
    # goal coordinates
    x1: int
    y1: int
    # path length listed for this query in the scenario file
    expected_dist: float
    # map column of the scenario line (presumably the map file name -- TODO confirm)
    mapfile: Optional[str]
16 |
17 |
18 | def _transform(line: str) -> list[int]:
19 | return [
20 | int(ch) for ch in line.replace("@", "1").replace(".", "0").replace("T", "1")
21 | ]
22 |
23 |
def read_map(mapfile: str) -> np.ndarray:
    """Read a map file into an integer array (cell codes from ``_transform``).

    The file starts with four header lines; the second whitespace-separated
    token of the 2nd and 3rd lines is read as the expected height and width
    (presumably the MovingAI ``type`` / ``height`` / ``width`` / ``map``
    header -- confirm against the benchmark files). Every remaining
    non-empty line is one row of the map.

    Raises:
        ValueError: if the size of the rows read differs from the size
            announced in the header. This includes a file that has no map
            rows at all while the header announces a non-empty map.
    """
    with open(mapfile, "r") as map_file:
        map_file.readline()  # first header line is not used
        _, h = map_file.readline().split(" ")
        _, w = map_file.readline().split(" ")
        h, w = int(h), int(w)
        map_file.readline()  # fourth header line is not used
        lines = map_file.read().split("\n")

    grid = [_transform(line) for line in lines if line]

    h_read = len(grid)
    # Guard against an empty map body: without it ``grid[0]`` raises an
    # opaque IndexError instead of the size-mismatch error below.
    w_read = len(grid[0]) if grid else 0
    if h_read != h or w_read != w:
        raise ValueError(
            "Size of the map read is not equal to the expected size from MovingAI, "
            f"({h_read}, {w_read}) != ({h}, {w})"
        )

    return np.array(grid)
42 |
43 |
def get_map_density(mapfile: str) -> float:
    """Return the fraction of cells of the map in *mapfile* that equal 1."""
    grid = read_map(mapfile)
    blocked_cells = (grid == 1).sum()
    total_cells = grid.shape[0] * grid.shape[1]
    return blocked_cells / total_cells
47 |
48 |
def read_scenario_from_file(scenfile: str):
    """Parse a tab-separated scenario file into ``TestDescription`` objects.

    The first line of the file is skipped. Every following non-blank line
    must hold nine tab-separated columns; columns 5-8 are read as
    ``y0, x0, y1, x1`` and the last one as the expected distance.

    Returns:
        A list with one ``TestDescription`` per scenario line, in file order.

    Raises:
        ValueError: if a non-blank line does not have exactly nine columns
            or a coordinate / distance is not numeric.
    """
    tests = []

    with open(scenfile, "r") as scen_file:
        scen_file.readline()  # skip the header line
        for line in scen_file:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no scenario; skip them rather than fail on the unpack.
            if not line.strip():
                continue
            _, map_no, _, _, y0, x0, y1, x1, exp_dst = line.split("\t")
            x0, y0, x1, y1, exp_dst = int(x0), int(y0), int(x1), int(y1), float(exp_dst)
            tests.append(TestDescription(x0, y0, x1, y1, exp_dst, map_no))
    return tests
59 |
60 |
def get_tests_for_multiple_agents(
    scenfile: str = None, num_agents: int = 5, max_num_tests: int = 200
) -> list[list[TestDescription]]:
    """Group consecutive scenarios of *scenfile* into multi-agent tests.

    Scenarios are taken in file order in non-overlapping groups of
    ``num_agents``; an incomplete trailing group is dropped.

    Args:
        scenfile: path of the scenario file to read.
        num_agents: number of scenarios (one per agent) in every test.
        max_num_tests: upper bound on the number of tests returned.

    Returns:
        A list of at most ``max_num_tests`` groups, each holding exactly
        ``num_agents`` ``TestDescription`` objects.
    """
    scens = read_scenario_from_file(scenfile)
    # Exclusive upper bound for group start indices: at most
    # ``max_num_tests`` groups, and only starts that leave room for a full
    # group. (The former ``max_num_tests * num_agents - 1`` bound returned
    # one group too few when ``num_agents == 1``.)
    num_tests = min(max_num_tests * num_agents, len(scens) - num_agents + 1)
    return [scens[i : i + num_agents] for i in range(0, num_tests, num_agents)]
70 |
71 |
# When run as a script, hand the module over to python-fire so that its
# functions can be invoked from the command line.
if __name__ == "__main__":
    fire.Fire()
74 |
75 | # success rate: 0.00%
76 | # soft-success rate: 45.78%
77 | # average step: 256.0
78 |
79 | # success rate: 0.00%
80 | # soft-success rate: 85.47%
81 | # average step: 512.0
82 | #
83 | # success rate: 70.00%
84 | # soft-success rate: 95.78%
85 | # average step: 773.1
86 |
--------------------------------------------------------------------------------
/pathfinding/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 |
4 |
# Load the project configuration once, at import time.
# NOTE: the path is relative to the current working directory, so the
# importing process has to be started from the directory holding config.yaml.
# NOTE(review): FullLoader accepts more YAML tags than safe_load; keep it
# unless the config is confirmed not to rely on them.
with open(os.path.join(".", "config.yaml"), "r") as yaml_file:
    yaml_data = yaml.load(yaml_file, Loader=yaml.FullLoader)


# Only the parsed configuration is part of the public interface.
__all__ = ["yaml_data"]
10 |
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_16agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_16agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_16agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_16agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_32agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_32agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_4agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_4agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_4agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_4agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_8agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_8agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/10length_8agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/10length_8agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/128length_32agents_0.3372density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/128length_32agents_0.3372density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_16agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_16agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_16agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_16agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_32agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_32agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_32agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_32agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_4agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_4agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_4agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_4agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_8agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_8agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/20length_8agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/20length_8agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/256length_16agents_0.2654density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/256length_16agents_0.2654density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/256length_64agents_0.2654density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/256length_64agents_0.2654density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_16agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_16agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_16agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_16agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_32agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_32agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_32agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_32agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_4agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_4agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_4agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_4agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_8agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_8agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/30length_8agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/30length_8agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/32length_32agents_0.334density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/32length_32agents_0.334density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_16agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_16agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_16agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_16agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_32agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_32agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_32agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_32agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_4agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_4agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_4agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_4agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_64agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_64agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_8agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_8agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/40length_8agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/40length_8agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_16agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_16agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_16agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_16agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_32agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_32agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_32agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_32agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_4agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_4agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_4agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_4agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_64agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_64agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_8agents_0.1density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_8agents_0.1density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/80length_8agents_0.3density.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/80length_8agents_0.3density.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_16agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_16agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_1agents_.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8d8f765f53025c1042db66cf24ccccfd42d913f77a81dc3c95174e32fb2fbeb9
3 | size 335112169
4 |
--------------------------------------------------------------------------------
/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_32agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_32agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_4agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_4agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_64agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_64agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_8agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/Berlin_0_256.map_Berlin_0_256.map.scen_8agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/den520d.map_den520d-even-1.scen_1agents_.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:0a2deb1672b72689d84b76bcdfae0bd8c1d3a1efda8dd745e60b77393ce26ffd
3 | size 335112169
4 |
--------------------------------------------------------------------------------
/pathfinding/test_cases/den520d.map_den520d-even-1.scen_32agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/den520d.map_den520d-even-1.scen_32agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/den520d.map_den520d-even-1.scen_4agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/den520d.map_den520d-even-1.scen_4agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/den520d.map_den520d-even-1.scen_64agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/den520d.map_den520d-even-1.scen_64agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_1agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_1agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_32agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_32agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_64agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_64agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_8agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/ht_chantry.map_ht_chantry-even-1.scen_8agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_1agents_.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:677d8b80b14819c82d7c827be4f68abc06e2d6cdb1ee34abf73d0c69ac3c1c41
3 | size 192486091
4 |
--------------------------------------------------------------------------------
/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_32agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_32agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_64agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_64agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_8agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/lak303d.map_lak303d-even-1.scen_8agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/maze-128-128-2.map_maze-128-128-2-random-1.scen_32agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/maze-128-128-2.map_maze-128-128-2-random-1.scen_32agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/room-32-32-4.map_room-32-32-4-random-1.scen_32agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/room-32-32-4.map_room-32-32-4-random-1.scen_32agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/small-3-rooms.map_small-12.scen_12agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small-3-rooms.map_small-12.scen_12agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/small.map_small-4.scen_4agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small.map_small-4.scen_4agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/small.map_small-8-reversed.scen_8agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small.map_small-8-reversed.scen_8agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/test_cases/small.map_small-8.scen_8agents_.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/pathfinding/test_cases/small.map_small-8.scen_8agents_.pkl
--------------------------------------------------------------------------------
/pathfinding/utils.py:
--------------------------------------------------------------------------------
1 | import fire
2 | import multiprocessing as mp
3 | import numpy as np
4 | import os
5 | import pickle
6 |
7 | from pathfinding.environment import Environment, MovingAIBenchmarkingEnvironment
8 | from pathfinding.movingai import (
9 | get_tests_for_multiple_agents,
10 | read_map,
11 | TestDescription,
12 | )
13 |
14 |
def generate_test_filename(length: int, num_agents: int, density: float, ext="pkl"):
    """Return the file name used for a randomly generated test suite.

    The name encodes the map length, the number of agents and the density,
    followed by the file extension (``pkl`` by default).
    """
    stem = f"{length}length_{num_agents}agents_{density}density"
    return f"{stem}.{ext}"
17 |
18 |
def generate_moving_ai_test_filename(
    map_filename: str, scenary_filename: str, num_agents: int, ext="pkl"
):
    """Return the file name used for a test suite built from a map/scenario pair.

    Only the last ``os.sep``-separated component of each path is kept, so
    the result is a bare file name of the form
    ``<map>_<scenario>_<num_agents>agents_.<ext>``.
    """
    map_name = map_filename.rsplit(os.sep, 1)[-1]
    scen_name = scenary_filename.rsplit(os.sep, 1)[-1]
    return f"{map_name}_{scen_name}_{num_agents}agents_.{ext}"
25 |
26 |
def tests_dir_path():
    """Return the relative path of the directory holding pickled test suites."""
    components = (".", "pathfinding", "test_cases")
    return os.path.join(*components)
29 |
30 |
def tests_moving_ai_dir_path():
    """Return the relative path of the directory with map and scenario files."""
    components = ("data", "movingai")
    return os.path.join(*components)
33 |
34 |
def generate_test_suits(tests_config, repeat_for: int):
    """Generate and pickle random test suites for each configuration.

    Args:
        tests_config: iterable of ``(map_length, num_agents, density)`` triples.
        repeat_for: number of test cases to store per configuration.

    For every configuration one pickle file is written into
    ``tests_dir_path()``. It holds a list of ``(map, agents_pos, goals_pos)``
    array copies taken from the environment, which is reset after each copy.
    """
    out_dir = tests_dir_path()
    os.makedirs(out_dir, exist_ok=True)

    for map_length, num_agents, density in tests_config:
        env = Environment(
            num_agents=num_agents, map_length=map_length, fix_density=density
        )
        tests = []
        for generated in range(repeat_for):
            # Copy the arrays: the environment is reset right afterwards.
            snapshot = tuple(
                np.copy(arr) for arr in (env.map, env.agents_pos, env.goals_pos)
            )
            tests.append(snapshot)
            print(generated)
            env.reset(num_agents=num_agents, map_length=map_length)

        target = os.path.join(
            out_dir, generate_test_filename(map_length, num_agents, density)
        )
        with open(target, "wb") as file:
            pickle.dump(tests, file)
52 |
53 |
def generate_test_suits_moving_ai(
    tests_config: list[int], map_filename: str, scenary_filename: str, repeat_for: int
):
    """Generate and pickle test suites from a map file and a scenario file.

    Args:
        tests_config: agent counts; one pickle file is produced per entry.
        map_filename: map file name, resolved inside ``tests_moving_ai_dir_path()``.
        scenary_filename: scenario file name, resolved inside the same directory.
        repeat_for: forwarded to ``get_tests_for_multiple_agents``.

    Each pickle file holds a list of ``(map, agents_pos, goals_pos)`` array
    copies, one per test set returned by ``get_tests_for_multiple_agents``.
    """
    out_dir = tests_dir_path()
    os.makedirs(out_dir, exist_ok=True)

    source_dir = tests_moving_ai_dir_path()
    map_path = os.path.join(source_dir, map_filename)
    scen_path = os.path.join(source_dir, scenary_filename)

    def _snapshot(agents_count, test_set):
        # Build the environment for one test set and copy out its state.
        env = MovingAIBenchmarkingEnvironment(
            num_agents=agents_count,
            map_filename=map_path,
            test_descriptions=test_set,
        )
        return (np.copy(env.map), np.copy(env.agents_pos), np.copy(env.goals_pos))

    for num_agents in tests_config:
        test_sets = get_tests_for_multiple_agents(scen_path, num_agents, repeat_for)
        pkl_tests = [_snapshot(num_agents, test_set) for test_set in test_sets]

        target = os.path.join(
            out_dir,
            generate_moving_ai_test_filename(map_path, scen_path, num_agents),
        )
        with open(target, "wb") as file:
            pickle.dump(pkl_tests, file)
80 |
81 |
def _run_tests(tests_pkl_filename: str, test_generation_fn, singe_test_fn):
    """Run all test cases from a pickle file in parallel and report the metrics.

    Args:
        tests_pkl_filename: path of the pickle file holding the test cases.
        test_generation_fn: callable applied to the unpickled object; it must
            return the iterable of work items handed to the worker pool.
        singe_test_fn: callable executed in the worker processes for each
            work item. It must return a ``(csr, isr, makespan)`` triple and,
            because it is sent to other processes, must be picklable.

    Returns:
        ``((csr_mean, csr_std), (isr_mean, isr_std), (makespan_mean,
        makespan_std))``, with CSR and ISR expressed in percent.

    Raises:
        ValueError: if there are no test cases to run.
    """
    # NOTE: unpickling can execute arbitrary code; only load test suites
    # that were generated locally or come from a trusted source.
    with open(tests_pkl_filename, "rb") as f:
        tests = pickle.load(f)

    tests = test_generation_fn(tests)

    # The context manager shuts the pool down even when a test raises, so
    # worker processes are not leaked. All results are already collected
    # when ``map`` returns, hence terminating on exit is safe.
    with mp.Pool(mp.cpu_count()) as pool:
        ret = pool.map(singe_test_fn, tests)

    if not ret:
        raise ValueError(f"no test cases to run in {tests_pkl_filename!r}")

    csr, isr, makespan = zip(*ret)
    csr = 100 * np.array(csr)
    isr = 100 * np.array(isr)
    csr_mean, csr_std = np.mean(csr), np.std(csr)
    isr_mean, isr_std = np.mean(isr), np.std(isr)
    makespan_mean, makespan_std = np.mean(makespan), np.std(makespan)

    print(f"CSR: {csr_mean} +- {csr_std}%")
    print(f"ISR: {isr_mean} +- {isr_std}%")
    print(f"Makespan: {makespan_mean} +- {makespan_std}")
    print()

    return (csr_mean, csr_std), (isr_mean, isr_std), (makespan_mean, makespan_std)
104 |
105 |
def test_group(test_group, test_generation_fn, singe_test_fn, is_random=True):
    """Run one stored test suite and return its aggregated metrics.

    Args:
        test_group: ``(length, num_agents, density)`` when ``is_random`` is
            true, otherwise ``(num_agents, map_filename, scenary_filename)``.
        test_generation_fn: forwarded unchanged to ``_run_tests``.
        singe_test_fn: forwarded unchanged to ``_run_tests``.
        is_random: selects which of the two naming schemes locates the
            pickle file inside ``tests_dir_path()``.

    Returns:
        Whatever ``_run_tests`` returns for the selected pickle file.
    """
    if is_random:
        length, num_agents, density = test_group
        print(f"test group: {length} length {num_agents} agents {density} density")
        pkl_name = generate_test_filename(length, num_agents, density)
    else:
        num_agents, map_filename, scenary_filename = test_group
        print(
            f"test group: {map_filename} map "
            f"{scenary_filename} scen {num_agents} agents"
        )
        pkl_name = generate_moving_ai_test_filename(
            map_filename, scenary_filename, num_agents
        )

    pkl_path = os.path.join(tests_dir_path(), pkl_name)
    return _run_tests(pkl_path, test_generation_fn, singe_test_fn)
134 |
135 |
def calculate_metrics(env: Environment, makespan: int):
    """Compute success metrics from the environment's current positions.

    Returns:
        ``(csr, isr, makespan)`` where ``csr`` is true only if every agent
        position equals its goal position, ``isr`` is the fraction of agents
        whose first two coordinates both match their goal, and ``makespan``
        is passed through unchanged.
    """
    agents, goals = env.agents_pos, env.goals_pos
    matches = agents == goals
    # An agent counts as arrived only when both coordinates coincide.
    arrived = matches[:, 0] & matches[:, 1]
    isr = arrived.sum() / agents.shape[0]
    csr = np.array_equal(agents, goals)
    return csr, isr, makespan
141 |
142 |
def _dump_to_scen_file(
    scenfile: str,
    tests: list[TestDescription],
    map_h: int,
    map_w: int,
):
    """Write test descriptions to a tab-separated scenario file.

    The file starts with a ``version 1`` header line. Each test then becomes
    one line with the fields: running index, map file, ``map_h``, ``map_w``,
    start x, start y, goal x, goal y and expected distance.
    """
    with open(scenfile, "w") as scen:
        print("version 1", file=scen)
        for line_no, test in enumerate(tests):
            fields = (
                line_no,
                test.mapfile,
                map_h,
                map_w,
                test.x0,
                test.y0,
                test.x1,
                test.y1,
                test.expected_dist,
            )
            # print() stringifies each field and joins them with the separator.
            print(*fields, sep="\t", file=scen)
169 |
170 |
def generate_scen_for_custom_maps(
    map_filename: str,
    num_agents: int = 8,
    num_tests: int = 10,
):
    """Generate a random scenario file next to a custom map file.

    Args:
        map_filename: path of the map file, read with ``read_map``.
        num_agents: number of start/goal pairs drawn per test.
        num_tests: number of tests to generate.

    The scenario is written to ``generated_<map stem>.scen`` in the same
    directory as the map. When ``map_filename`` has no directory part the
    file is created in the current working directory.

    Raises:
        ValueError: raised by NumPy when the map has fewer than
            ``2 * num_agents`` cells equal to 0.
    """
    # os.path handles the edge cases the manual string splitting got wrong:
    # a bare file name stays relative instead of being anchored at the
    # filesystem root, and names without (or with several) dots keep a
    # sensible stem.
    dirpath, mapfile = os.path.split(map_filename)
    map_stem = os.path.splitext(mapfile)[0]
    scenfile = os.path.join(dirpath, f"generated_{map_stem}.scen")

    custom_map = read_map(map_filename)
    h, w = custom_map.shape

    # Candidate cells are those equal to 0 (presumably free cells - confirm
    # against read_map); argwhere yields them as (row, column) pairs.
    pos = np.argwhere(custom_map == 0)

    tests = []
    rng = np.random.default_rng()
    for _ in range(num_tests):
        # Sampling without replacement keeps all starts and goals of one
        # test on distinct cells.
        agents = rng.choice(pos, 2 * num_agents, replace=False)
        starts, goals = agents[:num_agents], agents[num_agents:]
        tests.extend(
            TestDescription(
                # x is the column index, y the row index.
                x0=start[1],
                y0=start[0],
                x1=goal[1],
                y1=goal[0],
                expected_dist=-1,  # placeholder: no distance is computed here
                mapfile=mapfile,
            )
            for start, goal in zip(starts, goals)
        )
    _dump_to_scen_file(scenfile, tests, h, w)
202 |
203 |
# When executed as a script, hand control to python-fire's command-line
# entry point; nothing runs on plain import.
if __name__ == "__main__":
    fire.Fire()
206 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "po-mapf-thesis"
3 | version = "0.1.0"
4 | description = "Partially observable / decentralized multi-agent pathfinding in Grid Environments using Reinforcement Learning"
5 | authors = ["Vlad Savinov "]
6 | license = "MIT"
7 | readme = "README.md"
8 | packages = [{include = "pathfinding"}]
9 | include = [{path = "tests"}]
10 |
11 | [tool.poetry.dependencies]
12 | python = "^3.9"
13 | torch = "^1.12.1"
14 | pyyaml = "^6.0"
15 | matplotlib = "^3.6.2"
16 | ray = "^2.2.0"
17 | ruff = "^0.0.241"
18 | numpy = "1.23.1"
19 | pyproject-toml = "^0.0.10"
20 |
21 | [tool.poetry.group.dev.dependencies]
22 | torch = "^1.12.1"
23 | black = "^22.10.0"
24 | fire = "^0.4.0"
25 |
26 |
27 | [tool.poetry.group.test.dependencies]
28 | pytest = "^7.2.0"
29 |
30 | [tool.pytest.ini_options]
31 | testpaths = [
32 | "tests"
33 | ]
34 |
35 | [build-system]
36 | requires = ["poetry-core"]
37 | build-backend = "poetry.core.masonry.api"
38 |
39 | [tool.ruff]
40 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
41 | select = ["E", "F"]
42 | ignore = []
43 |
44 | # Allow autofix for all enabled rules (when `--fix` is provided).
45 | unfixable = []
46 |
47 | # Exclude a variety of commonly ignored directories.
48 | exclude = [
49 | ".bzr",
50 | ".direnv",
51 | ".eggs",
52 | ".git",
53 | ".hg",
54 | ".mypy_cache",
55 | ".nox",
56 | ".pants.d",
57 | ".ruff_cache",
58 | ".svn",
59 | ".tox",
60 | ".venv",
61 | "__pypackages__",
62 | "_build",
63 | "buck-out",
64 | "build",
65 | "dist",
66 | "node_modules",
67 | "venv",
68 | ]
69 | per-file-ignores = {}
70 |
71 | # Same as Black.
72 | line-length = 88
73 |
74 | # Allow unused variables when underscore-prefixed.
75 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
76 |
77 | # Assume Python 3.10.
78 | target-version = "py310"
79 |
80 | [tool.ruff.mccabe]
81 | # Unlike Flake8, default to a complexity level of 10.
82 | max-complexity = 10
83 |
--------------------------------------------------------------------------------
/static/DHC_10x10_4_good.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_10x10_4_good.gif
--------------------------------------------------------------------------------
/static/DHC_40x40_16_dense.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_40x40_16_dense.gif
--------------------------------------------------------------------------------
/static/DHC_40x40_16_good.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_40x40_16_good.gif
--------------------------------------------------------------------------------
/static/DHC_40x40_4_good.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_40x40_4_good.gif
--------------------------------------------------------------------------------
/static/DHC_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_architecture.png
--------------------------------------------------------------------------------
/static/DHC_training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/DHC_training.png
--------------------------------------------------------------------------------
/static/chart_40x40.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/chart_40x40.png
--------------------------------------------------------------------------------
/static/chart_80x80.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/static/chart_80x80.png
--------------------------------------------------------------------------------
/tests/test_imports.py:
--------------------------------------------------------------------------------
def test_import_dhc():
    """Smoke test: ``DHCNetwork`` can be imported from ``pathfinding.models.dhc``."""
    from pathfinding.models.dhc import DHCNetwork  # noqa
3 |
4 |
def test_import_env():
    """Smoke test: ``Environment`` can be imported from ``pathfinding.environment``."""
    from pathfinding.environment import Environment  # noqa
7 |
8 |
def test_import_buffer():
    """Smoke test: ``LocalBuffer`` can be imported from ``pathfinding.models.dhc``."""
    from pathfinding.models.dhc import LocalBuffer  # noqa
11 |
12 |
def test_import_worker():
    """Smoke test: the worker classes can be imported from ``pathfinding.models.dhc``."""
    from pathfinding.models.dhc import Actor, Learner, GlobalBuffer  # noqa
15 |
--------------------------------------------------------------------------------
/videos/337500_small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl_0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small-3-rooms-dense.map_small-12-dense.scen_12agents_.pkl_0.gif
--------------------------------------------------------------------------------
/videos/337500_small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl_0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small-3-rooms-dense2.map_small-12-dense.scen_12agents_.pkl_0.gif
--------------------------------------------------------------------------------
/videos/337500_small-3-rooms.map_small-12.scen_12agents_.pkl_0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small-3-rooms.map_small-12.scen_12agents_.pkl_0.gif
--------------------------------------------------------------------------------
/videos/337500_small.map_small-4.scen_4agents_.pkl_0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small.map_small-4.scen_4agents_.pkl_0.gif
--------------------------------------------------------------------------------
/videos/337500_small.map_small-8-reversed.scen_8agents_.pkl_0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small.map_small-8-reversed.scen_8agents_.pkl_0.gif
--------------------------------------------------------------------------------
/videos/337500_small.map_small-8.scen_8agents_.pkl_0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acforvs/dhc-robust-mapf/183804942e612187905a8946df3fa7972fdb5c91/videos/337500_small.map_small-8.scen_8agents_.pkl_0.gif
--------------------------------------------------------------------------------