├── LICENSE
├── README.md
├── alg_parameters.py
├── driver.py
├── episodic_buffer.py
├── eval_model.py
├── mapf_gym.py
├── model.py
├── net.py
├── od_mstar3
    ├── SortedCollection.py
    ├── __pycache__
    │   ├── SortedCollection.cpython-37.pyc
    │   ├── col_set_addition.cpython-37.pyc
    │   ├── interface.cpython-37.pyc
    │   ├── od_mstar.cpython-37.pyc
    │   └── workspace_graph.cpython-37.pyc
    ├── build
    │   ├── lib.linux-x86_64-3.7
    │   │   └── cpp_mstar.cpython-37m-x86_64-linux-gnu.so
    │   └── temp.linux-x86_64-3.7
    │   │   ├── col_checker.o
    │   │   ├── cython_od_mstar.o
    │   │   ├── grid_planning.o
    │   │   ├── grid_policy.o
    │   │   ├── od_mstar.o
    │   │   └── policy.o
    ├── col_checker.cpp
    ├── col_checker.hpp
    ├── col_set.hpp
    ├── col_set_addition.py
    ├── cpp_mstar.cpython-37m-x86_64-linux-gnu.so
    ├── cython_od_mstar.cpp
    ├── cython_od_mstar.pyx
    ├── grid_planning.cpp
    ├── grid_planning.hpp
    ├── grid_policy.cpp
    ├── grid_policy.hpp
    ├── interface.py
    ├── mstar_type_defs.hpp
    ├── mstar_utils.hpp
    ├── od_mstar.cpp
    ├── od_mstar.hpp
    ├── od_mstar.py
    ├── od_vertex.hpp
    ├── policy.cpp
    ├── policy.hpp
    ├── prune_graph.py
    ├── setup.py
    └── workspace_graph.py
├── requirements.txt
├── runner.py
├── transformer
    ├── __pycache__
    │   ├── encoder_model.cpython-37.pyc
    │   ├── layers.cpython-37.pyc
    │   ├── modules.cpython-37.pyc
    │   └── sub_layers.cpython-37.pyc
    ├── encoder_model.py
    ├── layers.py
    ├── modules.py
    └── sub_layers.py
└── util.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 MARMot Lab @ NUS-ME
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SCRIMP
 2 | This is the code for implementing the SCRIMP algorithm :`SCRIMP: Scalable Communication for Reinforcement- and Imitation-Learning-Based Multi-Agent Pathfinding`
 3 | 
 4 | ## Requirements
 5 | 
 6 | Python == 3.7
 7 |    ```
 8 |     pip install -r requirements.txt
 9 |    ```
10 |     
11 | 
12 | ## Setting up Code
13 | 
14 | * cd into the od_mstar3 folder.
15 | * python3 setup.py build_ext --inplace.
16 | * Check by going back to the root of the git folder, running python3 and `import od_mstar3.cpp_mstar`.
17 |     
18 | ## Running Code
19 | 
20 | * Modify the parameters in `alg_parameters.py` to set the desired training setting and recording methods.
21 | * Call python `driver.py`.
22 |     
23 | ## Key Files
24 | 
25 | `alg_parameters.py` - Training parameters.
26 | 
27 | `driver.py` - Driver of program. Holds global training network for PPO.
28 | 
29 | `episodic_buffer.py` - Defines the episodic buffer used to generate intrinsic rewards.
30 | 
31 | `eval_model.py` - Evaluates trained model.
32 | 
33 | `mapf_gym.py` - Defines the classical Reinforcement Learning environment of Multi-Agent Pathfinding.
34 | 
35 | `model.py` - Defines the neural network-based operation model. 
36 | 
37 | `net.py` - Defines network architecture.
38 | 
39 | `runner.py` - A single process for collecting training data. 
40 | 
41 | 
42 | ## Other Links
43 | 
44 | Fully trained SCRIMP model - https://www.dropbox.com/scl/fo/ekhxyt7gm575kfwaerwb5/h?rlkey=j3cdikwofz0zelj2oci9q97k8&dl=0
45 | 
46 | 
47 | ## Authors
48 | 
49 | Yutong Wang
50 | 
51 | Bairan Xiang
52 | 
53 | Shinan Huang
54 | 
55 | Guillaume Sartoretti
56 | 


--------------------------------------------------------------------------------
/alg_parameters.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | 
  3 | """ Hyperparameters of SCRIMP!"""
  4 | 
  5 | 
  6 | class EnvParameters:
  7 |     N_AGENTS = 8  # number of agents used in training
  8 |     N_ACTIONS = 5
  9 |     EPISODE_LEN = 256  # maximum episode length in training
 10 |     FOV_SIZE = 3
 11 |     WORLD_SIZE = (10, 40)
 12 |     OBSTACLE_PROB = (0.0, 0.5)
 13 |     ACTION_COST = -0.3
 14 |     IDLE_COST = -0.3
 15 |     GOAL_REWARD = 0.0
 16 |     COLLISION_COST = -2
 17 |     BLOCKING_COST = -1
 18 | 
 19 | 
 20 | class TrainingParameters:
 21 |     lr = 1e-5
 22 |     GAMMA = 0.95  # discount factor
 23 |     LAM = 0.95  # For GAE
 24 |     CLIP_RANGE = 0.2
 25 |     MAX_GRAD_NORM = 10
 26 |     ENTROPY_COEF = 0.01
 27 |     IN_VALUE_COEF = 0.08
 28 |     EX_VALUE_COEF = 0.08
 29 |     POLICY_COEF = 10
 30 |     VALID_COEF = 0.5
 31 |     BLOCK_COEF = 0.5
 32 |     N_EPOCHS = 10
 33 |     N_ENVS = 16  # number of processes
 34 |     N_MAX_STEPS = 3e7  # maximum number of time steps used in training
 35 |     N_STEPS = 2 ** 10  # number of time steps per process per data collection
 36 |     MINIBATCH_SIZE = int(2 ** 10)
 37 |     DEMONSTRATION_PROB = 0.1  # imitation learning rate
 38 | 
 39 | 
 40 | class NetParameters:
 41 |     NET_SIZE = 512
 42 |     NUM_CHANNEL = 8  # number of channels of observations -[FOV_SIZE x FOV_SIZEx NUM_CHANNEL]
 43 |     GOAL_REPR_SIZE = 12
 44 |     VECTOR_LEN = 7  # [dx, dy, d total,extrinsic rewards,intrinsic reward, min dist respect to buffer, action t-1]
 45 |     N_POSITION = 1024  # maximum number of unique ID
 46 |     D_MODEL = NET_SIZE  # for input and inner feature of attention
 47 |     D_HIDDEN = 1024  # for feed-forward network
 48 |     N_LAYERS = 1  # number of computation block
 49 |     N_HEAD = 8
 50 |     D_K = 32
 51 |     D_V = 32
 52 | 
 53 | 
 54 | class TieBreakingParameters:
 55 |     DIST_FACTOR = 0.1
 56 | 
 57 | 
 58 | class IntrinsicParameters:
 59 |     K = 3  # threshold for obtaining intrinsic reward
 60 |     CAPACITY = 80
 61 |     ADD_THRESHOLD = 3
 62 |     N_ADD_INTRINSIC = 1e6  # number of steps to start giving intrinsic reward
 63 |     SURROGATE1 = 0.2
 64 |     SURROGATE2 = 1
 65 | 
 66 | 
 67 | class SetupParameters:
 68 |     SEED = 1234
 69 |     USE_GPU_LOCAL = False
 70 |     USE_GPU_GLOBAL = True
 71 |     NUM_GPU = 1
 72 | 
 73 | 
 74 | class RecordingParameters:
 75 |     RETRAIN = False
 76 |     WANDB =  True
 77 |     TENSORBOARD = True
 78 |     TXT_WRITER =  True
 79 |     ENTITY = 'yutong'
 80 |     TIME = datetime.datetime.now().strftime('%d-%m-%y%H%M')
 81 |     EXPERIMENT_PROJECT = 'MAPF'
 82 |     EXPERIMENT_NAME = 'SCRIMP'
 83 |     EXPERIMENT_NOTE = ''
 84 |     SAVE_INTERVAL = 5e5  # interval of saving model
 85 |     BEST_INTERVAL = 0  # interval of saving model with the best performance
 86 |     GIF_INTERVAL = 1e6  # interval of saving gif
 87 |     EVAL_INTERVAL = TrainingParameters.N_ENVS * TrainingParameters.N_STEPS  # interval of evaluating training model0
 88 |     EVAL_EPISODES = 1  # number of episode used in evaluation
 89 |     RECORD_BEST = False
 90 |     MODEL_PATH = './models' + '/' + EXPERIMENT_PROJECT + '/' + EXPERIMENT_NAME + TIME
 91 |     GIFS_PATH = './gifs' + '/' + EXPERIMENT_PROJECT + '/' + EXPERIMENT_NAME + TIME
 92 |     SUMMARY_PATH = './summaries' + '/' + EXPERIMENT_PROJECT + '/' + EXPERIMENT_NAME + TIME
 93 |     TXT_NAME = 'alg.txt'
 94 |     LOSS_NAME = ['all_loss', 'policy_loss', 'policy_entropy', 'critic_loss_in', 'critic_loss_ex', 'valid_loss',
 95 |                  'blocking_loss', 'clipfrac',
 96 |                  'grad_norm', 'advantage']
 97 | 
 98 | 
 99 | all_args = {'N_AGENTS': EnvParameters.N_AGENTS, 'N_ACTIONS': EnvParameters.N_ACTIONS,
100 |             'EPISODE_LEN': EnvParameters.EPISODE_LEN, 'FOV_SIZE': EnvParameters.FOV_SIZE,
101 |             'WORLD_SIZE': EnvParameters.WORLD_SIZE,
102 |             'OBSTACLE_PROB': EnvParameters.OBSTACLE_PROB,
103 |             'ACTION_COST': EnvParameters.ACTION_COST,
104 |             'IDLE_COST': EnvParameters.IDLE_COST, 'GOAL_REWARD': EnvParameters.GOAL_REWARD,
105 |             'COLLISION_COST': EnvParameters.COLLISION_COST,
106 |             'BLOCKING_COST': EnvParameters.BLOCKING_COST,
107 |             'lr': TrainingParameters.lr, 'GAMMA': TrainingParameters.GAMMA, 'LAM': TrainingParameters.LAM,
108 |             'CLIPRANGE': TrainingParameters.CLIP_RANGE, 'MAX_GRAD_NORM': TrainingParameters.MAX_GRAD_NORM,
109 |             'ENTROPY_COEF': TrainingParameters.ENTROPY_COEF,
110 |             'IN_VALUE_COEF': TrainingParameters.IN_VALUE_COEF, 'EX_VALUE_COEF': TrainingParameters.EX_VALUE_COEF,
111 |             'POLICY_COEF': TrainingParameters.POLICY_COEF,
112 |             'VALID_COEF': TrainingParameters.VALID_COEF, 'BLOCK_COEF': TrainingParameters.BLOCK_COEF,
113 |             'N_EPOCHS': TrainingParameters.N_EPOCHS, 'N_ENVS': TrainingParameters.N_ENVS,
114 |             'N_MAX_STEPS': TrainingParameters.N_MAX_STEPS,
115 |             'N_STEPS': TrainingParameters.N_STEPS, 'MINIBATCH_SIZE': TrainingParameters.MINIBATCH_SIZE,
116 |             'DEMONSTRATION_PROB': TrainingParameters.DEMONSTRATION_PROB,
117 |             'NET_SIZE': NetParameters.NET_SIZE, 'NUM_CHANNEL': NetParameters.NUM_CHANNEL,
118 |             'GOAL_REPR_SIZE': NetParameters.GOAL_REPR_SIZE, 'VECTOR_LEN': NetParameters.VECTOR_LEN,
119 |             'N_POSITION': NetParameters.N_POSITION,
120 |             'D_MODEL': NetParameters.D_MODEL, 'D_HIDDEN': NetParameters.D_HIDDEN, 'N_LAYERS': NetParameters.N_LAYERS,
121 |             'N_HEAD': NetParameters.N_HEAD, 'D_K': NetParameters.D_K, 'D_V': NetParameters.D_V,
122 |             'DIST_FACTOR': TieBreakingParameters.DIST_FACTOR, 'K': IntrinsicParameters.K,
123 |             'CAPACITY': IntrinsicParameters.CAPACITY, 'ADD_THRESHOLD': IntrinsicParameters.ADD_THRESHOLD,
124 |             'N_ADD_INTRINSIC': IntrinsicParameters.N_ADD_INTRINSIC,
125 |             'SURROGATE1': IntrinsicParameters.SURROGATE1, 'SURROGATE2': IntrinsicParameters.SURROGATE2,
126 |             'SEED': SetupParameters.SEED, 'USE_GPU_LOCAL': SetupParameters.USE_GPU_LOCAL,
127 |             'USE_GPU_GLOBAL': SetupParameters.USE_GPU_GLOBAL,
128 |             'NUM_GPU': SetupParameters.NUM_GPU, 'RETRAIN': RecordingParameters.RETRAIN,
129 |             'WANDB': RecordingParameters.WANDB,
130 |             'TENSORBOARD': RecordingParameters.TENSORBOARD, 'TXT_WRITER': RecordingParameters.TXT_WRITER,
131 |             'ENTITY': RecordingParameters.ENTITY,
132 |             'TIME': RecordingParameters.TIME, 'EXPERIMENT_PROJECT': RecordingParameters.EXPERIMENT_PROJECT,
133 |             'EXPERIMENT_NAME': RecordingParameters.EXPERIMENT_NAME,
134 |             'EXPERIMENT_NOTE': RecordingParameters.EXPERIMENT_NOTE,
135 |             'SAVE_INTERVAL': RecordingParameters.SAVE_INTERVAL, "BEST_INTERVAL": RecordingParameters.BEST_INTERVAL,
136 |             'GIF_INTERVAL': RecordingParameters.GIF_INTERVAL, 'EVAL_INTERVAL': RecordingParameters.EVAL_INTERVAL,
137 |             'EVAL_EPISODES': RecordingParameters.EVAL_EPISODES, 'RECORD_BEST': RecordingParameters.RECORD_BEST,
138 |             'MODEL_PATH': RecordingParameters.MODEL_PATH, 'GIFS_PATH': RecordingParameters.GIFS_PATH,
139 |             'SUMMARY_PATH': RecordingParameters.SUMMARY_PATH,
140 |             'TXT_NAME': RecordingParameters.TXT_NAME}
141 | 


--------------------------------------------------------------------------------
/driver.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import os.path as osp
  3 | 
  4 | import numpy as np
  5 | import ray
  6 | import setproctitle
  7 | from torch.utils.tensorboard import SummaryWriter
  8 | import torch
  9 | import wandb
 10 | 
 11 | from alg_parameters import *
 12 | from episodic_buffer import EpisodicBuffer
 13 | from mapf_gym import MAPFEnv
 14 | from model import Model
 15 | from runner import Runner
 16 | from util import set_global_seeds, write_to_tensorboard, write_to_wandb, make_gif, reset_env, one_step, update_perf
 17 | 
 18 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 19 | ray.init(num_gpus=SetupParameters.NUM_GPU)
 20 | print("Welcome to SCRIMP on MAPF!\n")
 21 | 
 22 | 
 23 | def main():
 24 |     """main code"""
 25 |     # preparing for training
 26 |     if RecordingParameters.RETRAIN:
 27 |         restore_path = './local_model'
 28 |         net_path_checkpoint = restore_path + "/net_checkpoint.pkl"
 29 |         net_dict = torch.load(net_path_checkpoint)
 30 | 
 31 |     if RecordingParameters.WANDB:
 32 |         if RecordingParameters.RETRAIN:
 33 |             wandb_id = None
 34 |         else:
 35 |             wandb_id = wandb.util.generate_id()
 36 |         wandb.init(project=RecordingParameters.EXPERIMENT_PROJECT,
 37 |                    name=RecordingParameters.EXPERIMENT_NAME,
 38 |                    entity=RecordingParameters.ENTITY,
 39 |                    notes=RecordingParameters.EXPERIMENT_NOTE,
 40 |                    config=all_args,
 41 |                    id=wandb_id,
 42 |                    resume='allow')
 43 |         print('id is:{}'.format(wandb_id))
 44 |         print('Launching wandb...\n')
 45 | 
 46 |     if RecordingParameters.TENSORBOARD:
 47 |         if RecordingParameters.RETRAIN:
 48 |             summary_path = ''
 49 |         else:
 50 |             summary_path = RecordingParameters.SUMMARY_PATH
 51 |         if not os.path.exists(summary_path):
 52 |             os.makedirs(summary_path)
 53 |         global_summary = SummaryWriter(summary_path)
 54 |         print('Launching tensorboard...\n')
 55 | 
 56 |         if RecordingParameters.TXT_WRITER:
 57 |             txt_path = summary_path + '/' + RecordingParameters.TXT_NAME
 58 |             with open(txt_path, "w") as f:
 59 |                 f.write(str(all_args))
 60 |             print('Logging txt...\n')
 61 | 
 62 |     setproctitle.setproctitle(
 63 |         RecordingParameters.EXPERIMENT_PROJECT + RecordingParameters.EXPERIMENT_NAME + "@" + RecordingParameters.ENTITY)
 64 |     set_global_seeds(SetupParameters.SEED)
 65 | 
 66 |     # create classes
 67 |     global_device = torch.device('cuda') if SetupParameters.USE_GPU_GLOBAL else torch.device('cpu')
 68 |     local_device = torch.device('cuda') if SetupParameters.USE_GPU_LOCAL else torch.device('cpu')
 69 |     global_model = Model(0, global_device, True)
 70 | 
 71 |     if RecordingParameters.RETRAIN:
 72 |         global_model.network.load_state_dict(net_dict['model'])
 73 |         global_model.net_optimizer.load_state_dict(net_dict['optimizer'])
 74 | 
 75 |     envs = [Runner.remote(i + 1) for i in range(TrainingParameters.N_ENVS)]
 76 |     eval_env = MAPFEnv(num_agents=EnvParameters.N_AGENTS)
 77 |     eval_memory = EpisodicBuffer(0, EnvParameters.N_AGENTS)
 78 | 
 79 |     if RecordingParameters.RETRAIN:
 80 |         curr_steps = net_dict["step"]
 81 |         curr_episodes = net_dict["episode"]
 82 |         best_perf = net_dict["reward"]
 83 |     else:
 84 |         curr_steps = curr_episodes = best_perf = 0
 85 | 
 86 |     update_done = True
 87 |     demon = True
 88 |     job_list = []
 89 |     last_test_t = -RecordingParameters.EVAL_INTERVAL - 1
 90 |     last_model_t = -RecordingParameters.SAVE_INTERVAL - 1
 91 |     last_best_t = -RecordingParameters.BEST_INTERVAL - 1
 92 |     last_gif_t = -RecordingParameters.GIF_INTERVAL - 1
 93 | 
 94 |     # start training
 95 |     try:
 96 |         while curr_steps < TrainingParameters.N_MAX_STEPS:
 97 |             if update_done:
 98 |                 # start a data collection
 99 |                 if global_device != local_device:
100 |                     net_weights = global_model.network.to(local_device).state_dict()
101 |                     global_model.network.to(global_device)
102 |                 else:
103 |                     net_weights = global_model.network.state_dict()
104 |                 net_weights_id = ray.put(net_weights)
105 |                 curr_steps_id = ray.put(curr_steps)
106 |                 demon_probs = np.random.rand()
107 |                 if demon_probs < TrainingParameters.DEMONSTRATION_PROB:
108 |                     demon = True
109 |                     for i, env in enumerate(envs):
110 |                         job_list.append(env.imitation.remote(net_weights_id, curr_steps_id))
111 |                 else:
112 |                     demon = False
113 |                     for i, env in enumerate(envs):
114 |                         job_list.append(env.run.remote(net_weights_id, curr_steps_id))
115 | 
116 |             # get data from multiple processes
117 |             done_id, job_list = ray.wait(job_list, num_returns=TrainingParameters.N_ENVS)
118 |             update_done = True if job_list == [] else False
119 |             done_len = len(done_id)
120 |             job_results = ray.get(done_id)
121 |             if demon:
122 |                 # get imitation learning data
123 |                 mb_obs, mb_vector, mb_actions, mb_hidden_state = [], [], [], []
124 |                 mb_message = []
125 |                 for results in range(done_len):
126 |                     mb_obs.append(job_results[results][0])
127 |                     mb_vector.append(job_results[results][1])
128 |                     mb_actions.append(job_results[results][2])
129 |                     mb_hidden_state.append(job_results[results][3])
130 |                     mb_message.append(job_results[results][4])
131 |                     curr_episodes += job_results[results][-2]
132 |                     curr_steps += job_results[results][-1]
133 |                 mb_obs = np.concatenate(mb_obs, axis=0)
134 |                 mb_vector = np.concatenate(mb_vector, axis=0)
135 |                 mb_hidden_state = np.concatenate(mb_hidden_state, axis=0)
136 |                 mb_actions = np.concatenate(mb_actions, axis=0)
137 |                 mb_message = np.concatenate(mb_message, axis=0)
138 | 
139 |                 # training of imitation learning
140 |                 mb_imitation_loss = []
141 |                 for start in range(0, np.shape(mb_obs)[0], TrainingParameters.MINIBATCH_SIZE):
142 |                     end = start + TrainingParameters.MINIBATCH_SIZE
143 |                     slices = (arr[start:end] for arr in
144 |                               (mb_obs, mb_vector, mb_actions, mb_hidden_state, mb_message))
145 |                     mb_imitation_loss.append(global_model.imitation_train(*slices))
146 |                 mb_imitation_loss = np.nanmean(mb_imitation_loss, axis=0)
147 | 
148 |                 # record training result
149 |                 if RecordingParameters.WANDB:
150 |                     write_to_wandb(curr_steps, imitation_loss=mb_imitation_loss, evaluate=False)
151 |                 if RecordingParameters.TENSORBOARD:
152 |                     write_to_tensorboard(global_summary, curr_steps, imitation_loss=mb_imitation_loss, evaluate=False)
153 |             else:
154 |                 # get reinforcement learning data
155 |                 curr_steps += done_len * TrainingParameters.N_STEPS
156 |                 mb_obs, mb_vector, mb_returns_in, mb_returns_ex, mb_returns_all, mb_values_in, \
157 |                     mb_values_ex, mb_values_all, mb_actions, mb_ps, mb_hidden_state, mb_train_valid,\
158 |                     mb_blocking = [], [], [], [], [], [], [], [], [], [], [], [], []
159 |                 mb_message = []
160 |                 performance_dict = {'per_r': [], 'per_in_r': [], 'per_ex_r': [], 'per_valid_rate': [],
161 |                                     'per_episode_len': [], 'per_block': [],
162 |                                     'per_leave_goal': [], 'per_final_goals': [], 'per_half_goals': [],
163 |                                     'per_block_acc': [], 'per_max_goals': [], 'per_num_collide': [],
164 |                                     'rewarded_rate': []}
165 |                 for results in range(done_len):
166 |                     mb_obs.append(job_results[results][0])
167 |                     mb_vector.append(job_results[results][1])
168 |                     mb_returns_in.append(job_results[results][2])
169 |                     mb_returns_ex.append(job_results[results][3])
170 |                     mb_returns_all.append(job_results[results][4])
171 |                     mb_values_in.append(job_results[results][5])
172 |                     mb_values_ex.append(job_results[results][6])
173 |                     mb_values_all.append(job_results[results][7])
174 |                     mb_actions.append(job_results[results][8])
175 |                     mb_ps.append(job_results[results][9])
176 |                     mb_hidden_state.append(job_results[results][10])
177 |                     mb_train_valid.append(job_results[results][11])
178 |                     mb_blocking.append(job_results[results][12])
179 |                     mb_message.append(job_results[results][13])
180 |                     curr_episodes += job_results[results][-2]
181 |                     for i in performance_dict.keys():
182 |                         performance_dict[i].append(np.nanmean(job_results[results][-1][i]))
183 | 
184 |                 for i in performance_dict.keys():
185 |                     performance_dict[i] = np.nanmean(performance_dict[i])
186 | 
187 |                 mb_obs = np.concatenate(mb_obs, axis=0)
188 |                 mb_vector = np.concatenate(mb_vector, axis=0)
189 |                 mb_returns_in = np.concatenate(mb_returns_in, axis=0)
190 |                 mb_returns_ex = np.concatenate(mb_returns_ex, axis=0)
191 |                 mb_returns_all = np.concatenate(mb_returns_all, axis=0)
192 |                 mb_values_in = np.concatenate(mb_values_in, axis=0)
193 |                 mb_values_ex = np.concatenate(mb_values_ex, axis=0)
194 |                 mb_values_all = np.concatenate(mb_values_all, axis=0)
195 |                 mb_actions = np.concatenate(mb_actions, axis=0)
196 |                 mb_ps = np.concatenate(mb_ps, axis=0)
197 |                 mb_hidden_state = np.concatenate(mb_hidden_state, axis=0)
198 |                 mb_train_valid = np.concatenate(mb_train_valid, axis=0)
199 |                 mb_blocking = np.concatenate(mb_blocking, axis=0)
200 |                 mb_message = np.concatenate(mb_message, axis=0)
201 | 
202 |                 # training of reinforcement learning
203 |                 mb_loss = []
204 |                 inds = np.arange(done_len * TrainingParameters.N_STEPS)
205 |                 for _ in range(TrainingParameters.N_EPOCHS):
206 |                     np.random.shuffle(inds)
207 |                     for start in range(0, done_len * TrainingParameters.N_STEPS, TrainingParameters.MINIBATCH_SIZE):
208 |                         end = start + TrainingParameters.MINIBATCH_SIZE
209 |                         mb_inds = inds[start:end]
210 |                         slices = (arr[mb_inds] for arr in
211 |                                   (mb_obs, mb_vector, mb_returns_in, mb_returns_ex, mb_returns_all, mb_values_in,
212 |                                    mb_values_ex, mb_values_all, mb_actions, mb_ps, mb_hidden_state,
213 |                                    mb_train_valid, mb_blocking, mb_message))
214 |                         mb_loss.append(global_model.train(*slices))
215 | 
216 |                 # record training result
217 |                 if RecordingParameters.WANDB:
218 |                     write_to_wandb(curr_steps, performance_dict, mb_loss, evaluate=False)
219 |                 if RecordingParameters.TENSORBOARD:
220 |                     write_to_tensorboard(global_summary, curr_steps, performance_dict, mb_loss, evaluate=False)
221 | 
222 |             if (curr_steps - last_test_t) / RecordingParameters.EVAL_INTERVAL >= 1.0:
223 |                 # if save gif
224 |                 if (curr_steps - last_gif_t) / RecordingParameters.GIF_INTERVAL >= 1.0:
225 |                     save_gif = True
226 |                     last_gif_t = curr_steps
227 |                 else:
228 |                     save_gif = False
229 | 
230 |                 # evaluate training model
231 |                 last_test_t = curr_steps
232 |                 with torch.no_grad():
233 |                     # greedy_eval_performance_dict = evaluate(eval_env,eval_memory, global_model,
234 |                     # global_device, save_gif, curr_steps, True)
235 |                     eval_performance_dict = evaluate(eval_env, eval_memory, global_model, global_device, save_gif,
236 |                                                      curr_steps, False)
237 |                 # record evaluation result
238 |                 if RecordingParameters.WANDB:
239 |                     # write_to_wandb(curr_steps, greedy_eval_performance_dict, evaluate=True, greedy=True)
240 |                     write_to_wandb(curr_steps, eval_performance_dict, evaluate=True, greedy=False)
241 |                 if RecordingParameters.TENSORBOARD:
242 |                     # write_to_tensorboard(global_summary, curr_steps, greedy_eval_performance_dict, evaluate=True,
243 |                     #                      greedy=True)
244 |                     write_to_tensorboard(global_summary, curr_steps, eval_performance_dict, evaluate=True, greedy=False,
245 |                                          )
246 | 
247 |                 print('episodes: {}, step: {},episode reward: {}, final goals: {} \n'.format(
248 |                     curr_episodes, curr_steps, eval_performance_dict['per_r'],
249 |                     eval_performance_dict['per_final_goals']))
250 |                 # save model with the best performance
251 |                 if RecordingParameters.RECORD_BEST:
252 |                     if eval_performance_dict['per_r'] > best_perf and (
253 |                             curr_steps - last_best_t) / RecordingParameters.BEST_INTERVAL >= 1.0:
254 |                         best_perf = eval_performance_dict['per_r']
255 |                         last_best_t = curr_steps
256 |                         print('Saving best model \n')
257 |                         model_path = osp.join(RecordingParameters.MODEL_PATH, 'best_model')
258 |                         if not os.path.exists(model_path):
259 |                             os.makedirs(model_path)
260 |                         path_checkpoint = model_path + "/net_checkpoint.pkl"
261 |                         net_checkpoint = {"model": global_model.network.state_dict(),
262 |                                           "optimizer": global_model.net_optimizer.state_dict(),
263 |                                           "step": curr_steps,
264 |                                           "episode": curr_episodes,
265 |                                           "reward": best_perf}
266 |                         torch.save(net_checkpoint, path_checkpoint)
267 | 
268 |             # save model
269 |             if (curr_steps - last_model_t) / RecordingParameters.SAVE_INTERVAL >= 1.0:
270 |                 last_model_t = curr_steps
271 |                 print('Saving Model !\n')
272 |                 model_path = osp.join(RecordingParameters.MODEL_PATH, '%.5i' % curr_steps)
273 |                 os.makedirs(model_path)
274 |                 path_checkpoint = model_path + "/net_checkpoint.pkl"
275 |                 net_checkpoint = {"model": global_model.network.state_dict(),
276 |                                   "optimizer": global_model.net_optimizer.state_dict(),
277 |                                   "step": curr_steps,
278 |                                   "episode": curr_episodes,
279 |                                   "reward": eval_performance_dict['per_r']}
280 |                 torch.save(net_checkpoint, path_checkpoint)
281 | 
282 |     except KeyboardInterrupt:
283 |         print("CTRL-C pressed. killing remote workers")
284 |     finally:
285 |         # save final model
286 |         print('Saving Final Model !\n')
287 |         model_path = RecordingParameters.MODEL_PATH + '/final'
288 |         os.makedirs(model_path)
289 |         path_checkpoint = model_path + "/net_checkpoint.pkl"
290 |         net_checkpoint = {"model": global_model.network.state_dict(),
291 |                           "optimizer": global_model.net_optimizer.state_dict(),
292 |                           "step": curr_steps,
293 |                           "episode": curr_episodes,
294 |                           "reward": eval_performance_dict['per_r']}
295 |         torch.save(net_checkpoint, path_checkpoint)
296 |         global_summary.close()
297 |         # killing
298 |         for e in envs:
299 |             ray.kill(e)
300 |         if RecordingParameters.WANDB:
301 |             wandb.finish()
302 | 
303 | 
304 | def evaluate(eval_env, episodic_buffer, model, device, save_gif, curr_steps, greedy):
305 |     """Evaluate Model."""
306 |     eval_performance_dict = {'per_r': [], 'per_ex_r': [], 'per_in_r': [], 'per_valid_rate': [], 'per_episode_len': [],
307 |                              'per_block': [], 'per_leave_goal': [], 'per_final_goals': [], 'per_half_goals': [],
308 |                              'per_block_acc': [], 'per_max_goals': [], 'per_num_collide': [], 'rewarded_rate': []}
309 |     episode_frames = []
310 | 
311 |     for i in range(RecordingParameters.EVAL_EPISODES):
312 |         num_agent = EnvParameters.N_AGENTS
313 | 
314 |         # reset environment and buffer
315 |         message = torch.zeros((1, num_agent, NetParameters.NET_SIZE)).to(device)
316 |         hidden_state = (torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device),
317 |                         torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device))
318 | 
319 |         done, valid_actions, obs, vector, _ = reset_env(eval_env, num_agent)
320 |         episodic_buffer.reset(curr_steps, num_agent)
321 |         new_xy = eval_env.get_positions()
322 |         episodic_buffer.batch_add(new_xy)
323 | 
324 |         one_episode_perf = {'num_step': 0, 'episode_reward': 0, 'invalid': 0, 'block': 0,
325 |                             'num_leave_goal': 0, 'wrong_blocking': 0, 'num_collide': 0, 'reward_count': 0,
326 |                             'ex_reward': 0, 'in_reward': 0}
327 |         if save_gif:
328 |             episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))
329 | 
330 |         # stepping
331 |         while not done:
332 |             # predict
333 |             actions, pre_block, hidden_state, num_invalid, v_all, ps, message = model.evaluate(obs, vector,
334 |                                                                                                valid_actions,
335 |                                                                                                hidden_state,
336 |                                                                                                greedy,
337 |                                                                                                episodic_buffer.no_reward,
338 |                                                                                                message, num_agent)
339 |             one_episode_perf['invalid'] += num_invalid
340 | 
341 |             # move
342 |             rewards, valid_actions, obs, vector, _, done, _, num_on_goals, one_episode_perf, max_on_goals, \
343 |                 _, _, on_goal = one_step(eval_env, one_episode_perf, actions, pre_block, model, v_all, hidden_state,
344 |                                          ps, episodic_buffer.no_reward, message, episodic_buffer, num_agent)
345 | 
346 |             new_xy = eval_env.get_positions()
347 |             processed_rewards, be_rewarded, intrinsic_reward, min_dist = episodic_buffer.if_reward(new_xy, rewards,
348 |                                                                                                    done, on_goal)
349 |             one_episode_perf['reward_count'] += be_rewarded
350 |             vector[:, :, 3] = rewards
351 |             vector[:, :, 4] = intrinsic_reward
352 |             vector[:, :, 5] = min_dist
353 | 
354 |             if save_gif:
355 |                 episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))
356 | 
357 |             one_episode_perf['episode_reward'] += np.sum(processed_rewards)
358 |             one_episode_perf['ex_reward'] += np.sum(rewards)
359 |             one_episode_perf['in_reward'] += np.sum(intrinsic_reward)
360 |             if one_episode_perf['num_step'] == EnvParameters.EPISODE_LEN // 2:
361 |                 eval_performance_dict['per_half_goals'].append(num_on_goals)
362 | 
363 |             if done:
364 |                 # save gif
365 |                 if save_gif:
366 |                     if not os.path.exists(RecordingParameters.GIFS_PATH):
367 |                         os.makedirs(RecordingParameters.GIFS_PATH)
368 |                     images = np.array(episode_frames)
369 |                     make_gif(images,
370 |                              '{}/steps_{:d}_reward{:.1f}_final_goals{:.1f}_greedy{:d}.gif'.format(
371 |                                  RecordingParameters.GIFS_PATH,
372 |                                  curr_steps, one_episode_perf[
373 |                                      'episode_reward'],
374 |                                  num_on_goals, greedy))
375 |                     save_gif = False
376 | 
377 |                 eval_performance_dict = update_perf(one_episode_perf, eval_performance_dict, num_on_goals, max_on_goals,
378 |                                                     num_agent)
379 | 
380 |     # average performance of multiple episodes
381 |     for i in eval_performance_dict.keys():
382 |         eval_performance_dict[i] = np.nanmean(eval_performance_dict[i])
383 | 
384 |     return eval_performance_dict
385 | 
386 | 
387 | if __name__ == "__main__":
388 |     main()
389 | 


--------------------------------------------------------------------------------
/episodic_buffer.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | 
  3 | import numpy as np
  4 | 
  5 | from alg_parameters import *
  6 | 
  7 | 
  8 | class EpisodicBuffer(object):
  9 |     """create a parallel episodic buffer for all agents"""
 10 | 
 11 |     def __init__(self, total_step, num_agent):
 12 |         """initialization"""
 13 |         self._capacity = int(IntrinsicParameters.CAPACITY)
 14 |         self.xy_memory = np.zeros((self._capacity, num_agent, 2))
 15 |         self._count = np.zeros(num_agent, dtype=np.int64)
 16 |         self.num_agent = num_agent
 17 |         self.min_step = IntrinsicParameters.N_ADD_INTRINSIC
 18 |         self.surrogate1 = IntrinsicParameters.SURROGATE1
 19 |         self.surrogate2 = IntrinsicParameters.SURROGATE2
 20 |         self.no_reward = False
 21 |         if total_step < self.min_step:
 22 |             self.no_reward = True
 23 | 
 24 |     @property
 25 |     def capacity(self):
 26 |         return self._capacity
 27 | 
 28 |     def id_len(self, id_index):
 29 |         """current size"""
 30 |         return min(self._count[id_index], self._capacity)
 31 | 
 32 |     def reset(self, total_step, num_agent):
 33 |         """reset the buffer"""
 34 |         self.num_agent = num_agent
 35 |         self.no_reward = False
 36 |         if total_step < self.min_step:
 37 |             self.no_reward = True
 38 |         self._count = np.zeros(self.num_agent, dtype=np.int64)
 39 |         self.xy_memory = np.zeros((self._capacity, self.num_agent, 2))
 40 | 
 41 |     def add(self, xy_position, id_index):
 42 |         """add an position to the buffer"""
 43 |         if self._count[id_index] >= self._capacity:
 44 |             index = np.random.randint(low=0, high=self._capacity)
 45 |         else:
 46 |             index = self._count[id_index]
 47 | 
 48 |         self.xy_memory[index, id_index] = xy_position
 49 |         self._count[id_index] += 1
 50 | 
 51 |     def batch_add(self, xy_position):
 52 |         """add position batch to the buffer"""
 53 |         self.xy_memory[0] = xy_position
 54 |         self._count += 1
 55 | 
 56 |     def if_reward(self, new_xy, rewards, done, on_goal):
 57 |         """familiarity between the current position and the ones from the buffer"""
 58 |         processed_rewards = np.zeros((1, self.num_agent))
 59 |         bonus = np.zeros((1, self.num_agent))
 60 |         reward_count = 0
 61 |         min_dist = np.zeros((1, self.num_agent))
 62 | 
 63 |         for i in range(self.num_agent):
 64 |             size = self.id_len(i)
 65 |             new_xy_array = np.array([new_xy[i]] * int(size))
 66 |             dist = np.sqrt(np.sum(np.square(new_xy_array - self.xy_memory[:size, i]), axis=-1))
 67 |             novelty = np.asarray(dist < random.randint(1, IntrinsicParameters.K), dtype=np.int64)
 68 | 
 69 |             aggregated = np.max(novelty)
 70 |             bonus[:, i] = np.asarray([0.0 if done or on_goal[i] else self.surrogate2 - aggregated])
 71 |             scale_factor = self.surrogate1
 72 |             if self.no_reward:
 73 |                 scale_factor = 0.0
 74 |             intrinsic_reward = scale_factor * bonus[:, i]
 75 |             processed_rewards[:, i] = rewards[:, i] + intrinsic_reward
 76 |             if all(intrinsic_reward != 0):
 77 |                 reward_count += 1
 78 | 
 79 |             min_dist[:, i] = np.min(dist)
 80 |             if min_dist[:, i] >= IntrinsicParameters.ADD_THRESHOLD:
 81 |                 self.add(new_xy[i], i)
 82 | 
 83 |         return processed_rewards, reward_count, bonus, min_dist
 84 | 
 85 |     def image_if_reward(self, new_xy, done, on_goal):
 86 |         """similar to if_reward but it is only used when breaking a tie"""
 87 |         bonus = np.zeros((1, self.num_agent))
 88 |         min_dist = np.zeros((1, self.num_agent))
 89 | 
 90 |         for i in range(self.num_agent):
 91 |             size = self.id_len(i)
 92 |             new_xy_array = np.array([new_xy[i]] * int(size))
 93 |             dist = np.sqrt(np.sum(np.square(new_xy_array - self.xy_memory[:size, i]), axis=-1))
 94 |             novelty = np.asarray(dist < random.randint(1, IntrinsicParameters.K), dtype=np.int64)
 95 | 
 96 |             aggregated = np.max(novelty)
 97 |             bonus[:, i] = np.asarray([0.0 if done or on_goal[i] else self.surrogate2 - aggregated])
 98 |             min_dist[:, i] = np.min(dist)
 99 | 
100 |         return bonus, min_dist
101 | 


--------------------------------------------------------------------------------
/eval_model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | import wandb
  6 | 
  7 | from alg_parameters import *
  8 | from episodic_buffer import EpisodicBuffer
  9 | from mapf_gym import MAPFEnv
 10 | from model import Model
 11 | from util import reset_env, make_gif, set_global_seeds
 12 | 
 13 | NUM_TIMES = 100
 14 | CASE = [[8, 10, 0], [8, 10, 0.15], [8, 10, 0.3], [16, 20, 0.0], [16, 20, 0.15], [16, 20, 0.3], [32, 30, 0.0],
 15 |         [32, 30, 0.15], [32, 30, 0.3], [64, 40, 0.0], [64, 40, 0.15], [64, 40, 0.3], [128, 40, 0.0],
 16 |         [128, 40, 0.15], [128, 40, 0.3]]
 17 | set_global_seeds(SetupParameters.SEED)
 18 | 
 19 | 
 20 | def one_step(env0, actions, model0, pre_value, input_state, ps, one_episode_perf, message, episodic_buffer0):
 21 |     obs, vector, reward, done, _, on_goal, _, _, _, _, _, max_on_goal, num_collide, _, modify_actions = env0.joint_step(
 22 |         actions, one_episode_perf['episode_len'], model0, pre_value, input_state, ps, no_reward=False, message=message,
 23 |         episodic_buffer=episodic_buffer0)
 24 | 
 25 |     one_episode_perf['collide'] += num_collide
 26 |     vector[:, :, -1] = modify_actions
 27 |     one_episode_perf['episode_len'] += 1
 28 |     return reward, obs, vector, done, one_episode_perf, max_on_goal, on_goal
 29 | 
 30 | 
 31 | def evaluate(eval_env, model0, device, episodic_buffer0, num_agent, save_gif0):
 32 |     """Evaluate Model."""
 33 |     one_episode_perf = {'episode_len': 0, 'max_goals': 0, 'collide': 0, 'success_rate': 0}
 34 |     episode_frames = []
 35 | 
 36 |     done, _, obs, vector, _ = reset_env(eval_env, num_agent)
 37 | 
 38 |     episodic_buffer0.reset(2e6, num_agent)
 39 |     new_xy = eval_env.get_positions()
 40 |     episodic_buffer0.batch_add(new_xy)
 41 | 
 42 |     message = torch.zeros((1, num_agent, NetParameters.NET_SIZE)).to(torch.device('cpu'))
 43 |     hidden_state = (torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device),
 44 |                     torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device))
 45 | 
 46 |     if save_gif0:
 47 |         episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))
 48 | 
 49 |     while not done:
 50 |         actions, hidden_state, v_all, ps, message = model0.final_evaluate(obs, vector, hidden_state, message, num_agent,
 51 |                                                                           greedy=False)
 52 | 
 53 |         rewards, obs, vector, done, one_episode_perf, max_on_goals, on_goal = one_step(eval_env, actions, model0, v_all,
 54 |                                                                                        hidden_state, ps,
 55 |                                                                                        one_episode_perf, message,
 56 |                                                                                        episodic_buffer0)
 57 |         new_xy = eval_env.get_positions()
 58 |         processed_rewards, _, intrinsic_reward, min_dist = episodic_buffer0.if_reward(new_xy, rewards, done, on_goal)
 59 | 
 60 |         vector[:, :, 3] = rewards
 61 |         vector[:, :, 4] = intrinsic_reward
 62 |         vector[:, :, 5] = min_dist
 63 | 
 64 |         if save_gif0:
 65 |             episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))
 66 | 
 67 |         if done:
 68 |             if one_episode_perf['episode_len'] < EnvParameters.EPISODE_LEN - 1:
 69 |                 one_episode_perf['success_rate'] = 1
 70 |             one_episode_perf['max_goals'] = max_on_goals
 71 |             one_episode_perf['collide'] = one_episode_perf['collide'] / (
 72 |                     (one_episode_perf['episode_len'] + 1) * num_agent)
 73 |             if save_gif0:
 74 |                 if not os.path.exists(RecordingParameters.GIFS_PATH):
 75 |                     os.makedirs(RecordingParameters.GIFS_PATH)
 76 |                 images = np.array(episode_frames)
 77 |                 make_gif(images, '{}/evaluation.gif'.format(
 78 |                     RecordingParameters.GIFS_PATH))
 79 | 
 80 |     return one_episode_perf
 81 | 
 82 | 
 83 | if __name__ == "__main__":
 84 |     # download trained model0
 85 |     model_path = './final'
 86 |     path_checkpoint = model_path + "/net_checkpoint.pkl"
 87 |     model = Model(0, torch.device('cpu'))
 88 |     model.network.load_state_dict(torch.load(path_checkpoint)['model'])
 89 | 
 90 |     # recording
 91 |     wandb_id = wandb.util.generate_id()
 92 |     wandb.init(project='MAPF_evaluation',
 93 |                name='evaluation_global_SCRIMP',
 94 |                entity=RecordingParameters.ENTITY,
 95 |                notes=RecordingParameters.EXPERIMENT_NOTE,
 96 |                config=all_args,
 97 |                id=wandb_id,
 98 |                resume='allow')
 99 |     print('id is:{}'.format(wandb_id))
100 |     print('Launching wandb...\n')
101 |     save_gif = True
102 | 
103 |     # start evaluation
104 |     for k in CASE:
105 |         # remember to modify the corresponding code (size,prob) in the 'mapf_gym.py'
106 |         env = MAPFEnv(num_agents=k[0], size=k[1], prob=k[2])
107 |         episodic_buffer = EpisodicBuffer(2e6, k[0])
108 | 
109 |         all_perf_dict = {'episode_len': [], 'max_goals': [], 'collide': [], 'success_rate': []}
110 |         all_perf_dict_std = {'episode_len': [], 'max_goals': [], 'collide': []}
111 |         print('agent: {}, world: {}, obstacle: {}'.format(k[0], k[1], k[2]))
112 | 
113 |         for j in range(NUM_TIMES):
114 |             eval_performance_dict = evaluate(env, model, torch.device('cpu'), episodic_buffer, k[0], save_gif)
115 |             save_gif = False  # here we only record gif once
116 |             if j % 20 == 0:
117 |                 print(j)
118 | 
119 |             for i in eval_performance_dict.keys():  # for one episode
120 |                 if i == 'episode_len':
121 |                     if eval_performance_dict['success_rate'] == 1:
122 |                         all_perf_dict[i].append(eval_performance_dict[i])  # only record success episode
123 |                     else:
124 |                         continue
125 |                 else:
126 |                     all_perf_dict[i].append(eval_performance_dict[i])
127 | 
128 |         for i in all_perf_dict.keys():  # for all episodes
129 |             if i != 'success_rate':
130 |                 all_perf_dict_std[i] = np.std(all_perf_dict[i])
131 |             all_perf_dict[i] = np.nanmean(all_perf_dict[i])
132 | 
133 |         print('EL: {}, MR: {}, CO: {},SR:{}'.format(round(all_perf_dict['episode_len'], 2),
134 |                                                     round(all_perf_dict['max_goals'], 2),
135 |                                                     round(all_perf_dict['collide'] * 100, 2),
136 |                                                     all_perf_dict['success_rate'] * 100))
137 |         print('EL_STD: {}, MR_STD: {}, CO_STD: {}'.format(round(all_perf_dict_std['episode_len'], 2),
138 |                                                           round(all_perf_dict_std['max_goals'], 2),
139 |                                                           round(all_perf_dict_std['collide'] * 100, 2)))
140 |         print('-----------------------------------------------------------------------------------------------')
141 | 
142 |     print('finished')
143 |     wandb.finish()
144 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn.functional as F
  4 | import torch.optim as optim
  5 | from torch.cuda.amp.autocast_mode import autocast
  6 | from torch.cuda.amp.grad_scaler import GradScaler
  7 | 
  8 | from alg_parameters import *
  9 | from net import SCRIMPNet
 10 | 
 11 | 
 12 | class Model(object):
 13 |     """model0 of agents"""
 14 | 
 15 |     def __init__(self, env_id, device, global_model=False):
 16 |         """initialization"""
 17 |         self.ID = env_id
 18 |         self.device = device
 19 |         self.network = SCRIMPNet().to(device)  # neural network
 20 |         if global_model:
 21 |             self.net_optimizer = optim.Adam(self.network.parameters(), lr=TrainingParameters.lr)
 22 |             # self.multi_gpu_net = torch.nn.DataParallel(self.network) # training on multiple GPU
 23 |             self.net_scaler = GradScaler()  # automatic mixed precision
 24 | 
 25 |     def step(self, observation, vector, valid_action, input_state, no_reward, message, num_agent):
 26 |         """using neural network in training for prediction"""
 27 |         num_invalid = 0
 28 |         observation = torch.from_numpy(observation).to(self.device)
 29 |         vector = torch.from_numpy(vector).to(self.device)
 30 |         ps, v_in, v_ex, block, _, output_state, _, message = self.network(observation, vector, input_state,
 31 |                                                                           message)
 32 | 
 33 |         actions = np.zeros(num_agent)
 34 |         ps = np.squeeze(ps.cpu().detach().numpy())
 35 |         v_in = v_in.cpu().detach().numpy()  # intrinsic state values
 36 |         v_ex = v_ex.cpu().detach().numpy()  # extrinsic  state values
 37 |         scale_factor = IntrinsicParameters.SURROGATE1
 38 |         if no_reward:
 39 |             scale_factor = 0.0
 40 |         v_all = v_ex + scale_factor * v_in  # total state values
 41 |         block = np.squeeze(block.cpu().detach().numpy())
 42 | 
 43 |         for i in range(num_agent):
 44 |             if np.argmax(ps[i], axis=-1) not in valid_action[i]:
 45 |                 num_invalid += 1
 46 |             # choose action from complete action distribution
 47 |             actions[i] = np.random.choice(range(EnvParameters.N_ACTIONS), p=ps[i].ravel())
 48 |         return actions, ps, v_in, v_ex, v_all, block, output_state, num_invalid, message
 49 | 
 50 |     def evaluate(self, observation, vector, valid_action, input_state, greedy, no_reward, message, num_agent):
 51 |         """using neural network in evaluations of training code for prediction"""
 52 |         num_invalid = 0
 53 |         eval_action = np.zeros(num_agent)
 54 |         observation = torch.from_numpy(np.asarray(observation)).to(self.device)
 55 |         vector = torch.from_numpy(vector).to(self.device)
 56 |         ps, v_in, v_ex, block, _, output_state, _, message = self.network(observation, vector, input_state, message)
 57 | 
 58 |         ps = np.squeeze(ps.cpu().detach().numpy())
 59 |         block = np.squeeze(block.cpu().detach().numpy())
 60 |         greedy_action = np.argmax(ps, axis=-1)
 61 |         scale_factor = IntrinsicParameters.SURROGATE1
 62 |         if no_reward:
 63 |             scale_factor = 0.0
 64 |         v_all = v_ex + scale_factor * v_in
 65 |         v_all = v_all.cpu().detach().numpy()
 66 | 
 67 |         for i in range(num_agent):
 68 |             if greedy_action[i] not in valid_action[i]:
 69 |                 num_invalid += 1
 70 |             if not greedy:
 71 |                 eval_action[i] = np.random.choice(range(EnvParameters.N_ACTIONS), p=ps[i].ravel())
 72 |         if greedy:
 73 |             eval_action = greedy_action
 74 |         return eval_action, block, output_state, num_invalid, v_all, ps, message
 75 | 
 76 |     def value(self, obs, vector, input_state, no_reward, message):
 77 |         """using neural network to predict state values"""
 78 |         obs = torch.from_numpy(obs).to(self.device)
 79 |         vector = torch.from_numpy(vector).to(self.device)
 80 |         _, v_in, v_ex, _, _, _, _, _ = self.network(obs, vector, input_state, message)
 81 |         v_in = v_in.cpu().detach().numpy()
 82 |         v_ex = v_ex.cpu().detach().numpy()
 83 | 
 84 |         scale_factor = IntrinsicParameters.SURROGATE1
 85 |         if no_reward:
 86 |             scale_factor = 0.0
 87 |         v_all = v_ex + scale_factor * v_in
 88 |         return v_in, v_ex, v_all
 89 | 
 90 |     def generate_state(self, obs, vector, input_state, message):
 91 |         """generate corresponding hidden states and messages in imitation learning"""
 92 |         obs = torch.from_numpy(obs).to(self.device)
 93 |         vector = torch.from_numpy(vector).to(self.device)
 94 |         _, _, _, _, _, output_state, _, message = self.network(obs, vector, input_state, message)
 95 |         return output_state, message
 96 | 
 97 |     def final_evaluate(self, observation, vector, input_state, message, num_agent, greedy):
 98 |         """using neural network in independent evaluations for prediction"""
 99 |         eval_action = np.zeros(num_agent)
100 |         observation = torch.from_numpy(np.asarray(observation)).to(self.device)
101 |         vector = torch.from_numpy(vector).to(self.device)
102 |         ps, v_in, v_ex, _, _, output_state, _, message = self.network(observation, vector, input_state, message)
103 | 
104 |         ps = np.squeeze(ps.cpu().detach().numpy())
105 |         greedy_action = np.argmax(ps, axis=-1)
106 |         scale_factor = IntrinsicParameters.SURROGATE1
107 |         v_all = v_ex + scale_factor * v_in
108 |         v_all = v_all.cpu().detach().numpy()
109 | 
110 |         for i in range(num_agent):
111 |             if not greedy:
112 |                 eval_action[i] = np.random.choice(range(EnvParameters.N_ACTIONS), p=ps[i].ravel())
113 |         if greedy:
114 |             eval_action = greedy_action
115 |         return eval_action, output_state, v_all, ps, message
116 | 
117 |     def train(self, observation, vector, returns_in, returns_ex, returns_all, old_v_in, old_v_ex, old_v_all, action,
118 |               old_ps, input_state, train_valid, target_blockings, message):
119 |         """train model0 by reinforcement learning"""
120 |         self.net_optimizer.zero_grad()
121 |         # from numpy to torch
122 |         observation = torch.from_numpy(observation).to(self.device)
123 |         vector = torch.from_numpy(vector).to(self.device)
124 |         message = torch.from_numpy(message).to(self.device)
125 | 
126 |         returns_in = torch.from_numpy(returns_in).to(self.device)
127 |         returns_ex = torch.from_numpy(returns_ex).to(self.device)
128 |         returns_all = torch.from_numpy(returns_all).to(self.device)
129 | 
130 |         old_v_in = torch.from_numpy(old_v_in).to(self.device)
131 |         old_v_ex = torch.from_numpy(old_v_ex).to(self.device)
132 |         old_v_all = torch.from_numpy(old_v_all).to(self.device)
133 | 
134 |         action = torch.from_numpy(action).to(self.device)
135 |         action = torch.unsqueeze(action, -1)
136 |         old_ps = torch.from_numpy(old_ps).to(self.device)
137 | 
138 |         train_valid = torch.from_numpy(train_valid).to(self.device)
139 |         target_blockings = torch.from_numpy(target_blockings).to(self.device)
140 | 
141 |         input_state_h = torch.from_numpy(
142 |             np.reshape(input_state[:, 0], (-1, NetParameters.NET_SIZE // 2))).to(self.device)
143 |         input_state_c = torch.from_numpy(
144 |             np.reshape(input_state[:, 1], (-1, NetParameters.NET_SIZE // 2))).to(self.device)
145 |         input_state = (input_state_h, input_state_c)
146 | 
147 |         advantage = returns_all - old_v_all
148 |         advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-6)
149 | 
150 |         with autocast():
151 |             new_ps, new_v_in, new_v_ex, block, policy_sig, _, _, _ = self.network(observation, vector, input_state,
152 |                                                                                   message)
153 |             new_p = new_ps.gather(-1, action)
154 |             old_p = old_ps.gather(-1, action)
155 |             ratio = torch.exp(torch.log(torch.clamp(new_p, 1e-6, 1.0)) - torch.log(torch.clamp(old_p, 1e-6, 1.0)))
156 | 
157 |             entropy = torch.mean(-torch.sum(new_ps * torch.log(torch.clamp(new_ps, 1e-6, 1.0)), dim=-1, keepdim=True))
158 | 
159 |             # intrinsic critic loss
160 |             new_v_in = torch.squeeze(new_v_in)
161 |             new_v_clipped_in = old_v_in + torch.clamp(new_v_in - old_v_in, - TrainingParameters.CLIP_RANGE,
162 |                                                       TrainingParameters.CLIP_RANGE)
163 |             value_losses1_in = torch.square(new_v_in - returns_in)
164 |             value_losses2_in = torch.square(new_v_clipped_in - returns_in)
165 |             critic_loss_in = torch.mean(torch.maximum(value_losses1_in, value_losses2_in))
166 | 
167 |             # extrinsic critic loss
168 |             new_v_ex = torch.squeeze(new_v_ex)
169 |             new_v_clipped_ex = old_v_ex + torch.clamp(new_v_ex - old_v_ex, - TrainingParameters.CLIP_RANGE,
170 |                                                       TrainingParameters.CLIP_RANGE)
171 |             value_losses1_ex = torch.square(new_v_ex - returns_ex)
172 |             value_losses2_ex = torch.square(new_v_clipped_ex - returns_ex)
173 |             critic_loss_ex = torch.mean(torch.maximum(value_losses1_ex, value_losses2_ex))
174 | 
175 |             # actor loss
176 |             ratio = torch.squeeze(ratio)
177 |             policy_losses = advantage * ratio
178 |             policy_losses2 = advantage * torch.clamp(ratio, 1.0 - TrainingParameters.CLIP_RANGE,
179 |                                                      1.0 + TrainingParameters.CLIP_RANGE)
180 |             policy_loss = torch.mean(torch.min(policy_losses, policy_losses2))
181 | 
182 |             # valid loss and blocking loss decreased by supervised learning
183 |             valid_loss = - torch.mean(torch.log(torch.clamp(policy_sig, 1e-6, 1.0 - 1e-6)) *
184 |                                       train_valid + torch.log(torch.clamp(1 - policy_sig, 1e-6, 1.0 - 1e-6)) * (
185 |                                               1 - train_valid))
186 |             block = torch.squeeze(block)
187 |             blocking_loss = - torch.mean(target_blockings * torch.log(torch.clamp(block, 1e-6, 1.0 - 1e-6))
188 |                                          + (1 - target_blockings) * torch.log(torch.clamp(1 - block, 1e-6, 1.0 - 1e-6)))
189 | 
190 |             # total loss
191 |             all_loss = -policy_loss - entropy * TrainingParameters.ENTROPY_COEF + \
192 |                 TrainingParameters.IN_VALUE_COEF * critic_loss_in + \
193 |                 TrainingParameters.EX_VALUE_COEF * critic_loss_ex + TrainingParameters.VALID_COEF * valid_loss \
194 |                 + TrainingParameters.BLOCK_COEF * blocking_loss
195 | 
196 |         clip_frac = torch.mean(torch.greater(torch.abs(ratio - 1.0), TrainingParameters.CLIP_RANGE).float())
197 | 
198 |         self.net_scaler.scale(all_loss).backward()
199 |         self.net_scaler.unscale_(self.net_optimizer)
200 | 
201 |         # Clip gradient
202 |         grad_norm = torch.nn.utils.clip_grad_norm_(self.network.parameters(), TrainingParameters.MAX_GRAD_NORM)
203 | 
204 |         self.net_scaler.step(self.net_optimizer)
205 |         self.net_scaler.update()
206 | 
207 |         stats_list = [all_loss.cpu().detach().numpy(), policy_loss.cpu().detach().numpy(),
208 |                       entropy.cpu().detach().numpy(),
209 |                       critic_loss_in.cpu().detach().numpy(), critic_loss_ex.cpu().detach().numpy(),
210 |                       valid_loss.cpu().detach().numpy(),
211 |                       blocking_loss.cpu().detach().numpy(),
212 |                       clip_frac.cpu().detach().numpy(), grad_norm.cpu().detach().numpy(),
213 |                       torch.mean(advantage).cpu().detach().numpy()]  # for recording
214 | 
215 |         return stats_list
216 | 
217 |     def set_weights(self, weights):
218 |         """load global weights to local models"""
219 |         self.network.load_state_dict(weights)
220 | 
221 |     def imitation_train(self, observation, vector, optimal_action, input_state, message):
222 |         """train model0 by imitation learning"""
223 |         self.net_optimizer.zero_grad()
224 | 
225 |         observation = torch.from_numpy(observation).to(self.device)
226 |         vector = torch.from_numpy(vector).to(self.device)
227 |         optimal_action = torch.from_numpy(optimal_action).to(self.device)
228 |         message = torch.from_numpy(message).to(self.device)
229 |         input_state_h = torch.from_numpy(
230 |             np.reshape(input_state[:, 0], (-1, NetParameters.NET_SIZE // 2))).to(self.device)
231 |         input_state_c = torch.from_numpy(
232 |             np.reshape(input_state[:, 1], (-1, NetParameters.NET_SIZE // 2))).to(self.device)
233 | 
234 |         input_state = (input_state_h, input_state_c)
235 | 
236 |         with autocast():
237 |             _, _, _, _, _, _, logits, _ = self.network(observation, vector, input_state, message)
238 |             logits = torch.swapaxes(logits, 1, 2)
239 |             imitation_loss = F.cross_entropy(logits, optimal_action)
240 | 
241 |         self.net_scaler.scale(imitation_loss).backward()
242 |         self.net_scaler.unscale_(self.net_optimizer)
243 |         # clip gradient
244 |         grad_norm = torch.nn.utils.clip_grad_norm_(self.network.parameters(), TrainingParameters.MAX_GRAD_NORM)
245 |         self.net_scaler.step(self.net_optimizer)
246 |         self.net_scaler.update()
247 | 
248 |         return [imitation_loss.cpu().detach().numpy(), grad_norm.cpu().detach().numpy()]  # for recording
249 | 


--------------------------------------------------------------------------------
/net.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.cuda.amp.autocast_mode import autocast
  6 | 
  7 | from alg_parameters import *
  8 | from transformer.encoder_model import TransformerEncoder
  9 | 
 10 | 
 11 | def normalized_columns_initializer(weights, std=1.0):
 12 |     """weight initializer"""
 13 |     out = torch.randn(weights.size())
 14 |     out *= std / torch.sqrt(out.pow(2).sum(1).expand_as(out))
 15 |     return out
 16 | 
 17 | 
 18 | def weights_init(m):
 19 |     """initialize weights"""
 20 |     class_name = m.__class__.__name__
 21 |     if class_name.find('Conv') != -1:
 22 |         weight_shape = list(m.weight.data.size())
 23 |         fan_in = np.prod(weight_shape[1:4])
 24 |         fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
 25 |         w_bound = np.sqrt(6. / (fan_in + fan_out))
 26 |         m.weight.data.uniform_(-w_bound, w_bound)
 27 |         m.bias.data.fill_(0)
 28 |     elif class_name.find('Linear') != -1:
 29 |         weight_shape = list(m.weight.data.size())
 30 |         fan_in = weight_shape[1]
 31 |         fan_out = weight_shape[0]
 32 |         w_bound = np.sqrt(6. / (fan_in + fan_out))
 33 |         m.weight.data.uniform_(-w_bound, w_bound)
 34 |         if m.bias is not None:
 35 |             m.bias.data.fill_(0)
 36 | 
 37 | 
 38 | class SCRIMPNet(nn.Module):
 39 |     """network with transformer-based communication mechanism"""
 40 | 
 41 |     def __init__(self):
 42 |         """initialization"""
 43 |         super(SCRIMPNet, self).__init__()
 44 |         # observation encoder
 45 |         self.conv1 = nn.Conv2d(NetParameters.NUM_CHANNEL, NetParameters.NET_SIZE // 4, 2, 1, 1)
 46 |         self.conv1a = nn.Conv2d(NetParameters.NET_SIZE // 4, NetParameters.NET_SIZE // 4, 2, 1, 1)
 47 |         self.conv1b = nn.Conv2d(NetParameters.NET_SIZE // 4, NetParameters.NET_SIZE // 4, 2, 1, 1)
 48 |         self.pool1 = nn.MaxPool2d(2)
 49 |         self.conv2 = nn.Conv2d(NetParameters.NET_SIZE // 4, NetParameters.NET_SIZE // 2, 2, 1, 1)
 50 |         self.conv2a = nn.Conv2d(NetParameters.NET_SIZE // 2, NetParameters.NET_SIZE // 2, 2, 1, 1)
 51 |         self.conv2b = nn.Conv2d(NetParameters.NET_SIZE // 2, NetParameters.NET_SIZE // 2, 2, 1, 1)
 52 |         self.pool2 = nn.MaxPool2d(2)
 53 |         self.conv3 = nn.Conv2d(NetParameters.NET_SIZE // 2, NetParameters.NET_SIZE - NetParameters.GOAL_REPR_SIZE, 3,
 54 |                                1, 0)
 55 |         self.fully_connected_1 = nn.Linear(NetParameters.VECTOR_LEN, NetParameters.GOAL_REPR_SIZE)
 56 |         self.fully_connected_2 = nn.Linear(NetParameters.NET_SIZE, NetParameters.NET_SIZE)
 57 |         self.fully_connected_3 = nn.Linear(NetParameters.NET_SIZE, NetParameters.NET_SIZE)
 58 |         self.lstm_memory = nn.LSTMCell(input_size=NetParameters.NET_SIZE, hidden_size=NetParameters.NET_SIZE // 2)
 59 | 
 60 |         # output heads
 61 |         self.fully_connected_4 = nn.Linear(NetParameters.NET_SIZE * 2 + NetParameters.NET_SIZE // 2,
 62 |                                            NetParameters.NET_SIZE)
 63 |         self.policy_layer = nn.Linear(NetParameters.NET_SIZE, EnvParameters.N_ACTIONS)
 64 |         self.softmax_layer = nn.Softmax(dim=-1)
 65 |         self.value_layer_in = nn.Linear(NetParameters.NET_SIZE, 1)
 66 |         self.value_layer_ex = nn.Linear(NetParameters.NET_SIZE, 1)
 67 |         self.blocking_layer = nn.Linear(NetParameters.NET_SIZE, 1)
 68 |         self.message_layer = nn.Linear(NetParameters.NET_SIZE, NetParameters.NET_SIZE)
 69 | 
 70 |         # transformer based communication block
 71 |         self.communication_layer = TransformerEncoder(d_model=NetParameters.D_MODEL,
 72 |                                                       d_hidden=NetParameters.D_HIDDEN,
 73 |                                                       n_layers=NetParameters.N_LAYERS, n_head=NetParameters.N_HEAD,
 74 |                                                       d_k=NetParameters.D_K,
 75 |                                                       d_v=NetParameters.D_V, n_position=NetParameters.N_POSITION)
 76 | 
 77 |         self.apply(weights_init)
 78 |         for p in self.communication_layer.parameters():
 79 |             if p.dim() > 1:
 80 |                 nn.init.xavier_uniform_(p)
 81 | 
 82 |     @autocast()
 83 |     def forward(self, obs, vector, input_state, message):
 84 |         """run neural network"""
 85 |         num_agent = obs.shape[1]
 86 |         obs = torch.reshape(obs, (-1,  NetParameters.NUM_CHANNEL, EnvParameters.FOV_SIZE, EnvParameters.FOV_SIZE))
 87 |         vector = torch.reshape(vector, (-1, NetParameters.VECTOR_LEN))
 88 |         # matrix input
 89 |         x_1 = F.relu(self.conv1(obs))
 90 |         x_1 = F.relu(self.conv1a(x_1))
 91 |         x_1 = F.relu(self.conv1b(x_1))
 92 |         x_1 = self.pool1(x_1)
 93 |         x_1 = F.relu(self.conv2(x_1))
 94 |         x_1 = F.relu(self.conv2a(x_1))
 95 |         x_1 = F.relu(self.conv2b(x_1))
 96 |         x_1 = self.pool2(x_1)
 97 |         x_1 = self.conv3(x_1)
 98 |         x_1 = F.relu(x_1.view(x_1.size(0), -1))
 99 |         # vector input
100 |         x_2 = F.relu(self.fully_connected_1(vector))
101 |         # Concatenation
102 |         x_3 = torch.cat((x_1, x_2), -1)
103 |         h1 = F.relu(self.fully_connected_2(x_3))
104 |         h1 = self.fully_connected_3(h1)
105 |         h2 = F.relu(h1 + x_3)
106 |         # LSTM cell
107 |         memories, memory_c = self.lstm_memory(h2, input_state)
108 |         output_state = (memories, memory_c)
109 |         memories = torch.reshape(memories, (-1, num_agent, NetParameters.NET_SIZE // 2))
110 |         h2 = torch.reshape(h2, (-1, num_agent, NetParameters.NET_SIZE))
111 | 
112 |         c1 = self.communication_layer(message)
113 | 
114 |         c1 = torch.cat([c1, memories, h2], -1)
115 |         c1 = F.relu(self.fully_connected_4(c1))
116 |         policy_layer = self.policy_layer(c1)
117 |         policy = self.softmax_layer(policy_layer)
118 |         policy_sig = torch.sigmoid(policy_layer)
119 |         value_in = self.value_layer_in(c1)
120 |         value_ex = self.value_layer_ex(c1)
121 |         blocking = torch.sigmoid(self.blocking_layer(c1))
122 |         message = self.message_layer(c1)
123 |         return policy, value_in, value_ex, blocking, policy_sig, output_state, policy_layer, message
124 | 
125 | 


--------------------------------------------------------------------------------
/od_mstar3/SortedCollection.py:
--------------------------------------------------------------------------------
  1 | from bisect import bisect_left, bisect_right
  2 | 
  3 | 
  4 | class SortedCollection(object):
  5 |     """Sequence sorted by a key function.
  6 | 
  7 |     SortedCollection() is much easier to work with than using bisect()
  8 |     directly. It supports key functions like those use in sorted(),
  9 |     min(), and max(). The result of the key function call is saved so
 10 |     that keys can be searched efficiently.
 11 | 
 12 |     Instead of returning an insertion-point which can be hard to
 13 |     interpret, the five find-methods return a specific item in the
 14 |     sequence. They can scan for exact matches, the last item
 15 |     less-than-or-equal to a key, or the first item greater-than-or-equal
 16 |     to a key.
 17 | 
 18 |     Once found, an item's ordinal position can be located with the
 19 |     index() method. New items can be added with the insert() and
 20 |     insert_right() methods.  Old items can be deleted with the remove()
 21 |     method.
 22 | 
 23 |     The usual sequence methods are provided to support indexing,
 24 |     slicing, length lookup, clearing, copying, forward and reverse
 25 |     iteration, contains checking, item counts, item removal, and a nice
 26 |     looking repr.
 27 | 
 28 |     Finding and indexing are O(log n) operations while iteration and
 29 |     insertion are O(n).  The initial sort is O(n log n).
 30 | 
 31 |     The key function is stored in the 'key' attibute for easy
 32 |     introspection or so that you can assign a new key function
 33 |     (triggering an automatic re-sort).
 34 | 
 35 |     In short, the class was designed to handle all of the common use
 36 |     cases for bisect but with a simpler API and support for key
 37 |     functions.
 38 | 
 39 |     >>> from pprint import pprint
 40 |     >>> from operator import itemgetter
 41 | 
 42 |     >>> s = SortedCollection(key=itemgetter(2))
 43 |     >>> for record in [
 44 |     ...         ('roger', 'young', 30),
 45 |     ...         ('angela', 'jones', 28),
 46 |     ...         ('bill', 'smith', 22),
 47 |     ...         ('david', 'thomas', 32)]:
 48 |     ...     s.insert(record)
 49 | 
 50 |     >>> pprint(list(s))         # show records sorted by age
 51 |     [('bill', 'smith', 22),
 52 |      ('angela', 'jones', 28),
 53 |      ('roger', 'young', 30),
 54 |      ('david', 'thomas', 32)]
 55 | 
 56 |     >>> s.find_le(29)           # find oldest person aged 29 or younger
 57 |     ('angela', 'jones', 28)
 58 |     >>> s.find_lt(28)           # find oldest person under 28
 59 |     ('bill', 'smith', 22)
 60 |     >>> s.find_gt(28)           # find youngest person over 28
 61 |     ('roger', 'young', 30)
 62 | 
 63 |     >>> r = s.find_ge(32)       # find youngest person aged 32 or older
 64 |     >>> s.index(r)              # get the index of their record
 65 |     3
 66 |     >>> s[3]                    # fetch the record at that index
 67 |     ('david', 'thomas', 32)
 68 | 
 69 |     >>> s.key = itemgetter(0)   # now sort by first name
 70 |     >>> pprint(list(s))
 71 |     [('angela', 'jones', 28),
 72 |      ('bill', 'smith', 22),
 73 |      ('david', 'thomas', 32),
 74 |      ('roger', 'young', 30)]
 75 | 
 76 |     """
 77 | 
 78 |     def __init__(self, iterable=(), key=None):
 79 |         self._given_key = key
 80 |         key = (lambda x: x) if key is None else key
 81 |         decorated = sorted((key(item), item) for item in iterable)
 82 |         self._keys = [k for k, item in decorated]
 83 |         self._items = [item for k, item in decorated]
 84 |         self._key = key
 85 | 
 86 |     def _getkey(self):
 87 |         return self._key
 88 | 
 89 |     def _setkey(self, key):
 90 |         if key is not self._key:
 91 |             self.__init__(self._items, key=key)
 92 | 
 93 |     def _delkey(self):
 94 |         self._setkey(None)
 95 | 
 96 |     key = property(_getkey, _setkey, _delkey, 'key function')
 97 | 
 98 |     def clear(self):
 99 |         self.__init__([], self._key)
100 | 
101 |     def copy(self):
102 |         return self.__class__(self, self._key)
103 | 
104 |     def __len__(self):
105 |         return len(self._items)
106 | 
107 |     def __getitem__(self, i):
108 |         return self._items[i]
109 | 
110 |     def __iter__(self):
111 |         return iter(self._items)
112 | 
113 |     def __reversed__(self):
114 |         return reversed(self._items)
115 | 
116 |     def __repr__(self):
117 |         return '%s(%r, key=%s)' % (
118 |             self.__class__.__name__,
119 |             self._items,
120 |             getattr(self._given_key, '__name__', repr(self._given_key))
121 |         )
122 | 
123 |     def __reduce__(self):
124 |         return self.__class__, (self._items, self._given_key)
125 | 
126 |     def __contains__(self, item):
127 |         """So if an item has its key value changed, you are not going to
128 |         be able to recover its value
129 |         """
130 |         k = self._key(item)
131 |         i = bisect_left(self._keys, k)
132 |         j = bisect_right(self._keys, k)
133 |         return item in self._items[i:j]
134 | 
135 |     def resort(self):
136 |         """If all the key values are expected to have changed
137 |         dramatically, resort the items list, and regenerate the internal
138 |         representation
139 | 
140 |         Note that this operation is not guaranteed to be stable, as it
141 |         depends on the ordering of a key, item pair, and the ordering of
142 |         the items is effectively arbitrary
143 |         """
144 |         decorated = sorted((self.key(item), item) for item in self._items)
145 |         self._keys = [k for k, item in decorated]
146 |         self._items = [item for k, item in decorated]
147 | 
148 |     def index(self, item):
149 |         """Find the position of an item.  Raise ValueError if not found."""
150 |         k = self._key(item)
151 |         i = bisect_left(self._keys, k)
152 |         j = bisect_right(self._keys, k)
153 |         return self._items[i:j].index(item) + i
154 | 
155 |     def count(self, item):
156 |         """Return number of occurrences of item"""
157 |         k = self._key(item)
158 |         i = bisect_left(self._keys, k)
159 |         j = bisect_right(self._keys, k)
160 |         return self._items[i:j].count(item)
161 | 
162 |     def insert(self, item):
163 |         """Insert a new item.  If equal keys are found, add to the left"""
164 |         k = self._key(item)
165 |         i = bisect_left(self._keys, k)
166 |         self._keys.insert(i, k)
167 |         self._items.insert(i, item)
168 | 
169 |     def insert_right(self, item):
170 |         """Insert a new item.  If equal keys are found, add to the right"""
171 |         k = self._key(item)
172 |         i = bisect_right(self._keys, k)
173 |         self._keys.insert(i, k)
174 |         self._items.insert(i, item)
175 | 
176 |     def remove(self, item):
177 |         """Remove first occurence of item.
178 | 
179 |         Raise ValueError if not found
180 |         """
181 |         i = self.index(item)
182 |         del self._keys[i]
183 |         del self._items[i]
184 | 
185 |     def pop(self):
186 |         """returns the rightmost value (greatest key value)"""
187 |         del self._keys[-1]
188 |         return self._items.pop()
189 | 
190 |     def consistent_pop(self):
191 |         """returns the rightmost value (greatest key value) and checks
192 |         whether its cached key value is consistent with its current
193 |         cost.
194 | 
195 |         returns:
196 |           value with greatest cached key
197 |           boolean: True if cached key is same as current key
198 |         """
199 |         cached_key = self._keys.pop()
200 |         val = self._items.pop()
201 |         return val, self._key(val) == cached_key
202 | 
203 |     def find(self, k):
204 |         """Return first item with a key == k.
205 |         Will fail if the key value of k was changed since it was
206 |         inserted
207 | 
208 |         Raise ValueError if not found.
209 |         """
210 |         i = bisect_left(self._keys, k)
211 |         if i != len(self) and self._keys[i] == k:
212 |             return self._items[i]
213 |         raise ValueError('No item found with key equal to: %r' % (k, ))
214 | 
215 |     def find_le(self, k):
216 |         """Return last item with a key <= k.
217 | 
218 |         Raise ValueError if not found.
219 |         """
220 |         i = bisect_right(self._keys, k)
221 |         if i:
222 |             return self._items[i - 1]
223 |         raise ValueError('No item found with key at or below: %r' % (k, ))
224 | 
225 |     def find_lt(self, k):
226 |         """Return last item with a key < k.
227 | 
228 |         Raise ValueError if not found.
229 |         """
230 |         i = bisect_left(self._keys, k)
231 |         if i:
232 |             return self._items[i - 1]
233 |         raise ValueError('No item found with key below: %r' % (k, ))
234 | 
235 |     def find_ge(self, k):
236 |         """Return first item with a key >= equal to k.
237 | 
238 |         Raise ValueError if not found
239 |         """
240 |         i = bisect_left(self._keys, k)
241 |         if i != len(self):
242 |             return self._items[i]
243 |         raise ValueError('No item found with key at or above: %r' % (k, ))
244 | 
245 |     def find_gt(self, k):
246 |         """Return first item with a key > k.
247 | 
248 |         Raise ValueError if not found
249 |         """
250 |         i = bisect_right(self._keys, k)
251 |         if i != len(self):
252 |             return self._items[i]
253 |         raise ValueError('No item found with key above: %r' % (k, ))
254 | 


--------------------------------------------------------------------------------
/od_mstar3/__pycache__/SortedCollection.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/SortedCollection.cpython-37.pyc


--------------------------------------------------------------------------------
/od_mstar3/__pycache__/col_set_addition.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/col_set_addition.cpython-37.pyc


--------------------------------------------------------------------------------
/od_mstar3/__pycache__/interface.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/interface.cpython-37.pyc


--------------------------------------------------------------------------------
/od_mstar3/__pycache__/od_mstar.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/od_mstar.cpython-37.pyc


--------------------------------------------------------------------------------
/od_mstar3/__pycache__/workspace_graph.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/workspace_graph.cpython-37.pyc


--------------------------------------------------------------------------------
/od_mstar3/build/lib.linux-x86_64-3.7/cpp_mstar.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/lib.linux-x86_64-3.7/cpp_mstar.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/od_mstar3/build/temp.linux-x86_64-3.7/col_checker.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/col_checker.o


--------------------------------------------------------------------------------
/od_mstar3/build/temp.linux-x86_64-3.7/cython_od_mstar.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/cython_od_mstar.o


--------------------------------------------------------------------------------
/od_mstar3/build/temp.linux-x86_64-3.7/grid_planning.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/grid_planning.o


--------------------------------------------------------------------------------
/od_mstar3/build/temp.linux-x86_64-3.7/grid_policy.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/grid_policy.o


--------------------------------------------------------------------------------
/od_mstar3/build/temp.linux-x86_64-3.7/od_mstar.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/od_mstar.o


--------------------------------------------------------------------------------
/od_mstar3/build/temp.linux-x86_64-3.7/policy.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/policy.o


--------------------------------------------------------------------------------
/od_mstar3/col_checker.cpp:
--------------------------------------------------------------------------------
 1 | #include "col_checker.hpp"
 2 | #include "col_set.hpp"
 3 | 
 4 | using namespace mstar;
 5 | 
 6 | // /**
 7 | //  * Performs simple pebble motion on the graph collision checking
 8 | //  *
 9 | //  * @param c1 source
10 | //  * @param c2 target
11 | //  *
12 | //  * @return collision set of the edge
13 | //  */
14 | // template<class T>
15 | // ColSet simple_edge_check(const T &c1,
16 | // 			 const T&c2){
17 | //   ColSet col;
18 | //   for (uint i = 0; i < c1.size(); i++){
19 | //     for (uint j = i; j < c1.size(); j++){
20 | //       if (c2[i] == c2[j] || (c1[i] == c2[j] && c1[j] == c2[i])){
21 | // 	add_col_set_in_place({{i, j}}, col);
22 | //       }
23 | //     }
24 | //   }
25 | //   return col;
26 | // }
27 | 
28 | /**
29 |  * Iterator version
30 |  */
31 | template<class T>
32 | ColSet simple_edge_check(T source_start, T source_end,
33 | 			 T target_start, T target_end){
34 |   int size = source_end - source_start;
35 |   ColSet col;
36 |   for (uint i = 0; i < size; i++){
37 |     for (uint j = i + 1; j < size; j++){
38 |       if (*(target_start + i) == *(target_start + j) ||
39 | 	  (*(source_start + i) == *(target_start + j) &&
40 | 	   *(source_start + j) == *(target_start + i))){
41 | 	add_col_set_in_place({{i, j}}, col);
42 |       }
43 |     }
44 |   }
45 |   return col;
46 | }
47 | 
48 | ColSet SimpleGraphColCheck::check_edge(const OdCoord &c1,
49 | 				       const OdCoord &c2,
50 | 				       const std::vector<int> ids) const{
51 |   if (c2.is_standard()){
52 |     return simple_edge_check(c1.coord.cbegin(), c1.coord.cend(),
53 | 			     c2.coord.cbegin(), c2.coord.cend());
54 |   }
55 |   // c2 is an intermediate vertex, so only check for collisions between
56 |   // robots with an assigned move in c2
57 |   int size = c2.move_tuple.size();
58 |   return simple_edge_check(c1.coord.cbegin(), c1.coord.cbegin() + size,
59 | 			   c2.move_tuple.cbegin(), c2.move_tuple.cend());
60 | }
61 | 


--------------------------------------------------------------------------------
/od_mstar3/col_checker.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_COL_CHECKER_H
 2 | #define MSTAR_COL_CHECKER_H
 3 | 
 4 | #include "mstar_type_defs.hpp"
 5 | 
 6 | namespace mstar{
 7 | 
 8 |   class ColChecker{
 9 |   public:
10 |     virtual ~ColChecker(){};
11 |     virtual ColSet check_edge(const OdCoord &c1, const OdCoord &c2,
12 | 			      const std::vector<int> ids) const = 0;
13 |   };
14 | 
15 |   /**
16 |    * Collision checker for simple bidirected graphs, where no edges overlap
17 |    *
18 |    * I.e. for pebble motion on the graph where you only have to worry about
19 |    * robots swapping positions, and not about diagonals crossing.  Allows
20 |    * for rotations
21 |    */
22 |     class SimpleGraphColCheck: public ColChecker{
23 |     public:
24 |       /**
25 |        * Checks for collision while traversing the edge from c1 to c2
26 |        *
27 |        * Finds collisions both while traversing the edge and when at the
28 |        * goal configuration.
29 |        *
30 |        * @param c1 the source coordinate of the edge
31 |        * @param c2 the target coordinate of the edge
32 |        * @param ids list of global robot ids.  Necessary for heterogeneous
33 |        *            robots
34 |        *
35 |        * @return the collision set containing the colliding robots
36 |        */
37 |       ColSet check_edge(const OdCoord &c1, const OdCoord &c2,
38 | 			const std::vector<int> ids) const;
39 |   };
40 | };
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/od_mstar3/col_set.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef MSTAR_COL_SET_H
  2 | #define MSTAR_COL_SET_H
  3 | 
  4 | #include <algorithm>
  5 | 
  6 | /***********************************************************************
  7 |  * Provides logic for combining collision sets
  8 |  *
  9 |  * Assumes that a collision set is of form T<T<int>> where T are
 10 |  * collections and the inner collection is sorted
 11 |  **********************************************************************/
 12 | 
 13 | namespace mstar{
 14 |   /**
 15 |    * tests if two sets are disjoint
 16 |    * 
 17 |    * Currently doesnt try to leverage sorted.  Empty sets will always be
 18 |    * treated as disjoint
 19 |    *
 20 |    * @param s1, s2 The sets to check
 21 |    *
 22 |    * @return True if disjoint, else false
 23 |    */
 24 |   template <class T> bool is_disjoint(const T &s1, const T &s2){
 25 |     for (auto i = s1.cbegin(); i != s1.cend(); ++i){
 26 |       for (auto j = s2.cbegin(); j != s2.cend(); ++j){
 27 | 	if (*i == *j){
 28 | 	  return false;
 29 | 	}
 30 |       }
 31 |     }
 32 |     return true;
 33 |   };
 34 | 
 35 |   /**
 36 |    * Tests if s1 is a superset of s2
 37 |    *
 38 |    * Uses == to compare elements.  Does not leverage sorted values
 39 |    *
 40 |    * @param s1 potential superset
 41 |    * @param s2 potential subset
 42 |    *
 43 |    * @return True if s1 is a superset of s2, otherwise false
 44 |    */
 45 |   template <class T> bool is_superset(const T &s1, const T &s2){
 46 |     for (auto j = s2.cbegin(); j != s2.cend(); ++j){
 47 |       bool included = false;
 48 |       for (auto i = s1.cbegin(); i != s1.cend(); ++i){
 49 | 	if (*i == *j){
 50 | 	  included = true;
 51 | 	  break;
 52 | 	}
 53 |       }
 54 |       if (!included){
 55 | 	return false;
 56 |       }
 57 |     }
 58 |     return true;
 59 |   };
 60 | 
 61 |   /**
 62 |    * specialization of is_superset that exploits sorted values
 63 |    */
 64 |   template <class T, class... extra>
 65 |   bool is_superset(const std::set<T, extra...> &s1,
 66 | 		   const std::set<T, extra...> &s2){
 67 |     return std::includes(s1.cbegin(), s1.cend(), s2.cbegin(), s2.cend());
 68 |   }
 69 | 
 70 |   /**
 71 |    * Merges two sorted sets
 72 |    *
 73 |    * Elements of the set must be sorted.  Container of the sets must be
 74 |    * resizeable for output
 75 |    *
 76 |    */
 77 |   template <class T> T merge(const T &s1, const T &s2){
 78 |     T out(s1.size() + s2.size());
 79 |     auto it = std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(),
 80 | 			     out.begin());
 81 |     out.resize(it - out.begin());
 82 |     return out;
 83 |   }
 84 | 
 85 |   template <class T, class... extra>
 86 |   std::set<T, extra...> merge(std::set<T, extra...> s1,
 87 | 			      const std::set<T, extra...> &s2){
 88 |     s1.insert(s2.cbegin(), s2.cend());
 89 |     return s1;
 90 |   }
 91 | 
 92 |   /**
 93 |    * Adds c1 to c2
 94 |    *
 95 |    * Mutates c2
 96 |    *
 97 |    * @param c1 collision set 1
 98 |    * @param c2 collision set 2
 99 |    *
100 |    * @return true if c2 is changed, else false
101 |    */
102 |   template <class T, template<class, class...> class TT, class... args>
103 |   bool add_col_set_in_place(TT<T, args...> c1, TT<T, args...> &c2){
104 |     bool changed = false;
105 |     // TODO: This could be more efficient
106 |     while (c1.size() > 0){
107 |       int i = 0;
108 |       // whether c1[-1] overlaps any element of c2
109 |       bool found_overlap = false;
110 |       while (i < c2.size()){
111 |   	if (!is_disjoint(c2[i], c1.back())) {
112 |   	  // found overlap
113 |   	  if (is_superset(c2[i], c1.back())){
114 |   	      // current element in c1 contained by the element in c2, so
115 |   	      // the c1 element can be dropped
116 |   	      c1.pop_back();
117 |   	      found_overlap = true;
118 |   	      break;
119 |   	    }
120 |   	  // Non-trivial overlap.  Need to add the union of the current
121 |   	  // elements back to c1 to check if there is any further overlap
122 |   	  // with elements of c2
123 | 	  
124 | 	  // Could just merge in place, but doubt it really matters
125 | 	  c1.back().insert(c2[i].cbegin(), c2[i].cend());
126 |   	  c2.erase(c2.begin() + i);
127 | 	  found_overlap = true;
128 | 	  changed = true;
129 | 	  break;
130 | 	} else{
131 | 	  // no overlap between c1[-1] and c2[i], so check next element
132 | 	  // of c2
133 | 	  ++i;
134 | 	}
135 |       }
136 |       if (!found_overlap){
137 | 	// no overlap between c1[-1] and all elements of c2, so can
138 | 	// be added to c2 (although this will force checks against
139 | 	c2.push_back(c1.back());
140 | 	c1.pop_back();
141 | 	changed = true;
142 |       }
143 |     }
144 |     return changed;
145 |   }
146 | 
147 |   /**
148 |    * Adds two collision sets, c1, c2
149 |    *
150 |    * The template monstrosity is necessary because std::vectors require two
151 |    * parameters of which we care about one (the type), and the other is the
152 |    * allocator.  Other containers may require more
153 |    *
154 |    * @param c1 collision set 1
155 |    * @param c2 collision set 2
156 |    *
157 |    * @return A new collision set formed by adding c1 and c2
158 |    */
159 |   template <class T, template<class, class...> class TT, class... args>
160 |   TT<T, args...> add_col_set(TT<T, args...> c1, TT<T, args...> c2){
161 |     add_col_set_in_place(c1, c2);
162 |     return c2;
163 |   }
164 | 
165 |   /**
166 |    * Computes the collision set used for expansion
167 |    *
168 |    * Based the generating collision set of a vertex, which is the collision
169 |    * set of the vertex's predecessor when the predecessor was expanded.  It
170 |    * is useful as it specifies which partial solutions have been cached.
171 |    * For example, if the generating collision set is {{1, 2}}, then a
172 |    * subplanner already knows how to get robots 1 and 2 to the goal, and it
173 |    * is more efficient to directly query that subplanner, rather than set the
174 |    * collision set to be empty.
175 |    *
176 |    * However, you have to account for new collisions, as stored in the
177 |    * vertex's collision set.  If a collision set element is a subset of an
178 |    * element of the generating collision set, use the element form the
179 |    * generating collision set.  If a generating collision set element has
180 |    * a non-empty intersection with a element of the collision set that is
181 |    * not a subset, don't use that generating collision set element
182 |    *
183 |    * @param col_set the collision set of the vertex
184 |    * @param gen_set the generating collision set of the vertex
185 |    *
186 |    * @return A new collision set to use when expanding the vertex
187 |    */
188 |   template <class T, template<class, class...> class TT, class... args>
189 |   TT<T, args...> col_set_to_expand(TT<T, args...> col_set,
190 | 				   TT<T, args...> gen_set){
191 |     TT<T, args...> ret;
192 |     while(gen_set.size() > 0){
193 |       // Check the last element of the generating collision set.  Either it
194 |       // can be used, or there is a non-superset intersection, and it must
195 |       // be removed
196 | 
197 |       // Need to keep any elements of the collision set that are subsets
198 |       // of the generating collision set element, as a later element of the
199 |       // collision set may invalidate the generating collision set element
200 |       TT<T, args...> elements_to_remove;
201 | 
202 |       uint i = 0;
203 | 
204 |       bool gen_set_elem_valid = true;
205 |       while (i < col_set.size()){
206 | 	if (is_superset(gen_set.back(), col_set[i])){
207 | 	  elements_to_remove.push_back(col_set[i]);
208 | 	  col_set.erase(col_set.begin() + i);
209 | 	} else if (!is_disjoint(gen_set.back(), col_set[i])){
210 | 	  // generating collision set element has a non-empty intersection
211 | 	  // with a collision set element that is not a sub-set, so is
212 | 	  // invalid
213 | 	  gen_set.pop_back();
214 | 	  // Need to return any collision set elements that were removed as
215 | 	  // being subsets of gen_set.back
216 | 	  col_set.insert(col_set.end(), elements_to_remove.begin(),
217 | 			 elements_to_remove.end());
218 | 	  gen_set_elem_valid = false;
219 | 	  break;
220 | 	} else{
221 | 	  i += 1;
222 | 	}
223 |       }
224 |       if (gen_set_elem_valid){
225 | 	ret.push_back(gen_set.back());
226 | 	gen_set.pop_back();
227 |       }
228 |     }
229 |     // Any remaining collision set elements were not contained by any element
230 |     // of the generating collision set, so should be used directly
231 |     ret.insert(ret.end(), col_set.begin(), col_set.end());
232 |     return ret;
233 |   };
234 |    
235 | }
236 | 
237 | #endif
238 | 


--------------------------------------------------------------------------------
/od_mstar3/col_set_addition.py:
--------------------------------------------------------------------------------
  1 | """Encapsulates the basic collision set addition functions, so they can
  2 | be accessible to any code that uses it
  3 | 
  4 | Also provides exceptions for indicating no solution or out of time
  5 | """
  6 | 
  7 | 
  8 | def add_col_set_recursive(c1, c2):
  9 |     """Returns a new collision set resulting from adding c1 to c2.  No
 10 |     side effecting
 11 | 
 12 |     collision set is done for the recursive case, where
 13 |     ({1, 2}, ) + ({3, 4}, ) = ({1, 2}, {3, 4})
 14 | 
 15 |     c1, c2 - tuples of (immutable) sets
 16 | 
 17 |     returns:
 18 |     recursive collision set containing c1 and c2
 19 | 
 20 |     """
 21 |     # Make shallow copies
 22 |     c1 = list(c1)
 23 |     c2 = list(c2)
 24 |     while len(c1) > 0:
 25 |         i = 0
 26 |         # Whether c1[-1] overlaps with any element of c2
 27 |         found_overlap = False
 28 |         while i < len(c2):
 29 |             if not c2[i].isdisjoint(c1[-1]):
 30 |                 # Found overlap
 31 |                 if c2[i].issuperset(c1[-1]):
 32 |                     # No change in c2
 33 |                     c1.pop()
 34 |                     found_overlap = True
 35 |                     break
 36 |                 # Have found a non-trivial overlap.  Need to add the
 37 |                 # union to  c1 so that we can check if the union has any
 38 |                 # further overlap with elements of c2
 39 |                 temp = c2.pop(i)
 40 |                 # replace c2[i] with the union of c2[i] and c1[-1]
 41 |                 c1.append(temp.union(c1.pop()))
 42 |                 found_overlap = True
 43 |                 break
 44 |             else:
 45 |                 # No overlap between c1[-1] and c2[i], so check next
 46 |                 # element of c2
 47 |                 i += 1
 48 |         if not found_overlap:
 49 |             # c1[-1] has no overlap with any element of c2, so it can be
 50 |             # added as is to c2
 51 |             c2.append(c1.pop())
 52 |     return tuple(c2)
 53 | 
 54 | 
 55 | def add_col_set(c1, c2):
 56 |     """Adds the collision sets c1 to c2.  c2 is assumed to contain a
 57 |     single,
 58 |     possibly empty, set
 59 | 
 60 |     c1, c2 - input collision sets
 61 | 
 62 |     returns:
 63 |     combined collision set containing c1 and c2
 64 | 
 65 |     """
 66 |     temp = frozenset([])
 67 |     if len(c2) >= 1:
 68 |         temp = c2[0]
 69 |         assert len(c2) == 1
 70 |     for i in c1:
 71 |         temp = temp.union(i)
 72 |     if len(temp) == 0:
 73 |         return ()
 74 |     return (temp, )
 75 | 
 76 | 
 77 | def col_set_add(c1, c2, recursive):
 78 |     """Adds two collision sets
 79 | 
 80 |     c1, c2     - input collision sets
 81 |     recursive - boolean, whether to perform recursive M* style addition
 82 | 
 83 |     returns:
 84 |     collision set containing c1 and c2
 85 | 
 86 |     """
 87 |     if recursive:
 88 |         return add_col_set_recursive(c1, c2)
 89 |     else:
 90 |         return add_col_set(c1, c2)
 91 | 
 92 | 
 93 | def effective_col_set(col_set, prev_col_set):
 94 |     """Computes the effective collision set to use given the current
 95 |     collision set and the collision set used to get to the current node
 96 | 
 97 |     Only makes sense when used with recursive M*
 98 | 
 99 |     The purpose of this code is that in recursive M*, you invoke a
100 |     subplanner to figure out how to get to the goal, which caches the
101 |     entire path to the goal .  The next step, you have an empty
102 |     collision set, so you don't query the subplanner with the cached
103 |     path, and have to find a bunch of collisions before using the cached
104 |     solution.  This is intended for use with a memory of what the
105 |     collision set was when you reached a given node.
106 | 
107 |     Computes the "effecitve collision set".  Elements of the memorized
108 |     collision set are used if they have no non-empty intersections with
109 |     elements of the current collision set that are not subsets of the
110 |     memorized component.
111 | 
112 |     elements of col_set are NOT used if they are contained within some
113 |     element of prev_col_set that is used.  Elements of prev_col_set are
114 |     used if they completely contain all elements of col_set with which
115 |     they intersect
116 | 
117 |     col_set      - current collision set
118 |     prev_col_set - "memorized" collision set, i.e. the collision set of
119 |                    the optimal predecessor at the time the path from the
120 |                    optimal predecessor was first found
121 | 
122 |     returns:
123 |     effective collision set.  Consists of the elements of the previous
124 |     collision set, which should index subplanners which have cached
125 |     paths available, and elements of the current collision set which
126 |     are not contained within prev_col_set
127 |     """
128 |     effective_set = []
129 |     prev_col_set = list(prev_col_set)
130 |     col_set = list(col_set)
131 |     while(len(prev_col_set) > 0):
132 |         # Need to keep around the elements of col_set that won't be
133 |         # used, because the containing element of prev_col_set may be
134 |         # invalidated by a later element of col_set
135 |         col_set_to_remove = []
136 |         j = 0
137 |         while (j < len(col_set)):
138 |             if col_set[j].issubset(prev_col_set[-1]):
139 |                 # this element is contained in prev_col_set, so can be
140 |                 # skipped unless prev_col_set-1] is invalidated by some
141 |                 # later element of col_set
142 |                 col_set_to_remove.append(col_set.pop(j))
143 |             elif not col_set[j].isdisjoint(prev_col_set[-1]):
144 |                 # this element partially overlaps prev_col_set,
145 |                 # invalidating it, so cannot use this element of
146 |                 # prev_col_set
147 |                 prev_col_set.pop()
148 |                 # return the elements of col_set we were going to remove
149 |                 col_set.extend(col_set_to_remove)
150 |                 break
151 |             else:
152 |                 j += 1
153 |         else:
154 |             # Never broke, so prev_col_set can be used as part of the
155 |             # effective collision set
156 |             effective_set.append(prev_col_set.pop())
157 |     # Just copy over any elements of col_set that survived
158 |     effective_set.extend(col_set)
159 |     return tuple(effective_set)
160 | 
161 | 
162 | class OutOfTimeError(Exception):
163 |     def __init__(self, value=None):
164 |         self.value = value
165 | 
166 |     def __str__(self):
167 |         return repr(self.value)
168 | 
169 | 
170 | class NoSolutionError(Exception):
171 |     def __init__(self, value=None):
172 |         self.value = value
173 | 
174 |     def __str__(self):
175 |         return repr(self.value)
176 | 
177 | 
178 | class OutOfScopeError(NoSolutionError):
179 |     def __init__(self, value=None, col_set=()):
180 |         self.value = value
181 |         self.col_set = col_set
182 | 
183 |     def __str__(self):
184 |         return repr(self.value)
185 | 


--------------------------------------------------------------------------------
/od_mstar3/cpp_mstar.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/cpp_mstar.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/od_mstar3/cython_od_mstar.pyx:
--------------------------------------------------------------------------------
 1 | # distutils: language = c++
 2 | # distutils: sources = policy.cpp col_checker.cpp od_mstar.cpp grid_policy.cpp grid_planning.cpp
 3 | from libcpp cimport bool
 4 | from libcpp.vector cimport vector
 5 | from libcpp.pair cimport pair
 6 | 
 7 | from od_mstar3.col_set_addition import OutOfTimeError, NoSolutionError
 8 | 
 9 | cdef extern from "grid_planning.hpp" namespace "mstar":
10 |     vector[vector[pair[int, int]]] find_grid_path(
11 |         const vector[vector[bool]] &obstacles,
12 |         const vector[pair[int, int]] &init_pos,
13 |         const vector[pair[int, int]] &goals,
14 |         double inflation, int time_limit) except +
15 | 
16 | def find_path(world, init_pos, goals, inflation, time_limit):
17 |     """Finds a path invoking C++ implementation
18 | 
19 |     Uses recursive ODrM* to explore a 4 connected grid
20 | 
21 |     world - matrix specifying obstacles, 1 for obstacle, 0 for free
22 |     init_pos  - [[x, y], ...] specifying start position for each robot
23 |     goals     - [[x, y], ...] specifying goal position for each robot
24 |     inflation - inflation factor for heuristic
25 |     time_limit - time until failure in seconds
26 | 
27 |     returns:
28 |     [[[x1, y1], ...], [[x2, y2], ...], ...] path in the joint
29 |     configuration space
30 | 
31 |     raises:
32 |     NoSolutionError if problem has no solution
33 |     OutOfTimeError if the planner ran out of time
34 |     """
35 | 
36 |     import resource
37 |     resource.setrlimit(resource.RLIMIT_AS, (2**33,2**33)) # 8Gb
38 | 
39 |     # convert to boolean.  For some reason coercion doesn't seem to
40 |     # work properly
41 |     cdef vector[vector[bool]] obs
42 |     cdef vector[bool] temp
43 |     for row in world:
44 |         temp = vector[bool]()
45 |         for i in row:
46 |             temp.push_back(i == 1)
47 |         obs.push_back(temp)
48 |     try:
49 |         return find_grid_path(obs, init_pos, goals, inflation, time_limit)
50 |     except Exception as e:
51 |         if str(e) == "Out of Time":
52 |             raise OutOfTimeError()
53 |         elif str(e) == "No Solution":
54 |             raise NoSolutionError()
55 |         else:
56 |             raise e
57 | 


--------------------------------------------------------------------------------
/od_mstar3/grid_planning.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <utility>
 3 | #include <memory>
 4 | 
 5 | #include "grid_planning.hpp"
 6 | #include "grid_policy.hpp"
 7 | #include "od_mstar.hpp"
 8 | #include "mstar_type_defs.hpp"
 9 | 
10 | using namespace mstar;
11 | 
12 | /**
13 |  * Converts from (row, column) coordinates to vertex index
14 |  */
15 | OdCoord to_internal(std::vector<std::pair<int, int>> coord,
16 | 				  int cols){
17 |   std::vector<RobCoord> out;
18 |   for (auto &c: coord){
19 |     out.push_back(c.first * cols + c.second);
20 |   }
21 |   return OdCoord(out, {});
22 | };
23 | 
24 | /**
25 |  * Converts from vertex index to (row, column) format
26 |  */
27 | std::vector<std::pair<int, int>> from_internal(OdCoord coord,
28 | 					       int cols){
29 |   std::vector<std::pair<int, int>> out;
30 |   for (auto &c: coord.coord){
31 |     out.push_back({c / cols, c % cols});
32 |   }
33 |   return out;
34 | };
35 | 
36 | std::vector<std::vector<std::pair<int, int>>> mstar::find_grid_path(
37 |   const std::vector<std::vector<bool>> &obstacles,
38 |   const std::vector<std::pair<int, int>> &init_pos,
39 |   const std::vector<std::pair<int, int>> &goals,
40 |   double inflation, int time_limit){
41 |   // compute time limit first, as the policies fully compute 
42 |   // Need to convert time limit to std::chrono format
43 |   time_point t = std::chrono::system_clock::now();
44 |   t += Clock::duration(std::chrono::seconds(time_limit));
45 | 
46 |   int cols = (int) obstacles[0].size();
47 |   OdCoord _init = to_internal(init_pos, cols);
48 |   OdCoord _goal = to_internal(goals, cols);
49 |   std::vector<std::shared_ptr<Policy>> policies = {};
50 |   for (const auto &goal: goals){
51 |     policies.push_back(std::shared_ptr<Policy>(
52 | 			 grid_policy_ptr(obstacles, goal)));
53 |   }
54 |   OdMstar planner(policies, _goal, inflation, t,
55 | 		  std::shared_ptr<ColChecker>(new SimpleGraphColCheck()));
56 |   OdPath path = planner.find_path(_init);
57 |   std::vector<std::vector<std::pair<int, int>>> out;
58 |   for (auto &coord: path){
59 |     out.push_back(from_internal(coord, cols));
60 |   }
61 |   return out;
62 | }
63 | 


--------------------------------------------------------------------------------
/od_mstar3/grid_planning.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_GRID_PLANNING_H
 2 | #define MSTAR_GRID_PLANNING_H
 3 | 
 4 | #include <vector>
 5 | #include <utility>
 6 | 
 7 | /*********************************************************************
 8 |  * Provides convienence functions for planning on 4-connected graphs
 9 |  ********************************************************************/
10 | 
11 | namespace mstar{
12 |   /**
13 |    * Helper function for finding paths in 4 connected paths
14 |    *
15 |    * The world is specified as a matrix where true indicates the presence
16 |    * of obstacles and false indicates a clear space.  Coordinates for
17 |    * individual robots are indicated as (row, column)
18 |    *
19 |    * @param obstacles matrix indicating obstacle positions.  True is obstacle
20 |    * @param init_pos list of (row, column) pairs definining the initial
21 |    *                 position of the robots
22 |    * @param goals list of (row, column) pairs defining the goal configuration
23 |    *              of the robots
24 |    * @param inflation inflation factor used to weight the heuristic
25 |    * @param time_limit seconds until the code declares failure
26 |    *
27 |    * @return Path in the joint configuration space.  Each configuration is
28 |    *         a vector of (row, col) pairs specifying the position of
29 |    *         individual robots
30 |    */
31 |   std::vector<std::vector<std::pair<int, int> > > find_grid_path(
32 |     const std::vector<std::vector<bool> > &obstacles,
33 |     const std::vector<std::pair<int, int> > &init_pos,
34 |     const std::vector<std::pair<int, int> > &goals,
35 |     double inflation, int time_limit);
36 | }
37 | 
38 | #endif
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/od_mstar3/grid_policy.cpp:
--------------------------------------------------------------------------------
 1 | #include "grid_policy.hpp"
 2 | 
 3 | using namespace mstar;
 4 | 
 5 | Graph get_graph(const std::vector<std::vector<bool>> &world_map,
 6 | 		const std::pair<int, int> &goal){
 7 |   int rows = (int) world_map.size();
 8 |   int columns = (int) world_map[0].size();
 9 |   typedef std::pair<int,int> E;
10 |   std::vector<E> edges;
11 |   std::vector<double> weights;
12 | 
13 |   std::vector<std::pair<int, int>> offsets = {{-1, 0}, {0, 1}, {1, 0},
14 | 					      {0, -1}, {0, 0}};
15 |   for (int row = 0; row < rows; ++row){
16 |     for (int col = 0; col < columns; ++col){
17 |       if (world_map[row][col]){
18 | 	continue;
19 |       }
20 |       for (auto &off: offsets){
21 | 	int r = row + off.first;
22 | 	int c = col + off.second;
23 | 	if( r >= 0 && r < rows && c >= 0 && c < columns && ! world_map[r][c]){
24 | 	  // edge from (row, col) to (r, c)
25 | 	  // should be a more direct way, but boost is hating me
26 | 	  edges.push_back({row * columns + col, r * columns + c});
27 | 	  if (row == r && col == c && r == goal.first && c == goal.second){
28 | 	    weights.push_back(0.);
29 | 	  }else{
30 | 	    weights.push_back(1.);
31 | 	  }
32 | 	}
33 |       }
34 |     }
35 |   }
36 |   return Graph(edges.begin(), edges.end(), weights.begin(), rows * columns);
37 | }
38 | 
39 | /**
40 |  * Generates a policy for a 4 connected grid
41 |  *
42 |  * The internal coordinates are of the form row * num_rows + col
43 |  * Allows for weighting at the goal for free
44 |  *
45 |  * @param world_map matrix of values describing grid true for obstacle,
46 |  *                  false for clear
47 |  * @param goal (row, column) of goal
48 |  *
49 |  * @return Policy object describing problem
50 |  */
51 | Policy mstar::grid_policy(const std::vector<std::vector<bool>> &world_map,
52 | 		   const std::pair<int, int> &goal){
53 |   int columns = (int) world_map[0].size();
54 |   return Policy(get_graph(world_map, goal), goal.first * columns + goal.second);
55 | }
56 | 
57 | Policy* mstar::grid_policy_ptr(const std::vector<std::vector<bool>> &world_map,
58 | 			const std::pair<int, int> &goal){
59 |   int columns = (int) world_map[0].size();
60 |   return new Policy(get_graph(world_map, goal),
61 | 		    goal.first * columns + goal.second);
62 | }
63 | 


--------------------------------------------------------------------------------
/od_mstar3/grid_policy.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_GRID_POLICY_H
 2 | #define MSTAR_GRID_POLICY_H
 3 | 
 4 | /**************************************************************************
 5 |  * Generates policy for grid maps
 6 |  **************************************************************************/
 7 | 
 8 | #include <vector>
 9 | #include <utility>
10 | 
11 | #include "mstar_type_defs.hpp"
12 | #include "policy.hpp"
13 | #include <boost/graph/adjacency_list.hpp>
14 | #include <boost/graph/graph_traits.hpp>
15 | 
16 | namespace mstar{
17 | 
18 |   /**
19 |    * Generates a policy for a 4 connected grid
20 |    *
21 |    * The internal coordinates are of the form row * num_rows + col
22 |    * Allows for weighting at the goal for free
23 |    *
24 |    * @param world_map matrix of values describing grid true for obstacle,
25 |    *                  false for clear
26 |    * @param goal (row, column) of goal
27 |    *
28 |    * @return Policy object describing problem
29 |    */
30 |   Policy grid_policy(const std::vector<std::vector<bool>> &world_map,
31 | 		     const std::pair<int, int> &goal);
32 | 
33 |   Policy* grid_policy_ptr(const std::vector<std::vector<bool>> &world_map,
34 | 			  const std::pair<int, int> &goal);
35 | }
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/od_mstar3/interface.py:
--------------------------------------------------------------------------------
  1 | """This module defines interfaces for the low-level graphs and
  2 | policies used in Mstar. In general terms, these classes represent:
  3 | 
  4 |     1.  Graphs representing the configuration space.  These graphs are
  5 |         structured so that each node in the graph represents a
  6 |         configuration, and each edge represents a permissible transition
  7 |         between two different configurations.
  8 | 
  9 |         *All of these graphs subclass the Graph_Interface class
 10 | 
 11 |     2.  Policies, which define paths in a configuration space from an
 12 |         initial configuration to a goal configuration.  Policies are
 13 |         comprised of nodes, each of which represents a configuration
 14 |         in the configuration space.  Each node in a policy has a pointer
 15 |         to its optimal neighbor, i.e., the next node in the optimal path
 16 |         to the goal node.  Policy classes compute optimal paths by using
 17 |         some search algorithm to search the graphs generated in the
 18 |         classes described above.
 19 | 
 20 |         *All of these graphs subclass the Policy_Interface class
 21 | 
 22 |     3.  Configuration graph edge checking, which determines whether
 23 |         moving between two configurations is permissible.  For example,
 24 |         configuration graph edge checking should not allow a robot to
 25 |         move out of bounds of the workspace.
 26 | 
 27 |     4.  Planner edge checking, which determines whether moving between
 28 |         two states of robot positions will result in any collisions.
 29 |         For example, planner edge checking should check to see if two
 30 |         robots pass through each other as they move between positions.
 31 | """
 32 | 
 33 | 
 34 | class Graph_Interface(object):
 35 | 
 36 |     """Interface for configuration space generators
 37 | 
 38 |     This graph interface enumerates the methods that any
 39 |     configuration space generator should implement.  These graphs are
 40 |     used by policy graphs such as A*.
 41 |     """
 42 | 
 43 |     def get_edge_cost(self, coord1, coord2):
 44 |         """Returns edge_cost of going from coord1 to coord2."""
 45 |         raise NotImplementedError
 46 | 
 47 |     def get_neighbors(self, coord):
 48 |         """Returns the collision free neighbors of the specified coord.
 49 | 
 50 |         Return value is a list of tuples each of which are a coordinate
 51 |         """
 52 |         raise NotImplementedError
 53 | 
 54 |     # This is a function to return the in neighbors of a coordinate.
 55 |     # Designed by default to handle un-directed graphs
 56 |     get_in_neighbors = get_neighbors
 57 | 
 58 | 
 59 | class Policy_Interface(object):
 60 | 
 61 |     """Interface showing required implemented functions for all policies
 62 | 
 63 |     This interface enumerates the functions that must be exposed by
 64 |     policies for M* to function correctly. A policy object with this
 65 |     interface provides a route for a single robot.  Underneath the policy
 66 |     interface is a graph object which describes the configuration space
 67 |     through which robots can move.  The underlying graph object does all
 68 |     of the work of calculating the configuration space based on the
 69 |     actual environment in which the robot is moving
 70 | 
 71 |     **All config inputs must be hashable**
 72 |     """
 73 | 
 74 |     def get_cost(self, config):
 75 |         """Returns the cost of moving from given position to goal"""
 76 |         raise NotImplementedError
 77 | 
 78 |     def get_edge_cost(self, config1, config2):
 79 |         """Returns the cost of traversing an edge in the underlying
 80 |         graph
 81 |         """
 82 |         raise NotImplementedError
 83 | 
 84 |     def get_step(self, config):
 85 |         """Returns the configurations of the optimal neighbor of config"""
 86 |         raise NotImplementedError
 87 | 
 88 |     def get_neighbors(self, config):
 89 |         """Returns neighboring configurations of config
 90 | 
 91 |         This function returns the configurations which are next to
 92 |         config
 93 | 
 94 |         Return list of tuples, each of which is a coordinate
 95 |         """
 96 |         raise NotImplementedError
 97 | 
 98 |     def get_graph_size(self, correct_for_size=True):
 99 |         """Returns number of nodes in graph"""
100 |         raise NotImplementedError
101 | 
102 |     def get_limited_offset_neighbors(self, config, max_offset, min_offset=0):
103 |         """Returns set of neighbors between the offset arguments"""
104 |         raise NotImplementedError
105 | 
106 |     def get_offset_neighbors(self, config, offset):
107 |         """Returns neighbors of coord with offset specified by argument"""
108 |         raise NotImplementedError
109 | 
110 |     def get_offsets(self, config):
111 |         """Return the offsets of the neighbors"""
112 |         raise NotImplementedError
113 | 
114 | 
115 | class Config_Edge_Checker(object):
116 |     """Checks robot collisions with objects and edges of workspace"""
117 | 
118 |     def col_check(self, state, recursive):
119 |         """Checks for collisions at a single state
120 | 
121 |         state     - list of coordinates of robots
122 |         recursive - generate collisions sets for rM*
123 | 
124 |         Returns:
125 |         M* collision set in type set if recursive false
126 |         rM* collision set in type set if recursive true
127 |         """
128 |         raise NotImplementedError
129 | 
130 | 
131 | class Planner_Edge_Checker(object):
132 |     """Checks for robot collisions on an edge in a planner's graph
133 | 
134 |     Currently, no methods have to be implemented because the collision
135 |     methods change based on the graph.
136 |     """
137 | 
138 |     def pass_through(self, state1, state2, recursive=False):
139 |         """Detects pass through collisions
140 | 
141 |         state1 - list of robot coordinates describing initial state
142 |         state2 - list of robot coordinates describing final state,
143 | 
144 |         Returns:
145 |             M* collision set in type set if recursive false
146 |             rM* collision set in type set if recursive true
147 |         """
148 |         raise NotImplementedError
149 | 
150 |     def col_check(self, state, recursive):
151 |         """Checks for collisions at a single state
152 | 
153 |         state     - list of coordinates of robots
154 |         recursive - generate collisions sets for rM*
155 | 
156 |         Returns:
157 |             M* collision set in type set if recursive false
158 |             rM* collision set in type set if recursive true
159 |         """
160 |         raise NotImplementedError
161 | 
162 |     def cross_over(self, state1, state2, recursive=False):
163 |         """Detects cross over and pass through collisions
164 | 
165 | 
166 |         state1 - list of robot coordinates describing initial state
167 |         state2 - list of robot coordinates describing final state
168 | 
169 |         Returns:
170 |             M* collision set in type set if recursive false
171 |             rM* collision set in type set if recursive true
172 |         """
173 |         raise NotImplementedError
174 | 
175 |     def simple_pass_through(self, state1, state2):
176 |         """Check for pass through collisions
177 | 
178 |         state1 - list of robot coordinates describing initial state
179 |         state2 - list of robot coordinates describing final state
180 | 
181 |         Returns:
182 |         True if pass through collision
183 |         False otherwise
184 |         """
185 |         raise NotImplementedError
186 | 
187 |     def simple_col_check(self, state):
188 |         """Checks for robot-robot collisions at state,
189 | 
190 |         state - list of robot coordinates
191 | 
192 |         returns:
193 |         True if collision
194 |         False otherwise
195 |         """
196 |         raise NotImplementedError
197 | 
198 |     def simple_cross_over(self, state1, state2):
199 |         """Check for cross over collisions in 8-connected worlds
200 | 
201 |         state1 - list of robot coordinates describing initial state
202 |         state2 - list of robot coordinates describing final state
203 | 
204 |         returns:
205 |         True if collision exists
206 |         False otherwise
207 |         """
208 |         raise NotImplementedError
209 | 
210 |     def simple_incremental_cross_over(self, state1, state2):
211 |         """Check for cross over collisions in 8-connected worlds.
212 | 
213 |         Assumes that collision checking has been performed for everything
214 |         but the last robot in the coordinates.  To be used to save a bit
215 |         of time for partial expansion approaches
216 | 
217 |         state1 - list of robot coordinates describing initial state
218 |         state2 - list of robot coordinates describing final state
219 | 
220 |         returns:
221 |         True if collision exists
222 |         False otherwise
223 |         """
224 |         raise NotImplementedError
225 | 
226 |     def simple_incremental_col_check(self, state1):
227 |         """Checks for robot-robot collisions at c1,
228 | 
229 |         Assumes that collision checking has been performed for everything
230 |         but the last robot in the coordinates.  To be used to save a bit
231 |         of time for partial expansion approaches
232 | 
233 |         state1 - list of robot coordinates
234 | 
235 |         returns:
236 |         True if collision exists
237 |         False otherwise
238 |         """
239 |         raise NotImplementedError
240 | 
241 |     def single_bot_outpath_check(self, cur_coord, prev_coord, cur_t, paths):
242 |         """Tests for collisions from prev_coord to cur_coord
243 | 
244 |         Checks for cross over collisions and collisions at the same
245 |         location when moving from cur_coord to prev_coord while robots
246 |         are moving in paths
247 | 
248 |         cur_coord - position of a single robot
249 | 
250 |         Returns:
251 | 
252 |         True if collision exists
253 |         False otherwise
254 |         """
255 |         raise NotImplementedError
256 | 
257 |     def simple_prio_col_check(self, coord, t, paths, pcoord=None,
258 |                               conn_8=False):
259 |         """Returns true, if collision is detected, false otherwise
260 |         at the moment only used to check the obstacle collisions, but
261 |         didn't want to reject the other code already
262 | 
263 |         coord - coord of potential new neighbor
264 |         t - current time step
265 |         paths - previously found paths
266 |         pcoord - previous coordinate of the path
267 | 
268 |         Returns:
269 |         True if collision exists
270 |         False otherwise
271 |         """
272 |         raise NotImplementedError
273 | 
274 |     def incremental_col_check(self, state, recursive):
275 |         """Checks for robot-robot collisions in state
276 | 
277 |         state     - list of coordinates of robots
278 |         recursive - generate collisions sets for rM*
279 | 
280 |         Only checks whether the last robot is
281 |         involved in a collision, for use with incremental methods
282 | 
283 |         Returns:
284 |             M* collision set in type set if recursive false
285 |             rM* collision set in type set if recursive true
286 |         """
287 |         raise NotImplementedError
288 | 
289 |     def incremental_cross_over(self, state1, state2, recursive=False):
290 |         """Detects cross over collisions as well as pass through
291 |         collisions.
292 | 
293 |         Only checks if the last robot is involved in a collision, for use
294 |         with partial expansion approaches.
295 | 
296 |         state1 - list of robot coordinates describing initial state
297 |         state2 - list of robot coordinates describing final state,
298 | 
299 |         Returns:
300 |             M* collision set in type set if recursive false
301 |             rM* collision set in type set if recursive true
302 |         """
303 |         raise NotImplementedError
304 | 
305 |     def single_bot_cross_over(self, coord1, pcoord1, coord2, pcoord2):
306 |         """Checks for cross-over and collisions between robots 1 and 2
307 | 
308 |         Robots are moving from pcoord to coord
309 | 
310 |         pcoord1 - first position of first robot
311 |         coord1  - second position of first robot
312 |         pcoord2 - first position of second robot
313 |         coord2  - second position of second robot
314 | 
315 |         Returns:
316 |         True if collision
317 |         False otherwise
318 |         """
319 |         raise NotImplementedError
320 | 
321 |     def prio_col_check(self, coord, pcoord, t, paths=None, conn_8=False,
322 |                        recursive=False):
323 |         """Collision checking with paths passed as constraints
324 | 
325 |         coord  - current node
326 |         pcoord - previous node
327 |         t      - timestep
328 |         paths  - paths that need to be avoided
329 | 
330 |         Returns: (collision sets are of type set)
331 |             M* collision set if collision exists and recursive is false
332 |             rM* collision set if collision exists and recursive is true
333 |             None if no collision exists
334 |         """
335 |         raise NotImplementedError
336 | 


--------------------------------------------------------------------------------
/od_mstar3/mstar_type_defs.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_TYPE_DEFS
 2 | #define MSTAR_TYPE_DEFS
 3 | 
 4 | /**************************************************************************
 5 |  * Provides type defs that are used in multiple files
 6 |  *************************************************************************/
 7 | 
 8 | #include <boost/graph/graph_traits.hpp>
 9 | #include <boost/graph/adjacency_list.hpp>
10 | #include <vector>
11 | #include <chrono>
12 | 
13 | namespace mstar{
14 |   /**
15 |    * Defines the graph type for individual robots.
16 |    *
17 |    * Assumes robot positions are indicated by integers, costs by doubles,
18 |    * and assumes that the edge_weight property is filled
19 |    */
20 |   typedef boost::adjacency_list<
21 |     boost::vecS, boost::vecS, boost::bidirectionalS,  boost::no_property,
22 |     boost::property<boost::edge_weight_t, double>> Graph;
23 | 
24 |   // type that defines the position of the robot
25 |   typedef int RobCoord;
26 | 
27 |   // represents the coordinate of an OD node, also used to index graphs
28 |   struct OdCoord{
29 |     std::vector<RobCoord> coord, move_tuple;
30 | 
31 |     OdCoord(std::vector<RobCoord> in_coord, std::vector<RobCoord> in_move){
32 |       coord = in_coord;
33 |       move_tuple = in_move;
34 |     }
35 | 
36 |     OdCoord(): coord(), move_tuple(){}
37 | 
38 |     bool operator==(const OdCoord &other) const{
39 |       return (coord == other.coord) && (move_tuple == other.move_tuple);
40 |     }
41 | 
42 |     bool is_standard() const{
43 |       return move_tuple.size() == 0;
44 |     }
45 |   };
46 | 
47 |   // Holds a path in the joint configuration space
48 |   typedef std::vector<OdCoord> OdPath;
49 | 
50 |   // defines a single set of mutually colliding robots.
51 |   // Must be sorted in order of increasing value for logic to hold
52 |   typedef std::set<uint> ColSetElement;
53 | 
54 |   // Defines a full collision set
55 |   typedef std::vector<ColSetElement> ColSet;
56 | 
57 |   // defines times for checking purposes
58 |   typedef std::chrono::system_clock Clock;
59 |   typedef Clock::time_point time_point;
60 | }
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/od_mstar3/mstar_utils.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_UTILS_H
 2 | #define MSTAR_UTILS_H
 3 | 
 4 | /**
 5 |  * Defines convinence functions for testing or other purposes not directly
 6 |  * related to the actual planning
 7 |  */
 8 | 
 9 | #include <iostream>
10 | 
11 | #include "mstar_type_defs.hpp"
12 | 
13 | namespace mstar{
14 |   void print_od_path(const OdPath &path){
15 |     for (const OdCoord &pos: path){
16 |       std::cout << "{";
17 |       for (const RobCoord &i: pos.coord){
18 | 	std::cout << i << " ";
19 |       }
20 |       std::cout << "}" << std::endl;
21 |     }
22 |   };
23 | 
24 |   void print_path(const std::vector<std::vector<std::pair<int, int>>> &path){
25 |     for (const auto &coord: path){
26 |       std::cout << "{";
27 |       for (const auto &c: coord){
28 | 	std::cout << "(" << c.first << ", " << c.second << ") ";
29 |       }
30 |       std::cout << "}" << std::endl;
31 |     }
32 |   };
33 | };
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/od_mstar3/od_mstar.cpp:
--------------------------------------------------------------------------------
  1 | #include <chrono>
  2 | #include <cassert>
  3 | 
  4 | #include "od_mstar.hpp"
  5 | 
  6 | using namespace mstar;
  7 | 
  8 | OdMstar::OdMstar(std::vector<std::shared_ptr<Policy>> policies,
  9 | 		 OdCoord goals, double inflation,
 10 | 		 time_point end_time, std::shared_ptr<ColChecker> col_checker){
 11 |   subplanners_ = new std::unordered_map<ColSetElement,
 12 | 					std::shared_ptr<OdMstar>>();
 13 |   policies_ = policies;
 14 |   // top-level planner, so construct a set of all robot ids
 15 |   for (int i = 0; i < (int) goals.coord.size(); ++i){
 16 |     ids_.push_back(i);
 17 |   }
 18 |   goals_ = goals;
 19 |   end_time_ = end_time;
 20 |   inflation_ = inflation;
 21 |   planning_iter_ = 0;
 22 |   num_bots_ = (int) ids_.size();
 23 |   col_checker_ = col_checker;
 24 |   top_level_ = true;
 25 | }
 26 | 
 27 | OdMstar::OdMstar(const ColSetElement &robots, OdMstar &parent){
 28 |   subplanners_ = parent.subplanners_;
 29 |   policies_ = parent.policies_;
 30 |   for (int i: robots){
 31 |     ids_.push_back(parent.ids_[i]);
 32 |     goals_.coord.push_back(parent.goals_.coord[i]);
 33 |   }
 34 |   end_time_ = parent.end_time_;
 35 |   inflation_ = parent.inflation_;
 36 |   planning_iter_ = 0;
 37 |   num_bots_ = (int) ids_.size();
 38 |   col_checker_ = parent.col_checker_;
 39 |   top_level_ = false;
 40 | }
 41 | 
 42 | OdMstar::~OdMstar(){
 43 |   if (top_level_){
 44 |     delete subplanners_;
 45 |   }
 46 | }
 47 | 
 48 | OdPath OdMstar::find_path(OdCoord init_pos){
 49 |   reset();
 50 | 
 51 |   // Configure the initial vertex
 52 |   // identified by setting the back_ptr to itself
 53 |   OdVertex *first = get_vertex(init_pos);
 54 |   first->reset(planning_iter_);
 55 |   first->back_ptr = first;
 56 |   first->cost = 0;
 57 |   first->open = true;
 58 | 
 59 |   OpenList open_list;
 60 |   open_list.push(first);
 61 | 
 62 |   while (open_list.size() > 0){
 63 |     if (std::chrono::system_clock::now() > end_time_){
 64 |       throw OutOfTimeError();
 65 |     }
 66 | 
 67 |     OdVertex *vert = open_list.top();
 68 |     open_list.pop();
 69 |     vert->open = false;
 70 |     if (vert->closed){
 71 |       continue;
 72 |     }
 73 | 
 74 |     // check if this is the goal vertex
 75 |     if (vert->coord == goals_){
 76 |       vert->forwards_ptr = vert;
 77 |     }
 78 |     if (vert->forwards_ptr != nullptr){
 79 |       // Either the goal or on a previous found path to the goal
 80 |       return trace_path(vert);
 81 |     }
 82 | 
 83 |     expand(vert, open_list);
 84 |   }
 85 |   throw NoSolutionError();
 86 | }
 87 | 
 88 | void OdMstar::reset(){
 89 |   planning_iter_++;
 90 | }
 91 | 
 92 | double OdMstar::heuristic(const OdCoord &coord){
 93 |   // Heuristic is computed from the assigned move for elements of the
 94 |   // move tuple, and from the base coordinate for all others
 95 |   double h = 0;
 96 |   uint i = 0;
 97 |   while (i < coord.move_tuple.size()){
 98 |     h += policies_[ids_[i]]->get_cost(coord.move_tuple[i]);
 99 |     ++i;
100 |   }
101 |   while (i < coord.coord.size()){
102 |     h += policies_[ids_[i]]->get_cost(coord.coord[i]);
103 |     ++i;
104 |   }
105 |   return h * inflation_;
106 | }
107 | 
108 | OdVertex* OdMstar::get_vertex(const OdCoord &coord){
109 |   // returns a pair with the first element an interator to a <key, vertex>
110 |   // pair and the second to a bool which is true if there was not a
111 |   // preexisting value
112 |   auto p = graph_.emplace(coord, coord);
113 |   p.first->second.reset(planning_iter_);
114 |   if (p.second){
115 |     // new vertex, so need to set heuristic
116 |     p.first->second.h = heuristic(coord);
117 |   }
118 |   return &p.first->second;
119 | }
120 | 
121 | OdCoord get_vertex_step(OdVertex * vert){
122 |   assert(vert != nullptr);
123 |   while (1){
124 |     if (vert->forwards_ptr->coord.is_standard()){
125 |       return vert->forwards_ptr->coord;
126 |     }
127 |     vert = vert->forwards_ptr;
128 |     assert(vert != nullptr);
129 |   }
130 | }
131 | 
132 | OdCoord OdMstar::get_step(const OdCoord &init_pos){
133 |   OdVertex* vert = OdMstar::get_vertex(init_pos);
134 |   if (vert->forwards_ptr != nullptr){
135 |     return get_vertex_step(vert);
136 |   }
137 |   find_path(init_pos);
138 |   return get_vertex_step(vert);
139 | }
140 | 
141 | void OdMstar::expand(OdVertex *vertex, OpenList &open_list){
142 |   vertex->closed = true;
143 |   ColSet gen_set = col_set_to_expand(vertex->col_set, vertex->gen_set);
144 |   if (gen_set.size() == 1 && (int) gen_set[0].size() == num_bots_){
145 |     // the generating collision set contains all robots, so no caching
146 |     // would be possible.  Therefore, don't use
147 |     gen_set = vertex->col_set;
148 |   }
149 | 
150 |   std::vector<OdCoord> neighbors = get_neighbors(vertex->coord, gen_set);
151 | 
152 |   // accumulates the collision sets that occur while trying to move to
153 |   // any of the neighbors
154 |   ColSet col_set;
155 |   for (OdCoord &new_coord: neighbors){
156 |     ColSet new_col = col_checker_->check_edge(vertex->coord, new_coord, ids_);
157 |     if (!new_col.empty()){
158 |       // State not accessible due to collisions
159 |       add_col_set_in_place(new_col, col_set);
160 |       continue;
161 |     }
162 |     
163 |     OdVertex *new_vert = get_vertex(new_coord);
164 |     new_vert->back_prop_set.insert(vertex);
165 |     // Always need to at the collision set of any vertex we can reach
166 |     // to its successors, as otherwise we would need to wait for another
167 |     // robot to collide downstream before triggering back propagation
168 |     add_col_set_in_place(new_vert->col_set, col_set);
169 | 
170 |     if (new_vert->closed){
171 |       continue;
172 |     }
173 | 
174 |     double new_cost = vertex->cost + edge_cost(vertex->coord, new_coord);
175 |     if (new_cost >= new_vert->cost){
176 |       continue;
177 |     }
178 |     new_vert->cost = new_cost;
179 |     new_vert->back_ptr = vertex;
180 |     new_vert->open = true;
181 |     new_vert->gen_set = gen_set;
182 |     open_list.push(new_vert);
183 | 
184 |     // Add an intermediate vertex's parent's col_set to its col_set, so
185 |     // moves for later robots can be explored.  Not necessary, but should
186 |     // reduce thrashing
187 |     if (!new_vert->coord.is_standard()){
188 |       add_col_set_in_place(vertex->col_set, new_vert->col_set);
189 |     }
190 |   }
191 |   back_prop_col_set(vertex, col_set, open_list);
192 | }
193 | 
194 | std::vector<OdCoord> OdMstar::get_neighbors(const OdCoord &coord,
195 | 					    const ColSet &col_set){
196 |   // If the collision set contains all robots, invoke the non-recursive
197 |   // base case
198 |   if (col_set.size() == 1 && (int) col_set[0].size() == num_bots_){
199 |     return get_all_neighbors(coord);
200 |   }
201 |   
202 |   assert(coord.is_standard());
203 | 
204 |   // Generate the step along the joint policy
205 |   std::vector<RobCoord> policy_step;
206 |   for (int i = 0; i < num_bots_; i++){
207 |     policy_step.push_back(policies_[ids_[i]]->get_step(coord.coord[i]));
208 |   }
209 | 
210 |   // Iterate over colliding sets of robots, and integrate the results
211 |   // of the sub planning for each set
212 |   for (const ColSetElement &elem: col_set){
213 |     // The collision set contains the local ids (relative to the robots in
214 |     // this subplanner) of the robots in collision
215 |     // To properly index child subplanners, need to convert to global robot
216 |     // ids, so that the subplanners will be properly globally accessible
217 |     ColSetElement global_col;
218 |     for (auto &local_id: elem){
219 |       global_col.insert(ids_[local_id]);
220 |     }
221 |     // Get, and if necessary construct, the appropriate subplanner.
222 |     // returns a pair <p, bool> where bool is true if a new subplanner
223 |     // was generated, and p is an iterator to a pair <key, val>
224 |     if (subplanners_->find(global_col) == subplanners_->end()){
225 |       subplanners_->insert(
226 | 	{global_col, std::shared_ptr<OdMstar>(new OdMstar(elem, *this))});
227 |     }
228 |     OdMstar *planner = subplanners_->at(global_col).get();
229 |     // create the query point
230 |     std::vector<RobCoord> new_base;
231 |     for (const int &i: elem){
232 |       new_base.push_back(coord.coord[i]);
233 |     }
234 | 
235 |     OdCoord step;
236 |     try{
237 |       step = planner->get_step(OdCoord(new_base, {}));
238 |     } catch(NoSolutionError &e){
239 |       // no solution for that subset of robots, so return no neighbors
240 |       // only likely to be relevant on directed graphs
241 |       return {};
242 |     }
243 | 
244 |     int elem_dex = 0;
245 |     // now need to copy into the relevant positions in policy_step
246 |     for (auto i: elem){
247 |       policy_step[i] = step.coord[elem_dex];
248 |       ++elem_dex; // could play with post appending, but don't want to
249 |     }
250 |   }
251 |   return {OdCoord({policy_step}, {})};
252 | }
253 | 
254 | std::vector<OdCoord> OdMstar::get_all_neighbors(const OdCoord &coord){
255 |   // get the coordinate of the robot to assign a new move
256 |   uint move_index = coord.move_tuple.size();
257 |   std::vector<std::vector<RobCoord>> new_moves;
258 |   for (RobCoord &move: policies_[ids_[move_index]]->get_out_neighbors(
259 | 	 coord.coord[move_index])){
260 |     std::vector<RobCoord> new_move(coord.move_tuple);
261 |     new_move.push_back(move);
262 |     new_moves.push_back(new_move);
263 |   }
264 |   std::vector<OdCoord> ret;
265 |   if (move_index + 1 < coord.coord.size()){
266 |     // generating intermediate vertices
267 |     for (auto &move_tuple: new_moves){
268 |       ret.push_back(OdCoord(coord.coord, move_tuple));
269 |     }
270 |   } else {
271 |     // generating standard vertices
272 |     for (auto &move_tuple: new_moves){
273 |       ret.push_back(OdCoord(move_tuple, {}));
274 |     }
275 |   }
276 |   return ret;
277 | }
278 | 
279 | double OdMstar::edge_cost(const OdCoord &source, const OdCoord &target){
280 |   if (source.is_standard() && target.is_standard()){
281 |     // transition between standard vertex, so all robots are assigned moves and
282 |     // incur costs
283 |     double cost = 0;
284 |     for (int i = 0; i < num_bots_; ++i){
285 |       cost += policies_[ids_[i]]->get_edge_cost(source.coord[i],
286 | 						target.coord[i]);
287 |     }
288 |     return cost;
289 |   } else {
290 |     // transition from intermediate vertex, so only one robot is assigned
291 |     // a move and incurs cost
292 |     uint move_index = source.move_tuple.size();
293 |     if (target.is_standard()){
294 |       return policies_[ids_[move_index]]->get_edge_cost(
295 | 	source.coord[move_index], target.coord[move_index]);
296 |     } else{
297 |       return policies_[ids_[move_index]]->get_edge_cost(
298 | 	source.coord[move_index], target.move_tuple[move_index]);
299 |     }
300 |   }
301 | }
302 | 
303 | OdPath OdMstar::trace_path(OdVertex *vert){
304 |   OdPath path;
305 |   back_trace_path(vert, vert->forwards_ptr, path);
306 |   forwards_trace_path(vert, path);
307 |   return path;
308 | }
309 | 
310 | void OdMstar::back_trace_path(OdVertex *vert, OdVertex *successor,
311 | 			      OdPath &path){
312 |   vert->forwards_ptr = successor;
313 |   // check if this is the final, terminal state, which is not required
314 |   // to have a zero-cost self loop, so could get problems
315 |   if (vert != successor){
316 |     vert->h = successor->h + edge_cost(vert->coord, successor->coord);
317 |   } else{
318 |     vert->h = 0;
319 |   }
320 |   if (vert->coord.is_standard()){
321 |     path.insert(path.begin(), vert->coord);
322 |   }
323 |   if (vert->back_ptr != vert){
324 |     back_trace_path(vert->back_ptr, vert, path);
325 |   }
326 | }
327 | 
328 | void OdMstar::forwards_trace_path(OdVertex *vert, OdPath &path){
329 |   if (vert->forwards_ptr != vert){
330 |     if (vert->forwards_ptr->coord.is_standard()){
331 |       path.push_back(vert->forwards_ptr->coord);
332 |     }
333 |     forwards_trace_path(vert->forwards_ptr, path);
334 |   }
335 | }
336 | 
337 | void OdMstar::back_prop_col_set(OdVertex *vert, const ColSet &col_set,
338 | 				OpenList &open_list){
339 |   bool further = add_col_set_in_place(col_set, vert->col_set);
340 |   if (further){
341 |     vert->closed = false;
342 |     if (! vert->open){
343 |       vert->open = true;
344 |       open_list.push(vert);
345 |     }
346 | 
347 |     for(OdVertex *predecessor: vert->back_prop_set){
348 |       back_prop_col_set(predecessor, vert->col_set, open_list);
349 |     }
350 |   }
351 | }
352 | 


--------------------------------------------------------------------------------
/od_mstar3/od_mstar.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef MSTAR_OD_MSTAR_H
  2 | #define MSTAR_OD_MSTAR_H
  3 | 
  4 | #include <unordered_map>
  5 | #include <functional>
  6 | #include <queue>
  7 | #include <memory>
  8 | #include <exception>
  9 | 
 10 | #include <boost/functional/hash_fwd.hpp>
 11 | 
 12 | #include "mstar_type_defs.hpp"
 13 | #include "col_set.hpp"
 14 | #include "od_vertex.hpp"
 15 | #include "col_checker.hpp"
 16 | #include "policy.hpp"
 17 | 
 18 | namespace std{
 19 |   template <> struct hash<mstar::OdCoord>{
 20 |     size_t operator()(const mstar::OdCoord &val) const{
 21 |       size_t hash = boost::hash_range(val.coord.cbegin(), val.coord.cend());
 22 |       boost::hash_combine<size_t>(
 23 | 	hash,
 24 | 	boost::hash_range(val.move_tuple.cbegin(), val.move_tuple.cend()));
 25 |       return hash;
 26 |     }
 27 |   };
 28 | 
 29 |   template <> struct hash<std::vector<int>>{
 30 |     size_t operator()(const std::vector<int> &val) const{
 31 |       return boost::hash_range(val.cbegin(), val.cend());
 32 |     }
 33 |   };
 34 | 
 35 |   template <> struct hash<mstar::ColSetElement>{
 36 |     size_t operator()(const mstar::ColSetElement &val) const{
 37 |       return boost::hash_range(val.cbegin(), val.cend());
 38 |     }
 39 |   };
 40 | }
 41 | 
 42 | 
 43 | namespace mstar{
 44 | 
 45 |   struct greater_cost{
 46 |     bool operator()(const mstar::OdVertex *x, const mstar::OdVertex *y) const{
 47 |       if (x == nullptr || y == nullptr){
 48 | 	return true;
 49 |       }
 50 |       return *x > *y;
 51 |     }
 52 |   };
 53 | 
 54 |   // Sort in decreasing order to give cheap access to the cheapest elements
 55 |   typedef std::priority_queue<OdVertex*, std::vector<OdVertex*>,
 56 | 			      greater_cost> OpenList;
 57 | 
 58 |   class OdMstar {    
 59 |   public:
 60 |     /**
 61 |      * Constructs a new, top level M* planner
 62 |      *
 63 |      * @param policies pointer to vector of policies.
 64 |      *                 OdMstar does not take ownership
 65 |      * @param goals goal configuration of entire system
 66 |      * @param inflation inflation factor
 67 |      * @param end_time time at which M* will declare failure
 68 |      * @param checker collision checking object
 69 |      */
 70 |     OdMstar(
 71 |       std::vector<std::shared_ptr<Policy>> policies,
 72 |       OdCoord goals, double inflation, time_point end_time,
 73 |       std::shared_ptr<ColChecker> col_checker);
 74 | 
 75 |     /**
 76 |      * Creates a subplanner for a subsest of the robots
 77 |      *
 78 |      * robots is a collision set element in the frame of parent, not global
 79 |      * robot ids
 80 |      */
 81 |     OdMstar(const ColSetElement &robots, OdMstar &parent);
 82 | 
 83 |     ~OdMstar();
 84 | 
 85 |     /**
 86 |      * Computes the optimal path to the goal from init_pos
 87 |      *
 88 |      * @param init_pos coordinate of the initial joint configuration
 89 |      *
 90 |      * @return the path in the joint configuration graph to the goal
 91 |      *
 92 |      * @throws OutOfTimeError ran out of planning time
 93 |      * @throws NoSolutionError no path to goal from init_pos
 94 |      */
 95 |     OdPath find_path(OdCoord init_pos);
 96 | 
 97 |   private:
 98 |     /**TODO: fix
 99 |      * This is kind of horrifying, but I cannot store the OdMstar objects
100 |      * directly in the unordered map, as I get ungodly errors that look
101 |      * like they come from an allocator.  Adding copy constructor and
102 |      * assignment operator doesn't work, so its something involved about
103 |      * STL.  Think this works, but annoying
104 |      */
105 |     std::unordered_map<ColSetElement, std::shared_ptr<OdMstar>> *subplanners_;
106 |     std::vector<std::shared_ptr<Policy>> policies_;
107 |     // ids of the robots this planner handles.  Assumed to be in ascending
108 |     // order
109 |     std::vector<int> ids_;
110 |     OdCoord goals_;
111 |     // holds the nodes in the joint configuration space
112 |     std::unordered_map<OdCoord, OdVertex> graph_;
113 |     time_point end_time_; // When planning will be halted
114 |     double inflation_; // inflation factor for heuristic
115 |     int planning_iter_; // current planning iteration
116 |     int num_bots_;
117 |     std::shared_ptr<ColChecker> col_checker_;
118 |     bool top_level_; // tracks if the top level planner
119 | 
120 |     OdMstar(const OdMstar &that) = delete;
121 | 
122 |     /**
123 |      * Resets planning for a new planning iteration.
124 |      *
125 |      * Does not reset forwards_ptrs, as those should be valid across
126 |      * iterations
127 |      */
128 |     void reset();
129 | 
130 |     /**
131 |      * Computes the heuristic value of a vertex at a given coordinate
132 |      *
133 |      * @param coord coordinate for which to compute a heuristic value
134 |      *
135 |      * @return the (inflated) heuristic value
136 |      */
137 |     double heuristic(const OdCoord &coord);
138 | 
139 |     /**
140 |      * Returns a reference to the vertex at a given coordinate
141 |      *
142 |      * this->graph retains ownership of the vertex.  Will create the vertex
143 |      * if it does not already exist.
144 |      *
145 |      * @param coord coordinate of the desired vertex
146 |      *
147 |      * @return pointer to the vertex at coord.
148 |      */
149 |     OdVertex* get_vertex(const OdCoord &coord);
150 | 
151 |     /**
152 |      * Returns the optimal next step from init_pos
153 |      *
154 |      * Will compute the full path if necessary, but preferentially uses
155 |      * cached results in forwards_ptrs.  Expected to only be called from
156 |      * a standard coordinate, and to only return a standard coordinate
157 |      *
158 |      * @param init_pos coordinate to compute the optimal next step from
159 |      *
160 |      * @returns the coordinate of the optimal next step towards the goal
161 |      */
162 |     OdCoord get_step(const OdCoord &init_pos);
163 | 
164 |     /**
165 |      * Generates the neighbors of vertex and add them to the open list
166 |      *
167 |      * @param vertex OdVertex to expand
168 |      * @param open_list the sorted open list being used
169 |      */
170 |     void expand(OdVertex *vertex, OpenList &open_list);
171 | 
172 |     /**
173 |      * Returns the limited neighbors of coord using recursive calculation
174 |      *
175 |      * @param coord Coordinates of vertex to generate neighbor thereof
176 |      * @param col_set collision set of vertex to generate neighbors
177 |      *
178 |      * @return list of limited neighbors
179 |      */
180 |     std::vector<OdCoord> get_neighbors(
181 |       const OdCoord &coord, const ColSet &col_set);
182 | 
183 |     /**
184 |      * Returns the limited neighbors of coord using non-recursive computation
185 |      *
186 |      * Called when the collision set contains all of the robots, as a base
187 |      * case for get_neighbors, thus always generate all possible neighbors
188 |      *
189 |      * @param coord Coordinates of vertex to generate neighbor thereof
190 |      *
191 |      * @return list of limited neighbors
192 |      */
193 |      std::vector<OdCoord> get_all_neighbors(
194 |        const OdCoord &coord);
195 | 
196 |     /**
197 |      * Returns the cost of traversing a given edge
198 |      *
199 |      * @param source coordinate of the source vertex
200 |      * @param target coordinate of the target vertex
201 |      *
202 |      * @return the cost of the edge
203 |      */
204 |     double edge_cost(const OdCoord &source, const OdCoord &target);
205 | 
206 |     /**
207 |      * Returns the path through a vertex
208 |      *
209 |      * Assumes that back_ptr and forwards_ptr are set and non-none at vert
210 |      * Identifies each end of the path by looking for a back_ptr/forwards_ptr
211 |      * pointed at the holder
212 |      *
213 |      * @param vert the vertex to trace a path through
214 |      *
215 |      * @return the path passing through vert containing only standard vertices
216 |      */
217 |     OdPath trace_path(OdVertex *vert);
218 | 
219 |     /**
220 |      * Generates the path to the specified vertex
221 |      *
222 |      * Sets forward_ptrs to cache the path, and updates the heuristic
223 |      * values of the vertices on the path so we can end the moment a
224 |      * vertex on a cached path is expanded.
225 |      *
226 |      * TODO: double check that making the heuristic inconsistent in this
227 |      * fashion is OK.
228 |      *
229 |      * @param vert the vertex to trace the path to
230 |      * @param successor the successor of vert on the path
231 |      * @param path place to construct path
232 |      */
233 |     void back_trace_path(OdVertex *vert, OdVertex *successor, OdPath &path);
234 | 
235 |     /**
236 |      * Genertes the path from the specified vertex to the goal
237 |      *
238 |      * Non-trivial only if vert lies on a previously cached path
239 |      *
240 |      * @param vert the vertex to trace the path from
241 |      * @param path place to construct path
242 |      */
243 |     void forwards_trace_path(OdVertex *vert, OdPath &path);
244 | 
245 |     /**
246 |      * Backpropagates collision set information to all predecessors of a
247 |      * vertex.
248 |      *
249 |      * Adds vertices whose collision set changes back to the open list
250 |      *
251 |      * @param vertex pointer to the vertex to back propagate from
252 |      * @param col_set the collision set that triggered backpropagation
253 |      * @param open_list the current open list
254 |      */
255 |     void back_prop_col_set(OdVertex *vert, const ColSet &col_set,
256 | 			   OpenList &open_list);
257 |   };  
258 | 
259 |   struct OutOfTimeError : public std::exception{
260 |     const char * what () const throw(){
261 |       return "Out of Time";
262 |     }
263 |   };
264 | 
265 |   struct NoSolutionError : public std::exception{
266 |     const char * what () const throw(){
267 |       return "No Solution";
268 |     }
269 |   };
270 | 
271 | };
272 | 
273 | #endif
274 | 


--------------------------------------------------------------------------------
/od_mstar3/od_vertex.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_OD_VERTEX_H
 2 | #define MSTAR_OD_VERTEX_H
 3 | 
 4 | #include <limits>
 5 | 
 6 | #include "mstar_type_defs.hpp"
 7 | 
 8 | namespace mstar{
 9 | 
10 |   struct OdVertex{
11 |     OdCoord coord;
12 |     ColSet col_set, gen_set; // Collision set and generating collision set
13 |     int updated; // last planning iteration used
14 |     bool closed, open;
15 |     double cost, h;
16 |     OdVertex* back_ptr; // optimal way to reach this
17 |     std::set<OdVertex*> back_prop_set; // all explored ways to reach this
18 |     OdVertex* forwards_ptr; // way to goal from this
19 | 
20 |     OdVertex(OdCoord coord):
21 |       coord(coord), col_set(), updated(0), closed(false), open(false),
22 |       cost(std::numeric_limits<double>::max()), h(),
23 |       back_ptr(nullptr), back_prop_set(), forwards_ptr(nullptr)
24 |       {};
25 | 
26 |     bool operator>=(const OdVertex &other) const{
27 |       return cost + h >= other.cost + other.h;
28 |     }
29 | 
30 |     bool operator>(const OdVertex &other) const{
31 |       return cost + h > other.cost + other.h;
32 |     }
33 | 
34 |     bool operator<=(const OdVertex &other) const{
35 |       return cost + h <= other.cost + other.h;
36 |     }
37 | 
38 |     bool operator<(const OdVertex &other) const{
39 |       return cost + h < other.cost + other.h;
40 |     }
41 | 
42 |     /**
43 |      * Resets a vertex used in a previous planning iteration
44 |      *
45 |      * @param t Current planning iteration
46 |      */
47 |     void reset(int t){
48 |       if (t > updated){
49 | 	updated = t;
50 | 	open = false;
51 | 	closed = false;
52 | 	cost = std::numeric_limits<double>::max();
53 | 	back_ptr = nullptr;
54 | 	back_prop_set = std::set<OdVertex *>();
55 |       }
56 |     }
57 |   };
58 | 
59 | }
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/od_mstar3/policy.cpp:
--------------------------------------------------------------------------------
 1 | #include <boost/graph/dijkstra_shortest_paths.hpp>
 2 | #include <boost/graph/reverse_graph.hpp>
 3 | 
 4 | #include "policy.hpp"
 5 | 
 6 | using namespace mstar;
 7 | 
 8 | 
 9 | Policy::Policy(const Graph &g, const RobCoord goal){
10 |   g_ = g;
11 |   costs_ = std::vector<double>(num_vertices(g_));
12 |   predecessors_.resize(boost::num_vertices(g_));
13 | 
14 |   boost::dijkstra_shortest_paths(
15 |     boost::make_reverse_graph(g_), goal,
16 |     boost::predecessor_map(&predecessors_[0]).distance_map(&costs_[0]));
17 |   edge_weight_map_ = boost::get(boost::edge_weight_t(), g_);
18 | }
19 | 
20 | 
21 | double Policy::get_cost(RobCoord coord){
22 |   return costs_[coord];
23 | }
24 | 
25 | 
26 | double Policy::get_edge_cost(RobCoord u, RobCoord v){
27 |   // boost::edge returns pair<edge_descriptor, bool>
28 |   return boost::get(edge_weight_map_, boost::edge(u, v, g_).first);
29 | }
30 | 
31 | 
32 | std::vector<RobCoord> Policy::get_out_neighbors(RobCoord coord){
33 |   std::vector<RobCoord> out;
34 |   for (auto adj_verts = boost::adjacent_vertices(coord, g_);
35 |        adj_verts.first != adj_verts.second; adj_verts.first++){
36 |     out.push_back(*(adj_verts.first));
37 |   }
38 |   return out;
39 | }
40 | 
41 | RobCoord Policy::get_step(RobCoord coord){
42 |   return predecessors_[coord];
43 | }
44 | 


--------------------------------------------------------------------------------
/od_mstar3/policy.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MSTAR_POLICY_H
 2 | #define MSTAR_POLICY_H
 3 | 
 4 | /****************************************************************************
 5 |  * Provides a wrapper for the Boost graphs
 6 |  ***************************************************************************/
 7 | 
 8 | #include <iostream>
 9 | #include <boost/graph/graph_traits.hpp>
10 | #include <boost/graph/adjacency_list.hpp>
11 | 
12 | #include "mstar_type_defs.hpp"
13 | 
14 | 
15 | namespace mstar{
16 |   /**
17 |    * Generates an individual policy for a robot to reach a specified goal
18 |    */
19 |   class Policy{
20 |   private:
21 |     Graph g_; // the boost graph this wraps
22 |     std::vector<double> costs_; // holds cost to goal from every configuration
23 |     boost::property_map<Graph, boost::edge_weight_t>::type edge_weight_map_;
24 |     std::vector<int> predecessors_;
25 | 
26 |   public:
27 |     /**
28 |      * @param g The graph describing the workspace
29 |      * @param goal The goal coordinate of the robot
30 |      */
31 |     Policy(const Graph &g, const RobCoord goal);
32 | 
33 |     /**
34 |      * Returns the cost-to-go from a vertex
35 |      * @param vert Vertex to query cost from
36 |      *
37 |      * @return the cost to go until the goal is reached
38 |      */
39 |     double get_cost(RobCoord coord);
40 | 
41 |     /**
42 |      * Returns cost of traversing the edge (u, v)
43 |      *
44 |      * Does not check whether the edge exists
45 |      *
46 |      * @param u Source vertex of the edge
47 |      * @param v Destination vertex of the dge
48 |      *
49 |      * @return the cost of the edge
50 |      */
51 |     double get_edge_cost(RobCoord u, RobCoord v);
52 | 
53 |     /**
54 |      * Returns the out-neighbors of a given coordinate
55 |      * @param coord Vertex to get out neighbors of
56 |      */
57 |     std::vector<RobCoord> get_out_neighbors(RobCoord coord);
58 | 
59 |     /**
60 |      * Returns the successor of the specified coordinate
61 |      *
62 |      * @param coord coordinate to compute the successor thereof
63 |      *
64 |      * @return coordinate of next step
65 |      */
66 |     RobCoord get_step(RobCoord coord);
67 |   };
68 | }
69 | 
70 | #endif
71 | 


--------------------------------------------------------------------------------
/od_mstar3/prune_graph.py:
--------------------------------------------------------------------------------
 1 | from od_mstar3 import workspace_graph
 2 | import networkx as nx #Python network analysis module
 3 | 
 4 | 
 5 | 
 6 | def to_networkx_graph(obs_map):
 7 |     '''Reads in a standard obs_map list and converts it to a networkx
 8 |     digraph
 9 |     obs_map - list of lists, 0 for empty cell, 1 for obstacle'''
10 |     #Create a workspace_graph object to generate neighbors
11 |     g = workspace_graph.Astar_Graph(obs_map,[0,0])
12 |     G = nx.DiGraph() #Creates the graph object
13 |     #Populate graph with nodes
14 |     for x in range(len(obs_map)):
15 |         for y in range(len(obs_map[x])):
16 |             if obs_map[x][y] == 0:
17 |                 G.add_node((x,y))
18 |     #Add edges
19 |     for i in G.nodes():
20 |         #Stored nodes by their coordinates in G
21 |         for j in g.get_neighbors(i):
22 |             G.add_edge(i,j)
23 |     return G
24 | 
25 | def prune_opposing_edge(G,num_edges=1):
26 |     '''Reads in a networkx digraph and prunes the edge opposing the most
27 |     between (i.e. edge on the most shortest path connections).  If this edge
28 |     doesn't have an opposing edge, or if the removal of said edge would
29 |     reduce the connectivity of the space, the next most between edge is pruned
30 |     instead.  Since computing completeness can be expensive, allows multiple
31 |     edges to be pruned before computing the impact of said prunning on
32 |     completeness is computed'''
33 |     #Get the current number of strongly connected components, can't decrease
34 |     #without preventing some paths from being found
35 |     num_components = nx.number_strongly_connected_components(G)
36 |     pruned = 0
37 |     # print 'computing betweeness'
38 |     betweenness = nx.edge_betweenness_centrality(G)
39 |     # print 'betweenness computed'
40 |     while pruned < num_edges:
41 |         max_bet = max(betweenness.values())
42 |         if max_bet <= 0:
43 |             #Set betweeness to -1 if can't prune, set to 0 not between
44 |             return G
45 |         edge = betweenness.keys()[betweenness.values().index(max_bet)]
46 |         if not (edge[1],edge[0]) in G.edges():
47 |             #Already been pruned
48 |             betweenness[edge] = -1
49 |             # print 'no edge'
50 |             continue
51 |         #Test if pruning the edge will break connectivity
52 |         temp_graph = G.copy()
53 |         temp_graph.remove_edge(edge[1],edge[0])
54 |         if num_components == nx.number_strongly_connected_components(temp_graph):
55 |             #Can safely prune this edge
56 |             G = temp_graph
57 |             pruned+=1
58 |             betweenness[edge] = -1
59 |             betweenness.pop((edge[1],edge[0]))
60 |             # print 'pruned'
61 |             #Need to prevent further edges from being pruned from this vertex
62 |             for neighbor in G.neighbors(edge[1]):
63 |                 betweenness[(edge[1],neighbor)] = -1
64 |         else:
65 |             betweenness[edge] = -1
66 |             # print 'breaks con %s' %(str(edge))
67 |     return G
68 | 


--------------------------------------------------------------------------------
/od_mstar3/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup, Extension
2 | from Cython.Build import cythonize
3 | 
4 | setup(ext_modules = cythonize(Extension(
5 |            "cpp_mstar",                                
6 |            sources=["cython_od_mstar.pyx"], 
7 |            extra_compile_args=["-std=c++11"]
8 |       )))
9 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Cython==0.29.28
 2 | gym==0.10.5
 3 | imageio==2.9.0
 4 | ipdb==0.13.9
 5 | matplotlib==3.3.2
 6 | networkx==2.8.8
 7 | numpy==1.21.6
 8 | ray==1.8.0
 9 | setproctitle==1.2.2
10 | tensorflow==1.14.0
11 | torch==1.11.0
12 | wandb==0.12.14
13 | 


--------------------------------------------------------------------------------
/runner.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import ray
  3 | import torch
  4 | 
  5 | from alg_parameters import *
  6 | from episodic_buffer import EpisodicBuffer
  7 | from mapf_gym import MAPFEnv
  8 | from model import Model
  9 | from od_mstar3 import od_mstar
 10 | from od_mstar3.col_set_addition import OutOfTimeError, NoSolutionError
 11 | from util import one_step, update_perf, reset_env,set_global_seeds
 12 | 
 13 | 
 14 | @ray.remote(num_cpus=1, num_gpus=SetupParameters.NUM_GPU / (TrainingParameters.N_ENVS + 1))
 15 | class Runner(object):
 16 |     """sub-process used to collect experience"""
 17 | 
 18 |     def __init__(self, env_id):
 19 |         """initialize model0 and environment"""
 20 |         self.ID = env_id
 21 |         set_global_seeds(env_id*123)
 22 |         self.num_agent = EnvParameters.N_AGENTS
 23 |         self.imitation_num_agent = EnvParameters.N_AGENTS
 24 |         self.one_episode_perf = {'num_step': 0, 'episode_reward': 0, 'invalid': 0, 'block': 0, 'num_leave_goal': 0,
 25 |                                  'wrong_blocking': 0, 'num_collide': 0, 'reward_count': 0, 'ex_reward': 0,
 26 |                                  'in_reward': 0}
 27 | 
 28 |         self.env = MAPFEnv(num_agents=self.num_agent)
 29 |         self.imitation_env = MAPFEnv(num_agents=self.imitation_num_agent)
 30 | 
 31 |         self.local_device = torch.device('cuda') if SetupParameters.USE_GPU_LOCAL else torch.device('cpu')
 32 |         self.local_model = Model(env_id, self.local_device)
 33 |         self.hidden_state = (
 34 |             torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device),
 35 |             torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device))
 36 |         self.message = torch.zeros((1, self.num_agent, NetParameters.NET_SIZE)).to(self.local_device)
 37 | 
 38 |         self.done, self.valid_actions, self.obs, self.vector, self.train_valid = reset_env(self.env, self.num_agent)
 39 | 
 40 |         self.episodic_buffer = EpisodicBuffer(0, self.num_agent)
 41 |         new_xy = self.env.get_positions()
 42 |         self.episodic_buffer.batch_add(new_xy)
 43 | 
 44 |         self.imitation_episodic_buffer = EpisodicBuffer(0, self.imitation_num_agent)
 45 | 
 46 |     def run(self, weights, total_steps):
 47 |         """run multiple steps and collect data for reinforcement learning"""
 48 |         with torch.no_grad():
 49 |             mb_obs, mb_vector, mb_rewards_in, mb_rewards_ex, mb_rewards_all, mb_values_in, mb_values_ex, \
 50 |                 mb_values_all, mb_done, mb_ps, mb_actions = [], [], [], [], [], [], [], [], [], [], []
 51 |             mb_hidden_state = []
 52 |             mb_message = []
 53 |             mb_train_valid, mb_blocking = [], []
 54 |             performance_dict = {'per_r': [], 'per_in_r': [], 'per_ex_r': [], 'per_valid_rate': [],
 55 |                                 'per_episode_len': [], 'per_block': [],
 56 |                                 'per_leave_goal': [], 'per_final_goals': [], 'per_half_goals': [], 'per_block_acc': [],
 57 |                                 'per_max_goals': [], 'per_num_collide': [], 'rewarded_rate': []}
 58 | 
 59 |             self.local_model.set_weights(weights)
 60 |             for _ in range(TrainingParameters.N_STEPS):
 61 |                 mb_obs.append(self.obs)
 62 |                 mb_vector.append(self.vector)
 63 |                 mb_hidden_state.append(
 64 |                     [self.hidden_state[0].cpu().detach().numpy(), self.hidden_state[1].cpu().detach().numpy()])
 65 |                 mb_message.append(self.message)
 66 |                 actions, ps, values_in, values_ex, values_all, pre_block, self.hidden_state, num_invalid, self.message = \
 67 |                     self.local_model.step(self.obs, self.vector, self.valid_actions, self.hidden_state,
 68 |                                           self.episodic_buffer.no_reward, self.message, self.num_agent)
 69 |                 self.one_episode_perf['invalid'] += num_invalid
 70 |                 mb_values_in.append(values_in)
 71 |                 mb_values_ex.append(values_ex)
 72 |                 mb_values_all.append(values_all)
 73 |                 mb_train_valid.append(self.train_valid)
 74 |                 mb_ps.append(ps)
 75 |                 mb_done.append(self.done)
 76 | 
 77 |                 rewards, self.valid_actions, self.obs, self.vector, self.train_valid, self.done, blockings, \
 78 |                     num_on_goals, self.one_episode_perf, max_on_goals, action_status, modify_actions, on_goal \
 79 |                     = one_step(self.env, self.one_episode_perf, actions, pre_block, self.local_model, values_all,
 80 |                                self.hidden_state, ps, self.episodic_buffer.no_reward, self.message, self.episodic_buffer,
 81 |                                self.num_agent)
 82 | 
 83 |                 new_xy = self.env.get_positions()
 84 |                 processed_rewards, be_rewarded, intrinsic_rewards, min_dist = self.episodic_buffer.if_reward(new_xy,
 85 |                                                                                                              rewards,
 86 |                                                                                                              self.done,
 87 |                                                                                                              on_goal)
 88 |                 self.one_episode_perf['reward_count'] += be_rewarded
 89 |                 self.vector[:, :, 3] = rewards
 90 |                 self.vector[:, :, 4] = intrinsic_rewards
 91 |                 self.vector[:, :, 5] = min_dist
 92 | 
 93 |                 mb_actions.append(modify_actions)
 94 |                 for i in range(self.num_agent):
 95 |                     if action_status[i] == -3:
 96 |                         mb_train_valid[-1][i][int(modify_actions[i])] = 0
 97 | 
 98 |                 mb_rewards_all.append(processed_rewards)
 99 |                 mb_rewards_in.append(intrinsic_rewards)
100 |                 mb_rewards_ex.append(rewards)
101 |                 mb_blocking.append(blockings)
102 | 
103 |                 self.one_episode_perf['episode_reward'] += np.sum(processed_rewards)
104 |                 self.one_episode_perf['ex_reward'] += np.sum(rewards)
105 |                 self.one_episode_perf['in_reward'] += np.sum(intrinsic_rewards)
106 |                 if self.one_episode_perf['num_step'] == EnvParameters.EPISODE_LEN // 2:
107 |                     performance_dict['per_half_goals'].append(num_on_goals)
108 | 
109 |                 if self.done:
110 |                     performance_dict = update_perf(self.one_episode_perf, performance_dict, num_on_goals, max_on_goals,
111 |                                                    self.num_agent)
112 |                     self.one_episode_perf = {'num_step': 0, 'episode_reward': 0, 'invalid': 0, 'block': 0,
113 |                                              'num_leave_goal': 0, 'wrong_blocking': 0, 'num_collide': 0,
114 |                                              'reward_count': 0, 'ex_reward': 0, 'in_reward': 0}
115 |                     self.num_agent = EnvParameters.N_AGENTS
116 | 
117 |                     self.done, self.valid_actions, self.obs, self.vector, self.train_valid = reset_env(self.env,
118 |                                                                                                        self.num_agent)
119 |                     self.done = True
120 | 
121 |                     self.hidden_state = (
122 |                         torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device),
123 |                         torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device))
124 |                     self.message = torch.zeros((1, self.num_agent, NetParameters.NET_SIZE)).to(self.local_device)
125 | 
126 |                     self.episodic_buffer.reset(total_steps, self.num_agent)
127 |                     new_xy = self.env.get_positions()
128 |                     self.episodic_buffer.batch_add(new_xy)
129 | 
130 |             mb_obs = np.concatenate(mb_obs, axis=0)
131 |             mb_vector = np.concatenate(mb_vector, axis=0)
132 | 
133 |             mb_rewards_in = np.concatenate(mb_rewards_in, axis=0)
134 |             mb_rewards_ex = np.concatenate(mb_rewards_ex, axis=0)
135 |             mb_rewards_all = np.concatenate(mb_rewards_all, axis=0)
136 | 
137 |             mb_values_in = np.squeeze(np.concatenate(mb_values_in, axis=0), axis=-1)
138 |             mb_values_ex = np.squeeze(np.concatenate(mb_values_ex, axis=0), axis=-1)
139 |             mb_values_all = np.squeeze(np.concatenate(mb_values_all, axis=0), axis=-1)
140 | 
141 |             mb_actions = np.asarray(mb_actions, dtype=np.int64)
142 |             mb_ps = np.stack(mb_ps)
143 |             mb_done = np.asarray(mb_done, dtype=np.bool_)
144 |             mb_hidden_state = np.stack(mb_hidden_state)
145 |             mb_message = np.concatenate(mb_message, axis=0)
146 |             mb_train_valid = np.stack(mb_train_valid)
147 |             mb_blocking = np.concatenate(mb_blocking, axis=0)
148 | 
149 |             last_values_in, last_values_ex, last_values_all = np.squeeze(
150 |                 self.local_model.value(self.obs, self.vector, self.hidden_state, self.episodic_buffer.no_reward,
151 |                                        self.message))
152 | 
153 |             # calculate advantages
154 |             mb_advs_in = np.zeros_like(mb_rewards_in)
155 |             mb_advs_ex = np.zeros_like(mb_rewards_ex)
156 |             mb_advs_all = np.zeros_like(mb_rewards_all)
157 |             last_gaelam_in = last_gaelam_ex = last_gaelam_all = 0
158 |             for t in reversed(range(TrainingParameters.N_STEPS)):
159 |                 if t == TrainingParameters.N_STEPS - 1:
160 |                     next_nonterminal = 1.0 - self.done
161 |                     next_values_in = last_values_in
162 |                     next_values_ex = last_values_ex
163 |                     next_values_all = last_values_all
164 |                 else:
165 |                     next_nonterminal = 1.0 - mb_done[t + 1]
166 |                     next_values_in = mb_values_in[t + 1]
167 |                     next_values_ex = mb_values_ex[t + 1]
168 |                     next_values_all = mb_values_all[t + 1]
169 | 
170 |                 delta_in = np.subtract(np.add(mb_rewards_in[t], TrainingParameters.GAMMA * next_nonterminal *
171 |                                               next_values_in), mb_values_in[t])
172 |                 delta_ex = np.subtract(np.add(mb_rewards_ex[t], TrainingParameters.GAMMA * next_nonterminal *
173 |                                               next_values_ex), mb_values_ex[t])
174 |                 delta_all = np.subtract(np.add(mb_rewards_all[t], TrainingParameters.GAMMA * next_nonterminal *
175 |                                                next_values_all), mb_values_all[t])
176 | 
177 |                 mb_advs_in[t] = last_gaelam_in = np.add(delta_in,
178 |                                                         TrainingParameters.GAMMA * TrainingParameters.LAM
179 |                                                         * next_nonterminal * last_gaelam_in)
180 |                 mb_advs_ex[t] = last_gaelam_ex = np.add(delta_ex,
181 |                                                         TrainingParameters.GAMMA * TrainingParameters.LAM
182 |                                                         * next_nonterminal * last_gaelam_ex)
183 |                 mb_advs_all[t] = last_gaelam_all = np.add(delta_all,
184 |                                                           TrainingParameters.GAMMA * TrainingParameters.LAM
185 |                                                           * next_nonterminal * last_gaelam_all)
186 | 
187 |             mb_returns_in = np.add(mb_advs_in, mb_values_in)
188 |             mb_returns_ex = np.add(mb_advs_ex, mb_values_ex)
189 |             mb_returns_all = np.add(mb_advs_all, mb_values_all)
190 | 
191 |         return mb_obs, mb_vector, mb_returns_in, mb_returns_ex, mb_returns_all, mb_values_in, mb_values_ex, \
192 |             mb_values_all, mb_actions, mb_ps, mb_hidden_state, mb_train_valid, mb_blocking, mb_message, \
193 |             len(performance_dict['per_r']), performance_dict
194 | 
195 |     def imitation(self, weights, total_steps):
196 |         """run multiple steps and collect corresponding data for imitation learning"""
197 |         with torch.no_grad():
198 |             self.local_model.set_weights(weights)
199 | 
200 |             mb_obs, mb_vector, mb_hidden_state, mb_actions = [], [], [], []
201 |             mb_message = []
202 |             step = 0
203 |             episode = 0
204 |             self.imitation_num_agent = EnvParameters.N_AGENTS
205 |             while step <= TrainingParameters.N_STEPS:
206 |                 self.imitation_env._reset(num_agents=self.imitation_num_agent)
207 | 
208 |                 self.imitation_episodic_buffer.reset(total_steps, self.imitation_num_agent)
209 |                 new_xy = self.imitation_env.get_positions()
210 |                 self.imitation_episodic_buffer.batch_add(new_xy)
211 | 
212 |                 world = self.imitation_env.get_obstacle_map()
213 |                 start_positions = tuple(self.imitation_env.get_positions())
214 |                 goals = tuple(self.imitation_env.get_goals())
215 | 
216 |                 try:
217 |                     obs = None
218 |                     mstar_path = od_mstar.find_path(world, start_positions, goals, inflation=2, time_limit=5)
219 |                     obs, vector, actions, hidden_state, message = self.parse_path(mstar_path)
220 |                 except OutOfTimeError:
221 |                     print("timeout")
222 |                 except NoSolutionError:
223 |                     print("nosol????", start_positions)
224 | 
225 |                 if obs is not None:  # no error
226 |                     mb_obs.append(obs)
227 |                     mb_vector.append(vector)
228 |                     mb_actions.append(actions)
229 |                     mb_hidden_state.append(hidden_state)
230 |                     mb_message.append(message)
231 |                     step += np.shape(vector)[0]
232 |                     episode += 1
233 | 
234 |             mb_obs = np.concatenate(mb_obs, axis=0)
235 |             mb_vector = np.concatenate(mb_vector, axis=0)
236 |             mb_actions = np.concatenate(mb_actions, axis=0)
237 |             mb_hidden_state = np.concatenate(mb_hidden_state, axis=0)
238 |             mb_message = np.concatenate(mb_message, axis=0)
239 |         return mb_obs, mb_vector, mb_actions, mb_hidden_state, mb_message, episode, step
240 | 
241 |     def parse_path(self, path):
242 |         """take the path generated from M* and create the corresponding inputs and actions"""
243 |         mb_obs, mb_vector, mb_actions, mb_hidden_state = [], [], [], []
244 |         mb_message = []
245 |         hidden_state = (
246 |             torch.zeros((self.imitation_num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device),
247 |             torch.zeros((self.imitation_num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device))
248 |         obs = np.zeros((1, self.imitation_num_agent, NetParameters.NUM_CHANNEL, EnvParameters.FOV_SIZE, EnvParameters.FOV_SIZE),
249 |                        dtype=np.float32)
250 |         vector = np.zeros((1, self.imitation_num_agent, NetParameters.VECTOR_LEN), dtype=np.float32)
251 |         message = torch.zeros((1, self.imitation_num_agent, NetParameters.NET_SIZE)).to(self.local_device)
252 | 
253 |         for i in range(self.imitation_num_agent):
254 |             s = self.imitation_env.observe(i + 1)
255 |             obs[:, i, :, :, :] = s[0]
256 |             vector[:, i, : 3] = s[1]
257 | 
258 |         for t in range(len(path[:-1])):
259 |             mb_obs.append(obs)
260 |             mb_vector.append(vector)
261 |             mb_hidden_state.append([hidden_state[0].cpu().detach().numpy(), hidden_state[1].cpu().detach().numpy()])
262 |             mb_message.append(message)
263 | 
264 |             hidden_state, message = self.local_model.generate_state(obs, vector, hidden_state, message)
265 | 
266 |             actions = np.zeros(self.imitation_num_agent)
267 |             for i in range(self.imitation_num_agent):
268 |                 pos = path[t][i]
269 |                 new_pos = path[t + 1][i]  # guaranteed to be in bounds by loop guard
270 |                 direction = (new_pos[0] - pos[0], new_pos[1] - pos[1])
271 |                 actions[i] = self.imitation_env.world.get_action(direction)
272 |             mb_actions.append(actions)
273 | 
274 |             obs, vector, rewards, done, _, on_goal, _, valid_actions, _, _, _, _, _, _, _ = \
275 |                 self.imitation_env.joint_step(actions, 0, model='imitation', pre_value=None, input_state=None,
276 |                                               ps=None, no_reward=None, message=None, episodic_buffer=None)
277 | 
278 |             vector[:, :, -1] = actions
279 |             new_xy = self.imitation_env.get_positions()
280 |             _, _, intrinsic_reward, min_dist = self.imitation_episodic_buffer.if_reward(new_xy, rewards, done, on_goal)
281 |             vector[:, :, 3] = rewards
282 |             vector[:, :, 4] = intrinsic_reward
283 |             vector[:, :, 5] = min_dist
284 | 
285 |             if not all(valid_actions):  # M* can not generate collisions
286 |                 print('invalid action')
287 |                 return None, None, None, None
288 | 
289 |         mb_obs = np.concatenate(mb_obs, axis=0)
290 |         mb_message = np.concatenate(mb_message, axis=0)
291 |         mb_vector = np.concatenate(mb_vector, axis=0)
292 |         mb_actions = np.asarray(mb_actions, dtype=np.int64)
293 |         mb_hidden_state = np.stack(mb_hidden_state)
294 |         return mb_obs, mb_vector, mb_actions, mb_hidden_state, mb_message
295 | 


--------------------------------------------------------------------------------
/transformer/__pycache__/encoder_model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/encoder_model.cpython-37.pyc


--------------------------------------------------------------------------------
/transformer/__pycache__/layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/layers.cpython-37.pyc


--------------------------------------------------------------------------------
/transformer/__pycache__/modules.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/modules.cpython-37.pyc


--------------------------------------------------------------------------------
/transformer/__pycache__/sub_layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/sub_layers.cpython-37.pyc


--------------------------------------------------------------------------------
/transformer/encoder_model.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | from transformer.layers import EncoderLayer
 6 | 
 7 | 
 8 | class Encoder(nn.Module):
 9 |     """a encoder model with self attention mechanism"""
10 | 
11 |     def __init__(self, d_model, d_hidden, n_layers, n_head, d_k, d_v):
12 |         """create multiple computation blocks"""
13 |         super().__init__()
14 |         self.layer_stack = nn.ModuleList([EncoderLayer(d_model, d_hidden, n_head, d_k, d_v) for _ in range(n_layers)])
15 | 
16 |     def forward(self, enc_output, return_attns=False):
17 |         """use self attention to merge messages"""
18 |         enc_slf_attn_list = []
19 |         for enc_layer in self.layer_stack:
20 |             enc_output, enc_slf_attn = enc_layer(enc_output)
21 |             enc_slf_attn_list += [enc_slf_attn] if return_attns else []
22 | 
23 |         if return_attns:
24 |             return enc_output, enc_slf_attn_list
25 |         return enc_output,
26 | 
27 | 
28 | class PositionalEncoding(nn.Module):
29 |     """sinusoidal position embedding"""
30 | 
31 |     def __init__(self, d_hid, n_position=200):
32 |         """create table"""
33 |         super(PositionalEncoding, self).__init__()
34 |         self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))
35 | 
36 |     def _get_sinusoid_encoding_table(self, n_position, d_hid):
37 |         """sinusoid position encoding table"""
38 | 
39 |         def get_position_angle_vec(position):
40 |             return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)]
41 | 
42 |         sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)])
43 |         sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i
44 |         sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1
45 | 
46 |         return torch.FloatTensor(sinusoid_table).unsqueeze(0)
47 | 
48 |     def forward(self, x):
49 |         """encode unique agent id """
50 |         return x + self.pos_table[:, :x.size(1)].clone().detach()
51 | 
52 | 
53 | class TransformerEncoder(nn.Module):
54 |     """a sequence to sequence model with attention mechanism"""
55 | 
56 |     def __init__(self, d_model, d_hidden, n_layers, n_head, d_k, d_v, n_position):
57 |         """initialization"""
58 |         super().__init__()
59 |         self.encoder = Encoder(d_model=d_model, d_hidden=d_hidden,
60 |                                n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v)
61 | 
62 |         self.position_enc = PositionalEncoding(d_model, n_position=n_position)
63 | 
64 |     def forward(self, encoder_input):
65 |         """run encoder"""
66 |         encoder_input = self.position_enc(encoder_input)
67 | 
68 |         enc_output, *_ = self.encoder(encoder_input)
69 | 
70 |         return enc_output
71 | 


--------------------------------------------------------------------------------
/transformer/layers.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from transformer.sub_layers import MultiHeadAttention, PositionwiseFeedForward, GatingMechanism
 4 | 
 5 | 
 6 | class EncoderLayer(nn.Module):
 7 |     """compose with two different sub-layers"""
 8 | 
 9 |     def __init__(self, d_model, d_hidden, n_head, d_k, d_v):
10 |         """define one computation block"""
11 |         super(EncoderLayer, self).__init__()
12 |         self.gate1 = GatingMechanism(d_model)
13 |         self.gate2 = GatingMechanism(d_model)
14 |         self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v)
15 |         self.pos_ffn = PositionwiseFeedForward(d_model, d_hidden)
16 |         self.norm1 = nn.LayerNorm(d_model, eps=1e-6)
17 |         self.norm2 = nn.LayerNorm(d_model, eps=1e-6)
18 | 
19 |     def forward(self, enc_input):
20 |         """run a computation block"""
21 |         enc_output = self.norm1(enc_input)
22 |         enc_output, enc_slf_attn = self.slf_attn(
23 |             enc_output, enc_output, enc_output)
24 |         enc_output_1 = self.gate1(enc_input, enc_output)
25 |         enc_output = self.pos_ffn(self.norm2(enc_output_1))
26 |         enc_output = self.gate2(enc_output_1, enc_output)
27 |         return enc_output, enc_slf_attn
28 | 


--------------------------------------------------------------------------------
/transformer/modules.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | class ScaledDotProductAttention(nn.Module):
 7 |     """scaled dot-product attention"""
 8 | 
 9 |     def __init__(self, temperature):
10 |         """initialization"""
11 |         super().__init__()
12 |         self.temperature = temperature
13 | 
14 |     def forward(self, q, k, v):
15 |         """ run multiple independent attention heads in parallel"""
16 |         attn = torch.matmul(q / self.temperature, k.transpose(2, 3))
17 |         # attn = attn.masked_fill(mask == 0, -1e6) # if mask==0,the input value will =-1e6
18 |         # then the attention score will around 0
19 |         attn = F.softmax(attn, dim=-1)  # attention score
20 |         output = torch.matmul(attn, v)
21 |         return output, attn
22 | 


--------------------------------------------------------------------------------
/transformer/sub_layers.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | from transformer.modules import ScaledDotProductAttention
 6 | 
 7 | 
 8 | class MultiHeadAttention(nn.Module):
 9 |     """multi-head self attention module"""
10 | 
11 |     def __init__(self, n_head, d_model, d_k, d_v):
12 |         super().__init__()
13 |         """initialization"""
14 |         self.n_head = n_head
15 |         self.d_k = d_k
16 |         self.d_v = d_v
17 | 
18 |         self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
19 |         self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
20 |         self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
21 |         self.fc = nn.Linear(n_head * d_v, d_model, bias=False)
22 | 
23 |         self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5)
24 | 
25 |     def forward(self, q, k, v):
26 |         """calculate multi-head attention"""
27 |         d_k, d_v, n_head = self.d_k, self.d_v, self.n_head
28 |         sz_b, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1)
29 | 
30 |         # pass through the pre-attention projection
31 |         q = self.w_qs(q).view(sz_b, len_q, n_head, d_k)
32 |         k = self.w_ks(k).view(sz_b, len_k, n_head, d_k)
33 |         v = self.w_vs(v).view(sz_b, len_v, n_head, d_v)
34 | 
35 |         # transpose for attention dot product
36 |         q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
37 |         # calculate attention
38 |         q, attn = self.attention(q, k, v)
39 |         # combine the last two dimensions to concatenate all the heads together
40 |         q = q.transpose(1, 2).contiguous().view(sz_b, len_q, -1)
41 |         q = self.fc(q)
42 | 
43 |         return q, attn
44 | 
45 | 
46 | class PositionwiseFeedForward(nn.Module):
47 |     """A two-feed-forward-layer module"""
48 | 
49 |     def __init__(self, d_in, d_hid):
50 |         """Initialization"""
51 |         super().__init__()
52 |         self.w_1 = nn.Linear(d_in, d_hid)
53 |         self.w_2 = nn.Linear(d_hid, d_in)
54 | 
55 |     def forward(self, x):
56 |         """run a ff layer"""
57 |         x = self.w_2(F.relu(self.w_1(x)))
58 |         return x
59 | 
60 | 
61 | class GatingMechanism(nn.Module):
62 |     """a GRU cell"""
63 | 
64 |     def __init__(self, d_model, bg=2):
65 |         """Initialization"""
66 |         super(GatingMechanism, self).__init__()
67 |         self.Wr = nn.Linear(d_model, d_model)
68 |         self.Ur = nn.Linear(d_model, d_model)
69 |         self.Wz = nn.Linear(d_model, d_model)
70 |         self.Uz = nn.Linear(d_model, d_model)
71 |         self.Wg = nn.Linear(d_model, d_model)
72 |         self.Ug = nn.Linear(d_model, d_model)
73 |         self.bg = torch.nn.Parameter(torch.full([d_model], bg, dtype=torch.float32))
74 | 
75 |         self.sigmoid = nn.Sigmoid()
76 |         self.tanh = nn.Tanh()
77 | 
78 |     def forward(self, x, y):  # x is residual, y is input
79 |         """run a GRU in the place of residual connection"""
80 |         r = self.sigmoid(self.Wr(y) + self.Ur(x))
81 |         z = self.sigmoid(self.Wz(y) + self.Uz(x) - self.bg)
82 |         h = self.tanh(self.Wg(y) + self.Ug(torch.mul(r, x)))
83 |         g = torch.mul(1 - z, x) + torch.mul(z, h)
84 |         return g
85 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | 
  3 | import imageio
  4 | import numpy as np
  5 | import torch
  6 | import wandb
  7 | 
  8 | from alg_parameters import *
  9 | 
 10 | 
 11 | def set_global_seeds(i):
 12 |     """set seed for fair comparison"""
 13 |     torch.manual_seed(i)
 14 |     torch.cuda.manual_seed(i)
 15 |     torch.cuda.manual_seed_all(i)
 16 |     np.random.seed(i)
 17 |     random.seed(i)
 18 |     torch.backends.cudnn.deterministic = True
 19 | 
 20 | 
 21 | def write_to_tensorboard(global_summary, step, performance_dict=None, mb_loss=None, imitation_loss=None, evaluate=True,
 22 |                          greedy=True):
 23 |     """record performance using tensorboard"""
 24 |     if imitation_loss is not None:
 25 |         global_summary.add_scalar(tag='Loss/Imitation_loss', scalar_value=imitation_loss[0], global_step=step)
 26 |         global_summary.add_scalar(tag='Grad/Imitation_grad', scalar_value=imitation_loss[1], global_step=step)
 27 | 
 28 |         global_summary.flush()
 29 |         return
 30 |     if evaluate:
 31 |         if greedy:
 32 |             global_summary.add_scalar(tag='Perf_greedy_eval/Reward', scalar_value=performance_dict['per_r'], global_step=step)
 33 |             global_summary.add_scalar(tag='Perf_greedy_eval/In_Reward', scalar_value=performance_dict['per_in_r'], global_step=step)
 34 |             global_summary.add_scalar(tag='Perf_greedy_eval/Ex_Reward', scalar_value=performance_dict['per_ex_r'], global_step=step)
 35 |             global_summary.add_scalar(tag='Perf_greedy_eval/Valid_rate', scalar_value=performance_dict['per_valid_rate'], global_step=step)
 36 |             global_summary.add_scalar(tag='Perf_greedy_eval/Episode_length', scalar_value=performance_dict['per_episode_len'], global_step=step)
 37 |             global_summary.add_scalar(tag='Perf_greedy_eval/Num_block', scalar_value=performance_dict['per_block'], global_step=step)
 38 |             global_summary.add_scalar(tag='Perf_greedy_eval/Num_leave_goal',scalar_value=performance_dict['per_leave_goal'], global_step=step)
 39 |             global_summary.add_scalar(tag='Perf_greedy_eval/Final_goals', scalar_value=performance_dict['per_final_goals'], global_step=step)
 40 |             global_summary.add_scalar(tag='Perf_greedy_eval/Half_goals', scalar_value=performance_dict['per_half_goals'], global_step=step)
 41 |             global_summary.add_scalar(tag='Perf_greedy_eval/Block_accuracy', scalar_value=performance_dict['per_block_acc'], global_step=step)
 42 |             global_summary.add_scalar(tag='Perf_greedy_eval/Max_goals', scalar_value=performance_dict['per_max_goals'], global_step=step)
 43 |             global_summary.add_scalar(tag='Perf_greedy_eval/Num_collide', scalar_value=performance_dict['per_num_collide'], global_step=step)
 44 | 
 45 |         else:
 46 |             global_summary.add_scalar(tag='Perf_random_eval/Reward', scalar_value=performance_dict['per_r'], global_step=step)
 47 |             global_summary.add_scalar(tag='Perf_random_eval/In_Reward', scalar_value=performance_dict['per_in_r'], global_step=step)
 48 |             global_summary.add_scalar(tag='Perf_random_eval/Ex_Reward', scalar_value=performance_dict['per_ex_r'], global_step=step)
 49 |             global_summary.add_scalar(tag='Perf_random_eval/Valid_rate',scalar_value=performance_dict['per_valid_rate'], global_step=step)
 50 |             global_summary.add_scalar(tag='Perf_random_eval/Episode_length',scalar_value=performance_dict['per_episode_len'], global_step=step)
 51 |             global_summary.add_scalar(tag='Perf_random_eval/Num_block', scalar_value=performance_dict['per_block'], global_step=step)
 52 |             global_summary.add_scalar(tag='Perf_random_eval/Num_leave_goal', scalar_value=performance_dict['per_leave_goal'], global_step=step)
 53 |             global_summary.add_scalar(tag='Perf_random_eval/Final_goals', scalar_value=performance_dict['per_final_goals'], global_step=step)
 54 |             global_summary.add_scalar(tag='Perf_random_eval/Half_goals', scalar_value=performance_dict['per_half_goals'], global_step=step)
 55 |             global_summary.add_scalar(tag='Perf_random_eval/Block_accuracy', scalar_value=performance_dict['per_block_acc'], global_step=step)
 56 |             global_summary.add_scalar(tag='Perf_random_eval/Max_goals', scalar_value=performance_dict['per_max_goals'], global_step=step)
 57 |             global_summary.add_scalar(tag='Perf_random_eval/Num_collide', scalar_value=performance_dict['per_num_collide'], global_step=step)
 58 | 
 59 |     else:
 60 |         loss_vals = np.nanmean(mb_loss, axis=0)
 61 |         global_summary.add_scalar(tag='Perf/Reward', scalar_value=performance_dict['per_r'], global_step=step)
 62 |         global_summary.add_scalar(tag='Perf/In_Reward', scalar_value=performance_dict['per_in_r'], global_step=step)
 63 |         global_summary.add_scalar(tag='Perf/Ex_Reward', scalar_value=performance_dict['per_ex_r'], global_step=step)
 64 |         global_summary.add_scalar(tag='Perf/Valid_rate', scalar_value=performance_dict['per_valid_rate'], global_step=step)
 65 |         global_summary.add_scalar(tag='Perf/Episode_length',scalar_value=performance_dict['per_episode_len'], global_step=step)
 66 |         global_summary.add_scalar(tag='Perf/Num_block', scalar_value=performance_dict['per_block'], global_step=step)
 67 |         global_summary.add_scalar(tag='Perf/Num_leave_goal', scalar_value=performance_dict['per_leave_goal'], global_step=step)
 68 |         global_summary.add_scalar(tag='Perf/Final_goals', scalar_value=performance_dict['per_final_goals'], global_step=step)
 69 |         global_summary.add_scalar(tag='Perf/Half_goals', scalar_value=performance_dict['per_half_goals'], global_step=step)
 70 |         global_summary.add_scalar(tag='Perf/Block_accuracy', scalar_value=performance_dict['per_block_acc'], global_step=step)
 71 |         global_summary.add_scalar(tag='Perf/Max_goals', scalar_value=performance_dict['per_max_goals'], global_step=step)
 72 |         global_summary.add_scalar(tag='Perf/Num_collide', scalar_value=performance_dict['per_num_collide'], global_step=step)
 73 |         global_summary.add_scalar(tag='Perf/Rewarded_rate', scalar_value=performance_dict['rewarded_rate'], global_step=step)
 74 | 
 75 |         for (val, name) in zip(loss_vals, RecordingParameters.LOSS_NAME):
 76 |             if name == 'grad_norm':
 77 |                 global_summary.add_scalar(tag='Grad/' + name, scalar_value=val, global_step=step)
 78 |             else:
 79 |                 global_summary.add_scalar(tag='Loss/' + name, scalar_value=val, global_step=step)
 80 | 
 81 |     global_summary.flush()
 82 | 
 83 | 
 84 | def write_to_wandb(step, performance_dict=None, mb_loss=None, imitation_loss=None, evaluate=True, greedy=True):
 85 |     """record performance using wandb"""
 86 |     if imitation_loss is not None:
 87 |         wandb.log({'Loss/Imitation_loss': imitation_loss[0]}, step=step)
 88 |         wandb.log({'Grad/Imitation_grad': imitation_loss[1]}, step=step)
 89 |         return
 90 |     if evaluate:
 91 |         if greedy:
 92 |             wandb.log({'Perf_greedy_eval/Reward': performance_dict['per_r']}, step=step)
 93 |             wandb.log({'Perf_greedy_eval/In_Reward': performance_dict['per_in_r']}, step=step)
 94 |             wandb.log({'Perf_greedy_eval/Ex_Reward': performance_dict['per_ex_r']}, step=step)
 95 |             wandb.log({'Perf_greedy_eval/Valid_rate': performance_dict['per_valid_rate']}, step=step)
 96 |             wandb.log({'Perf_greedy_eval/Episode_length': performance_dict['per_episode_len']}, step=step)
 97 |             wandb.log({'Perf_greedy_eval/Num_block': performance_dict['per_block']}, step=step)
 98 |             wandb.log({'Perf_greedy_eval/Num_leave_goal': performance_dict['per_leave_goal']}, step=step)
 99 |             wandb.log({'Perf_greedy_eval/Final_goals': performance_dict['per_final_goals']}, step=step)
100 |             wandb.log({'Perf_greedy_eval/Half_goals': performance_dict['per_half_goals']}, step=step)
101 |             wandb.log({'Perf_greedy_eval/Block_accuracy': performance_dict['per_block_acc']}, step=step)
102 |             wandb.log({'Perf_greedy_eval/Max_goals': performance_dict['per_max_goals']}, step=step)
103 |             wandb.log({'Perf_greedy_eval/Num_collide': performance_dict['per_num_collide']}, step=step)
104 | 
105 |         else:
106 |             wandb.log({'Perf_random_eval/Reward': performance_dict['per_r']}, step=step)
107 |             wandb.log({'Perf_random_eval/In_Reward': performance_dict['per_in_r']}, step=step)
108 |             wandb.log({'Perf_random_eval/Ex_Reward': performance_dict['per_ex_r']}, step=step)
109 |             wandb.log({'Perf_random_eval/Valid_rate': performance_dict['per_valid_rate']}, step=step)
110 |             wandb.log({'Perf_random_eval/Episode_length': performance_dict['per_episode_len']}, step=step)
111 |             wandb.log({'Perf_random_eval/Num_block': performance_dict['per_block']}, step=step)
112 |             wandb.log({'Perf_random_eval/Num_leave_goal': performance_dict['per_leave_goal']}, step=step)
113 |             wandb.log({'Perf_random_eval/Final_goals': performance_dict['per_final_goals']}, step=step)
114 |             wandb.log({'Perf_random_eval/Half_goals': performance_dict['per_half_goals']}, step=step)
115 |             wandb.log({'Perf_random_eval/Block_accuracy': performance_dict['per_block_acc']}, step=step)
116 |             wandb.log({'Perf_random_eval/Max_goals': performance_dict['per_max_goals']}, step=step)
117 |             wandb.log({'Perf_random_eval/Num_collide': performance_dict['per_num_collide']}, step=step)
118 | 
119 |     else:
120 |         loss_vals = np.nanmean(mb_loss, axis=0)
121 |         wandb.log({'Perf/Reward': performance_dict['per_r']}, step=step)
122 |         wandb.log({'Perf/In_Reward': performance_dict['per_in_r']}, step=step)
123 |         wandb.log({'Perf/Ex_Reward': performance_dict['per_ex_r']}, step=step)
124 |         wandb.log({'Perf/Valid_rate': performance_dict['per_valid_rate']}, step=step)
125 |         wandb.log({'Perf/Episode_length': performance_dict['per_episode_len']}, step=step)
126 |         wandb.log({'Perf/Num_block': performance_dict['per_block']}, step=step)
127 |         wandb.log({'Perf/Num_leave_goal': performance_dict['per_leave_goal']}, step=step)
128 |         wandb.log({'Perf/Final_goals': performance_dict['per_final_goals']}, step=step)
129 |         wandb.log({'Perf/Half_goals': performance_dict['per_half_goals']}, step=step)
130 |         wandb.log({'Perf/Block_accuracy': performance_dict['per_block_acc']}, step=step)
131 |         wandb.log({'Perf/Max_goals': performance_dict['per_max_goals']}, step=step)
132 |         wandb.log({'Perf/Num_collide': performance_dict['per_num_collide']},
133 |                   step=step)
134 |         wandb.log({'Perf/Rewarded_rate': performance_dict['rewarded_rate']},
135 |                   step=step)
136 | 
137 |         for (val, name) in zip(loss_vals, RecordingParameters.LOSS_NAME):
138 |             if name == 'grad_norm':
139 |                 wandb.log({'Grad/' + name: val}, step=step)
140 |             else:
141 |                 wandb.log({'Loss/' + name: val}, step=step)
142 | 
143 | 
144 | def make_gif(images, file_name):
145 |     """record gif"""
146 |     imageio.mimwrite(file_name, images, subrectangles=True)
147 |     print("wrote gif")
148 | 
149 | 
150 | def reset_env(env, num_agent):
151 |     """reset environment"""
152 |     done = env._reset(num_agent)
153 |     prev_action = np.zeros(num_agent)
154 |     valid_actions = []
155 |     obs = np.zeros((1, num_agent, NetParameters.NUM_CHANNEL, EnvParameters.FOV_SIZE, EnvParameters.FOV_SIZE), dtype=np.float32)
156 |     vector = np.zeros((1, num_agent, NetParameters.VECTOR_LEN), dtype=np.float32)
157 |     train_valid = np.zeros((num_agent, EnvParameters.N_ACTIONS), dtype=np.float32)
158 | 
159 |     for i in range(num_agent):
160 |         valid_action = env.list_next_valid_actions(i + 1)
161 |         s = env.observe(i + 1)
162 |         obs[:, i, :, :, :] = s[0]
163 |         vector[:, i, : 3] = s[1]
164 |         vector[:, i, -1] = prev_action[i]
165 |         valid_actions.append(valid_action)
166 |         train_valid[i, valid_action] = 1
167 |     return done, valid_actions, obs, vector, train_valid
168 | 
169 | 
170 | def one_step(env, one_episode_perf, actions, pre_block, model, pre_value, input_state, ps, no_reward, message,
171 |              episodic_buffer, num_agent):
172 |     """run one step"""
173 |     train_valid = np.zeros((num_agent, EnvParameters.N_ACTIONS), dtype=np.float32)
174 |     obs, vector, rewards, done, next_valid_actions, on_goal, blockings, valid_actions, num_blockings, leave_goals, \
175 |         num_on_goal, max_on_goal, num_collide, action_status, modify_actions \
176 |         = env.joint_step(actions, one_episode_perf['num_step'], model, pre_value,
177 |                          input_state, ps, no_reward, message, episodic_buffer)
178 | 
179 |     one_episode_perf['block'] += num_blockings
180 |     one_episode_perf['num_leave_goal'] += leave_goals
181 |     one_episode_perf['num_collide'] += num_collide
182 |     vector[:, :, -1] = modify_actions
183 |     for i in range(num_agent):
184 |         train_valid[i, next_valid_actions[i]] = 1
185 |         if (pre_block[i] < 0.5) == blockings[:, i]:
186 |             one_episode_perf['wrong_blocking'] += 1
187 |     one_episode_perf['num_step'] += 1
188 |     return rewards, next_valid_actions, obs, vector, train_valid, done, blockings, num_on_goal, one_episode_perf, \
189 |         max_on_goal, action_status, modify_actions, on_goal
190 | 
191 | 
192 | def update_perf(one_episode_perf, performance_dict, num_on_goals, max_on_goals, num_agent):
193 |     """record batch performance"""
194 |     performance_dict['per_ex_r'].append(one_episode_perf['ex_reward'])
195 |     performance_dict['per_in_r'].append(one_episode_perf['in_reward'])
196 |     performance_dict['per_r'].append(one_episode_perf['episode_reward'])
197 |     performance_dict['per_valid_rate'].append(
198 |         ((one_episode_perf['num_step'] * num_agent) - one_episode_perf['invalid']) / (
199 |                 one_episode_perf['num_step'] * num_agent))
200 |     performance_dict['per_episode_len'].append(one_episode_perf['num_step'])
201 |     performance_dict['per_block'].append(one_episode_perf['block'])
202 |     performance_dict['per_leave_goal'].append(one_episode_perf['num_leave_goal'])
203 |     performance_dict['per_num_collide'].append(one_episode_perf['num_collide'])
204 |     performance_dict['per_final_goals'].append(num_on_goals)
205 |     performance_dict['per_block_acc'].append(
206 |         ((one_episode_perf['num_step'] * num_agent) - one_episode_perf['wrong_blocking']) / (
207 |                 one_episode_perf['num_step'] * num_agent))
208 |     performance_dict['per_max_goals'].append(max_on_goals)
209 |     performance_dict['rewarded_rate'].append(
210 |         one_episode_perf['reward_count'] / (one_episode_perf['num_step'] * num_agent))
211 |     return performance_dict
212 | 


--------------------------------------------------------------------------------