├── LICENSE
├── README.md
├── alg_parameters.py
├── driver.py
├── episodic_buffer.py
├── eval_model.py
├── mapf_gym.py
├── model.py
├── net.py
├── od_mstar3
│   ├── SortedCollection.py
│   ├── __pycache__
│   │   ├── SortedCollection.cpython-37.pyc
│   │   ├── col_set_addition.cpython-37.pyc
│   │   ├── interface.cpython-37.pyc
│   │   ├── od_mstar.cpython-37.pyc
│   │   └── workspace_graph.cpython-37.pyc
│   ├── build
│   │   ├── lib.linux-x86_64-3.7
│   │   │   └── cpp_mstar.cpython-37m-x86_64-linux-gnu.so
│   │   └── temp.linux-x86_64-3.7
│   │       ├── col_checker.o
│   │       ├── cython_od_mstar.o
│   │       ├── grid_planning.o
│   │       ├── grid_policy.o
│   │       ├── od_mstar.o
│   │       └── policy.o
│   ├── col_checker.cpp
│   ├── col_checker.hpp
│   ├── col_set.hpp
│   ├── col_set_addition.py
│   ├── cpp_mstar.cpython-37m-x86_64-linux-gnu.so
│   ├── cython_od_mstar.cpp
│   ├── cython_od_mstar.pyx
│   ├── grid_planning.cpp
│   ├── grid_planning.hpp
│   ├── grid_policy.cpp
│   ├── grid_policy.hpp
│   ├── interface.py
│   ├── mstar_type_defs.hpp
│   ├── mstar_utils.hpp
│   ├── od_mstar.cpp
│   ├── od_mstar.hpp
│   ├── od_mstar.py
│   ├── od_vertex.hpp
│   ├── policy.cpp
│   ├── policy.hpp
│   ├── prune_graph.py
│   ├── setup.py
│   └── workspace_graph.py
├── requirements.txt
├── runner.py
├── transformer
│   ├── __pycache__
│   │   ├── encoder_model.cpython-37.pyc
│   │   ├── layers.cpython-37.pyc
│   │   ├── modules.cpython-37.pyc
│   │   └── sub_layers.cpython-37.pyc
│   ├── encoder_model.py
│   ├── layers.py
│   ├── modules.py
│   └── sub_layers.py
└── util.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2023 MARMot Lab @ NUS-ME

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# SCRIMP
This is the code implementing the SCRIMP algorithm: `SCRIMP: Scalable Communication for Reinforcement- and Imitation-Learning-Based Multi-Agent Pathfinding`

## Requirements

Python == 3.7
```
pip install -r requirements.txt
```


## Setting up Code

* `cd` into the `od_mstar3` folder.
* Run `python3 setup.py build_ext --inplace`.
* Verify the build by returning to the repository root, starting `python3`, and running `import od_mstar3.cpp_mstar`.

## Running Code

* Modify the parameters in `alg_parameters.py` to set the desired training settings and recording methods.
* Run `python driver.py`.

## Key Files

`alg_parameters.py` - Training parameters.

`driver.py` - Driver of the program. Holds the global training network for PPO.

`episodic_buffer.py` - Defines the episodic buffer used to generate intrinsic rewards.

`eval_model.py` - Evaluates the trained model.

`mapf_gym.py` - Defines the classical reinforcement learning environment of Multi-Agent Pathfinding.

`model.py` - Defines the neural network-based operation model.

`net.py` - Defines the network architecture.

`runner.py` - A single process for collecting training data.


## Other Links

Fully trained SCRIMP model - https://www.dropbox.com/scl/fo/ekhxyt7gm575kfwaerwb5/h?rlkey=j3cdikwofz0zelj2oci9q97k8&dl=0


## Authors

Yutong Wang

Bairan Xiang

Shinan Huang

Guillaume Sartoretti
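To sanity-check the downloaded model, the checkpoint can be loaded the same way `eval_model.py` does (a minimal sketch; it assumes the Dropbox archive is unpacked to `./final/net_checkpoint.pkl`, matching the checkpoint dictionaries saved by `driver.py`):

```
import torch
from model import Model

# load the released checkpoint on CPU; keys follow the dicts saved by driver.py
model = Model(0, torch.device('cpu'))
checkpoint = torch.load('./final/net_checkpoint.pkl', map_location='cpu')
model.network.load_state_dict(checkpoint['model'])
print('checkpoint at step {}, episode {}'.format(checkpoint['step'], checkpoint['episode']))
```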

--------------------------------------------------------------------------------
/alg_parameters.py:
--------------------------------------------------------------------------------

import datetime

""" Hyperparameters of SCRIMP!"""


class EnvParameters:
    N_AGENTS = 8  # number of agents used in training
    N_ACTIONS = 5
    EPISODE_LEN = 256  # maximum episode length in training
    FOV_SIZE = 3
    WORLD_SIZE = (10, 40)
    OBSTACLE_PROB = (0.0, 0.5)
    ACTION_COST = -0.3
    IDLE_COST = -0.3
    GOAL_REWARD = 0.0
    COLLISION_COST = -2
    BLOCKING_COST = -1


class TrainingParameters:
    lr = 1e-5
    GAMMA = 0.95  # discount factor
    LAM = 0.95  # for GAE
    CLIP_RANGE = 0.2
    MAX_GRAD_NORM = 10
    ENTROPY_COEF = 0.01
    IN_VALUE_COEF = 0.08
    EX_VALUE_COEF = 0.08
    POLICY_COEF = 10
    VALID_COEF = 0.5
    BLOCK_COEF = 0.5
    N_EPOCHS = 10
    N_ENVS = 16  # number of processes
    N_MAX_STEPS = 3e7  # maximum number of time steps used in training
    N_STEPS = 2 ** 10  # number of time steps per process per data collection
    MINIBATCH_SIZE = int(2 ** 10)
    DEMONSTRATION_PROB = 0.1  # probability of running an imitation-learning episode


class NetParameters:
    NET_SIZE = 512
    NUM_CHANNEL = 8  # number of channels of observations - [FOV_SIZE x FOV_SIZE x NUM_CHANNEL]
    GOAL_REPR_SIZE = 12
    VECTOR_LEN = 7  # [dx, dy, d_total, extrinsic reward, intrinsic reward, min dist w.r.t. buffer, action at t-1]
    N_POSITION = 1024  # maximum number of unique IDs
    D_MODEL = NET_SIZE  # for input and inner features of attention
    D_HIDDEN = 1024  # for the feed-forward network
    N_LAYERS = 1  # number of computation blocks
    N_HEAD = 8
    D_K = 32
    D_V = 32


class TieBreakingParameters:
    DIST_FACTOR = 0.1


class IntrinsicParameters:
    K = 3  # threshold for obtaining intrinsic reward
    CAPACITY = 80
    ADD_THRESHOLD = 3
    N_ADD_INTRINSIC = 1e6  # number of steps before intrinsic rewards are given
    SURROGATE1 = 0.2
    SURROGATE2 = 1


class SetupParameters:
    SEED = 1234
    USE_GPU_LOCAL = False
    USE_GPU_GLOBAL = True
    NUM_GPU = 1


class RecordingParameters:
    RETRAIN = False
    WANDB = True
    TENSORBOARD = True
    TXT_WRITER = True
    ENTITY = 'yutong'
    TIME = datetime.datetime.now().strftime('%d-%m-%y%H%M')
    EXPERIMENT_PROJECT = 'MAPF'
    EXPERIMENT_NAME = 'SCRIMP'
    EXPERIMENT_NOTE = ''
    SAVE_INTERVAL = 5e5  # interval of saving the model
    BEST_INTERVAL = 0  # interval of saving the model with the best performance
    GIF_INTERVAL = 1e6  # interval of saving gifs
    EVAL_INTERVAL = TrainingParameters.N_ENVS * TrainingParameters.N_STEPS  # interval of evaluating the training model
    EVAL_EPISODES = 1  # number of episodes used in evaluation
    RECORD_BEST = False
    MODEL_PATH = './models' + '/' + EXPERIMENT_PROJECT + '/' + EXPERIMENT_NAME + TIME
    GIFS_PATH = './gifs' + '/' + EXPERIMENT_PROJECT + '/' + EXPERIMENT_NAME + TIME
    SUMMARY_PATH = './summaries' + '/' + EXPERIMENT_PROJECT + '/' + EXPERIMENT_NAME + TIME
    TXT_NAME = 'alg.txt'
    LOSS_NAME = ['all_loss', 'policy_loss', 'policy_entropy', 'critic_loss_in', 'critic_loss_ex', 'valid_loss',
                 'blocking_loss', 'clipfrac',
                 'grad_norm', 'advantage']

all_args = {'N_AGENTS': EnvParameters.N_AGENTS, 'N_ACTIONS': EnvParameters.N_ACTIONS,
            'EPISODE_LEN': EnvParameters.EPISODE_LEN, 'FOV_SIZE': EnvParameters.FOV_SIZE,
            'WORLD_SIZE': EnvParameters.WORLD_SIZE,
            'OBSTACLE_PROB': EnvParameters.OBSTACLE_PROB,
            'ACTION_COST': EnvParameters.ACTION_COST,
            'IDLE_COST': EnvParameters.IDLE_COST, 'GOAL_REWARD': EnvParameters.GOAL_REWARD,
            'COLLISION_COST': EnvParameters.COLLISION_COST,
            'BLOCKING_COST': EnvParameters.BLOCKING_COST,
            'lr': TrainingParameters.lr, 'GAMMA': TrainingParameters.GAMMA, 'LAM': TrainingParameters.LAM,
            'CLIPRANGE': TrainingParameters.CLIP_RANGE, 'MAX_GRAD_NORM': TrainingParameters.MAX_GRAD_NORM,
            'ENTROPY_COEF': TrainingParameters.ENTROPY_COEF,
            'IN_VALUE_COEF': TrainingParameters.IN_VALUE_COEF, 'EX_VALUE_COEF': TrainingParameters.EX_VALUE_COEF,
            'POLICY_COEF': TrainingParameters.POLICY_COEF,
            'VALID_COEF': TrainingParameters.VALID_COEF, 'BLOCK_COEF': TrainingParameters.BLOCK_COEF,
            'N_EPOCHS': TrainingParameters.N_EPOCHS, 'N_ENVS': TrainingParameters.N_ENVS,
            'N_MAX_STEPS': TrainingParameters.N_MAX_STEPS,
            'N_STEPS': TrainingParameters.N_STEPS, 'MINIBATCH_SIZE': TrainingParameters.MINIBATCH_SIZE,
            'DEMONSTRATION_PROB': TrainingParameters.DEMONSTRATION_PROB,
            'NET_SIZE': NetParameters.NET_SIZE, 'NUM_CHANNEL': NetParameters.NUM_CHANNEL,
            'GOAL_REPR_SIZE': NetParameters.GOAL_REPR_SIZE, 'VECTOR_LEN': NetParameters.VECTOR_LEN,
            'N_POSITION': NetParameters.N_POSITION,
            'D_MODEL': NetParameters.D_MODEL, 'D_HIDDEN': NetParameters.D_HIDDEN, 'N_LAYERS': NetParameters.N_LAYERS,
            'N_HEAD': NetParameters.N_HEAD, 'D_K': NetParameters.D_K, 'D_V': NetParameters.D_V,
            'DIST_FACTOR': TieBreakingParameters.DIST_FACTOR, 'K': IntrinsicParameters.K,
            'CAPACITY': IntrinsicParameters.CAPACITY, 'ADD_THRESHOLD': IntrinsicParameters.ADD_THRESHOLD,
            'N_ADD_INTRINSIC': IntrinsicParameters.N_ADD_INTRINSIC,
            'SURROGATE1': IntrinsicParameters.SURROGATE1, 'SURROGATE2': IntrinsicParameters.SURROGATE2,
            'SEED': SetupParameters.SEED, 'USE_GPU_LOCAL': SetupParameters.USE_GPU_LOCAL,
            'USE_GPU_GLOBAL': SetupParameters.USE_GPU_GLOBAL,
            'NUM_GPU': SetupParameters.NUM_GPU, 'RETRAIN': RecordingParameters.RETRAIN,
            'WANDB': RecordingParameters.WANDB,
            'TENSORBOARD': RecordingParameters.TENSORBOARD, 'TXT_WRITER': RecordingParameters.TXT_WRITER,
            'ENTITY': RecordingParameters.ENTITY,
            'TIME': RecordingParameters.TIME, 'EXPERIMENT_PROJECT': RecordingParameters.EXPERIMENT_PROJECT,
            'EXPERIMENT_NAME': RecordingParameters.EXPERIMENT_NAME,
            'EXPERIMENT_NOTE': RecordingParameters.EXPERIMENT_NOTE,
            'SAVE_INTERVAL': RecordingParameters.SAVE_INTERVAL, 'BEST_INTERVAL': RecordingParameters.BEST_INTERVAL,
            'GIF_INTERVAL': RecordingParameters.GIF_INTERVAL, 'EVAL_INTERVAL': RecordingParameters.EVAL_INTERVAL,
            'EVAL_EPISODES': RecordingParameters.EVAL_EPISODES, 'RECORD_BEST': RecordingParameters.RECORD_BEST,
            'MODEL_PATH': RecordingParameters.MODEL_PATH, 'GIFS_PATH': RecordingParameters.GIFS_PATH,
            'SUMMARY_PATH': RecordingParameters.SUMMARY_PATH,
            'TXT_NAME': RecordingParameters.TXT_NAME}
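A quick check of how these intervals interact under the defaults above: one data collection advances the global step counter by `N_ENVS * N_STEPS = 16 * 1024 = 16384` steps, which equals `EVAL_INTERVAL`, so evaluation runs after every reinforcement-learning update, while a model save fires roughly every 31 collections:

```
from alg_parameters import RecordingParameters, TrainingParameters

steps_per_collection = TrainingParameters.N_ENVS * TrainingParameters.N_STEPS  # 16384
assert RecordingParameters.EVAL_INTERVAL == steps_per_collection
print(RecordingParameters.SAVE_INTERVAL / steps_per_collection)  # ~30.5 collections per save
```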

--------------------------------------------------------------------------------
/driver.py:
--------------------------------------------------------------------------------

import os
import os.path as osp

import numpy as np
import ray
import setproctitle
from torch.utils.tensorboard import SummaryWriter
import torch
import wandb

from alg_parameters import *
from episodic_buffer import EpisodicBuffer
from mapf_gym import MAPFEnv
from model import Model
from runner import Runner
from util import set_global_seeds, write_to_tensorboard, write_to_wandb, make_gif, reset_env, one_step, update_perf

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
ray.init(num_gpus=SetupParameters.NUM_GPU)
print("Welcome to SCRIMP on MAPF!\n")


def main():
    """main code"""
    # preparing for training
    if RecordingParameters.RETRAIN:
        restore_path = './local_model'
        net_path_checkpoint = restore_path + "/net_checkpoint.pkl"
        net_dict = torch.load(net_path_checkpoint)

    if RecordingParameters.WANDB:
        if RecordingParameters.RETRAIN:
            wandb_id = None
        else:
            wandb_id = wandb.util.generate_id()
        wandb.init(project=RecordingParameters.EXPERIMENT_PROJECT,
                   name=RecordingParameters.EXPERIMENT_NAME,
                   entity=RecordingParameters.ENTITY,
                   notes=RecordingParameters.EXPERIMENT_NOTE,
                   config=all_args,
                   id=wandb_id,
                   resume='allow')
        print('id is:{}'.format(wandb_id))
        print('Launching wandb...\n')

    if RecordingParameters.TENSORBOARD:
        if RecordingParameters.RETRAIN:
            summary_path = ''
        else:
            summary_path = RecordingParameters.SUMMARY_PATH
        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        global_summary = SummaryWriter(summary_path)
        print('Launching tensorboard...\n')

    if RecordingParameters.TXT_WRITER:
        # note: relies on summary_path, so TENSORBOARD must be enabled as well
        txt_path = summary_path + '/' + RecordingParameters.TXT_NAME
        with open(txt_path, "w") as f:
            f.write(str(all_args))
        print('Logging txt...\n')

    setproctitle.setproctitle(
        RecordingParameters.EXPERIMENT_PROJECT + RecordingParameters.EXPERIMENT_NAME + "@" + RecordingParameters.ENTITY)
    set_global_seeds(SetupParameters.SEED)

    # create classes
    global_device = torch.device('cuda') if SetupParameters.USE_GPU_GLOBAL else torch.device('cpu')
    local_device = torch.device('cuda') if SetupParameters.USE_GPU_LOCAL else torch.device('cpu')
    global_model = Model(0, global_device, True)

    if RecordingParameters.RETRAIN:
        global_model.network.load_state_dict(net_dict['model'])
        global_model.net_optimizer.load_state_dict(net_dict['optimizer'])

    envs = [Runner.remote(i + 1) for i in range(TrainingParameters.N_ENVS)]
    eval_env = MAPFEnv(num_agents=EnvParameters.N_AGENTS)
    eval_memory = EpisodicBuffer(0, EnvParameters.N_AGENTS)

    if RecordingParameters.RETRAIN:
        curr_steps = net_dict["step"]
        curr_episodes = net_dict["episode"]
        best_perf = net_dict["reward"]
    else:
        curr_steps = curr_episodes = best_perf = 0

    update_done = True
    demon = True
    job_list = []
    last_test_t = -RecordingParameters.EVAL_INTERVAL - 1
    last_model_t = -RecordingParameters.SAVE_INTERVAL - 1
    last_best_t = -RecordingParameters.BEST_INTERVAL - 1
    last_gif_t = -RecordingParameters.GIF_INTERVAL - 1

    # start training
    try:
        while curr_steps < TrainingParameters.N_MAX_STEPS:
            if update_done:
                # start a data collection
                if global_device != local_device:
                    net_weights = global_model.network.to(local_device).state_dict()
                    global_model.network.to(global_device)
                else:
                    net_weights = global_model.network.state_dict()
                net_weights_id = ray.put(net_weights)
                curr_steps_id = ray.put(curr_steps)
                demon_probs = np.random.rand()
                if demon_probs < TrainingParameters.DEMONSTRATION_PROB:
                    demon = True
                    for i, env in enumerate(envs):
                        job_list.append(env.imitation.remote(net_weights_id, curr_steps_id))
                else:
                    demon = False
                    for i, env in enumerate(envs):
                        job_list.append(env.run.remote(net_weights_id, curr_steps_id))

            # get data from multiple processes
            done_id, job_list = ray.wait(job_list, num_returns=TrainingParameters.N_ENVS)
            update_done = True if job_list == [] else False
            done_len = len(done_id)
            job_results = ray.get(done_id)
            if demon:
                # get imitation learning data
                mb_obs, mb_vector, mb_actions, mb_hidden_state = [], [], [], []
                mb_message = []
                for results in range(done_len):
                    mb_obs.append(job_results[results][0])
                    mb_vector.append(job_results[results][1])
                    mb_actions.append(job_results[results][2])
                    mb_hidden_state.append(job_results[results][3])
                    mb_message.append(job_results[results][4])
                    curr_episodes += job_results[results][-2]
                    curr_steps += job_results[results][-1]
                mb_obs = np.concatenate(mb_obs, axis=0)
                mb_vector = np.concatenate(mb_vector, axis=0)
                mb_hidden_state = np.concatenate(mb_hidden_state, axis=0)
                mb_actions = np.concatenate(mb_actions, axis=0)
                mb_message = np.concatenate(mb_message, axis=0)

                # training of imitation learning
                mb_imitation_loss = []
                for start in range(0, np.shape(mb_obs)[0], TrainingParameters.MINIBATCH_SIZE):
                    end = start + TrainingParameters.MINIBATCH_SIZE
                    slices = (arr[start:end] for arr in
                              (mb_obs, mb_vector, mb_actions, mb_hidden_state, mb_message))
                    mb_imitation_loss.append(global_model.imitation_train(*slices))
                mb_imitation_loss = np.nanmean(mb_imitation_loss, axis=0)

                # record training result
                if RecordingParameters.WANDB:
                    write_to_wandb(curr_steps, imitation_loss=mb_imitation_loss, evaluate=False)
                if RecordingParameters.TENSORBOARD:
                    write_to_tensorboard(global_summary, curr_steps, imitation_loss=mb_imitation_loss, evaluate=False)
            else:
                # get reinforcement learning data
                curr_steps += done_len * TrainingParameters.N_STEPS
                mb_obs, mb_vector, mb_returns_in, mb_returns_ex, mb_returns_all, mb_values_in, \
                    mb_values_ex, mb_values_all, mb_actions, mb_ps, mb_hidden_state, mb_train_valid, \
                    mb_blocking = [], [], [], [], [], [], [], [], [], [], [], [], []
                mb_message = []
                performance_dict = {'per_r': [], 'per_in_r': [], 'per_ex_r': [], 'per_valid_rate': [],
                                    'per_episode_len': [], 'per_block': [],
                                    'per_leave_goal': [], 'per_final_goals': [], 'per_half_goals': [],
                                    'per_block_acc': [], 'per_max_goals': [], 'per_num_collide': [],
                                    'rewarded_rate': []}
                for results in range(done_len):
                    mb_obs.append(job_results[results][0])
                    mb_vector.append(job_results[results][1])
                    mb_returns_in.append(job_results[results][2])
                    mb_returns_ex.append(job_results[results][3])
                    mb_returns_all.append(job_results[results][4])
                    mb_values_in.append(job_results[results][5])
                    mb_values_ex.append(job_results[results][6])
                    mb_values_all.append(job_results[results][7])
                    mb_actions.append(job_results[results][8])
                    mb_ps.append(job_results[results][9])
                    mb_hidden_state.append(job_results[results][10])
                    mb_train_valid.append(job_results[results][11])
                    mb_blocking.append(job_results[results][12])
                    mb_message.append(job_results[results][13])
                    curr_episodes += job_results[results][-2]
                    for i in performance_dict.keys():
                        performance_dict[i].append(np.nanmean(job_results[results][-1][i]))

                for i in performance_dict.keys():
                    performance_dict[i] = np.nanmean(performance_dict[i])

                mb_obs = np.concatenate(mb_obs, axis=0)
                mb_vector = np.concatenate(mb_vector, axis=0)
                mb_returns_in = np.concatenate(mb_returns_in, axis=0)
                mb_returns_ex = np.concatenate(mb_returns_ex, axis=0)
                mb_returns_all = np.concatenate(mb_returns_all, axis=0)
                mb_values_in = np.concatenate(mb_values_in, axis=0)
                mb_values_ex = np.concatenate(mb_values_ex, axis=0)
                mb_values_all = np.concatenate(mb_values_all, axis=0)
                mb_actions = np.concatenate(mb_actions, axis=0)
                mb_ps = np.concatenate(mb_ps, axis=0)
                mb_hidden_state = np.concatenate(mb_hidden_state, axis=0)
                mb_train_valid = np.concatenate(mb_train_valid, axis=0)
                mb_blocking = np.concatenate(mb_blocking, axis=0)
                mb_message = np.concatenate(mb_message, axis=0)

                # training of reinforcement learning
                mb_loss = []
                inds = np.arange(done_len * TrainingParameters.N_STEPS)
                for _ in range(TrainingParameters.N_EPOCHS):
                    np.random.shuffle(inds)
                    for start in range(0, done_len * TrainingParameters.N_STEPS, TrainingParameters.MINIBATCH_SIZE):
                        end = start + TrainingParameters.MINIBATCH_SIZE
                        mb_inds = inds[start:end]
                        slices = (arr[mb_inds] for arr in
                                  (mb_obs, mb_vector, mb_returns_in, mb_returns_ex, mb_returns_all, mb_values_in,
                                   mb_values_ex, mb_values_all, mb_actions, mb_ps, mb_hidden_state,
                                   mb_train_valid, mb_blocking, mb_message))
                        mb_loss.append(global_model.train(*slices))

                # record training result
                if RecordingParameters.WANDB:
                    write_to_wandb(curr_steps, performance_dict, mb_loss, evaluate=False)
                if RecordingParameters.TENSORBOARD:
                    write_to_tensorboard(global_summary, curr_steps, performance_dict, mb_loss, evaluate=False)

            if (curr_steps - last_test_t) / RecordingParameters.EVAL_INTERVAL >= 1.0:
                # whether to save a gif
                if (curr_steps - last_gif_t) / RecordingParameters.GIF_INTERVAL >= 1.0:
                    save_gif = True
                    last_gif_t = curr_steps
                else:
                    save_gif = False

                # evaluate training model
                last_test_t = curr_steps
                with torch.no_grad():
                    # greedy_eval_performance_dict = evaluate(eval_env, eval_memory, global_model,
                    #                                         global_device, save_gif, curr_steps, True)
                    eval_performance_dict = evaluate(eval_env, eval_memory, global_model, global_device, save_gif,
                                                     curr_steps, False)
                # record evaluation result
                if RecordingParameters.WANDB:
                    # write_to_wandb(curr_steps, greedy_eval_performance_dict, evaluate=True, greedy=True)
                    write_to_wandb(curr_steps, eval_performance_dict, evaluate=True, greedy=False)
                if RecordingParameters.TENSORBOARD:
                    # write_to_tensorboard(global_summary, curr_steps, greedy_eval_performance_dict, evaluate=True,
                    #                      greedy=True)
                    write_to_tensorboard(global_summary, curr_steps, eval_performance_dict, evaluate=True,
                                         greedy=False)

                print('episodes: {}, steps: {}, episode reward: {}, final goals: {} \n'.format(
                    curr_episodes, curr_steps, eval_performance_dict['per_r'],
                    eval_performance_dict['per_final_goals']))
                # save model with the best performance
                if RecordingParameters.RECORD_BEST:
                    if eval_performance_dict['per_r'] > best_perf and (
                            curr_steps - last_best_t) / RecordingParameters.BEST_INTERVAL >= 1.0:
                        best_perf = eval_performance_dict['per_r']
                        last_best_t = curr_steps
                        print('Saving best model \n')
                        model_path = osp.join(RecordingParameters.MODEL_PATH, 'best_model')
                        if not os.path.exists(model_path):
                            os.makedirs(model_path)
                        path_checkpoint = model_path + "/net_checkpoint.pkl"
                        net_checkpoint = {"model": global_model.network.state_dict(),
                                          "optimizer": global_model.net_optimizer.state_dict(),
                                          "step": curr_steps,
                                          "episode": curr_episodes,
                                          "reward": best_perf}
                        torch.save(net_checkpoint, path_checkpoint)

            # save model
            if (curr_steps - last_model_t) / RecordingParameters.SAVE_INTERVAL >= 1.0:
                last_model_t = curr_steps
                print('Saving Model !\n')
                model_path = osp.join(RecordingParameters.MODEL_PATH, '%.5i' % curr_steps)
                os.makedirs(model_path)
                path_checkpoint = model_path + "/net_checkpoint.pkl"
                net_checkpoint = {"model": global_model.network.state_dict(),
                                  "optimizer": global_model.net_optimizer.state_dict(),
                                  "step": curr_steps,
                                  "episode": curr_episodes,
                                  "reward": eval_performance_dict['per_r']}
                torch.save(net_checkpoint, path_checkpoint)

    except KeyboardInterrupt:
        print("CTRL-C pressed. Killing remote workers!")
    finally:
        # save final model
        print('Saving Final Model !\n')
        model_path = RecordingParameters.MODEL_PATH + '/final'
        os.makedirs(model_path, exist_ok=True)
        path_checkpoint = model_path + "/net_checkpoint.pkl"
        net_checkpoint = {"model": global_model.network.state_dict(),
                          "optimizer": global_model.net_optimizer.state_dict(),
                          "step": curr_steps,
                          "episode": curr_episodes,
                          # evaluation may not have run yet if training was interrupted early
                          "reward": eval_performance_dict['per_r'] if 'eval_performance_dict' in locals() else best_perf}
        torch.save(net_checkpoint, path_checkpoint)
        global_summary.close()
        # kill remote workers
        for e in envs:
            ray.kill(e)
        if RecordingParameters.WANDB:
            wandb.finish()


def evaluate(eval_env, episodic_buffer, model, device, save_gif, curr_steps, greedy):
    """Evaluate Model."""
    eval_performance_dict = {'per_r': [], 'per_ex_r': [], 'per_in_r': [], 'per_valid_rate': [], 'per_episode_len': [],
                             'per_block': [], 'per_leave_goal': [], 'per_final_goals': [], 'per_half_goals': [],
                             'per_block_acc': [], 'per_max_goals': [], 'per_num_collide': [], 'rewarded_rate': []}
    episode_frames = []

    for i in range(RecordingParameters.EVAL_EPISODES):
        num_agent = EnvParameters.N_AGENTS

        # reset environment and buffer
        message = torch.zeros((1, num_agent, NetParameters.NET_SIZE)).to(device)
        hidden_state = (torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device),
                        torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device))

        done, valid_actions, obs, vector, _ = reset_env(eval_env, num_agent)
        episodic_buffer.reset(curr_steps, num_agent)
        new_xy = eval_env.get_positions()
        episodic_buffer.batch_add(new_xy)

        one_episode_perf = {'num_step': 0, 'episode_reward': 0, 'invalid': 0, 'block': 0,
                            'num_leave_goal': 0, 'wrong_blocking': 0, 'num_collide': 0, 'reward_count': 0,
                            'ex_reward': 0, 'in_reward': 0}
        if save_gif:
            episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))

        # stepping
        while not done:
            # predict
            actions, pre_block, hidden_state, num_invalid, v_all, ps, message = model.evaluate(obs, vector,
                                                                                               valid_actions,
                                                                                               hidden_state,
                                                                                               greedy,
                                                                                               episodic_buffer.no_reward,
                                                                                               message, num_agent)
            one_episode_perf['invalid'] += num_invalid

            # move
            rewards, valid_actions, obs, vector, _, done, _, num_on_goals, one_episode_perf, max_on_goals, \
                _, _, on_goal = one_step(eval_env, one_episode_perf, actions, pre_block, model, v_all, hidden_state,
                                         ps, episodic_buffer.no_reward, message, episodic_buffer, num_agent)

            new_xy = eval_env.get_positions()
            processed_rewards, be_rewarded, intrinsic_reward, min_dist = episodic_buffer.if_reward(new_xy, rewards,
                                                                                                   done, on_goal)
            one_episode_perf['reward_count'] += be_rewarded
            vector[:, :, 3] = rewards
            vector[:, :, 4] = intrinsic_reward
            vector[:, :, 5] = min_dist

            if save_gif:
                episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))

            one_episode_perf['episode_reward'] += np.sum(processed_rewards)
            one_episode_perf['ex_reward'] += np.sum(rewards)
            one_episode_perf['in_reward'] += np.sum(intrinsic_reward)
            if one_episode_perf['num_step'] == EnvParameters.EPISODE_LEN // 2:
                eval_performance_dict['per_half_goals'].append(num_on_goals)

            if done:
                # save gif
                if save_gif:
                    if not os.path.exists(RecordingParameters.GIFS_PATH):
                        os.makedirs(RecordingParameters.GIFS_PATH)
                    images = np.array(episode_frames)
                    make_gif(images,
                             '{}/steps_{:d}_reward{:.1f}_final_goals{:.1f}_greedy{:d}.gif'.format(
                                 RecordingParameters.GIFS_PATH,
                                 curr_steps, one_episode_perf['episode_reward'],
                                 num_on_goals, greedy))
                    save_gif = False

                eval_performance_dict = update_perf(one_episode_perf, eval_performance_dict, num_on_goals,
                                                    max_on_goals, num_agent)

    # average performance of multiple episodes
    for i in eval_performance_dict.keys():
        eval_performance_dict[i] = np.nanmean(eval_performance_dict[i])

    return eval_performance_dict


if __name__ == "__main__":
    main()
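`runner.py` (the per-process data collector) is not reproduced in this listing. For orientation, the `mb_returns_*`/`mb_values_*` arrays consumed by `main()` are produced with Generalized Advantage Estimation, matching the `GAMMA`/`LAM` ("for GAE") hyperparameters; the sketch below shows the standard recurrence under that assumption, and the actual runner may differ in bootstrapping details:

```
import numpy as np

from alg_parameters import TrainingParameters


def gae_returns(rewards, values, last_value, dones):
    """Standard GAE over one rollout; rewards/values/dones have length T."""
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    last_adv = 0.0
    for t in reversed(range(T)):
        next_value = last_value if t == T - 1 else values[t + 1]
        non_terminal = 1.0 - dones[t]
        delta = rewards[t] + TrainingParameters.GAMMA * next_value * non_terminal - values[t]
        last_adv = delta + TrainingParameters.GAMMA * TrainingParameters.LAM * non_terminal * last_adv
        advantages[t] = last_adv
    return advantages + values  # returns = advantages + value baseline
```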

--------------------------------------------------------------------------------
/episodic_buffer.py:
--------------------------------------------------------------------------------

import random

import numpy as np

from alg_parameters import *


class EpisodicBuffer(object):
    """create a parallel episodic buffer for all agents"""

    def __init__(self, total_step, num_agent):
        """initialization"""
        self._capacity = int(IntrinsicParameters.CAPACITY)
        self.xy_memory = np.zeros((self._capacity, num_agent, 2))
        self._count = np.zeros(num_agent, dtype=np.int64)
        self.num_agent = num_agent
        self.min_step = IntrinsicParameters.N_ADD_INTRINSIC
        self.surrogate1 = IntrinsicParameters.SURROGATE1
        self.surrogate2 = IntrinsicParameters.SURROGATE2
        self.no_reward = False
        if total_step < self.min_step:
            self.no_reward = True

    @property
    def capacity(self):
        return self._capacity

    def id_len(self, id_index):
        """current size"""
        return min(self._count[id_index], self._capacity)

    def reset(self, total_step, num_agent):
        """reset the buffer"""
        self.num_agent = num_agent
        self.no_reward = False
        if total_step < self.min_step:
            self.no_reward = True
        self._count = np.zeros(self.num_agent, dtype=np.int64)
        self.xy_memory = np.zeros((self._capacity, self.num_agent, 2))

    def add(self, xy_position, id_index):
        """add a position to the buffer"""
        if self._count[id_index] >= self._capacity:
            index = np.random.randint(low=0, high=self._capacity)
        else:
            index = self._count[id_index]

        self.xy_memory[index, id_index] = xy_position
        self._count[id_index] += 1

    def batch_add(self, xy_position):
        """add a batch of positions to the buffer"""
        self.xy_memory[0] = xy_position
        self._count += 1

    def if_reward(self, new_xy, rewards, done, on_goal):
        """familiarity between the current position and the ones from the buffer"""
        processed_rewards = np.zeros((1, self.num_agent))
        bonus = np.zeros((1, self.num_agent))
        reward_count = 0
        min_dist = np.zeros((1, self.num_agent))

        for i in range(self.num_agent):
            size = self.id_len(i)
            new_xy_array = np.array([new_xy[i]] * int(size))
            dist = np.sqrt(np.sum(np.square(new_xy_array - self.xy_memory[:size, i]), axis=-1))
            novelty = np.asarray(dist < random.randint(1, IntrinsicParameters.K), dtype=np.int64)

            aggregated = np.max(novelty)
            bonus[:, i] = np.asarray([0.0 if done or on_goal[i] else self.surrogate2 - aggregated])
            scale_factor = self.surrogate1
            if self.no_reward:
                scale_factor = 0.0
            intrinsic_reward = scale_factor * bonus[:, i]
            processed_rewards[:, i] = rewards[:, i] + intrinsic_reward
            if all(intrinsic_reward != 0):
                reward_count += 1

            min_dist[:, i] = np.min(dist)
            if min_dist[:, i] >= IntrinsicParameters.ADD_THRESHOLD:
                self.add(new_xy[i], i)

        return processed_rewards, reward_count, bonus, min_dist

    def image_if_reward(self, new_xy, done, on_goal):
        """similar to if_reward, but only used when breaking a tie"""
        bonus = np.zeros((1, self.num_agent))
        min_dist = np.zeros((1, self.num_agent))

        for i in range(self.num_agent):
            size = self.id_len(i)
            new_xy_array = np.array([new_xy[i]] * int(size))
            dist = np.sqrt(np.sum(np.square(new_xy_array - self.xy_memory[:size, i]), axis=-1))
            novelty = np.asarray(dist < random.randint(1, IntrinsicParameters.K), dtype=np.int64)

            aggregated = np.max(novelty)
            bonus[:, i] = np.asarray([0.0 if done or on_goal[i] else self.surrogate2 - aggregated])
            min_dist[:, i] = np.min(dist)

        return bonus, min_dist
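A small worked example of the familiarity bonus above: with `SURROGATE2 = 1` and `K = 3`, an agent standing far from every stored position receives a bonus of 1, which `if_reward` scales by `SURROGATE1 = 0.2` and adds to the extrinsic reward; positions at least `ADD_THRESHOLD` away from the buffer are also written back into it:

```
import numpy as np

from episodic_buffer import EpisodicBuffer

buffer = EpisodicBuffer(total_step=2e6, num_agent=1)  # past min_step, so rewards are active
buffer.batch_add(np.array([[0, 0]]))                  # the single agent starts at the origin
rewards = np.zeros((1, 1))
processed, count, bonus, min_dist = buffer.if_reward(
    np.array([[5, 5]]), rewards, done=False, on_goal=[False])
print(bonus, min_dist)  # bonus 1.0 (far from memory), min_dist ~7.07, processed reward 0.2
```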

--------------------------------------------------------------------------------
/eval_model.py:
--------------------------------------------------------------------------------

import os

import numpy as np
import torch
import wandb

from alg_parameters import *
from episodic_buffer import EpisodicBuffer
from mapf_gym import MAPFEnv
from model import Model
from util import reset_env, make_gif, set_global_seeds

NUM_TIMES = 100
CASE = [[8, 10, 0], [8, 10, 0.15], [8, 10, 0.3], [16, 20, 0.0], [16, 20, 0.15], [16, 20, 0.3], [32, 30, 0.0],
        [32, 30, 0.15], [32, 30, 0.3], [64, 40, 0.0], [64, 40, 0.15], [64, 40, 0.3], [128, 40, 0.0],
        [128, 40, 0.15], [128, 40, 0.3]]
set_global_seeds(SetupParameters.SEED)


def one_step(env0, actions, model0, pre_value, input_state, ps, one_episode_perf, message, episodic_buffer0):
    obs, vector, reward, done, _, on_goal, _, _, _, _, _, max_on_goal, num_collide, _, modify_actions = env0.joint_step(
        actions, one_episode_perf['episode_len'], model0, pre_value, input_state, ps, no_reward=False, message=message,
        episodic_buffer=episodic_buffer0)

    one_episode_perf['collide'] += num_collide
    vector[:, :, -1] = modify_actions
    one_episode_perf['episode_len'] += 1
    return reward, obs, vector, done, one_episode_perf, max_on_goal, on_goal


def evaluate(eval_env, model0, device, episodic_buffer0, num_agent, save_gif0):
    """Evaluate Model."""
    one_episode_perf = {'episode_len': 0, 'max_goals': 0, 'collide': 0, 'success_rate': 0}
    episode_frames = []

    done, _, obs, vector, _ = reset_env(eval_env, num_agent)

    episodic_buffer0.reset(2e6, num_agent)
    new_xy = eval_env.get_positions()
    episodic_buffer0.batch_add(new_xy)

    message = torch.zeros((1, num_agent, NetParameters.NET_SIZE)).to(torch.device('cpu'))
    hidden_state = (torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device),
                    torch.zeros((num_agent, NetParameters.NET_SIZE // 2)).to(device))

    if save_gif0:
        episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))

    while not done:
        actions, hidden_state, v_all, ps, message = model0.final_evaluate(obs, vector, hidden_state, message,
                                                                          num_agent, greedy=False)

        rewards, obs, vector, done, one_episode_perf, max_on_goals, on_goal = one_step(eval_env, actions, model0,
                                                                                       v_all, hidden_state, ps,
                                                                                       one_episode_perf, message,
                                                                                       episodic_buffer0)
        new_xy = eval_env.get_positions()
        processed_rewards, _, intrinsic_reward, min_dist = episodic_buffer0.if_reward(new_xy, rewards, done, on_goal)

        vector[:, :, 3] = rewards
        vector[:, :, 4] = intrinsic_reward
        vector[:, :, 5] = min_dist

        if save_gif0:
            episode_frames.append(eval_env._render(mode='rgb_array', screen_width=900, screen_height=900))

        if done:
            if one_episode_perf['episode_len'] < EnvParameters.EPISODE_LEN - 1:
                one_episode_perf['success_rate'] = 1
            one_episode_perf['max_goals'] = max_on_goals
            one_episode_perf['collide'] = one_episode_perf['collide'] / (
                    (one_episode_perf['episode_len'] + 1) * num_agent)
            if save_gif0:
                if not os.path.exists(RecordingParameters.GIFS_PATH):
                    os.makedirs(RecordingParameters.GIFS_PATH)
                images = np.array(episode_frames)
                make_gif(images, '{}/evaluation.gif'.format(
                    RecordingParameters.GIFS_PATH))

    return one_episode_perf


if __name__ == "__main__":
    # load the trained model
    model_path = './final'
    path_checkpoint = model_path + "/net_checkpoint.pkl"
    model = Model(0, torch.device('cpu'))
    model.network.load_state_dict(torch.load(path_checkpoint)['model'])

    # recording
    wandb_id = wandb.util.generate_id()
    wandb.init(project='MAPF_evaluation',
               name='evaluation_global_SCRIMP',
               entity=RecordingParameters.ENTITY,
               notes=RecordingParameters.EXPERIMENT_NOTE,
               config=all_args,
               id=wandb_id,
               resume='allow')
    print('id is:{}'.format(wandb_id))
    print('Launching wandb...\n')
    save_gif = True

    # start evaluation
    for k in CASE:
        # remember to modify the corresponding code (size, prob) in 'mapf_gym.py'
        env = MAPFEnv(num_agents=k[0], size=k[1], prob=k[2])
        episodic_buffer = EpisodicBuffer(2e6, k[0])

        all_perf_dict = {'episode_len': [], 'max_goals': [], 'collide': [], 'success_rate': []}
        all_perf_dict_std = {'episode_len': [], 'max_goals': [], 'collide': []}
        print('agent: {}, world: {}, obstacle: {}'.format(k[0], k[1], k[2]))

        for j in range(NUM_TIMES):
            eval_performance_dict = evaluate(env, model, torch.device('cpu'), episodic_buffer, k[0], save_gif)
            save_gif = False  # only record a gif once
            if j % 20 == 0:
                print(j)

            for i in eval_performance_dict.keys():  # for one episode
                if i == 'episode_len':
                    if eval_performance_dict['success_rate'] == 1:
                        all_perf_dict[i].append(eval_performance_dict[i])  # only record successful episodes
                    else:
                        continue
                else:
                    all_perf_dict[i].append(eval_performance_dict[i])

        for i in all_perf_dict.keys():  # for all episodes
            if i != 'success_rate':
                all_perf_dict_std[i] = np.std(all_perf_dict[i])
            all_perf_dict[i] = np.nanmean(all_perf_dict[i])

        print('EL: {}, MR: {}, CO: {}, SR: {}'.format(round(all_perf_dict['episode_len'], 2),
                                                      round(all_perf_dict['max_goals'], 2),
                                                      round(all_perf_dict['collide'] * 100, 2),
                                                      all_perf_dict['success_rate'] * 100))
        print('EL_STD: {}, MR_STD: {}, CO_STD: {}'.format(round(all_perf_dict_std['episode_len'], 2),
                                                          round(all_perf_dict_std['max_goals'], 2),
                                                          round(all_perf_dict_std['collide'] * 100, 2)))
        print('-----------------------------------------------------------------------------------------------')

    print('finished')
    wandb.finish()
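Evaluating a single custom setting programmatically follows the same pattern as the loop above (a sketch; it assumes the `./final` checkpoint exists and that `mapf_gym.py` has been adjusted as the comment in the loop notes). EL is episode length, MR max goals reached, CO the collision ratio, and SR the success rate:

```
import torch

from episodic_buffer import EpisodicBuffer
from eval_model import evaluate
from mapf_gym import MAPFEnv
from model import Model

model = Model(0, torch.device('cpu'))
model.network.load_state_dict(torch.load('./final/net_checkpoint.pkl')['model'])
env = MAPFEnv(num_agents=8, size=10, prob=0.3)
buffer = EpisodicBuffer(2e6, 8)  # past N_ADD_INTRINSIC, so intrinsic rewards are active
perf = evaluate(env, model, torch.device('cpu'), buffer, 8, save_gif0=False)
print(perf)  # episode_len, max_goals, collide, success_rate
```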

--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.cuda.amp.autocast_mode import autocast
from torch.cuda.amp.grad_scaler import GradScaler

from alg_parameters import *
from net import SCRIMPNet


class Model(object):
    """model of agents"""

    def __init__(self, env_id, device, global_model=False):
        """initialization"""
        self.ID = env_id
        self.device = device
        self.network = SCRIMPNet().to(device)  # neural network
        if global_model:
            self.net_optimizer = optim.Adam(self.network.parameters(), lr=TrainingParameters.lr)
            # self.multi_gpu_net = torch.nn.DataParallel(self.network)  # training on multiple GPUs
            self.net_scaler = GradScaler()  # automatic mixed precision

    def step(self, observation, vector, valid_action, input_state, no_reward, message, num_agent):
        """use the neural network for prediction during training"""
        num_invalid = 0
        observation = torch.from_numpy(observation).to(self.device)
        vector = torch.from_numpy(vector).to(self.device)
        ps, v_in, v_ex, block, _, output_state, _, message = self.network(observation, vector, input_state,
                                                                          message)

        actions = np.zeros(num_agent)
        ps = np.squeeze(ps.cpu().detach().numpy())
        v_in = v_in.cpu().detach().numpy()  # intrinsic state values
        v_ex = v_ex.cpu().detach().numpy()  # extrinsic state values
        scale_factor = IntrinsicParameters.SURROGATE1
        if no_reward:
            scale_factor = 0.0
        v_all = v_ex + scale_factor * v_in  # total state values
        block = np.squeeze(block.cpu().detach().numpy())

        for i in range(num_agent):
            if np.argmax(ps[i], axis=-1) not in valid_action[i]:
                num_invalid += 1
            # choose action from the complete action distribution
            actions[i] = np.random.choice(range(EnvParameters.N_ACTIONS), p=ps[i].ravel())
        return actions, ps, v_in, v_ex, v_all, block, output_state, num_invalid, message

    def evaluate(self, observation, vector, valid_action, input_state, greedy, no_reward, message, num_agent):
        """use the neural network for prediction in evaluations of the training code"""
        num_invalid = 0
        eval_action = np.zeros(num_agent)
        observation = torch.from_numpy(np.asarray(observation)).to(self.device)
        vector = torch.from_numpy(vector).to(self.device)
        ps, v_in, v_ex, block, _, output_state, _, message = self.network(observation, vector, input_state, message)

        ps = np.squeeze(ps.cpu().detach().numpy())
        block = np.squeeze(block.cpu().detach().numpy())
        greedy_action = np.argmax(ps, axis=-1)
        scale_factor = IntrinsicParameters.SURROGATE1
        if no_reward:
            scale_factor = 0.0
        v_all = v_ex + scale_factor * v_in
        v_all = v_all.cpu().detach().numpy()

        for i in range(num_agent):
            if greedy_action[i] not in valid_action[i]:
                num_invalid += 1
            if not greedy:
                eval_action[i] = np.random.choice(range(EnvParameters.N_ACTIONS), p=ps[i].ravel())
        if greedy:
            eval_action = greedy_action
        return eval_action, block, output_state, num_invalid, v_all, ps, message

    def value(self, obs, vector, input_state, no_reward, message):
        """use the neural network to predict state values"""
        obs = torch.from_numpy(obs).to(self.device)
        vector = torch.from_numpy(vector).to(self.device)
        _, v_in, v_ex, _, _, _, _, _ = self.network(obs, vector, input_state, message)
        v_in = v_in.cpu().detach().numpy()
        v_ex = v_ex.cpu().detach().numpy()

        scale_factor = IntrinsicParameters.SURROGATE1
        if no_reward:
            scale_factor = 0.0
        v_all = v_ex + scale_factor * v_in
        return v_in, v_ex, v_all

    def generate_state(self, obs, vector, input_state, message):
        """generate the corresponding hidden states and messages in imitation learning"""
        obs = torch.from_numpy(obs).to(self.device)
        vector = torch.from_numpy(vector).to(self.device)
        _, _, _, _, _, output_state, _, message = self.network(obs, vector, input_state, message)
        return output_state, message

    def final_evaluate(self, observation, vector, input_state, message, num_agent, greedy):
        """use the neural network for prediction in independent evaluations"""
        eval_action = np.zeros(num_agent)
        observation = torch.from_numpy(np.asarray(observation)).to(self.device)
        vector = torch.from_numpy(vector).to(self.device)
        ps, v_in, v_ex, _, _, output_state, _, message = self.network(observation, vector, input_state, message)

        ps = np.squeeze(ps.cpu().detach().numpy())
        greedy_action = np.argmax(ps, axis=-1)
        scale_factor = IntrinsicParameters.SURROGATE1
        v_all = v_ex + scale_factor * v_in
        v_all = v_all.cpu().detach().numpy()

        for i in range(num_agent):
            if not greedy:
                eval_action[i] = np.random.choice(range(EnvParameters.N_ACTIONS), p=ps[i].ravel())
        if greedy:
            eval_action = greedy_action
        return eval_action, output_state, v_all, ps, message

    def train(self, observation, vector, returns_in, returns_ex, returns_all, old_v_in, old_v_ex, old_v_all, action,
              old_ps, input_state, train_valid, target_blockings, message):
        """train the model by reinforcement learning"""
        self.net_optimizer.zero_grad()
        # from numpy to torch
        observation = torch.from_numpy(observation).to(self.device)
        vector = torch.from_numpy(vector).to(self.device)
        message = torch.from_numpy(message).to(self.device)

        returns_in = torch.from_numpy(returns_in).to(self.device)
        returns_ex = torch.from_numpy(returns_ex).to(self.device)
        returns_all = torch.from_numpy(returns_all).to(self.device)

        old_v_in = torch.from_numpy(old_v_in).to(self.device)
        old_v_ex = torch.from_numpy(old_v_ex).to(self.device)
        old_v_all = torch.from_numpy(old_v_all).to(self.device)

        action = torch.from_numpy(action).to(self.device)
        action = torch.unsqueeze(action, -1)
        old_ps = torch.from_numpy(old_ps).to(self.device)

        train_valid = torch.from_numpy(train_valid).to(self.device)
        target_blockings = torch.from_numpy(target_blockings).to(self.device)

        input_state_h = torch.from_numpy(
            np.reshape(input_state[:, 0], (-1, NetParameters.NET_SIZE // 2))).to(self.device)
        input_state_c = torch.from_numpy(
            np.reshape(input_state[:, 1], (-1, NetParameters.NET_SIZE // 2))).to(self.device)
        input_state = (input_state_h, input_state_c)

        advantage = returns_all - old_v_all
        advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-6)

        with autocast():
            new_ps, new_v_in, new_v_ex, block, policy_sig, _, _, _ = self.network(observation, vector, input_state,
                                                                                  message)
            new_p = new_ps.gather(-1, action)
            old_p = old_ps.gather(-1, action)
            ratio = torch.exp(torch.log(torch.clamp(new_p, 1e-6, 1.0)) - torch.log(torch.clamp(old_p, 1e-6, 1.0)))

            entropy = torch.mean(-torch.sum(new_ps * torch.log(torch.clamp(new_ps, 1e-6, 1.0)), dim=-1, keepdim=True))

            # intrinsic critic loss
            new_v_in = torch.squeeze(new_v_in)
            new_v_clipped_in = old_v_in + torch.clamp(new_v_in - old_v_in, - TrainingParameters.CLIP_RANGE,
                                                      TrainingParameters.CLIP_RANGE)
            value_losses1_in = torch.square(new_v_in - returns_in)
            value_losses2_in = torch.square(new_v_clipped_in - returns_in)
            critic_loss_in = torch.mean(torch.maximum(value_losses1_in, value_losses2_in))

            # extrinsic critic loss
            new_v_ex = torch.squeeze(new_v_ex)
            new_v_clipped_ex = old_v_ex + torch.clamp(new_v_ex - old_v_ex, - TrainingParameters.CLIP_RANGE,
                                                      TrainingParameters.CLIP_RANGE)
            value_losses1_ex = torch.square(new_v_ex - returns_ex)
            value_losses2_ex = torch.square(new_v_clipped_ex - returns_ex)
            critic_loss_ex = torch.mean(torch.maximum(value_losses1_ex, value_losses2_ex))

            # actor loss
            ratio = torch.squeeze(ratio)
            policy_losses = advantage * ratio
            policy_losses2 = advantage * torch.clamp(ratio, 1.0 - TrainingParameters.CLIP_RANGE,
                                                     1.0 + TrainingParameters.CLIP_RANGE)
            policy_loss = torch.mean(torch.min(policy_losses, policy_losses2))

            # valid loss and blocking loss decreased by supervised learning
            valid_loss = - torch.mean(torch.log(torch.clamp(policy_sig, 1e-6, 1.0 - 1e-6)) *
                                      train_valid + torch.log(torch.clamp(1 - policy_sig, 1e-6, 1.0 - 1e-6)) * (
                                              1 - train_valid))
            block = torch.squeeze(block)
            blocking_loss = - torch.mean(target_blockings * torch.log(torch.clamp(block, 1e-6, 1.0 - 1e-6))
                                         + (1 - target_blockings) * torch.log(torch.clamp(1 - block, 1e-6, 1.0 - 1e-6)))

            # total loss
            all_loss = -policy_loss - entropy * TrainingParameters.ENTROPY_COEF + \
                TrainingParameters.IN_VALUE_COEF * critic_loss_in + \
                TrainingParameters.EX_VALUE_COEF * critic_loss_ex + TrainingParameters.VALID_COEF * valid_loss \
                + TrainingParameters.BLOCK_COEF * blocking_loss

        clip_frac = torch.mean(torch.greater(torch.abs(ratio - 1.0), TrainingParameters.CLIP_RANGE).float())

        self.net_scaler.scale(all_loss).backward()
        self.net_scaler.unscale_(self.net_optimizer)

        # clip gradient
        grad_norm = torch.nn.utils.clip_grad_norm_(self.network.parameters(), TrainingParameters.MAX_GRAD_NORM)

        self.net_scaler.step(self.net_optimizer)
        self.net_scaler.update()

        stats_list = [all_loss.cpu().detach().numpy(), policy_loss.cpu().detach().numpy(),
                      entropy.cpu().detach().numpy(),
                      critic_loss_in.cpu().detach().numpy(), critic_loss_ex.cpu().detach().numpy(),
                      valid_loss.cpu().detach().numpy(),
                      blocking_loss.cpu().detach().numpy(),
                      clip_frac.cpu().detach().numpy(), grad_norm.cpu().detach().numpy(),
                      torch.mean(advantage).cpu().detach().numpy()]  # for recording

        return stats_list
imitation learning""" 223 | self.net_optimizer.zero_grad() 224 | 225 | observation = torch.from_numpy(observation).to(self.device) 226 | vector = torch.from_numpy(vector).to(self.device) 227 | optimal_action = torch.from_numpy(optimal_action).to(self.device) 228 | message = torch.from_numpy(message).to(self.device) 229 | input_state_h = torch.from_numpy( 230 | np.reshape(input_state[:, 0], (-1, NetParameters.NET_SIZE // 2))).to(self.device) 231 | input_state_c = torch.from_numpy( 232 | np.reshape(input_state[:, 1], (-1, NetParameters.NET_SIZE // 2))).to(self.device) 233 | 234 | input_state = (input_state_h, input_state_c) 235 | 236 | with autocast(): 237 | _, _, _, _, _, _, logits, _ = self.network(observation, vector, input_state, message) 238 | logits = torch.swapaxes(logits, 1, 2) 239 | imitation_loss = F.cross_entropy(logits, optimal_action) 240 | 241 | self.net_scaler.scale(imitation_loss).backward() 242 | self.net_scaler.unscale_(self.net_optimizer) 243 | # clip gradient 244 | grad_norm = torch.nn.utils.clip_grad_norm_(self.network.parameters(), TrainingParameters.MAX_GRAD_NORM) 245 | self.net_scaler.step(self.net_optimizer) 246 | self.net_scaler.update() 247 | 248 | return [imitation_loss.cpu().detach().numpy(), grad_norm.cpu().detach().numpy()] # for recording 249 | -------------------------------------------------------------------------------- /net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.cuda.amp.autocast_mode import autocast 6 | 7 | from alg_parameters import * 8 | from transformer.encoder_model import TransformerEncoder 9 | 10 | 11 | def normalized_columns_initializer(weights, std=1.0): 12 | """weight initializer""" 13 | out = torch.randn(weights.size()) 14 | out *= std / torch.sqrt(out.pow(2).sum(1).expand_as(out)) 15 | return out 16 | 17 | 18 | def weights_init(m): 19 | """initialize weights""" 20 | class_name = m.__class__.__name__ 21 | if class_name.find('Conv') != -1: 22 | weight_shape = list(m.weight.data.size()) 23 | fan_in = np.prod(weight_shape[1:4]) 24 | fan_out = np.prod(weight_shape[2:4]) * weight_shape[0] 25 | w_bound = np.sqrt(6. / (fan_in + fan_out)) 26 | m.weight.data.uniform_(-w_bound, w_bound) 27 | m.bias.data.fill_(0) 28 | elif class_name.find('Linear') != -1: 29 | weight_shape = list(m.weight.data.size()) 30 | fan_in = weight_shape[1] 31 | fan_out = weight_shape[0] 32 | w_bound = np.sqrt(6. 

--------------------------------------------------------------------------------
/net.py:
--------------------------------------------------------------------------------

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp.autocast_mode import autocast

from alg_parameters import *
from transformer.encoder_model import TransformerEncoder


def normalized_columns_initializer(weights, std=1.0):
    """weight initializer"""
    out = torch.randn(weights.size())
    out *= std / torch.sqrt(out.pow(2).sum(1).expand_as(out))
    return out


def weights_init(m):
    """initialize weights"""
    class_name = m.__class__.__name__
    if class_name.find('Conv') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = np.prod(weight_shape[1:4])
        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        m.bias.data.fill_(0)
    elif class_name.find('Linear') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]
        fan_out = weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        if m.bias is not None:
            m.bias.data.fill_(0)


class SCRIMPNet(nn.Module):
    """network with transformer-based communication mechanism"""

    def __init__(self):
        """initialization"""
        super(SCRIMPNet, self).__init__()
        # observation encoder
        self.conv1 = nn.Conv2d(NetParameters.NUM_CHANNEL, NetParameters.NET_SIZE // 4, 2, 1, 1)
        self.conv1a = nn.Conv2d(NetParameters.NET_SIZE // 4, NetParameters.NET_SIZE // 4, 2, 1, 1)
        self.conv1b = nn.Conv2d(NetParameters.NET_SIZE // 4, NetParameters.NET_SIZE // 4, 2, 1, 1)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(NetParameters.NET_SIZE // 4, NetParameters.NET_SIZE // 2, 2, 1, 1)
        self.conv2a = nn.Conv2d(NetParameters.NET_SIZE // 2, NetParameters.NET_SIZE // 2, 2, 1, 1)
        self.conv2b = nn.Conv2d(NetParameters.NET_SIZE // 2, NetParameters.NET_SIZE // 2, 2, 1, 1)
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(NetParameters.NET_SIZE // 2, NetParameters.NET_SIZE - NetParameters.GOAL_REPR_SIZE, 3,
                               1, 0)
        self.fully_connected_1 = nn.Linear(NetParameters.VECTOR_LEN, NetParameters.GOAL_REPR_SIZE)
        self.fully_connected_2 = nn.Linear(NetParameters.NET_SIZE, NetParameters.NET_SIZE)
        self.fully_connected_3 = nn.Linear(NetParameters.NET_SIZE, NetParameters.NET_SIZE)
        self.lstm_memory = nn.LSTMCell(input_size=NetParameters.NET_SIZE, hidden_size=NetParameters.NET_SIZE // 2)

        # output heads
        self.fully_connected_4 = nn.Linear(NetParameters.NET_SIZE * 2 + NetParameters.NET_SIZE // 2,
                                           NetParameters.NET_SIZE)
        self.policy_layer = nn.Linear(NetParameters.NET_SIZE, EnvParameters.N_ACTIONS)
        self.softmax_layer = nn.Softmax(dim=-1)
        self.value_layer_in = nn.Linear(NetParameters.NET_SIZE, 1)
        self.value_layer_ex = nn.Linear(NetParameters.NET_SIZE, 1)
        self.blocking_layer = nn.Linear(NetParameters.NET_SIZE, 1)
        self.message_layer = nn.Linear(NetParameters.NET_SIZE, NetParameters.NET_SIZE)

        # transformer-based communication block
        self.communication_layer = TransformerEncoder(d_model=NetParameters.D_MODEL,
                                                      d_hidden=NetParameters.D_HIDDEN,
                                                      n_layers=NetParameters.N_LAYERS, n_head=NetParameters.N_HEAD,
                                                      d_k=NetParameters.D_K,
                                                      d_v=NetParameters.D_V, n_position=NetParameters.N_POSITION)

        self.apply(weights_init)
        for p in self.communication_layer.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    @autocast()
    def forward(self, obs, vector, input_state, message):
        """run the neural network"""
        num_agent = obs.shape[1]
        obs = torch.reshape(obs, (-1, NetParameters.NUM_CHANNEL, EnvParameters.FOV_SIZE, EnvParameters.FOV_SIZE))
        vector = torch.reshape(vector, (-1, NetParameters.VECTOR_LEN))
        # matrix input
        x_1 = F.relu(self.conv1(obs))
        x_1 = F.relu(self.conv1a(x_1))
        x_1 = F.relu(self.conv1b(x_1))
        x_1 = self.pool1(x_1)
        x_1 = F.relu(self.conv2(x_1))
        x_1 = F.relu(self.conv2a(x_1))
        x_1 = F.relu(self.conv2b(x_1))
        x_1 = self.pool2(x_1)
        x_1 = self.conv3(x_1)
        x_1 = F.relu(x_1.view(x_1.size(0), -1))
        # vector input
        x_2 = F.relu(self.fully_connected_1(vector))
        # concatenation
        x_3 = torch.cat((x_1, x_2), -1)
        h1 = F.relu(self.fully_connected_2(x_3))
        h1 = self.fully_connected_3(h1)
        h2 = F.relu(h1 + x_3)
        # LSTM cell
        memories, memory_c = self.lstm_memory(h2, input_state)
        output_state = (memories, memory_c)
        memories = torch.reshape(memories, (-1, num_agent, NetParameters.NET_SIZE // 2))
        h2 = torch.reshape(h2, (-1, num_agent, NetParameters.NET_SIZE))

        c1 = self.communication_layer(message)

        c1 = torch.cat([c1, memories, h2], -1)
        c1 = F.relu(self.fully_connected_4(c1))
        policy_layer = self.policy_layer(c1)
        policy = self.softmax_layer(policy_layer)
        policy_sig = torch.sigmoid(policy_layer)
        value_in = self.value_layer_in(c1)
        value_ex = self.value_layer_ex(c1)
        blocking = torch.sigmoid(self.blocking_layer(c1))
        message = self.message_layer(c1)
        return policy, value_in, value_ex, blocking, policy_sig, output_state, policy_layer, message

--------------------------------------------------------------------------------
/od_mstar3/SortedCollection.py:
--------------------------------------------------------------------------------

from bisect import bisect_left, bisect_right


class SortedCollection(object):
    """Sequence sorted by a key function.

    SortedCollection() is much easier to work with than using bisect()
    directly.  It supports key functions like those used in sorted(),
    min(), and max().  The result of the key function call is saved so
    that keys can be searched efficiently.

    Instead of returning an insertion-point which can be hard to
    interpret, the five find-methods return a specific item in the
    sequence.  They can scan for exact matches, the last item
    less-than-or-equal to a key, or the first item greater-than-or-equal
    to a key.

    Once found, an item's ordinal position can be located with the
    index() method.  New items can be added with the insert() and
    insert_right() methods.  Old items can be deleted with the remove()
    method.

    The usual sequence methods are provided to support indexing,
    slicing, length lookup, clearing, copying, forward and reverse
    iteration, contains checking, item counts, item removal, and a nice
    looking repr.

    Finding and indexing are O(log n) operations while iteration and
    insertion are O(n).  The initial sort is O(n log n).

    The key function is stored in the 'key' attribute for easy
    introspection or so that you can assign a new key function
    (triggering an automatic re-sort).

    In short, the class was designed to handle all of the common use
    cases for bisect but with a simpler API and support for key
    functions.

    >>> from pprint import pprint
    >>> from operator import itemgetter

    >>> s = SortedCollection(key=itemgetter(2))
    >>> for record in [
    ...         ('roger', 'young', 30),
    ...         ('angela', 'jones', 28),
    ...         ('bill', 'smith', 22),
    ...         ('david', 'thomas', 32)]:
s.insert(record) 49 | 50 | >>> pprint(list(s)) # show records sorted by age 51 | [('bill', 'smith', 22), 52 | ('angela', 'jones', 28), 53 | ('roger', 'young', 30), 54 | ('david', 'thomas', 32)] 55 | 56 | >>> s.find_le(29) # find oldest person aged 29 or younger 57 | ('angela', 'jones', 28) 58 | >>> s.find_lt(28) # find oldest person under 28 59 | ('bill', 'smith', 22) 60 | >>> s.find_gt(28) # find youngest person over 28 61 | ('roger', 'young', 30) 62 | 63 | >>> r = s.find_ge(32) # find youngest person aged 32 or older 64 | >>> s.index(r) # get the index of their record 65 | 3 66 | >>> s[3] # fetch the record at that index 67 | ('david', 'thomas', 32) 68 | 69 | >>> s.key = itemgetter(0) # now sort by first name 70 | >>> pprint(list(s)) 71 | [('angela', 'jones', 28), 72 | ('bill', 'smith', 22), 73 | ('david', 'thomas', 32), 74 | ('roger', 'young', 30)] 75 | 76 | """ 77 | 78 | def __init__(self, iterable=(), key=None): 79 | self._given_key = key 80 | key = (lambda x: x) if key is None else key 81 | decorated = sorted((key(item), item) for item in iterable) 82 | self._keys = [k for k, item in decorated] 83 | self._items = [item for k, item in decorated] 84 | self._key = key 85 | 86 | def _getkey(self): 87 | return self._key 88 | 89 | def _setkey(self, key): 90 | if key is not self._key: 91 | self.__init__(self._items, key=key) 92 | 93 | def _delkey(self): 94 | self._setkey(None) 95 | 96 | key = property(_getkey, _setkey, _delkey, 'key function') 97 | 98 | def clear(self): 99 | self.__init__([], self._key) 100 | 101 | def copy(self): 102 | return self.__class__(self, self._key) 103 | 104 | def __len__(self): 105 | return len(self._items) 106 | 107 | def __getitem__(self, i): 108 | return self._items[i] 109 | 110 | def __iter__(self): 111 | return iter(self._items) 112 | 113 | def __reversed__(self): 114 | return reversed(self._items) 115 | 116 | def __repr__(self): 117 | return '%s(%r, key=%s)' % ( 118 | self.__class__.__name__, 119 | self._items, 120 | getattr(self._given_key, '__name__', repr(self._given_key)) 121 | ) 122 | 123 | def __reduce__(self): 124 | return self.__class__, (self._items, self._given_key) 125 | 126 | def __contains__(self, item): 127 | """So if an item has its key value changed, you are not going to 128 | be able to recover its value 129 | """ 130 | k = self._key(item) 131 | i = bisect_left(self._keys, k) 132 | j = bisect_right(self._keys, k) 133 | return item in self._items[i:j] 134 | 135 | def resort(self): 136 | """If all the key values are expected to have changed 137 | dramatically, resort the items list, and regenerate the internal 138 | representation 139 | 140 | Note that this operation is not guaranteed to be stable, as it 141 | depends on the ordering of a key, item pair, and the ordering of 142 | the items is effectively arbitrary 143 | """ 144 | decorated = sorted((self.key(item), item) for item in self._items) 145 | self._keys = [k for k, item in decorated] 146 | self._items = [item for k, item in decorated] 147 | 148 | def index(self, item): 149 | """Find the position of an item. Raise ValueError if not found.""" 150 | k = self._key(item) 151 | i = bisect_left(self._keys, k) 152 | j = bisect_right(self._keys, k) 153 | return self._items[i:j].index(item) + i 154 | 155 | def count(self, item): 156 | """Return number of occurrences of item""" 157 | k = self._key(item) 158 | i = bisect_left(self._keys, k) 159 | j = bisect_right(self._keys, k) 160 | return self._items[i:j].count(item) 161 | 162 | def insert(self, item): 163 | """Insert a new item. 
If equal keys are found, add to the left"""
164 |         k = self._key(item)
165 |         i = bisect_left(self._keys, k)
166 |         self._keys.insert(i, k)
167 |         self._items.insert(i, item)
168 | 
169 |     def insert_right(self, item):
170 |         """Insert a new item. If equal keys are found, add to the right"""
171 |         k = self._key(item)
172 |         i = bisect_right(self._keys, k)
173 |         self._keys.insert(i, k)
174 |         self._items.insert(i, item)
175 | 
176 |     def remove(self, item):
177 |         """Remove first occurrence of item.
178 | 
179 |         Raise ValueError if not found
180 |         """
181 |         i = self.index(item)
182 |         del self._keys[i]
183 |         del self._items[i]
184 | 
185 |     def pop(self):
186 |         """returns the rightmost value (greatest key value)"""
187 |         del self._keys[-1]
188 |         return self._items.pop()
189 | 
190 |     def consistent_pop(self):
191 |         """returns the rightmost value (greatest key value) and checks
192 |         whether its cached key value is consistent with its current
193 |         cost.
194 | 
195 |         returns:
196 |             value with greatest cached key
197 |             boolean: True if cached key is same as current key
198 |         """
199 |         cached_key = self._keys.pop()
200 |         val = self._items.pop()
201 |         return val, self._key(val) == cached_key
202 | 
203 |     def find(self, k):
204 |         """Return first item with a key == k.
205 |         Will fail if the key value of k was changed since it was
206 |         inserted
207 | 
208 |         Raise ValueError if not found.
209 |         """
210 |         i = bisect_left(self._keys, k)
211 |         if i != len(self) and self._keys[i] == k:
212 |             return self._items[i]
213 |         raise ValueError('No item found with key equal to: %r' % (k, ))
214 | 
215 |     def find_le(self, k):
216 |         """Return last item with a key <= k.
217 | 
218 |         Raise ValueError if not found.
219 |         """
220 |         i = bisect_right(self._keys, k)
221 |         if i:
222 |             return self._items[i - 1]
223 |         raise ValueError('No item found with key at or below: %r' % (k, ))
224 | 
225 |     def find_lt(self, k):
226 |         """Return last item with a key < k.
227 | 
228 |         Raise ValueError if not found.
229 |         """
230 |         i = bisect_left(self._keys, k)
231 |         if i:
232 |             return self._items[i - 1]
233 |         raise ValueError('No item found with key below: %r' % (k, ))
234 | 
235 |     def find_ge(self, k):
236 |         """Return first item with a key >= k.
237 | 
238 |         Raise ValueError if not found
239 |         """
240 |         i = bisect_left(self._keys, k)
241 |         if i != len(self):
242 |             return self._items[i]
243 |         raise ValueError('No item found with key at or above: %r' % (k, ))
244 | 
245 |     def find_gt(self, k):
246 |         """Return first item with a key > k.
247 | 248 | Raise ValueError if not found 249 | """ 250 | i = bisect_right(self._keys, k) 251 | if i != len(self): 252 | return self._items[i] 253 | raise ValueError('No item found with key above: %r' % (k, )) 254 | -------------------------------------------------------------------------------- /od_mstar3/__pycache__/SortedCollection.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/SortedCollection.cpython-37.pyc -------------------------------------------------------------------------------- /od_mstar3/__pycache__/col_set_addition.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/col_set_addition.cpython-37.pyc -------------------------------------------------------------------------------- /od_mstar3/__pycache__/interface.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/interface.cpython-37.pyc -------------------------------------------------------------------------------- /od_mstar3/__pycache__/od_mstar.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/od_mstar.cpython-37.pyc -------------------------------------------------------------------------------- /od_mstar3/__pycache__/workspace_graph.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/__pycache__/workspace_graph.cpython-37.pyc -------------------------------------------------------------------------------- /od_mstar3/build/lib.linux-x86_64-3.7/cpp_mstar.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/lib.linux-x86_64-3.7/cpp_mstar.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /od_mstar3/build/temp.linux-x86_64-3.7/col_checker.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/col_checker.o -------------------------------------------------------------------------------- /od_mstar3/build/temp.linux-x86_64-3.7/cython_od_mstar.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/cython_od_mstar.o -------------------------------------------------------------------------------- /od_mstar3/build/temp.linux-x86_64-3.7/grid_planning.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/grid_planning.o 
-------------------------------------------------------------------------------- /od_mstar3/build/temp.linux-x86_64-3.7/grid_policy.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/grid_policy.o -------------------------------------------------------------------------------- /od_mstar3/build/temp.linux-x86_64-3.7/od_mstar.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/od_mstar.o -------------------------------------------------------------------------------- /od_mstar3/build/temp.linux-x86_64-3.7/policy.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/build/temp.linux-x86_64-3.7/policy.o -------------------------------------------------------------------------------- /od_mstar3/col_checker.cpp: -------------------------------------------------------------------------------- 1 | #include "col_checker.hpp" 2 | #include "col_set.hpp" 3 | 4 | using namespace mstar; 5 | 6 | // /** 7 | // * Performs simple pebble motion on the graph collision checking 8 | // * 9 | // * @param c1 source 10 | // * @param c2 target 11 | // * 12 | // * @return collision set of the edge 13 | // */ 14 | // template 15 | // ColSet simple_edge_check(const T &c1, 16 | // const T&c2){ 17 | // ColSet col; 18 | // for (uint i = 0; i < c1.size(); i++){ 19 | // for (uint j = i; j < c1.size(); j++){ 20 | // if (c2[i] == c2[j] || (c1[i] == c2[j] && c1[j] == c2[i])){ 21 | // add_col_set_in_place({{i, j}}, col); 22 | // } 23 | // } 24 | // } 25 | // return col; 26 | // } 27 | 28 | /** 29 | * Iterator version 30 | */ 31 | template 32 | ColSet simple_edge_check(T source_start, T source_end, 33 | T target_start, T target_end){ 34 | int size = source_end - source_start; 35 | ColSet col; 36 | for (uint i = 0; i < size; i++){ 37 | for (uint j = i + 1; j < size; j++){ 38 | if (*(target_start + i) == *(target_start + j) || 39 | (*(source_start + i) == *(target_start + j) && 40 | *(source_start + j) == *(target_start + i))){ 41 | add_col_set_in_place({{i, j}}, col); 42 | } 43 | } 44 | } 45 | return col; 46 | } 47 | 48 | ColSet SimpleGraphColCheck::check_edge(const OdCoord &c1, 49 | const OdCoord &c2, 50 | const std::vector ids) const{ 51 | if (c2.is_standard()){ 52 | return simple_edge_check(c1.coord.cbegin(), c1.coord.cend(), 53 | c2.coord.cbegin(), c2.coord.cend()); 54 | } 55 | // c2 is an intermediate vertex, so only check for collisions between 56 | // robots with an assigned move in c2 57 | int size = c2.move_tuple.size(); 58 | return simple_edge_check(c1.coord.cbegin(), c1.coord.cbegin() + size, 59 | c2.move_tuple.cbegin(), c2.move_tuple.cend()); 60 | } 61 | -------------------------------------------------------------------------------- /od_mstar3/col_checker.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_COL_CHECKER_H 2 | #define MSTAR_COL_CHECKER_H 3 | 4 | #include "mstar_type_defs.hpp" 5 | 6 | namespace mstar{ 7 | 8 | class ColChecker{ 9 | public: 10 | virtual ~ColChecker(){}; 11 | virtual ColSet check_edge(const OdCoord &c1, const OdCoord &c2, 12 | const std::vector ids) const = 0; 13 | }; 14 | 15 | /** 16 | * 
Collision checker for simple bidirected graphs, where no edges overlap 17 | * 18 | * I.e. for pebble motion on the graph where you only have to worry about 19 | * robots swapping positions, and not about diagonals crossing. Allows 20 | * for rotations 21 | */ 22 | class SimpleGraphColCheck: public ColChecker{ 23 | public: 24 | /** 25 | * Checks for collision while traversing the edge from c1 to c2 26 | * 27 | * Finds collisions both while traversing the edge and when at the 28 | * goal configuration. 29 | * 30 | * @param c1 the source coordinate of the edge 31 | * @param c2 the target coordinate of the edge 32 | * @param ids list of global robot ids. Necessary for heterogeneous 33 | * robots 34 | * 35 | * @return the collision set containing the colliding robots 36 | */ 37 | ColSet check_edge(const OdCoord &c1, const OdCoord &c2, 38 | const std::vector ids) const; 39 | }; 40 | }; 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /od_mstar3/col_set.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_COL_SET_H 2 | #define MSTAR_COL_SET_H 3 | 4 | #include 5 | 6 | /*********************************************************************** 7 | * Provides logic for combining collision sets 8 | * 9 | * Assumes that a collision set is of form T> where T are 10 | * collections and the inner collection is sorted 11 | **********************************************************************/ 12 | 13 | namespace mstar{ 14 | /** 15 | * tests if two sets are disjoint 16 | * 17 | * Currently doesnt try to leverage sorted. Empty sets will always be 18 | * treated as disjoint 19 | * 20 | * @param s1, s2 The sets to check 21 | * 22 | * @return True if disjoint, else false 23 | */ 24 | template bool is_disjoint(const T &s1, const T &s2){ 25 | for (auto i = s1.cbegin(); i != s1.cend(); ++i){ 26 | for (auto j = s2.cbegin(); j != s2.cend(); ++j){ 27 | if (*i == *j){ 28 | return false; 29 | } 30 | } 31 | } 32 | return true; 33 | }; 34 | 35 | /** 36 | * Tests if s1 is a superset of s2 37 | * 38 | * Uses == to compare elements. Does not leverage sorted values 39 | * 40 | * @param s1 potential superset 41 | * @param s2 potential subset 42 | * 43 | * @return True if s1 is a superset of s2, otherwise false 44 | */ 45 | template bool is_superset(const T &s1, const T &s2){ 46 | for (auto j = s2.cbegin(); j != s2.cend(); ++j){ 47 | bool included = false; 48 | for (auto i = s1.cbegin(); i != s1.cend(); ++i){ 49 | if (*i == *j){ 50 | included = true; 51 | break; 52 | } 53 | } 54 | if (!included){ 55 | return false; 56 | } 57 | } 58 | return true; 59 | }; 60 | 61 | /** 62 | * specialization of is_superset that exploits sorted values 63 | */ 64 | template 65 | bool is_superset(const std::set &s1, 66 | const std::set &s2){ 67 | return std::includes(s1.cbegin(), s1.cend(), s2.cbegin(), s2.cend()); 68 | } 69 | 70 | /** 71 | * Merges two sorted sets 72 | * 73 | * Elements of the set must be sorted. 
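 * (For example, merging the sorted vectors {1, 3} and {2, 3} yields
 * {1, 2, 3}; std::set_union semantics keep one copy of elements common
 * to both inputs.)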
Container of the sets must be 74 | * resizeable for output 75 | * 76 | */ 77 | template T merge(const T &s1, const T &s2){ 78 | T out(s1.size() + s2.size()); 79 | auto it = std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), 80 | out.begin()); 81 | out.resize(it - out.begin()); 82 | return out; 83 | } 84 | 85 | template 86 | std::set merge(std::set s1, 87 | const std::set &s2){ 88 | s1.insert(s2.cbegin(), s2.cend()); 89 | return s1; 90 | } 91 | 92 | /** 93 | * Adds c1 to c2 94 | * 95 | * Mutates c2 96 | * 97 | * @param c1 collision set 1 98 | * @param c2 collision set 2 99 | * 100 | * @return true if c2 is changed, else false 101 | */ 102 | template class TT, class... args> 103 | bool add_col_set_in_place(TT c1, TT &c2){ 104 | bool changed = false; 105 | // TODO: This could be more efficient 106 | while (c1.size() > 0){ 107 | int i = 0; 108 | // whether c1[-1] overlaps any element of c2 109 | bool found_overlap = false; 110 | while (i < c2.size()){ 111 | if (!is_disjoint(c2[i], c1.back())) { 112 | // found overlap 113 | if (is_superset(c2[i], c1.back())){ 114 | // current element in c1 contained by the element in c2, so 115 | // the c1 element can be dropped 116 | c1.pop_back(); 117 | found_overlap = true; 118 | break; 119 | } 120 | // Non-trivial overlap. Need to add the union of the current 121 | // elements back to c1 to check if there is any further overlap 122 | // with elements of c2 123 | 124 | // Could just merge in place, but doubt it really matters 125 | c1.back().insert(c2[i].cbegin(), c2[i].cend()); 126 | c2.erase(c2.begin() + i); 127 | found_overlap = true; 128 | changed = true; 129 | break; 130 | } else{ 131 | // no overlap between c1[-1] and c2[i], so check next element 132 | // of c2 133 | ++i; 134 | } 135 | } 136 | if (!found_overlap){ 137 | // no overlap between c1[-1] and all elements of c2, so can 138 | // be added to c2 (although this will force checks against 139 | c2.push_back(c1.back()); 140 | c1.pop_back(); 141 | changed = true; 142 | } 143 | } 144 | return changed; 145 | } 146 | 147 | /** 148 | * Adds two collision sets, c1, c2 149 | * 150 | * The template monstrosity is necessary because std::vectors require two 151 | * parameters of which we care about one (the type), and the other is the 152 | * allocator. Other containers may require more 153 | * 154 | * @param c1 collision set 1 155 | * @param c2 collision set 2 156 | * 157 | * @return A new collision set formed by adding c1 and c2 158 | */ 159 | template class TT, class... args> 160 | TT add_col_set(TT c1, TT c2){ 161 | add_col_set_in_place(c1, c2); 162 | return c2; 163 | } 164 | 165 | /** 166 | * Computes the collision set used for expansion 167 | * 168 | * Based the generating collision set of a vertex, which is the collision 169 | * set of the vertex's predecessor when the predecessor was expanded. It 170 | * is useful as it specifies which partial solutions have been cached. 171 | * For example, if the generating collision set is {{1, 2}}, then a 172 | * subplanner already knows how to get robots 1 and 2 to the goal, and it 173 | * is more efficient to directly query that subplanner, rather than set the 174 | * collision set to be empty. 175 | * 176 | * However, you have to account for new collisions, as stored in the 177 | * vertex's collision set. If a collision set element is a subset of an 178 | * element of the generating collision set, use the element form the 179 | * generating collision set. 
If a generating collision set element has
180 |    * a non-empty intersection with an element of the collision set that is
181 |    * not a subset, don't use that generating collision set element
182 |    *
183 |    * @param col_set the collision set of the vertex
184 |    * @param gen_set the generating collision set of the vertex
185 |    *
186 |    * @return A new collision set to use when expanding the vertex
187 |    */
188 |   template <class T, template <class, class...> class TT, class... args>
189 |   TT<T, args...> col_set_to_expand(TT<T, args...> col_set,
190 |                                    TT<T, args...> gen_set){
191 |     TT<T, args...> ret;
192 |     while(gen_set.size() > 0){
193 |       // Check the last element of the generating collision set. Either it
194 |       // can be used, or there is a non-superset intersection, and it must
195 |       // be removed
196 | 
197 |       // Need to keep any elements of the collision set that are subsets
198 |       // of the generating collision set element, as a later element of the
199 |       // collision set may invalidate the generating collision set element
200 |       TT<T, args...> elements_to_remove;
201 | 
202 |       uint i = 0;
203 | 
204 |       bool gen_set_elem_valid = true;
205 |       while (i < col_set.size()){
206 |         if (is_superset(gen_set.back(), col_set[i])){
207 |           elements_to_remove.push_back(col_set[i]);
208 |           col_set.erase(col_set.begin() + i);
209 |         } else if (!is_disjoint(gen_set.back(), col_set[i])){
210 |           // generating collision set element has a non-empty intersection
211 |           // with a collision set element that is not a subset, so is
212 |           // invalid
213 |           gen_set.pop_back();
214 |           // Need to return any collision set elements that were removed as
215 |           // being subsets of gen_set.back
216 |           col_set.insert(col_set.end(), elements_to_remove.begin(),
217 |                          elements_to_remove.end());
218 |           gen_set_elem_valid = false;
219 |           break;
220 |         } else{
221 |           i += 1;
222 |         }
223 |       }
224 |       if (gen_set_elem_valid){
225 |         ret.push_back(gen_set.back());
226 |         gen_set.pop_back();
227 |       }
228 |     }
229 |     // Any remaining collision set elements were not contained by any element
230 |     // of the generating collision set, so should be used directly
231 |     ret.insert(ret.end(), col_set.begin(), col_set.end());
232 |     return ret;
233 |   };
234 | 
235 | }
236 | 
237 | #endif
238 | 
--------------------------------------------------------------------------------
/od_mstar3/col_set_addition.py:
--------------------------------------------------------------------------------
1 | """Encapsulates the basic collision set addition functions, so they can
2 | be accessible to any code that uses it
3 | 
4 | Also provides exceptions for indicating no solution or out of time
5 | """
6 | 
7 | 
8 | def add_col_set_recursive(c1, c2):
9 |     """Returns a new collision set resulting from adding c1 to c2. No
10 |     side effects.
11 | 
12 |     Addition is done for the recursive case, where
13 |     ({1, 2}, ) + ({3, 4}, ) = ({1, 2}, {3, 4})
14 | 
15 |     c1, c2 - tuples of (immutable) sets
16 | 
17 |     returns:
18 |         recursive collision set containing c1 and c2
19 | 
20 |     """
21 |     # Make shallow copies
22 |     c1 = list(c1)
23 |     c2 = list(c2)
24 |     while len(c1) > 0:
25 |         i = 0
26 |         # Whether c1[-1] overlaps with any element of c2
27 |         found_overlap = False
28 |         while i < len(c2):
29 |             if not c2[i].isdisjoint(c1[-1]):
30 |                 # Found overlap
31 |                 if c2[i].issuperset(c1[-1]):
32 |                     # No change in c2
33 |                     c1.pop()
34 |                     found_overlap = True
35 |                     break
36 |                 # Have found a non-trivial overlap. Need to add the
37 |                 # union to c1 so that we can check if the union has any
38 |                 # further overlap with elements of c2
39 |                 temp = c2.pop(i)
40 |                 # replace c2[i] with the union of c2[i] and c1[-1]
41 |                 c1.append(temp.union(c1.pop()))
42 |                 found_overlap = True
43 |                 break
44 |             else:
45 |                 # No overlap between c1[-1] and c2[i], so check next
46 |                 # element of c2
47 |                 i += 1
48 |         if not found_overlap:
49 |             # c1[-1] has no overlap with any element of c2, so it can be
50 |             # added as is to c2
51 |             c2.append(c1.pop())
52 |     return tuple(c2)
53 | 
54 | 
55 | def add_col_set(c1, c2):
56 |     """Adds the collision sets c1 to c2. c2 is assumed to contain a
57 |     single,
58 |     possibly empty, set
59 | 
60 |     c1, c2 - input collision sets
61 | 
62 |     returns:
63 |         combined collision set containing c1 and c2
64 | 
65 |     """
66 |     temp = frozenset([])
67 |     if len(c2) >= 1:
68 |         temp = c2[0]
69 |         assert len(c2) == 1
70 |     for i in c1:
71 |         temp = temp.union(i)
72 |     if len(temp) == 0:
73 |         return ()
74 |     return (temp, )
75 | 
76 | 
77 | def col_set_add(c1, c2, recursive):
78 |     """Adds two collision sets
79 | 
80 |     c1, c2 - input collision sets
81 |     recursive - boolean, whether to perform recursive M* style addition
82 | 
83 |     returns:
84 |         collision set containing c1 and c2
85 | 
86 |     """
87 |     if recursive:
88 |         return add_col_set_recursive(c1, c2)
89 |     else:
90 |         return add_col_set(c1, c2)
91 | 
92 | 
93 | def effective_col_set(col_set, prev_col_set):
94 |     """Computes the effective collision set to use given the current
95 |     collision set and the collision set used to get to the current node
96 | 
97 |     Only makes sense when used with recursive M*
98 | 
99 |     The purpose of this code is that in recursive M*, you invoke a
100 |     subplanner to figure out how to get to the goal, which caches the
101 |     entire path to the goal. On the next step, you have an empty
102 |     collision set, so you don't query the subplanner with the cached
103 |     path, and have to find a bunch of collisions before using the cached
104 |     solution. This is intended for use with a memory of what the
105 |     collision set was when you reached a given node.
106 | 
107 |     Computes the "effective collision set". Elements of the memorized
108 |     collision set are used if they have no non-empty intersections with
109 |     elements of the current collision set that are not subsets of the
110 |     memorized component.
111 | 
112 |     Elements of col_set are NOT used if they are contained within some
113 |     element of prev_col_set that is used. Elements of prev_col_set are
114 |     used if they completely contain all elements of col_set with which
115 |     they intersect
116 | 
117 |     col_set - current collision set
118 |     prev_col_set - "memorized" collision set, i.e. the collision set of
119 |                    the optimal predecessor at the time the path from the
120 |                    optimal predecessor was first found
121 | 
122 |     returns:
123 |         effective collision set. Consists of the elements of the previous
124 |         collision set, which should index subplanners which have cached
125 |         paths available, and elements of the current collision set which
126 |         are not contained within prev_col_set
127 |     """
128 |     effective_set = []
129 |     prev_col_set = list(prev_col_set)
130 |     col_set = list(col_set)
131 |     while(len(prev_col_set) > 0):
132 |         # Need to keep around the elements of col_set that won't be
133 |         # used, because the containing element of prev_col_set may be
134 |         # invalidated by a later element of col_set
135 |         col_set_to_remove = []
136 |         j = 0
137 |         while (j < len(col_set)):
138 |             if col_set[j].issubset(prev_col_set[-1]):
139 |                 # this element is contained in prev_col_set, so can be
140 |                 # skipped unless prev_col_set[-1] is invalidated by some
141 |                 # later element of col_set
142 |                 col_set_to_remove.append(col_set.pop(j))
143 |             elif not col_set[j].isdisjoint(prev_col_set[-1]):
144 |                 # this element partially overlaps prev_col_set,
145 |                 # invalidating it, so cannot use this element of
146 |                 # prev_col_set
147 |                 prev_col_set.pop()
148 |                 # return the elements of col_set we were going to remove
149 |                 col_set.extend(col_set_to_remove)
150 |                 break
151 |             else:
152 |                 j += 1
153 |         else:
154 |             # Never broke, so prev_col_set can be used as part of the
155 |             # effective collision set
156 |             effective_set.append(prev_col_set.pop())
157 |     # Just copy over any elements of col_set that survived
158 |     effective_set.extend(col_set)
159 |     return tuple(effective_set)
160 | 
161 | 
162 | class OutOfTimeError(Exception):
163 |     def __init__(self, value=None):
164 |         self.value = value
165 | 
166 |     def __str__(self):
167 |         return repr(self.value)
168 | 
169 | 
170 | class NoSolutionError(Exception):
171 |     def __init__(self, value=None):
172 |         self.value = value
173 | 
174 |     def __str__(self):
175 |         return repr(self.value)
176 | 
177 | 
178 | class OutOfScopeError(NoSolutionError):
179 |     def __init__(self, value=None, col_set=()):
180 |         self.value = value
181 |         self.col_set = col_set
182 | 
183 |     def __str__(self):
184 |         return repr(self.value)
185 | 
--------------------------------------------------------------------------------
/od_mstar3/cpp_mstar.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/od_mstar3/cpp_mstar.cpython-37m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/od_mstar3/cython_od_mstar.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c++
2 | # distutils: sources = policy.cpp col_checker.cpp od_mstar.cpp grid_policy.cpp grid_planning.cpp
3 | from libcpp cimport bool
4 | from libcpp.vector cimport vector
5 | from libcpp.pair cimport pair
6 | 
7 | from od_mstar3.col_set_addition import OutOfTimeError, NoSolutionError
8 | 
9 | cdef extern from "grid_planning.hpp" namespace "mstar":
10 |     vector[vector[pair[int, int]]] find_grid_path(
11 |         const vector[vector[bool]] &obstacles,
12 |         const vector[pair[int, int]] &init_pos,
13 |         const vector[pair[int, int]] &goals,
14 |         double inflation, int time_limit) except +
15 | 
16 | def find_path(world, init_pos, goals, inflation, time_limit):
17 |     """Finds a path by invoking the C++ implementation
18 | 
19 |     Uses recursive ODrM* to explore a 4-connected grid
20 | 
21 |     world - matrix specifying obstacles, 1 for obstacle, 0 for free
22 |     init_pos - [[x, y], ...]
specifying start position for each robot 23 | goals - [[x, y], ...] specifying goal position for each robot 24 | inflation - inflation factor for heuristic 25 | time_limit - time until failure in seconds 26 | 27 | returns: 28 | [[[x1, y1], ...], [[x2, y2], ...], ...] path in the joint 29 | configuration space 30 | 31 | raises: 32 | NoSolutionError if problem has no solution 33 | OutOfTimeError if the planner ran out of time 34 | """ 35 | 36 | import resource 37 | resource.setrlimit(resource.RLIMIT_AS, (2**33,2**33)) # 8Gb 38 | 39 | # convert to boolean. For some reason coercion doesn't seem to 40 | # work properly 41 | cdef vector[vector[bool]] obs 42 | cdef vector[bool] temp 43 | for row in world: 44 | temp = vector[bool]() 45 | for i in row: 46 | temp.push_back(i == 1) 47 | obs.push_back(temp) 48 | try: 49 | return find_grid_path(obs, init_pos, goals, inflation, time_limit) 50 | except Exception as e: 51 | if str(e) == "Out of Time": 52 | raise OutOfTimeError() 53 | elif str(e) == "No Solution": 54 | raise NoSolutionError() 55 | else: 56 | raise e 57 | -------------------------------------------------------------------------------- /od_mstar3/grid_planning.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "grid_planning.hpp" 6 | #include "grid_policy.hpp" 7 | #include "od_mstar.hpp" 8 | #include "mstar_type_defs.hpp" 9 | 10 | using namespace mstar; 11 | 12 | /** 13 | * Converts from (row, column) coordinates to vertex index 14 | */ 15 | OdCoord to_internal(std::vector> coord, 16 | int cols){ 17 | std::vector out; 18 | for (auto &c: coord){ 19 | out.push_back(c.first * cols + c.second); 20 | } 21 | return OdCoord(out, {}); 22 | }; 23 | 24 | /** 25 | * Converts from vertex index to (row, column) format 26 | */ 27 | std::vector> from_internal(OdCoord coord, 28 | int cols){ 29 | std::vector> out; 30 | for (auto &c: coord.coord){ 31 | out.push_back({c / cols, c % cols}); 32 | } 33 | return out; 34 | }; 35 | 36 | std::vector>> mstar::find_grid_path( 37 | const std::vector> &obstacles, 38 | const std::vector> &init_pos, 39 | const std::vector> &goals, 40 | double inflation, int time_limit){ 41 | // compute time limit first, as the policies fully compute 42 | // Need to convert time limit to std::chrono format 43 | time_point t = std::chrono::system_clock::now(); 44 | t += Clock::duration(std::chrono::seconds(time_limit)); 45 | 46 | int cols = (int) obstacles[0].size(); 47 | OdCoord _init = to_internal(init_pos, cols); 48 | OdCoord _goal = to_internal(goals, cols); 49 | std::vector> policies = {}; 50 | for (const auto &goal: goals){ 51 | policies.push_back(std::shared_ptr( 52 | grid_policy_ptr(obstacles, goal))); 53 | } 54 | OdMstar planner(policies, _goal, inflation, t, 55 | std::shared_ptr(new SimpleGraphColCheck())); 56 | OdPath path = planner.find_path(_init); 57 | std::vector>> out; 58 | for (auto &coord: path){ 59 | out.push_back(from_internal(coord, cols)); 60 | } 61 | return out; 62 | } 63 | -------------------------------------------------------------------------------- /od_mstar3/grid_planning.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_GRID_PLANNING_H 2 | #define MSTAR_GRID_PLANNING_H 3 | 4 | #include 5 | #include 6 | 7 | /********************************************************************* 8 | * Provides convienence functions for planning on 4-connected graphs 9 | ********************************************************************/ 
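// Illustrative usage of find_grid_path (a sketch under assumptions, not part
// of the library): two robots swap corners of a 2x3 grid with one obstacle.
//
//   std::vector<std::vector<bool>> obstacles = {{false, false, false},
//                                               {false, true,  false}};
//   auto path = mstar::find_grid_path(
//       obstacles,
//       {{0, 0}, {0, 2}},  // initial (row, col) of each robot
//       {{0, 2}, {0, 0}},  // goal (row, col) of each robot
//       1.0,               // uninflated heuristic (optimal search)
//       60);               // declare failure after 60 seconds
//   // path[t][r] gives the (row, col) of robot r at timestep t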
10 | 
11 | namespace mstar{
12 |   /**
13 |    * Helper function for finding paths in 4-connected grids
14 |    *
15 |    * The world is specified as a matrix where true indicates the presence
16 |    * of obstacles and false indicates a clear space. Coordinates for
17 |    * individual robots are indicated as (row, column)
18 |    *
19 |    * @param obstacles matrix indicating obstacle positions. True is obstacle
20 |    * @param init_pos list of (row, column) pairs defining the initial
21 |    *                 position of the robots
22 |    * @param goals list of (row, column) pairs defining the goal configuration
23 |    *              of the robots
24 |    * @param inflation inflation factor used to weight the heuristic
25 |    * @param time_limit seconds until the code declares failure
26 |    *
27 |    * @return Path in the joint configuration space. Each configuration is
28 |    *         a vector of (row, col) pairs specifying the position of
29 |    *         individual robots
30 |    */
31 |   std::vector<std::vector<std::pair<int, int>>> find_grid_path(
32 |     const std::vector<std::vector<bool>> &obstacles,
33 |     const std::vector<std::pair<int, int>> &init_pos,
34 |     const std::vector<std::pair<int, int>> &goals,
35 |     double inflation, int time_limit);
36 | }
37 | 
38 | #endif
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
--------------------------------------------------------------------------------
/od_mstar3/grid_policy.cpp:
--------------------------------------------------------------------------------
1 | #include "grid_policy.hpp"
2 | 
3 | using namespace mstar;
4 | 
5 | Graph get_graph(const std::vector<std::vector<bool>> &world_map,
6 |                 const std::pair<int, int> &goal){
7 |   int rows = (int) world_map.size();
8 |   int columns = (int) world_map[0].size();
9 |   typedef std::pair<int, int> E;
10 |   std::vector<E> edges;
11 |   std::vector<double> weights;
12 | 
13 |   std::vector<std::pair<int, int>> offsets = {{-1, 0}, {0, 1}, {1, 0},
14 |                                               {0, -1}, {0, 0}};
15 |   for (int row = 0; row < rows; ++row){
16 |     for (int col = 0; col < columns; ++col){
17 |       if (world_map[row][col]){
18 |         continue;
19 |       }
20 |       for (auto &off: offsets){
21 |         int r = row + off.first;
22 |         int c = col + off.second;
23 |         if( r >= 0 && r < rows && c >= 0 && c < columns && !
world_map[r][c]){ 24 | // edge from (row, col) to (r, c) 25 | // should be a more direct way, but boost is hating me 26 | edges.push_back({row * columns + col, r * columns + c}); 27 | if (row == r && col == c && r == goal.first && c == goal.second){ 28 | weights.push_back(0.); 29 | }else{ 30 | weights.push_back(1.); 31 | } 32 | } 33 | } 34 | } 35 | } 36 | return Graph(edges.begin(), edges.end(), weights.begin(), rows * columns); 37 | } 38 | 39 | /** 40 | * Generates a policy for a 4 connected grid 41 | * 42 | * The internal coordinates are of the form row * num_rows + col 43 | * Allows for weighting at the goal for free 44 | * 45 | * @param world_map matrix of values describing grid true for obstacle, 46 | * false for clear 47 | * @param goal (row, column) of goal 48 | * 49 | * @return Policy object describing problem 50 | */ 51 | Policy mstar::grid_policy(const std::vector> &world_map, 52 | const std::pair &goal){ 53 | int columns = (int) world_map[0].size(); 54 | return Policy(get_graph(world_map, goal), goal.first * columns + goal.second); 55 | } 56 | 57 | Policy* mstar::grid_policy_ptr(const std::vector> &world_map, 58 | const std::pair &goal){ 59 | int columns = (int) world_map[0].size(); 60 | return new Policy(get_graph(world_map, goal), 61 | goal.first * columns + goal.second); 62 | } 63 | -------------------------------------------------------------------------------- /od_mstar3/grid_policy.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_GRID_POLICY_H 2 | #define MSTAR_GRID_POLICY_H 3 | 4 | /************************************************************************** 5 | * Generates policy for grid maps 6 | **************************************************************************/ 7 | 8 | #include 9 | #include 10 | 11 | #include "mstar_type_defs.hpp" 12 | #include "policy.hpp" 13 | #include 14 | #include 15 | 16 | namespace mstar{ 17 | 18 | /** 19 | * Generates a policy for a 4 connected grid 20 | * 21 | * The internal coordinates are of the form row * num_rows + col 22 | * Allows for weighting at the goal for free 23 | * 24 | * @param world_map matrix of values describing grid true for obstacle, 25 | * false for clear 26 | * @param goal (row, column) of goal 27 | * 28 | * @return Policy object describing problem 29 | */ 30 | Policy grid_policy(const std::vector> &world_map, 31 | const std::pair &goal); 32 | 33 | Policy* grid_policy_ptr(const std::vector> &world_map, 34 | const std::pair &goal); 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /od_mstar3/interface.py: -------------------------------------------------------------------------------- 1 | """This module defines interfaces for the low-level graphs and 2 | policies used in Mstar. In general terms, these classes represent: 3 | 4 | 1. Graphs representing the configuration space. These graphs are 5 | structured so that each node in the graph represents a 6 | configuration, and each edge represents a permissible transition 7 | between two different configurations. 8 | 9 | *All of these graphs subclass the Graph_Interface class 10 | 11 | 2. Policies, which define paths in a configuration space from an 12 | initial configuration to a goal configuration. Policies are 13 | comprised of nodes, each of which represents a configuration 14 | in the configuration space. Each node in a policy has a pointer 15 | to its optimal neighbor, i.e., the next node in the optimal path 16 | to the goal node. 
Policy classes compute optimal paths by using 17 | some search algorithm to search the graphs generated in the 18 | classes described above. 19 | 20 | *All of these graphs subclass the Policy_Interface class 21 | 22 | 3. Configuration graph edge checking, which determines whether 23 | moving between two configurations is permissible. For example, 24 | configuration graph edge checking should not allow a robot to 25 | move out of bounds of the workspace. 26 | 27 | 4. Planner edge checking, which determines whether moving between 28 | two states of robot positions will result in any collisions. 29 | For example, planner edge checking should check to see if two 30 | robots pass through each other as they move between positions. 31 | """ 32 | 33 | 34 | class Graph_Interface(object): 35 | 36 | """Interface for configuration space generators 37 | 38 | This graph interface enumerates the methods that any 39 | configuration space generator should implement. These graphs are 40 | used by policy graphs such as A*. 41 | """ 42 | 43 | def get_edge_cost(self, coord1, coord2): 44 | """Returns edge_cost of going from coord1 to coord2.""" 45 | raise NotImplementedError 46 | 47 | def get_neighbors(self, coord): 48 | """Returns the collision free neighbors of the specified coord. 49 | 50 | Return value is a list of tuples each of which are a coordinate 51 | """ 52 | raise NotImplementedError 53 | 54 | # This is a function to return the in neighbors of a coordinate. 55 | # Designed by default to handle un-directed graphs 56 | get_in_neighbors = get_neighbors 57 | 58 | 59 | class Policy_Interface(object): 60 | 61 | """Interface showing required implemented functions for all policies 62 | 63 | This interface enumerates the functions that must be exposed by 64 | policies for M* to function correctly. A policy object with this 65 | interface provides a route for a single robot. Underneath the policy 66 | interface is a graph object which describes the configuration space 67 | through which robots can move. 
The underlying graph object does all 68 | of the work of calculating the configuration space based on the 69 | actual environment in which the robot is moving 70 | 71 | **All config inputs must be hashable** 72 | """ 73 | 74 | def get_cost(self, config): 75 | """Returns the cost of moving from given position to goal""" 76 | raise NotImplementedError 77 | 78 | def get_edge_cost(self, config1, config2): 79 | """Returns the cost of traversing an edge in the underlying 80 | graph 81 | """ 82 | raise NotImplementedError 83 | 84 | def get_step(self, config): 85 | """Returns the configurations of the optimal neighbor of config""" 86 | raise NotImplementedError 87 | 88 | def get_neighbors(self, config): 89 | """Returns neighboring configurations of config 90 | 91 | This function returns the configurations which are next to 92 | config 93 | 94 | Return list of tuples, each of which is a coordinate 95 | """ 96 | raise NotImplementedError 97 | 98 | def get_graph_size(self, correct_for_size=True): 99 | """Returns number of nodes in graph""" 100 | raise NotImplementedError 101 | 102 | def get_limited_offset_neighbors(self, config, max_offset, min_offset=0): 103 | """Returns set of neighbors between the offset arguments""" 104 | raise NotImplementedError 105 | 106 | def get_offset_neighbors(self, config, offset): 107 | """Returns neighbors of coord with offset specified by argument""" 108 | raise NotImplementedError 109 | 110 | def get_offsets(self, config): 111 | """Return the offsets of the neighbors""" 112 | raise NotImplementedError 113 | 114 | 115 | class Config_Edge_Checker(object): 116 | """Checks robot collisions with objects and edges of workspace""" 117 | 118 | def col_check(self, state, recursive): 119 | """Checks for collisions at a single state 120 | 121 | state - list of coordinates of robots 122 | recursive - generate collisions sets for rM* 123 | 124 | Returns: 125 | M* collision set in type set if recursive false 126 | rM* collision set in type set if recursive true 127 | """ 128 | raise NotImplementedError 129 | 130 | 131 | class Planner_Edge_Checker(object): 132 | """Checks for robot collisions on an edge in a planner's graph 133 | 134 | Currently, no methods have to be implemented because the collision 135 | methods change based on the graph. 
136 | """ 137 | 138 | def pass_through(self, state1, state2, recursive=False): 139 | """Detects pass through collisions 140 | 141 | state1 - list of robot coordinates describing initial state 142 | state2 - list of robot coordinates describing final state, 143 | 144 | Returns: 145 | M* collision set in type set if recursive false 146 | rM* collision set in type set if recursive true 147 | """ 148 | raise NotImplementedError 149 | 150 | def col_check(self, state, recursive): 151 | """Checks for collisions at a single state 152 | 153 | state - list of coordinates of robots 154 | recursive - generate collisions sets for rM* 155 | 156 | Returns: 157 | M* collision set in type set if recursive false 158 | rM* collision set in type set if recursive true 159 | """ 160 | raise NotImplementedError 161 | 162 | def cross_over(self, state1, state2, recursive=False): 163 | """Detects cross over and pass through collisions 164 | 165 | 166 | state1 - list of robot coordinates describing initial state 167 | state2 - list of robot coordinates describing final state 168 | 169 | Returns: 170 | M* collision set in type set if recursive false 171 | rM* collision set in type set if recursive true 172 | """ 173 | raise NotImplementedError 174 | 175 | def simple_pass_through(self, state1, state2): 176 | """Check for pass through collisions 177 | 178 | state1 - list of robot coordinates describing initial state 179 | state2 - list of robot coordinates describing final state 180 | 181 | Returns: 182 | True if pass through collision 183 | False otherwise 184 | """ 185 | raise NotImplementedError 186 | 187 | def simple_col_check(self, state): 188 | """Checks for robot-robot collisions at state, 189 | 190 | state - list of robot coordinates 191 | 192 | returns: 193 | True if collision 194 | False otherwise 195 | """ 196 | raise NotImplementedError 197 | 198 | def simple_cross_over(self, state1, state2): 199 | """Check for cross over collisions in 8-connected worlds 200 | 201 | state1 - list of robot coordinates describing initial state 202 | state2 - list of robot coordinates describing final state 203 | 204 | returns: 205 | True if collision exists 206 | False otherwise 207 | """ 208 | raise NotImplementedError 209 | 210 | def simple_incremental_cross_over(self, state1, state2): 211 | """Check for cross over collisions in 8-connected worlds. 212 | 213 | Assumes that collision checking has been performed for everything 214 | but the last robot in the coordinates. To be used to save a bit 215 | of time for partial expansion approaches 216 | 217 | state1 - list of robot coordinates describing initial state 218 | state2 - list of robot coordinates describing final state 219 | 220 | returns: 221 | True if collision exists 222 | False otherwise 223 | """ 224 | raise NotImplementedError 225 | 226 | def simple_incremental_col_check(self, state1): 227 | """Checks for robot-robot collisions at c1, 228 | 229 | Assumes that collision checking has been performed for everything 230 | but the last robot in the coordinates. 
To be used to save a bit 231 | of time for partial expansion approaches 232 | 233 | state1 - list of robot coordinates 234 | 235 | returns: 236 | True if collision exists 237 | False otherwise 238 | """ 239 | raise NotImplementedError 240 | 241 | def single_bot_outpath_check(self, cur_coord, prev_coord, cur_t, paths): 242 | """Tests for collisions from prev_coord to cur_coord 243 | 244 | Checks for cross over collisions and collisions at the same 245 | location when moving from cur_coord to prev_coord while robots 246 | are moving in paths 247 | 248 | cur_coord - position of a single robot 249 | 250 | Returns: 251 | 252 | True if collision exists 253 | False otherwise 254 | """ 255 | raise NotImplementedError 256 | 257 | def simple_prio_col_check(self, coord, t, paths, pcoord=None, 258 | conn_8=False): 259 | """Returns true, if collision is detected, false otherwise 260 | at the moment only used to check the obstacle collisions, but 261 | didn't want to reject the other code already 262 | 263 | coord - coord of potential new neighbor 264 | t - current time step 265 | paths - previously found paths 266 | pcoord - previous coordinate of the path 267 | 268 | Returns: 269 | True if collision exists 270 | False otherwise 271 | """ 272 | raise NotImplementedError 273 | 274 | def incremental_col_check(self, state, recursive): 275 | """Checks for robot-robot collisions in state 276 | 277 | state - list of coordinates of robots 278 | recursive - generate collisions sets for rM* 279 | 280 | Only checks whether the last robot is 281 | involved in a collision, for use with incremental methods 282 | 283 | Returns: 284 | M* collision set in type set if recursive false 285 | rM* collision set in type set if recursive true 286 | """ 287 | raise NotImplementedError 288 | 289 | def incremental_cross_over(self, state1, state2, recursive=False): 290 | """Detects cross over collisions as well as pass through 291 | collisions. 292 | 293 | Only checks if the last robot is involved in a collision, for use 294 | with partial expansion approaches. 
295 | 296 | state1 - list of robot coordinates describing initial state 297 | state2 - list of robot coordinates describing final state, 298 | 299 | Returns: 300 | M* collision set in type set if recursive false 301 | rM* collision set in type set if recursive true 302 | """ 303 | raise NotImplementedError 304 | 305 | def single_bot_cross_over(self, coord1, pcoord1, coord2, pcoord2): 306 | """Checks for cross-over and collisions between robots 1 and 2 307 | 308 | Robots are moving from pcoord to coord 309 | 310 | pcoord1 - first position of first robot 311 | coord1 - second position of first robot 312 | pcoord2 - first position of second robot 313 | coord2 - second position of second robot 314 | 315 | Returns: 316 | True if collision 317 | False otherwise 318 | """ 319 | raise NotImplementedError 320 | 321 | def prio_col_check(self, coord, pcoord, t, paths=None, conn_8=False, 322 | recursive=False): 323 | """Collision checking with paths passed as constraints 324 | 325 | coord - current node 326 | pcoord - previous node 327 | t - timestep 328 | paths - paths that need to be avoided 329 | 330 | Returns: (collision sets are of type set) 331 | M* collision set if collision exists and recursive is false 332 | rM* collision set if collision exists and recursive is true 333 | None if no collision exists 334 | """ 335 | raise NotImplementedError 336 | -------------------------------------------------------------------------------- /od_mstar3/mstar_type_defs.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_TYPE_DEFS 2 | #define MSTAR_TYPE_DEFS 3 | 4 | /************************************************************************** 5 | * Provides type defs that are used in multiple files 6 | *************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace mstar{ 14 | /** 15 | * Defines the graph type for individual robots. 16 | * 17 | * Assumes robot positions are indicated by integers, costs by doubles, 18 | * and assumes that the edge_weight property is filled 19 | */ 20 | typedef boost::adjacency_list< 21 | boost::vecS, boost::vecS, boost::bidirectionalS, boost::no_property, 22 | boost::property> Graph; 23 | 24 | // type that defines the position of the robot 25 | typedef int RobCoord; 26 | 27 | // represents the coordinate of an OD node, also used to index graphs 28 | struct OdCoord{ 29 | std::vector coord, move_tuple; 30 | 31 | OdCoord(std::vector in_coord, std::vector in_move){ 32 | coord = in_coord; 33 | move_tuple = in_move; 34 | } 35 | 36 | OdCoord(): coord(), move_tuple(){} 37 | 38 | bool operator==(const OdCoord &other) const{ 39 | return (coord == other.coord) && (move_tuple == other.move_tuple); 40 | } 41 | 42 | bool is_standard() const{ 43 | return move_tuple.size() == 0; 44 | } 45 | }; 46 | 47 | // Holds a path in the joint configuration space 48 | typedef std::vector OdPath; 49 | 50 | // defines a single set of mutually colliding robots. 
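  // (For example, a ColSetElement holding {0, 2} records that robots 0 and 2
  // are in mutual collision.)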
51 | // Must be sorted in order of increasing value for logic to hold 52 | typedef std::set ColSetElement; 53 | 54 | // Defines a full collision set 55 | typedef std::vector ColSet; 56 | 57 | // defines times for checking purposes 58 | typedef std::chrono::system_clock Clock; 59 | typedef Clock::time_point time_point; 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /od_mstar3/mstar_utils.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_UTILS_H 2 | #define MSTAR_UTILS_H 3 | 4 | /** 5 | * Defines convinence functions for testing or other purposes not directly 6 | * related to the actual planning 7 | */ 8 | 9 | #include 10 | 11 | #include "mstar_type_defs.hpp" 12 | 13 | namespace mstar{ 14 | void print_od_path(const OdPath &path){ 15 | for (const OdCoord &pos: path){ 16 | std::cout << "{"; 17 | for (const RobCoord &i: pos.coord){ 18 | std::cout << i << " "; 19 | } 20 | std::cout << "}" << std::endl; 21 | } 22 | }; 23 | 24 | void print_path(const std::vector>> &path){ 25 | for (const auto &coord: path){ 26 | std::cout << "{"; 27 | for (const auto &c: coord){ 28 | std::cout << "(" << c.first << ", " << c.second << ") "; 29 | } 30 | std::cout << "}" << std::endl; 31 | } 32 | }; 33 | }; 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /od_mstar3/od_mstar.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "od_mstar.hpp" 5 | 6 | using namespace mstar; 7 | 8 | OdMstar::OdMstar(std::vector> policies, 9 | OdCoord goals, double inflation, 10 | time_point end_time, std::shared_ptr col_checker){ 11 | subplanners_ = new std::unordered_map>(); 13 | policies_ = policies; 14 | // top-level planner, so construct a set of all robot ids 15 | for (int i = 0; i < (int) goals.coord.size(); ++i){ 16 | ids_.push_back(i); 17 | } 18 | goals_ = goals; 19 | end_time_ = end_time; 20 | inflation_ = inflation; 21 | planning_iter_ = 0; 22 | num_bots_ = (int) ids_.size(); 23 | col_checker_ = col_checker; 24 | top_level_ = true; 25 | } 26 | 27 | OdMstar::OdMstar(const ColSetElement &robots, OdMstar &parent){ 28 | subplanners_ = parent.subplanners_; 29 | policies_ = parent.policies_; 30 | for (int i: robots){ 31 | ids_.push_back(parent.ids_[i]); 32 | goals_.coord.push_back(parent.goals_.coord[i]); 33 | } 34 | end_time_ = parent.end_time_; 35 | inflation_ = parent.inflation_; 36 | planning_iter_ = 0; 37 | num_bots_ = (int) ids_.size(); 38 | col_checker_ = parent.col_checker_; 39 | top_level_ = false; 40 | } 41 | 42 | OdMstar::~OdMstar(){ 43 | if (top_level_){ 44 | delete subplanners_; 45 | } 46 | } 47 | 48 | OdPath OdMstar::find_path(OdCoord init_pos){ 49 | reset(); 50 | 51 | // Configure the initial vertex 52 | // identified by setting the back_ptr to itself 53 | OdVertex *first = get_vertex(init_pos); 54 | first->reset(planning_iter_); 55 | first->back_ptr = first; 56 | first->cost = 0; 57 | first->open = true; 58 | 59 | OpenList open_list; 60 | open_list.push(first); 61 | 62 | while (open_list.size() > 0){ 63 | if (std::chrono::system_clock::now() > end_time_){ 64 | throw OutOfTimeError(); 65 | } 66 | 67 | OdVertex *vert = open_list.top(); 68 | open_list.pop(); 69 | vert->open = false; 70 | if (vert->closed){ 71 | continue; 72 | } 73 | 74 | // check if this is the goal vertex 75 | if (vert->coord == goals_){ 76 | vert->forwards_ptr = vert; 77 | } 78 | if (vert->forwards_ptr != 
nullptr){ 79 | // Either the goal or on a previous found path to the goal 80 | return trace_path(vert); 81 | } 82 | 83 | expand(vert, open_list); 84 | } 85 | throw NoSolutionError(); 86 | } 87 | 88 | void OdMstar::reset(){ 89 | planning_iter_++; 90 | } 91 | 92 | double OdMstar::heuristic(const OdCoord &coord){ 93 | // Heuristic is computed from the assigned move for elements of the 94 | // move tuple, and from the base coordinate for all others 95 | double h = 0; 96 | uint i = 0; 97 | while (i < coord.move_tuple.size()){ 98 | h += policies_[ids_[i]]->get_cost(coord.move_tuple[i]); 99 | ++i; 100 | } 101 | while (i < coord.coord.size()){ 102 | h += policies_[ids_[i]]->get_cost(coord.coord[i]); 103 | ++i; 104 | } 105 | return h * inflation_; 106 | } 107 | 108 | OdVertex* OdMstar::get_vertex(const OdCoord &coord){ 109 | // returns a pair with the first element an interator to a 110 | // pair and the second to a bool which is true if there was not a 111 | // preexisting value 112 | auto p = graph_.emplace(coord, coord); 113 | p.first->second.reset(planning_iter_); 114 | if (p.second){ 115 | // new vertex, so need to set heuristic 116 | p.first->second.h = heuristic(coord); 117 | } 118 | return &p.first->second; 119 | } 120 | 121 | OdCoord get_vertex_step(OdVertex * vert){ 122 | assert(vert != nullptr); 123 | while (1){ 124 | if (vert->forwards_ptr->coord.is_standard()){ 125 | return vert->forwards_ptr->coord; 126 | } 127 | vert = vert->forwards_ptr; 128 | assert(vert != nullptr); 129 | } 130 | } 131 | 132 | OdCoord OdMstar::get_step(const OdCoord &init_pos){ 133 | OdVertex* vert = OdMstar::get_vertex(init_pos); 134 | if (vert->forwards_ptr != nullptr){ 135 | return get_vertex_step(vert); 136 | } 137 | find_path(init_pos); 138 | return get_vertex_step(vert); 139 | } 140 | 141 | void OdMstar::expand(OdVertex *vertex, OpenList &open_list){ 142 | vertex->closed = true; 143 | ColSet gen_set = col_set_to_expand(vertex->col_set, vertex->gen_set); 144 | if (gen_set.size() == 1 && (int) gen_set[0].size() == num_bots_){ 145 | // the generating collision set contains all robots, so no caching 146 | // would be possible. 
Therefore, don't use
147 |     gen_set = vertex->col_set;
148 |   }
149 | 
150 |   std::vector<OdCoord> neighbors = get_neighbors(vertex->coord, gen_set);
151 | 
152 |   // accumulates the collision sets that occur while trying to move to
153 |   // any of the neighbors
154 |   ColSet col_set;
155 |   for (OdCoord &new_coord: neighbors){
156 |     ColSet new_col = col_checker_->check_edge(vertex->coord, new_coord, ids_);
157 |     if (!new_col.empty()){
158 |       // State not accessible due to collisions
159 |       add_col_set_in_place(new_col, col_set);
160 |       continue;
161 |     }
162 | 
163 |     OdVertex *new_vert = get_vertex(new_coord);
164 |     new_vert->back_prop_set.insert(vertex);
165 |     // Always need to add the collision set of any vertex we can reach
166 |     // to its successors, as otherwise we would need to wait for another
167 |     // robot to collide downstream before triggering back propagation
168 |     add_col_set_in_place(new_vert->col_set, col_set);
169 | 
170 |     if (new_vert->closed){
171 |       continue;
172 |     }
173 | 
174 |     double new_cost = vertex->cost + edge_cost(vertex->coord, new_coord);
175 |     if (new_cost >= new_vert->cost){
176 |       continue;
177 |     }
178 |     new_vert->cost = new_cost;
179 |     new_vert->back_ptr = vertex;
180 |     new_vert->open = true;
181 |     new_vert->gen_set = gen_set;
182 |     open_list.push(new_vert);
183 | 
184 |     // Add an intermediate vertex's parent's col_set to its col_set, so
185 |     // moves for later robots can be explored. Not necessary, but should
186 |     // reduce thrashing
187 |     if (!new_vert->coord.is_standard()){
188 |       add_col_set_in_place(vertex->col_set, new_vert->col_set);
189 |     }
190 |   }
191 |   back_prop_col_set(vertex, col_set, open_list);
192 | }
193 | 
194 | std::vector<OdCoord> OdMstar::get_neighbors(const OdCoord &coord,
195 |                                             const ColSet &col_set){
196 |   // If the collision set contains all robots, invoke the non-recursive
197 |   // base case
198 |   if (col_set.size() == 1 && (int) col_set[0].size() == num_bots_){
199 |     return get_all_neighbors(coord);
200 |   }
201 | 
202 |   assert(coord.is_standard());
203 | 
204 |   // Generate the step along the joint policy
205 |   std::vector<RobCoord> policy_step;
206 |   for (int i = 0; i < num_bots_; i++){
207 |     policy_step.push_back(policies_[ids_[i]]->get_step(coord.coord[i]));
208 |   }
209 | 
210 |   // Iterate over colliding sets of robots, and integrate the results
211 |   // of the sub planning for each set
212 |   for (const ColSetElement &elem: col_set){
213 |     // The collision set contains the local ids (relative to the robots in
214 |     // this subplanner) of the robots in collision
215 |     // To properly index child subplanners, need to convert to global robot
216 |     // ids, so that the subplanners will be properly globally accessible
217 |     ColSetElement global_col;
218 |     for (auto &local_id: elem){
219 |       global_col.insert(ids_[local_id]);
220 |     }
221 |     // Get, and if necessary construct, the appropriate subplanner.
194 | std::vector<OdCoord> OdMstar::get_neighbors(const OdCoord &coord, 195 | const ColSet &col_set){ 196 | // If the collision set contains all robots, invoke the non-recursive 197 | // base case 198 | if (col_set.size() == 1 && (int) col_set[0].size() == num_bots_){ 199 | return get_all_neighbors(coord); 200 | } 201 | 202 | assert(coord.is_standard()); 203 | 204 | // Generate the step along the joint policy 205 | std::vector<RobCoord> policy_step; 206 | for (int i = 0; i < num_bots_; i++){ 207 | policy_step.push_back(policies_[ids_[i]]->get_step(coord.coord[i])); 208 | } 209 | 210 | // Iterate over colliding sets of robots, and integrate the results 211 | // of the sub planning for each set 212 | for (const ColSetElement &elem: col_set){ 213 | // The collision set contains the local ids (relative to the robots in 214 | // this subplanner) of the robots in collision 215 | // To properly index child subplanners, need to convert to global robot 216 | // ids, so that the subplanners will be properly globally accessible 217 | ColSetElement global_col; 218 | for (auto &local_id: elem){ 219 | global_col.insert(ids_[local_id]); 220 | } 221 | // Get, and if necessary construct, the appropriate subplanner. 222 | // (insert returns an (iterator, bool) pair whose bool is true if a 223 | // new subplanner was generated) 224 | if (subplanners_->find(global_col) == subplanners_->end()){ 225 | subplanners_->insert( 226 | {global_col, std::shared_ptr<OdMstar>(new OdMstar(elem, *this))}); 227 | } 228 | OdMstar *planner = subplanners_->at(global_col).get(); 229 | // create the query point 230 | std::vector<RobCoord> new_base; 231 | for (const int &i: elem){ 232 | new_base.push_back(coord.coord[i]); 233 | } 234 | 235 | OdCoord step; 236 | try{ 237 | step = planner->get_step(OdCoord(new_base, {})); 238 | } catch(NoSolutionError &e){ 239 | // no solution for that subset of robots, so return no neighbors 240 | // only likely to be relevant on directed graphs 241 | return {}; 242 | } 243 | 244 | int elem_dex = 0; 245 | // now need to copy into the relevant positions in policy_step 246 | for (auto i: elem){ 247 | policy_step[i] = step.coord[elem_dex]; 248 | ++elem_dex; // could play with post appending, but don't want to 249 | } 250 | } 251 | return {OdCoord({policy_step}, {})}; 252 | } 253 | 254 | std::vector<OdCoord> OdMstar::get_all_neighbors(const OdCoord &coord){ 255 | // get the coordinate of the robot to assign a new move 256 | uint move_index = coord.move_tuple.size(); 257 | std::vector<std::vector<RobCoord>> new_moves; 258 | for (RobCoord &move: policies_[ids_[move_index]]->get_out_neighbors( 259 | coord.coord[move_index])){ 260 | std::vector<RobCoord> new_move(coord.move_tuple); 261 | new_move.push_back(move); 262 | new_moves.push_back(new_move); 263 | } 264 | std::vector<OdCoord> ret; 265 | if (move_index + 1 < coord.coord.size()){ 266 | // generating intermediate vertices 267 | for (auto &move_tuple: new_moves){ 268 | ret.push_back(OdCoord(coord.coord, move_tuple)); 269 | } 270 | } else { 271 | // generating standard vertices 272 | for (auto &move_tuple: new_moves){ 273 | ret.push_back(OdCoord(move_tuple, {})); 274 | } 275 | } 276 | return ret; 277 | } 278 | 279 | double OdMstar::edge_cost(const OdCoord &source, const OdCoord &target){ 280 | if (source.is_standard() && target.is_standard()){ 281 | // transition between standard vertices, so all robots are assigned moves and 282 | // incur costs 283 | double cost = 0; 284 | for (int i = 0; i < num_bots_; ++i){ 285 | cost += policies_[ids_[i]]->get_edge_cost(source.coord[i], 286 | target.coord[i]); 287 | } 288 | return cost; 289 | } else { 290 | // transition from an intermediate vertex, so only one robot is assigned 291 | // a move and incurs cost 292 | uint move_index = source.move_tuple.size(); 293 | if (target.is_standard()){ 294 | return policies_[ids_[move_index]]->get_edge_cost( 295 | source.coord[move_index], target.coord[move_index]); 296 | } else{ 297 | return policies_[ids_[move_index]]->get_edge_cost( 298 | source.coord[move_index], target.move_tuple[move_index]); 299 | } 300 | } 301 | } 302 |
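// Illustrative note, not part of the original source: operator decomposition
// assigns moves one robot at a time. With two robots at a standard coordinate
// ((a, b), {}), expansion first produces intermediate vertices ((a, b), {a'})
// that fix robot 0's move, and expanding those produces standard vertices
// ((a', b'), {}) once every robot has a move. Only the robot receiving a move
// pays an edge cost in edge_cost() above, so path costs are unchanged by the
// intermediate layer while the per-vertex branching factor drops from
// (moves per robot)^num_bots_ to roughly (moves per robot).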
303 | OdPath OdMstar::trace_path(OdVertex *vert){ 304 | OdPath path; 305 | back_trace_path(vert, vert->forwards_ptr, path); 306 | forwards_trace_path(vert, path); 307 | return path; 308 | } 309 | 310 | void OdMstar::back_trace_path(OdVertex *vert, OdVertex *successor, 311 | OdPath &path){ 312 | vert->forwards_ptr = successor; 313 | // check if this is the final, terminal state, which is not required 314 | // to have a zero-cost self loop, so we could otherwise get problems 315 | if (vert != successor){ 316 | vert->h = successor->h + edge_cost(vert->coord, successor->coord); 317 | } else{ 318 | vert->h = 0; 319 | } 320 | if (vert->coord.is_standard()){ 321 | path.insert(path.begin(), vert->coord); 322 | } 323 | if (vert->back_ptr != vert){ 324 | back_trace_path(vert->back_ptr, vert, path); 325 | } 326 | } 327 | 328 | void OdMstar::forwards_trace_path(OdVertex *vert, OdPath &path){ 329 | if (vert->forwards_ptr != vert){ 330 | if (vert->forwards_ptr->coord.is_standard()){ 331 | path.push_back(vert->forwards_ptr->coord); 332 | } 333 | forwards_trace_path(vert->forwards_ptr, path); 334 | } 335 | } 336 | 337 | void OdMstar::back_prop_col_set(OdVertex *vert, const ColSet &col_set, 338 | OpenList &open_list){ 339 | bool further = add_col_set_in_place(col_set, vert->col_set); 340 | if (further){ 341 | vert->closed = false; 342 | if (! vert->open){ 343 | vert->open = true; 344 | open_list.push(vert); 345 | } 346 | 347 | for(OdVertex *predecessor: vert->back_prop_set){ 348 | back_prop_col_set(predecessor, vert->col_set, open_list); 349 | } 350 | } 351 | } 352 | -------------------------------------------------------------------------------- /od_mstar3/od_mstar.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_OD_MSTAR_H 2 | #define MSTAR_OD_MSTAR_H 3 | 4 | #include <exception> 5 | #include <memory> 6 | #include <queue> 7 | #include <unordered_map> 8 | #include <vector> 9 | 10 | #include <boost/functional/hash.hpp> 11 | 12 | #include "mstar_type_defs.hpp" 13 | #include "col_set.hpp" 14 | #include "od_vertex.hpp" 15 | #include "col_checker.hpp" 16 | #include "policy.hpp" 17 | 18 | namespace std{ 19 | template <> struct hash<mstar::OdCoord>{ 20 | size_t operator()(const mstar::OdCoord &val) const{ 21 | size_t hash = boost::hash_range(val.coord.cbegin(), val.coord.cend()); 22 | boost::hash_combine( 23 | hash, 24 | boost::hash_range(val.move_tuple.cbegin(), val.move_tuple.cend())); 25 | return hash; 26 | } 27 | }; 28 | 29 | template <> struct hash<std::vector<mstar::RobCoord>>{ 30 | size_t operator()(const std::vector<mstar::RobCoord> &val) const{ 31 | return boost::hash_range(val.cbegin(), val.cend()); 32 | } 33 | }; 34 | 35 | template <> struct hash<mstar::ColSetElement>{ 36 | size_t operator()(const mstar::ColSetElement &val) const{ 37 | return boost::hash_range(val.cbegin(), val.cend()); 38 | } 39 | }; 40 | } 41 | 42 | 43 | namespace mstar{ 44 | 45 | struct greater_cost{ 46 | bool operator()(const mstar::OdVertex *x, const mstar::OdVertex *y) const{ 47 | if (x == nullptr || y == nullptr){ 48 | return true; 49 | } 50 | return *x > *y; 51 | } 52 | }; 53 | 54 | // Sort in decreasing order to give cheap access to the cheapest elements 55 | typedef std::priority_queue<OdVertex*, std::vector<OdVertex*>, 56 | greater_cost> OpenList; 57 |
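// Illustrative note, not part of the original header: std::priority_queue
// keeps its *greatest* element on top, so ordering OdVertex pointers with
// greater_cost (which compares cost + h) turns the queue into a min-heap:
// open_list.top() is always the vertex with the smallest f-value, which is
// exactly what find_path() wants to pop first.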
58 | class OdMstar { 59 | public: 60 | /** 61 | * Constructs a new, top level M* planner 62 | * 63 | * @param policies pointer to vector of policies. 64 | * OdMstar does not take ownership 65 | * @param goals goal configuration of entire system 66 | * @param inflation inflation factor 67 | * @param end_time time at which M* will declare failure 68 | * @param checker collision checking object 69 | */ 70 | OdMstar( 71 | std::vector<std::shared_ptr<Policy>> policies, 72 | OdCoord goals, double inflation, time_point end_time, 73 | std::shared_ptr<ColChecker> col_checker); 74 | 75 | /** 76 | * Creates a subplanner for a subset of the robots 77 | * 78 | * robots is a collision set element in the frame of parent, not global 79 | * robot ids 80 | */ 81 | OdMstar(const ColSetElement &robots, OdMstar &parent); 82 | 83 | ~OdMstar(); 84 | 85 | /** 86 | * Computes the optimal path to the goal from init_pos 87 | * 88 | * @param init_pos coordinate of the initial joint configuration 89 | * 90 | * @return the path in the joint configuration graph to the goal 91 | * 92 | * @throws OutOfTimeError ran out of planning time 93 | * @throws NoSolutionError no path to goal from init_pos 94 | */ 95 | OdPath find_path(OdCoord init_pos); 96 | 97 | private: 98 | /**TODO: fix 99 | * This is kind of horrifying, but I cannot store the OdMstar objects 100 | * directly in the unordered map, as I get ungodly errors that look 101 | * like they come from an allocator. Adding copy constructor and 102 | * assignment operator doesn't work, so it's something involved about 103 | * STL. Think this works, but annoying 104 | */ 105 | std::unordered_map<ColSetElement, std::shared_ptr<OdMstar>> *subplanners_; 106 | std::vector<std::shared_ptr<Policy>> policies_; 107 | // ids of the robots this planner handles. Assumed to be in ascending 108 | // order 109 | std::vector<int> ids_; 110 | OdCoord goals_; 111 | // holds the nodes in the joint configuration space 112 | std::unordered_map<OdCoord, OdVertex> graph_; 113 | time_point end_time_; // When planning will be halted 114 | double inflation_; // inflation factor for heuristic 115 | int planning_iter_; // current planning iteration 116 | int num_bots_; 117 | std::shared_ptr<ColChecker> col_checker_; 118 | bool top_level_; // tracks whether this is the top level planner 119 | 120 | OdMstar(const OdMstar &that) = delete; 121 | 122 | /** 123 | * Resets planning for a new planning iteration. 124 | * 125 | * Does not reset forwards_ptrs, as those should be valid across 126 | * iterations 127 | */ 128 | void reset(); 129 | 130 | /** 131 | * Computes the heuristic value of a vertex at a given coordinate 132 | * 133 | * @param coord coordinate for which to compute a heuristic value 134 | * 135 | * @return the (inflated) heuristic value 136 | */ 137 | double heuristic(const OdCoord &coord); 138 | 139 | /** 140 | * Returns a reference to the vertex at a given coordinate 141 | * 142 | * this->graph retains ownership of the vertex. Will create the vertex 143 | * if it does not already exist. 144 | * 145 | * @param coord coordinate of the desired vertex 146 | * 147 | * @return pointer to the vertex at coord. 148 | */ 149 | OdVertex* get_vertex(const OdCoord &coord); 150 | 151 | /** 152 | * Returns the optimal next step from init_pos 153 | * 154 | * Will compute the full path if necessary, but preferentially uses 155 | * cached results in forwards_ptrs.
Expected to only be called from 156 | * a standard coordinate, and to only return a standard coordinate 157 | * 158 | * @param init_pos coordinate to compute the optimal next step from 159 | * 160 | * @returns the coordinate of the optimal next step towards the goal 161 | */ 162 | OdCoord get_step(const OdCoord &init_pos); 163 | 164 | /** 165 | * Generates the neighbors of vertex and adds them to the open list 166 | * 167 | * @param vertex OdVertex to expand 168 | * @param open_list the sorted open list being used 169 | */ 170 | void expand(OdVertex *vertex, OpenList &open_list); 171 | 172 | /** 173 | * Returns the limited neighbors of coord using recursive calculation 174 | * 175 | * @param coord Coordinates of vertex to generate neighbors thereof 176 | * @param col_set collision set of vertex to generate neighbors 177 | * 178 | * @return list of limited neighbors 179 | */ 180 | std::vector<OdCoord> get_neighbors( 181 | const OdCoord &coord, const ColSet &col_set); 182 | 183 | /** 184 | * Returns the limited neighbors of coord using non-recursive computation 185 | * 186 | * Called when the collision set contains all of the robots, as a base 187 | * case for get_neighbors, thus it always generates all possible neighbors 188 | * 189 | * @param coord Coordinates of vertex to generate neighbors thereof 190 | * 191 | * @return list of limited neighbors 192 | */ 193 | std::vector<OdCoord> get_all_neighbors( 194 | const OdCoord &coord); 195 | 196 | /** 197 | * Returns the cost of traversing a given edge 198 | * 199 | * @param source coordinate of the source vertex 200 | * @param target coordinate of the target vertex 201 | * 202 | * @return the cost of the edge 203 | */ 204 | double edge_cost(const OdCoord &source, const OdCoord &target); 205 | 206 | /** 207 | * Returns the path through a vertex 208 | * 209 | * Assumes that back_ptr and forwards_ptr are set and non-null at vert 210 | * Identifies each end of the path by looking for a back_ptr/forwards_ptr 211 | * pointed at the holder 212 | * 213 | * @param vert the vertex to trace a path through 214 | * 215 | * @return the path passing through vert containing only standard vertices 216 | */ 217 | OdPath trace_path(OdVertex *vert); 218 | 219 | /** 220 | * Generates the path to the specified vertex 221 | * 222 | * Sets forward_ptrs to cache the path, and updates the heuristic 223 | * values of the vertices on the path so we can end the moment a 224 | * vertex on a cached path is expanded. 225 | * 226 | * TODO: double check that making the heuristic inconsistent in this 227 | * fashion is OK. 228 | * 229 | * @param vert the vertex to trace the path to 230 | * @param successor the successor of vert on the path 231 | * @param path place to construct path 232 | */ 233 | void back_trace_path(OdVertex *vert, OdVertex *successor, OdPath &path); 234 | 235 | /** 236 | * Generates the path from the specified vertex to the goal 237 | * 238 | * Non-trivial only if vert lies on a previously cached path 239 | * 240 | * @param vert the vertex to trace the path from 241 | * @param path place to construct path 242 | */ 243 | void forwards_trace_path(OdVertex *vert, OdPath &path); 244 | 245 | /** 246 | * Backpropagates collision set information to all predecessors of a 247 | * vertex.
248 | * 249 | * Adds vertices whose collision set changes back to the open list 250 | * 251 | * @param vertex pointer to the vertex to back propagate from 252 | * @param col_set the collision set that triggered backpropagation 253 | * @param open_list the current open list 254 | */ 255 | void back_prop_col_set(OdVertex *vert, const ColSet &col_set, 256 | OpenList &open_list); 257 | }; 258 | 259 | struct OutOfTimeError : public std::exception{ 260 | const char * what () const throw(){ 261 | return "Out of Time"; 262 | } 263 | }; 264 | 265 | struct NoSolutionError : public std::exception{ 266 | const char * what () const throw(){ 267 | return "No Solution"; 268 | } 269 | }; 270 | 271 | }; 272 | 273 | #endif 274 | -------------------------------------------------------------------------------- /od_mstar3/od_vertex.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_OD_VERTEX_H 2 | #define MSTAR_OD_VERTEX_H 3 | 4 | #include <set> 5 | 6 | #include "mstar_type_defs.hpp" 7 | 8 | namespace mstar{ 9 | 10 | struct OdVertex{ 11 | OdCoord coord; 12 | ColSet col_set, gen_set; // Collision set and generating collision set 13 | int updated; // last planning iteration used 14 | bool closed, open; 15 | double cost, h; 16 | OdVertex* back_ptr; // optimal way to reach this 17 | std::set<OdVertex*> back_prop_set; // all explored ways to reach this 18 | OdVertex* forwards_ptr; // way to goal from this 19 | 20 | OdVertex(OdCoord coord): 21 | coord(coord), col_set(), updated(0), closed(false), open(false), 22 | cost(std::numeric_limits<double>::max()), h(), 23 | back_ptr(nullptr), back_prop_set(), forwards_ptr(nullptr) 24 | {}; 25 | 26 | bool operator>=(const OdVertex &other) const{ 27 | return cost + h >= other.cost + other.h; 28 | } 29 | 30 | bool operator>(const OdVertex &other) const{ 31 | return cost + h > other.cost + other.h; 32 | } 33 | 34 | bool operator<=(const OdVertex &other) const{ 35 | return cost + h <= other.cost + other.h; 36 | } 37 | 38 | bool operator<(const OdVertex &other) const{ 39 | return cost + h < other.cost + other.h; 40 | } 41 | 42 | /** 43 | * Resets a vertex used in a previous planning iteration 44 | * 45 | * @param t Current planning iteration 46 | */ 47 | void reset(int t){ 48 | if (t > updated){ 49 | updated = t; 50 | open = false; 51 | closed = false; 52 | cost = std::numeric_limits<double>::max(); 53 | back_ptr = nullptr; 54 | back_prop_set = std::set<OdVertex*>(); 55 | } 56 | } 57 | };
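// Illustrative note, not part of the original header: the `updated` stamp
// implements a lazy reset. Rather than clearing every stored vertex when a
// new query starts, OdMstar::reset() just increments planning_iter_, and each
// vertex is re-initialized on first touch by reset(t) above. Costs and search
// flags are wiped once per iteration, while h and forwards_ptr survive
// because heuristics and cached paths stay valid across queries.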
58 | 59 | } 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /od_mstar3/policy.cpp: -------------------------------------------------------------------------------- 1 | #include <boost/graph/dijkstra_shortest_paths.hpp> 2 | #include <boost/graph/reverse_graph.hpp> 3 | 4 | #include "policy.hpp" 5 | 6 | using namespace mstar; 7 | 8 | 9 | Policy::Policy(const Graph &g, const RobCoord goal){ 10 | g_ = g; 11 | costs_ = std::vector<double>(num_vertices(g_)); 12 | predecessors_.resize(boost::num_vertices(g_)); 13 | 14 | boost::dijkstra_shortest_paths( 15 | boost::make_reverse_graph(g_), goal, // Dijkstra from the goal on the reversed graph yields cost-to-go 16 | boost::predecessor_map(&predecessors_[0]).distance_map(&costs_[0])); 17 | edge_weight_map_ = boost::get(boost::edge_weight_t(), g_); 18 | } 19 | 20 | 21 | double Policy::get_cost(RobCoord coord){ 22 | return costs_[coord]; 23 | } 24 | 25 | 26 | double Policy::get_edge_cost(RobCoord u, RobCoord v){ 27 | // boost::edge returns an (edge_descriptor, bool) pair 28 | return boost::get(edge_weight_map_, boost::edge(u, v, g_).first); 29 | } 30 | 31 | 32 | std::vector<RobCoord> Policy::get_out_neighbors(RobCoord coord){ 33 | std::vector<RobCoord> out; 34 | for (auto adj_verts = boost::adjacent_vertices(coord, g_); 35 | adj_verts.first != adj_verts.second; adj_verts.first++){ 36 | out.push_back(*(adj_verts.first)); 37 | } 38 | return out; 39 | } 40 | 41 | RobCoord Policy::get_step(RobCoord coord){ 42 | return predecessors_[coord]; 43 | } 44 | -------------------------------------------------------------------------------- /od_mstar3/policy.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MSTAR_POLICY_H 2 | #define MSTAR_POLICY_H 3 | 4 | /**************************************************************************** 5 | * Provides a wrapper for the Boost graphs 6 | ***************************************************************************/ 7 | 8 | #include <vector> 9 | #include <boost/graph/adjacency_list.hpp> 10 | #include <boost/property_map/property_map.hpp> 11 | 12 | #include "mstar_type_defs.hpp" 13 | 14 | 15 | namespace mstar{ 16 | /** 17 | * Generates an individual policy for a robot to reach a specified goal 18 | */ 19 | class Policy{ 20 | private: 21 | Graph g_; // the boost graph this wraps 22 | std::vector<double> costs_; // holds cost to goal from every configuration 23 | boost::property_map<Graph, boost::edge_weight_t>::type edge_weight_map_; 24 | std::vector<RobCoord> predecessors_; 25 | 26 | public: 27 | /** 28 | * @param g The graph describing the workspace 29 | * @param goal The goal coordinate of the robot 30 | */ 31 | Policy(const Graph &g, const RobCoord goal); 32 | 33 | /** 34 | * Returns the cost-to-go from a vertex 35 | * @param coord Vertex to query cost from 36 | * 37 | * @return the cost to go until the goal is reached 38 | */ 39 | double get_cost(RobCoord coord); 40 | 41 | /** 42 | * Returns cost of traversing the edge (u, v) 43 | * 44 | * Does not check whether the edge exists 45 | * 46 | * @param u Source vertex of the edge 47 | * @param v Destination vertex of the edge 48 | * 49 | * @return the cost of the edge 50 | */ 51 | double get_edge_cost(RobCoord u, RobCoord v); 52 | 53 | /** 54 | * Returns the out-neighbors of a given coordinate 55 | * @param coord Vertex to get out neighbors of 56 | */ 57 | std::vector<RobCoord> get_out_neighbors(RobCoord coord); 58 | 59 | /** 60 | * Returns the successor of the specified coordinate 61 | * 62 | * @param coord coordinate to compute the successor thereof 63 | * 64 | * @return coordinate of next step 65 | */ 66 | RobCoord get_step(RobCoord coord); 67 | }; 68 | } 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /od_mstar3/prune_graph.py: -------------------------------------------------------------------------------- 1 | from od_mstar3 import workspace_graph 2 | import networkx as nx #Python network analysis module 3 | 4 | 5 | 6 | def to_networkx_graph(obs_map): 7 | '''Reads in a standard obs_map list and converts it to a networkx 8 | digraph 9 | obs_map - list of lists, 0 for empty cell, 1 for obstacle''' 10 | #Create a workspace_graph object to generate neighbors 11 | g = workspace_graph.Astar_Graph(obs_map,[0,0]) 12 | G = nx.DiGraph() #Creates the graph object 13 | #Populate graph with nodes 14 | for x in range(len(obs_map)): 15 | for y in range(len(obs_map[x])): 16 | if obs_map[x][y] == 0: 17 | G.add_node((x,y)) 18 | #Add edges 19 | for i in G.nodes(): 20 | #Stored nodes by their coordinates in G 21 | for j in g.get_neighbors(i): 22 | G.add_edge(i,j) 23 | return G 24 |
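# Illustrative usage sketch (not part of the original module): build the
# digraph for a tiny 3x3 map with one obstacle. Assumes the od_mstar3 package
# and networkx import cleanly from the repository root, and that
# workspace_graph.Astar_Graph accepts this obstacle map.
if __name__ == '__main__':
    demo_map = [[0, 0, 0],
                [0, 1, 0],
                [0, 0, 0]]
    demo_G = to_networkx_graph(demo_map)
    print(demo_G.number_of_nodes())  # 8 free cells -> 8 nodes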
25 | def prune_opposing_edge(G,num_edges=1): 26 | '''Reads in a networkx digraph and prunes the edge opposing the most 27 | between edge (i.e. the edge lying on the most shortest-path connections). If this edge 28 | doesn't have an opposing edge, or if the removal of said edge would 29 | reduce the connectivity of the space, the next most between edge is pruned 30 | instead. Since computing completeness can be expensive, this allows multiple 31 | edges to be pruned before the impact of said pruning on 32 | completeness is computed''' 33 | #Get the current number of strongly connected components, can't decrease 34 | #without preventing some paths from being found 35 | num_components = nx.number_strongly_connected_components(G) 36 | pruned = 0 37 | # print 'computing betweenness' 38 | betweenness = nx.edge_betweenness_centrality(G) 39 | # print 'betweenness computed' 40 | while pruned < num_edges: 41 | max_bet = max(betweenness.values()) 42 | if max_bet <= 0: 43 | #Betweenness is set to -1 if an edge can't be pruned, and to 0 if not between 44 | return G 45 | edge = list(betweenness.keys())[list(betweenness.values()).index(max_bet)] #dict views are not indexable in Python 3 46 | if not (edge[1],edge[0]) in G.edges(): 47 | #Already been pruned 48 | betweenness[edge] = -1 49 | # print 'no edge' 50 | continue 51 | #Test if pruning the edge will break connectivity 52 | temp_graph = G.copy() 53 | temp_graph.remove_edge(edge[1],edge[0]) 54 | if num_components == nx.number_strongly_connected_components(temp_graph): 55 | #Can safely prune this edge 56 | G = temp_graph 57 | pruned+=1 58 | betweenness[edge] = -1 59 | betweenness.pop((edge[1],edge[0])) 60 | # print 'pruned' 61 | #Need to prevent further edges from being pruned from this vertex 62 | for neighbor in G.neighbors(edge[1]): 63 | betweenness[(edge[1],neighbor)] = -1 64 | else: 65 | betweenness[edge] = -1 66 | # print 'breaks con %s' %(str(edge)) 67 | return G 68 | -------------------------------------------------------------------------------- /od_mstar3/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | 4 | setup(ext_modules = cythonize(Extension( 5 | "cpp_mstar", 6 | sources=["cython_od_mstar.pyx"], 7 | extra_compile_args=["-std=c++11"] 8 | ))) 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.29.28 2 | gym==0.10.5 3 | imageio==2.9.0 4 | ipdb==0.13.9 5 | matplotlib==3.3.2 6 | networkx==2.8.8 7 | numpy==1.21.6 8 | ray==1.8.0 9 | setproctitle==1.2.2 10 | tensorflow==1.14.0 11 | torch==1.11.0 12 | wandb==0.12.14 13 | -------------------------------------------------------------------------------- /runner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import ray 3 | import torch 4 | 5 | from alg_parameters import * 6 | from episodic_buffer import EpisodicBuffer 7 | from mapf_gym import MAPFEnv 8 | from model import Model 9 | from od_mstar3 import od_mstar 10 | from od_mstar3.col_set_addition import OutOfTimeError, NoSolutionError 11 | from util import one_step, update_perf, reset_env, set_global_seeds 12 | 13 | 14 | @ray.remote(num_cpus=1, num_gpus=SetupParameters.NUM_GPU / (TrainingParameters.N_ENVS + 1)) 15 | class Runner(object): 16 | """sub-process used to collect experience""" 17 | 18 | def __init__(self, env_id): 19 | """initialize model0 and environment""" 20 | self.ID = env_id 21 | set_global_seeds(env_id*123) 22 | self.num_agent = EnvParameters.N_AGENTS 23 | self.imitation_num_agent = EnvParameters.N_AGENTS 24 | self.one_episode_perf = {'num_step': 0,
'episode_reward': 0, 'invalid': 0, 'block': 0, 'num_leave_goal': 0, 25 | 'wrong_blocking': 0, 'num_collide': 0, 'reward_count': 0, 'ex_reward': 0, 26 | 'in_reward': 0} 27 | 28 | self.env = MAPFEnv(num_agents=self.num_agent) 29 | self.imitation_env = MAPFEnv(num_agents=self.imitation_num_agent) 30 | 31 | self.local_device = torch.device('cuda') if SetupParameters.USE_GPU_LOCAL else torch.device('cpu') 32 | self.local_model = Model(env_id, self.local_device) 33 | self.hidden_state = ( 34 | torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device), 35 | torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device)) 36 | self.message = torch.zeros((1, self.num_agent, NetParameters.NET_SIZE)).to(self.local_device) 37 | 38 | self.done, self.valid_actions, self.obs, self.vector, self.train_valid = reset_env(self.env, self.num_agent) 39 | 40 | self.episodic_buffer = EpisodicBuffer(0, self.num_agent) 41 | new_xy = self.env.get_positions() 42 | self.episodic_buffer.batch_add(new_xy) 43 | 44 | self.imitation_episodic_buffer = EpisodicBuffer(0, self.imitation_num_agent) 45 | 46 | def run(self, weights, total_steps): 47 | """run multiple steps and collect data for reinforcement learning""" 48 | with torch.no_grad(): 49 | mb_obs, mb_vector, mb_rewards_in, mb_rewards_ex, mb_rewards_all, mb_values_in, mb_values_ex, \ 50 | mb_values_all, mb_done, mb_ps, mb_actions = [], [], [], [], [], [], [], [], [], [], [] 51 | mb_hidden_state = [] 52 | mb_message = [] 53 | mb_train_valid, mb_blocking = [], [] 54 | performance_dict = {'per_r': [], 'per_in_r': [], 'per_ex_r': [], 'per_valid_rate': [], 55 | 'per_episode_len': [], 'per_block': [], 56 | 'per_leave_goal': [], 'per_final_goals': [], 'per_half_goals': [], 'per_block_acc': [], 57 | 'per_max_goals': [], 'per_num_collide': [], 'rewarded_rate': []} 58 | 59 | self.local_model.set_weights(weights) 60 | for _ in range(TrainingParameters.N_STEPS): 61 | mb_obs.append(self.obs) 62 | mb_vector.append(self.vector) 63 | mb_hidden_state.append( 64 | [self.hidden_state[0].cpu().detach().numpy(), self.hidden_state[1].cpu().detach().numpy()]) 65 | mb_message.append(self.message) 66 | actions, ps, values_in, values_ex, values_all, pre_block, self.hidden_state, num_invalid, self.message = \ 67 | self.local_model.step(self.obs, self.vector, self.valid_actions, self.hidden_state, 68 | self.episodic_buffer.no_reward, self.message, self.num_agent) 69 | self.one_episode_perf['invalid'] += num_invalid 70 | mb_values_in.append(values_in) 71 | mb_values_ex.append(values_ex) 72 | mb_values_all.append(values_all) 73 | mb_train_valid.append(self.train_valid) 74 | mb_ps.append(ps) 75 | mb_done.append(self.done) 76 | 77 | rewards, self.valid_actions, self.obs, self.vector, self.train_valid, self.done, blockings, \ 78 | num_on_goals, self.one_episode_perf, max_on_goals, action_status, modify_actions, on_goal \ 79 | = one_step(self.env, self.one_episode_perf, actions, pre_block, self.local_model, values_all, 80 | self.hidden_state, ps, self.episodic_buffer.no_reward, self.message, self.episodic_buffer, 81 | self.num_agent) 82 | 83 | new_xy = self.env.get_positions() 84 | processed_rewards, be_rewarded, intrinsic_rewards, min_dist = self.episodic_buffer.if_reward(new_xy, 85 | rewards, 86 | self.done, 87 | on_goal) 88 | self.one_episode_perf['reward_count'] += be_rewarded 89 | self.vector[:, :, 3] = rewards 90 | self.vector[:, :, 4] = intrinsic_rewards 91 | self.vector[:, :, 5] = min_dist 92 | 93 | mb_actions.append(modify_actions) 94 | for i in 
range(self.num_agent): 95 | if action_status[i] == -3: 96 | mb_train_valid[-1][i][int(modify_actions[i])] = 0 97 | 98 | mb_rewards_all.append(processed_rewards) 99 | mb_rewards_in.append(intrinsic_rewards) 100 | mb_rewards_ex.append(rewards) 101 | mb_blocking.append(blockings) 102 | 103 | self.one_episode_perf['episode_reward'] += np.sum(processed_rewards) 104 | self.one_episode_perf['ex_reward'] += np.sum(rewards) 105 | self.one_episode_perf['in_reward'] += np.sum(intrinsic_rewards) 106 | if self.one_episode_perf['num_step'] == EnvParameters.EPISODE_LEN // 2: 107 | performance_dict['per_half_goals'].append(num_on_goals) 108 | 109 | if self.done: 110 | performance_dict = update_perf(self.one_episode_perf, performance_dict, num_on_goals, max_on_goals, 111 | self.num_agent) 112 | self.one_episode_perf = {'num_step': 0, 'episode_reward': 0, 'invalid': 0, 'block': 0, 113 | 'num_leave_goal': 0, 'wrong_blocking': 0, 'num_collide': 0, 114 | 'reward_count': 0, 'ex_reward': 0, 'in_reward': 0} 115 | self.num_agent = EnvParameters.N_AGENTS 116 | 117 | self.done, self.valid_actions, self.obs, self.vector, self.train_valid = reset_env(self.env, 118 | self.num_agent) 119 | self.done = True 120 | 121 | self.hidden_state = ( 122 | torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device), 123 | torch.zeros((self.num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device)) 124 | self.message = torch.zeros((1, self.num_agent, NetParameters.NET_SIZE)).to(self.local_device) 125 | 126 | self.episodic_buffer.reset(total_steps, self.num_agent) 127 | new_xy = self.env.get_positions() 128 | self.episodic_buffer.batch_add(new_xy) 129 | 130 | mb_obs = np.concatenate(mb_obs, axis=0) 131 | mb_vector = np.concatenate(mb_vector, axis=0) 132 | 133 | mb_rewards_in = np.concatenate(mb_rewards_in, axis=0) 134 | mb_rewards_ex = np.concatenate(mb_rewards_ex, axis=0) 135 | mb_rewards_all = np.concatenate(mb_rewards_all, axis=0) 136 | 137 | mb_values_in = np.squeeze(np.concatenate(mb_values_in, axis=0), axis=-1) 138 | mb_values_ex = np.squeeze(np.concatenate(mb_values_ex, axis=0), axis=-1) 139 | mb_values_all = np.squeeze(np.concatenate(mb_values_all, axis=0), axis=-1) 140 | 141 | mb_actions = np.asarray(mb_actions, dtype=np.int64) 142 | mb_ps = np.stack(mb_ps) 143 | mb_done = np.asarray(mb_done, dtype=np.bool_) 144 | mb_hidden_state = np.stack(mb_hidden_state) 145 | mb_message = np.concatenate(mb_message, axis=0) 146 | mb_train_valid = np.stack(mb_train_valid) 147 | mb_blocking = np.concatenate(mb_blocking, axis=0) 148 | 149 | last_values_in, last_values_ex, last_values_all = np.squeeze( 150 | self.local_model.value(self.obs, self.vector, self.hidden_state, self.episodic_buffer.no_reward, 151 | self.message)) 152 | 153 | # calculate GAE advantages separately for the intrinsic, extrinsic, and mixed reward streams 154 | mb_advs_in = np.zeros_like(mb_rewards_in) 155 | mb_advs_ex = np.zeros_like(mb_rewards_ex) 156 | mb_advs_all = np.zeros_like(mb_rewards_all) 157 | last_gaelam_in = last_gaelam_ex = last_gaelam_all = 0
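# Illustrative note (not part of the original file): the loop below is the
# standard GAE recursion, run once per reward stream. For a single stream
# with GAMMA = LAM = 0.95, rewards r = [1.0, 0.0], values V = [0.5, 0.4],
# bootstrap value 0.3 and no episode ends:
#   delta_1 = 0.0 + 0.95 * 0.3 - 0.4 = -0.115   ->  adv_1 = -0.115
#   delta_0 = 1.0 + 0.95 * 0.4 - 0.5 =  0.880   ->  adv_0 = 0.880 + 0.95 * 0.95 * (-0.115) ~ 0.776
# Returns are then advantages + values, which is what feeds the PPO update.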
158 | for t in reversed(range(TrainingParameters.N_STEPS)): 159 | if t == TrainingParameters.N_STEPS - 1: 160 | next_nonterminal = 1.0 - self.done 161 | next_values_in = last_values_in 162 | next_values_ex = last_values_ex 163 | next_values_all = last_values_all 164 | else: 165 | next_nonterminal = 1.0 - mb_done[t + 1] 166 | next_values_in = mb_values_in[t + 1] 167 | next_values_ex = mb_values_ex[t + 1] 168 | next_values_all = mb_values_all[t + 1] 169 | 170 | delta_in = np.subtract(np.add(mb_rewards_in[t], TrainingParameters.GAMMA * next_nonterminal * 171 | next_values_in), mb_values_in[t]) 172 | delta_ex = np.subtract(np.add(mb_rewards_ex[t], TrainingParameters.GAMMA * next_nonterminal * 173 | next_values_ex), mb_values_ex[t]) 174 | delta_all = np.subtract(np.add(mb_rewards_all[t], TrainingParameters.GAMMA * next_nonterminal * 175 | next_values_all), mb_values_all[t]) 176 | 177 | mb_advs_in[t] = last_gaelam_in = np.add(delta_in, 178 | TrainingParameters.GAMMA * TrainingParameters.LAM 179 | * next_nonterminal * last_gaelam_in) 180 | mb_advs_ex[t] = last_gaelam_ex = np.add(delta_ex, 181 | TrainingParameters.GAMMA * TrainingParameters.LAM 182 | * next_nonterminal * last_gaelam_ex) 183 | mb_advs_all[t] = last_gaelam_all = np.add(delta_all, 184 | TrainingParameters.GAMMA * TrainingParameters.LAM 185 | * next_nonterminal * last_gaelam_all) 186 | 187 | mb_returns_in = np.add(mb_advs_in, mb_values_in) 188 | mb_returns_ex = np.add(mb_advs_ex, mb_values_ex) 189 | mb_returns_all = np.add(mb_advs_all, mb_values_all) 190 | 191 | return mb_obs, mb_vector, mb_returns_in, mb_returns_ex, mb_returns_all, mb_values_in, mb_values_ex, \ 192 | mb_values_all, mb_actions, mb_ps, mb_hidden_state, mb_train_valid, mb_blocking, mb_message, \ 193 | len(performance_dict['per_r']), performance_dict 194 | 195 | def imitation(self, weights, total_steps): 196 | """run multiple steps and collect corresponding data for imitation learning""" 197 | with torch.no_grad(): 198 | self.local_model.set_weights(weights) 199 | 200 | mb_obs, mb_vector, mb_hidden_state, mb_actions = [], [], [], [] 201 | mb_message = [] 202 | step = 0 203 | episode = 0 204 | self.imitation_num_agent = EnvParameters.N_AGENTS 205 | while step <= TrainingParameters.N_STEPS: 206 | self.imitation_env._reset(num_agents=self.imitation_num_agent) 207 | 208 | self.imitation_episodic_buffer.reset(total_steps, self.imitation_num_agent) 209 | new_xy = self.imitation_env.get_positions() 210 | self.imitation_episodic_buffer.batch_add(new_xy) 211 | 212 | world = self.imitation_env.get_obstacle_map() 213 | start_positions = tuple(self.imitation_env.get_positions()) 214 | goals = tuple(self.imitation_env.get_goals()) 215 | 216 | try: 217 | obs = None 218 | mstar_path = od_mstar.find_path(world, start_positions, goals, inflation=2, time_limit=5) 219 | obs, vector, actions, hidden_state, message = self.parse_path(mstar_path) 220 | except OutOfTimeError: 221 | print("timeout") 222 | except NoSolutionError: 223 | print("nosol????", start_positions) 224 | 225 | if obs is not None: # no error 226 | mb_obs.append(obs) 227 | mb_vector.append(vector) 228 | mb_actions.append(actions) 229 | mb_hidden_state.append(hidden_state) 230 | mb_message.append(message) 231 | step += np.shape(vector)[0] 232 | episode += 1 233 | 234 | mb_obs = np.concatenate(mb_obs, axis=0) 235 | mb_vector = np.concatenate(mb_vector, axis=0) 236 | mb_actions = np.concatenate(mb_actions, axis=0) 237 | mb_hidden_state = np.concatenate(mb_hidden_state, axis=0) 238 | mb_message = np.concatenate(mb_message, axis=0) 239 | return mb_obs, mb_vector, mb_actions, mb_hidden_state, mb_message, episode, step 240 | 241 | def parse_path(self, path): 242 | """take the path generated from M* and create the corresponding inputs and actions""" 243 | mb_obs, mb_vector, mb_actions, mb_hidden_state = [], [], [], [] 244 | mb_message = [] 245 | hidden_state = ( 246 | torch.zeros((self.imitation_num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device), 247 | torch.zeros((self.imitation_num_agent, NetParameters.NET_SIZE // 2)).to(self.local_device)) 248 | obs = np.zeros((1,
self.imitation_num_agent, NetParameters.NUM_CHANNEL, EnvParameters.FOV_SIZE, EnvParameters.FOV_SIZE), 249 | dtype=np.float32) 250 | vector = np.zeros((1, self.imitation_num_agent, NetParameters.VECTOR_LEN), dtype=np.float32) 251 | message = torch.zeros((1, self.imitation_num_agent, NetParameters.NET_SIZE)).to(self.local_device) 252 | 253 | for i in range(self.imitation_num_agent): 254 | s = self.imitation_env.observe(i + 1) 255 | obs[:, i, :, :, :] = s[0] 256 | vector[:, i, : 3] = s[1] 257 | 258 | for t in range(len(path[:-1])): 259 | mb_obs.append(obs) 260 | mb_vector.append(vector) 261 | mb_hidden_state.append([hidden_state[0].cpu().detach().numpy(), hidden_state[1].cpu().detach().numpy()]) 262 | mb_message.append(message) 263 | 264 | hidden_state, message = self.local_model.generate_state(obs, vector, hidden_state, message) 265 | 266 | actions = np.zeros(self.imitation_num_agent) 267 | for i in range(self.imitation_num_agent): 268 | pos = path[t][i] 269 | new_pos = path[t + 1][i] # guaranteed to be in bounds by loop guard 270 | direction = (new_pos[0] - pos[0], new_pos[1] - pos[1]) 271 | actions[i] = self.imitation_env.world.get_action(direction) 272 | mb_actions.append(actions) 273 | 274 | obs, vector, rewards, done, _, on_goal, _, valid_actions, _, _, _, _, _, _, _ = \ 275 | self.imitation_env.joint_step(actions, 0, model='imitation', pre_value=None, input_state=None, 276 | ps=None, no_reward=None, message=None, episodic_buffer=None) 277 | 278 | vector[:, :, -1] = actions 279 | new_xy = self.imitation_env.get_positions() 280 | _, _, intrinsic_reward, min_dist = self.imitation_episodic_buffer.if_reward(new_xy, rewards, done, on_goal) 281 | vector[:, :, 3] = rewards 282 | vector[:, :, 4] = intrinsic_reward 283 | vector[:, :, 5] = min_dist 284 | 285 | if not all(valid_actions): # M* cannot generate collisions 286 | print('invalid action') 287 | return None, None, None, None, None # five values, to match the successful return below 288 | 289 | mb_obs = np.concatenate(mb_obs, axis=0) 290 | mb_message = np.concatenate(mb_message, axis=0) 291 | mb_vector = np.concatenate(mb_vector, axis=0) 292 | mb_actions = np.asarray(mb_actions, dtype=np.int64) 293 | mb_hidden_state = np.stack(mb_hidden_state) 294 | return mb_obs, mb_vector, mb_actions, mb_hidden_state, mb_message 295 | -------------------------------------------------------------------------------- /transformer/__pycache__/encoder_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/encoder_model.cpython-37.pyc -------------------------------------------------------------------------------- /transformer/__pycache__/layers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/layers.cpython-37.pyc -------------------------------------------------------------------------------- /transformer/__pycache__/modules.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/modules.cpython-37.pyc -------------------------------------------------------------------------------- /transformer/__pycache__/sub_layers.cpython-37.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmotlab/SCRIMP/df3e7d95ed87b1967a164105d33b7bca4309cdec/transformer/__pycache__/sub_layers.cpython-37.pyc -------------------------------------------------------------------------------- /transformer/encoder_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from transformer.layers import EncoderLayer 6 | 7 | 8 | class Encoder(nn.Module): 9 | """an encoder model with a self-attention mechanism""" 10 | 11 | def __init__(self, d_model, d_hidden, n_layers, n_head, d_k, d_v): 12 | """create multiple computation blocks""" 13 | super().__init__() 14 | self.layer_stack = nn.ModuleList([EncoderLayer(d_model, d_hidden, n_head, d_k, d_v) for _ in range(n_layers)]) 15 | 16 | def forward(self, enc_output, return_attns=False): 17 | """use self attention to merge messages""" 18 | enc_slf_attn_list = [] 19 | for enc_layer in self.layer_stack: 20 | enc_output, enc_slf_attn = enc_layer(enc_output) 21 | enc_slf_attn_list += [enc_slf_attn] if return_attns else [] 22 | 23 | if return_attns: 24 | return enc_output, enc_slf_attn_list 25 | return enc_output, 26 | 27 | 28 | class PositionalEncoding(nn.Module): 29 | """sinusoidal position embedding""" 30 | 31 | def __init__(self, d_hid, n_position=200): 32 | """create table""" 33 | super(PositionalEncoding, self).__init__() 34 | self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid)) 35 | 36 | def _get_sinusoid_encoding_table(self, n_position, d_hid): 37 | """sinusoid position encoding table""" 38 | 39 | def get_position_angle_vec(position): 40 | return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)] 41 | 42 | sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)]) 43 | sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i 44 | sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 45 | 46 | return torch.FloatTensor(sinusoid_table).unsqueeze(0) 47 | 48 | def forward(self, x): 49 | """encode unique agent id""" 50 | return x + self.pos_table[:, :x.size(1)].clone().detach() 51 | 52 | 53 | class TransformerEncoder(nn.Module): 54 | """a sequence to sequence model with attention mechanism""" 55 | 56 | def __init__(self, d_model, d_hidden, n_layers, n_head, d_k, d_v, n_position): 57 | """initialization""" 58 | super().__init__() 59 | self.encoder = Encoder(d_model=d_model, d_hidden=d_hidden, 60 | n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v) 61 | 62 | self.position_enc = PositionalEncoding(d_model, n_position=n_position) 63 | 64 | def forward(self, encoder_input): 65 | """run encoder""" 66 | encoder_input = self.position_enc(encoder_input) 67 | 68 | enc_output, *_ = self.encoder(encoder_input) 69 | 70 | return enc_output 71 |
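# Illustrative usage sketch (not part of the original file): encode messages
# for 3 agents with a small, made-up configuration and confirm the output
# keeps the (batch, n_agents, d_model) shape.
if __name__ == '__main__':
    demo_encoder = TransformerEncoder(d_model=8, d_hidden=16, n_layers=1,
                                      n_head=2, d_k=4, d_v=4, n_position=10)
    demo_messages = torch.zeros(2, 3, 8)  # (batch, n_agents, d_model)
    print(demo_encoder(demo_messages).shape)  # torch.Size([2, 3, 8])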
-------------------------------------------------------------------------------- /transformer/layers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from transformer.sub_layers import MultiHeadAttention, PositionwiseFeedForward, GatingMechanism 4 | 5 | 6 | class EncoderLayer(nn.Module): 7 | """compose with two different sub-layers""" 8 | 9 | def __init__(self, d_model, d_hidden, n_head, d_k, d_v): 10 | """define one computation block""" 11 | super(EncoderLayer, self).__init__() 12 | self.gate1 = GatingMechanism(d_model) 13 | self.gate2 = GatingMechanism(d_model) 14 | self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v) 15 | self.pos_ffn = PositionwiseFeedForward(d_model, d_hidden) 16 | self.norm1 = nn.LayerNorm(d_model, eps=1e-6) 17 | self.norm2 = nn.LayerNorm(d_model, eps=1e-6) 18 | 19 | def forward(self, enc_input): 20 | """run a computation block""" 21 | enc_output = self.norm1(enc_input) 22 | enc_output, enc_slf_attn = self.slf_attn( 23 | enc_output, enc_output, enc_output) 24 | enc_output_1 = self.gate1(enc_input, enc_output) 25 | enc_output = self.pos_ffn(self.norm2(enc_output_1)) 26 | enc_output = self.gate2(enc_output_1, enc_output) 27 | return enc_output, enc_slf_attn 28 | -------------------------------------------------------------------------------- /transformer/modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ScaledDotProductAttention(nn.Module): 7 | """scaled dot-product attention""" 8 | 9 | def __init__(self, temperature): 10 | """initialization""" 11 | super().__init__() 12 | self.temperature = temperature 13 | 14 | def forward(self, q, k, v): 15 | """ run multiple independent attention heads in parallel""" 16 | attn = torch.matmul(q / self.temperature, k.transpose(2, 3)) 17 | # attn = attn.masked_fill(mask == 0, -1e6) # where mask == 0, the logit would be set to -1e6, 18 | # so the corresponding attention score would be close to 0 19 | attn = F.softmax(attn, dim=-1) # attention score 20 | output = torch.matmul(attn, v) 21 | return output, attn
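# Minimal shape check (illustrative, not part of the original file): one
# batch, one head, two queries and keys of dimension 4.
if __name__ == '__main__':
    demo_attn = ScaledDotProductAttention(temperature=4 ** 0.5)
    q = torch.randn(1, 1, 2, 4)  # (batch, n_head, len_q, d_k)
    k = torch.randn(1, 1, 2, 4)
    v = torch.randn(1, 1, 2, 4)
    out, attn = demo_attn(q, k, v)
    print(out.shape, attn.shape)  # torch.Size([1, 1, 2, 4]) torch.Size([1, 1, 2, 2])
    print(attn.sum(dim=-1))  # each row of attention weights sums to 1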
22 | -------------------------------------------------------------------------------- /transformer/sub_layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from transformer.modules import ScaledDotProductAttention 6 | 7 | 8 | class MultiHeadAttention(nn.Module): 9 | """multi-head self attention module""" 10 | 11 | def __init__(self, n_head, d_model, d_k, d_v): 12 | super().__init__() 13 | """initialization""" 14 | self.n_head = n_head 15 | self.d_k = d_k 16 | self.d_v = d_v 17 | 18 | self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False) 19 | self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False) 20 | self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False) 21 | self.fc = nn.Linear(n_head * d_v, d_model, bias=False) 22 | 23 | self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5) 24 | 25 | def forward(self, q, k, v): 26 | """calculate multi-head attention""" 27 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 28 | sz_b, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1) 29 | 30 | # pass through the pre-attention projection 31 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 32 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 33 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 34 | 35 | # transpose for attention dot product 36 | q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2) 37 | # calculate attention 38 | q, attn = self.attention(q, k, v) 39 | # combine the last two dimensions to concatenate all the heads together 40 | q = q.transpose(1, 2).contiguous().view(sz_b, len_q, -1) 41 | q = self.fc(q) 42 | 43 | return q, attn 44 | 45 | 46 | class PositionwiseFeedForward(nn.Module): 47 | """A two-feed-forward-layer module""" 48 | 49 | def __init__(self, d_in, d_hid): 50 | """Initialization""" 51 | super().__init__() 52 | self.w_1 = nn.Linear(d_in, d_hid) 53 | self.w_2 = nn.Linear(d_hid, d_in) 54 | 55 | def forward(self, x): 56 | """run a feed-forward layer""" 57 | x = self.w_2(F.relu(self.w_1(x))) 58 | return x 59 | 60 | 61 | class GatingMechanism(nn.Module): 62 | """GRU-style gating layer, used in place of a residual connection""" 63 | 64 | def __init__(self, d_model, bg=2): 65 | """Initialization""" 66 | super(GatingMechanism, self).__init__() 67 | self.Wr = nn.Linear(d_model, d_model) 68 | self.Ur = nn.Linear(d_model, d_model) 69 | self.Wz = nn.Linear(d_model, d_model) 70 | self.Uz = nn.Linear(d_model, d_model) 71 | self.Wg = nn.Linear(d_model, d_model) 72 | self.Ug = nn.Linear(d_model, d_model) 73 | self.bg = torch.nn.Parameter(torch.full([d_model], bg, dtype=torch.float32)) 74 | 75 | self.sigmoid = nn.Sigmoid() 76 | self.tanh = nn.Tanh() 77 | 78 | def forward(self, x, y):  # x is residual, y is input 79 | """run a GRU in the place of a residual connection""" 80 | r = self.sigmoid(self.Wr(y) + self.Ur(x))  # reset gate 81 | z = self.sigmoid(self.Wz(y) + self.Uz(x) - self.bg)  # update gate, biased towards the residual 82 | h = self.tanh(self.Wg(y) + self.Ug(torch.mul(r, x)))  # candidate state 83 | g = torch.mul(1 - z, x) + torch.mul(z, h) 84 | return g 85 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import imageio 4 | import numpy as np 5 | import torch 6 | import wandb 7 | 8 | from alg_parameters import * 9 | 10 | 11 | def set_global_seeds(i): 12 | """set seed for fair comparison""" 13 | torch.manual_seed(i) 14 | torch.cuda.manual_seed(i) 15 | torch.cuda.manual_seed_all(i) 16 | np.random.seed(i) 17 | random.seed(i) 18 | torch.backends.cudnn.deterministic = True 19 | 20 | 21 | def write_to_tensorboard(global_summary, step, performance_dict=None, mb_loss=None, imitation_loss=None, evaluate=True, 22 | greedy=True): 23 | """record performance using tensorboard""" 24 | if imitation_loss is not None: 25 | global_summary.add_scalar(tag='Loss/Imitation_loss', scalar_value=imitation_loss[0], global_step=step) 26 | global_summary.add_scalar(tag='Grad/Imitation_grad', scalar_value=imitation_loss[1], global_step=step) 27 | 28 | global_summary.flush() 29 | return 30 | if evaluate: 31 | if greedy: 32 | global_summary.add_scalar(tag='Perf_greedy_eval/Reward', scalar_value=performance_dict['per_r'], global_step=step) 33 | global_summary.add_scalar(tag='Perf_greedy_eval/In_Reward', scalar_value=performance_dict['per_in_r'], global_step=step) 34 | global_summary.add_scalar(tag='Perf_greedy_eval/Ex_Reward', scalar_value=performance_dict['per_ex_r'], global_step=step) 35 | global_summary.add_scalar(tag='Perf_greedy_eval/Valid_rate', scalar_value=performance_dict['per_valid_rate'], global_step=step) 36 | global_summary.add_scalar(tag='Perf_greedy_eval/Episode_length', scalar_value=performance_dict['per_episode_len'], global_step=step) 37 | global_summary.add_scalar(tag='Perf_greedy_eval/Num_block', scalar_value=performance_dict['per_block'], global_step=step) 38 | global_summary.add_scalar(tag='Perf_greedy_eval/Num_leave_goal', scalar_value=performance_dict['per_leave_goal'], global_step=step) 39 | global_summary.add_scalar(tag='Perf_greedy_eval/Final_goals', scalar_value=performance_dict['per_final_goals'], global_step=step) 40 | global_summary.add_scalar(tag='Perf_greedy_eval/Half_goals', scalar_value=performance_dict['per_half_goals'], global_step=step) 41 | global_summary.add_scalar(tag='Perf_greedy_eval/Block_accuracy', scalar_value=performance_dict['per_block_acc'], global_step=step) 42 | global_summary.add_scalar(tag='Perf_greedy_eval/Max_goals', scalar_value=performance_dict['per_max_goals'],
global_step=step) 43 | global_summary.add_scalar(tag='Perf_greedy_eval/Num_collide', scalar_value=performance_dict['per_num_collide'], global_step=step) 44 | 45 | else: 46 | global_summary.add_scalar(tag='Perf_random_eval/Reward', scalar_value=performance_dict['per_r'], global_step=step) 47 | global_summary.add_scalar(tag='Perf_random_eval/In_Reward', scalar_value=performance_dict['per_in_r'], global_step=step) 48 | global_summary.add_scalar(tag='Perf_random_eval/Ex_Reward', scalar_value=performance_dict['per_ex_r'], global_step=step) 49 | global_summary.add_scalar(tag='Perf_random_eval/Valid_rate',scalar_value=performance_dict['per_valid_rate'], global_step=step) 50 | global_summary.add_scalar(tag='Perf_random_eval/Episode_length',scalar_value=performance_dict['per_episode_len'], global_step=step) 51 | global_summary.add_scalar(tag='Perf_random_eval/Num_block', scalar_value=performance_dict['per_block'], global_step=step) 52 | global_summary.add_scalar(tag='Perf_random_eval/Num_leave_goal', scalar_value=performance_dict['per_leave_goal'], global_step=step) 53 | global_summary.add_scalar(tag='Perf_random_eval/Final_goals', scalar_value=performance_dict['per_final_goals'], global_step=step) 54 | global_summary.add_scalar(tag='Perf_random_eval/Half_goals', scalar_value=performance_dict['per_half_goals'], global_step=step) 55 | global_summary.add_scalar(tag='Perf_random_eval/Block_accuracy', scalar_value=performance_dict['per_block_acc'], global_step=step) 56 | global_summary.add_scalar(tag='Perf_random_eval/Max_goals', scalar_value=performance_dict['per_max_goals'], global_step=step) 57 | global_summary.add_scalar(tag='Perf_random_eval/Num_collide', scalar_value=performance_dict['per_num_collide'], global_step=step) 58 | 59 | else: 60 | loss_vals = np.nanmean(mb_loss, axis=0) 61 | global_summary.add_scalar(tag='Perf/Reward', scalar_value=performance_dict['per_r'], global_step=step) 62 | global_summary.add_scalar(tag='Perf/In_Reward', scalar_value=performance_dict['per_in_r'], global_step=step) 63 | global_summary.add_scalar(tag='Perf/Ex_Reward', scalar_value=performance_dict['per_ex_r'], global_step=step) 64 | global_summary.add_scalar(tag='Perf/Valid_rate', scalar_value=performance_dict['per_valid_rate'], global_step=step) 65 | global_summary.add_scalar(tag='Perf/Episode_length',scalar_value=performance_dict['per_episode_len'], global_step=step) 66 | global_summary.add_scalar(tag='Perf/Num_block', scalar_value=performance_dict['per_block'], global_step=step) 67 | global_summary.add_scalar(tag='Perf/Num_leave_goal', scalar_value=performance_dict['per_leave_goal'], global_step=step) 68 | global_summary.add_scalar(tag='Perf/Final_goals', scalar_value=performance_dict['per_final_goals'], global_step=step) 69 | global_summary.add_scalar(tag='Perf/Half_goals', scalar_value=performance_dict['per_half_goals'], global_step=step) 70 | global_summary.add_scalar(tag='Perf/Block_accuracy', scalar_value=performance_dict['per_block_acc'], global_step=step) 71 | global_summary.add_scalar(tag='Perf/Max_goals', scalar_value=performance_dict['per_max_goals'], global_step=step) 72 | global_summary.add_scalar(tag='Perf/Num_collide', scalar_value=performance_dict['per_num_collide'], global_step=step) 73 | global_summary.add_scalar(tag='Perf/Rewarded_rate', scalar_value=performance_dict['rewarded_rate'], global_step=step) 74 | 75 | for (val, name) in zip(loss_vals, RecordingParameters.LOSS_NAME): 76 | if name == 'grad_norm': 77 | global_summary.add_scalar(tag='Grad/' + name, scalar_value=val, global_step=step) 78 
| else: 79 | global_summary.add_scalar(tag='Loss/' + name, scalar_value=val, global_step=step) 80 | 81 | global_summary.flush() 82 | 83 | 84 | def write_to_wandb(step, performance_dict=None, mb_loss=None, imitation_loss=None, evaluate=True, greedy=True): 85 | """record performance using wandb""" 86 | if imitation_loss is not None: 87 | wandb.log({'Loss/Imitation_loss': imitation_loss[0]}, step=step) 88 | wandb.log({'Grad/Imitation_grad': imitation_loss[1]}, step=step) 89 | return 90 | if evaluate: 91 | if greedy: 92 | wandb.log({'Perf_greedy_eval/Reward': performance_dict['per_r']}, step=step) 93 | wandb.log({'Perf_greedy_eval/In_Reward': performance_dict['per_in_r']}, step=step) 94 | wandb.log({'Perf_greedy_eval/Ex_Reward': performance_dict['per_ex_r']}, step=step) 95 | wandb.log({'Perf_greedy_eval/Valid_rate': performance_dict['per_valid_rate']}, step=step) 96 | wandb.log({'Perf_greedy_eval/Episode_length': performance_dict['per_episode_len']}, step=step) 97 | wandb.log({'Perf_greedy_eval/Num_block': performance_dict['per_block']}, step=step) 98 | wandb.log({'Perf_greedy_eval/Num_leave_goal': performance_dict['per_leave_goal']}, step=step) 99 | wandb.log({'Perf_greedy_eval/Final_goals': performance_dict['per_final_goals']}, step=step) 100 | wandb.log({'Perf_greedy_eval/Half_goals': performance_dict['per_half_goals']}, step=step) 101 | wandb.log({'Perf_greedy_eval/Block_accuracy': performance_dict['per_block_acc']}, step=step) 102 | wandb.log({'Perf_greedy_eval/Max_goals': performance_dict['per_max_goals']}, step=step) 103 | wandb.log({'Perf_greedy_eval/Num_collide': performance_dict['per_num_collide']}, step=step) 104 | 105 | else: 106 | wandb.log({'Perf_random_eval/Reward': performance_dict['per_r']}, step=step) 107 | wandb.log({'Perf_random_eval/In_Reward': performance_dict['per_in_r']}, step=step) 108 | wandb.log({'Perf_random_eval/Ex_Reward': performance_dict['per_ex_r']}, step=step) 109 | wandb.log({'Perf_random_eval/Valid_rate': performance_dict['per_valid_rate']}, step=step) 110 | wandb.log({'Perf_random_eval/Episode_length': performance_dict['per_episode_len']}, step=step) 111 | wandb.log({'Perf_random_eval/Num_block': performance_dict['per_block']}, step=step) 112 | wandb.log({'Perf_random_eval/Num_leave_goal': performance_dict['per_leave_goal']}, step=step) 113 | wandb.log({'Perf_random_eval/Final_goals': performance_dict['per_final_goals']}, step=step) 114 | wandb.log({'Perf_random_eval/Half_goals': performance_dict['per_half_goals']}, step=step) 115 | wandb.log({'Perf_random_eval/Block_accuracy': performance_dict['per_block_acc']}, step=step) 116 | wandb.log({'Perf_random_eval/Max_goals': performance_dict['per_max_goals']}, step=step) 117 | wandb.log({'Perf_random_eval/Num_collide': performance_dict['per_num_collide']}, step=step) 118 | 119 | else: 120 | loss_vals = np.nanmean(mb_loss, axis=0) 121 | wandb.log({'Perf/Reward': performance_dict['per_r']}, step=step) 122 | wandb.log({'Perf/In_Reward': performance_dict['per_in_r']}, step=step) 123 | wandb.log({'Perf/Ex_Reward': performance_dict['per_ex_r']}, step=step) 124 | wandb.log({'Perf/Valid_rate': performance_dict['per_valid_rate']}, step=step) 125 | wandb.log({'Perf/Episode_length': performance_dict['per_episode_len']}, step=step) 126 | wandb.log({'Perf/Num_block': performance_dict['per_block']}, step=step) 127 | wandb.log({'Perf/Num_leave_goal': performance_dict['per_leave_goal']}, step=step) 128 | wandb.log({'Perf/Final_goals': performance_dict['per_final_goals']}, step=step) 129 | wandb.log({'Perf/Half_goals': 
performance_dict['per_half_goals']}, step=step) 130 | wandb.log({'Perf/Block_accuracy': performance_dict['per_block_acc']}, step=step) 131 | wandb.log({'Perf/Max_goals': performance_dict['per_max_goals']}, step=step) 132 | wandb.log({'Perf/Num_collide': performance_dict['per_num_collide']}, 133 | step=step) 134 | wandb.log({'Perf/Rewarded_rate': performance_dict['rewarded_rate']}, 135 | step=step) 136 | 137 | for (val, name) in zip(loss_vals, RecordingParameters.LOSS_NAME): 138 | if name == 'grad_norm': 139 | wandb.log({'Grad/' + name: val}, step=step) 140 | else: 141 | wandb.log({'Loss/' + name: val}, step=step) 142 | 143 | 144 | def make_gif(images, file_name): 145 | """record gif""" 146 | imageio.mimwrite(file_name, images, subrectangles=True) 147 | print("wrote gif") 148 | 149 | 150 | def reset_env(env, num_agent): 151 | """reset environment""" 152 | done = env._reset(num_agent) 153 | prev_action = np.zeros(num_agent) 154 | valid_actions = [] 155 | obs = np.zeros((1, num_agent, NetParameters.NUM_CHANNEL, EnvParameters.FOV_SIZE, EnvParameters.FOV_SIZE), dtype=np.float32) 156 | vector = np.zeros((1, num_agent, NetParameters.VECTOR_LEN), dtype=np.float32) 157 | train_valid = np.zeros((num_agent, EnvParameters.N_ACTIONS), dtype=np.float32) 158 | 159 | for i in range(num_agent): 160 | valid_action = env.list_next_valid_actions(i + 1) 161 | s = env.observe(i + 1) 162 | obs[:, i, :, :, :] = s[0] 163 | vector[:, i, : 3] = s[1] 164 | vector[:, i, -1] = prev_action[i] 165 | valid_actions.append(valid_action) 166 | train_valid[i, valid_action] = 1 167 | return done, valid_actions, obs, vector, train_valid 168 | 169 | 170 | def one_step(env, one_episode_perf, actions, pre_block, model, pre_value, input_state, ps, no_reward, message, 171 | episodic_buffer, num_agent): 172 | """run one step""" 173 | train_valid = np.zeros((num_agent, EnvParameters.N_ACTIONS), dtype=np.float32) 174 | obs, vector, rewards, done, next_valid_actions, on_goal, blockings, valid_actions, num_blockings, leave_goals, \ 175 | num_on_goal, max_on_goal, num_collide, action_status, modify_actions \ 176 | = env.joint_step(actions, one_episode_perf['num_step'], model, pre_value, 177 | input_state, ps, no_reward, message, episodic_buffer) 178 | 179 | one_episode_perf['block'] += num_blockings 180 | one_episode_perf['num_leave_goal'] += leave_goals 181 | one_episode_perf['num_collide'] += num_collide 182 | vector[:, :, -1] = modify_actions 183 | for i in range(num_agent): 184 | train_valid[i, next_valid_actions[i]] = 1 185 | if (pre_block[i] < 0.5) == blockings[:, i]: 186 | one_episode_perf['wrong_blocking'] += 1 187 | one_episode_perf['num_step'] += 1 188 | return rewards, next_valid_actions, obs, vector, train_valid, done, blockings, num_on_goal, one_episode_perf, \ 189 | max_on_goal, action_status, modify_actions, on_goal 190 | 191 | 192 | def update_perf(one_episode_perf, performance_dict, num_on_goals, max_on_goals, num_agent): 193 | """record batch performance""" 194 | performance_dict['per_ex_r'].append(one_episode_perf['ex_reward']) 195 | performance_dict['per_in_r'].append(one_episode_perf['in_reward']) 196 | performance_dict['per_r'].append(one_episode_perf['episode_reward']) 197 | performance_dict['per_valid_rate'].append( 198 | ((one_episode_perf['num_step'] * num_agent) - one_episode_perf['invalid']) / ( 199 | one_episode_perf['num_step'] * num_agent)) 200 | performance_dict['per_episode_len'].append(one_episode_perf['num_step']) 201 | performance_dict['per_block'].append(one_episode_perf['block']) 202 | 
performance_dict['per_leave_goal'].append(one_episode_perf['num_leave_goal']) 203 | performance_dict['per_num_collide'].append(one_episode_perf['num_collide']) 204 | performance_dict['per_final_goals'].append(num_on_goals) 205 | performance_dict['per_block_acc'].append( 206 | ((one_episode_perf['num_step'] * num_agent) - one_episode_perf['wrong_blocking']) / ( 207 | one_episode_perf['num_step'] * num_agent)) 208 | performance_dict['per_max_goals'].append(max_on_goals) 209 | performance_dict['rewarded_rate'].append( 210 | one_episode_perf['reward_count'] / (one_episode_perf['num_step'] * num_agent)) 211 | return performance_dict 212 | --------------------------------------------------------------------------------