├── .gitignore ├── .pylintrc ├── README.md ├── crowd_nav ├── __init__.py ├── configs │ ├── __init__.py │ └── icra_benchmark │ │ ├── __init__.py │ │ ├── config.py │ │ ├── mp_detach.py │ │ ├── mp_linear.py │ │ ├── mp_separate.py │ │ ├── mp_separate_dp.py │ │ ├── rgl.py │ │ └── sarl.py ├── policy │ ├── cadrl.py │ ├── gcn.py │ ├── graph_model.py │ ├── helpers.py │ ├── lstm_rl.py │ ├── model_predictive_rl.py │ ├── multi_human_rl.py │ ├── policy_factory.py │ ├── sarl.py │ ├── state_predictor.py │ └── value_estimator.py ├── test.py ├── tests │ └── test_polices.py ├── train.py └── utils │ ├── __init__.py │ ├── explorer.py │ ├── memory.py │ ├── plot.py │ ├── plot2.py │ └── trainer.py ├── crowd_sim ├── README.md ├── __init__.py └── envs │ ├── __init__.py │ ├── crowd_sim.py │ ├── policy │ ├── __init__.py │ ├── linear.py │ ├── orca.py │ ├── policy.py │ ├── policy_factory.py │ └── socialforce.py │ ├── realsim_utils │ ├── GrandCentral.py │ └── __init__.py │ └── utils │ ├── __init__.py │ ├── action.py │ ├── agent.py │ ├── human.py │ ├── info.py │ ├── robot.py │ ├── state.py │ └── utils.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # editors 2 | .idea/ 3 | .vscode/ 4 | 5 | # Python 6 | *.egg-info/ 7 | .pytest_cache/ 8 | __pycache__ 9 | .cache/ 10 | venv*/ 11 | 12 | # data 13 | data/ 14 | *.mp4 15 | videos/ 16 | *.log 17 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [BASIC] 2 | 3 | #variable-rgx=[a-z0-9_]{1,30}$ 4 | good-names=dx,dy,i,j,k,px,py,r,th,vx,vy,x,y,z,ob 5 | 6 | [TYPECHECK] 7 | 8 | # List of members which are set dynamically and missed by pylint inference 9 | # system, and so shouldn't trigger E1101 when accessed. Python regular 10 | # expressions are accepted. 11 | generated-members=numpy.*,torch.* 12 | 13 | 14 | disable=missing-docstring,useless-object-inheritance 15 | max-line-length=120 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RelationalGraphLearning 2 | This repository contains the codes for our paper, which is accepted at IROS 2020. 3 | For more details, please refer to the [paper](https://arxiv.org/abs/1909.13165). 4 | 5 | 6 | ## Abstract 7 | We present a relational graph learning approach for robotic crowd navigation using model-based deep reinforcement 8 | learning that plans actions by looking into the future. 9 | Our approach reasons about the relations between all agents based on their latent features and uses a Graph Convolutional 10 | Network to encode higher-order interactions in each agent's state representation, which is subsequently leveraged for 11 | state prediction and value estimation. 12 | The ability to predict human motion allows us to perform multi-step lookahead planning, taking into account the temporal 13 | evolution of human crowds. 14 | We evaluate our approach against a state-of-the-art baseline for crowd navigation and ablations of our model to 15 | demonstrate that navigation with our approach is more efficient, results in fewer collisions, and avoids failure cases 16 | involving oscillatory and freezing behaviors. 17 | 18 | 19 | 20 | ## Method Overview 21 | 22 | 23 | 24 | ## Setup 25 | 1. Install [Python-RVO2](https://github.com/sybrenstuvel/Python-RVO2) library 26 | 2. 
Install [socialforce](https://github.com/ChanganVR/socialforce) library 27 | 3. Install crowd_sim and crowd_nav with pip 28 | ``` 29 | pip install -e . 30 | ``` 31 | 32 | ## Getting Started 33 | This repository is organized in two parts: the crowd_sim/ folder contains the simulation environment and 34 | the crowd_nav/ folder contains the code for training and testing the policies. Details of the simulation framework can be found 35 | [here](crowd_sim/README.md). Below are the instructions for training and testing policies, and they should be executed 36 | inside the crowd_nav/ folder. 37 | 38 | 39 | 1. Train a policy. 40 | ``` 41 | python train.py --policy rgl 42 | ``` 43 | 2. Test policies with 500 test cases. 44 | ``` 45 | python test.py --policy rgl --model_dir data/output --phase test 46 | ``` 47 | 3. Run a policy for one episode and visualize the result. 48 | ``` 49 | python test.py --policy rgl --model_dir data/output --phase test --visualize --test_case 0 50 | ``` 51 | 4. Plot the training curve. 52 | ``` 53 | python utils/plot.py data/output/output.log 54 | ``` 55 | 56 | 57 | 58 | ## Video Demo 59 | [](https://youtu.be/U3quW30Eu3A) 60 | 61 | 62 | ## Citation 63 | If you find the code or paper useful for your research, please cite the following papers: 64 | ```bibtex 65 | @inproceedings{chen2020relational, 66 | title={Relational Graph Learning for Crowd Navigation}, 67 | author={Changan Chen and Sha Hu and Payam Nikdel and Greg Mori and Manolis Savva}, 68 | year={2020}, 69 | booktitle={IROS} 70 | } 71 | @inproceedings{chen2019crowdnav, 72 | title={Crowd-Robot Interaction: Crowd-aware Robot Navigation with Attention-based Deep Reinforcement Learning}, 73 | author={Changan Chen and Yuejiang Liu and Sven Kreiss and Alexandre Alahi}, 74 | year={2019}, 75 | booktitle={ICRA} 76 | } 77 | ``` 78 | -------------------------------------------------------------------------------- /crowd_nav/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/__init__.py -------------------------------------------------------------------------------- /crowd_nav/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/configs/__init__.py -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/configs/icra_benchmark/__init__.py -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Never Modify this file! Always copy the settings you want to change to your local file. 
3 | """ 4 | 5 | 6 | import numpy as np 7 | 8 | 9 | class Config(object): 10 | def __init__(self): 11 | pass 12 | 13 | 14 | class BaseEnvConfig(object): 15 | env = Config() 16 | env.time_limit = 30 17 | env.time_step = 0.25 18 | env.val_size = 100 19 | env.test_size = 500 20 | env.train_size = np.iinfo(np.uint32).max - 2000 21 | env.randomize_attributes = False 22 | env.robot_sensor_range = 5 23 | 24 | reward = Config() 25 | reward.success_reward = 1 26 | reward.collision_penalty = -0.25 27 | reward.discomfort_dist = 0.2 28 | reward.discomfort_penalty_factor = 0.5 29 | 30 | sim = Config() 31 | sim.train_val_scenario = 'circle_crossing' 32 | sim.test_scenario = 'circle_crossing' 33 | sim.square_width = 20 34 | sim.circle_radius = 4 35 | sim.human_num = 5 36 | sim.nonstop_human = False 37 | sim.centralized_planning = True 38 | 39 | humans = Config() 40 | humans.visible = True 41 | humans.policy = 'orca' 42 | humans.radius = 0.3 43 | humans.v_pref = 1 44 | humans.sensor = 'coordinates' 45 | 46 | robot = Config() 47 | robot.visible = False 48 | robot.policy = 'none' 49 | robot.radius = 0.3 50 | robot.v_pref = 1 51 | robot.sensor = 'coordinates' 52 | 53 | def __init__(self, debug=False): 54 | if debug: 55 | self.env.val_size = 1 56 | self.env.test_size = 1 57 | 58 | 59 | class BasePolicyConfig(object): 60 | rl = Config() 61 | rl.gamma = 0.9 62 | 63 | om = Config() 64 | om.cell_num = 4 65 | om.cell_size = 1 66 | om.om_channel_size = 3 67 | 68 | action_space = Config() 69 | action_space.kinematics = 'holonomic' 70 | action_space.speed_samples = 5 71 | action_space.rotation_samples = 16 72 | action_space.sampling = 'exponential' 73 | action_space.query_env = False 74 | action_space.rotation_constraint = np.pi / 3 75 | 76 | cadrl = Config() 77 | cadrl.mlp_dims = [150, 100, 100, 1] 78 | cadrl.multiagent_training = False 79 | 80 | lstm_rl = Config() 81 | lstm_rl.global_state_dim = 50 82 | lstm_rl.mlp1_dims = [150, 100, 100, 50] 83 | lstm_rl.mlp2_dims = [150, 100, 100, 1] 84 | lstm_rl.multiagent_training = True 85 | lstm_rl.with_om = False 86 | lstm_rl.with_interaction_module = True 87 | 88 | srl = Config() 89 | srl.mlp1_dims = [150, 100, 100, 50] 90 | srl.mlp2_dims = [150, 100, 100, 1] 91 | srl.multiagent_training = True 92 | srl.with_om = True 93 | 94 | sarl = Config() 95 | sarl.mlp1_dims = [150, 100] 96 | sarl.mlp2_dims = [100, 50] 97 | sarl.attention_dims = [100, 100, 1] 98 | sarl.mlp3_dims = [150, 100, 100, 1] 99 | sarl.multiagent_training = True 100 | sarl.with_om = True 101 | sarl.with_global_state = True 102 | 103 | gcn = Config() 104 | gcn.multiagent_training = True 105 | gcn.num_layer = 2 106 | gcn.X_dim = 32 107 | gcn.wr_dims = [64, gcn.X_dim] 108 | gcn.wh_dims = [64, gcn.X_dim] 109 | gcn.final_state_dim = gcn.X_dim 110 | gcn.gcn2_w1_dim = gcn.X_dim 111 | gcn.planning_dims = [150, 100, 100, 1] 112 | gcn.similarity_function = 'embedded_gaussian' 113 | gcn.layerwise_graph = True 114 | gcn.skip_connection = False 115 | 116 | gnn = Config() 117 | gnn.multiagent_training = True 118 | gnn.node_dim = 32 119 | gnn.wr_dims = [64, gnn.node_dim] 120 | gnn.wh_dims = [64, gnn.node_dim] 121 | gnn.edge_dim = 32 122 | gnn.planning_dims = [150, 100, 100, 1] 123 | 124 | def __init__(self, debug=False): 125 | pass 126 | 127 | 128 | class BaseTrainConfig(object): 129 | trainer = Config() 130 | trainer.batch_size = 100 131 | trainer.optimizer = 'Adam' 132 | 133 | imitation_learning = Config() 134 | imitation_learning.il_episodes = 2000 135 | imitation_learning.il_policy = 'orca' 136 | 
imitation_learning.il_epochs = 50 137 | imitation_learning.il_learning_rate = 0.001 138 | imitation_learning.safety_space = 0.15 139 | 140 | train = Config() 141 | train.rl_train_epochs = 1 142 | train.rl_learning_rate = 0.001 143 | # number of batches to train at the end of training episode il_episodes 144 | train.train_batches = 100 145 | # training episodes in outer loop 146 | train.train_episodes = 10000 147 | # number of episodes sampled in one training episode 148 | train.sample_episodes = 1 149 | train.target_update_interval = 1000 150 | train.evaluation_interval = 1000 151 | # the memory pool can roughly store 2K episodes, total size = episodes * 50 152 | train.capacity = 100000 153 | train.epsilon_start = 0.5 154 | train.epsilon_end = 0.1 155 | train.epsilon_decay = 4000 156 | train.checkpoint_interval = 1000 157 | 158 | train.train_with_pretend_batch = False 159 | 160 | def __init__(self, debug=False): 161 | if debug: 162 | self.imitation_learning.il_episodes = 10 163 | self.imitation_learning.il_epochs = 5 164 | self.train.train_episodes = 1 165 | self.train.checkpoint_interval = self.train.train_episodes 166 | self.train.evaluation_interval = 1 167 | self.train.target_update_interval = 1 168 | -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/mp_detach.py: -------------------------------------------------------------------------------- 1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config 2 | 3 | 4 | class EnvConfig(BaseEnvConfig): 5 | def __init__(self, debug=False): 6 | super(EnvConfig, self).__init__(debug) 7 | 8 | 9 | class PolicyConfig(BasePolicyConfig): 10 | def __init__(self, debug=False): 11 | super(PolicyConfig, self).__init__(debug) 12 | self.name = 'model_predictive_rl' 13 | 14 | # gcn 15 | self.gcn.num_layer = 2 16 | self.gcn.X_dim = 32 17 | self.gcn.similarity_function = 'embedded_gaussian' 18 | self.gcn.layerwise_graph = False 19 | self.gcn.skip_connection = True 20 | 21 | self.model_predictive_rl = Config() 22 | self.model_predictive_rl.linear_state_predictor = False 23 | self.model_predictive_rl.planning_depth = 1 24 | self.model_predictive_rl.planning_width = 1 25 | self.model_predictive_rl.do_action_clip = False 26 | self.model_predictive_rl.motion_predictor_dims = [64, 5] 27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1] 28 | self.model_predictive_rl.share_graph_model = True 29 | 30 | 31 | class TrainConfig(BaseTrainConfig): 32 | def __init__(self, debug=False): 33 | super(TrainConfig, self).__init__(debug) 34 | 35 | self.train.freeze_state_predictor = False 36 | self.train.detach_state_predictor = True 37 | self.train.reduce_sp_update_frequency = False 38 | -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/mp_linear.py: -------------------------------------------------------------------------------- 1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config 2 | 3 | 4 | class EnvConfig(BaseEnvConfig): 5 | def __init__(self, debug=False): 6 | super(EnvConfig, self).__init__(debug) 7 | 8 | 9 | class PolicyConfig(BasePolicyConfig): 10 | def __init__(self, debug=False): 11 | super(PolicyConfig, self).__init__(debug) 12 | self.name = 'model_predictive_rl' 13 | 14 | # gcn 15 | self.gcn.num_layer = 2 16 | self.gcn.X_dim = 32 17 | self.gcn.similarity_function = 'embedded_gaussian' 18 | self.gcn.layerwise_graph 
= False 19 | self.gcn.skip_connection = True 20 | 21 | self.model_predictive_rl = Config() 22 | self.model_predictive_rl.linear_state_predictor = True 23 | self.model_predictive_rl.planning_depth = 1 24 | self.model_predictive_rl.planning_width = 1 25 | self.model_predictive_rl.do_action_clip = False 26 | self.model_predictive_rl.motion_predictor_dims = [64, 5] 27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1] 28 | self.model_predictive_rl.share_graph_model = False 29 | 30 | 31 | class TrainConfig(BaseTrainConfig): 32 | def __init__(self, debug=False): 33 | super(TrainConfig, self).__init__(debug) 34 | 35 | self.train.freeze_state_predictor = False 36 | self.train.detach_state_predictor = False 37 | self.train.reduce_sp_update_frequency = False 38 | -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/mp_separate.py: -------------------------------------------------------------------------------- 1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config 2 | 3 | 4 | class EnvConfig(BaseEnvConfig): 5 | def __init__(self, debug=False): 6 | super(EnvConfig, self).__init__(debug) 7 | 8 | 9 | class PolicyConfig(BasePolicyConfig): 10 | def __init__(self, debug=False): 11 | super(PolicyConfig, self).__init__(debug) 12 | self.name = 'model_predictive_rl' 13 | 14 | # gcn 15 | self.gcn.num_layer = 2 16 | self.gcn.X_dim = 32 17 | self.gcn.similarity_function = 'embedded_gaussian' 18 | self.gcn.layerwise_graph = False 19 | self.gcn.skip_connection = True 20 | 21 | self.model_predictive_rl = Config() 22 | self.model_predictive_rl.linear_state_predictor = False 23 | self.model_predictive_rl.planning_depth = 1 24 | self.model_predictive_rl.planning_width = 1 25 | self.model_predictive_rl.do_action_clip = False 26 | self.model_predictive_rl.motion_predictor_dims = [64, 5] 27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1] 28 | self.model_predictive_rl.share_graph_model = False 29 | 30 | 31 | class TrainConfig(BaseTrainConfig): 32 | def __init__(self, debug=False): 33 | super(TrainConfig, self).__init__(debug) 34 | 35 | self.train.freeze_state_predictor = False 36 | self.train.detach_state_predictor = False 37 | self.train.reduce_sp_update_frequency = False 38 | -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/mp_separate_dp.py: -------------------------------------------------------------------------------- 1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config 2 | 3 | 4 | class EnvConfig(BaseEnvConfig): 5 | def __init__(self, debug=False): 6 | super(EnvConfig, self).__init__(debug) 7 | 8 | 9 | class PolicyConfig(BasePolicyConfig): 10 | def __init__(self, debug=False): 11 | super(PolicyConfig, self).__init__(debug) 12 | self.name = 'model_predictive_rl' 13 | 14 | # gcn 15 | self.gcn.num_layer = 2 16 | self.gcn.X_dim = 32 17 | self.gcn.similarity_function = 'embedded_gaussian' 18 | self.gcn.layerwise_graph = False 19 | self.gcn.skip_connection = True 20 | 21 | self.model_predictive_rl = Config() 22 | self.model_predictive_rl.linear_state_predictor = False 23 | self.model_predictive_rl.planning_depth = 2 24 | self.model_predictive_rl.planning_width = 2 25 | self.model_predictive_rl.do_action_clip = True 26 | self.model_predictive_rl.motion_predictor_dims = [64, 5] 27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1] 28 
| self.model_predictive_rl.share_graph_model = False 29 | 30 | 31 | class TrainConfig(BaseTrainConfig): 32 | def __init__(self, debug=False): 33 | super(TrainConfig, self).__init__(debug) 34 | 35 | self.train.freeze_state_predictor = False 36 | self.train.detach_state_predictor = False 37 | self.train.reduce_sp_update_frequency = False 38 | -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/rgl.py: -------------------------------------------------------------------------------- 1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config 2 | 3 | 4 | class EnvConfig(BaseEnvConfig): 5 | def __init__(self, debug=False): 6 | super(EnvConfig, self).__init__(debug) 7 | 8 | 9 | class PolicyConfig(BasePolicyConfig): 10 | def __init__(self, debug=False): 11 | super(PolicyConfig, self).__init__(debug) 12 | self.name = 'gcn' 13 | 14 | # gcn 15 | self.gcn.num_layer = 2 16 | self.gcn.X_dim = 32 17 | self.gcn.similarity_function = 'embedded_gaussian' 18 | self.gcn.layerwise_graph = False 19 | self.gcn.skip_connection = True 20 | 21 | 22 | class TrainConfig(BaseTrainConfig): 23 | def __init__(self, debug=False): 24 | super(TrainConfig, self).__init__(debug) 25 | -------------------------------------------------------------------------------- /crowd_nav/configs/icra_benchmark/sarl.py: -------------------------------------------------------------------------------- 1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config 2 | 3 | 4 | class EnvConfig(BaseEnvConfig): 5 | def __init__(self, debug=False): 6 | super(EnvConfig, self).__init__(debug) 7 | 8 | 9 | class PolicyConfig(BasePolicyConfig): 10 | def __init__(self, debug=False): 11 | super(PolicyConfig, self).__init__(debug) 12 | self.name = 'sarl' 13 | 14 | 15 | class TrainConfig(BaseTrainConfig): 16 | def __init__(self, debug=False): 17 | super(TrainConfig, self).__init__(debug) 18 | -------------------------------------------------------------------------------- /crowd_nav/policy/cadrl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import itertools 5 | import logging 6 | from crowd_sim.envs.policy.policy import Policy 7 | from crowd_sim.envs.utils.action import ActionRot, ActionXY 8 | from crowd_sim.envs.utils.state import ObservableState, FullState 9 | 10 | 11 | def mlp(input_dim, mlp_dims, last_relu=False): 12 | layers = [] 13 | mlp_dims = [input_dim] + mlp_dims 14 | for i in range(len(mlp_dims) - 1): 15 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1])) 16 | if i != len(mlp_dims) - 2 or last_relu: 17 | layers.append(nn.ReLU()) 18 | net = nn.Sequential(*layers) 19 | return net 20 | 21 | 22 | class ValueNetwork(nn.Module): 23 | def __init__(self, input_dim, mlp_dims): 24 | super().__init__() 25 | self.value_network = mlp(input_dim, mlp_dims) 26 | 27 | def forward(self, state_input): 28 | # input size: (batch_size, # of humans, state_length) 29 | if isinstance(state_input, tuple): 30 | state = state_input[0] 31 | else: 32 | state = state_input 33 | 34 | value = self.value_network(state.squeeze(dim=1)) 35 | return value 36 | 37 | 38 | class CADRL(Policy): 39 | def __init__(self): 40 | super().__init__() 41 | self.name = 'CADRL' 42 | self.trainable = True 43 | self.multiagent_training = None 44 | self.kinematics = None 45 | self.epsilon = None 46 | self.gamma = None 47 | 
self.sampling = None 48 | self.speed_samples = None 49 | self.rotation_samples = None 50 | self.query_env = None 51 | self.action_space = None 52 | self.rotation_constraint = None 53 | self.speeds = None 54 | self.rotations = None 55 | self.action_values = None 56 | self.with_om = None 57 | self.cell_num = None 58 | self.cell_size = None 59 | self.om_channel_size = None 60 | self.self_state_dim = 6 61 | self.human_state_dim = 7 62 | self.joint_state_dim = self.self_state_dim + self.human_state_dim 63 | 64 | def configure(self, config): 65 | self.set_common_parameters(config) 66 | self.model = ValueNetwork(self.joint_state_dim, config.cadrl.mlp_dims) 67 | self.multiagent_training = config.cadrl.multiagent_training 68 | logging.info('Policy: CADRL without occupancy map') 69 | 70 | def set_common_parameters(self, config): 71 | self.gamma = config.rl.gamma 72 | self.kinematics = config.action_space.kinematics 73 | self.sampling = config.action_space.sampling 74 | self.speed_samples = config.action_space.speed_samples 75 | self.rotation_samples = config.action_space.rotation_samples 76 | self.query_env = config.action_space.query_env 77 | self.rotation_constraint = config.action_space.rotation_constraint 78 | self.cell_num = config.om.cell_num 79 | self.cell_size = config.om.cell_size 80 | self.om_channel_size = config.om.om_channel_size 81 | 82 | logging.info('Query environment: {}'.format(self.query_env)) 83 | 84 | def set_device(self, device): 85 | self.device = device 86 | self.model.to(device) 87 | 88 | def set_epsilon(self, epsilon): 89 | self.epsilon = epsilon 90 | 91 | def build_action_space(self, v_pref): 92 | """ 93 | Action space consists of 25 uniformly sampled actions in permitted range and 25 randomly sampled actions. 94 | """ 95 | holonomic = True if self.kinematics == 'holonomic' else False 96 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)] 97 | if holonomic: 98 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False) 99 | else: 100 | rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples) 101 | 102 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)] 103 | for rotation, speed in itertools.product(rotations, speeds): 104 | if holonomic: 105 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation))) 106 | else: 107 | action_space.append(ActionRot(speed, rotation)) 108 | 109 | self.speeds = speeds 110 | self.rotations = rotations 111 | self.action_space = action_space 112 | 113 | def propagate(self, state, action): 114 | if isinstance(state, ObservableState): 115 | # propagate state of humans 116 | next_px = state.px + action.vx * self.time_step 117 | next_py = state.py + action.vy * self.time_step 118 | next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius) 119 | elif isinstance(state, FullState): 120 | # propagate state of current agent 121 | # perform action without rotation 122 | if self.kinematics == 'holonomic': 123 | next_px = state.px + action.vx * self.time_step 124 | next_py = state.py + action.vy * self.time_step 125 | next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius, 126 | state.gx, state.gy, state.v_pref, state.theta) 127 | else: 128 | next_theta = state.theta + action.r 129 | next_vx = action.v * np.cos(next_theta) 130 | next_vy = action.v * np.sin(next_theta) 131 | next_px = state.px + next_vx * self.time_step 132 | next_py = state.py + next_vy 
* self.time_step 133 | next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, 134 | state.v_pref, next_theta) 135 | else: 136 | raise ValueError('Type error') 137 | 138 | return next_state 139 | 140 | def predict(self, state): 141 | """ 142 | Input state is the joint state of robot concatenated by the observable state of other agents 143 | 144 | To predict the best action, agent samples actions and propagates one step to see how good the next state is 145 | thus the reward function is needed 146 | 147 | """ 148 | if self.phase is None or self.device is None: 149 | raise AttributeError('Phase, device attributes have to be set!') 150 | if self.phase == 'train' and self.epsilon is None: 151 | raise AttributeError('Epsilon attribute has to be set in training phase') 152 | 153 | if self.reach_destination(state): 154 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) 155 | if self.action_space is None: 156 | self.build_action_space(state.self_state.v_pref) 157 | if not state.human_states: 158 | assert self.phase != 'train' 159 | return self.select_greedy_action(state.self_state) 160 | 161 | probability = np.random.random() 162 | if self.phase == 'train' and probability < self.epsilon: 163 | max_action = self.action_space[np.random.choice(len(self.action_space))] 164 | else: 165 | self.action_values = list() 166 | max_min_value = float('-inf') 167 | max_action = None 168 | for action in self.action_space: 169 | next_self_state = self.propagate(state.self_state, action) 170 | if self.query_env: 171 | next_human_states, reward, done, info = self.env.onestep_lookahead(action) 172 | else: 173 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy)) 174 | for human_state in state.human_states] 175 | reward = self.compute_reward(next_self_state, next_human_states) 176 | batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device) 177 | for next_human_state in next_human_states], dim=0) 178 | 179 | # VALUE UPDATE 180 | outputs = self.model(self.rotate(batch_next_states)) 181 | min_output, min_index = torch.min(outputs, 0) 182 | min_value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * min_output.data.item() 183 | self.action_values.append(min_value) 184 | if min_value > max_min_value: 185 | max_min_value = min_value 186 | max_action = action 187 | 188 | if self.phase == 'train': 189 | self.last_state = self.transform(state) 190 | 191 | return max_action 192 | 193 | def select_greedy_action(self, self_state): 194 | # find the greedy action given kinematic constraints and return the closest action in the action space 195 | direction = np.arctan2(self_state.gy - self_state.py, self_state.gx - self_state.px) 196 | distance = np.linalg.norm((self_state.gy - self_state.py, self_state.gx - self_state.px)) 197 | if self.kinematics == 'holonomic': 198 | speed = min(distance / self.time_step, self_state.v_pref) 199 | vx = np.cos(direction) * speed 200 | vy = np.sin(direction) * speed 201 | 202 | min_diff = float('inf') 203 | closest_action = None 204 | for action in self.action_space: 205 | diff = np.linalg.norm(np.array(action) - np.array((vx, vy))) 206 | if diff < min_diff: 207 | min_diff = diff 208 | closest_action = action 209 | else: 210 | rotation = direction - self_state.theta 211 | # if goal is not in the field of view, always rotate first 212 | if rotation < self.rotations[0]: 213 | closest_action = ActionRot(self.speeds[0], self.rotations[0]) 
214 | elif rotation > self.rotations[-1]: 215 | closest_action = ActionRot(self.speeds[0], self.rotations[-1]) 216 | else: 217 | speed = min(distance / self.time_step, self_state.v_pref) 218 | 219 | min_diff = float('inf') 220 | closest_action = None 221 | for action in self.action_space: 222 | diff = np.linalg.norm(np.array((np.cos(action.r) * action.v, np.sin(action.r) * action.v)) - 223 | np.array((np.cos(rotation) * speed, np.sin(rotation) * speed))) 224 | if diff < min_diff: 225 | min_diff = diff 226 | closest_action = action 227 | 228 | return closest_action 229 | 230 | def transform(self, state): 231 | """ 232 | Take the state passed from agent and transform it to tensor for batch training 233 | 234 | :param state: 235 | :return: tensor of shape (len(state), ) 236 | """ 237 | state = torch.Tensor([state.self_state + state.human_states[0]]).to(self.device) 238 | state = self.rotate(state) 239 | return state 240 | 241 | def rotate(self, state): 242 | """ 243 | Transform the coordinate to agent-centric. 244 | Input state tensor is of size (batch_size, state_length) 245 | 246 | """ 247 | # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta', 'px1', 'py1', 'vx1', 'vy1', 'radius1' 248 | # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 249 | batch = state.shape[0] 250 | dx = (state[:, 5] - state[:, 0]).reshape((batch, -1)) 251 | dy = (state[:, 6] - state[:, 1]).reshape((batch, -1)) 252 | rot = torch.atan2(state[:, 6] - state[:, 1], state[:, 5] - state[:, 0]) 253 | 254 | dg = torch.norm(torch.cat([dx, dy], dim=1), 2, dim=1, keepdim=True) 255 | v_pref = state[:, 7].reshape((batch, -1)) 256 | vx = (state[:, 2] * torch.cos(rot) + state[:, 3] * torch.sin(rot)).reshape((batch, -1)) 257 | vy = (state[:, 3] * torch.cos(rot) - state[:, 2] * torch.sin(rot)).reshape((batch, -1)) 258 | 259 | radius = state[:, 4].reshape((batch, -1)) 260 | if self.kinematics == 'unicycle': 261 | theta = (state[:, 8] - rot).reshape((batch, -1)) 262 | else: 263 | theta = torch.zeros_like(v_pref) 264 | 265 | vx1 = (state[:, 11] * torch.cos(rot) + state[:, 12] * torch.sin(rot)).reshape((batch, -1)) 266 | vy1 = (state[:, 12] * torch.cos(rot) - state[:, 11] * torch.sin(rot)).reshape((batch, -1)) 267 | px1 = (state[:, 9] - state[:, 0]) * torch.cos(rot) + (state[:, 10] - state[:, 1]) * torch.sin(rot) 268 | px1 = px1.reshape((batch, -1)) 269 | py1 = (state[:, 10] - state[:, 1]) * torch.cos(rot) - (state[:, 9] - state[:, 0]) * torch.sin(rot) 270 | py1 = py1.reshape((batch, -1)) 271 | radius1 = state[:, 13].reshape((batch, -1)) 272 | radius_sum = radius + radius1 273 | da = torch.norm(torch.cat([(state[:, 0] - state[:, 9]).reshape((batch, -1)), (state[:, 1] - state[:, 10]). 
274 | reshape((batch, -1))], dim=1), 2, dim=1, keepdim=True) 275 | new_state = torch.cat([dg, v_pref, theta, radius, vx, vy, px1, py1, vx1, vy1, radius1, da, radius_sum], dim=1) 276 | return new_state -------------------------------------------------------------------------------- /crowd_nav/policy/gcn.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import itertools 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.functional import softmax, relu 6 | from torch.nn import Parameter 7 | from crowd_nav.policy.cadrl import mlp 8 | from crowd_nav.policy.multi_human_rl import MultiHumanRL 9 | 10 | 11 | class ValueNetwork(nn.Module): 12 | def __init__(self, input_dim, self_state_dim, num_layer, X_dim, wr_dims, wh_dims, final_state_dim, 13 | gcn2_w1_dim, planning_dims, similarity_function, layerwise_graph, skip_connection): 14 | super().__init__() 15 | # design choice 16 | 17 | # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation' 18 | self.similarity_function = similarity_function 19 | logging.info('self.similarity_func: {}'.format(self.similarity_function)) 20 | human_state_dim = input_dim - self_state_dim 21 | self.self_state_dim = self_state_dim 22 | self.human_state_dim = human_state_dim 23 | self.num_layer = num_layer 24 | self.X_dim = X_dim 25 | self.layerwise_graph = layerwise_graph 26 | self.skip_connection = skip_connection 27 | 28 | self.w_r = mlp(self_state_dim, wr_dims, last_relu=True) 29 | self.w_h = mlp(human_state_dim, wh_dims, last_relu=True) 30 | 31 | if self.similarity_function == 'embedded_gaussian': 32 | self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim)) 33 | elif self.similarity_function == 'concatenation': 34 | self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True) 35 | 36 | if num_layer == 1: 37 | self.w1 = Parameter(torch.randn(self.X_dim, final_state_dim)) 38 | elif num_layer == 2: 39 | self.w1 = Parameter(torch.randn(self.X_dim, gcn2_w1_dim)) 40 | self.w2 = Parameter(torch.randn(gcn2_w1_dim, final_state_dim)) 41 | else: 42 | raise NotImplementedError 43 | 44 | self.value_net = mlp(final_state_dim, planning_dims) 45 | 46 | # for visualization 47 | self.A = None 48 | 49 | def compute_similarity_matrix(self, X): 50 | if self.similarity_function == 'embedded_gaussian': 51 | A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1)) 52 | normalized_A = softmax(A, dim=2) 53 | elif self.similarity_function == 'gaussian': 54 | A = torch.matmul(X, X.permute(0, 2, 1)) 55 | normalized_A = softmax(A, dim=2) 56 | elif self.similarity_function == 'cosine': 57 | A = torch.matmul(X, X.permute(0, 2, 1)) 58 | magnitudes = torch.norm(A, dim=2, keepdim=True) 59 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1)) 60 | normalized_A = torch.div(A, norm_matrix) 61 | elif self.similarity_function == 'cosine_softmax': 62 | A = torch.matmul(X, X.permute(0, 2, 1)) 63 | magnitudes = torch.norm(A, dim=2, keepdim=True) 64 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1)) 65 | normalized_A = softmax(torch.div(A, norm_matrix), dim=2) 66 | elif self.similarity_function == 'concatenation': 67 | indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)] 68 | selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1)) 69 | pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2)) 70 | A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1)) 71 | normalized_A = A 72 
| elif self.similarity_function == 'squared': 73 | A = torch.matmul(X, X.permute(0, 2, 1)) 74 | squared_A = A * A 75 | normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True) 76 | elif self.similarity_function == 'equal_attention': 77 | normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1)) 78 | elif self.similarity_function == 'diagonal': 79 | normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1)) 80 | else: 81 | raise NotImplementedError 82 | 83 | return normalized_A 84 | 85 | def forward(self, state_input): 86 | if isinstance(state_input, tuple): 87 | state, lengths = state_input 88 | else: 89 | state = state_input 90 | # lengths = torch.IntTensor([state.size()[1]]) 91 | 92 | self_state = state[:, 0, :self.self_state_dim] 93 | human_states = state[:, :, self.self_state_dim:] 94 | 95 | # compute feature matrix X 96 | self_state_embedings = self.w_r(self_state) 97 | human_state_embedings = self.w_h(human_states) 98 | X = torch.cat([self_state_embedings.unsqueeze(1), human_state_embedings], dim=1) 99 | 100 | # compute matrix A 101 | normalized_A = self.compute_similarity_matrix(X) 102 | self.A = normalized_A[0, :, :].data.cpu().numpy() 103 | 104 | # graph convolution 105 | if self.num_layer == 0: 106 | feat = X[:, 0, :] 107 | elif self.num_layer == 1: 108 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) 109 | feat = h1[:, 0, :] 110 | else: 111 | # compute h1 and h2 112 | if not self.skip_connection: 113 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) 114 | else: 115 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) + X 116 | if self.layerwise_graph: 117 | normalized_A2 = self.compute_similarity_matrix(h1) 118 | else: 119 | normalized_A2 = normalized_A 120 | if not self.skip_connection: 121 | h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) 122 | else: 123 | h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) + h1 124 | feat = h2[:, 0, :] 125 | 126 | # do planning using only the final layer feature of the agent 127 | value = self.value_net(feat) 128 | return value 129 | 130 | 131 | class GCN(MultiHumanRL): 132 | def __init__(self): 133 | super().__init__() 134 | self.name = 'GCN' 135 | 136 | def configure(self, config): 137 | self.multiagent_training = config.gcn.multiagent_training 138 | num_layer = config.gcn.num_layer 139 | X_dim = config.gcn.X_dim 140 | wr_dims = config.gcn.wr_dims 141 | wh_dims = config.gcn.wh_dims 142 | final_state_dim = config.gcn.final_state_dim 143 | gcn2_w1_dim = config.gcn.gcn2_w1_dim 144 | planning_dims = config.gcn.planning_dims 145 | similarity_function = config.gcn.similarity_function 146 | layerwise_graph = config.gcn.layerwise_graph 147 | skip_connection = config.gcn.skip_connection 148 | self.set_common_parameters(config) 149 | self.model = ValueNetwork(self.input_dim(), self.self_state_dim, num_layer, X_dim, wr_dims, wh_dims, 150 | final_state_dim, gcn2_w1_dim, planning_dims, similarity_function, layerwise_graph, 151 | skip_connection) 152 | logging.info('self.model:{}'.format(self.model)) 153 | logging.info('GCN layers: {}'.format(num_layer)) 154 | logging.info('Policy: {}'.format(self.name)) 155 | 156 | def get_matrix_A(self): 157 | return self.model.A 158 | -------------------------------------------------------------------------------- /crowd_nav/policy/graph_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import 
itertools 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.functional import softmax, relu 6 | from torch.nn import Parameter 7 | from crowd_nav.policy.helpers import mlp 8 | 9 | 10 | class RGL(nn.Module): 11 | def __init__(self, config, robot_state_dim, human_state_dim): 12 | """ The current code might not be compatible with models trained with previous version 13 | """ 14 | super().__init__() 15 | self.multiagent_training = config.gcn.multiagent_training 16 | num_layer = config.gcn.num_layer 17 | X_dim = config.gcn.X_dim 18 | wr_dims = config.gcn.wr_dims 19 | wh_dims = config.gcn.wh_dims 20 | final_state_dim = config.gcn.final_state_dim 21 | similarity_function = config.gcn.similarity_function 22 | layerwise_graph = config.gcn.layerwise_graph 23 | skip_connection = config.gcn.skip_connection 24 | 25 | # design choice 26 | 27 | # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation' 28 | self.similarity_function = similarity_function 29 | self.robot_state_dim = robot_state_dim 30 | self.human_state_dim = human_state_dim 31 | self.num_layer = num_layer 32 | self.X_dim = X_dim 33 | self.layerwise_graph = layerwise_graph 34 | self.skip_connection = skip_connection 35 | 36 | logging.info('Similarity_func: {}'.format(self.similarity_function)) 37 | logging.info('Layerwise_graph: {}'.format(self.layerwise_graph)) 38 | logging.info('Skip_connection: {}'.format(self.skip_connection)) 39 | logging.info('Number of layers: {}'.format(self.num_layer)) 40 | 41 | self.w_r = mlp(robot_state_dim, wr_dims, last_relu=True) 42 | self.w_h = mlp(human_state_dim, wh_dims, last_relu=True) 43 | 44 | if self.similarity_function == 'embedded_gaussian': 45 | self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim)) 46 | elif self.similarity_function == 'concatenation': 47 | self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True) 48 | 49 | # TODO: try other dim size 50 | embedding_dim = self.X_dim 51 | self.Ws = torch.nn.ParameterList() 52 | for i in range(self.num_layer): 53 | if i == 0: 54 | self.Ws.append(Parameter(torch.randn(self.X_dim, embedding_dim))) 55 | elif i == self.num_layer - 1: 56 | self.Ws.append(Parameter(torch.randn(embedding_dim, final_state_dim))) 57 | else: 58 | self.Ws.append(Parameter(torch.randn(embedding_dim, embedding_dim))) 59 | 60 | # for visualization 61 | self.A = None 62 | 63 | def compute_similarity_matrix(self, X): 64 | if self.similarity_function == 'embedded_gaussian': 65 | A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1)) 66 | normalized_A = softmax(A, dim=2) 67 | elif self.similarity_function == 'gaussian': 68 | A = torch.matmul(X, X.permute(0, 2, 1)) 69 | normalized_A = softmax(A, dim=2) 70 | elif self.similarity_function == 'cosine': 71 | A = torch.matmul(X, X.permute(0, 2, 1)) 72 | magnitudes = torch.norm(A, dim=2, keepdim=True) 73 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1)) 74 | normalized_A = torch.div(A, norm_matrix) 75 | elif self.similarity_function == 'cosine_softmax': 76 | A = torch.matmul(X, X.permute(0, 2, 1)) 77 | magnitudes = torch.norm(A, dim=2, keepdim=True) 78 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1)) 79 | normalized_A = softmax(torch.div(A, norm_matrix), dim=2) 80 | elif self.similarity_function == 'concatenation': 81 | indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)] 82 | selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1)) 83 | pairwise_features = selected_features.reshape((-1, 
X.size(1) * X.size(1), X.size(2) * 2)) 84 | A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1)) 85 | normalized_A = A 86 | elif self.similarity_function == 'squared': 87 | A = torch.matmul(X, X.permute(0, 2, 1)) 88 | squared_A = A * A 89 | normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True) 90 | elif self.similarity_function == 'equal_attention': 91 | normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1)) 92 | elif self.similarity_function == 'diagonal': 93 | normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1)) 94 | else: 95 | raise NotImplementedError 96 | 97 | return normalized_A 98 | 99 | def forward(self, state): 100 | """ 101 | Embed current state tensor pair (robot_state, human_states) into a latent space 102 | Each tensor is of shape (batch_size, # of agent, features) 103 | :param state: 104 | :return: 105 | """ 106 | robot_state, human_states = state 107 | 108 | # compute feature matrix X 109 | robot_state_embedings = self.w_r(robot_state) 110 | human_state_embedings = self.w_h(human_states) 111 | X = torch.cat([robot_state_embedings, human_state_embedings], dim=1) 112 | 113 | # compute matrix A 114 | if not self.layerwise_graph: 115 | normalized_A = self.compute_similarity_matrix(X) 116 | self.A = normalized_A[0, :, :].data.cpu().numpy() 117 | 118 | next_H = H = X 119 | for i in range(self.num_layer): 120 | if self.layerwise_graph: 121 | A = self.compute_similarity_matrix(H) 122 | next_H = relu(torch.matmul(torch.matmul(A, H), self.Ws[i])) 123 | else: 124 | next_H = relu(torch.matmul(torch.matmul(normalized_A, H), self.Ws[i])) 125 | 126 | if self.skip_connection: 127 | next_H += H 128 | H = next_H 129 | 130 | return next_H 131 | 132 | 133 | # class RGL(nn.Module): 134 | # def __init__(self, config, robot_state_dim, human_state_dim): 135 | # super().__init__() 136 | # self.multiagent_training = config.gcn.multiagent_training 137 | # num_layer = config.gcn.num_layer 138 | # X_dim = config.gcn.X_dim 139 | # wr_dims = config.gcn.wr_dims 140 | # wh_dims = config.gcn.wh_dims 141 | # final_state_dim = config.gcn.final_state_dim 142 | # gcn2_w1_dim = config.gcn.gcn2_w1_dim 143 | # similarity_function = config.gcn.similarity_function 144 | # layerwise_graph = config.gcn.layerwise_graph 145 | # skip_connection = config.gcn.skip_connection 146 | # 147 | # # design choice 148 | # 149 | # # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation' 150 | # self.similarity_function = similarity_function 151 | # logging.info('self.similarity_func: {}'.format(self.similarity_function)) 152 | # self.robot_state_dim = robot_state_dim 153 | # self.human_state_dim = human_state_dim 154 | # self.num_layer = num_layer 155 | # self.X_dim = X_dim 156 | # self.layerwise_graph = layerwise_graph 157 | # self.skip_connection = skip_connection 158 | # 159 | # self.w_r = mlp(robot_state_dim, wr_dims, last_relu=True) 160 | # self.w_h = mlp(human_state_dim, wh_dims, last_relu=True) 161 | # 162 | # if self.similarity_function == 'embedded_gaussian': 163 | # self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim)) 164 | # elif self.similarity_function == 'concatenation': 165 | # self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True) 166 | # 167 | # if num_layer == 1: 168 | # self.w1 = Parameter(torch.randn(self.X_dim, final_state_dim)) 169 | # elif num_layer == 2: 170 | # self.w1 = Parameter(torch.randn(self.X_dim, gcn2_w1_dim)) 171 | # self.w2 = Parameter(torch.randn(gcn2_w1_dim, 
final_state_dim)) 172 | # else: 173 | # raise NotImplementedError 174 | # 175 | # # for visualization 176 | # self.A = None 177 | # 178 | # def compute_similarity_matrix(self, X): 179 | # if self.similarity_function == 'embedded_gaussian': 180 | # A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1)) 181 | # normalized_A = softmax(A, dim=2) 182 | # elif self.similarity_function == 'gaussian': 183 | # A = torch.matmul(X, X.permute(0, 2, 1)) 184 | # normalized_A = softmax(A, dim=2) 185 | # elif self.similarity_function == 'cosine': 186 | # A = torch.matmul(X, X.permute(0, 2, 1)) 187 | # magnitudes = torch.norm(A, dim=2, keepdim=True) 188 | # norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1)) 189 | # normalized_A = torch.div(A, norm_matrix) 190 | # elif self.similarity_function == 'cosine_softmax': 191 | # A = torch.matmul(X, X.permute(0, 2, 1)) 192 | # magnitudes = torch.norm(A, dim=2, keepdim=True) 193 | # norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1)) 194 | # normalized_A = softmax(torch.div(A, norm_matrix), dim=2) 195 | # elif self.similarity_function == 'concatenation': 196 | # indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)] 197 | # selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1)) 198 | # pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2)) 199 | # A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1)) 200 | # normalized_A = A 201 | # elif self.similarity_function == 'squared': 202 | # A = torch.matmul(X, X.permute(0, 2, 1)) 203 | # squared_A = A * A 204 | # normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True) 205 | # elif self.similarity_function == 'equal_attention': 206 | # normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1)) 207 | # elif self.similarity_function == 'diagonal': 208 | # normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1)) 209 | # else: 210 | # raise NotImplementedError 211 | # 212 | # return normalized_A 213 | # 214 | # def forward(self, state): 215 | # """ 216 | # Embed current state tensor pair (robot_state, human_states) into a latent space 217 | # Each tensor is of shape (batch_size, # of agent, features) 218 | # :param state: 219 | # :return: 220 | # """ 221 | # robot_state, human_states = state 222 | # 223 | # # compute feature matrix X 224 | # robot_state_embedings = self.w_r(robot_state) 225 | # human_state_embedings = self.w_h(human_states) 226 | # X = torch.cat([robot_state_embedings, human_state_embedings], dim=1) 227 | # 228 | # # compute matrix A 229 | # normalized_A = self.compute_similarity_matrix(X) 230 | # self.A = normalized_A[0, :, :].data.cpu().numpy() 231 | # 232 | # # graph convolution 233 | # if self.num_layer == 0: 234 | # state_embedding = X 235 | # elif self.num_layer == 1: 236 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) 237 | # state_embedding = h1 238 | # else: 239 | # # compute h1 and h2 240 | # if not self.skip_connection: 241 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) 242 | # else: 243 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) + X 244 | # if self.layerwise_graph: 245 | # normalized_A2 = self.compute_similarity_matrix(h1) 246 | # else: 247 | # normalized_A2 = normalized_A 248 | # if not self.skip_connection: 249 | # h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) 250 | # else: 
251 | # h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) + h1 252 | # state_embedding = h2 253 | # 254 | # return state_embedding 255 | -------------------------------------------------------------------------------- /crowd_nav/policy/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def mlp(input_dim, mlp_dims, last_relu=False): 6 | layers = [] 7 | mlp_dims = [input_dim] + mlp_dims 8 | for i in range(len(mlp_dims) - 1): 9 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1])) 10 | if i != len(mlp_dims) - 2 or last_relu: 11 | layers.append(nn.ReLU()) 12 | net = nn.Sequential(*layers) 13 | return net 14 | -------------------------------------------------------------------------------- /crowd_nav/policy/lstm_rl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import logging 5 | from crowd_nav.policy.cadrl import mlp 6 | from crowd_nav.policy.multi_human_rl import MultiHumanRL 7 | 8 | 9 | class ValueNetwork1(nn.Module): 10 | def __init__(self, input_dim, self_state_dim, mlp_dims, lstm_hidden_dim): 11 | super().__init__() 12 | self.self_state_dim = self_state_dim 13 | self.lstm_hidden_dim = lstm_hidden_dim 14 | self.mlp = mlp(self_state_dim + lstm_hidden_dim, mlp_dims) 15 | self.lstm = nn.LSTM(input_dim, lstm_hidden_dim, batch_first=True) 16 | 17 | def forward(self, state_input): 18 | """ 19 | First transform the world coordinates to self-centric coordinates and then do forward computation 20 | 21 | """ 22 | if isinstance(state_input, tuple): 23 | state, lengths = state_input 24 | else: 25 | state = state_input 26 | lengths = torch.IntTensor([state.size()[1]]) 27 | self_state = state[:, 0, :self.self_state_dim] 28 | packed_sequence = torch.nn.utils.rnn.pack_padded_sequence(state, lengths, batch_first=True) 29 | _, (hn, cn) = self.lstm(packed_sequence) 30 | hn = hn.squeeze(0) 31 | joint_state = torch.cat([self_state, hn], dim=1) 32 | value = self.mlp(joint_state) 33 | return value 34 | 35 | 36 | class ValueNetwork2(nn.Module): 37 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp_dims, lstm_hidden_dim): 38 | super().__init__() 39 | self.self_state_dim = self_state_dim 40 | self.lstm_hidden_dim = lstm_hidden_dim 41 | self.mlp1 = mlp(input_dim, mlp1_dims) 42 | self.lstm = nn.LSTM(mlp1_dims[-1], lstm_hidden_dim, batch_first=True) 43 | self.mlp = mlp(self_state_dim + lstm_hidden_dim, mlp_dims) 44 | 45 | def forward(self, state_input): 46 | """ 47 | First transform the world coordinates to self-centric coordinates and then do forward computation 48 | 49 | :return: 50 | """ 51 | if isinstance(state_input, tuple): 52 | state, lengths = state_input 53 | else: 54 | state = state_input 55 | lengths = torch.IntTensor([state.size()[1]]) 56 | 57 | size = state.shape 58 | self_state = state[:, 0, :self.self_state_dim] 59 | 60 | state = torch.reshape(state, (-1, size[2])) 61 | mlp1_output = self.mlp1(state) 62 | mlp1_output = torch.reshape(mlp1_output, (size[0], size[1], -1)) 63 | packed_mlp1_output = torch.nn.utils.rnn.pack_padded_sequence(mlp1_output, lengths, batch_first=True) 64 | 65 | output, (hn, cn) = self.lstm(packed_mlp1_output) 66 | hn = hn.squeeze(0) 67 | joint_state = torch.cat([self_state, hn], dim=1) 68 | value = self.mlp(joint_state) 69 | return value 70 | 71 | 72 | class LstmRL(MultiHumanRL): 73 | def __init__(self): 74 | super().__init__() 75 | self.name = 
'LSTM-RL' 76 | self.with_interaction_module = None 77 | 78 | def configure(self, config): 79 | self.set_common_parameters(config) 80 | self.with_om = config.lstm_rl.with_om 81 | self.multiagent_training = config.lstm_rl.multiagent_training 82 | 83 | mlp_dims = config.lstm_rl.mlp2_dims 84 | global_state_dim = config.lstm_rl.global_state_dim 85 | with_interaction_module = config.lstm_rl.with_interaction_module 86 | if with_interaction_module: 87 | mlp1_dims = config.lstm_rl.mlp1_dims 88 | self.model = ValueNetwork2(self.input_dim(), self.self_state_dim, mlp1_dims, mlp_dims, global_state_dim) 89 | else: 90 | self.model = ValueNetwork1(self.input_dim(), self.self_state_dim, mlp_dims, global_state_dim) 91 | logging.info('Policy: {}LSTM-RL {} pairwise interaction module'.format( 92 | 'OM-' if self.with_om else '', 'w/' if with_interaction_module else 'w/o')) 93 | 94 | def predict(self, state): 95 | """ 96 | Input state is the joint state of robot concatenated with the observable state of other agents 97 | 98 | To predict the best action, agent samples actions and propagates one step to see how good the next state is 99 | thus the reward function is needed 100 | 101 | """ 102 | 103 | def dist(human): 104 | # sort human order by decreasing distance to the robot 105 | return np.linalg.norm(np.array(human.position) - np.array(state.self_state.position)) 106 | 107 | state.human_states = sorted(state.human_states, key=dist, reverse=True) 108 | return super().predict(state) 109 | 110 | -------------------------------------------------------------------------------- /crowd_nav/policy/model_predictive_rl.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | import numpy as np 4 | from numpy.linalg import norm 5 | import itertools 6 | from crowd_sim.envs.policy.policy import Policy 7 | from crowd_sim.envs.utils.action import ActionRot, ActionXY 8 | from crowd_sim.envs.utils.state import tensor_to_joint_state 9 | from crowd_sim.envs.utils.utils import point_to_segment_dist 10 | from crowd_nav.policy.state_predictor import StatePredictor, LinearStatePredictor 11 | from crowd_nav.policy.graph_model import RGL 12 | from crowd_nav.policy.value_estimator import ValueEstimator 13 | 14 | 15 | class ModelPredictiveRL(Policy): 16 | def __init__(self): 17 | super().__init__() 18 | self.name = 'ModelPredictiveRL' 19 | self.trainable = True 20 | self.multiagent_training = True 21 | self.kinematics = None 22 | self.epsilon = None 23 | self.gamma = None 24 | self.sampling = None 25 | self.speed_samples = None 26 | self.rotation_samples = None 27 | self.action_space = None 28 | self.rotation_constraint = None 29 | self.speeds = None 30 | self.rotations = None 31 | self.action_values = None 32 | self.robot_state_dim = 9 33 | self.human_state_dim = 5 34 | self.v_pref = 1 35 | self.share_graph_model = None 36 | self.value_estimator = None 37 | self.linear_state_predictor = None 38 | self.state_predictor = None 39 | self.planning_depth = None 40 | self.planning_width = None 41 | self.do_action_clip = None 42 | self.sparse_search = None 43 | self.sparse_speed_samples = 2 44 | self.sparse_rotation_samples = 8 45 | self.action_group_index = [] 46 | self.traj = None 47 | 48 | def configure(self, config): 49 | self.set_common_parameters(config) 50 | self.planning_depth = config.model_predictive_rl.planning_depth 51 | self.do_action_clip = config.model_predictive_rl.do_action_clip 52 | if hasattr(config.model_predictive_rl, 'sparse_search'): 53 | 
self.sparse_search = config.model_predictive_rl.sparse_search 54 | self.planning_width = config.model_predictive_rl.planning_width 55 | self.share_graph_model = config.model_predictive_rl.share_graph_model 56 | self.linear_state_predictor = config.model_predictive_rl.linear_state_predictor 57 | 58 | if self.linear_state_predictor: 59 | self.state_predictor = LinearStatePredictor(config, self.time_step) 60 | graph_model = RGL(config, self.robot_state_dim, self.human_state_dim) 61 | self.value_estimator = ValueEstimator(config, graph_model) 62 | self.model = [graph_model, self.value_estimator.value_network] 63 | else: 64 | if self.share_graph_model: 65 | graph_model = RGL(config, self.robot_state_dim, self.human_state_dim) 66 | self.value_estimator = ValueEstimator(config, graph_model) 67 | self.state_predictor = StatePredictor(config, graph_model, self.time_step) 68 | self.model = [graph_model, self.value_estimator.value_network, self.state_predictor.human_motion_predictor] 69 | else: 70 | graph_model1 = RGL(config, self.robot_state_dim, self.human_state_dim) 71 | self.value_estimator = ValueEstimator(config, graph_model1) 72 | graph_model2 = RGL(config, self.robot_state_dim, self.human_state_dim) 73 | self.state_predictor = StatePredictor(config, graph_model2, self.time_step) 74 | self.model = [graph_model1, graph_model2, self.value_estimator.value_network, 75 | self.state_predictor.human_motion_predictor] 76 | 77 | logging.info('Planning depth: {}'.format(self.planning_depth)) 78 | logging.info('Planning width: {}'.format(self.planning_width)) 79 | logging.info('Sparse search: {}'.format(self.sparse_search)) 80 | 81 | if self.planning_depth > 1 and not self.do_action_clip: 82 | logging.warning('Performing d-step planning without action space clipping!') 83 | 84 | def set_common_parameters(self, config): 85 | self.gamma = config.rl.gamma 86 | self.kinematics = config.action_space.kinematics 87 | self.sampling = config.action_space.sampling 88 | self.speed_samples = config.action_space.speed_samples 89 | self.rotation_samples = config.action_space.rotation_samples 90 | self.rotation_constraint = config.action_space.rotation_constraint 91 | 92 | def set_device(self, device): 93 | self.device = device 94 | for model in self.model: 95 | model.to(device) 96 | 97 | def set_epsilon(self, epsilon): 98 | self.epsilon = epsilon 99 | 100 | def set_time_step(self, time_step): 101 | self.time_step = time_step 102 | self.state_predictor.time_step = time_step 103 | 104 | def get_normalized_gamma(self): 105 | return pow(self.gamma, self.time_step * self.v_pref) 106 | 107 | def get_model(self): 108 | return self.value_estimator 109 | 110 | def get_state_dict(self): 111 | if self.state_predictor.trainable: 112 | if self.share_graph_model: 113 | return { 114 | 'graph_model': self.value_estimator.graph_model.state_dict(), 115 | 'value_network': self.value_estimator.value_network.state_dict(), 116 | 'motion_predictor': self.state_predictor.human_motion_predictor.state_dict() 117 | } 118 | else: 119 | return { 120 | 'graph_model1': self.value_estimator.graph_model.state_dict(), 121 | 'graph_model2': self.state_predictor.graph_model.state_dict(), 122 | 'value_network': self.value_estimator.value_network.state_dict(), 123 | 'motion_predictor': self.state_predictor.human_motion_predictor.state_dict() 124 | } 125 | else: 126 | return { 127 | 'graph_model': self.value_estimator.graph_model.state_dict(), 128 | 'value_network': self.value_estimator.value_network.state_dict() 129 | } 130 | 131 | def get_traj(self): 132 
| return self.traj 133 | 134 | def load_state_dict(self, state_dict): 135 | if self.state_predictor.trainable: 136 | if self.share_graph_model: 137 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model']) 138 | else: 139 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model1']) 140 | self.state_predictor.graph_model.load_state_dict(state_dict['graph_model2']) 141 | 142 | self.value_estimator.value_network.load_state_dict(state_dict['value_network']) 143 | self.state_predictor.human_motion_predictor.load_state_dict(state_dict['motion_predictor']) 144 | else: 145 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model']) 146 | self.value_estimator.value_network.load_state_dict(state_dict['value_network']) 147 | 148 | def save_model(self, file): 149 | torch.save(self.get_state_dict(), file) 150 | 151 | def load_model(self, file): 152 | checkpoint = torch.load(file) 153 | self.load_state_dict(checkpoint) 154 | 155 | def build_action_space(self, v_pref): 156 | """ 157 | Action space consists of 25 uniformly sampled actions in permitted range and 25 randomly sampled actions. 158 | """ 159 | holonomic = True if self.kinematics == 'holonomic' else False 160 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)] 161 | if holonomic: 162 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False) 163 | else: 164 | rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples) 165 | 166 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)] 167 | for j, speed in enumerate(speeds): 168 | if j == 0: 169 | # index for action (0, 0) 170 | self.action_group_index.append(0) 171 | # only two groups in speeds 172 | if j < 3: 173 | speed_index = 0 174 | else: 175 | speed_index = 1 176 | 177 | for i, rotation in enumerate(rotations): 178 | rotation_index = i // 2 179 | 180 | action_index = speed_index * self.sparse_rotation_samples + rotation_index 181 | self.action_group_index.append(action_index) 182 | 183 | if holonomic: 184 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation))) 185 | else: 186 | action_space.append(ActionRot(speed, rotation)) 187 | 188 | self.speeds = speeds 189 | self.rotations = rotations 190 | self.action_space = action_space 191 | 192 | def predict(self, state): 193 | """ 194 | A base class for all methods that takes pairwise joint state as input to value network. 
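        An illustrative usage sketch (hedged: it assumes `policy` is an already
        configured ModelPredictiveRL instance and `state` is a crowd_sim JointState;
        the concrete values are placeholders):

            policy.set_phase('test')
            policy.set_device(torch.device('cpu'))
            policy.set_time_step(0.25)       # e.g. the simulator's time step
            action = policy.predict(state)   # ActionXY (holonomic) or ActionRot
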
195 | The input to the value network is always of shape (batch_size, # humans, rotated joint state length) 196 | 197 | """ 198 | if self.phase is None or self.device is None: 199 | raise AttributeError('Phase, device attributes have to be set!') 200 | if self.phase == 'train' and self.epsilon is None: 201 | raise AttributeError('Epsilon attribute has to be set in training phase') 202 | 203 | if self.reach_destination(state): 204 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) 205 | if self.action_space is None: 206 | self.build_action_space(state.robot_state.v_pref) 207 | 208 | probability = np.random.random() 209 | if self.phase == 'train' and probability < self.epsilon: 210 | max_action = self.action_space[np.random.choice(len(self.action_space))] 211 | else: 212 | max_action = None 213 | max_value = float('-inf') 214 | max_traj = None 215 | 216 | if self.do_action_clip: 217 | state_tensor = state.to_tensor(add_batch_size=True, device=self.device) 218 | action_space_clipped = self.action_clip(state_tensor, self.action_space, self.planning_width) 219 | else: 220 | action_space_clipped = self.action_space 221 | 222 | for action in action_space_clipped: 223 | state_tensor = state.to_tensor(add_batch_size=True, device=self.device) 224 | next_state = self.state_predictor(state_tensor, action) 225 | max_next_return, max_next_traj = self.V_planning(next_state, self.planning_depth, self.planning_width) 226 | reward_est = self.estimate_reward(state, action) 227 | value = reward_est + self.get_normalized_gamma() * max_next_return 228 | if value > max_value: 229 | max_value = value 230 | max_action = action 231 | max_traj = [(state_tensor, action, reward_est)] + max_next_traj 232 | if max_action is None: 233 | raise ValueError('Value network is not well trained.') 234 | 235 | if self.phase == 'train': 236 | self.last_state = self.transform(state) 237 | else: 238 | self.traj = max_traj 239 | 240 | return max_action 241 | 242 | def action_clip(self, state, action_space, width, depth=1): 243 | values = [] 244 | 245 | for action in action_space: 246 | next_state_est = self.state_predictor(state, action) 247 | next_return, _ = self.V_planning(next_state_est, depth, width) 248 | reward_est = self.estimate_reward(state, action) 249 | value = reward_est + self.get_normalized_gamma() * next_return 250 | values.append(value) 251 | 252 | if self.sparse_search: 253 | # self.sparse_speed_samples = 2 254 | # search in a sparse grained action space 255 | added_groups = set() 256 | max_indices = np.argsort(np.array(values))[::-1] 257 | clipped_action_space = [] 258 | for index in max_indices: 259 | if self.action_group_index[index] not in added_groups: 260 | clipped_action_space.append(action_space[index]) 261 | added_groups.add(self.action_group_index[index]) 262 | if len(clipped_action_space) == width: 263 | break 264 | else: 265 | max_indexes = np.argpartition(np.array(values), -width)[-width:] 266 | clipped_action_space = [action_space[i] for i in max_indexes] 267 | 268 | # print(clipped_action_space) 269 | return clipped_action_space 270 | 271 | def V_planning(self, state, depth, width): 272 | """ Plans n steps into future. 
Computes the value for the current state as well as the trajectories 273 | defined as a list of (state, action, reward) triples 274 | 275 | """ 276 | 277 | current_state_value = self.value_estimator(state) 278 | if depth == 1: 279 | return current_state_value, [(state, None, None)] 280 | 281 | if self.do_action_clip: 282 | action_space_clipped = self.action_clip(state, self.action_space, width) 283 | else: 284 | action_space_clipped = self.action_space 285 | 286 | returns = [] 287 | trajs = [] 288 | 289 | for action in action_space_clipped: 290 | next_state_est = self.state_predictor(state, action) 291 | reward_est = self.estimate_reward(state, action) 292 | next_value, next_traj = self.V_planning(next_state_est, depth - 1, self.planning_width) 293 | return_value = current_state_value / depth + (depth - 1) / depth * (self.get_normalized_gamma() * next_value + reward_est) 294 | 295 | returns.append(return_value) 296 | trajs.append([(state, action, reward_est)] + next_traj) 297 | 298 | max_index = np.argmax(returns) 299 | max_return = returns[max_index] 300 | max_traj = trajs[max_index] 301 | 302 | return max_return, max_traj 303 | 304 | def estimate_reward(self, state, action): 305 | """ If the time step is small enough, it's okay to model agent as linear movement during this period 306 | 307 | """ 308 | # collision detection 309 | if isinstance(state, list) or isinstance(state, tuple): 310 | state = tensor_to_joint_state(state) 311 | human_states = state.human_states 312 | robot_state = state.robot_state 313 | 314 | dmin = float('inf') 315 | collision = False 316 | for i, human in enumerate(human_states): 317 | px = human.px - robot_state.px 318 | py = human.py - robot_state.py 319 | if self.kinematics == 'holonomic': 320 | vx = human.vx - action.vx 321 | vy = human.vy - action.vy 322 | else: 323 | vx = human.vx - action.v * np.cos(action.r + robot_state.theta) 324 | vy = human.vy - action.v * np.sin(action.r + robot_state.theta) 325 | ex = px + vx * self.time_step 326 | ey = py + vy * self.time_step 327 | # closest distance between boundaries of two agents 328 | closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - robot_state.radius 329 | if closest_dist < 0: 330 | collision = True 331 | break 332 | elif closest_dist < dmin: 333 | dmin = closest_dist 334 | 335 | # check if reaching the goal 336 | if self.kinematics == 'holonomic': 337 | px = robot_state.px + action.vx * self.time_step 338 | py = robot_state.py + action.vy * self.time_step 339 | else: 340 | theta = robot_state.theta + action.r 341 | px = robot_state.px + np.cos(theta) * action.v * self.time_step 342 | py = robot_state.py + np.sin(theta) * action.v * self.time_step 343 | 344 | end_position = np.array((px, py)) 345 | reaching_goal = norm(end_position - np.array([robot_state.gx, robot_state.gy])) < robot_state.radius 346 | 347 | if collision: 348 | reward = -0.25 349 | elif reaching_goal: 350 | reward = 1 351 | elif dmin < 0.2: 352 | # adjust the reward based on FPS 353 | reward = (dmin - 0.2) * 0.5 * self.time_step 354 | else: 355 | reward = 0 356 | 357 | return reward 358 | 359 | def transform(self, state): 360 | """ 361 | Take the JointState to tensors 362 | 363 | :param state: 364 | :return: tensor of shape (# of agent, len(state)) 365 | """ 366 | robot_state_tensor = torch.Tensor([state.robot_state.to_tuple()]).to(self.device) 367 | human_states_tensor = torch.Tensor([human_state.to_tuple() for human_state in state.human_states]). 
\ 368 | to(self.device) 369 | 370 | return robot_state_tensor, human_states_tensor 371 | -------------------------------------------------------------------------------- /crowd_nav/policy/multi_human_rl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | import numpy as np 4 | from crowd_sim.envs.utils.action import ActionRot, ActionXY 5 | from crowd_nav.policy.cadrl import CADRL 6 | 7 | 8 | class MultiHumanRL(CADRL): 9 | def __init__(self): 10 | super().__init__() 11 | 12 | def predict(self, state): 13 | """ 14 | A base class for all methods that takes pairwise joint state as input to value network. 15 | The input to the value network is always of shape (batch_size, # humans, rotated joint state length) 16 | 17 | """ 18 | if self.phase is None or self.device is None: 19 | raise AttributeError('Phase, device attributes have to be set!') 20 | if self.phase == 'train' and self.epsilon is None: 21 | raise AttributeError('Epsilon attribute has to be set in training phase') 22 | 23 | if self.reach_destination(state): 24 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) 25 | if self.action_space is None: 26 | self.build_action_space(state.robot_state.v_pref) 27 | if not state.human_states: 28 | assert self.phase != 'train' 29 | if hasattr(self, 'attention_weights'): 30 | self.attention_weights = list() 31 | return self.select_greedy_action(state.robot_state) 32 | 33 | occupancy_maps = None 34 | probability = np.random.random() 35 | if self.phase == 'train' and probability < self.epsilon: 36 | max_action = self.action_space[np.random.choice(len(self.action_space))] 37 | else: 38 | self.action_values = list() 39 | max_value = float('-inf') 40 | max_action = None 41 | for action in self.action_space: 42 | next_robot_state = self.propagate(state.robot_state, action) 43 | if self.query_env: 44 | next_human_states, reward, done, info = self.env.onestep_lookahead(action) 45 | else: 46 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy)) 47 | for human_state in state.human_states] 48 | reward = self.compute_reward(next_robot_state, next_human_states) 49 | batch_next_states = torch.cat([torch.Tensor([next_robot_state + next_human_state]).to(self.device) 50 | for next_human_state in next_human_states], dim=0) 51 | rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0) 52 | if self.with_om: 53 | if occupancy_maps is None: 54 | occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0) 55 | rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2) 56 | # VALUE UPDATE 57 | next_state_value = self.model(rotated_batch_input).data.item() 58 | value = reward + pow(self.gamma, self.time_step * state.robot_state.v_pref) * next_state_value 59 | self.action_values.append(value) 60 | if value > max_value: 61 | max_value = value 62 | max_action = action 63 | if hasattr(self, 'attention_weights'): 64 | self.attention_weights = self.model.attention_weights 65 | if max_action is None: 66 | raise ValueError('Value network is not well trained. 
') 67 | 68 | if self.phase == 'train': 69 | self.last_state = self.transform(state) 70 | 71 | return max_action 72 | 73 | def compute_reward(self, nav, humans): 74 | # collision detection 75 | dmin = float('inf') 76 | collision = False 77 | for i, human in enumerate(humans): 78 | dist = np.linalg.norm((nav.px - human.px, nav.py - human.py)) - nav.radius - human.radius 79 | if dist < 0: 80 | collision = True 81 | break 82 | if dist < dmin: 83 | dmin = dist 84 | 85 | # check if reaching the goal 86 | reaching_goal = np.linalg.norm((nav.px - nav.gx, nav.py - nav.gy)) < nav.radius 87 | if collision: 88 | reward = -0.25 89 | elif reaching_goal: 90 | reward = 1 91 | elif dmin < 0.2: 92 | reward = (dmin - 0.2) * 0.5 * self.time_step 93 | else: 94 | reward = 0 95 | 96 | return reward 97 | 98 | def transform(self, state): 99 | """ 100 | Take the state passed from agent and transform it to the input of value network 101 | 102 | :param state: 103 | :return: tensor of shape (# of humans, len(state)) 104 | """ 105 | state_tensor = torch.cat([torch.Tensor([state.robot_state + human_state]).to(self.device) 106 | for human_state in state.human_states], dim=0) 107 | rotated_state_tensor = self.rotate(state_tensor) 108 | if self.with_om: 109 | occupancy_maps = self.build_occupancy_maps(state.human_states) 110 | rotated_state_tensor = torch.cat([rotated_state_tensor, occupancy_maps], dim=1) 111 | 112 | return rotated_state_tensor 113 | 114 | def input_dim(self): 115 | return self.joint_state_dim + (self.cell_num ** 2 * self.om_channel_size if self.with_om else 0) 116 | 117 | def build_occupancy_maps(self, human_states): 118 | """ 119 | 120 | :param human_states: 121 | :return: tensor of shape (# human - 1, self.cell_num ** 2) 122 | """ 123 | occupancy_maps = [] 124 | for human in human_states: 125 | other_humans = np.concatenate([np.array([(other_human.px, other_human.py, other_human.vx, other_human.vy)]) 126 | for other_human in human_states if other_human != human], axis=0) 127 | other_px = other_humans[:, 0] - human.px 128 | other_py = other_humans[:, 1] - human.py 129 | # new x-axis is in the direction of human's velocity 130 | human_velocity_angle = np.arctan2(human.vy, human.vx) 131 | other_human_orientation = np.arctan2(other_py, other_px) 132 | rotation = other_human_orientation - human_velocity_angle 133 | distance = np.linalg.norm([other_px, other_py], axis=0) 134 | other_px = np.cos(rotation) * distance 135 | other_py = np.sin(rotation) * distance 136 | 137 | # compute indices of humans in the grid 138 | other_x_index = np.floor(other_px / self.cell_size + self.cell_num / 2) 139 | other_y_index = np.floor(other_py / self.cell_size + self.cell_num / 2) 140 | other_x_index[other_x_index < 0] = float('-inf') 141 | other_x_index[other_x_index >= self.cell_num] = float('-inf') 142 | other_y_index[other_y_index < 0] = float('-inf') 143 | other_y_index[other_y_index >= self.cell_num] = float('-inf') 144 | grid_indices = self.cell_num * other_y_index + other_x_index 145 | occupancy_map = np.isin(range(self.cell_num ** 2), grid_indices) 146 | if self.om_channel_size == 1: 147 | occupancy_maps.append([occupancy_map.astype(int)]) 148 | else: 149 | # calculate relative velocity for other agents 150 | other_human_velocity_angles = np.arctan2(other_humans[:, 3], other_humans[:, 2]) 151 | rotation = other_human_velocity_angles - human_velocity_angle 152 | speed = np.linalg.norm(other_humans[:, 2:4], axis=1) 153 | other_vx = np.cos(rotation) * speed 154 | other_vy = np.sin(rotation) * speed 155 | dm = [list() for 
_ in range(self.cell_num ** 2 * self.om_channel_size)] 156 | for i, index in np.ndenumerate(grid_indices): 157 | if index in range(self.cell_num ** 2): 158 | if self.om_channel_size == 2: 159 | dm[2 * int(index)].append(other_vx[i]) 160 | dm[2 * int(index) + 1].append(other_vy[i]) 161 | elif self.om_channel_size == 3: 162 | dm[2 * int(index)].append(1) 163 | dm[2 * int(index) + 1].append(other_vx[i]) 164 | dm[2 * int(index) + 2].append(other_vy[i]) 165 | else: 166 | raise NotImplementedError 167 | for i, cell in enumerate(dm): 168 | dm[i] = sum(dm[i]) / len(dm[i]) if len(dm[i]) != 0 else 0 169 | occupancy_maps.append([dm]) 170 | 171 | return torch.from_numpy(np.concatenate(occupancy_maps, axis=0)).float() 172 | 173 | -------------------------------------------------------------------------------- /crowd_nav/policy/policy_factory.py: -------------------------------------------------------------------------------- 1 | from crowd_sim.envs.policy.policy_factory import policy_factory 2 | from crowd_nav.policy.cadrl import CADRL 3 | from crowd_nav.policy.lstm_rl import LstmRL 4 | from crowd_nav.policy.sarl import SARL 5 | from crowd_nav.policy.gcn import GCN 6 | from crowd_nav.policy.model_predictive_rl import ModelPredictiveRL 7 | 8 | 9 | policy_factory['cadrl'] = CADRL 10 | policy_factory['lstm_rl'] = LstmRL 11 | policy_factory['sarl'] = SARL 12 | policy_factory['gcn'] = GCN 13 | policy_factory['model_predictive_rl'] = ModelPredictiveRL 14 | -------------------------------------------------------------------------------- /crowd_nav/policy/sarl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.utils.rnn as rnn_utils 4 | import logging 5 | from crowd_nav.policy.cadrl import mlp 6 | from crowd_nav.policy.multi_human_rl import MultiHumanRL 7 | 8 | 9 | class ValueNetwork(nn.Module): 10 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims, attention_dims, with_global_state, 11 | cell_size, cell_num): 12 | super().__init__() 13 | self.self_state_dim = self_state_dim 14 | self.global_state_dim = mlp1_dims[-1] 15 | self.mlp1 = mlp(input_dim, mlp1_dims, last_relu=True) 16 | self.mlp2 = mlp(mlp1_dims[-1], mlp2_dims) 17 | self.with_global_state = with_global_state 18 | if with_global_state: 19 | self.attention = mlp(mlp1_dims[-1] * 2, attention_dims) 20 | else: 21 | self.attention = mlp(mlp1_dims[-1], attention_dims) 22 | self.cell_size = cell_size 23 | self.cell_num = cell_num 24 | mlp3_input_dim = mlp2_dims[-1] + self.self_state_dim 25 | self.mlp3 = mlp(mlp3_input_dim, mlp3_dims) 26 | self.attention_weights = None 27 | 28 | def forward(self, state_input): 29 | """ 30 | First transform the world coordinates to self-centric coordinates and then do forward computation 31 | 32 | :param state_input: tensor of shape (batch_size, # of humans, length of a rotated state) 33 | :return: 34 | """ 35 | if isinstance(state_input, tuple): 36 | state, lengths = state_input 37 | else: 38 | state = state_input 39 | lengths = torch.IntTensor([state.size()[1]]) 40 | 41 | size = state.shape 42 | self_state = state[:, 0, :self.self_state_dim] 43 | mlp1_output = self.mlp1(state.reshape((-1, size[2]))) 44 | mlp2_output = self.mlp2(mlp1_output) 45 | 46 | if self.with_global_state: 47 | # compute attention scores 48 | global_state = torch.mean(mlp1_output.view(size[0], size[1], -1), 1, keepdim=True) 49 | global_state = global_state.expand((size[0], size[1], self.global_state_dim)).\ 50 | 
contiguous().view(-1, self.global_state_dim) 51 | attention_input = torch.cat([mlp1_output, global_state], dim=1) 52 | else: 53 | attention_input = mlp1_output 54 | scores = self.attention(attention_input).view(size[0], size[1], 1).squeeze(dim=2) 55 | 56 | # masked softmax 57 | mask = rnn_utils.pad_sequence([torch.ones(length.item()) for length in lengths], batch_first=True) 58 | masked_scores = scores * mask.float() 59 | max_scores = torch.max(masked_scores, dim=1, keepdim=True)[0] 60 | exps = torch.exp(masked_scores - max_scores) 61 | masked_exps = exps * mask.float() 62 | masked_sums = masked_exps.sum(1, keepdim=True) 63 | weights = (masked_exps / masked_sums).unsqueeze(2) 64 | self.attention_weights = weights[0, :, 0].data.cpu().numpy() 65 | 66 | # output feature is a linear combination of input features 67 | features = mlp2_output.view(size[0], size[1], -1) 68 | weighted_feature = torch.sum(torch.mul(weights, features), dim=1) 69 | 70 | # concatenate agent's state with global weighted humans' state 71 | joint_state = torch.cat([self_state, weighted_feature], dim=1) 72 | value = self.mlp3(joint_state) 73 | return value 74 | 75 | 76 | class SARL(MultiHumanRL): 77 | def __init__(self): 78 | super().__init__() 79 | self.name = 'SARL' 80 | self.attention_weights = None 81 | 82 | def configure(self, config): 83 | self.set_common_parameters(config) 84 | self.with_om = config.sarl.with_om 85 | self.multiagent_training = config.sarl.multiagent_training 86 | 87 | mlp1_dims = config.sarl.mlp1_dims 88 | mlp2_dims = config.sarl.mlp2_dims 89 | mlp3_dims = config.sarl.mlp3_dims 90 | attention_dims = config.sarl.attention_dims 91 | with_global_state = config.sarl.with_global_state 92 | self.model = ValueNetwork(self.input_dim(), self.self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims, 93 | attention_dims, with_global_state, self.cell_size, self.cell_num) 94 | if self.with_om: 95 | self.name = 'OM-SARL' 96 | logging.info('Policy: {} {} global state'.format(self.name, 'w/' if with_global_state else 'w/o')) 97 | 98 | def get_attention_weights(self): 99 | return self.attention_weights 100 | -------------------------------------------------------------------------------- /crowd_nav/policy/state_predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from crowd_nav.policy.helpers import mlp 5 | 6 | 7 | class StatePredictor(nn.Module): 8 | def __init__(self, config, graph_model, time_step): 9 | """ 10 | This function predicts the next state given the current state as input. 11 | It uses a graph model to encode the state into a latent space and predict each human's next state. 12 | """ 13 | super().__init__() 14 | self.trainable = True 15 | self.kinematics = config.action_space.kinematics 16 | self.graph_model = graph_model 17 | self.human_motion_predictor = mlp(config.gcn.X_dim, config.model_predictive_rl.motion_predictor_dims) 18 | self.time_step = time_step 19 | 20 | def forward(self, state, action, detach=False): 21 | """ Predict the next state tensor given current state as input. 
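        A minimal call sketch (hedged: `predictor` is assumed to be a StatePredictor
        built from the project config with a matching RGL graph model; the shapes
        follow the default 9-dim robot state and 5-dim observable human state):

            robot = torch.zeros(1, 1, 9)    # (batch_size, 1, robot_state_dim)
            humans = torch.zeros(1, 5, 5)   # (batch_size, human_num, human_state_dim)
            next_robot, next_humans = predictor((robot, humans), action=None)
            # action=None is the training path: next_robot is None and only the
            # humans' next states come from the learned motion predictor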
22 | 23 | :return: tensor of shape (batch_size, # of agents, feature_size) 24 | """ 25 | assert len(state[0].shape) == 3 26 | assert len(state[1].shape) == 3 27 | 28 | state_embedding = self.graph_model(state) 29 | if detach: 30 | state_embedding = state_embedding.detach() 31 | if action is None: 32 | # for training purpose 33 | next_robot_state = None 34 | else: 35 | next_robot_state = self.compute_next_state(state[0], action) 36 | next_human_states = self.human_motion_predictor(state_embedding)[:, 1:, :] 37 | 38 | next_observation = [next_robot_state, next_human_states] 39 | return next_observation 40 | 41 | def compute_next_state(self, robot_state, action): 42 | # currently it can not perform parallel computation 43 | if robot_state.shape[0] != 1: 44 | raise NotImplementedError 45 | 46 | # px, py, vx, vy, radius, gx, gy, v_pref, theta 47 | next_state = robot_state.clone().squeeze() 48 | if self.kinematics == 'holonomic': 49 | next_state[0] = next_state[0] + action.vx * self.time_step 50 | next_state[1] = next_state[1] + action.vy * self.time_step 51 | next_state[2] = action.vx 52 | next_state[3] = action.vy 53 | else: 54 | next_state[7] = next_state[7] + action.r 55 | next_state[0] = next_state[0] + np.cos(next_state[7]) * action.v * self.time_step 56 | next_state[1] = next_state[1] + np.sin(next_state[7]) * action.v * self.time_step 57 | next_state[2] = np.cos(next_state[7]) * action.v 58 | next_state[3] = np.sin(next_state[7]) * action.v 59 | 60 | return next_state.unsqueeze(0).unsqueeze(0) 61 | 62 | 63 | class LinearStatePredictor(object): 64 | def __init__(self, config, time_step): 65 | """ 66 | This function predicts the next state given the current state as input. 67 | It uses a graph model to encode the state into a latent space and predict each human's next state. 68 | """ 69 | super().__init__() 70 | self.trainable = False 71 | self.kinematics = config.action_space.kinematics 72 | self.time_step = time_step 73 | 74 | def __call__(self, state, action): 75 | """ Predict the next state tensor given current state as input. 
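        A small worked example of the constant-velocity update used here (a sketch;
        `LinearStatePredictor.linear_motion_approximator` is the static helper defined
        below, and the numbers are arbitrary):

            import torch
            humans = torch.tensor([[[0.0, 0.0, 1.0, 0.0, 0.3],
                                    [1.0, 1.0, 0.0, -1.0, 0.3]]])  # (1, 2, 5): px, py, vx, vy, radius
            next_humans = LinearStatePredictor.linear_motion_approximator(humans)
            # each position advances by one velocity step: (0, 0) -> (1, 0) and (1, 1) -> (1, 0)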
76 | 77 | :return: tensor of shape (batch_size, # of agents, feature_size) 78 | """ 79 | assert len(state[0].shape) == 3 80 | assert len(state[1].shape) == 3 81 | 82 | next_robot_state = self.compute_next_state(state[0], action) 83 | next_human_states = self.linear_motion_approximator(state[1]) 84 | 85 | next_observation = [next_robot_state, next_human_states] 86 | return next_observation 87 | 88 | def compute_next_state(self, robot_state, action): 89 | # currently it can not perform parallel computation 90 | if robot_state.shape[0] != 1: 91 | raise NotImplementedError 92 | 93 | # px, py, vx, vy, radius, gx, gy, v_pref, theta 94 | next_state = robot_state.clone().squeeze() 95 | if self.kinematics == 'holonomic': 96 | next_state[0] = next_state[0] + action.vx * self.time_step 97 | next_state[1] = next_state[1] + action.vy * self.time_step 98 | next_state[2] = action.vx 99 | next_state[3] = action.vy 100 | else: 101 | next_state[7] = next_state[7] + action.r 102 | next_state[0] = next_state[0] + np.cos(next_state[7]) * action.v * self.time_step 103 | next_state[1] = next_state[1] + np.sin(next_state[7]) * action.v * self.time_step 104 | next_state[2] = np.cos(next_state[7]) * action.v 105 | next_state[3] = np.sin(next_state[7]) * action.v 106 | 107 | return next_state.unsqueeze(0).unsqueeze(0) 108 | 109 | @staticmethod 110 | def linear_motion_approximator(human_states): 111 | """ approximate human states with linear motion, input shape : (batch_size, human_num, human_state_size) 112 | """ 113 | # px, py, vx, vy, radius 114 | next_state = human_states.clone().squeeze() 115 | next_state[:, 0] = next_state[:, 0] + next_state[:, 2] 116 | next_state[:, 1] = next_state[:, 1] + next_state[:, 3] 117 | 118 | return next_state.unsqueeze(0) 119 | 120 | -------------------------------------------------------------------------------- /crowd_nav/policy/value_estimator.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from crowd_nav.policy.helpers import mlp 3 | 4 | 5 | class ValueEstimator(nn.Module): 6 | def __init__(self, config, graph_model): 7 | super().__init__() 8 | self.graph_model = graph_model 9 | self.value_network = mlp(config.gcn.X_dim, config.model_predictive_rl.value_network_dims) 10 | 11 | def forward(self, state): 12 | """ Embed state into a latent space. Take the first row of the feature matrix as state representation. 
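        A minimal call sketch (hedged: `estimator` is assumed to be a ValueEstimator
        built from the project config; shapes follow ModelPredictiveRL.transform,
        i.e. a 9-dim robot state and 5-dim observable human states):

            robot = torch.zeros(1, 1, 9)    # (batch_size, 1, robot_state_dim)
            humans = torch.zeros(1, 5, 5)   # (batch_size, human_num, human_state_dim)
            value = estimator((robot, humans))
            # the graph embedding of the robot node feeds the value MLP, so
            # `value` typically has shape (batch_size, 1)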
13 | """ 14 | assert len(state[0].shape) == 3 15 | assert len(state[1].shape) == 3 16 | 17 | # only use the feature of robot node as state representation 18 | state_embedding = self.graph_model(state)[:, 0, :] 19 | value = self.value_network(state_embedding) 20 | return value 21 | -------------------------------------------------------------------------------- /crowd_nav/test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import argparse 3 | import importlib.util 4 | import os 5 | import torch 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import gym 9 | from crowd_nav.utils.explorer import Explorer 10 | from crowd_nav.policy.policy_factory import policy_factory 11 | from crowd_sim.envs.utils.robot import Robot 12 | from crowd_sim.envs.policy.orca import ORCA 13 | 14 | 15 | def main(args): 16 | # configure logging and device 17 | level = logging.DEBUG if args.debug else logging.INFO 18 | logging.basicConfig(level=level, format='%(asctime)s, %(levelname)s: %(message)s', 19 | datefmt="%Y-%m-%d %H:%M:%S") 20 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu") 21 | logging.info('Using device: %s', device) 22 | 23 | if args.model_dir is not None: 24 | if args.config is not None: 25 | config_file = args.config 26 | else: 27 | config_file = os.path.join(args.model_dir, 'config.py') 28 | if args.il: 29 | model_weights = os.path.join(args.model_dir, 'il_model.pth') 30 | logging.info('Loaded IL weights') 31 | elif args.rl: 32 | if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')): 33 | model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth') 34 | else: 35 | print(os.listdir(args.model_dir)) 36 | model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1]) 37 | logging.info('Loaded RL weights') 38 | else: 39 | model_weights = os.path.join(args.model_dir, 'best_val.pth') 40 | logging.info('Loaded RL weights with best VAL') 41 | 42 | else: 43 | config_file = args.config 44 | 45 | spec = importlib.util.spec_from_file_location('config', config_file) 46 | if spec is None: 47 | parser.error('Config file not found.') 48 | config = importlib.util.module_from_spec(spec) 49 | spec.loader.exec_module(config) 50 | 51 | # configure policy 52 | policy_config = config.PolicyConfig(args.debug) 53 | policy = policy_factory[policy_config.name]() 54 | if args.planning_depth is not None: 55 | policy_config.model_predictive_rl.do_action_clip = True 56 | policy_config.model_predictive_rl.planning_depth = args.planning_depth 57 | if args.planning_width is not None: 58 | policy_config.model_predictive_rl.do_action_clip = True 59 | policy_config.model_predictive_rl.planning_width = args.planning_width 60 | if args.sparse_search: 61 | policy_config.model_predictive_rl.sparse_search = True 62 | 63 | policy.configure(policy_config) 64 | if policy.trainable: 65 | if args.model_dir is None: 66 | parser.error('Trainable policy must be specified with a model weights directory') 67 | policy.load_model(model_weights) 68 | 69 | # configure environment 70 | env_config = config.EnvConfig(args.debug) 71 | 72 | if args.human_num is not None: 73 | env_config.sim.human_num = args.human_num 74 | env = gym.make('CrowdSim-v0') 75 | env.configure(env_config) 76 | 77 | if args.square: 78 | env.test_scenario = 'square_crossing' 79 | if args.circle: 80 | env.test_scenario = 'circle_crossing' 81 | if args.test_scenario is not None: 82 | env.test_scenario = args.test_scenario 83 | 84 | 
robot = Robot(env_config, 'robot') 85 | env.set_robot(robot) 86 | robot.time_step = env.time_step 87 | robot.set_policy(policy) 88 | explorer = Explorer(env, robot, device, None, gamma=0.9) 89 | 90 | train_config = config.TrainConfig(args.debug) 91 | epsilon_end = train_config.train.epsilon_end 92 | if not isinstance(robot.policy, ORCA): 93 | robot.policy.set_epsilon(epsilon_end) 94 | 95 | policy.set_phase(args.phase) 96 | policy.set_device(device) 97 | # set safety space for ORCA in non-cooperative simulation 98 | if isinstance(robot.policy, ORCA): 99 | if robot.visible: 100 | robot.policy.safety_space = args.safety_space 101 | else: 102 | robot.policy.safety_space = args.safety_space 103 | logging.info('ORCA agent buffer: %f', robot.policy.safety_space) 104 | 105 | policy.set_env(env) 106 | robot.print_info() 107 | 108 | if args.visualize: 109 | rewards = [] 110 | ob = env.reset(args.phase, args.test_case) 111 | done = False 112 | last_pos = np.array(robot.get_position()) 113 | while not done: 114 | action = robot.act(ob) 115 | ob, _, done, info = env.step(action) 116 | rewards.append(_) 117 | current_pos = np.array(robot.get_position()) 118 | logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step) 119 | last_pos = current_pos 120 | gamma = 0.9 121 | cumulative_reward = sum([pow(gamma, t * robot.time_step * robot.v_pref) 122 | * reward for t, reward in enumerate(rewards)]) 123 | 124 | if args.traj: 125 | env.render('traj', args.video_file) 126 | else: 127 | if args.video_dir is not None: 128 | if policy_config.name == 'gcn': 129 | args.video_file = os.path.join(args.video_dir, policy_config.name + '_' + policy_config.gcn.similarity_function) 130 | else: 131 | args.video_file = os.path.join(args.video_dir, policy_config.name) 132 | args.video_file = args.video_file + '_' + args.phase + '_' + str(args.test_case) + '.mp4' 133 | env.render('video', args.video_file) 134 | logging.info('It takes %.2f seconds to finish. 
Final status is %s, cumulative_reward is %f', env.global_time, info, cumulative_reward) 135 | if robot.visible and info == 'reach goal': 136 | human_times = env.get_human_times() 137 | logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times)) 138 | else: 139 | explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True) 140 | if args.plot_test_scenarios_hist: 141 | test_angle_seeds = np.array(env.test_scene_seeds) 142 | b = [i * 0.01 for i in range(101)] 143 | n, bins, patches = plt.hist(test_angle_seeds, b, facecolor='g') 144 | plt.savefig(os.path.join(args.model_dir, 'test_scene_hist.png')) 145 | plt.close() 146 | 147 | 148 | if __name__ == '__main__': 149 | parser = argparse.ArgumentParser('Parse configuration file') 150 | parser.add_argument('--config', type=str, default=None) 151 | parser.add_argument('--policy', type=str, default='model_predictive_rl') 152 | parser.add_argument('-m', '--model_dir', type=str, default=None) 153 | parser.add_argument('--il', default=False, action='store_true') 154 | parser.add_argument('--rl', default=False, action='store_true') 155 | parser.add_argument('--gpu', default=False, action='store_true') 156 | parser.add_argument('-v', '--visualize', default=False, action='store_true') 157 | parser.add_argument('--phase', type=str, default='test') 158 | parser.add_argument('-c', '--test_case', type=int, default=None) 159 | parser.add_argument('--square', default=False, action='store_true') 160 | parser.add_argument('--circle', default=False, action='store_true') 161 | parser.add_argument('--video_file', type=str, default=None) 162 | parser.add_argument('--video_dir', type=str, default=None) 163 | parser.add_argument('--traj', default=False, action='store_true') 164 | parser.add_argument('--debug', default=False, action='store_true') 165 | parser.add_argument('--human_num', type=int, default=None) 166 | parser.add_argument('--safety_space', type=float, default=0.2) 167 | parser.add_argument('--test_scenario', type=str, default=None) 168 | parser.add_argument('--plot_test_scenarios_hist', default=True, action='store_true') 169 | parser.add_argument('-d', '--planning_depth', type=int, default=None) 170 | parser.add_argument('-w', '--planning_width', type=int, default=None) 171 | parser.add_argument('--sparse_search', default=False, action='store_true') 172 | 173 | sys_args = parser.parse_args() 174 | 175 | main(sys_args) 176 | -------------------------------------------------------------------------------- /crowd_nav/tests/test_polices.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from crowd_nav.train import main 3 | 4 | 5 | def get_default_args(): 6 | parser = argparse.ArgumentParser('Parse configuration file') 7 | parser.add_argument('--policy', type=str, default='cadrl') 8 | parser.add_argument('--config', type=str, default='config.py') 9 | parser.add_argument('--output_dir', type=str, default='data/output') 10 | parser.add_argument('--overwrite', default=False, action='store_true') 11 | parser.add_argument('--weights', type=str) 12 | parser.add_argument('--resume', default=False, action='store_true') 13 | parser.add_argument('--gpu', default=False, action='store_true') 14 | parser.add_argument('--debug', default=False, action='store_true') 15 | args = parser.parse_args(args=[]) 16 | return args 17 | 18 | 19 | def test_sarl(): 20 | args = get_default_args() 21 | args.policy = 'sarl' 22 | args.config = 'configs/tests_config.py' 23 | 
args.debug = True 24 | args.overwrite = True 25 | main(args) 26 | 27 | 28 | def test_cadrl(): 29 | args = get_default_args() 30 | args.policy = 'cadrl' 31 | args.config = 'configs/tests_config.py' 32 | args.debug = True 33 | args.overwrite = True 34 | main(args) 35 | 36 | 37 | def test_lstm_rl(): 38 | args = get_default_args() 39 | args.policy = 'lstm_rl' 40 | args.config = 'configs/tests_config.py' 41 | args.debug = True 42 | args.overwrite = True 43 | main(args) 44 | 45 | 46 | def test_gcn(): 47 | args = get_default_args() 48 | args.policy = 'gcn' 49 | args.config = 'configs/tests_config.py' 50 | args.debug = True 51 | args.overwrite = True 52 | main(args) 53 | -------------------------------------------------------------------------------- /crowd_nav/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | import argparse 4 | import os 5 | import shutil 6 | import importlib.util 7 | import torch 8 | import gym 9 | import copy 10 | import git 11 | import re 12 | from tensorboardX import SummaryWriter 13 | from crowd_sim.envs.utils.robot import Robot 14 | from crowd_nav.utils.trainer import VNRLTrainer, MPRLTrainer 15 | from crowd_nav.utils.memory import ReplayMemory 16 | from crowd_nav.utils.explorer import Explorer 17 | from crowd_nav.policy.policy_factory import policy_factory 18 | 19 | 20 | def set_random_seeds(seed): 21 | """ 22 | Sets the random seeds for pytorch cpu and gpu 23 | """ 24 | torch.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | return None 27 | 28 | 29 | def main(args): 30 | set_random_seeds(args.randomseed) 31 | # configure paths 32 | make_new_dir = True 33 | if os.path.exists(args.output_dir): 34 | if args.overwrite: 35 | shutil.rmtree(args.output_dir) 36 | else: 37 | key = input('Output directory already exists! Overwrite the folder? 
(y/n)') 38 | if key == 'y' and not args.resume: 39 | shutil.rmtree(args.output_dir) 40 | else: 41 | make_new_dir = False 42 | if make_new_dir: 43 | os.makedirs(args.output_dir) 44 | shutil.copy(args.config, os.path.join(args.output_dir, 'config.py')) 45 | 46 | # # insert the arguments from command line to the config file 47 | # with open(os.path.join(args.output_dir, 'config.py'), 'r') as fo: 48 | # config_text = fo.read() 49 | # search_pairs = {r"gcn.X_dim = \d*": "gcn.X_dim = {}".format(args.X_dim), 50 | # r"gcn.num_layer = \d": "gcn.num_layer = {}".format(args.layers), 51 | # r"gcn.similarity_function = '\w*'": "gcn.similarity_function = '{}'".format(args.sim_func), 52 | # r"gcn.layerwise_graph = \w*": "gcn.layerwise_graph = {}".format(args.layerwise_graph), 53 | # r"gcn.skip_connection = \w*": "gcn.skip_connection = {}".format(args.skip_connection)} 54 | # 55 | # for find, replace in search_pairs.items(): 56 | # config_text = re.sub(find, replace, config_text) 57 | # 58 | # with open(os.path.join(args.output_dir, 'config.py'), 'w') as fo: 59 | # fo.write(config_text) 60 | 61 | args.config = os.path.join(args.output_dir, 'config.py') 62 | log_file = os.path.join(args.output_dir, 'output.log') 63 | il_weight_file = os.path.join(args.output_dir, 'il_model.pth') 64 | rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth') 65 | 66 | spec = importlib.util.spec_from_file_location('config', args.config) 67 | if spec is None: 68 | parser.error('Config file not found.') 69 | config = importlib.util.module_from_spec(spec) 70 | spec.loader.exec_module(config) 71 | 72 | # configure logging 73 | mode = 'a' if args.resume else 'w' 74 | file_handler = logging.FileHandler(log_file, mode=mode) 75 | stdout_handler = logging.StreamHandler(sys.stdout) 76 | level = logging.INFO if not args.debug else logging.DEBUG 77 | logging.basicConfig(level=level, handlers=[stdout_handler, file_handler], 78 | format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S") 79 | repo = git.Repo(search_parent_directories=True) 80 | logging.info('Current git head hash code: {}'.format(repo.head.object.hexsha)) 81 | logging.info('Current config content is :{}'.format(config)) 82 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu") 83 | logging.info('Using device: %s', device) 84 | writer = SummaryWriter(log_dir=args.output_dir) 85 | 86 | # configure policy 87 | policy_config = config.PolicyConfig() 88 | policy = policy_factory[policy_config.name]() 89 | if not policy.trainable: 90 | parser.error('Policy has to be trainable') 91 | policy.configure(policy_config) 92 | policy.set_device(device) 93 | 94 | # configure environment 95 | env_config = config.EnvConfig(args.debug) 96 | env = gym.make('CrowdSim-v0') 97 | env.configure(env_config) 98 | robot = Robot(env_config, 'robot') 99 | robot.time_step = env.time_step 100 | env.set_robot(robot) 101 | 102 | # read training parameters 103 | train_config = config.TrainConfig(args.debug) 104 | rl_learning_rate = train_config.train.rl_learning_rate 105 | train_batches = train_config.train.train_batches 106 | train_episodes = train_config.train.train_episodes 107 | sample_episodes = train_config.train.sample_episodes 108 | target_update_interval = train_config.train.target_update_interval 109 | evaluation_interval = train_config.train.evaluation_interval 110 | capacity = train_config.train.capacity 111 | epsilon_start = train_config.train.epsilon_start 112 | epsilon_end = train_config.train.epsilon_end 113 | epsilon_decay = 
train_config.train.epsilon_decay 114 | checkpoint_interval = train_config.train.checkpoint_interval 115 | 116 | # configure trainer and explorer 117 | memory = ReplayMemory(capacity) 118 | model = policy.get_model() 119 | batch_size = train_config.trainer.batch_size 120 | optimizer = train_config.trainer.optimizer 121 | if policy_config.name == 'model_predictive_rl': 122 | trainer = MPRLTrainer(model, policy.state_predictor, memory, device, policy, writer, batch_size, optimizer, env.human_num, 123 | reduce_sp_update_frequency=train_config.train.reduce_sp_update_frequency, 124 | freeze_state_predictor=train_config.train.freeze_state_predictor, 125 | detach_state_predictor=train_config.train.detach_state_predictor, 126 | share_graph_model=policy_config.model_predictive_rl.share_graph_model) 127 | else: 128 | trainer = VNRLTrainer(model, memory, device, policy, batch_size, optimizer, writer) 129 | explorer = Explorer(env, robot, device, writer, memory, policy.gamma, target_policy=policy) 130 | 131 | # imitation learning 132 | if args.resume: 133 | if not os.path.exists(rl_weight_file): 134 | logging.error('RL weights does not exist') 135 | model.load_state_dict(torch.load(rl_weight_file)) 136 | rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth') 137 | logging.info('Load reinforcement learning trained weights. Resume training') 138 | elif os.path.exists(il_weight_file): 139 | model.load_state_dict(torch.load(il_weight_file)) 140 | logging.info('Load imitation learning trained weights.') 141 | else: 142 | il_episodes = train_config.imitation_learning.il_episodes 143 | il_policy = train_config.imitation_learning.il_policy 144 | il_epochs = train_config.imitation_learning.il_epochs 145 | il_learning_rate = train_config.imitation_learning.il_learning_rate 146 | trainer.set_learning_rate(il_learning_rate) 147 | if robot.visible: 148 | safety_space = 0 149 | else: 150 | safety_space = train_config.imitation_learning.safety_space 151 | il_policy = policy_factory[il_policy]() 152 | il_policy.multiagent_training = policy.multiagent_training 153 | il_policy.safety_space = safety_space 154 | robot.set_policy(il_policy) 155 | explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True) 156 | trainer.optimize_epoch(il_epochs) 157 | policy.save_model(il_weight_file) 158 | logging.info('Finish imitation learning. 
Weights saved.') 159 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity) 160 | 161 | trainer.update_target_model(model) 162 | 163 | # reinforcement learning 164 | policy.set_env(env) 165 | robot.set_policy(policy) 166 | robot.print_info() 167 | trainer.set_learning_rate(rl_learning_rate) 168 | # fill the memory pool with some RL experience 169 | if args.resume: 170 | robot.policy.set_epsilon(epsilon_end) 171 | explorer.run_k_episodes(100, 'train', update_memory=True, episode=0) 172 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity) 173 | episode = 0 174 | best_val_reward = -1 175 | best_val_model = None 176 | # evaluate the model after imitation learning 177 | 178 | if episode % evaluation_interval == 0: 179 | logging.info('Evaluate the model instantly after imitation learning on the validation cases') 180 | explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode) 181 | explorer.log('val', episode // evaluation_interval) 182 | 183 | if args.test_after_every_eval: 184 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True) 185 | explorer.log('test', episode // evaluation_interval) 186 | 187 | episode = 0 188 | while episode < train_episodes: 189 | if args.resume: 190 | epsilon = epsilon_end 191 | else: 192 | if episode < epsilon_decay: 193 | epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode 194 | else: 195 | epsilon = epsilon_end 196 | robot.policy.set_epsilon(epsilon) 197 | 198 | # sample k episodes into memory and optimize over the generated memory 199 | explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode) 200 | explorer.log('train', episode) 201 | 202 | trainer.optimize_batch(train_batches, episode) 203 | episode += 1 204 | 205 | if episode % target_update_interval == 0: 206 | trainer.update_target_model(model) 207 | # evaluate the model 208 | if episode % evaluation_interval == 0: 209 | _, _, _, reward, _ = explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode) 210 | explorer.log('val', episode // evaluation_interval) 211 | 212 | if episode % checkpoint_interval == 0 and reward > best_val_reward: 213 | best_val_reward = reward 214 | best_val_model = copy.deepcopy(policy.get_state_dict()) 215 | # test after every evaluation to check how the generalization performance evolves 216 | if args.test_after_every_eval: 217 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True) 218 | explorer.log('test', episode // evaluation_interval) 219 | 220 | if episode != 0 and episode % checkpoint_interval == 0: 221 | current_checkpoint = episode // checkpoint_interval - 1 222 | save_every_checkpoint_rl_weight_file = rl_weight_file.split('.')[0] + '_' + str(current_checkpoint) + '.pth' 223 | policy.save_model(save_every_checkpoint_rl_weight_file) 224 | 225 | # # test with the best val model 226 | if best_val_model is not None: 227 | policy.load_state_dict(best_val_model) 228 | torch.save(best_val_model, os.path.join(args.output_dir, 'best_val.pth')) 229 | logging.info('Save the best val model with the reward: {}'.format(best_val_reward)) 230 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True) 231 | 232 | 233 | if __name__ == '__main__': 234 | parser = argparse.ArgumentParser('Parse configuration file') 235 | parser.add_argument('--policy', type=str, default='model_predictive_rl') 236 | parser.add_argument('--config', type=str, 
default='configs/icra_benchmark/mp_separate.py') 237 | parser.add_argument('--output_dir', type=str, default='data/output') 238 | parser.add_argument('--overwrite', default=False, action='store_true') 239 | parser.add_argument('--weights', type=str) 240 | parser.add_argument('--resume', default=False, action='store_true') 241 | parser.add_argument('--gpu', default=False, action='store_true') 242 | parser.add_argument('--debug', default=False, action='store_true') 243 | parser.add_argument('--test_after_every_eval', default=False, action='store_true') 244 | parser.add_argument('--randomseed', type=int, default=17) 245 | 246 | # arguments for GCN 247 | # parser.add_argument('--X_dim', type=int, default=32) 248 | # parser.add_argument('--layers', type=int, default=2) 249 | # parser.add_argument('--sim_func', type=str, default='embedded_gaussian') 250 | # parser.add_argument('--layerwise_graph', default=False, action='store_true') 251 | # parser.add_argument('--skip_connection', default=True, action='store_true') 252 | 253 | sys_args = parser.parse_args() 254 | 255 | main(sys_args) 256 | -------------------------------------------------------------------------------- /crowd_nav/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/utils/__init__.py -------------------------------------------------------------------------------- /crowd_nav/utils/explorer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import copy 4 | import torch 5 | from tqdm import tqdm 6 | from crowd_sim.envs.utils.info import * 7 | 8 | 9 | class Explorer(object): 10 | def __init__(self, env, robot, device, writer, memory=None, gamma=None, target_policy=None): 11 | self.env = env 12 | self.robot = robot 13 | self.device = device 14 | self.writer = writer 15 | self.memory = memory 16 | self.gamma = gamma 17 | self.target_policy = target_policy 18 | self.statistics = None 19 | 20 | # @profile 21 | def run_k_episodes(self, k, phase, update_memory=False, imitation_learning=False, episode=None, epoch=None, 22 | print_failure=False): 23 | self.robot.policy.set_phase(phase) 24 | success_times = [] 25 | collision_times = [] 26 | timeout_times = [] 27 | success = 0 28 | collision = 0 29 | timeout = 0 30 | discomfort = 0 31 | min_dist = [] 32 | cumulative_rewards = [] 33 | average_returns = [] 34 | collision_cases = [] 35 | timeout_cases = [] 36 | 37 | if k != 1: 38 | pbar = tqdm(total=k) 39 | else: 40 | pbar = None 41 | 42 | for i in range(k): 43 | ob = self.env.reset(phase) 44 | done = False 45 | states = [] 46 | actions = [] 47 | rewards = [] 48 | while not done: 49 | action = self.robot.act(ob) 50 | ob, reward, done, info = self.env.step(action) 51 | states.append(self.robot.policy.last_state) 52 | actions.append(action) 53 | rewards.append(reward) 54 | 55 | if isinstance(info, Discomfort): 56 | discomfort += 1 57 | min_dist.append(info.min_dist) 58 | 59 | if isinstance(info, ReachGoal): 60 | success += 1 61 | success_times.append(self.env.global_time) 62 | elif isinstance(info, Collision): 63 | collision += 1 64 | collision_cases.append(i) 65 | collision_times.append(self.env.global_time) 66 | elif isinstance(info, Timeout): 67 | timeout += 1 68 | timeout_cases.append(i) 69 | timeout_times.append(self.env.time_limit) 70 | else: 71 | raise ValueError('Invalid end signal from environment') 72 | 73 
| if update_memory: 74 | if isinstance(info, ReachGoal) or isinstance(info, Collision): 75 | # only add positive(success) or negative(collision) experience in experience set 76 | self.update_memory(states, actions, rewards, imitation_learning) 77 | 78 | cumulative_rewards.append(sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref) 79 | * reward for t, reward in enumerate(rewards)])) 80 | returns = [] 81 | for step in range(len(rewards)): 82 | step_return = sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref) 83 | * reward for t, reward in enumerate(rewards[step:])]) 84 | returns.append(step_return) 85 | average_returns.append(average(returns)) 86 | 87 | if pbar: 88 | pbar.update(1) 89 | success_rate = success / k 90 | collision_rate = collision / k 91 | assert success + collision + timeout == k 92 | avg_nav_time = sum(success_times) / len(success_times) if success_times else self.env.time_limit 93 | 94 | extra_info = '' if episode is None else 'in episode {} '.format(episode) 95 | extra_info = extra_info + '' if epoch is None else extra_info + ' in epoch {} '.format(epoch) 96 | logging.info('{:<5} {}has success rate: {:.2f}, collision rate: {:.2f}, nav time: {:.2f}, total reward: {:.4f},' 97 | ' average return: {:.4f}'. format(phase.upper(), extra_info, success_rate, collision_rate, 98 | avg_nav_time, average(cumulative_rewards), 99 | average(average_returns))) 100 | if phase in ['val', 'test']: 101 | total_time = sum(success_times + collision_times + timeout_times) 102 | logging.info('Frequency of being in danger: %.2f and average min separate distance in danger: %.2f', 103 | discomfort / total_time, average(min_dist)) 104 | 105 | if print_failure: 106 | logging.info('Collision cases: ' + ' '.join([str(x) for x in collision_cases])) 107 | logging.info('Timeout cases: ' + ' '.join([str(x) for x in timeout_cases])) 108 | 109 | self.statistics = success_rate, collision_rate, avg_nav_time, average(cumulative_rewards), average(average_returns) 110 | 111 | return self.statistics 112 | 113 | def update_memory(self, states, actions, rewards, imitation_learning=False): 114 | if self.memory is None or self.gamma is None: 115 | raise ValueError('Memory or gamma value is not set!') 116 | 117 | for i, state in enumerate(states[:-1]): 118 | reward = rewards[i] 119 | 120 | # VALUE UPDATE 121 | if imitation_learning: 122 | # define the value of states in IL as cumulative discounted rewards, which is the same in RL 123 | state = self.target_policy.transform(state) 124 | next_state = self.target_policy.transform(states[i+1]) 125 | value = sum([pow(self.gamma, (t - i) * self.robot.time_step * self.robot.v_pref) * reward * 126 | (1 if t >= i else 0) for t, reward in enumerate(rewards)]) 127 | else: 128 | next_state = states[i+1] 129 | if i == len(states) - 1: 130 | # terminal state 131 | value = reward 132 | else: 133 | value = 0 134 | value = torch.Tensor([value]).to(self.device) 135 | reward = torch.Tensor([rewards[i]]).to(self.device) 136 | 137 | if self.target_policy.name == 'ModelPredictiveRL': 138 | self.memory.push((state[0], state[1], value, reward, next_state[0], next_state[1])) 139 | else: 140 | self.memory.push((state, value, reward, next_state)) 141 | 142 | def log(self, tag_prefix, global_step): 143 | sr, cr, time, reward, avg_return = self.statistics 144 | self.writer.add_scalar(tag_prefix + '/success_rate', sr, global_step) 145 | self.writer.add_scalar(tag_prefix + '/collision_rate', cr, global_step) 146 | self.writer.add_scalar(tag_prefix + '/time', time, 
global_step) 147 | self.writer.add_scalar(tag_prefix + '/reward', reward, global_step) 148 | self.writer.add_scalar(tag_prefix + '/avg_return', avg_return, global_step) 149 | 150 | 151 | def average(input_list): 152 | if input_list: 153 | return sum(input_list) / len(input_list) 154 | else: 155 | return 0 156 | -------------------------------------------------------------------------------- /crowd_nav/utils/memory.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | 4 | class ReplayMemory(Dataset): 5 | def __init__(self, capacity): 6 | self.capacity = capacity 7 | self.memory = list() 8 | self.position = 0 9 | 10 | def push(self, item): 11 | # replace old experience with new experience 12 | if len(self.memory) < self.position + 1: 13 | self.memory.append(item) 14 | else: 15 | self.memory[self.position] = item 16 | self.position = (self.position + 1) % self.capacity 17 | 18 | def is_full(self): 19 | return len(self.memory) == self.capacity 20 | 21 | def __getitem__(self, item): 22 | return self.memory[item] 23 | 24 | def __len__(self): 25 | return len(self.memory) 26 | 27 | def clear(self): 28 | self.memory = list() 29 | -------------------------------------------------------------------------------- /crowd_nav/utils/plot.py: -------------------------------------------------------------------------------- 1 | import re 2 | import argparse 3 | import os 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | 8 | def running_mean(x, n): 9 | cumsum = np.cumsum(np.insert(x, 0, 0)) 10 | return (cumsum[n:] - cumsum[:-n]) / float(n) 11 | 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('log_files', type=str, nargs='+') 16 | parser.add_argument('--plot_sr', default=False, action='store_true') 17 | parser.add_argument('--plot_cr', default=False, action='store_true') 18 | parser.add_argument('--plot_time', default=False, action='store_true') 19 | parser.add_argument('--plot_reward', default=True, action='store_true') 20 | parser.add_argument('--plot_train', default=True, action='store_true') 21 | parser.add_argument('--plot_val', default=False, action='store_true') 22 | parser.add_argument('--plot_all', default=False, action='store_true') 23 | parser.add_argument('--window_size', type=int, default=100) 24 | args = parser.parse_args() 25 | 26 | models = [] 27 | max_episodes = None 28 | 29 | ax1 = ax2 = ax3 = ax4 = None 30 | ax1_legends = [] 31 | ax2_legends = [] 32 | ax3_legends = [] 33 | ax4_legends = [] 34 | 35 | if args.plot_all: 36 | log_dir = args.log_files[0] 37 | if not os.path.isdir(log_dir): 38 | parser.error('Input argument should be the directory containing all experiment folders') 39 | # args.log_files = [os.path.join(log_dir, exp_dir, 'output.log') for exp_dir in os.listdir(log_dir)] 40 | args.log_files = [os.path.join(log_dir, exp_dir, 'output.log') for exp_dir in 41 | ['sarl_linear_adam', 'mp_detach_skip', 'rgl_linear_adam', 'rgl_no_transformation', 42 | 'mp_separate_graph']] 43 | 44 | args.log_files = sorted(args.log_files) 45 | if not models: 46 | models = [os.path.basename(log_file[:-11]) for log_file in args.log_files] 47 | for i, log_file in enumerate(args.log_files): 48 | with open(log_file, 'r') as file: 49 | log = file.read() 50 | 51 | val_pattern = r"VAL in episode (?P\d+) has success rate: (?P[0-1].\d+), " \ 52 | r"collision rate: (?P[0-1].\d+), nav time: (?P