├── .gitignore
├── .pylintrc
├── README.md
├── crowd_nav
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   └── icra_benchmark
│   │       ├── __init__.py
│   │       ├── config.py
│   │       ├── mp_detach.py
│   │       ├── mp_linear.py
│   │       ├── mp_separate.py
│   │       ├── mp_separate_dp.py
│   │       ├── rgl.py
│   │       └── sarl.py
│   ├── policy
│   │   ├── cadrl.py
│   │   ├── gcn.py
│   │   ├── graph_model.py
│   │   ├── helpers.py
│   │   ├── lstm_rl.py
│   │   ├── model_predictive_rl.py
│   │   ├── multi_human_rl.py
│   │   ├── policy_factory.py
│   │   ├── sarl.py
│   │   ├── state_predictor.py
│   │   └── value_estimator.py
│   ├── test.py
│   ├── tests
│   │   └── test_polices.py
│   ├── train.py
│   └── utils
│       ├── __init__.py
│       ├── explorer.py
│       ├── memory.py
│       ├── plot.py
│       ├── plot2.py
│       └── trainer.py
├── crowd_sim
│   ├── README.md
│   ├── __init__.py
│   └── envs
│       ├── __init__.py
│       ├── crowd_sim.py
│       ├── policy
│       │   ├── __init__.py
│       │   ├── linear.py
│       │   ├── orca.py
│       │   ├── policy.py
│       │   ├── policy_factory.py
│       │   └── socialforce.py
│       ├── realsim_utils
│       │   ├── GrandCentral.py
│       │   └── __init__.py
│       └── utils
│           ├── __init__.py
│           ├── action.py
│           ├── agent.py
│           ├── human.py
│           ├── info.py
│           ├── robot.py
│           ├── state.py
│           └── utils.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # editors
2 | .idea/
3 | .vscode/
4 |
5 | # Python
6 | *.egg-info/
7 | .pytest_cache/
8 | __pycache__
9 | .cache/
10 | venv*/
11 |
12 | # data
13 | data/
14 | *.mp4
15 | videos/
16 | *.log
17 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [BASIC]
2 |
3 | #variable-rgx=[a-z0-9_]{1,30}$
4 | good-names=dx,dy,i,j,k,px,py,r,th,vx,vy,x,y,z,ob
5 |
6 | [TYPECHECK]
7 |
8 | # List of members which are set dynamically and missed by pylint inference
9 | # system, and so shouldn't trigger E1101 when accessed. Python regular
10 | # expressions are accepted.
11 | generated-members=numpy.*,torch.*
12 |
13 |
14 | disable=missing-docstring,useless-object-inheritance
15 | max-line-length=120
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RelationalGraphLearning
2 | This repository contains the code for our paper, which was accepted at IROS 2020.
3 | For more details, please refer to the [paper](https://arxiv.org/abs/1909.13165).
4 |
5 |
6 | ## Abstract
7 | We present a relational graph learning approach for robotic crowd navigation using model-based deep reinforcement
8 | learning that plans actions by looking into the future.
9 | Our approach reasons about the relations between all agents based on their latent features and uses a Graph Convolutional
10 | Network to encode higher-order interactions in each agent's state representation, which is subsequently leveraged for
11 | state prediction and value estimation.
12 | The ability to predict human motion allows us to perform multi-step lookahead planning, taking into account the temporal
13 | evolution of human crowds.
14 | We evaluate our approach against a state-of-the-art baseline for crowd navigation and ablations of our model to
15 | demonstrate that navigation with our approach is more efficient, results in fewer collisions, and avoids failure cases
16 | involving oscillatory and freezing behaviors.
17 |
18 |
19 |
20 | ## Method Overview
21 |
22 |
23 |
24 | ## Setup
25 | 1. Install the [Python-RVO2](https://github.com/sybrenstuvel/Python-RVO2) library
26 | 2. Install the [socialforce](https://github.com/ChanganVR/socialforce) library
27 | 3. Install crowd_sim and crowd_nav with pip
28 | ```
29 | pip install -e .
30 | ```
31 |
32 | ## Getting Started
33 | This repository is organized into two parts: the crowd_sim/ folder contains the simulation environment and the
34 | crowd_nav/ folder contains the code for training and testing the policies. Details of the simulation framework can
35 | be found [here](crowd_sim/README.md). Below are the instructions for training and testing policies; they should be
36 | executed inside the crowd_nav/ folder.
37 |
38 |
39 | 1. Train a policy.
40 | ```
41 | python train.py --policy rgl
42 | ```
43 | 2. Test policies with 500 test cases.
44 | ```
45 | python test.py --policy rgl --model_dir data/output --phase test
46 | ```
47 | 3. Run a policy for one episode and visualize the result.
48 | ```
49 | python test.py --policy rgl --model_dir data/output --phase test --visualize --test_case 0
50 | ```
51 | 4. Plot the training curve.
52 | ```
53 | python utils/plot.py data/output/output.log
54 | ```
55 |
56 |
57 |
58 | ## Video Demo
59 | [Video demo](https://youtu.be/U3quW30Eu3A)
60 |
61 |
62 | ## Citation
63 | If you find the code or paper useful for your research, please cite the following papers:
64 | ```bibtex
65 | @inproceedings{chen2020relational,
66 | title={Relational Graph Learning for Crowd Navigation},
67 | author={Changan Chen and Sha Hu and Payam Nikdel and Greg Mori and Manolis Savva},
68 | year={2020},
69 | booktitle={IROS}
70 | }
71 | @inproceedings{chen2019crowdnav,
72 | title={Crowd-Robot Interaction: Crowd-aware Robot Navigation with Attention-based Deep Reinforcement Learning},
73 | author={Changan Chen and Yuejiang Liu and Sven Kreiss and Alexandre Alahi},
74 | year={2019},
75 | booktitle={ICRA}
76 | }
77 | ```
78 |
--------------------------------------------------------------------------------
/crowd_nav/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/configs/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/configs/icra_benchmark/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Never Modify this file! Always copy the settings you want to change to your local file.
3 | """
4 |
5 |
6 | import numpy as np
7 |
8 |
9 | class Config(object):
10 | def __init__(self):
11 | pass
12 |
13 |
14 | class BaseEnvConfig(object):
15 | env = Config()
16 | env.time_limit = 30
17 | env.time_step = 0.25
18 | env.val_size = 100
19 | env.test_size = 500
20 | env.train_size = np.iinfo(np.uint32).max - 2000
21 | env.randomize_attributes = False
22 | env.robot_sensor_range = 5
23 |
24 | reward = Config()
25 | reward.success_reward = 1
26 | reward.collision_penalty = -0.25
27 | reward.discomfort_dist = 0.2
28 | reward.discomfort_penalty_factor = 0.5
29 |
30 | sim = Config()
31 | sim.train_val_scenario = 'circle_crossing'
32 | sim.test_scenario = 'circle_crossing'
33 | sim.square_width = 20
34 | sim.circle_radius = 4
35 | sim.human_num = 5
36 | sim.nonstop_human = False
37 | sim.centralized_planning = True
38 |
39 | humans = Config()
40 | humans.visible = True
41 | humans.policy = 'orca'
42 | humans.radius = 0.3
43 | humans.v_pref = 1
44 | humans.sensor = 'coordinates'
45 |
46 | robot = Config()
47 | robot.visible = False
48 | robot.policy = 'none'
49 | robot.radius = 0.3
50 | robot.v_pref = 1
51 | robot.sensor = 'coordinates'
52 |
53 | def __init__(self, debug=False):
54 | if debug:
55 | self.env.val_size = 1
56 | self.env.test_size = 1
57 |
58 |
59 | class BasePolicyConfig(object):
60 | rl = Config()
61 | rl.gamma = 0.9
62 |
63 | om = Config()
64 | om.cell_num = 4
65 | om.cell_size = 1
66 | om.om_channel_size = 3
67 |
68 | action_space = Config()
69 | action_space.kinematics = 'holonomic'
70 | action_space.speed_samples = 5
71 | action_space.rotation_samples = 16
72 | action_space.sampling = 'exponential'
73 | action_space.query_env = False
74 | action_space.rotation_constraint = np.pi / 3
75 |
76 | cadrl = Config()
77 | cadrl.mlp_dims = [150, 100, 100, 1]
78 | cadrl.multiagent_training = False
79 |
80 | lstm_rl = Config()
81 | lstm_rl.global_state_dim = 50
82 | lstm_rl.mlp1_dims = [150, 100, 100, 50]
83 | lstm_rl.mlp2_dims = [150, 100, 100, 1]
84 | lstm_rl.multiagent_training = True
85 | lstm_rl.with_om = False
86 | lstm_rl.with_interaction_module = True
87 |
88 | srl = Config()
89 | srl.mlp1_dims = [150, 100, 100, 50]
90 | srl.mlp2_dims = [150, 100, 100, 1]
91 | srl.multiagent_training = True
92 | srl.with_om = True
93 |
94 | sarl = Config()
95 | sarl.mlp1_dims = [150, 100]
96 | sarl.mlp2_dims = [100, 50]
97 | sarl.attention_dims = [100, 100, 1]
98 | sarl.mlp3_dims = [150, 100, 100, 1]
99 | sarl.multiagent_training = True
100 | sarl.with_om = True
101 | sarl.with_global_state = True
102 |
103 | gcn = Config()
104 | gcn.multiagent_training = True
105 | gcn.num_layer = 2
106 | gcn.X_dim = 32
107 | gcn.wr_dims = [64, gcn.X_dim]
108 | gcn.wh_dims = [64, gcn.X_dim]
109 | gcn.final_state_dim = gcn.X_dim
110 | gcn.gcn2_w1_dim = gcn.X_dim
111 | gcn.planning_dims = [150, 100, 100, 1]
112 | gcn.similarity_function = 'embedded_gaussian'
113 | gcn.layerwise_graph = True
114 | gcn.skip_connection = False
115 |
116 | gnn = Config()
117 | gnn.multiagent_training = True
118 | gnn.node_dim = 32
119 | gnn.wr_dims = [64, gnn.node_dim]
120 | gnn.wh_dims = [64, gnn.node_dim]
121 | gnn.edge_dim = 32
122 | gnn.planning_dims = [150, 100, 100, 1]
123 |
124 | def __init__(self, debug=False):
125 | pass
126 |
127 |
128 | class BaseTrainConfig(object):
129 | trainer = Config()
130 | trainer.batch_size = 100
131 | trainer.optimizer = 'Adam'
132 |
133 | imitation_learning = Config()
134 | imitation_learning.il_episodes = 2000
135 | imitation_learning.il_policy = 'orca'
136 | imitation_learning.il_epochs = 50
137 | imitation_learning.il_learning_rate = 0.001
138 | imitation_learning.safety_space = 0.15
139 |
140 | train = Config()
141 | train.rl_train_epochs = 1
142 | train.rl_learning_rate = 0.001
143 |     # number of batches to train at the end of each training episode
144 | train.train_batches = 100
145 | # training episodes in outer loop
146 | train.train_episodes = 10000
147 | # number of episodes sampled in one training episode
148 | train.sample_episodes = 1
149 | train.target_update_interval = 1000
150 | train.evaluation_interval = 1000
151 | # the memory pool can roughly store 2K episodes, total size = episodes * 50
152 | train.capacity = 100000
153 | train.epsilon_start = 0.5
154 | train.epsilon_end = 0.1
155 | train.epsilon_decay = 4000
156 | train.checkpoint_interval = 1000
157 |
158 | train.train_with_pretend_batch = False
159 |
160 | def __init__(self, debug=False):
161 | if debug:
162 | self.imitation_learning.il_episodes = 10
163 | self.imitation_learning.il_epochs = 5
164 | self.train.train_episodes = 1
165 | self.train.checkpoint_interval = self.train.train_episodes
166 | self.train.evaluation_interval = 1
167 | self.train.target_update_interval = 1
168 |
--------------------------------------------------------------------------------
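A minimal usage sketch (not part of the repository) showing how these benchmark configs are consumed: an experiment module such as rgl.py below subclasses the three base classes, and the debug flag shrinks the evaluation sizes and training schedules as defined above.

```
# Sketch: instantiate the rgl benchmark configs and read a few values.
from crowd_nav.configs.icra_benchmark.rgl import EnvConfig, PolicyConfig, TrainConfig

env_config = EnvConfig(debug=True)
policy_config = PolicyConfig(debug=True)
train_config = TrainConfig(debug=True)

print(env_config.env.time_step)               # 0.25
print(env_config.env.test_size)               # 1 in debug mode, 500 otherwise
print(policy_config.gcn.similarity_function)  # 'embedded_gaussian'
print(train_config.train.train_episodes)      # 1 in debug mode, 10000 otherwise
```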
/crowd_nav/configs/icra_benchmark/mp_detach.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = False
23 | self.model_predictive_rl.planning_depth = 1
24 | self.model_predictive_rl.planning_width = 1
25 | self.model_predictive_rl.do_action_clip = False
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = True
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = True
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/mp_linear.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = True
23 | self.model_predictive_rl.planning_depth = 1
24 | self.model_predictive_rl.planning_width = 1
25 | self.model_predictive_rl.do_action_clip = False
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = False
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = False
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/mp_separate.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = False
23 | self.model_predictive_rl.planning_depth = 1
24 | self.model_predictive_rl.planning_width = 1
25 | self.model_predictive_rl.do_action_clip = False
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = False
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = False
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/mp_separate_dp.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = False
23 | self.model_predictive_rl.planning_depth = 2
24 | self.model_predictive_rl.planning_width = 2
25 | self.model_predictive_rl.do_action_clip = True
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = False
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = False
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/rgl.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'gcn'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 |
22 | class TrainConfig(BaseTrainConfig):
23 | def __init__(self, debug=False):
24 | super(TrainConfig, self).__init__(debug)
25 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/sarl.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'sarl'
13 |
14 |
15 | class TrainConfig(BaseTrainConfig):
16 | def __init__(self, debug=False):
17 | super(TrainConfig, self).__init__(debug)
18 |
--------------------------------------------------------------------------------
/crowd_nav/policy/cadrl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import itertools
5 | import logging
6 | from crowd_sim.envs.policy.policy import Policy
7 | from crowd_sim.envs.utils.action import ActionRot, ActionXY
8 | from crowd_sim.envs.utils.state import ObservableState, FullState
9 |
10 |
11 | def mlp(input_dim, mlp_dims, last_relu=False):
12 | layers = []
13 | mlp_dims = [input_dim] + mlp_dims
14 | for i in range(len(mlp_dims) - 1):
15 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1]))
16 | if i != len(mlp_dims) - 2 or last_relu:
17 | layers.append(nn.ReLU())
18 | net = nn.Sequential(*layers)
19 | return net
20 |
21 |
22 | class ValueNetwork(nn.Module):
23 | def __init__(self, input_dim, mlp_dims):
24 | super().__init__()
25 | self.value_network = mlp(input_dim, mlp_dims)
26 |
27 | def forward(self, state_input):
28 | # input size: (batch_size, # of humans, state_length)
29 | if isinstance(state_input, tuple):
30 | state = state_input[0]
31 | else:
32 | state = state_input
33 |
34 | value = self.value_network(state.squeeze(dim=1))
35 | return value
36 |
37 |
38 | class CADRL(Policy):
39 | def __init__(self):
40 | super().__init__()
41 | self.name = 'CADRL'
42 | self.trainable = True
43 | self.multiagent_training = None
44 | self.kinematics = None
45 | self.epsilon = None
46 | self.gamma = None
47 | self.sampling = None
48 | self.speed_samples = None
49 | self.rotation_samples = None
50 | self.query_env = None
51 | self.action_space = None
52 | self.rotation_constraint = None
53 | self.speeds = None
54 | self.rotations = None
55 | self.action_values = None
56 | self.with_om = None
57 | self.cell_num = None
58 | self.cell_size = None
59 | self.om_channel_size = None
60 | self.self_state_dim = 6
61 | self.human_state_dim = 7
62 | self.joint_state_dim = self.self_state_dim + self.human_state_dim
63 |
64 | def configure(self, config):
65 | self.set_common_parameters(config)
66 | self.model = ValueNetwork(self.joint_state_dim, config.cadrl.mlp_dims)
67 | self.multiagent_training = config.cadrl.multiagent_training
68 | logging.info('Policy: CADRL without occupancy map')
69 |
70 | def set_common_parameters(self, config):
71 | self.gamma = config.rl.gamma
72 | self.kinematics = config.action_space.kinematics
73 | self.sampling = config.action_space.sampling
74 | self.speed_samples = config.action_space.speed_samples
75 | self.rotation_samples = config.action_space.rotation_samples
76 | self.query_env = config.action_space.query_env
77 | self.rotation_constraint = config.action_space.rotation_constraint
78 | self.cell_num = config.om.cell_num
79 | self.cell_size = config.om.cell_size
80 | self.om_channel_size = config.om.om_channel_size
81 |
82 | logging.info('Query environment: {}'.format(self.query_env))
83 |
84 | def set_device(self, device):
85 | self.device = device
86 | self.model.to(device)
87 |
88 | def set_epsilon(self, epsilon):
89 | self.epsilon = epsilon
90 |
91 | def build_action_space(self, v_pref):
92 | """
93 |         Action space consists of a stop action plus speed_samples x rotation_samples actions sampled in the permitted range.
94 | """
95 | holonomic = True if self.kinematics == 'holonomic' else False
96 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)]
97 | if holonomic:
98 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
99 | else:
100 | rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples)
101 |
102 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
103 | for rotation, speed in itertools.product(rotations, speeds):
104 | if holonomic:
105 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
106 | else:
107 | action_space.append(ActionRot(speed, rotation))
108 |
109 | self.speeds = speeds
110 | self.rotations = rotations
111 | self.action_space = action_space
112 |
113 | def propagate(self, state, action):
114 | if isinstance(state, ObservableState):
115 | # propagate state of humans
116 | next_px = state.px + action.vx * self.time_step
117 | next_py = state.py + action.vy * self.time_step
118 | next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius)
119 | elif isinstance(state, FullState):
120 | # propagate state of current agent
121 | # perform action without rotation
122 | if self.kinematics == 'holonomic':
123 | next_px = state.px + action.vx * self.time_step
124 | next_py = state.py + action.vy * self.time_step
125 | next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius,
126 | state.gx, state.gy, state.v_pref, state.theta)
127 | else:
128 | next_theta = state.theta + action.r
129 | next_vx = action.v * np.cos(next_theta)
130 | next_vy = action.v * np.sin(next_theta)
131 | next_px = state.px + next_vx * self.time_step
132 | next_py = state.py + next_vy * self.time_step
133 | next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy,
134 | state.v_pref, next_theta)
135 | else:
136 | raise ValueError('Type error')
137 |
138 | return next_state
139 |
140 | def predict(self, state):
141 | """
142 |         Input state is the joint state of the robot concatenated with the observable states of other agents
143 | 
144 |         To predict the best action, the agent samples actions and propagates one step forward to see how good the
145 |         next state is; the reward function is therefore needed
146 |
147 | """
148 | if self.phase is None or self.device is None:
149 | raise AttributeError('Phase, device attributes have to be set!')
150 | if self.phase == 'train' and self.epsilon is None:
151 | raise AttributeError('Epsilon attribute has to be set in training phase')
152 |
153 | if self.reach_destination(state):
154 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
155 | if self.action_space is None:
156 | self.build_action_space(state.self_state.v_pref)
157 | if not state.human_states:
158 | assert self.phase != 'train'
159 | return self.select_greedy_action(state.self_state)
160 |
161 | probability = np.random.random()
162 | if self.phase == 'train' and probability < self.epsilon:
163 | max_action = self.action_space[np.random.choice(len(self.action_space))]
164 | else:
165 | self.action_values = list()
166 | max_min_value = float('-inf')
167 | max_action = None
168 | for action in self.action_space:
169 | next_self_state = self.propagate(state.self_state, action)
170 | if self.query_env:
171 | next_human_states, reward, done, info = self.env.onestep_lookahead(action)
172 | else:
173 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
174 | for human_state in state.human_states]
175 | reward = self.compute_reward(next_self_state, next_human_states)
176 | batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
177 | for next_human_state in next_human_states], dim=0)
178 |
179 | # VALUE UPDATE
180 | outputs = self.model(self.rotate(batch_next_states))
181 | min_output, min_index = torch.min(outputs, 0)
182 | min_value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * min_output.data.item()
183 | self.action_values.append(min_value)
184 | if min_value > max_min_value:
185 | max_min_value = min_value
186 | max_action = action
187 |
188 | if self.phase == 'train':
189 | self.last_state = self.transform(state)
190 |
191 | return max_action
192 |
193 | def select_greedy_action(self, self_state):
194 | # find the greedy action given kinematic constraints and return the closest action in the action space
195 | direction = np.arctan2(self_state.gy - self_state.py, self_state.gx - self_state.px)
196 | distance = np.linalg.norm((self_state.gy - self_state.py, self_state.gx - self_state.px))
197 | if self.kinematics == 'holonomic':
198 | speed = min(distance / self.time_step, self_state.v_pref)
199 | vx = np.cos(direction) * speed
200 | vy = np.sin(direction) * speed
201 |
202 | min_diff = float('inf')
203 | closest_action = None
204 | for action in self.action_space:
205 | diff = np.linalg.norm(np.array(action) - np.array((vx, vy)))
206 | if diff < min_diff:
207 | min_diff = diff
208 | closest_action = action
209 | else:
210 | rotation = direction - self_state.theta
211 | # if goal is not in the field of view, always rotate first
212 | if rotation < self.rotations[0]:
213 | closest_action = ActionRot(self.speeds[0], self.rotations[0])
214 | elif rotation > self.rotations[-1]:
215 | closest_action = ActionRot(self.speeds[0], self.rotations[-1])
216 | else:
217 | speed = min(distance / self.time_step, self_state.v_pref)
218 |
219 | min_diff = float('inf')
220 | closest_action = None
221 | for action in self.action_space:
222 |                 diff = np.linalg.norm(np.array((np.cos(action.r) * action.v, np.sin(action.r) * action.v)) -
223 |                                       np.array((np.cos(rotation) * speed, np.sin(rotation) * speed)))
224 | if diff < min_diff:
225 | min_diff = diff
226 | closest_action = action
227 |
228 | return closest_action
229 |
230 | def transform(self, state):
231 | """
232 | Take the state passed from agent and transform it to tensor for batch training
233 |
234 | :param state:
235 | :return: tensor of shape (len(state), )
236 | """
237 | state = torch.Tensor([state.self_state + state.human_states[0]]).to(self.device)
238 | state = self.rotate(state)
239 | return state
240 |
241 | def rotate(self, state):
242 | """
243 | Transform the coordinate to agent-centric.
244 | Input state tensor is of size (batch_size, state_length)
245 |
246 | """
247 | # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta', 'px1', 'py1', 'vx1', 'vy1', 'radius1'
248 | # 0 1 2 3 4 5 6 7 8 9 10 11 12 13
249 | batch = state.shape[0]
250 | dx = (state[:, 5] - state[:, 0]).reshape((batch, -1))
251 | dy = (state[:, 6] - state[:, 1]).reshape((batch, -1))
252 | rot = torch.atan2(state[:, 6] - state[:, 1], state[:, 5] - state[:, 0])
253 |
254 | dg = torch.norm(torch.cat([dx, dy], dim=1), 2, dim=1, keepdim=True)
255 | v_pref = state[:, 7].reshape((batch, -1))
256 | vx = (state[:, 2] * torch.cos(rot) + state[:, 3] * torch.sin(rot)).reshape((batch, -1))
257 | vy = (state[:, 3] * torch.cos(rot) - state[:, 2] * torch.sin(rot)).reshape((batch, -1))
258 |
259 | radius = state[:, 4].reshape((batch, -1))
260 | if self.kinematics == 'unicycle':
261 | theta = (state[:, 8] - rot).reshape((batch, -1))
262 | else:
263 | theta = torch.zeros_like(v_pref)
264 |
265 | vx1 = (state[:, 11] * torch.cos(rot) + state[:, 12] * torch.sin(rot)).reshape((batch, -1))
266 | vy1 = (state[:, 12] * torch.cos(rot) - state[:, 11] * torch.sin(rot)).reshape((batch, -1))
267 | px1 = (state[:, 9] - state[:, 0]) * torch.cos(rot) + (state[:, 10] - state[:, 1]) * torch.sin(rot)
268 | px1 = px1.reshape((batch, -1))
269 | py1 = (state[:, 10] - state[:, 1]) * torch.cos(rot) - (state[:, 9] - state[:, 0]) * torch.sin(rot)
270 | py1 = py1.reshape((batch, -1))
271 | radius1 = state[:, 13].reshape((batch, -1))
272 | radius_sum = radius + radius1
273 | da = torch.norm(torch.cat([(state[:, 0] - state[:, 9]).reshape((batch, -1)), (state[:, 1] - state[:, 10]).
274 | reshape((batch, -1))], dim=1), 2, dim=1, keepdim=True)
275 | new_state = torch.cat([dg, v_pref, theta, radius, vx, vy, px1, py1, vx1, vy1, radius1, da, radius_sum], dim=1)
276 | return new_state
--------------------------------------------------------------------------------
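To make build_action_space concrete, here is a standalone sketch (not from the repository) using the icra_benchmark settings for the holonomic case: speed_samples=5 exponentially spaced speeds in (0, v_pref], rotation_samples=16 uniformly spaced headings, plus one stop action.

```
import itertools
import numpy as np

# Standalone sketch of CADRL.build_action_space (holonomic kinematics).
v_pref, speed_samples, rotation_samples = 1.0, 5, 16
speeds = [(np.exp((i + 1) / speed_samples) - 1) / (np.e - 1) * v_pref
          for i in range(speed_samples)]
rotations = np.linspace(0, 2 * np.pi, rotation_samples, endpoint=False)

actions = [(0.0, 0.0)]  # the stop action, ActionXY(0, 0)
for rotation, speed in itertools.product(rotations, speeds):
    actions.append((speed * np.cos(rotation), speed * np.sin(rotation)))

print(len(actions))  # 81 = 16 * 5 + 1
print(max(speeds))   # 1.0, i.e. the largest sampled speed equals v_pref
```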
/crowd_nav/policy/gcn.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import itertools
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn.functional import softmax, relu
6 | from torch.nn import Parameter
7 | from crowd_nav.policy.cadrl import mlp
8 | from crowd_nav.policy.multi_human_rl import MultiHumanRL
9 |
10 |
11 | class ValueNetwork(nn.Module):
12 | def __init__(self, input_dim, self_state_dim, num_layer, X_dim, wr_dims, wh_dims, final_state_dim,
13 | gcn2_w1_dim, planning_dims, similarity_function, layerwise_graph, skip_connection):
14 | super().__init__()
15 | # design choice
16 |
17 | # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation'
18 | self.similarity_function = similarity_function
19 | logging.info('self.similarity_func: {}'.format(self.similarity_function))
20 | human_state_dim = input_dim - self_state_dim
21 | self.self_state_dim = self_state_dim
22 | self.human_state_dim = human_state_dim
23 | self.num_layer = num_layer
24 | self.X_dim = X_dim
25 | self.layerwise_graph = layerwise_graph
26 | self.skip_connection = skip_connection
27 |
28 | self.w_r = mlp(self_state_dim, wr_dims, last_relu=True)
29 | self.w_h = mlp(human_state_dim, wh_dims, last_relu=True)
30 |
31 | if self.similarity_function == 'embedded_gaussian':
32 | self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim))
33 | elif self.similarity_function == 'concatenation':
34 | self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True)
35 |
36 | if num_layer == 1:
37 | self.w1 = Parameter(torch.randn(self.X_dim, final_state_dim))
38 | elif num_layer == 2:
39 | self.w1 = Parameter(torch.randn(self.X_dim, gcn2_w1_dim))
40 | self.w2 = Parameter(torch.randn(gcn2_w1_dim, final_state_dim))
41 | else:
42 | raise NotImplementedError
43 |
44 | self.value_net = mlp(final_state_dim, planning_dims)
45 |
46 | # for visualization
47 | self.A = None
48 |
49 | def compute_similarity_matrix(self, X):
50 | if self.similarity_function == 'embedded_gaussian':
51 | A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1))
52 | normalized_A = softmax(A, dim=2)
53 | elif self.similarity_function == 'gaussian':
54 | A = torch.matmul(X, X.permute(0, 2, 1))
55 | normalized_A = softmax(A, dim=2)
56 | elif self.similarity_function == 'cosine':
57 | A = torch.matmul(X, X.permute(0, 2, 1))
58 | magnitudes = torch.norm(A, dim=2, keepdim=True)
59 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
60 | normalized_A = torch.div(A, norm_matrix)
61 | elif self.similarity_function == 'cosine_softmax':
62 | A = torch.matmul(X, X.permute(0, 2, 1))
63 | magnitudes = torch.norm(A, dim=2, keepdim=True)
64 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
65 | normalized_A = softmax(torch.div(A, norm_matrix), dim=2)
66 | elif self.similarity_function == 'concatenation':
67 | indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)]
68 | selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1))
69 | pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2))
70 | A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1))
71 | normalized_A = A
72 | elif self.similarity_function == 'squared':
73 | A = torch.matmul(X, X.permute(0, 2, 1))
74 | squared_A = A * A
75 | normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True)
76 | elif self.similarity_function == 'equal_attention':
77 | normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1))
78 | elif self.similarity_function == 'diagonal':
79 | normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1))
80 | else:
81 | raise NotImplementedError
82 |
83 | return normalized_A
84 |
85 | def forward(self, state_input):
86 | if isinstance(state_input, tuple):
87 | state, lengths = state_input
88 | else:
89 | state = state_input
90 | # lengths = torch.IntTensor([state.size()[1]])
91 |
92 | self_state = state[:, 0, :self.self_state_dim]
93 | human_states = state[:, :, self.self_state_dim:]
94 |
95 | # compute feature matrix X
96 |         self_state_embeddings = self.w_r(self_state)
97 |         human_state_embeddings = self.w_h(human_states)
98 |         X = torch.cat([self_state_embeddings.unsqueeze(1), human_state_embeddings], dim=1)
99 |
100 | # compute matrix A
101 | normalized_A = self.compute_similarity_matrix(X)
102 | self.A = normalized_A[0, :, :].data.cpu().numpy()
103 |
104 | # graph convolution
105 | if self.num_layer == 0:
106 | feat = X[:, 0, :]
107 | elif self.num_layer == 1:
108 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
109 | feat = h1[:, 0, :]
110 | else:
111 | # compute h1 and h2
112 | if not self.skip_connection:
113 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
114 | else:
115 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) + X
116 | if self.layerwise_graph:
117 | normalized_A2 = self.compute_similarity_matrix(h1)
118 | else:
119 | normalized_A2 = normalized_A
120 | if not self.skip_connection:
121 | h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2))
122 | else:
123 | h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) + h1
124 | feat = h2[:, 0, :]
125 |
126 | # do planning using only the final layer feature of the agent
127 | value = self.value_net(feat)
128 | return value
129 |
130 |
131 | class GCN(MultiHumanRL):
132 | def __init__(self):
133 | super().__init__()
134 | self.name = 'GCN'
135 |
136 | def configure(self, config):
137 | self.multiagent_training = config.gcn.multiagent_training
138 | num_layer = config.gcn.num_layer
139 | X_dim = config.gcn.X_dim
140 | wr_dims = config.gcn.wr_dims
141 | wh_dims = config.gcn.wh_dims
142 | final_state_dim = config.gcn.final_state_dim
143 | gcn2_w1_dim = config.gcn.gcn2_w1_dim
144 | planning_dims = config.gcn.planning_dims
145 | similarity_function = config.gcn.similarity_function
146 | layerwise_graph = config.gcn.layerwise_graph
147 | skip_connection = config.gcn.skip_connection
148 | self.set_common_parameters(config)
149 | self.model = ValueNetwork(self.input_dim(), self.self_state_dim, num_layer, X_dim, wr_dims, wh_dims,
150 | final_state_dim, gcn2_w1_dim, planning_dims, similarity_function, layerwise_graph,
151 | skip_connection)
152 | logging.info('self.model:{}'.format(self.model))
153 | logging.info('GCN layers: {}'.format(num_layer))
154 | logging.info('Policy: {}'.format(self.name))
155 |
156 | def get_matrix_A(self):
157 | return self.model.A
158 |
--------------------------------------------------------------------------------
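The default 'embedded_gaussian' similarity above is a softmax-normalized bilinear score, A = softmax(X w_a X^T). A toy sketch (not from the repository) with shapes matching X_dim = 32 and one robot plus five humans:

```
import torch
from torch.nn.functional import softmax

# Toy sketch of the 'embedded_gaussian' branch of compute_similarity_matrix.
batch_size, num_agents, X_dim = 2, 6, 32
X = torch.randn(batch_size, num_agents, X_dim)
w_a = torch.randn(X_dim, X_dim)

A = torch.matmul(torch.matmul(X, w_a), X.permute(0, 2, 1))  # (2, 6, 6) raw scores
normalized_A = softmax(A, dim=2)                            # each row sums to one

print(normalized_A.shape)        # torch.Size([2, 6, 6])
print(normalized_A.sum(dim=2))   # all ones: every agent's attention is normalized
```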
/crowd_nav/policy/graph_model.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import itertools
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn.functional import softmax, relu
6 | from torch.nn import Parameter
7 | from crowd_nav.policy.helpers import mlp
8 |
9 |
10 | class RGL(nn.Module):
11 | def __init__(self, config, robot_state_dim, human_state_dim):
12 |         """ The current code might not be compatible with models trained with a previous version
13 | """
14 | super().__init__()
15 | self.multiagent_training = config.gcn.multiagent_training
16 | num_layer = config.gcn.num_layer
17 | X_dim = config.gcn.X_dim
18 | wr_dims = config.gcn.wr_dims
19 | wh_dims = config.gcn.wh_dims
20 | final_state_dim = config.gcn.final_state_dim
21 | similarity_function = config.gcn.similarity_function
22 | layerwise_graph = config.gcn.layerwise_graph
23 | skip_connection = config.gcn.skip_connection
24 |
25 | # design choice
26 |
27 | # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation'
28 | self.similarity_function = similarity_function
29 | self.robot_state_dim = robot_state_dim
30 | self.human_state_dim = human_state_dim
31 | self.num_layer = num_layer
32 | self.X_dim = X_dim
33 | self.layerwise_graph = layerwise_graph
34 | self.skip_connection = skip_connection
35 |
36 | logging.info('Similarity_func: {}'.format(self.similarity_function))
37 | logging.info('Layerwise_graph: {}'.format(self.layerwise_graph))
38 | logging.info('Skip_connection: {}'.format(self.skip_connection))
39 | logging.info('Number of layers: {}'.format(self.num_layer))
40 |
41 | self.w_r = mlp(robot_state_dim, wr_dims, last_relu=True)
42 | self.w_h = mlp(human_state_dim, wh_dims, last_relu=True)
43 |
44 | if self.similarity_function == 'embedded_gaussian':
45 | self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim))
46 | elif self.similarity_function == 'concatenation':
47 | self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True)
48 |
49 | # TODO: try other dim size
50 | embedding_dim = self.X_dim
51 | self.Ws = torch.nn.ParameterList()
52 | for i in range(self.num_layer):
53 | if i == 0:
54 | self.Ws.append(Parameter(torch.randn(self.X_dim, embedding_dim)))
55 | elif i == self.num_layer - 1:
56 | self.Ws.append(Parameter(torch.randn(embedding_dim, final_state_dim)))
57 | else:
58 | self.Ws.append(Parameter(torch.randn(embedding_dim, embedding_dim)))
59 |
60 | # for visualization
61 | self.A = None
62 |
63 | def compute_similarity_matrix(self, X):
64 | if self.similarity_function == 'embedded_gaussian':
65 | A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1))
66 | normalized_A = softmax(A, dim=2)
67 | elif self.similarity_function == 'gaussian':
68 | A = torch.matmul(X, X.permute(0, 2, 1))
69 | normalized_A = softmax(A, dim=2)
70 | elif self.similarity_function == 'cosine':
71 | A = torch.matmul(X, X.permute(0, 2, 1))
72 | magnitudes = torch.norm(A, dim=2, keepdim=True)
73 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
74 | normalized_A = torch.div(A, norm_matrix)
75 | elif self.similarity_function == 'cosine_softmax':
76 | A = torch.matmul(X, X.permute(0, 2, 1))
77 | magnitudes = torch.norm(A, dim=2, keepdim=True)
78 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
79 | normalized_A = softmax(torch.div(A, norm_matrix), dim=2)
80 | elif self.similarity_function == 'concatenation':
81 | indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)]
82 | selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1))
83 | pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2))
84 | A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1))
85 | normalized_A = A
86 | elif self.similarity_function == 'squared':
87 | A = torch.matmul(X, X.permute(0, 2, 1))
88 | squared_A = A * A
89 | normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True)
90 | elif self.similarity_function == 'equal_attention':
91 | normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1))
92 | elif self.similarity_function == 'diagonal':
93 | normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1))
94 | else:
95 | raise NotImplementedError
96 |
97 | return normalized_A
98 |
99 | def forward(self, state):
100 | """
101 | Embed current state tensor pair (robot_state, human_states) into a latent space
102 | Each tensor is of shape (batch_size, # of agent, features)
103 | :param state:
104 | :return:
105 | """
106 | robot_state, human_states = state
107 |
108 | # compute feature matrix X
109 |         robot_state_embeddings = self.w_r(robot_state)
110 |         human_state_embeddings = self.w_h(human_states)
111 |         X = torch.cat([robot_state_embeddings, human_state_embeddings], dim=1)
112 |
113 | # compute matrix A
114 | if not self.layerwise_graph:
115 | normalized_A = self.compute_similarity_matrix(X)
116 | self.A = normalized_A[0, :, :].data.cpu().numpy()
117 |
118 | next_H = H = X
119 | for i in range(self.num_layer):
120 | if self.layerwise_graph:
121 | A = self.compute_similarity_matrix(H)
122 | next_H = relu(torch.matmul(torch.matmul(A, H), self.Ws[i]))
123 | else:
124 | next_H = relu(torch.matmul(torch.matmul(normalized_A, H), self.Ws[i]))
125 |
126 | if self.skip_connection:
127 | next_H += H
128 | H = next_H
129 |
130 | return next_H
131 |
132 |
133 | # class RGL(nn.Module):
134 | # def __init__(self, config, robot_state_dim, human_state_dim):
135 | # super().__init__()
136 | # self.multiagent_training = config.gcn.multiagent_training
137 | # num_layer = config.gcn.num_layer
138 | # X_dim = config.gcn.X_dim
139 | # wr_dims = config.gcn.wr_dims
140 | # wh_dims = config.gcn.wh_dims
141 | # final_state_dim = config.gcn.final_state_dim
142 | # gcn2_w1_dim = config.gcn.gcn2_w1_dim
143 | # similarity_function = config.gcn.similarity_function
144 | # layerwise_graph = config.gcn.layerwise_graph
145 | # skip_connection = config.gcn.skip_connection
146 | #
147 | # # design choice
148 | #
149 | # # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation'
150 | # self.similarity_function = similarity_function
151 | # logging.info('self.similarity_func: {}'.format(self.similarity_function))
152 | # self.robot_state_dim = robot_state_dim
153 | # self.human_state_dim = human_state_dim
154 | # self.num_layer = num_layer
155 | # self.X_dim = X_dim
156 | # self.layerwise_graph = layerwise_graph
157 | # self.skip_connection = skip_connection
158 | #
159 | # self.w_r = mlp(robot_state_dim, wr_dims, last_relu=True)
160 | # self.w_h = mlp(human_state_dim, wh_dims, last_relu=True)
161 | #
162 | # if self.similarity_function == 'embedded_gaussian':
163 | # self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim))
164 | # elif self.similarity_function == 'concatenation':
165 | # self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True)
166 | #
167 | # if num_layer == 1:
168 | # self.w1 = Parameter(torch.randn(self.X_dim, final_state_dim))
169 | # elif num_layer == 2:
170 | # self.w1 = Parameter(torch.randn(self.X_dim, gcn2_w1_dim))
171 | # self.w2 = Parameter(torch.randn(gcn2_w1_dim, final_state_dim))
172 | # else:
173 | # raise NotImplementedError
174 | #
175 | # # for visualization
176 | # self.A = None
177 | #
178 | # def compute_similarity_matrix(self, X):
179 | # if self.similarity_function == 'embedded_gaussian':
180 | # A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1))
181 | # normalized_A = softmax(A, dim=2)
182 | # elif self.similarity_function == 'gaussian':
183 | # A = torch.matmul(X, X.permute(0, 2, 1))
184 | # normalized_A = softmax(A, dim=2)
185 | # elif self.similarity_function == 'cosine':
186 | # A = torch.matmul(X, X.permute(0, 2, 1))
187 | # magnitudes = torch.norm(A, dim=2, keepdim=True)
188 | # norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
189 | # normalized_A = torch.div(A, norm_matrix)
190 | # elif self.similarity_function == 'cosine_softmax':
191 | # A = torch.matmul(X, X.permute(0, 2, 1))
192 | # magnitudes = torch.norm(A, dim=2, keepdim=True)
193 | # norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
194 | # normalized_A = softmax(torch.div(A, norm_matrix), dim=2)
195 | # elif self.similarity_function == 'concatenation':
196 | # indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)]
197 | # selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1))
198 | # pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2))
199 | # A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1))
200 | # normalized_A = A
201 | # elif self.similarity_function == 'squared':
202 | # A = torch.matmul(X, X.permute(0, 2, 1))
203 | # squared_A = A * A
204 | # normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True)
205 | # elif self.similarity_function == 'equal_attention':
206 | # normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1))
207 | # elif self.similarity_function == 'diagonal':
208 | # normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1))
209 | # else:
210 | # raise NotImplementedError
211 | #
212 | # return normalized_A
213 | #
214 | # def forward(self, state):
215 | # """
216 | # Embed current state tensor pair (robot_state, human_states) into a latent space
217 | # Each tensor is of shape (batch_size, # of agent, features)
218 | # :param state:
219 | # :return:
220 | # """
221 | # robot_state, human_states = state
222 | #
223 | # # compute feature matrix X
224 | # robot_state_embedings = self.w_r(robot_state)
225 | # human_state_embedings = self.w_h(human_states)
226 | # X = torch.cat([robot_state_embedings, human_state_embedings], dim=1)
227 | #
228 | # # compute matrix A
229 | # normalized_A = self.compute_similarity_matrix(X)
230 | # self.A = normalized_A[0, :, :].data.cpu().numpy()
231 | #
232 | # # graph convolution
233 | # if self.num_layer == 0:
234 | # state_embedding = X
235 | # elif self.num_layer == 1:
236 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
237 | # state_embedding = h1
238 | # else:
239 | # # compute h1 and h2
240 | # if not self.skip_connection:
241 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
242 | # else:
243 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) + X
244 | # if self.layerwise_graph:
245 | # normalized_A2 = self.compute_similarity_matrix(h1)
246 | # else:
247 | # normalized_A2 = normalized_A
248 | # if not self.skip_connection:
249 | # h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2))
250 | # else:
251 | # h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) + h1
252 | # state_embedding = h2
253 | #
254 | # return state_embedding
255 |
--------------------------------------------------------------------------------
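A short sketch (not from the repository) of calling RGL on random tensors, using the state dimensions that ModelPredictiveRL passes in later (robot_state_dim=9, human_state_dim=5) and the rgl benchmark config; the output keeps one X_dim-sized feature per agent, with the robot at index 0.

```
import torch
from crowd_nav.policy.graph_model import RGL
from crowd_nav.configs.icra_benchmark.rgl import PolicyConfig

# Sketch: embed a batch of (robot_state, human_states) pairs with RGL.
config = PolicyConfig()
graph_model = RGL(config, robot_state_dim=9, human_state_dim=5)

robot_state = torch.randn(8, 1, 9)    # (batch_size, 1, robot features)
human_states = torch.randn(8, 5, 5)   # (batch_size, # of humans, human features)

embedding = graph_model((robot_state, human_states))
print(embedding.shape)                # torch.Size([8, 6, 32])
```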
/crowd_nav/policy/helpers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | def mlp(input_dim, mlp_dims, last_relu=False):
6 | layers = []
7 | mlp_dims = [input_dim] + mlp_dims
8 | for i in range(len(mlp_dims) - 1):
9 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1]))
10 | if i != len(mlp_dims) - 2 or last_relu:
11 | layers.append(nn.ReLU())
12 | net = nn.Sequential(*layers)
13 | return net
14 |
--------------------------------------------------------------------------------
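For reference, mlp builds an nn.Sequential of Linear layers with a ReLU after every layer except the last (unless last_relu=True). A quick sketch (not from the repository) using the planning_dims from the configs on a 32-dim graph feature:

```
import torch
from crowd_nav.policy.helpers import mlp

# Sketch: a value head like planning_dims = [150, 100, 100, 1] on X_dim = 32.
value_head = mlp(32, [150, 100, 100, 1])
print(value_head)                  # Linear/ReLU stack ending in Linear(100, 1)

features = torch.randn(8, 32)
print(value_head(features).shape)  # torch.Size([8, 1])
```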
/crowd_nav/policy/lstm_rl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import logging
5 | from crowd_nav.policy.cadrl import mlp
6 | from crowd_nav.policy.multi_human_rl import MultiHumanRL
7 |
8 |
9 | class ValueNetwork1(nn.Module):
10 | def __init__(self, input_dim, self_state_dim, mlp_dims, lstm_hidden_dim):
11 | super().__init__()
12 | self.self_state_dim = self_state_dim
13 | self.lstm_hidden_dim = lstm_hidden_dim
14 | self.mlp = mlp(self_state_dim + lstm_hidden_dim, mlp_dims)
15 | self.lstm = nn.LSTM(input_dim, lstm_hidden_dim, batch_first=True)
16 |
17 | def forward(self, state_input):
18 | """
19 | First transform the world coordinates to self-centric coordinates and then do forward computation
20 |
21 | """
22 | if isinstance(state_input, tuple):
23 | state, lengths = state_input
24 | else:
25 | state = state_input
26 | lengths = torch.IntTensor([state.size()[1]])
27 | self_state = state[:, 0, :self.self_state_dim]
28 | packed_sequence = torch.nn.utils.rnn.pack_padded_sequence(state, lengths, batch_first=True)
29 | _, (hn, cn) = self.lstm(packed_sequence)
30 | hn = hn.squeeze(0)
31 | joint_state = torch.cat([self_state, hn], dim=1)
32 | value = self.mlp(joint_state)
33 | return value
34 |
35 |
36 | class ValueNetwork2(nn.Module):
37 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp_dims, lstm_hidden_dim):
38 | super().__init__()
39 | self.self_state_dim = self_state_dim
40 | self.lstm_hidden_dim = lstm_hidden_dim
41 | self.mlp1 = mlp(input_dim, mlp1_dims)
42 | self.lstm = nn.LSTM(mlp1_dims[-1], lstm_hidden_dim, batch_first=True)
43 | self.mlp = mlp(self_state_dim + lstm_hidden_dim, mlp_dims)
44 |
45 | def forward(self, state_input):
46 | """
47 | First transform the world coordinates to self-centric coordinates and then do forward computation
48 |
49 | :return:
50 | """
51 | if isinstance(state_input, tuple):
52 | state, lengths = state_input
53 | else:
54 | state = state_input
55 | lengths = torch.IntTensor([state.size()[1]])
56 |
57 | size = state.shape
58 | self_state = state[:, 0, :self.self_state_dim]
59 |
60 | state = torch.reshape(state, (-1, size[2]))
61 | mlp1_output = self.mlp1(state)
62 | mlp1_output = torch.reshape(mlp1_output, (size[0], size[1], -1))
63 | packed_mlp1_output = torch.nn.utils.rnn.pack_padded_sequence(mlp1_output, lengths, batch_first=True)
64 |
65 | output, (hn, cn) = self.lstm(packed_mlp1_output)
66 | hn = hn.squeeze(0)
67 | joint_state = torch.cat([self_state, hn], dim=1)
68 | value = self.mlp(joint_state)
69 | return value
70 |
71 |
72 | class LstmRL(MultiHumanRL):
73 | def __init__(self):
74 | super().__init__()
75 | self.name = 'LSTM-RL'
76 | self.with_interaction_module = None
77 |
78 | def configure(self, config):
79 | self.set_common_parameters(config)
80 | self.with_om = config.lstm_rl.with_om
81 | self.multiagent_training = config.lstm_rl.multiagent_training
82 |
83 | mlp_dims = config.lstm_rl.mlp2_dims
84 | global_state_dim = config.lstm_rl.global_state_dim
85 | with_interaction_module = config.lstm_rl.with_interaction_module
86 | if with_interaction_module:
87 | mlp1_dims = config.lstm_rl.mlp1_dims
88 | self.model = ValueNetwork2(self.input_dim(), self.self_state_dim, mlp1_dims, mlp_dims, global_state_dim)
89 | else:
90 | self.model = ValueNetwork1(self.input_dim(), self.self_state_dim, mlp_dims, global_state_dim)
91 | logging.info('Policy: {}LSTM-RL {} pairwise interaction module'.format(
92 | 'OM-' if self.with_om else '', 'w/' if with_interaction_module else 'w/o'))
93 |
94 | def predict(self, state):
95 | """
96 | Input state is the joint state of robot concatenated with the observable state of other agents
97 |
98 | To predict the best action, agent samples actions and propagates one step to see how good the next state is
99 | thus the reward function is needed
100 |
101 | """
102 |
103 | def dist(human):
104 | # sort human order by decreasing distance to the robot
105 | return np.linalg.norm(np.array(human.position) - np.array(state.self_state.position))
106 |
107 | state.human_states = sorted(state.human_states, key=dist, reverse=True)
108 | return super().predict(state)
109 |
110 |
--------------------------------------------------------------------------------
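The ordering in LstmRL.predict feeds humans to the LSTM sorted by decreasing distance, so the closest human is the last (most recent) input to the recurrent state. A small sketch (not from the repository) of that sort:

```
import numpy as np

# Sketch of the sort used in LstmRL.predict: farthest human first, closest last.
robot_position = np.array([0.0, 0.0])
human_positions = [np.array([1.0, 1.0]),
                   np.array([3.0, 0.0]),
                   np.array([0.5, -0.2])]

ordered = sorted(human_positions,
                 key=lambda position: np.linalg.norm(position - robot_position),
                 reverse=True)
print([round(float(np.linalg.norm(p - robot_position)), 2) for p in ordered])
# [3.0, 1.41, 0.54]
```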
/crowd_nav/policy/model_predictive_rl.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import torch
3 | import numpy as np
4 | from numpy.linalg import norm
5 | import itertools
6 | from crowd_sim.envs.policy.policy import Policy
7 | from crowd_sim.envs.utils.action import ActionRot, ActionXY
8 | from crowd_sim.envs.utils.state import tensor_to_joint_state
9 | from crowd_sim.envs.utils.utils import point_to_segment_dist
10 | from crowd_nav.policy.state_predictor import StatePredictor, LinearStatePredictor
11 | from crowd_nav.policy.graph_model import RGL
12 | from crowd_nav.policy.value_estimator import ValueEstimator
13 |
14 |
15 | class ModelPredictiveRL(Policy):
16 | def __init__(self):
17 | super().__init__()
18 | self.name = 'ModelPredictiveRL'
19 | self.trainable = True
20 | self.multiagent_training = True
21 | self.kinematics = None
22 | self.epsilon = None
23 | self.gamma = None
24 | self.sampling = None
25 | self.speed_samples = None
26 | self.rotation_samples = None
27 | self.action_space = None
28 | self.rotation_constraint = None
29 | self.speeds = None
30 | self.rotations = None
31 | self.action_values = None
32 | self.robot_state_dim = 9
33 | self.human_state_dim = 5
34 | self.v_pref = 1
35 | self.share_graph_model = None
36 | self.value_estimator = None
37 | self.linear_state_predictor = None
38 | self.state_predictor = None
39 | self.planning_depth = None
40 | self.planning_width = None
41 | self.do_action_clip = None
42 | self.sparse_search = None
43 | self.sparse_speed_samples = 2
44 | self.sparse_rotation_samples = 8
45 | self.action_group_index = []
46 | self.traj = None
47 |
48 | def configure(self, config):
49 | self.set_common_parameters(config)
50 | self.planning_depth = config.model_predictive_rl.planning_depth
51 | self.do_action_clip = config.model_predictive_rl.do_action_clip
52 | if hasattr(config.model_predictive_rl, 'sparse_search'):
53 | self.sparse_search = config.model_predictive_rl.sparse_search
54 | self.planning_width = config.model_predictive_rl.planning_width
55 | self.share_graph_model = config.model_predictive_rl.share_graph_model
56 | self.linear_state_predictor = config.model_predictive_rl.linear_state_predictor
57 |
58 | if self.linear_state_predictor:
59 | self.state_predictor = LinearStatePredictor(config, self.time_step)
60 | graph_model = RGL(config, self.robot_state_dim, self.human_state_dim)
61 | self.value_estimator = ValueEstimator(config, graph_model)
62 | self.model = [graph_model, self.value_estimator.value_network]
63 | else:
64 | if self.share_graph_model:
65 | graph_model = RGL(config, self.robot_state_dim, self.human_state_dim)
66 | self.value_estimator = ValueEstimator(config, graph_model)
67 | self.state_predictor = StatePredictor(config, graph_model, self.time_step)
68 | self.model = [graph_model, self.value_estimator.value_network, self.state_predictor.human_motion_predictor]
69 | else:
70 | graph_model1 = RGL(config, self.robot_state_dim, self.human_state_dim)
71 | self.value_estimator = ValueEstimator(config, graph_model1)
72 | graph_model2 = RGL(config, self.robot_state_dim, self.human_state_dim)
73 | self.state_predictor = StatePredictor(config, graph_model2, self.time_step)
74 | self.model = [graph_model1, graph_model2, self.value_estimator.value_network,
75 | self.state_predictor.human_motion_predictor]
76 |
77 | logging.info('Planning depth: {}'.format(self.planning_depth))
78 | logging.info('Planning width: {}'.format(self.planning_width))
79 | logging.info('Sparse search: {}'.format(self.sparse_search))
80 |
81 | if self.planning_depth > 1 and not self.do_action_clip:
82 | logging.warning('Performing d-step planning without action space clipping!')
83 |
84 | def set_common_parameters(self, config):
85 | self.gamma = config.rl.gamma
86 | self.kinematics = config.action_space.kinematics
87 | self.sampling = config.action_space.sampling
88 | self.speed_samples = config.action_space.speed_samples
89 | self.rotation_samples = config.action_space.rotation_samples
90 | self.rotation_constraint = config.action_space.rotation_constraint
91 |
92 | def set_device(self, device):
93 | self.device = device
94 | for model in self.model:
95 | model.to(device)
96 |
97 | def set_epsilon(self, epsilon):
98 | self.epsilon = epsilon
99 |
100 | def set_time_step(self, time_step):
101 | self.time_step = time_step
102 | self.state_predictor.time_step = time_step
103 |
104 | def get_normalized_gamma(self):
105 | return pow(self.gamma, self.time_step * self.v_pref)
106 |
107 | def get_model(self):
108 | return self.value_estimator
109 |
110 | def get_state_dict(self):
111 | if self.state_predictor.trainable:
112 | if self.share_graph_model:
113 | return {
114 | 'graph_model': self.value_estimator.graph_model.state_dict(),
115 | 'value_network': self.value_estimator.value_network.state_dict(),
116 | 'motion_predictor': self.state_predictor.human_motion_predictor.state_dict()
117 | }
118 | else:
119 | return {
120 | 'graph_model1': self.value_estimator.graph_model.state_dict(),
121 | 'graph_model2': self.state_predictor.graph_model.state_dict(),
122 | 'value_network': self.value_estimator.value_network.state_dict(),
123 | 'motion_predictor': self.state_predictor.human_motion_predictor.state_dict()
124 | }
125 | else:
126 | return {
127 | 'graph_model': self.value_estimator.graph_model.state_dict(),
128 | 'value_network': self.value_estimator.value_network.state_dict()
129 | }
130 |
131 | def get_traj(self):
132 | return self.traj
133 |
134 | def load_state_dict(self, state_dict):
135 | if self.state_predictor.trainable:
136 | if self.share_graph_model:
137 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model'])
138 | else:
139 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model1'])
140 | self.state_predictor.graph_model.load_state_dict(state_dict['graph_model2'])
141 |
142 | self.value_estimator.value_network.load_state_dict(state_dict['value_network'])
143 | self.state_predictor.human_motion_predictor.load_state_dict(state_dict['motion_predictor'])
144 | else:
145 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model'])
146 | self.value_estimator.value_network.load_state_dict(state_dict['value_network'])
147 |
148 | def save_model(self, file):
149 | torch.save(self.get_state_dict(), file)
150 |
151 | def load_model(self, file):
152 | checkpoint = torch.load(file)
153 | self.load_state_dict(checkpoint)
154 |
155 | def build_action_space(self, v_pref):
156 | """
157 | Action space consists of speed_samples x rotation_samples discrete actions (exponentially spaced speeds, evenly spaced rotations), plus a stop action.
158 | """
159 | holonomic = True if self.kinematics == 'holonomic' else False
160 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)]
161 | if holonomic:
162 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
163 | else:
164 | rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples)
165 |
166 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
167 | for j, speed in enumerate(speeds):
168 | if j == 0:
169 | # index for action (0, 0)
170 | self.action_group_index.append(0)
171 | # only two groups in speeds
172 | if j < 3:
173 | speed_index = 0
174 | else:
175 | speed_index = 1
176 |
177 | for i, rotation in enumerate(rotations):
178 | rotation_index = i // 2
179 |
180 | action_index = speed_index * self.sparse_rotation_samples + rotation_index
181 | self.action_group_index.append(action_index)
182 |
183 | if holonomic:
184 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
185 | else:
186 | action_space.append(ActionRot(speed, rotation))
187 |
188 | self.speeds = speeds
189 | self.rotations = rotations
190 | self.action_space = action_space
191 |
192 | def predict(self, state):
193 | """
194 | Choose an action: epsilon-greedy exploration in the training phase, otherwise a d-step lookahead
195 | search over the (optionally clipped) action space using the state predictor and the value estimator.
196 |
197 | """
198 | if self.phase is None or self.device is None:
199 | raise AttributeError('Phase, device attributes have to be set!')
200 | if self.phase == 'train' and self.epsilon is None:
201 | raise AttributeError('Epsilon attribute has to be set in training phase')
202 |
203 | if self.reach_destination(state):
204 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
205 | if self.action_space is None:
206 | self.build_action_space(state.robot_state.v_pref)
207 |
208 | probability = np.random.random()
209 | if self.phase == 'train' and probability < self.epsilon:
210 | max_action = self.action_space[np.random.choice(len(self.action_space))]
211 | else:
212 | max_action = None
213 | max_value = float('-inf')
214 | max_traj = None
215 |
216 | if self.do_action_clip:
217 | state_tensor = state.to_tensor(add_batch_size=True, device=self.device)
218 | action_space_clipped = self.action_clip(state_tensor, self.action_space, self.planning_width)
219 | else:
220 | action_space_clipped = self.action_space
221 |
222 | for action in action_space_clipped:
223 | state_tensor = state.to_tensor(add_batch_size=True, device=self.device)
224 | next_state = self.state_predictor(state_tensor, action)
225 | max_next_return, max_next_traj = self.V_planning(next_state, self.planning_depth, self.planning_width)
226 | reward_est = self.estimate_reward(state, action)
227 | value = reward_est + self.get_normalized_gamma() * max_next_return
228 | if value > max_value:
229 | max_value = value
230 | max_action = action
231 | max_traj = [(state_tensor, action, reward_est)] + max_next_traj
232 | if max_action is None:
233 | raise ValueError('Value network is not well trained.')
234 |
235 | if self.phase == 'train':
236 | self.last_state = self.transform(state)
237 | else:
238 | self.traj = max_traj
239 |
240 | return max_action
241 |
242 | def action_clip(self, state, action_space, width, depth=1):
243 | values = []
244 |
245 | for action in action_space:
246 | next_state_est = self.state_predictor(state, action)
247 | next_return, _ = self.V_planning(next_state_est, depth, width)
248 | reward_est = self.estimate_reward(state, action)
249 | value = reward_est + self.get_normalized_gamma() * next_return
250 | values.append(value)
251 |
252 | if self.sparse_search:
253 | # self.sparse_speed_samples = 2
254 | # search in a sparse grained action space
255 | added_groups = set()
256 | max_indices = np.argsort(np.array(values))[::-1]
257 | clipped_action_space = []
258 | for index in max_indices:
259 | if self.action_group_index[index] not in added_groups:
260 | clipped_action_space.append(action_space[index])
261 | added_groups.add(self.action_group_index[index])
262 | if len(clipped_action_space) == width:
263 | break
264 | else:
265 | max_indexes = np.argpartition(np.array(values), -width)[-width:]
266 | clipped_action_space = [action_space[i] for i in max_indexes]
267 |
268 | # print(clipped_action_space)
269 | return clipped_action_space
270 |
271 | def V_planning(self, state, depth, width):
272 | """ Plans n steps into future. Computes the value for the current state as well as the trajectories
273 | defined as a list of (state, action, reward) triples
274 |
275 | """
276 |
277 | current_state_value = self.value_estimator(state)
278 | if depth == 1:
279 | return current_state_value, [(state, None, None)]
280 |
281 | if self.do_action_clip:
282 | action_space_clipped = self.action_clip(state, self.action_space, width)
283 | else:
284 | action_space_clipped = self.action_space
285 |
286 | returns = []
287 | trajs = []
288 |
289 | for action in action_space_clipped:
290 | next_state_est = self.state_predictor(state, action)
291 | reward_est = self.estimate_reward(state, action)
292 | next_value, next_traj = self.V_planning(next_state_est, depth - 1, self.planning_width)
293 | return_value = current_state_value / depth + (depth - 1) / depth * (self.get_normalized_gamma() * next_value + reward_est)
294 |
295 | returns.append(return_value)
296 | trajs.append([(state, action, reward_est)] + next_traj)
297 |
298 | max_index = np.argmax(returns)
299 | max_return = returns[max_index]
300 | max_traj = trajs[max_index]
301 |
302 | return max_return, max_traj
303 |
304 | def estimate_reward(self, state, action):
305 | """ If the time step is small enough, it's okay to model agent as linear movement during this period
306 |
307 | """
308 | # collision detection
309 | if isinstance(state, list) or isinstance(state, tuple):
310 | state = tensor_to_joint_state(state)
311 | human_states = state.human_states
312 | robot_state = state.robot_state
313 |
314 | dmin = float('inf')
315 | collision = False
316 | for i, human in enumerate(human_states):
317 | px = human.px - robot_state.px
318 | py = human.py - robot_state.py
319 | if self.kinematics == 'holonomic':
320 | vx = human.vx - action.vx
321 | vy = human.vy - action.vy
322 | else:
323 | vx = human.vx - action.v * np.cos(action.r + robot_state.theta)
324 | vy = human.vy - action.v * np.sin(action.r + robot_state.theta)
325 | ex = px + vx * self.time_step
326 | ey = py + vy * self.time_step
327 | # closest distance between boundaries of two agents
328 | closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - robot_state.radius
329 | if closest_dist < 0:
330 | collision = True
331 | break
332 | elif closest_dist < dmin:
333 | dmin = closest_dist
334 |
335 | # check if reaching the goal
336 | if self.kinematics == 'holonomic':
337 | px = robot_state.px + action.vx * self.time_step
338 | py = robot_state.py + action.vy * self.time_step
339 | else:
340 | theta = robot_state.theta + action.r
341 | px = robot_state.px + np.cos(theta) * action.v * self.time_step
342 | py = robot_state.py + np.sin(theta) * action.v * self.time_step
343 |
344 | end_position = np.array((px, py))
345 | reaching_goal = norm(end_position - np.array([robot_state.gx, robot_state.gy])) < robot_state.radius
346 |
347 | if collision:
348 | reward = -0.25
349 | elif reaching_goal:
350 | reward = 1
351 | elif dmin < 0.2:
352 | # adjust the reward based on FPS
353 | reward = (dmin - 0.2) * 0.5 * self.time_step
354 | else:
355 | reward = 0
356 |
357 | return reward
358 |
359 | def transform(self, state):
360 | """
361 | Convert the JointState into tensors on the policy's device.
362 |
363 | :param state: JointState
364 | :return: tuple of tensors (robot state of shape (1, 9), human states of shape (# of humans, 5))
365 | """
366 | robot_state_tensor = torch.Tensor([state.robot_state.to_tuple()]).to(self.device)
367 | human_states_tensor = torch.Tensor([human_state.to_tuple() for human_state in state.human_states]). \
368 | to(self.device)
369 |
370 | return robot_state_tensor, human_states_tensor
371 |
--------------------------------------------------------------------------------
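A note on the planning loop above: predict scores each candidate action as the estimated one-step reward plus the normalized discount gamma ** (time_step * v_pref) times the best return of the predicted next state, and V_planning recurses on this until depth == 1, where the learned value estimator is queried directly. The sketch below reproduces only that recursion; the toy transition, reward, and value functions (and every name and number in them) are illustrative stand-ins for the learned state_predictor, estimate_reward, and value_estimator, and action-space clipping is omitted.

# Illustrative sketch of the depth-d lookahead in V_planning (not part of the repository).
GAMMA = 0.9                       # stands in for gamma ** (time_step * v_pref)
ACTIONS = ['left', 'stay', 'right']

def predict_next(state, action):  # toy deterministic dynamics on an integer "state"
    return state + {'left': -1, 'stay': 0, 'right': 1}[action]

def estimate_reward(state, action):
    return 1.0 if predict_next(state, action) == 3 else 0.0

def value_estimate(state):
    return -0.1 * abs(3 - state)  # states closer to the toy goal (3) look better

def v_planning(state, depth):
    """Back up the best discounted return over a depth-`depth` rollout."""
    current_value = value_estimate(state)
    if depth == 1:
        return current_value, [(state, None, None)]
    best_return, best_traj = float('-inf'), None
    for action in ACTIONS:
        next_state = predict_next(state, action)
        reward = estimate_reward(state, action)
        next_return, next_traj = v_planning(next_state, depth - 1)
        # same mixing rule as V_planning above: blend the current value estimate
        # with the discounted backed-up return, weighted by the remaining depth
        value = current_value / depth + (depth - 1) / depth * (GAMMA * next_return + reward)
        if value > best_return:
            best_return, best_traj = value, [(state, action, reward)] + next_traj
    return best_return, best_traj

print(v_planning(state=0, depth=3))
--------------------------------------------------------------------------------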
/crowd_nav/policy/multi_human_rl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import logging
3 | import numpy as np
4 | from crowd_sim.envs.utils.action import ActionRot, ActionXY
5 | from crowd_nav.policy.cadrl import CADRL
6 |
7 |
8 | class MultiHumanRL(CADRL):
9 | def __init__(self):
10 | super().__init__()
11 |
12 | def predict(self, state):
13 | """
14 | A base class for all methods that take the pairwise joint state as input to the value network.
15 | The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
16 |
17 | """
18 | if self.phase is None or self.device is None:
19 | raise AttributeError('Phase, device attributes have to be set!')
20 | if self.phase == 'train' and self.epsilon is None:
21 | raise AttributeError('Epsilon attribute has to be set in training phase')
22 |
23 | if self.reach_destination(state):
24 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
25 | if self.action_space is None:
26 | self.build_action_space(state.robot_state.v_pref)
27 | if not state.human_states:
28 | assert self.phase != 'train'
29 | if hasattr(self, 'attention_weights'):
30 | self.attention_weights = list()
31 | return self.select_greedy_action(state.robot_state)
32 |
33 | occupancy_maps = None
34 | probability = np.random.random()
35 | if self.phase == 'train' and probability < self.epsilon:
36 | max_action = self.action_space[np.random.choice(len(self.action_space))]
37 | else:
38 | self.action_values = list()
39 | max_value = float('-inf')
40 | max_action = None
41 | for action in self.action_space:
42 | next_robot_state = self.propagate(state.robot_state, action)
43 | if self.query_env:
44 | next_human_states, reward, done, info = self.env.onestep_lookahead(action)
45 | else:
46 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
47 | for human_state in state.human_states]
48 | reward = self.compute_reward(next_robot_state, next_human_states)
49 | batch_next_states = torch.cat([torch.Tensor([next_robot_state + next_human_state]).to(self.device)
50 | for next_human_state in next_human_states], dim=0)
51 | rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0)
52 | if self.with_om:
53 | if occupancy_maps is None:
54 | occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0)
55 | rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2)
56 | # VALUE UPDATE
57 | next_state_value = self.model(rotated_batch_input).data.item()
58 | value = reward + pow(self.gamma, self.time_step * state.robot_state.v_pref) * next_state_value
59 | self.action_values.append(value)
60 | if value > max_value:
61 | max_value = value
62 | max_action = action
63 | if hasattr(self, 'attention_weights'):
64 | self.attention_weights = self.model.attention_weights
65 | if max_action is None:
66 | raise ValueError('Value network is not well trained. ')
67 |
68 | if self.phase == 'train':
69 | self.last_state = self.transform(state)
70 |
71 | return max_action
72 |
73 | def compute_reward(self, nav, humans):
74 | # collision detection
75 | dmin = float('inf')
76 | collision = False
77 | for i, human in enumerate(humans):
78 | dist = np.linalg.norm((nav.px - human.px, nav.py - human.py)) - nav.radius - human.radius
79 | if dist < 0:
80 | collision = True
81 | break
82 | if dist < dmin:
83 | dmin = dist
84 |
85 | # check if reaching the goal
86 | reaching_goal = np.linalg.norm((nav.px - nav.gx, nav.py - nav.gy)) < nav.radius
87 | if collision:
88 | reward = -0.25
89 | elif reaching_goal:
90 | reward = 1
91 | elif dmin < 0.2:
92 | reward = (dmin - 0.2) * 0.5 * self.time_step
93 | else:
94 | reward = 0
95 |
96 | return reward
97 |
98 | def transform(self, state):
99 | """
100 | Take the state passed from agent and transform it to the input of value network
101 |
102 | :param state:
103 | :return: tensor of shape (# of humans, len(state))
104 | """
105 | state_tensor = torch.cat([torch.Tensor([state.robot_state + human_state]).to(self.device)
106 | for human_state in state.human_states], dim=0)
107 | rotated_state_tensor = self.rotate(state_tensor)
108 | if self.with_om:
109 | occupancy_maps = self.build_occupancy_maps(state.human_states)
110 | rotated_state_tensor = torch.cat([rotated_state_tensor, occupancy_maps], dim=1)
111 |
112 | return rotated_state_tensor
113 |
114 | def input_dim(self):
115 | return self.joint_state_dim + (self.cell_num ** 2 * self.om_channel_size if self.with_om else 0)
116 |
117 | def build_occupancy_maps(self, human_states):
118 | """
119 |
120 | :param human_states:
121 | :return: tensor of shape (# humans, self.cell_num ** 2 * self.om_channel_size)
122 | """
123 | occupancy_maps = []
124 | for human in human_states:
125 | other_humans = np.concatenate([np.array([(other_human.px, other_human.py, other_human.vx, other_human.vy)])
126 | for other_human in human_states if other_human != human], axis=0)
127 | other_px = other_humans[:, 0] - human.px
128 | other_py = other_humans[:, 1] - human.py
129 | # new x-axis is in the direction of human's velocity
130 | human_velocity_angle = np.arctan2(human.vy, human.vx)
131 | other_human_orientation = np.arctan2(other_py, other_px)
132 | rotation = other_human_orientation - human_velocity_angle
133 | distance = np.linalg.norm([other_px, other_py], axis=0)
134 | other_px = np.cos(rotation) * distance
135 | other_py = np.sin(rotation) * distance
136 |
137 | # compute indices of humans in the grid
138 | other_x_index = np.floor(other_px / self.cell_size + self.cell_num / 2)
139 | other_y_index = np.floor(other_py / self.cell_size + self.cell_num / 2)
140 | other_x_index[other_x_index < 0] = float('-inf')
141 | other_x_index[other_x_index >= self.cell_num] = float('-inf')
142 | other_y_index[other_y_index < 0] = float('-inf')
143 | other_y_index[other_y_index >= self.cell_num] = float('-inf')
144 | grid_indices = self.cell_num * other_y_index + other_x_index
145 | occupancy_map = np.isin(range(self.cell_num ** 2), grid_indices)
146 | if self.om_channel_size == 1:
147 | occupancy_maps.append([occupancy_map.astype(int)])
148 | else:
149 | # calculate relative velocity for other agents
150 | other_human_velocity_angles = np.arctan2(other_humans[:, 3], other_humans[:, 2])
151 | rotation = other_human_velocity_angles - human_velocity_angle
152 | speed = np.linalg.norm(other_humans[:, 2:4], axis=1)
153 | other_vx = np.cos(rotation) * speed
154 | other_vy = np.sin(rotation) * speed
155 | dm = [list() for _ in range(self.cell_num ** 2 * self.om_channel_size)]
156 | for i, index in np.ndenumerate(grid_indices):
157 | if index in range(self.cell_num ** 2):
158 | if self.om_channel_size == 2:
159 | dm[2 * int(index)].append(other_vx[i])
160 | dm[2 * int(index) + 1].append(other_vy[i])
161 | elif self.om_channel_size == 3:
162 | dm[3 * int(index)].append(1)
163 | dm[3 * int(index) + 1].append(other_vx[i])
164 | dm[3 * int(index) + 2].append(other_vy[i])
165 | else:
166 | raise NotImplementedError
167 | for i, cell in enumerate(dm):
168 | dm[i] = sum(dm[i]) / len(dm[i]) if len(dm[i]) != 0 else 0
169 | occupancy_maps.append([dm])
170 |
171 | return torch.from_numpy(np.concatenate(occupancy_maps, axis=0)).float()
172 |
173 |
--------------------------------------------------------------------------------
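For reference, build_occupancy_maps above expresses every other human in a frame whose x-axis follows the centre human's velocity, bins them into a cell_num x cell_num grid around that human, and (for multi-channel maps) stores per-cell relative velocities. A minimal single-channel sketch for one human; the coordinates and the cell_num / cell_size values below are toy numbers, not taken from the configs.

# Illustrative single-human occupancy grid (toy values, not part of the repository).
import numpy as np

cell_num, cell_size = 4, 1.0
human = np.array([0.0, 0.0, 1.0, 0.0])            # px, py, vx, vy of the centre human
others = np.array([[1.5, 0.5, 0.0, -1.0],
                   [-0.5, 1.0, 0.5, 0.5]])        # the remaining humans

# rotate the others into a frame aligned with the centre human's velocity
dx, dy = others[:, 0] - human[0], others[:, 1] - human[1]
rotation = np.arctan2(dy, dx) - np.arctan2(human[3], human[2])
distance = np.hypot(dx, dy)
local_x, local_y = np.cos(rotation) * distance, np.sin(rotation) * distance

# bin into a cell_num x cell_num grid centred on the human
ix = np.floor(local_x / cell_size + cell_num / 2)
iy = np.floor(local_y / cell_size + cell_num / 2)
valid = (ix >= 0) & (ix < cell_num) & (iy >= 0) & (iy < cell_num)
grid_indices = (cell_num * iy + ix)[valid].astype(int)

occupancy_map = np.isin(np.arange(cell_num ** 2), grid_indices).astype(int)
print(occupancy_map.reshape(cell_num, cell_num))
--------------------------------------------------------------------------------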
/crowd_nav/policy/policy_factory.py:
--------------------------------------------------------------------------------
1 | from crowd_sim.envs.policy.policy_factory import policy_factory
2 | from crowd_nav.policy.cadrl import CADRL
3 | from crowd_nav.policy.lstm_rl import LstmRL
4 | from crowd_nav.policy.sarl import SARL
5 | from crowd_nav.policy.gcn import GCN
6 | from crowd_nav.policy.model_predictive_rl import ModelPredictiveRL
7 |
8 |
9 | policy_factory['cadrl'] = CADRL
10 | policy_factory['lstm_rl'] = LstmRL
11 | policy_factory['sarl'] = SARL
12 | policy_factory['gcn'] = GCN
13 | policy_factory['model_predictive_rl'] = ModelPredictiveRL
14 |
--------------------------------------------------------------------------------
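The factory maps a config name to a policy class, so a policy can be constructed from the string stored in the policy config. A minimal usage sketch, assuming the package and its dependencies are importable:

from crowd_nav.policy.policy_factory import policy_factory

policy = policy_factory['model_predictive_rl']()   # -> ModelPredictiveRL instance
print(policy.name, policy.trainable)               # ModelPredictiveRL True
--------------------------------------------------------------------------------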
/crowd_nav/policy/sarl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.utils.rnn as rnn_utils
4 | import logging
5 | from crowd_nav.policy.cadrl import mlp
6 | from crowd_nav.policy.multi_human_rl import MultiHumanRL
7 |
8 |
9 | class ValueNetwork(nn.Module):
10 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims, attention_dims, with_global_state,
11 | cell_size, cell_num):
12 | super().__init__()
13 | self.self_state_dim = self_state_dim
14 | self.global_state_dim = mlp1_dims[-1]
15 | self.mlp1 = mlp(input_dim, mlp1_dims, last_relu=True)
16 | self.mlp2 = mlp(mlp1_dims[-1], mlp2_dims)
17 | self.with_global_state = with_global_state
18 | if with_global_state:
19 | self.attention = mlp(mlp1_dims[-1] * 2, attention_dims)
20 | else:
21 | self.attention = mlp(mlp1_dims[-1], attention_dims)
22 | self.cell_size = cell_size
23 | self.cell_num = cell_num
24 | mlp3_input_dim = mlp2_dims[-1] + self.self_state_dim
25 | self.mlp3 = mlp(mlp3_input_dim, mlp3_dims)
26 | self.attention_weights = None
27 |
28 | def forward(self, state_input):
29 | """
30 | First transform the world coordinates to self-centric coordinates and then do forward computation
31 |
32 | :param state_input: tensor of shape (batch_size, # of humans, length of a rotated state)
33 | :return:
34 | """
35 | if isinstance(state_input, tuple):
36 | state, lengths = state_input
37 | else:
38 | state = state_input
39 | lengths = torch.IntTensor([state.size()[1]])
40 |
41 | size = state.shape
42 | self_state = state[:, 0, :self.self_state_dim]
43 | mlp1_output = self.mlp1(state.reshape((-1, size[2])))
44 | mlp2_output = self.mlp2(mlp1_output)
45 |
46 | if self.with_global_state:
47 | # compute attention scores
48 | global_state = torch.mean(mlp1_output.view(size[0], size[1], -1), 1, keepdim=True)
49 | global_state = global_state.expand((size[0], size[1], self.global_state_dim)).\
50 | contiguous().view(-1, self.global_state_dim)
51 | attention_input = torch.cat([mlp1_output, global_state], dim=1)
52 | else:
53 | attention_input = mlp1_output
54 | scores = self.attention(attention_input).view(size[0], size[1], 1).squeeze(dim=2)
55 |
56 | # masked softmax
57 | mask = rnn_utils.pad_sequence([torch.ones(length.item()) for length in lengths], batch_first=True)
58 | masked_scores = scores * mask.float()
59 | max_scores = torch.max(masked_scores, dim=1, keepdim=True)[0]
60 | exps = torch.exp(masked_scores - max_scores)
61 | masked_exps = exps * mask.float()
62 | masked_sums = masked_exps.sum(1, keepdim=True)
63 | weights = (masked_exps / masked_sums).unsqueeze(2)
64 | self.attention_weights = weights[0, :, 0].data.cpu().numpy()
65 |
66 | # output feature is a linear combination of input features
67 | features = mlp2_output.view(size[0], size[1], -1)
68 | weighted_feature = torch.sum(torch.mul(weights, features), dim=1)
69 |
70 | # concatenate agent's state with global weighted humans' state
71 | joint_state = torch.cat([self_state, weighted_feature], dim=1)
72 | value = self.mlp3(joint_state)
73 | return value
74 |
75 |
76 | class SARL(MultiHumanRL):
77 | def __init__(self):
78 | super().__init__()
79 | self.name = 'SARL'
80 | self.attention_weights = None
81 |
82 | def configure(self, config):
83 | self.set_common_parameters(config)
84 | self.with_om = config.sarl.with_om
85 | self.multiagent_training = config.sarl.multiagent_training
86 |
87 | mlp1_dims = config.sarl.mlp1_dims
88 | mlp2_dims = config.sarl.mlp2_dims
89 | mlp3_dims = config.sarl.mlp3_dims
90 | attention_dims = config.sarl.attention_dims
91 | with_global_state = config.sarl.with_global_state
92 | self.model = ValueNetwork(self.input_dim(), self.self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims,
93 | attention_dims, with_global_state, self.cell_size, self.cell_num)
94 | if self.with_om:
95 | self.name = 'OM-SARL'
96 | logging.info('Policy: {} {} global state'.format(self.name, 'w/' if with_global_state else 'w/o'))
97 |
98 | def get_attention_weights(self):
99 | return self.attention_weights
100 |
--------------------------------------------------------------------------------
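The attention in ValueNetwork.forward above scores each human, applies a masked softmax so that padded human slots receive zero weight, and uses the resulting weights to pool per-human features into a single crowd feature. A minimal sketch of just the masked-softmax step, with toy scores and lengths:

# Illustrative masked softmax over a padded batch of humans (toy values).
import torch

scores = torch.tensor([[1.0, 2.0, 0.5, 0.0],    # batch of 2, padded to 4 humans
                       [0.3, 0.7, 0.0, 0.0]])
lengths = torch.tensor([3, 2])                   # actual number of humans per sample

mask = (torch.arange(scores.size(1)).unsqueeze(0) < lengths.unsqueeze(1)).float()
masked_scores = scores * mask
max_scores = masked_scores.max(dim=1, keepdim=True)[0]   # subtract max for stability
exps = torch.exp(masked_scores - max_scores) * mask
weights = exps / exps.sum(dim=1, keepdim=True)

print(weights)             # padded humans get exactly zero attention
print(weights.sum(dim=1))  # each row sums to 1
--------------------------------------------------------------------------------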
/crowd_nav/policy/state_predictor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from crowd_nav.policy.helpers import mlp
5 |
6 |
7 | class StatePredictor(nn.Module):
8 | def __init__(self, config, graph_model, time_step):
9 | """
10 | This module predicts the next state given the current state as input.
11 | It uses a graph model to encode the state into a latent space and an MLP head to predict each human's next state.
12 | """
13 | super().__init__()
14 | self.trainable = True
15 | self.kinematics = config.action_space.kinematics
16 | self.graph_model = graph_model
17 | self.human_motion_predictor = mlp(config.gcn.X_dim, config.model_predictive_rl.motion_predictor_dims)
18 | self.time_step = time_step
19 |
20 | def forward(self, state, action, detach=False):
21 | """ Predict the next state tensor given current state as input.
22 |
23 | :return: tensor of shape (batch_size, # of agents, feature_size)
24 | """
25 | assert len(state[0].shape) == 3
26 | assert len(state[1].shape) == 3
27 |
28 | state_embedding = self.graph_model(state)
29 | if detach:
30 | state_embedding = state_embedding.detach()
31 | if action is None:
32 | # for training purpose
33 | next_robot_state = None
34 | else:
35 | next_robot_state = self.compute_next_state(state[0], action)
36 | next_human_states = self.human_motion_predictor(state_embedding)[:, 1:, :]
37 |
38 | next_observation = [next_robot_state, next_human_states]
39 | return next_observation
40 |
41 | def compute_next_state(self, robot_state, action):
42 | # currently it can not perform parallel computation
43 | if robot_state.shape[0] != 1:
44 | raise NotImplementedError
45 |
46 | # px, py, vx, vy, radius, gx, gy, v_pref, theta
47 | next_state = robot_state.clone().squeeze()
48 | if self.kinematics == 'holonomic':
49 | next_state[0] = next_state[0] + action.vx * self.time_step
50 | next_state[1] = next_state[1] + action.vy * self.time_step
51 | next_state[2] = action.vx
52 | next_state[3] = action.vy
53 | else:
54 | next_state[8] = next_state[8] + action.r
55 | next_state[0] = next_state[0] + np.cos(next_state[8]) * action.v * self.time_step
56 | next_state[1] = next_state[1] + np.sin(next_state[8]) * action.v * self.time_step
57 | next_state[2] = np.cos(next_state[8]) * action.v
58 | next_state[3] = np.sin(next_state[8]) * action.v
59 |
60 | return next_state.unsqueeze(0).unsqueeze(0)
61 |
62 |
63 | class LinearStatePredictor(object):
64 | def __init__(self, config, time_step):
65 | """
66 | This module predicts the next state given the current state as input.
67 | It propagates the robot with the given action and approximates human motion with a constant-velocity (linear) model.
68 | """
69 | super().__init__()
70 | self.trainable = False
71 | self.kinematics = config.action_space.kinematics
72 | self.time_step = time_step
73 |
74 | def __call__(self, state, action):
75 | """ Predict the next state tensor given current state as input.
76 |
77 | :return: tensor of shape (batch_size, # of agents, feature_size)
78 | """
79 | assert len(state[0].shape) == 3
80 | assert len(state[1].shape) == 3
81 |
82 | next_robot_state = self.compute_next_state(state[0], action)
83 | next_human_states = self.linear_motion_approximator(state[1])
84 |
85 | next_observation = [next_robot_state, next_human_states]
86 | return next_observation
87 |
88 | def compute_next_state(self, robot_state, action):
89 | # currently it can not perform parallel computation
90 | if robot_state.shape[0] != 1:
91 | raise NotImplementedError
92 |
93 | # px, py, vx, vy, radius, gx, gy, v_pref, theta
94 | next_state = robot_state.clone().squeeze()
95 | if self.kinematics == 'holonomic':
96 | next_state[0] = next_state[0] + action.vx * self.time_step
97 | next_state[1] = next_state[1] + action.vy * self.time_step
98 | next_state[2] = action.vx
99 | next_state[3] = action.vy
100 | else:
101 | next_state[8] = next_state[8] + action.r
102 | next_state[0] = next_state[0] + np.cos(next_state[8]) * action.v * self.time_step
103 | next_state[1] = next_state[1] + np.sin(next_state[8]) * action.v * self.time_step
104 | next_state[2] = np.cos(next_state[8]) * action.v
105 | next_state[3] = np.sin(next_state[8]) * action.v
106 |
107 | return next_state.unsqueeze(0).unsqueeze(0)
108 |
109 | @staticmethod
110 | def linear_motion_approximator(human_states):
111 | """ approximate human states with linear motion, input shape : (batch_size, human_num, human_state_size)
112 | """
113 | # px, py, vx, vy, radius
114 | next_state = human_states.clone().squeeze()
115 | next_state[:, 0] = next_state[:, 0] + next_state[:, 2]
116 | next_state[:, 1] = next_state[:, 1] + next_state[:, 3]
117 |
118 | return next_state.unsqueeze(0)
119 |
120 |
--------------------------------------------------------------------------------
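Both predictors above propagate the robot state with simple kinematics over one time step: holonomic actions overwrite the velocity directly, while rotational actions turn the heading first and then move along it. A small numeric check of the holonomic branch, using the state layout given in the in-code comment (px, py, vx, vy, radius, gx, gy, v_pref, theta) and toy values:

# Illustrative holonomic one-step update (toy values, not part of the repository).
import torch

time_step = 0.25
robot_state = torch.tensor([[[0.0, 0.0, 0.0, 0.0, 0.3, 4.0, 0.0, 1.0, 0.0]]])
vx, vy = 1.0, 0.5                       # commanded holonomic velocity

next_state = robot_state.clone().squeeze()
next_state[0] += vx * time_step         # px: 0.0 -> 0.25
next_state[1] += vy * time_step         # py: 0.0 -> 0.125
next_state[2], next_state[3] = vx, vy   # velocity is overwritten with the command
print(next_state[:4])                   # tensor([0.2500, 0.1250, 1.0000, 0.5000])
--------------------------------------------------------------------------------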
/crowd_nav/policy/value_estimator.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from crowd_nav.policy.helpers import mlp
3 |
4 |
5 | class ValueEstimator(nn.Module):
6 | def __init__(self, config, graph_model):
7 | super().__init__()
8 | self.graph_model = graph_model
9 | self.value_network = mlp(config.gcn.X_dim, config.model_predictive_rl.value_network_dims)
10 |
11 | def forward(self, state):
12 | """ Embed state into a latent space. Take the first row of the feature matrix as state representation.
13 | """
14 | assert len(state[0].shape) == 3
15 | assert len(state[1].shape) == 3
16 |
17 | # only use the feature of robot node as state representation
18 | state_embedding = self.graph_model(state)[:, 0, :]
19 | value = self.value_network(state_embedding)
20 | return value
21 |
--------------------------------------------------------------------------------
/crowd_nav/test.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import argparse
3 | import importlib.util
4 | import os
5 | import torch
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | import gym
9 | from crowd_nav.utils.explorer import Explorer
10 | from crowd_nav.policy.policy_factory import policy_factory
11 | from crowd_sim.envs.utils.robot import Robot
12 | from crowd_sim.envs.policy.orca import ORCA
13 |
14 |
15 | def main(args):
16 | # configure logging and device
17 | level = logging.DEBUG if args.debug else logging.INFO
18 | logging.basicConfig(level=level, format='%(asctime)s, %(levelname)s: %(message)s',
19 | datefmt="%Y-%m-%d %H:%M:%S")
20 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
21 | logging.info('Using device: %s', device)
22 |
23 | if args.model_dir is not None:
24 | if args.config is not None:
25 | config_file = args.config
26 | else:
27 | config_file = os.path.join(args.model_dir, 'config.py')
28 | if args.il:
29 | model_weights = os.path.join(args.model_dir, 'il_model.pth')
30 | logging.info('Loaded IL weights')
31 | elif args.rl:
32 | if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
33 | model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
34 | else:
35 | print(os.listdir(args.model_dir))
36 | model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1])
37 | logging.info('Loaded RL weights')
38 | else:
39 | model_weights = os.path.join(args.model_dir, 'best_val.pth')
40 | logging.info('Loaded RL weights with best VAL')
41 |
42 | else:
43 | config_file = args.config
44 |
45 | spec = importlib.util.spec_from_file_location('config', config_file)
46 | if spec is None:
47 | parser.error('Config file not found.')
48 | config = importlib.util.module_from_spec(spec)
49 | spec.loader.exec_module(config)
50 |
51 | # configure policy
52 | policy_config = config.PolicyConfig(args.debug)
53 | policy = policy_factory[policy_config.name]()
54 | if args.planning_depth is not None:
55 | policy_config.model_predictive_rl.do_action_clip = True
56 | policy_config.model_predictive_rl.planning_depth = args.planning_depth
57 | if args.planning_width is not None:
58 | policy_config.model_predictive_rl.do_action_clip = True
59 | policy_config.model_predictive_rl.planning_width = args.planning_width
60 | if args.sparse_search:
61 | policy_config.model_predictive_rl.sparse_search = True
62 |
63 | policy.configure(policy_config)
64 | if policy.trainable:
65 | if args.model_dir is None:
66 | parser.error('Trainable policy must be specified with a model weights directory')
67 | policy.load_model(model_weights)
68 |
69 | # configure environment
70 | env_config = config.EnvConfig(args.debug)
71 |
72 | if args.human_num is not None:
73 | env_config.sim.human_num = args.human_num
74 | env = gym.make('CrowdSim-v0')
75 | env.configure(env_config)
76 |
77 | if args.square:
78 | env.test_scenario = 'square_crossing'
79 | if args.circle:
80 | env.test_scenario = 'circle_crossing'
81 | if args.test_scenario is not None:
82 | env.test_scenario = args.test_scenario
83 |
84 | robot = Robot(env_config, 'robot')
85 | env.set_robot(robot)
86 | robot.time_step = env.time_step
87 | robot.set_policy(policy)
88 | explorer = Explorer(env, robot, device, None, gamma=0.9)
89 |
90 | train_config = config.TrainConfig(args.debug)
91 | epsilon_end = train_config.train.epsilon_end
92 | if not isinstance(robot.policy, ORCA):
93 | robot.policy.set_epsilon(epsilon_end)
94 |
95 | policy.set_phase(args.phase)
96 | policy.set_device(device)
97 | # set safety space for ORCA in non-cooperative simulation
98 | if isinstance(robot.policy, ORCA):
99 | if robot.visible:
100 | robot.policy.safety_space = args.safety_space
101 | else:
102 | robot.policy.safety_space = args.safety_space
103 | logging.info('ORCA agent buffer: %f', robot.policy.safety_space)
104 |
105 | policy.set_env(env)
106 | robot.print_info()
107 |
108 | if args.visualize:
109 | rewards = []
110 | ob = env.reset(args.phase, args.test_case)
111 | done = False
112 | last_pos = np.array(robot.get_position())
113 | while not done:
114 | action = robot.act(ob)
115 | ob, reward, done, info = env.step(action)
116 | rewards.append(reward)
117 | current_pos = np.array(robot.get_position())
118 | logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
119 | last_pos = current_pos
120 | gamma = 0.9
121 | cumulative_reward = sum([pow(gamma, t * robot.time_step * robot.v_pref)
122 | * reward for t, reward in enumerate(rewards)])
123 |
124 | if args.traj:
125 | env.render('traj', args.video_file)
126 | else:
127 | if args.video_dir is not None:
128 | if policy_config.name == 'gcn':
129 | args.video_file = os.path.join(args.video_dir, policy_config.name + '_' + policy_config.gcn.similarity_function)
130 | else:
131 | args.video_file = os.path.join(args.video_dir, policy_config.name)
132 | args.video_file = args.video_file + '_' + args.phase + '_' + str(args.test_case) + '.mp4'
133 | env.render('video', args.video_file)
134 | logging.info('It takes %.2f seconds to finish. Final status is %s, cumulative_reward is %f', env.global_time, info, cumulative_reward)
135 | if robot.visible and info == 'reach goal':
136 | human_times = env.get_human_times()
137 | logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times))
138 | else:
139 | explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
140 | if args.plot_test_scenarios_hist:
141 | test_angle_seeds = np.array(env.test_scene_seeds)
142 | b = [i * 0.01 for i in range(101)]
143 | n, bins, patches = plt.hist(test_angle_seeds, b, facecolor='g')
144 | plt.savefig(os.path.join(args.model_dir, 'test_scene_hist.png'))
145 | plt.close()
146 |
147 |
148 | if __name__ == '__main__':
149 | parser = argparse.ArgumentParser('Parse configuration file')
150 | parser.add_argument('--config', type=str, default=None)
151 | parser.add_argument('--policy', type=str, default='model_predictive_rl')
152 | parser.add_argument('-m', '--model_dir', type=str, default=None)
153 | parser.add_argument('--il', default=False, action='store_true')
154 | parser.add_argument('--rl', default=False, action='store_true')
155 | parser.add_argument('--gpu', default=False, action='store_true')
156 | parser.add_argument('-v', '--visualize', default=False, action='store_true')
157 | parser.add_argument('--phase', type=str, default='test')
158 | parser.add_argument('-c', '--test_case', type=int, default=None)
159 | parser.add_argument('--square', default=False, action='store_true')
160 | parser.add_argument('--circle', default=False, action='store_true')
161 | parser.add_argument('--video_file', type=str, default=None)
162 | parser.add_argument('--video_dir', type=str, default=None)
163 | parser.add_argument('--traj', default=False, action='store_true')
164 | parser.add_argument('--debug', default=False, action='store_true')
165 | parser.add_argument('--human_num', type=int, default=None)
166 | parser.add_argument('--safety_space', type=float, default=0.2)
167 | parser.add_argument('--test_scenario', type=str, default=None)
168 | parser.add_argument('--plot_test_scenarios_hist', default=True, action='store_true')
169 | parser.add_argument('-d', '--planning_depth', type=int, default=None)
170 | parser.add_argument('-w', '--planning_width', type=int, default=None)
171 | parser.add_argument('--sparse_search', default=False, action='store_true')
172 |
173 | sys_args = parser.parse_args()
174 |
175 | main(sys_args)
176 |
--------------------------------------------------------------------------------
/crowd_nav/tests/test_polices.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from crowd_nav.train import main
3 |
4 |
5 | def get_default_args():
6 | parser = argparse.ArgumentParser('Parse configuration file')
7 | parser.add_argument('--policy', type=str, default='cadrl')
8 | parser.add_argument('--config', type=str, default='config.py')
9 | parser.add_argument('--output_dir', type=str, default='data/output')
10 | parser.add_argument('--overwrite', default=False, action='store_true')
11 | parser.add_argument('--weights', type=str)
12 | parser.add_argument('--resume', default=False, action='store_true')
13 | parser.add_argument('--gpu', default=False, action='store_true')
14 | parser.add_argument('--debug', default=False, action='store_true')
15 | args = parser.parse_args(args=[])
16 | return args
17 |
18 |
19 | def test_sarl():
20 | args = get_default_args()
21 | args.policy = 'sarl'
22 | args.config = 'configs/tests_config.py'
23 | args.debug = True
24 | args.overwrite = True
25 | main(args)
26 |
27 |
28 | def test_cadrl():
29 | args = get_default_args()
30 | args.policy = 'cadrl'
31 | args.config = 'configs/tests_config.py'
32 | args.debug = True
33 | args.overwrite = True
34 | main(args)
35 |
36 |
37 | def test_lstm_rl():
38 | args = get_default_args()
39 | args.policy = 'lstm_rl'
40 | args.config = 'configs/tests_config.py'
41 | args.debug = True
42 | args.overwrite = True
43 | main(args)
44 |
45 |
46 | def test_gcn():
47 | args = get_default_args()
48 | args.policy = 'gcn'
49 | args.config = 'configs/tests_config.py'
50 | args.debug = True
51 | args.overwrite = True
52 | main(args)
53 |
--------------------------------------------------------------------------------
/crowd_nav/train.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import logging
3 | import argparse
4 | import os
5 | import shutil
6 | import importlib.util
7 | import torch
8 | import gym
9 | import copy
10 | import git
11 | import re
12 | from tensorboardX import SummaryWriter
13 | from crowd_sim.envs.utils.robot import Robot
14 | from crowd_nav.utils.trainer import VNRLTrainer, MPRLTrainer
15 | from crowd_nav.utils.memory import ReplayMemory
16 | from crowd_nav.utils.explorer import Explorer
17 | from crowd_nav.policy.policy_factory import policy_factory
18 |
19 |
20 | def set_random_seeds(seed):
21 | """
22 | Sets the random seeds for pytorch cpu and gpu
23 | """
24 | torch.manual_seed(seed)
25 | torch.cuda.manual_seed_all(seed)
26 | return None
27 |
28 |
29 | def main(args):
30 | set_random_seeds(args.randomseed)
31 | # configure paths
32 | make_new_dir = True
33 | if os.path.exists(args.output_dir):
34 | if args.overwrite:
35 | shutil.rmtree(args.output_dir)
36 | else:
37 | key = input('Output directory already exists! Overwrite the folder? (y/n)')
38 | if key == 'y' and not args.resume:
39 | shutil.rmtree(args.output_dir)
40 | else:
41 | make_new_dir = False
42 | if make_new_dir:
43 | os.makedirs(args.output_dir)
44 | shutil.copy(args.config, os.path.join(args.output_dir, 'config.py'))
45 |
46 | # # insert the arguments from command line to the config file
47 | # with open(os.path.join(args.output_dir, 'config.py'), 'r') as fo:
48 | # config_text = fo.read()
49 | # search_pairs = {r"gcn.X_dim = \d*": "gcn.X_dim = {}".format(args.X_dim),
50 | # r"gcn.num_layer = \d": "gcn.num_layer = {}".format(args.layers),
51 | # r"gcn.similarity_function = '\w*'": "gcn.similarity_function = '{}'".format(args.sim_func),
52 | # r"gcn.layerwise_graph = \w*": "gcn.layerwise_graph = {}".format(args.layerwise_graph),
53 | # r"gcn.skip_connection = \w*": "gcn.skip_connection = {}".format(args.skip_connection)}
54 | #
55 | # for find, replace in search_pairs.items():
56 | # config_text = re.sub(find, replace, config_text)
57 | #
58 | # with open(os.path.join(args.output_dir, 'config.py'), 'w') as fo:
59 | # fo.write(config_text)
60 |
61 | args.config = os.path.join(args.output_dir, 'config.py')
62 | log_file = os.path.join(args.output_dir, 'output.log')
63 | il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
64 | rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')
65 |
66 | spec = importlib.util.spec_from_file_location('config', args.config)
67 | if spec is None:
68 | parser.error('Config file not found.')
69 | config = importlib.util.module_from_spec(spec)
70 | spec.loader.exec_module(config)
71 |
72 | # configure logging
73 | mode = 'a' if args.resume else 'w'
74 | file_handler = logging.FileHandler(log_file, mode=mode)
75 | stdout_handler = logging.StreamHandler(sys.stdout)
76 | level = logging.INFO if not args.debug else logging.DEBUG
77 | logging.basicConfig(level=level, handlers=[stdout_handler, file_handler],
78 | format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
79 | repo = git.Repo(search_parent_directories=True)
80 | logging.info('Current git head hash code: {}'.format(repo.head.object.hexsha))
81 | logging.info('Current config content is: {}'.format(config))
82 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
83 | logging.info('Using device: %s', device)
84 | writer = SummaryWriter(log_dir=args.output_dir)
85 |
86 | # configure policy
87 | policy_config = config.PolicyConfig()
88 | policy = policy_factory[policy_config.name]()
89 | if not policy.trainable:
90 | parser.error('Policy has to be trainable')
91 | policy.configure(policy_config)
92 | policy.set_device(device)
93 |
94 | # configure environment
95 | env_config = config.EnvConfig(args.debug)
96 | env = gym.make('CrowdSim-v0')
97 | env.configure(env_config)
98 | robot = Robot(env_config, 'robot')
99 | robot.time_step = env.time_step
100 | env.set_robot(robot)
101 |
102 | # read training parameters
103 | train_config = config.TrainConfig(args.debug)
104 | rl_learning_rate = train_config.train.rl_learning_rate
105 | train_batches = train_config.train.train_batches
106 | train_episodes = train_config.train.train_episodes
107 | sample_episodes = train_config.train.sample_episodes
108 | target_update_interval = train_config.train.target_update_interval
109 | evaluation_interval = train_config.train.evaluation_interval
110 | capacity = train_config.train.capacity
111 | epsilon_start = train_config.train.epsilon_start
112 | epsilon_end = train_config.train.epsilon_end
113 | epsilon_decay = train_config.train.epsilon_decay
114 | checkpoint_interval = train_config.train.checkpoint_interval
115 |
116 | # configure trainer and explorer
117 | memory = ReplayMemory(capacity)
118 | model = policy.get_model()
119 | batch_size = train_config.trainer.batch_size
120 | optimizer = train_config.trainer.optimizer
121 | if policy_config.name == 'model_predictive_rl':
122 | trainer = MPRLTrainer(model, policy.state_predictor, memory, device, policy, writer, batch_size, optimizer, env.human_num,
123 | reduce_sp_update_frequency=train_config.train.reduce_sp_update_frequency,
124 | freeze_state_predictor=train_config.train.freeze_state_predictor,
125 | detach_state_predictor=train_config.train.detach_state_predictor,
126 | share_graph_model=policy_config.model_predictive_rl.share_graph_model)
127 | else:
128 | trainer = VNRLTrainer(model, memory, device, policy, batch_size, optimizer, writer)
129 | explorer = Explorer(env, robot, device, writer, memory, policy.gamma, target_policy=policy)
130 |
131 | # imitation learning
132 | if args.resume:
133 | if not os.path.exists(rl_weight_file):
134 | logging.error('RL weights do not exist')
135 | model.load_state_dict(torch.load(rl_weight_file))
136 | rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
137 | logging.info('Load reinforcement learning trained weights. Resume training')
138 | elif os.path.exists(il_weight_file):
139 | model.load_state_dict(torch.load(il_weight_file))
140 | logging.info('Load imitation learning trained weights.')
141 | else:
142 | il_episodes = train_config.imitation_learning.il_episodes
143 | il_policy = train_config.imitation_learning.il_policy
144 | il_epochs = train_config.imitation_learning.il_epochs
145 | il_learning_rate = train_config.imitation_learning.il_learning_rate
146 | trainer.set_learning_rate(il_learning_rate)
147 | if robot.visible:
148 | safety_space = 0
149 | else:
150 | safety_space = train_config.imitation_learning.safety_space
151 | il_policy = policy_factory[il_policy]()
152 | il_policy.multiagent_training = policy.multiagent_training
153 | il_policy.safety_space = safety_space
154 | robot.set_policy(il_policy)
155 | explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True)
156 | trainer.optimize_epoch(il_epochs)
157 | policy.save_model(il_weight_file)
158 | logging.info('Finish imitation learning. Weights saved.')
159 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
160 |
161 | trainer.update_target_model(model)
162 |
163 | # reinforcement learning
164 | policy.set_env(env)
165 | robot.set_policy(policy)
166 | robot.print_info()
167 | trainer.set_learning_rate(rl_learning_rate)
168 | # fill the memory pool with some RL experience
169 | if args.resume:
170 | robot.policy.set_epsilon(epsilon_end)
171 | explorer.run_k_episodes(100, 'train', update_memory=True, episode=0)
172 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
173 | episode = 0
174 | best_val_reward = -1
175 | best_val_model = None
176 | # evaluate the model after imitation learning
177 |
178 | if episode % evaluation_interval == 0:
179 | logging.info('Evaluate the model instantly after imitation learning on the validation cases')
180 | explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)
181 | explorer.log('val', episode // evaluation_interval)
182 |
183 | if args.test_after_every_eval:
184 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True)
185 | explorer.log('test', episode // evaluation_interval)
186 |
187 | episode = 0
188 | while episode < train_episodes:
189 | if args.resume:
190 | epsilon = epsilon_end
191 | else:
192 | if episode < epsilon_decay:
193 | epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
194 | else:
195 | epsilon = epsilon_end
196 | robot.policy.set_epsilon(epsilon)
197 |
198 | # sample k episodes into memory and optimize over the generated memory
199 | explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode)
200 | explorer.log('train', episode)
201 |
202 | trainer.optimize_batch(train_batches, episode)
203 | episode += 1
204 |
205 | if episode % target_update_interval == 0:
206 | trainer.update_target_model(model)
207 | # evaluate the model
208 | if episode % evaluation_interval == 0:
209 | _, _, _, reward, _ = explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)
210 | explorer.log('val', episode // evaluation_interval)
211 |
212 | if episode % checkpoint_interval == 0 and reward > best_val_reward:
213 | best_val_reward = reward
214 | best_val_model = copy.deepcopy(policy.get_state_dict())
215 | # test after every evaluation to check how the generalization performance evolves
216 | if args.test_after_every_eval:
217 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True)
218 | explorer.log('test', episode // evaluation_interval)
219 |
220 | if episode != 0 and episode % checkpoint_interval == 0:
221 | current_checkpoint = episode // checkpoint_interval - 1
222 | save_every_checkpoint_rl_weight_file = rl_weight_file.split('.')[0] + '_' + str(current_checkpoint) + '.pth'
223 | policy.save_model(save_every_checkpoint_rl_weight_file)
224 |
225 | # # test with the best val model
226 | if best_val_model is not None:
227 | policy.load_state_dict(best_val_model)
228 | torch.save(best_val_model, os.path.join(args.output_dir, 'best_val.pth'))
229 | logging.info('Save the best val model with the reward: {}'.format(best_val_reward))
230 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True)
231 |
232 |
233 | if __name__ == '__main__':
234 | parser = argparse.ArgumentParser('Parse configuration file')
235 | parser.add_argument('--policy', type=str, default='model_predictive_rl')
236 | parser.add_argument('--config', type=str, default='configs/icra_benchmark/mp_separate.py')
237 | parser.add_argument('--output_dir', type=str, default='data/output')
238 | parser.add_argument('--overwrite', default=False, action='store_true')
239 | parser.add_argument('--weights', type=str)
240 | parser.add_argument('--resume', default=False, action='store_true')
241 | parser.add_argument('--gpu', default=False, action='store_true')
242 | parser.add_argument('--debug', default=False, action='store_true')
243 | parser.add_argument('--test_after_every_eval', default=False, action='store_true')
244 | parser.add_argument('--randomseed', type=int, default=17)
245 |
246 | # arguments for GCN
247 | # parser.add_argument('--X_dim', type=int, default=32)
248 | # parser.add_argument('--layers', type=int, default=2)
249 | # parser.add_argument('--sim_func', type=str, default='embedded_gaussian')
250 | # parser.add_argument('--layerwise_graph', default=False, action='store_true')
251 | # parser.add_argument('--skip_connection', default=True, action='store_true')
252 |
253 | sys_args = parser.parse_args()
254 |
255 | main(sys_args)
256 |
--------------------------------------------------------------------------------
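The exploration schedule in the training loop above decays epsilon linearly from epsilon_start to epsilon_end over the first epsilon_decay episodes and holds it constant afterwards. A small sketch of that schedule; the numbers below are illustrative, since the config defaults are not shown in this listing.

# Illustrative epsilon-greedy schedule matching the branch in main() (toy values).
epsilon_start, epsilon_end, epsilon_decay = 0.5, 0.1, 4000

def epsilon_at(episode):
    if episode < epsilon_decay:
        return epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
    return epsilon_end

print([round(epsilon_at(e), 3) for e in (0, 1000, 2000, 4000, 8000)])
# [0.5, 0.4, 0.3, 0.1, 0.1]
--------------------------------------------------------------------------------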
/crowd_nav/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/utils/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/utils/explorer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import copy
4 | import torch
5 | from tqdm import tqdm
6 | from crowd_sim.envs.utils.info import *
7 |
8 |
9 | class Explorer(object):
10 | def __init__(self, env, robot, device, writer, memory=None, gamma=None, target_policy=None):
11 | self.env = env
12 | self.robot = robot
13 | self.device = device
14 | self.writer = writer
15 | self.memory = memory
16 | self.gamma = gamma
17 | self.target_policy = target_policy
18 | self.statistics = None
19 |
20 | # @profile
21 | def run_k_episodes(self, k, phase, update_memory=False, imitation_learning=False, episode=None, epoch=None,
22 | print_failure=False):
23 | self.robot.policy.set_phase(phase)
24 | success_times = []
25 | collision_times = []
26 | timeout_times = []
27 | success = 0
28 | collision = 0
29 | timeout = 0
30 | discomfort = 0
31 | min_dist = []
32 | cumulative_rewards = []
33 | average_returns = []
34 | collision_cases = []
35 | timeout_cases = []
36 |
37 | if k != 1:
38 | pbar = tqdm(total=k)
39 | else:
40 | pbar = None
41 |
42 | for i in range(k):
43 | ob = self.env.reset(phase)
44 | done = False
45 | states = []
46 | actions = []
47 | rewards = []
48 | while not done:
49 | action = self.robot.act(ob)
50 | ob, reward, done, info = self.env.step(action)
51 | states.append(self.robot.policy.last_state)
52 | actions.append(action)
53 | rewards.append(reward)
54 |
55 | if isinstance(info, Discomfort):
56 | discomfort += 1
57 | min_dist.append(info.min_dist)
58 |
59 | if isinstance(info, ReachGoal):
60 | success += 1
61 | success_times.append(self.env.global_time)
62 | elif isinstance(info, Collision):
63 | collision += 1
64 | collision_cases.append(i)
65 | collision_times.append(self.env.global_time)
66 | elif isinstance(info, Timeout):
67 | timeout += 1
68 | timeout_cases.append(i)
69 | timeout_times.append(self.env.time_limit)
70 | else:
71 | raise ValueError('Invalid end signal from environment')
72 |
73 | if update_memory:
74 | if isinstance(info, ReachGoal) or isinstance(info, Collision):
75 | # only add positive(success) or negative(collision) experience in experience set
76 | self.update_memory(states, actions, rewards, imitation_learning)
77 |
78 | cumulative_rewards.append(sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref)
79 | * reward for t, reward in enumerate(rewards)]))
80 | returns = []
81 | for step in range(len(rewards)):
82 | step_return = sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref)
83 | * reward for t, reward in enumerate(rewards[step:])])
84 | returns.append(step_return)
85 | average_returns.append(average(returns))
86 |
87 | if pbar:
88 | pbar.update(1)
89 | success_rate = success / k
90 | collision_rate = collision / k
91 | assert success + collision + timeout == k
92 | avg_nav_time = sum(success_times) / len(success_times) if success_times else self.env.time_limit
93 |
94 | extra_info = '' if episode is None else 'in episode {} '.format(episode)
95 | extra_info = extra_info if epoch is None else extra_info + ' in epoch {} '.format(epoch)
96 | logging.info('{:<5} {}has success rate: {:.2f}, collision rate: {:.2f}, nav time: {:.2f}, total reward: {:.4f},'
97 | ' average return: {:.4f}'. format(phase.upper(), extra_info, success_rate, collision_rate,
98 | avg_nav_time, average(cumulative_rewards),
99 | average(average_returns)))
100 | if phase in ['val', 'test']:
101 | total_time = sum(success_times + collision_times + timeout_times)
102 | logging.info('Frequency of being in danger: %.2f and average min separate distance in danger: %.2f',
103 | discomfort / total_time, average(min_dist))
104 |
105 | if print_failure:
106 | logging.info('Collision cases: ' + ' '.join([str(x) for x in collision_cases]))
107 | logging.info('Timeout cases: ' + ' '.join([str(x) for x in timeout_cases]))
108 |
109 | self.statistics = success_rate, collision_rate, avg_nav_time, average(cumulative_rewards), average(average_returns)
110 |
111 | return self.statistics
112 |
113 | def update_memory(self, states, actions, rewards, imitation_learning=False):
114 | if self.memory is None or self.gamma is None:
115 | raise ValueError('Memory or gamma value is not set!')
116 |
117 | for i, state in enumerate(states[:-1]):
118 | reward = rewards[i]
119 |
120 | # VALUE UPDATE
121 | if imitation_learning:
122 | # define the value of states in IL as cumulative discounted rewards, which is the same in RL
123 | state = self.target_policy.transform(state)
124 | next_state = self.target_policy.transform(states[i+1])
125 | value = sum([pow(self.gamma, (t - i) * self.robot.time_step * self.robot.v_pref) * reward *
126 | (1 if t >= i else 0) for t, reward in enumerate(rewards)])
127 | else:
128 | next_state = states[i+1]
129 | if i == len(states) - 1:
130 | # terminal state
131 | value = reward
132 | else:
133 | value = 0
134 | value = torch.Tensor([value]).to(self.device)
135 | reward = torch.Tensor([rewards[i]]).to(self.device)
136 |
137 | if self.target_policy.name == 'ModelPredictiveRL':
138 | self.memory.push((state[0], state[1], value, reward, next_state[0], next_state[1]))
139 | else:
140 | self.memory.push((state, value, reward, next_state))
141 |
142 | def log(self, tag_prefix, global_step):
143 | sr, cr, time, reward, avg_return = self.statistics
144 | self.writer.add_scalar(tag_prefix + '/success_rate', sr, global_step)
145 | self.writer.add_scalar(tag_prefix + '/collision_rate', cr, global_step)
146 | self.writer.add_scalar(tag_prefix + '/time', time, global_step)
147 | self.writer.add_scalar(tag_prefix + '/reward', reward, global_step)
148 | self.writer.add_scalar(tag_prefix + '/avg_return', avg_return, global_step)
149 |
150 |
151 | def average(input_list):
152 | if input_list:
153 | return sum(input_list) / len(input_list)
154 | else:
155 | return 0
156 |
--------------------------------------------------------------------------------
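The returns logged by the explorer above discount each reward by gamma ** (t * time_step * v_pref), i.e. per unit of simulated time scaled by the preferred speed, rather than per decision step. A small worked example with a toy reward sequence and illustrative constants:

# Illustrative time-scaled discounting as used in run_k_episodes (toy values).
gamma, time_step, v_pref = 0.9, 0.25, 1.0
rewards = [0.0, 0.0, -0.05, 1.0]

cumulative = sum(pow(gamma, t * time_step * v_pref) * r for t, r in enumerate(rewards))
print(round(cumulative, 4))   # 1.0 * 0.9**0.75 - 0.05 * 0.9**0.5 ≈ 0.8766
--------------------------------------------------------------------------------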
/crowd_nav/utils/memory.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 |
3 |
4 | class ReplayMemory(Dataset):
5 | def __init__(self, capacity):
6 | self.capacity = capacity
7 | self.memory = list()
8 | self.position = 0
9 |
10 | def push(self, item):
11 | # replace old experience with new experience
12 | if len(self.memory) < self.position + 1:
13 | self.memory.append(item)
14 | else:
15 | self.memory[self.position] = item
16 | self.position = (self.position + 1) % self.capacity
17 |
18 | def is_full(self):
19 | return len(self.memory) == self.capacity
20 |
21 | def __getitem__(self, item):
22 | return self.memory[item]
23 |
24 | def __len__(self):
25 | return len(self.memory)
26 |
27 | def clear(self):
28 | self.memory = list()
29 |
--------------------------------------------------------------------------------
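ReplayMemory above is a fixed-capacity circular buffer exposed as a torch Dataset, so the trainers can wrap it in a DataLoader; once full, new experience overwrites the oldest slot. A minimal usage sketch with toy integer items, assuming the package is on the Python path:

from crowd_nav.utils.memory import ReplayMemory

memory = ReplayMemory(capacity=3)
for item in range(5):
    memory.push(item)                            # 3 and 4 overwrite the oldest slots

print(len(memory), memory.is_full())             # 3 True
print([memory[i] for i in range(len(memory))])   # [3, 4, 2]
--------------------------------------------------------------------------------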
/crowd_nav/utils/plot.py:
--------------------------------------------------------------------------------
1 | import re
2 | import argparse
3 | import os
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 |
7 |
8 | def running_mean(x, n):
9 | cumsum = np.cumsum(np.insert(x, 0, 0))
10 | return (cumsum[n:] - cumsum[:-n]) / float(n)
11 |
12 |
13 | def main():
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('log_files', type=str, nargs='+')
16 | parser.add_argument('--plot_sr', default=False, action='store_true')
17 | parser.add_argument('--plot_cr', default=False, action='store_true')
18 | parser.add_argument('--plot_time', default=False, action='store_true')
19 | parser.add_argument('--plot_reward', default=True, action='store_true')
20 | parser.add_argument('--plot_train', default=True, action='store_true')
21 | parser.add_argument('--plot_val', default=False, action='store_true')
22 | parser.add_argument('--plot_all', default=False, action='store_true')
23 | parser.add_argument('--window_size', type=int, default=100)
24 | args = parser.parse_args()
25 |
26 | models = []
27 | max_episodes = None
28 |
29 | ax1 = ax2 = ax3 = ax4 = None
30 | ax1_legends = []
31 | ax2_legends = []
32 | ax3_legends = []
33 | ax4_legends = []
34 |
35 | if args.plot_all:
36 | log_dir = args.log_files[0]
37 | if not os.path.isdir(log_dir):
38 | parser.error('Input argument should be the directory containing all experiment folders')
39 | # args.log_files = [os.path.join(log_dir, exp_dir, 'output.log') for exp_dir in os.listdir(log_dir)]
40 | args.log_files = [os.path.join(log_dir, exp_dir, 'output.log') for exp_dir in
41 | ['sarl_linear_adam', 'mp_detach_skip', 'rgl_linear_adam', 'rgl_no_transformation',
42 | 'mp_separate_graph']]
43 |
44 | args.log_files = sorted(args.log_files)
45 | if not models:
46 | models = [os.path.basename(log_file[:-11]) for log_file in args.log_files]
47 | for i, log_file in enumerate(args.log_files):
48 | with open(log_file, 'r') as file:
49 | log = file.read()
50 |
51 | val_pattern = r"VAL in episode (?P\d+) has success rate: (?P[0-1].\d+), " \
52 | r"collision rate: (?P[0-1].\d+), nav time: (?P