├── .gitignore
├── .pylintrc
├── README.md
├── crowd_nav
│   ├── __init__.py
│   ├── configs
│   │   ├── __init__.py
│   │   └── icra_benchmark
│   │       ├── __init__.py
│   │       ├── config.py
│   │       ├── mp_detach.py
│   │       ├── mp_linear.py
│   │       ├── mp_separate.py
│   │       ├── mp_separate_dp.py
│   │       ├── rgl.py
│   │       └── sarl.py
│   ├── policy
│   │   ├── cadrl.py
│   │   ├── gcn.py
│   │   ├── graph_model.py
│   │   ├── helpers.py
│   │   ├── lstm_rl.py
│   │   ├── model_predictive_rl.py
│   │   ├── multi_human_rl.py
│   │   ├── policy_factory.py
│   │   ├── sarl.py
│   │   ├── state_predictor.py
│   │   └── value_estimator.py
│   ├── test.py
│   ├── tests
│   │   └── test_polices.py
│   ├── train.py
│   └── utils
│       ├── __init__.py
│       ├── explorer.py
│       ├── memory.py
│       ├── plot.py
│       ├── plot2.py
│       └── trainer.py
├── crowd_sim
│   ├── README.md
│   ├── __init__.py
│   └── envs
│       ├── __init__.py
│       ├── crowd_sim.py
│       ├── policy
│       │   ├── __init__.py
│       │   ├── linear.py
│       │   ├── orca.py
│       │   ├── policy.py
│       │   ├── policy_factory.py
│       │   └── socialforce.py
│       ├── realsim_utils
│       │   ├── GrandCentral.py
│       │   └── __init__.py
│       └── utils
│           ├── __init__.py
│           ├── action.py
│           ├── agent.py
│           ├── human.py
│           ├── info.py
│           ├── robot.py
│           ├── state.py
│           └── utils.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # editors
2 | .idea/
3 | .vscode/
4 |
5 | # Python
6 | *.egg-info/
7 | .pytest_cache/
8 | __pycache__
9 | .cache/
10 | venv*/
11 |
12 | # data
13 | data/
14 | *.mp4
15 | videos/
16 | *.log
17 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [BASIC]
2 |
3 | #variable-rgx=[a-z0-9_]{1,30}$
4 | good-names=dx,dy,i,j,k,px,py,r,th,vx,vy,x,y,z,ob
5 |
6 | [TYPECHECK]
7 |
8 | # List of members which are set dynamically and missed by pylint inference
9 | # system, and so shouldn't trigger E1101 when accessed. Python regular
10 | # expressions are accepted.
11 | generated-members=numpy.*,torch.*
12 |
13 |
14 | disable=missing-docstring,useless-object-inheritance
15 | max-line-length=120
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RelationalGraphLearning
2 | This repository contains the code for our paper, which was accepted at IROS 2020.
3 | For more details, please refer to the [paper](https://arxiv.org/abs/1909.13165).
4 |
5 |
6 | ## Abstract
7 | We present a relational graph learning approach for robotic crowd navigation using model-based deep reinforcement
8 | learning that plans actions by looking into the future.
9 | Our approach reasons about the relations between all agents based on their latent features and uses a Graph Convolutional
10 | Network to encode higher-order interactions in each agent's state representation, which is subsequently leveraged for
11 | state prediction and value estimation.
12 | The ability to predict human motion allows us to perform multi-step lookahead planning, taking into account the temporal
13 | evolution of human crowds.
14 | We evaluate our approach against a state-of-the-art baseline for crowd navigation and ablations of our model to
15 | demonstrate that navigation with our approach is more efficient, results in fewer collisions, and avoids failure cases
16 | involving oscillatory and freezing behaviors.
17 |
18 |
19 |
20 | ## Method Overview
21 |
22 |
23 |
24 | ## Setup
25 | 1. Install the [Python-RVO2](https://github.com/sybrenstuvel/Python-RVO2) library
26 | 2. Install the [socialforce](https://github.com/ChanganVR/socialforce) library
27 | 3. Install crowd_sim and crowd_nav with pip
28 | ```
29 | pip install -e .
30 | ```
31 |
32 | ## Getting Started
33 | This repository is organized into two parts: the crowd_sim/ folder contains the simulation environment and the
34 | crowd_nav/ folder contains the code for training and testing the policies. Details of the simulation framework can
35 | be found [here](crowd_sim/README.md). Below are the instructions for training and testing policies; they should be
36 | executed inside the crowd_nav/ folder.
37 |
38 |
39 | 1. Train a policy.
40 | ```
41 | python train.py --policy rgl
42 | ```
43 | 2. Test policies with 500 test cases.
44 | ```
45 | python test.py --policy rgl --model_dir data/output --phase test
46 | ```
47 | 3. Run a policy for one episode and visualize the result.
48 | ```
49 | python test.py --policy rgl --model_dir data/output --phase test --visualize --test_case 0
50 | ```
51 | 4. Plot the training curve.
52 | ```
53 | python utils/plot.py data/output/output.log
54 | ```
55 |
56 |
57 |
58 | ## Video Demo
59 | [Video demo](https://youtu.be/U3quW30Eu3A)
60 |
61 |
62 | ## Citation
63 | If you find the code or paper useful for your research, please cite the following papers:
64 | ```bibtex
65 | @inproceedings{chen2020relational,
66 | title={Relational Graph Learning for Crowd Navigation},
67 | author={Changan Chen and Sha Hu and Payam Nikdel and Greg Mori and Manolis Savva},
68 | year={2020},
69 | booktitle={IROS}
70 | }
71 | @inproceedings{chen2019crowdnav,
72 | title={Crowd-Robot Interaction: Crowd-aware Robot Navigation with Attention-based Deep Reinforcement Learning},
73 | author={Changan Chen and Yuejiang Liu and Sven Kreiss and Alexandre Alahi},
74 | year={2019},
75 | booktitle={ICRA}
76 | }
77 | ```
78 |
--------------------------------------------------------------------------------
/crowd_nav/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/configs/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/configs/icra_benchmark/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Never Modify this file! Always copy the settings you want to change to your local file.
3 | """
4 |
5 |
6 | import numpy as np
7 |
8 |
9 | class Config(object):
10 | def __init__(self):
11 | pass
12 |
13 |
14 | class BaseEnvConfig(object):
15 | env = Config()
16 | env.time_limit = 30
17 | env.time_step = 0.25
18 | env.val_size = 100
19 | env.test_size = 500
20 | env.train_size = np.iinfo(np.uint32).max - 2000
21 | env.randomize_attributes = False
22 | env.robot_sensor_range = 5
23 |
24 | reward = Config()
25 | reward.success_reward = 1
26 | reward.collision_penalty = -0.25
27 | reward.discomfort_dist = 0.2
28 | reward.discomfort_penalty_factor = 0.5
29 |
30 | sim = Config()
31 | sim.train_val_scenario = 'circle_crossing'
32 | sim.test_scenario = 'circle_crossing'
33 | sim.square_width = 20
34 | sim.circle_radius = 4
35 | sim.human_num = 5
36 | sim.nonstop_human = False
37 | sim.centralized_planning = True
38 |
39 | humans = Config()
40 | humans.visible = True
41 | humans.policy = 'orca'
42 | humans.radius = 0.3
43 | humans.v_pref = 1
44 | humans.sensor = 'coordinates'
45 |
46 | robot = Config()
47 | robot.visible = False
48 | robot.policy = 'none'
49 | robot.radius = 0.3
50 | robot.v_pref = 1
51 | robot.sensor = 'coordinates'
52 |
53 | def __init__(self, debug=False):
54 | if debug:
55 | self.env.val_size = 1
56 | self.env.test_size = 1
57 |
58 |
59 | class BasePolicyConfig(object):
60 | rl = Config()
61 | rl.gamma = 0.9
62 |
63 | om = Config()
64 | om.cell_num = 4
65 | om.cell_size = 1
66 | om.om_channel_size = 3
67 |
68 | action_space = Config()
69 | action_space.kinematics = 'holonomic'
70 | action_space.speed_samples = 5
71 | action_space.rotation_samples = 16
72 | action_space.sampling = 'exponential'
73 | action_space.query_env = False
74 | action_space.rotation_constraint = np.pi / 3
75 |
76 | cadrl = Config()
77 | cadrl.mlp_dims = [150, 100, 100, 1]
78 | cadrl.multiagent_training = False
79 |
80 | lstm_rl = Config()
81 | lstm_rl.global_state_dim = 50
82 | lstm_rl.mlp1_dims = [150, 100, 100, 50]
83 | lstm_rl.mlp2_dims = [150, 100, 100, 1]
84 | lstm_rl.multiagent_training = True
85 | lstm_rl.with_om = False
86 | lstm_rl.with_interaction_module = True
87 |
88 | srl = Config()
89 | srl.mlp1_dims = [150, 100, 100, 50]
90 | srl.mlp2_dims = [150, 100, 100, 1]
91 | srl.multiagent_training = True
92 | srl.with_om = True
93 |
94 | sarl = Config()
95 | sarl.mlp1_dims = [150, 100]
96 | sarl.mlp2_dims = [100, 50]
97 | sarl.attention_dims = [100, 100, 1]
98 | sarl.mlp3_dims = [150, 100, 100, 1]
99 | sarl.multiagent_training = True
100 | sarl.with_om = True
101 | sarl.with_global_state = True
102 |
103 | gcn = Config()
104 | gcn.multiagent_training = True
105 | gcn.num_layer = 2
106 | gcn.X_dim = 32
107 | gcn.wr_dims = [64, gcn.X_dim]
108 | gcn.wh_dims = [64, gcn.X_dim]
109 | gcn.final_state_dim = gcn.X_dim
110 | gcn.gcn2_w1_dim = gcn.X_dim
111 | gcn.planning_dims = [150, 100, 100, 1]
112 | gcn.similarity_function = 'embedded_gaussian'
113 | gcn.layerwise_graph = True
114 | gcn.skip_connection = False
115 |
116 | gnn = Config()
117 | gnn.multiagent_training = True
118 | gnn.node_dim = 32
119 | gnn.wr_dims = [64, gnn.node_dim]
120 | gnn.wh_dims = [64, gnn.node_dim]
121 | gnn.edge_dim = 32
122 | gnn.planning_dims = [150, 100, 100, 1]
123 |
124 | def __init__(self, debug=False):
125 | pass
126 |
127 |
128 | class BaseTrainConfig(object):
129 | trainer = Config()
130 | trainer.batch_size = 100
131 | trainer.optimizer = 'Adam'
132 |
133 | imitation_learning = Config()
134 | imitation_learning.il_episodes = 2000
135 | imitation_learning.il_policy = 'orca'
136 | imitation_learning.il_epochs = 50
137 | imitation_learning.il_learning_rate = 0.001
138 | imitation_learning.safety_space = 0.15
139 |
140 | train = Config()
141 | train.rl_train_epochs = 1
142 | train.rl_learning_rate = 0.001
143 |     # number of batches to train at the end of each training episode
144 | train.train_batches = 100
145 | # training episodes in outer loop
146 | train.train_episodes = 10000
147 | # number of episodes sampled in one training episode
148 | train.sample_episodes = 1
149 | train.target_update_interval = 1000
150 | train.evaluation_interval = 1000
151 | # the memory pool can roughly store 2K episodes, total size = episodes * 50
152 | train.capacity = 100000
153 | train.epsilon_start = 0.5
154 | train.epsilon_end = 0.1
155 | train.epsilon_decay = 4000
156 | train.checkpoint_interval = 1000
157 |
158 | train.train_with_pretend_batch = False
159 |
160 | def __init__(self, debug=False):
161 | if debug:
162 | self.imitation_learning.il_episodes = 10
163 | self.imitation_learning.il_epochs = 5
164 | self.train.train_episodes = 1
165 | self.train.checkpoint_interval = self.train.train_episodes
166 | self.train.evaluation_interval = 1
167 | self.train.target_update_interval = 1
168 |
--------------------------------------------------------------------------------
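A minimal usage sketch (not part of the repository) showing how these benchmark configs are consumed: an experiment module such as rgl.py below subclasses the three base classes, and the debug flag shrinks the evaluation sizes and training schedules as defined above.

```
# Sketch: instantiate the rgl benchmark configs and read a few values.
from crowd_nav.configs.icra_benchmark.rgl import EnvConfig, PolicyConfig, TrainConfig

env_config = EnvConfig(debug=True)
policy_config = PolicyConfig(debug=True)
train_config = TrainConfig(debug=True)

print(env_config.env.time_step)               # 0.25
print(env_config.env.test_size)               # 1 in debug mode, 500 otherwise
print(policy_config.gcn.similarity_function)  # 'embedded_gaussian'
print(train_config.train.train_episodes)      # 1 in debug mode, 10000 otherwise
```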
/crowd_nav/configs/icra_benchmark/mp_detach.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = False
23 | self.model_predictive_rl.planning_depth = 1
24 | self.model_predictive_rl.planning_width = 1
25 | self.model_predictive_rl.do_action_clip = False
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = True
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = True
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/mp_linear.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = True
23 | self.model_predictive_rl.planning_depth = 1
24 | self.model_predictive_rl.planning_width = 1
25 | self.model_predictive_rl.do_action_clip = False
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = False
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = False
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/mp_separate.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = False
23 | self.model_predictive_rl.planning_depth = 1
24 | self.model_predictive_rl.planning_width = 1
25 | self.model_predictive_rl.do_action_clip = False
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = False
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = False
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/mp_separate_dp.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'model_predictive_rl'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 | self.model_predictive_rl = Config()
22 | self.model_predictive_rl.linear_state_predictor = False
23 | self.model_predictive_rl.planning_depth = 2
24 | self.model_predictive_rl.planning_width = 2
25 | self.model_predictive_rl.do_action_clip = True
26 | self.model_predictive_rl.motion_predictor_dims = [64, 5]
27 | self.model_predictive_rl.value_network_dims = [32, 100, 100, 1]
28 | self.model_predictive_rl.share_graph_model = False
29 |
30 |
31 | class TrainConfig(BaseTrainConfig):
32 | def __init__(self, debug=False):
33 | super(TrainConfig, self).__init__(debug)
34 |
35 | self.train.freeze_state_predictor = False
36 | self.train.detach_state_predictor = False
37 | self.train.reduce_sp_update_frequency = False
38 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/rgl.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'gcn'
13 |
14 | # gcn
15 | self.gcn.num_layer = 2
16 | self.gcn.X_dim = 32
17 | self.gcn.similarity_function = 'embedded_gaussian'
18 | self.gcn.layerwise_graph = False
19 | self.gcn.skip_connection = True
20 |
21 |
22 | class TrainConfig(BaseTrainConfig):
23 | def __init__(self, debug=False):
24 | super(TrainConfig, self).__init__(debug)
25 |
--------------------------------------------------------------------------------
/crowd_nav/configs/icra_benchmark/sarl.py:
--------------------------------------------------------------------------------
1 | from crowd_nav.configs.icra_benchmark.config import BaseEnvConfig, BasePolicyConfig, BaseTrainConfig, Config
2 |
3 |
4 | class EnvConfig(BaseEnvConfig):
5 | def __init__(self, debug=False):
6 | super(EnvConfig, self).__init__(debug)
7 |
8 |
9 | class PolicyConfig(BasePolicyConfig):
10 | def __init__(self, debug=False):
11 | super(PolicyConfig, self).__init__(debug)
12 | self.name = 'sarl'
13 |
14 |
15 | class TrainConfig(BaseTrainConfig):
16 | def __init__(self, debug=False):
17 | super(TrainConfig, self).__init__(debug)
18 |
--------------------------------------------------------------------------------
/crowd_nav/policy/cadrl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import itertools
5 | import logging
6 | from crowd_sim.envs.policy.policy import Policy
7 | from crowd_sim.envs.utils.action import ActionRot, ActionXY
8 | from crowd_sim.envs.utils.state import ObservableState, FullState
9 |
10 |
11 | def mlp(input_dim, mlp_dims, last_relu=False):
12 | layers = []
13 | mlp_dims = [input_dim] + mlp_dims
14 | for i in range(len(mlp_dims) - 1):
15 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1]))
16 | if i != len(mlp_dims) - 2 or last_relu:
17 | layers.append(nn.ReLU())
18 | net = nn.Sequential(*layers)
19 | return net
20 |
21 |
22 | class ValueNetwork(nn.Module):
23 | def __init__(self, input_dim, mlp_dims):
24 | super().__init__()
25 | self.value_network = mlp(input_dim, mlp_dims)
26 |
27 | def forward(self, state_input):
28 | # input size: (batch_size, # of humans, state_length)
29 | if isinstance(state_input, tuple):
30 | state = state_input[0]
31 | else:
32 | state = state_input
33 |
34 | value = self.value_network(state.squeeze(dim=1))
35 | return value
36 |
37 |
38 | class CADRL(Policy):
39 | def __init__(self):
40 | super().__init__()
41 | self.name = 'CADRL'
42 | self.trainable = True
43 | self.multiagent_training = None
44 | self.kinematics = None
45 | self.epsilon = None
46 | self.gamma = None
47 | self.sampling = None
48 | self.speed_samples = None
49 | self.rotation_samples = None
50 | self.query_env = None
51 | self.action_space = None
52 | self.rotation_constraint = None
53 | self.speeds = None
54 | self.rotations = None
55 | self.action_values = None
56 | self.with_om = None
57 | self.cell_num = None
58 | self.cell_size = None
59 | self.om_channel_size = None
60 | self.self_state_dim = 6
61 | self.human_state_dim = 7
62 | self.joint_state_dim = self.self_state_dim + self.human_state_dim
63 |
64 | def configure(self, config):
65 | self.set_common_parameters(config)
66 | self.model = ValueNetwork(self.joint_state_dim, config.cadrl.mlp_dims)
67 | self.multiagent_training = config.cadrl.multiagent_training
68 | logging.info('Policy: CADRL without occupancy map')
69 |
70 | def set_common_parameters(self, config):
71 | self.gamma = config.rl.gamma
72 | self.kinematics = config.action_space.kinematics
73 | self.sampling = config.action_space.sampling
74 | self.speed_samples = config.action_space.speed_samples
75 | self.rotation_samples = config.action_space.rotation_samples
76 | self.query_env = config.action_space.query_env
77 | self.rotation_constraint = config.action_space.rotation_constraint
78 | self.cell_num = config.om.cell_num
79 | self.cell_size = config.om.cell_size
80 | self.om_channel_size = config.om.om_channel_size
81 |
82 | logging.info('Query environment: {}'.format(self.query_env))
83 |
84 | def set_device(self, device):
85 | self.device = device
86 | self.model.to(device)
87 |
88 | def set_epsilon(self, epsilon):
89 | self.epsilon = epsilon
90 |
91 | def build_action_space(self, v_pref):
92 | """
93 |         Action space consists of a stop action plus speed_samples x rotation_samples actions sampled in the permitted range.
94 | """
95 | holonomic = True if self.kinematics == 'holonomic' else False
96 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)]
97 | if holonomic:
98 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
99 | else:
100 | rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples)
101 |
102 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
103 | for rotation, speed in itertools.product(rotations, speeds):
104 | if holonomic:
105 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
106 | else:
107 | action_space.append(ActionRot(speed, rotation))
108 |
109 | self.speeds = speeds
110 | self.rotations = rotations
111 | self.action_space = action_space
112 |
113 | def propagate(self, state, action):
114 | if isinstance(state, ObservableState):
115 | # propagate state of humans
116 | next_px = state.px + action.vx * self.time_step
117 | next_py = state.py + action.vy * self.time_step
118 | next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius)
119 | elif isinstance(state, FullState):
120 | # propagate state of current agent
121 | # perform action without rotation
122 | if self.kinematics == 'holonomic':
123 | next_px = state.px + action.vx * self.time_step
124 | next_py = state.py + action.vy * self.time_step
125 | next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius,
126 | state.gx, state.gy, state.v_pref, state.theta)
127 | else:
128 | next_theta = state.theta + action.r
129 | next_vx = action.v * np.cos(next_theta)
130 | next_vy = action.v * np.sin(next_theta)
131 | next_px = state.px + next_vx * self.time_step
132 | next_py = state.py + next_vy * self.time_step
133 | next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy,
134 | state.v_pref, next_theta)
135 | else:
136 | raise ValueError('Type error')
137 |
138 | return next_state
139 |
140 | def predict(self, state):
141 | """
142 |         Input state is the joint state of the robot concatenated with the observable states of other agents
143 | 
144 |         To predict the best action, the agent samples actions and propagates one step forward to see how good the
145 |         next state is; the reward function is therefore needed
146 |
147 | """
148 | if self.phase is None or self.device is None:
149 | raise AttributeError('Phase, device attributes have to be set!')
150 | if self.phase == 'train' and self.epsilon is None:
151 | raise AttributeError('Epsilon attribute has to be set in training phase')
152 |
153 | if self.reach_destination(state):
154 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
155 | if self.action_space is None:
156 | self.build_action_space(state.self_state.v_pref)
157 | if not state.human_states:
158 | assert self.phase != 'train'
159 | return self.select_greedy_action(state.self_state)
160 |
161 | probability = np.random.random()
162 | if self.phase == 'train' and probability < self.epsilon:
163 | max_action = self.action_space[np.random.choice(len(self.action_space))]
164 | else:
165 | self.action_values = list()
166 | max_min_value = float('-inf')
167 | max_action = None
168 | for action in self.action_space:
169 | next_self_state = self.propagate(state.self_state, action)
170 | if self.query_env:
171 | next_human_states, reward, done, info = self.env.onestep_lookahead(action)
172 | else:
173 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
174 | for human_state in state.human_states]
175 | reward = self.compute_reward(next_self_state, next_human_states)
176 | batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device)
177 | for next_human_state in next_human_states], dim=0)
178 |
179 | # VALUE UPDATE
180 | outputs = self.model(self.rotate(batch_next_states))
181 | min_output, min_index = torch.min(outputs, 0)
182 | min_value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * min_output.data.item()
183 | self.action_values.append(min_value)
184 | if min_value > max_min_value:
185 | max_min_value = min_value
186 | max_action = action
187 |
188 | if self.phase == 'train':
189 | self.last_state = self.transform(state)
190 |
191 | return max_action
192 |
193 | def select_greedy_action(self, self_state):
194 | # find the greedy action given kinematic constraints and return the closest action in the action space
195 | direction = np.arctan2(self_state.gy - self_state.py, self_state.gx - self_state.px)
196 | distance = np.linalg.norm((self_state.gy - self_state.py, self_state.gx - self_state.px))
197 | if self.kinematics == 'holonomic':
198 | speed = min(distance / self.time_step, self_state.v_pref)
199 | vx = np.cos(direction) * speed
200 | vy = np.sin(direction) * speed
201 |
202 | min_diff = float('inf')
203 | closest_action = None
204 | for action in self.action_space:
205 | diff = np.linalg.norm(np.array(action) - np.array((vx, vy)))
206 | if diff < min_diff:
207 | min_diff = diff
208 | closest_action = action
209 | else:
210 | rotation = direction - self_state.theta
211 | # if goal is not in the field of view, always rotate first
212 | if rotation < self.rotations[0]:
213 | closest_action = ActionRot(self.speeds[0], self.rotations[0])
214 | elif rotation > self.rotations[-1]:
215 | closest_action = ActionRot(self.speeds[0], self.rotations[-1])
216 | else:
217 | speed = min(distance / self.time_step, self_state.v_pref)
218 |
219 | min_diff = float('inf')
220 | closest_action = None
221 | for action in self.action_space:
222 |                 diff = np.linalg.norm(np.array((np.cos(action.r) * action.v, np.sin(action.r) * action.v)) -
223 |                                       np.array((np.cos(rotation) * speed, np.sin(rotation) * speed)))
224 | if diff < min_diff:
225 | min_diff = diff
226 | closest_action = action
227 |
228 | return closest_action
229 |
230 | def transform(self, state):
231 | """
232 | Take the state passed from agent and transform it to tensor for batch training
233 |
234 | :param state:
235 | :return: tensor of shape (len(state), )
236 | """
237 | state = torch.Tensor([state.self_state + state.human_states[0]]).to(self.device)
238 | state = self.rotate(state)
239 | return state
240 |
241 | def rotate(self, state):
242 | """
243 | Transform the coordinate to agent-centric.
244 | Input state tensor is of size (batch_size, state_length)
245 |
246 | """
247 | # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta', 'px1', 'py1', 'vx1', 'vy1', 'radius1'
248 | # 0 1 2 3 4 5 6 7 8 9 10 11 12 13
249 | batch = state.shape[0]
250 | dx = (state[:, 5] - state[:, 0]).reshape((batch, -1))
251 | dy = (state[:, 6] - state[:, 1]).reshape((batch, -1))
252 | rot = torch.atan2(state[:, 6] - state[:, 1], state[:, 5] - state[:, 0])
253 |
254 | dg = torch.norm(torch.cat([dx, dy], dim=1), 2, dim=1, keepdim=True)
255 | v_pref = state[:, 7].reshape((batch, -1))
256 | vx = (state[:, 2] * torch.cos(rot) + state[:, 3] * torch.sin(rot)).reshape((batch, -1))
257 | vy = (state[:, 3] * torch.cos(rot) - state[:, 2] * torch.sin(rot)).reshape((batch, -1))
258 |
259 | radius = state[:, 4].reshape((batch, -1))
260 | if self.kinematics == 'unicycle':
261 | theta = (state[:, 8] - rot).reshape((batch, -1))
262 | else:
263 | theta = torch.zeros_like(v_pref)
264 |
265 | vx1 = (state[:, 11] * torch.cos(rot) + state[:, 12] * torch.sin(rot)).reshape((batch, -1))
266 | vy1 = (state[:, 12] * torch.cos(rot) - state[:, 11] * torch.sin(rot)).reshape((batch, -1))
267 | px1 = (state[:, 9] - state[:, 0]) * torch.cos(rot) + (state[:, 10] - state[:, 1]) * torch.sin(rot)
268 | px1 = px1.reshape((batch, -1))
269 | py1 = (state[:, 10] - state[:, 1]) * torch.cos(rot) - (state[:, 9] - state[:, 0]) * torch.sin(rot)
270 | py1 = py1.reshape((batch, -1))
271 | radius1 = state[:, 13].reshape((batch, -1))
272 | radius_sum = radius + radius1
273 | da = torch.norm(torch.cat([(state[:, 0] - state[:, 9]).reshape((batch, -1)), (state[:, 1] - state[:, 10]).
274 | reshape((batch, -1))], dim=1), 2, dim=1, keepdim=True)
275 | new_state = torch.cat([dg, v_pref, theta, radius, vx, vy, px1, py1, vx1, vy1, radius1, da, radius_sum], dim=1)
276 | return new_state
--------------------------------------------------------------------------------
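To make build_action_space concrete, here is a standalone sketch (not from the repository) using the icra_benchmark settings for the holonomic case: speed_samples=5 exponentially spaced speeds in (0, v_pref], rotation_samples=16 uniformly spaced headings, plus one stop action.

```
import itertools
import numpy as np

# Standalone sketch of CADRL.build_action_space (holonomic kinematics).
v_pref, speed_samples, rotation_samples = 1.0, 5, 16
speeds = [(np.exp((i + 1) / speed_samples) - 1) / (np.e - 1) * v_pref
          for i in range(speed_samples)]
rotations = np.linspace(0, 2 * np.pi, rotation_samples, endpoint=False)

actions = [(0.0, 0.0)]  # the stop action, ActionXY(0, 0)
for rotation, speed in itertools.product(rotations, speeds):
    actions.append((speed * np.cos(rotation), speed * np.sin(rotation)))

print(len(actions))  # 81 = 16 * 5 + 1
print(max(speeds))   # 1.0, i.e. the largest sampled speed equals v_pref
```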
/crowd_nav/policy/gcn.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import itertools
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn.functional import softmax, relu
6 | from torch.nn import Parameter
7 | from crowd_nav.policy.cadrl import mlp
8 | from crowd_nav.policy.multi_human_rl import MultiHumanRL
9 |
10 |
11 | class ValueNetwork(nn.Module):
12 | def __init__(self, input_dim, self_state_dim, num_layer, X_dim, wr_dims, wh_dims, final_state_dim,
13 | gcn2_w1_dim, planning_dims, similarity_function, layerwise_graph, skip_connection):
14 | super().__init__()
15 | # design choice
16 |
17 | # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation'
18 | self.similarity_function = similarity_function
19 | logging.info('self.similarity_func: {}'.format(self.similarity_function))
20 | human_state_dim = input_dim - self_state_dim
21 | self.self_state_dim = self_state_dim
22 | self.human_state_dim = human_state_dim
23 | self.num_layer = num_layer
24 | self.X_dim = X_dim
25 | self.layerwise_graph = layerwise_graph
26 | self.skip_connection = skip_connection
27 |
28 | self.w_r = mlp(self_state_dim, wr_dims, last_relu=True)
29 | self.w_h = mlp(human_state_dim, wh_dims, last_relu=True)
30 |
31 | if self.similarity_function == 'embedded_gaussian':
32 | self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim))
33 | elif self.similarity_function == 'concatenation':
34 | self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True)
35 |
36 | if num_layer == 1:
37 | self.w1 = Parameter(torch.randn(self.X_dim, final_state_dim))
38 | elif num_layer == 2:
39 | self.w1 = Parameter(torch.randn(self.X_dim, gcn2_w1_dim))
40 | self.w2 = Parameter(torch.randn(gcn2_w1_dim, final_state_dim))
41 | else:
42 | raise NotImplementedError
43 |
44 | self.value_net = mlp(final_state_dim, planning_dims)
45 |
46 | # for visualization
47 | self.A = None
48 |
49 | def compute_similarity_matrix(self, X):
50 | if self.similarity_function == 'embedded_gaussian':
51 | A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1))
52 | normalized_A = softmax(A, dim=2)
53 | elif self.similarity_function == 'gaussian':
54 | A = torch.matmul(X, X.permute(0, 2, 1))
55 | normalized_A = softmax(A, dim=2)
56 | elif self.similarity_function == 'cosine':
57 | A = torch.matmul(X, X.permute(0, 2, 1))
58 | magnitudes = torch.norm(A, dim=2, keepdim=True)
59 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
60 | normalized_A = torch.div(A, norm_matrix)
61 | elif self.similarity_function == 'cosine_softmax':
62 | A = torch.matmul(X, X.permute(0, 2, 1))
63 | magnitudes = torch.norm(A, dim=2, keepdim=True)
64 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
65 | normalized_A = softmax(torch.div(A, norm_matrix), dim=2)
66 | elif self.similarity_function == 'concatenation':
67 | indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)]
68 | selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1))
69 | pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2))
70 | A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1))
71 | normalized_A = A
72 | elif self.similarity_function == 'squared':
73 | A = torch.matmul(X, X.permute(0, 2, 1))
74 | squared_A = A * A
75 | normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True)
76 | elif self.similarity_function == 'equal_attention':
77 | normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1))
78 | elif self.similarity_function == 'diagonal':
79 | normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1))
80 | else:
81 | raise NotImplementedError
82 |
83 | return normalized_A
84 |
85 | def forward(self, state_input):
86 | if isinstance(state_input, tuple):
87 | state, lengths = state_input
88 | else:
89 | state = state_input
90 | # lengths = torch.IntTensor([state.size()[1]])
91 |
92 | self_state = state[:, 0, :self.self_state_dim]
93 | human_states = state[:, :, self.self_state_dim:]
94 |
95 | # compute feature matrix X
96 |         self_state_embeddings = self.w_r(self_state)
97 |         human_state_embeddings = self.w_h(human_states)
98 |         X = torch.cat([self_state_embeddings.unsqueeze(1), human_state_embeddings], dim=1)
99 |
100 | # compute matrix A
101 | normalized_A = self.compute_similarity_matrix(X)
102 | self.A = normalized_A[0, :, :].data.cpu().numpy()
103 |
104 | # graph convolution
105 | if self.num_layer == 0:
106 | feat = X[:, 0, :]
107 | elif self.num_layer == 1:
108 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
109 | feat = h1[:, 0, :]
110 | else:
111 | # compute h1 and h2
112 | if not self.skip_connection:
113 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
114 | else:
115 | h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) + X
116 | if self.layerwise_graph:
117 | normalized_A2 = self.compute_similarity_matrix(h1)
118 | else:
119 | normalized_A2 = normalized_A
120 | if not self.skip_connection:
121 | h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2))
122 | else:
123 | h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) + h1
124 | feat = h2[:, 0, :]
125 |
126 | # do planning using only the final layer feature of the agent
127 | value = self.value_net(feat)
128 | return value
129 |
130 |
131 | class GCN(MultiHumanRL):
132 | def __init__(self):
133 | super().__init__()
134 | self.name = 'GCN'
135 |
136 | def configure(self, config):
137 | self.multiagent_training = config.gcn.multiagent_training
138 | num_layer = config.gcn.num_layer
139 | X_dim = config.gcn.X_dim
140 | wr_dims = config.gcn.wr_dims
141 | wh_dims = config.gcn.wh_dims
142 | final_state_dim = config.gcn.final_state_dim
143 | gcn2_w1_dim = config.gcn.gcn2_w1_dim
144 | planning_dims = config.gcn.planning_dims
145 | similarity_function = config.gcn.similarity_function
146 | layerwise_graph = config.gcn.layerwise_graph
147 | skip_connection = config.gcn.skip_connection
148 | self.set_common_parameters(config)
149 | self.model = ValueNetwork(self.input_dim(), self.self_state_dim, num_layer, X_dim, wr_dims, wh_dims,
150 | final_state_dim, gcn2_w1_dim, planning_dims, similarity_function, layerwise_graph,
151 | skip_connection)
152 | logging.info('self.model:{}'.format(self.model))
153 | logging.info('GCN layers: {}'.format(num_layer))
154 | logging.info('Policy: {}'.format(self.name))
155 |
156 | def get_matrix_A(self):
157 | return self.model.A
158 |
--------------------------------------------------------------------------------
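The default 'embedded_gaussian' similarity above is a softmax-normalized bilinear score, A = softmax(X w_a X^T). A toy sketch (not from the repository) with shapes matching X_dim = 32 and one robot plus five humans:

```
import torch
from torch.nn.functional import softmax

# Toy sketch of the 'embedded_gaussian' branch of compute_similarity_matrix.
batch_size, num_agents, X_dim = 2, 6, 32
X = torch.randn(batch_size, num_agents, X_dim)
w_a = torch.randn(X_dim, X_dim)

A = torch.matmul(torch.matmul(X, w_a), X.permute(0, 2, 1))  # (2, 6, 6) raw scores
normalized_A = softmax(A, dim=2)                            # each row sums to one

print(normalized_A.shape)        # torch.Size([2, 6, 6])
print(normalized_A.sum(dim=2))   # all ones: every agent's attention is normalized
```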
/crowd_nav/policy/graph_model.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import itertools
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn.functional import softmax, relu
6 | from torch.nn import Parameter
7 | from crowd_nav.policy.helpers import mlp
8 |
9 |
10 | class RGL(nn.Module):
11 | def __init__(self, config, robot_state_dim, human_state_dim):
12 |         """ The current code might not be compatible with models trained with a previous version
13 | """
14 | super().__init__()
15 | self.multiagent_training = config.gcn.multiagent_training
16 | num_layer = config.gcn.num_layer
17 | X_dim = config.gcn.X_dim
18 | wr_dims = config.gcn.wr_dims
19 | wh_dims = config.gcn.wh_dims
20 | final_state_dim = config.gcn.final_state_dim
21 | similarity_function = config.gcn.similarity_function
22 | layerwise_graph = config.gcn.layerwise_graph
23 | skip_connection = config.gcn.skip_connection
24 |
25 | # design choice
26 |
27 | # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation'
28 | self.similarity_function = similarity_function
29 | self.robot_state_dim = robot_state_dim
30 | self.human_state_dim = human_state_dim
31 | self.num_layer = num_layer
32 | self.X_dim = X_dim
33 | self.layerwise_graph = layerwise_graph
34 | self.skip_connection = skip_connection
35 |
36 | logging.info('Similarity_func: {}'.format(self.similarity_function))
37 | logging.info('Layerwise_graph: {}'.format(self.layerwise_graph))
38 | logging.info('Skip_connection: {}'.format(self.skip_connection))
39 | logging.info('Number of layers: {}'.format(self.num_layer))
40 |
41 | self.w_r = mlp(robot_state_dim, wr_dims, last_relu=True)
42 | self.w_h = mlp(human_state_dim, wh_dims, last_relu=True)
43 |
44 | if self.similarity_function == 'embedded_gaussian':
45 | self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim))
46 | elif self.similarity_function == 'concatenation':
47 | self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True)
48 |
49 | # TODO: try other dim size
50 | embedding_dim = self.X_dim
51 | self.Ws = torch.nn.ParameterList()
52 | for i in range(self.num_layer):
53 | if i == 0:
54 | self.Ws.append(Parameter(torch.randn(self.X_dim, embedding_dim)))
55 | elif i == self.num_layer - 1:
56 | self.Ws.append(Parameter(torch.randn(embedding_dim, final_state_dim)))
57 | else:
58 | self.Ws.append(Parameter(torch.randn(embedding_dim, embedding_dim)))
59 |
60 | # for visualization
61 | self.A = None
62 |
63 | def compute_similarity_matrix(self, X):
64 | if self.similarity_function == 'embedded_gaussian':
65 | A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1))
66 | normalized_A = softmax(A, dim=2)
67 | elif self.similarity_function == 'gaussian':
68 | A = torch.matmul(X, X.permute(0, 2, 1))
69 | normalized_A = softmax(A, dim=2)
70 | elif self.similarity_function == 'cosine':
71 | A = torch.matmul(X, X.permute(0, 2, 1))
72 | magnitudes = torch.norm(A, dim=2, keepdim=True)
73 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
74 | normalized_A = torch.div(A, norm_matrix)
75 | elif self.similarity_function == 'cosine_softmax':
76 | A = torch.matmul(X, X.permute(0, 2, 1))
77 | magnitudes = torch.norm(A, dim=2, keepdim=True)
78 | norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
79 | normalized_A = softmax(torch.div(A, norm_matrix), dim=2)
80 | elif self.similarity_function == 'concatenation':
81 | indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)]
82 | selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1))
83 | pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2))
84 | A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1))
85 | normalized_A = A
86 | elif self.similarity_function == 'squared':
87 | A = torch.matmul(X, X.permute(0, 2, 1))
88 | squared_A = A * A
89 | normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True)
90 | elif self.similarity_function == 'equal_attention':
91 | normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1))
92 | elif self.similarity_function == 'diagonal':
93 | normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1))
94 | else:
95 | raise NotImplementedError
96 |
97 | return normalized_A
98 |
99 | def forward(self, state):
100 | """
101 | Embed current state tensor pair (robot_state, human_states) into a latent space
102 | Each tensor is of shape (batch_size, # of agent, features)
103 | :param state:
104 | :return:
105 | """
106 | robot_state, human_states = state
107 |
108 | # compute feature matrix X
109 |         robot_state_embeddings = self.w_r(robot_state)
110 |         human_state_embeddings = self.w_h(human_states)
111 |         X = torch.cat([robot_state_embeddings, human_state_embeddings], dim=1)
112 |
113 | # compute matrix A
114 | if not self.layerwise_graph:
115 | normalized_A = self.compute_similarity_matrix(X)
116 | self.A = normalized_A[0, :, :].data.cpu().numpy()
117 |
118 | next_H = H = X
119 | for i in range(self.num_layer):
120 | if self.layerwise_graph:
121 | A = self.compute_similarity_matrix(H)
122 | next_H = relu(torch.matmul(torch.matmul(A, H), self.Ws[i]))
123 | else:
124 | next_H = relu(torch.matmul(torch.matmul(normalized_A, H), self.Ws[i]))
125 |
126 | if self.skip_connection:
127 | next_H += H
128 | H = next_H
129 |
130 | return next_H
131 |
132 |
133 | # class RGL(nn.Module):
134 | # def __init__(self, config, robot_state_dim, human_state_dim):
135 | # super().__init__()
136 | # self.multiagent_training = config.gcn.multiagent_training
137 | # num_layer = config.gcn.num_layer
138 | # X_dim = config.gcn.X_dim
139 | # wr_dims = config.gcn.wr_dims
140 | # wh_dims = config.gcn.wh_dims
141 | # final_state_dim = config.gcn.final_state_dim
142 | # gcn2_w1_dim = config.gcn.gcn2_w1_dim
143 | # similarity_function = config.gcn.similarity_function
144 | # layerwise_graph = config.gcn.layerwise_graph
145 | # skip_connection = config.gcn.skip_connection
146 | #
147 | # # design choice
148 | #
149 | # # 'gaussian', 'embedded_gaussian', 'cosine', 'cosine_softmax', 'concatenation'
150 | # self.similarity_function = similarity_function
151 | # logging.info('self.similarity_func: {}'.format(self.similarity_function))
152 | # self.robot_state_dim = robot_state_dim
153 | # self.human_state_dim = human_state_dim
154 | # self.num_layer = num_layer
155 | # self.X_dim = X_dim
156 | # self.layerwise_graph = layerwise_graph
157 | # self.skip_connection = skip_connection
158 | #
159 | # self.w_r = mlp(robot_state_dim, wr_dims, last_relu=True)
160 | # self.w_h = mlp(human_state_dim, wh_dims, last_relu=True)
161 | #
162 | # if self.similarity_function == 'embedded_gaussian':
163 | # self.w_a = Parameter(torch.randn(self.X_dim, self.X_dim))
164 | # elif self.similarity_function == 'concatenation':
165 | # self.w_a = mlp(2 * X_dim, [2 * X_dim, 1], last_relu=True)
166 | #
167 | # if num_layer == 1:
168 | # self.w1 = Parameter(torch.randn(self.X_dim, final_state_dim))
169 | # elif num_layer == 2:
170 | # self.w1 = Parameter(torch.randn(self.X_dim, gcn2_w1_dim))
171 | # self.w2 = Parameter(torch.randn(gcn2_w1_dim, final_state_dim))
172 | # else:
173 | # raise NotImplementedError
174 | #
175 | # # for visualization
176 | # self.A = None
177 | #
178 | # def compute_similarity_matrix(self, X):
179 | # if self.similarity_function == 'embedded_gaussian':
180 | # A = torch.matmul(torch.matmul(X, self.w_a), X.permute(0, 2, 1))
181 | # normalized_A = softmax(A, dim=2)
182 | # elif self.similarity_function == 'gaussian':
183 | # A = torch.matmul(X, X.permute(0, 2, 1))
184 | # normalized_A = softmax(A, dim=2)
185 | # elif self.similarity_function == 'cosine':
186 | # A = torch.matmul(X, X.permute(0, 2, 1))
187 | # magnitudes = torch.norm(A, dim=2, keepdim=True)
188 | # norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
189 | # normalized_A = torch.div(A, norm_matrix)
190 | # elif self.similarity_function == 'cosine_softmax':
191 | # A = torch.matmul(X, X.permute(0, 2, 1))
192 | # magnitudes = torch.norm(A, dim=2, keepdim=True)
193 | # norm_matrix = torch.matmul(magnitudes, magnitudes.permute(0, 2, 1))
194 | # normalized_A = softmax(torch.div(A, norm_matrix), dim=2)
195 | # elif self.similarity_function == 'concatenation':
196 | # indices = [pair for pair in itertools.product(list(range(X.size(1))), repeat=2)]
197 | # selected_features = torch.index_select(X, dim=1, index=torch.LongTensor(indices).reshape(-1))
198 | # pairwise_features = selected_features.reshape((-1, X.size(1) * X.size(1), X.size(2) * 2))
199 | # A = self.w_a(pairwise_features).reshape(-1, X.size(1), X.size(1))
200 | # normalized_A = A
201 | # elif self.similarity_function == 'squared':
202 | # A = torch.matmul(X, X.permute(0, 2, 1))
203 | # squared_A = A * A
204 | # normalized_A = squared_A / torch.sum(squared_A, dim=2, keepdim=True)
205 | # elif self.similarity_function == 'equal_attention':
206 | # normalized_A = (torch.ones(X.size(1), X.size(1)) / X.size(1)).expand(X.size(0), X.size(1), X.size(1))
207 | # elif self.similarity_function == 'diagonal':
208 | # normalized_A = (torch.eye(X.size(1), X.size(1))).expand(X.size(0), X.size(1), X.size(1))
209 | # else:
210 | # raise NotImplementedError
211 | #
212 | # return normalized_A
213 | #
214 | # def forward(self, state):
215 | # """
216 | # Embed current state tensor pair (robot_state, human_states) into a latent space
217 | # Each tensor is of shape (batch_size, # of agent, features)
218 | # :param state:
219 | # :return:
220 | # """
221 | # robot_state, human_states = state
222 | #
223 | # # compute feature matrix X
224 | # robot_state_embedings = self.w_r(robot_state)
225 | # human_state_embedings = self.w_h(human_states)
226 | # X = torch.cat([robot_state_embedings, human_state_embedings], dim=1)
227 | #
228 | # # compute matrix A
229 | # normalized_A = self.compute_similarity_matrix(X)
230 | # self.A = normalized_A[0, :, :].data.cpu().numpy()
231 | #
232 | # # graph convolution
233 | # if self.num_layer == 0:
234 | # state_embedding = X
235 | # elif self.num_layer == 1:
236 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
237 | # state_embedding = h1
238 | # else:
239 | # # compute h1 and h2
240 | # if not self.skip_connection:
241 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1))
242 | # else:
243 | # h1 = relu(torch.matmul(torch.matmul(normalized_A, X), self.w1)) + X
244 | # if self.layerwise_graph:
245 | # normalized_A2 = self.compute_similarity_matrix(h1)
246 | # else:
247 | # normalized_A2 = normalized_A
248 | # if not self.skip_connection:
249 | # h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2))
250 | # else:
251 | # h2 = relu(torch.matmul(torch.matmul(normalized_A2, h1), self.w2)) + h1
252 | # state_embedding = h2
253 | #
254 | # return state_embedding
255 |
--------------------------------------------------------------------------------
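A short sketch (not from the repository) of calling RGL on random tensors, using the state dimensions that ModelPredictiveRL passes in later (robot_state_dim=9, human_state_dim=5) and the rgl benchmark config; the output keeps one X_dim-sized feature per agent, with the robot at index 0.

```
import torch
from crowd_nav.policy.graph_model import RGL
from crowd_nav.configs.icra_benchmark.rgl import PolicyConfig

# Sketch: embed a batch of (robot_state, human_states) pairs with RGL.
config = PolicyConfig()
graph_model = RGL(config, robot_state_dim=9, human_state_dim=5)

robot_state = torch.randn(8, 1, 9)    # (batch_size, 1, robot features)
human_states = torch.randn(8, 5, 5)   # (batch_size, # of humans, human features)

embedding = graph_model((robot_state, human_states))
print(embedding.shape)                # torch.Size([8, 6, 32])
```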
/crowd_nav/policy/helpers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | def mlp(input_dim, mlp_dims, last_relu=False):
6 | layers = []
7 | mlp_dims = [input_dim] + mlp_dims
8 | for i in range(len(mlp_dims) - 1):
9 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1]))
10 | if i != len(mlp_dims) - 2 or last_relu:
11 | layers.append(nn.ReLU())
12 | net = nn.Sequential(*layers)
13 | return net
14 |
--------------------------------------------------------------------------------
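For reference, mlp builds an nn.Sequential of Linear layers with a ReLU after every layer except the last (unless last_relu=True). A quick sketch (not from the repository) using the planning_dims from the configs on a 32-dim graph feature:

```
import torch
from crowd_nav.policy.helpers import mlp

# Sketch: a value head like planning_dims = [150, 100, 100, 1] on X_dim = 32.
value_head = mlp(32, [150, 100, 100, 1])
print(value_head)                  # Linear/ReLU stack ending in Linear(100, 1)

features = torch.randn(8, 32)
print(value_head(features).shape)  # torch.Size([8, 1])
```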
/crowd_nav/policy/lstm_rl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import logging
5 | from crowd_nav.policy.cadrl import mlp
6 | from crowd_nav.policy.multi_human_rl import MultiHumanRL
7 |
8 |
9 | class ValueNetwork1(nn.Module):
10 | def __init__(self, input_dim, self_state_dim, mlp_dims, lstm_hidden_dim):
11 | super().__init__()
12 | self.self_state_dim = self_state_dim
13 | self.lstm_hidden_dim = lstm_hidden_dim
14 | self.mlp = mlp(self_state_dim + lstm_hidden_dim, mlp_dims)
15 | self.lstm = nn.LSTM(input_dim, lstm_hidden_dim, batch_first=True)
16 |
17 | def forward(self, state_input):
18 | """
19 | First transform the world coordinates to self-centric coordinates and then do forward computation
20 |
21 | """
22 | if isinstance(state_input, tuple):
23 | state, lengths = state_input
24 | else:
25 | state = state_input
26 | lengths = torch.IntTensor([state.size()[1]])
27 | self_state = state[:, 0, :self.self_state_dim]
28 | packed_sequence = torch.nn.utils.rnn.pack_padded_sequence(state, lengths, batch_first=True)
29 | _, (hn, cn) = self.lstm(packed_sequence)
30 | hn = hn.squeeze(0)
31 | joint_state = torch.cat([self_state, hn], dim=1)
32 | value = self.mlp(joint_state)
33 | return value
34 |
35 |
36 | class ValueNetwork2(nn.Module):
37 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp_dims, lstm_hidden_dim):
38 | super().__init__()
39 | self.self_state_dim = self_state_dim
40 | self.lstm_hidden_dim = lstm_hidden_dim
41 | self.mlp1 = mlp(input_dim, mlp1_dims)
42 | self.lstm = nn.LSTM(mlp1_dims[-1], lstm_hidden_dim, batch_first=True)
43 | self.mlp = mlp(self_state_dim + lstm_hidden_dim, mlp_dims)
44 |
45 | def forward(self, state_input):
46 | """
47 | First transform the world coordinates to self-centric coordinates and then do forward computation
48 |
49 | :return:
50 | """
51 | if isinstance(state_input, tuple):
52 | state, lengths = state_input
53 | else:
54 | state = state_input
55 | lengths = torch.IntTensor([state.size()[1]])
56 |
57 | size = state.shape
58 | self_state = state[:, 0, :self.self_state_dim]
59 |
60 | state = torch.reshape(state, (-1, size[2]))
61 | mlp1_output = self.mlp1(state)
62 | mlp1_output = torch.reshape(mlp1_output, (size[0], size[1], -1))
63 | packed_mlp1_output = torch.nn.utils.rnn.pack_padded_sequence(mlp1_output, lengths, batch_first=True)
64 |
65 | output, (hn, cn) = self.lstm(packed_mlp1_output)
66 | hn = hn.squeeze(0)
67 | joint_state = torch.cat([self_state, hn], dim=1)
68 | value = self.mlp(joint_state)
69 | return value
70 |
71 |
72 | class LstmRL(MultiHumanRL):
73 | def __init__(self):
74 | super().__init__()
75 | self.name = 'LSTM-RL'
76 | self.with_interaction_module = None
77 |
78 | def configure(self, config):
79 | self.set_common_parameters(config)
80 | self.with_om = config.lstm_rl.with_om
81 | self.multiagent_training = config.lstm_rl.multiagent_training
82 |
83 | mlp_dims = config.lstm_rl.mlp2_dims
84 | global_state_dim = config.lstm_rl.global_state_dim
85 | with_interaction_module = config.lstm_rl.with_interaction_module
86 | if with_interaction_module:
87 | mlp1_dims = config.lstm_rl.mlp1_dims
88 | self.model = ValueNetwork2(self.input_dim(), self.self_state_dim, mlp1_dims, mlp_dims, global_state_dim)
89 | else:
90 | self.model = ValueNetwork1(self.input_dim(), self.self_state_dim, mlp_dims, global_state_dim)
91 | logging.info('Policy: {}LSTM-RL {} pairwise interaction module'.format(
92 | 'OM-' if self.with_om else '', 'w/' if with_interaction_module else 'w/o'))
93 |
94 | def predict(self, state):
95 | """
96 | Input state is the joint state of robot concatenated with the observable state of other agents
97 |
98 | To predict the best action, agent samples actions and propagates one step to see how good the next state is
99 | thus the reward function is needed
100 |
101 | """
102 |
103 | def dist(human):
104 | # sort human order by decreasing distance to the robot
105 | return np.linalg.norm(np.array(human.position) - np.array(state.self_state.position))
106 |
107 | state.human_states = sorted(state.human_states, key=dist, reverse=True)
108 | return super().predict(state)
109 |
110 |
--------------------------------------------------------------------------------
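The ordering in LstmRL.predict feeds humans to the LSTM sorted by decreasing distance, so the closest human is the last (most recent) input to the recurrent state. A small sketch (not from the repository) of that sort:

```
import numpy as np

# Sketch of the sort used in LstmRL.predict: farthest human first, closest last.
robot_position = np.array([0.0, 0.0])
human_positions = [np.array([1.0, 1.0]),
                   np.array([3.0, 0.0]),
                   np.array([0.5, -0.2])]

ordered = sorted(human_positions,
                 key=lambda position: np.linalg.norm(position - robot_position),
                 reverse=True)
print([round(float(np.linalg.norm(p - robot_position)), 2) for p in ordered])
# [3.0, 1.41, 0.54]
```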
/crowd_nav/policy/model_predictive_rl.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import torch
3 | import numpy as np
4 | from numpy.linalg import norm
5 | import itertools
6 | from crowd_sim.envs.policy.policy import Policy
7 | from crowd_sim.envs.utils.action import ActionRot, ActionXY
8 | from crowd_sim.envs.utils.state import tensor_to_joint_state
9 | from crowd_sim.envs.utils.utils import point_to_segment_dist
10 | from crowd_nav.policy.state_predictor import StatePredictor, LinearStatePredictor
11 | from crowd_nav.policy.graph_model import RGL
12 | from crowd_nav.policy.value_estimator import ValueEstimator
13 |
14 |
15 | class ModelPredictiveRL(Policy):
16 | def __init__(self):
17 | super().__init__()
18 | self.name = 'ModelPredictiveRL'
19 | self.trainable = True
20 | self.multiagent_training = True
21 | self.kinematics = None
22 | self.epsilon = None
23 | self.gamma = None
24 | self.sampling = None
25 | self.speed_samples = None
26 | self.rotation_samples = None
27 | self.action_space = None
28 | self.rotation_constraint = None
29 | self.speeds = None
30 | self.rotations = None
31 | self.action_values = None
32 | self.robot_state_dim = 9
33 | self.human_state_dim = 5
34 | self.v_pref = 1
35 | self.share_graph_model = None
36 | self.value_estimator = None
37 | self.linear_state_predictor = None
38 | self.state_predictor = None
39 | self.planning_depth = None
40 | self.planning_width = None
41 | self.do_action_clip = None
42 | self.sparse_search = None
43 | self.sparse_speed_samples = 2
44 | self.sparse_rotation_samples = 8
45 | self.action_group_index = []
46 | self.traj = None
47 |
48 | def configure(self, config):
49 | self.set_common_parameters(config)
50 | self.planning_depth = config.model_predictive_rl.planning_depth
51 | self.do_action_clip = config.model_predictive_rl.do_action_clip
52 | if hasattr(config.model_predictive_rl, 'sparse_search'):
53 | self.sparse_search = config.model_predictive_rl.sparse_search
54 | self.planning_width = config.model_predictive_rl.planning_width
55 | self.share_graph_model = config.model_predictive_rl.share_graph_model
56 | self.linear_state_predictor = config.model_predictive_rl.linear_state_predictor
57 |
58 | if self.linear_state_predictor:
59 | self.state_predictor = LinearStatePredictor(config, self.time_step)
60 | graph_model = RGL(config, self.robot_state_dim, self.human_state_dim)
61 | self.value_estimator = ValueEstimator(config, graph_model)
62 | self.model = [graph_model, self.value_estimator.value_network]
63 | else:
64 | if self.share_graph_model:
65 | graph_model = RGL(config, self.robot_state_dim, self.human_state_dim)
66 | self.value_estimator = ValueEstimator(config, graph_model)
67 | self.state_predictor = StatePredictor(config, graph_model, self.time_step)
68 | self.model = [graph_model, self.value_estimator.value_network, self.state_predictor.human_motion_predictor]
69 | else:
70 | graph_model1 = RGL(config, self.robot_state_dim, self.human_state_dim)
71 | self.value_estimator = ValueEstimator(config, graph_model1)
72 | graph_model2 = RGL(config, self.robot_state_dim, self.human_state_dim)
73 | self.state_predictor = StatePredictor(config, graph_model2, self.time_step)
74 | self.model = [graph_model1, graph_model2, self.value_estimator.value_network,
75 | self.state_predictor.human_motion_predictor]
76 |
77 | logging.info('Planning depth: {}'.format(self.planning_depth))
78 | logging.info('Planning width: {}'.format(self.planning_width))
79 | logging.info('Sparse search: {}'.format(self.sparse_search))
80 |
81 | if self.planning_depth > 1 and not self.do_action_clip:
82 | logging.warning('Performing d-step planning without action space clipping!')
83 |
84 | def set_common_parameters(self, config):
85 | self.gamma = config.rl.gamma
86 | self.kinematics = config.action_space.kinematics
87 | self.sampling = config.action_space.sampling
88 | self.speed_samples = config.action_space.speed_samples
89 | self.rotation_samples = config.action_space.rotation_samples
90 | self.rotation_constraint = config.action_space.rotation_constraint
91 |
92 | def set_device(self, device):
93 | self.device = device
94 | for model in self.model:
95 | model.to(device)
96 |
97 | def set_epsilon(self, epsilon):
98 | self.epsilon = epsilon
99 |
100 | def set_time_step(self, time_step):
101 | self.time_step = time_step
102 | self.state_predictor.time_step = time_step
103 |
104 | def get_normalized_gamma(self):
105 | return pow(self.gamma, self.time_step * self.v_pref)
106 |
107 | def get_model(self):
108 | return self.value_estimator
109 |
110 | def get_state_dict(self):
111 | if self.state_predictor.trainable:
112 | if self.share_graph_model:
113 | return {
114 | 'graph_model': self.value_estimator.graph_model.state_dict(),
115 | 'value_network': self.value_estimator.value_network.state_dict(),
116 | 'motion_predictor': self.state_predictor.human_motion_predictor.state_dict()
117 | }
118 | else:
119 | return {
120 | 'graph_model1': self.value_estimator.graph_model.state_dict(),
121 | 'graph_model2': self.state_predictor.graph_model.state_dict(),
122 | 'value_network': self.value_estimator.value_network.state_dict(),
123 | 'motion_predictor': self.state_predictor.human_motion_predictor.state_dict()
124 | }
125 | else:
126 | return {
127 | 'graph_model': self.value_estimator.graph_model.state_dict(),
128 | 'value_network': self.value_estimator.value_network.state_dict()
129 | }
130 |
131 | def get_traj(self):
132 | return self.traj
133 |
134 | def load_state_dict(self, state_dict):
135 | if self.state_predictor.trainable:
136 | if self.share_graph_model:
137 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model'])
138 | else:
139 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model1'])
140 | self.state_predictor.graph_model.load_state_dict(state_dict['graph_model2'])
141 |
142 | self.value_estimator.value_network.load_state_dict(state_dict['value_network'])
143 | self.state_predictor.human_motion_predictor.load_state_dict(state_dict['motion_predictor'])
144 | else:
145 | self.value_estimator.graph_model.load_state_dict(state_dict['graph_model'])
146 | self.value_estimator.value_network.load_state_dict(state_dict['value_network'])
147 |
148 | def save_model(self, file):
149 | torch.save(self.get_state_dict(), file)
150 |
151 | def load_model(self, file):
152 | checkpoint = torch.load(file)
153 | self.load_state_dict(checkpoint)
154 |
155 | def build_action_space(self, v_pref):
156 | """
157 | Action space consists of speed_samples x rotation_samples discrete actions (exponentially spaced speeds, evenly spaced rotations), plus a stop action.
158 | """
159 | holonomic = True if self.kinematics == 'holonomic' else False
160 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)]
161 | if holonomic:
162 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False)
163 | else:
164 | rotations = np.linspace(-self.rotation_constraint, self.rotation_constraint, self.rotation_samples)
165 |
166 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, 0)]
167 | for j, speed in enumerate(speeds):
168 | if j == 0:
169 | # index for action (0, 0)
170 | self.action_group_index.append(0)
171 | # only two groups in speeds
172 | if j < 3:
173 | speed_index = 0
174 | else:
175 | speed_index = 1
176 |
177 | for i, rotation in enumerate(rotations):
178 | rotation_index = i // 2
179 |
180 | action_index = speed_index * self.sparse_rotation_samples + rotation_index
181 | self.action_group_index.append(action_index)
182 |
183 | if holonomic:
184 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation)))
185 | else:
186 | action_space.append(ActionRot(speed, rotation))
187 |
188 | self.speeds = speeds
189 | self.rotations = rotations
190 | self.action_space = action_space
191 |
192 | def predict(self, state):
193 | """
194 | Choose an action: epsilon-greedy exploration in the training phase, otherwise a d-step lookahead
195 | search over the (optionally clipped) action space using the state predictor and the value estimator.
196 |
197 | """
198 | if self.phase is None or self.device is None:
199 | raise AttributeError('Phase, device attributes have to be set!')
200 | if self.phase == 'train' and self.epsilon is None:
201 | raise AttributeError('Epsilon attribute has to be set in training phase')
202 |
203 | if self.reach_destination(state):
204 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
205 | if self.action_space is None:
206 | self.build_action_space(state.robot_state.v_pref)
207 |
208 | probability = np.random.random()
209 | if self.phase == 'train' and probability < self.epsilon:
210 | max_action = self.action_space[np.random.choice(len(self.action_space))]
211 | else:
212 | max_action = None
213 | max_value = float('-inf')
214 | max_traj = None
215 |
216 | if self.do_action_clip:
217 | state_tensor = state.to_tensor(add_batch_size=True, device=self.device)
218 | action_space_clipped = self.action_clip(state_tensor, self.action_space, self.planning_width)
219 | else:
220 | action_space_clipped = self.action_space
221 |
222 | for action in action_space_clipped:
223 | state_tensor = state.to_tensor(add_batch_size=True, device=self.device)
224 | next_state = self.state_predictor(state_tensor, action)
225 | max_next_return, max_next_traj = self.V_planning(next_state, self.planning_depth, self.planning_width)
226 | reward_est = self.estimate_reward(state, action)
227 | value = reward_est + self.get_normalized_gamma() * max_next_return
228 | if value > max_value:
229 | max_value = value
230 | max_action = action
231 | max_traj = [(state_tensor, action, reward_est)] + max_next_traj
232 | if max_action is None:
233 | raise ValueError('Value network is not well trained.')
234 |
235 | if self.phase == 'train':
236 | self.last_state = self.transform(state)
237 | else:
238 | self.traj = max_traj
239 |
240 | return max_action
241 |
242 | def action_clip(self, state, action_space, width, depth=1):
243 | values = []
244 |
245 | for action in action_space:
246 | next_state_est = self.state_predictor(state, action)
247 | next_return, _ = self.V_planning(next_state_est, depth, width)
248 | reward_est = self.estimate_reward(state, action)
249 | value = reward_est + self.get_normalized_gamma() * next_return
250 | values.append(value)
251 |
252 | if self.sparse_search:
253 | # self.sparse_speed_samples = 2
254 | # search in a sparse grained action space
255 | added_groups = set()
256 | max_indices = np.argsort(np.array(values))[::-1]
257 | clipped_action_space = []
258 | for index in max_indices:
259 | if self.action_group_index[index] not in added_groups:
260 | clipped_action_space.append(action_space[index])
261 | added_groups.add(self.action_group_index[index])
262 | if len(clipped_action_space) == width:
263 | break
264 | else:
265 | max_indexes = np.argpartition(np.array(values), -width)[-width:]
266 | clipped_action_space = [action_space[i] for i in max_indexes]
267 |
268 | # print(clipped_action_space)
269 | return clipped_action_space
270 |
271 | def V_planning(self, state, depth, width):
272 | """ Plans n steps into future. Computes the value for the current state as well as the trajectories
273 | defined as a list of (state, action, reward) triples
274 |
275 | """
276 |
277 | current_state_value = self.value_estimator(state)
278 | if depth == 1:
279 | return current_state_value, [(state, None, None)]
280 |
281 | if self.do_action_clip:
282 | action_space_clipped = self.action_clip(state, self.action_space, width)
283 | else:
284 | action_space_clipped = self.action_space
285 |
286 | returns = []
287 | trajs = []
288 |
289 | for action in action_space_clipped:
290 | next_state_est = self.state_predictor(state, action)
291 | reward_est = self.estimate_reward(state, action)
292 | next_value, next_traj = self.V_planning(next_state_est, depth - 1, self.planning_width)
293 | return_value = current_state_value / depth + (depth - 1) / depth * (self.get_normalized_gamma() * next_value + reward_est)
294 |
295 | returns.append(return_value)
296 | trajs.append([(state, action, reward_est)] + next_traj)
297 |
298 | max_index = np.argmax(returns)
299 | max_return = returns[max_index]
300 | max_traj = trajs[max_index]
301 |
302 | return max_return, max_traj
303 |
304 | def estimate_reward(self, state, action):
305 | """ If the time step is small enough, it's okay to model agent as linear movement during this period
306 |
307 | """
308 | # collision detection
309 | if isinstance(state, list) or isinstance(state, tuple):
310 | state = tensor_to_joint_state(state)
311 | human_states = state.human_states
312 | robot_state = state.robot_state
313 |
314 | dmin = float('inf')
315 | collision = False
316 | for i, human in enumerate(human_states):
317 | px = human.px - robot_state.px
318 | py = human.py - robot_state.py
319 | if self.kinematics == 'holonomic':
320 | vx = human.vx - action.vx
321 | vy = human.vy - action.vy
322 | else:
323 | vx = human.vx - action.v * np.cos(action.r + robot_state.theta)
324 | vy = human.vy - action.v * np.sin(action.r + robot_state.theta)
325 | ex = px + vx * self.time_step
326 | ey = py + vy * self.time_step
327 | # closest distance between boundaries of two agents
328 | closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - robot_state.radius
329 | if closest_dist < 0:
330 | collision = True
331 | break
332 | elif closest_dist < dmin:
333 | dmin = closest_dist
334 |
335 | # check if reaching the goal
336 | if self.kinematics == 'holonomic':
337 | px = robot_state.px + action.vx * self.time_step
338 | py = robot_state.py + action.vy * self.time_step
339 | else:
340 | theta = robot_state.theta + action.r
341 | px = robot_state.px + np.cos(theta) * action.v * self.time_step
342 | py = robot_state.py + np.sin(theta) * action.v * self.time_step
343 |
344 | end_position = np.array((px, py))
345 | reaching_goal = norm(end_position - np.array([robot_state.gx, robot_state.gy])) < robot_state.radius
346 |
347 | if collision:
348 | reward = -0.25
349 | elif reaching_goal:
350 | reward = 1
351 | elif dmin < 0.2:
352 | # adjust the reward based on FPS
353 | reward = (dmin - 0.2) * 0.5 * self.time_step
354 | else:
355 | reward = 0
356 |
357 | return reward
358 |
359 | def transform(self, state):
360 | """
361 | Convert the JointState into tensors on the policy's device.
362 |
363 | :param state: JointState
364 | :return: tuple of tensors (robot state of shape (1, 9), human states of shape (# of humans, 5))
365 | """
366 | robot_state_tensor = torch.Tensor([state.robot_state.to_tuple()]).to(self.device)
367 | human_states_tensor = torch.Tensor([human_state.to_tuple() for human_state in state.human_states]). \
368 | to(self.device)
369 |
370 | return robot_state_tensor, human_states_tensor
371 |
--------------------------------------------------------------------------------
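A note on the planning loop above: predict scores each candidate action as the estimated one-step reward plus the normalized discount gamma ** (time_step * v_pref) times the best return of the predicted next state, and V_planning recurses on this until depth == 1, where the learned value estimator is queried directly. The sketch below reproduces only that recursion; the toy transition, reward, and value functions (and every name and number in them) are illustrative stand-ins for the learned state_predictor, estimate_reward, and value_estimator, and action-space clipping is omitted.

# Illustrative sketch of the depth-d lookahead in V_planning (not part of the repository).
GAMMA = 0.9                       # stands in for gamma ** (time_step * v_pref)
ACTIONS = ['left', 'stay', 'right']

def predict_next(state, action):  # toy deterministic dynamics on an integer "state"
    return state + {'left': -1, 'stay': 0, 'right': 1}[action]

def estimate_reward(state, action):
    return 1.0 if predict_next(state, action) == 3 else 0.0

def value_estimate(state):
    return -0.1 * abs(3 - state)  # states closer to the toy goal (3) look better

def v_planning(state, depth):
    """Back up the best discounted return over a depth-`depth` rollout."""
    current_value = value_estimate(state)
    if depth == 1:
        return current_value, [(state, None, None)]
    best_return, best_traj = float('-inf'), None
    for action in ACTIONS:
        next_state = predict_next(state, action)
        reward = estimate_reward(state, action)
        next_return, next_traj = v_planning(next_state, depth - 1)
        # same mixing rule as V_planning above: blend the current value estimate
        # with the discounted backed-up return, weighted by the remaining depth
        value = current_value / depth + (depth - 1) / depth * (GAMMA * next_return + reward)
        if value > best_return:
            best_return, best_traj = value, [(state, action, reward)] + next_traj
    return best_return, best_traj

print(v_planning(state=0, depth=3))
--------------------------------------------------------------------------------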
/crowd_nav/policy/multi_human_rl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import logging
3 | import numpy as np
4 | from crowd_sim.envs.utils.action import ActionRot, ActionXY
5 | from crowd_nav.policy.cadrl import CADRL
6 |
7 |
8 | class MultiHumanRL(CADRL):
9 | def __init__(self):
10 | super().__init__()
11 |
12 | def predict(self, state):
13 | """
14 | A base class for all methods that take the pairwise joint state as input to the value network.
15 | The input to the value network is always of shape (batch_size, # humans, rotated joint state length)
16 |
17 | """
18 | if self.phase is None or self.device is None:
19 | raise AttributeError('Phase, device attributes have to be set!')
20 | if self.phase == 'train' and self.epsilon is None:
21 | raise AttributeError('Epsilon attribute has to be set in training phase')
22 |
23 | if self.reach_destination(state):
24 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
25 | if self.action_space is None:
26 | self.build_action_space(state.robot_state.v_pref)
27 | if not state.human_states:
28 | assert self.phase != 'train'
29 | if hasattr(self, 'attention_weights'):
30 | self.attention_weights = list()
31 | return self.select_greedy_action(state.robot_state)
32 |
33 | occupancy_maps = None
34 | probability = np.random.random()
35 | if self.phase == 'train' and probability < self.epsilon:
36 | max_action = self.action_space[np.random.choice(len(self.action_space))]
37 | else:
38 | self.action_values = list()
39 | max_value = float('-inf')
40 | max_action = None
41 | for action in self.action_space:
42 | next_robot_state = self.propagate(state.robot_state, action)
43 | if self.query_env:
44 | next_human_states, reward, done, info = self.env.onestep_lookahead(action)
45 | else:
46 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy))
47 | for human_state in state.human_states]
48 | reward = self.compute_reward(next_robot_state, next_human_states)
49 | batch_next_states = torch.cat([torch.Tensor([next_robot_state + next_human_state]).to(self.device)
50 | for next_human_state in next_human_states], dim=0)
51 | rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0)
52 | if self.with_om:
53 | if occupancy_maps is None:
54 | occupancy_maps = self.build_occupancy_maps(next_human_states).unsqueeze(0)
55 | rotated_batch_input = torch.cat([rotated_batch_input, occupancy_maps], dim=2)
56 | # VALUE UPDATE
57 | next_state_value = self.model(rotated_batch_input).data.item()
58 | value = reward + pow(self.gamma, self.time_step * state.robot_state.v_pref) * next_state_value
59 | self.action_values.append(value)
60 | if value > max_value:
61 | max_value = value
62 | max_action = action
63 | if hasattr(self, 'attention_weights'):
64 | self.attention_weights = self.model.attention_weights
65 | if max_action is None:
66 | raise ValueError('Value network is not well trained. ')
67 |
68 | if self.phase == 'train':
69 | self.last_state = self.transform(state)
70 |
71 | return max_action
72 |
73 | def compute_reward(self, nav, humans):
74 | # collision detection
75 | dmin = float('inf')
76 | collision = False
77 | for i, human in enumerate(humans):
78 | dist = np.linalg.norm((nav.px - human.px, nav.py - human.py)) - nav.radius - human.radius
79 | if dist < 0:
80 | collision = True
81 | break
82 | if dist < dmin:
83 | dmin = dist
84 |
85 | # check if reaching the goal
86 | reaching_goal = np.linalg.norm((nav.px - nav.gx, nav.py - nav.gy)) < nav.radius
87 | if collision:
88 | reward = -0.25
89 | elif reaching_goal:
90 | reward = 1
91 | elif dmin < 0.2:
92 | reward = (dmin - 0.2) * 0.5 * self.time_step
93 | else:
94 | reward = 0
95 |
96 | return reward
97 |
98 | def transform(self, state):
99 | """
100 | Take the state passed from agent and transform it to the input of value network
101 |
102 | :param state:
103 | :return: tensor of shape (# of humans, len(state))
104 | """
105 | state_tensor = torch.cat([torch.Tensor([state.robot_state + human_state]).to(self.device)
106 | for human_state in state.human_states], dim=0)
107 | rotated_state_tensor = self.rotate(state_tensor)
108 | if self.with_om:
109 | occupancy_maps = self.build_occupancy_maps(state.human_states)
110 | rotated_state_tensor = torch.cat([rotated_state_tensor, occupancy_maps], dim=1)
111 |
112 | return rotated_state_tensor
113 |
114 | def input_dim(self):
115 | return self.joint_state_dim + (self.cell_num ** 2 * self.om_channel_size if self.with_om else 0)
116 |
117 | def build_occupancy_maps(self, human_states):
118 | """
119 |
120 | :param human_states:
121 | :return: tensor of shape (# humans, self.cell_num ** 2 * self.om_channel_size)
122 | """
123 | occupancy_maps = []
124 | for human in human_states:
125 | other_humans = np.concatenate([np.array([(other_human.px, other_human.py, other_human.vx, other_human.vy)])
126 | for other_human in human_states if other_human != human], axis=0)
127 | other_px = other_humans[:, 0] - human.px
128 | other_py = other_humans[:, 1] - human.py
129 | # new x-axis is in the direction of human's velocity
130 | human_velocity_angle = np.arctan2(human.vy, human.vx)
131 | other_human_orientation = np.arctan2(other_py, other_px)
132 | rotation = other_human_orientation - human_velocity_angle
133 | distance = np.linalg.norm([other_px, other_py], axis=0)
134 | other_px = np.cos(rotation) * distance
135 | other_py = np.sin(rotation) * distance
136 |
137 | # compute indices of humans in the grid
138 | other_x_index = np.floor(other_px / self.cell_size + self.cell_num / 2)
139 | other_y_index = np.floor(other_py / self.cell_size + self.cell_num / 2)
140 | other_x_index[other_x_index < 0] = float('-inf')
141 | other_x_index[other_x_index >= self.cell_num] = float('-inf')
142 | other_y_index[other_y_index < 0] = float('-inf')
143 | other_y_index[other_y_index >= self.cell_num] = float('-inf')
144 | grid_indices = self.cell_num * other_y_index + other_x_index
145 | occupancy_map = np.isin(range(self.cell_num ** 2), grid_indices)
146 | if self.om_channel_size == 1:
147 | occupancy_maps.append([occupancy_map.astype(int)])
148 | else:
149 | # calculate relative velocity for other agents
150 | other_human_velocity_angles = np.arctan2(other_humans[:, 3], other_humans[:, 2])
151 | rotation = other_human_velocity_angles - human_velocity_angle
152 | speed = np.linalg.norm(other_humans[:, 2:4], axis=1)
153 | other_vx = np.cos(rotation) * speed
154 | other_vy = np.sin(rotation) * speed
155 | dm = [list() for _ in range(self.cell_num ** 2 * self.om_channel_size)]
156 | for i, index in np.ndenumerate(grid_indices):
157 | if index in range(self.cell_num ** 2):
158 | if self.om_channel_size == 2:
159 | dm[2 * int(index)].append(other_vx[i])
160 | dm[2 * int(index) + 1].append(other_vy[i])
161 | elif self.om_channel_size == 3:
162 | dm[3 * int(index)].append(1)
163 | dm[3 * int(index) + 1].append(other_vx[i])
164 | dm[3 * int(index) + 2].append(other_vy[i])
165 | else:
166 | raise NotImplementedError
167 | for i, cell in enumerate(dm):
168 | dm[i] = sum(dm[i]) / len(dm[i]) if len(dm[i]) != 0 else 0
169 | occupancy_maps.append([dm])
170 |
171 | return torch.from_numpy(np.concatenate(occupancy_maps, axis=0)).float()
172 |
173 |
--------------------------------------------------------------------------------
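For reference, build_occupancy_maps above expresses every other human in a frame whose x-axis follows the centre human's velocity, bins them into a cell_num x cell_num grid around that human, and (for multi-channel maps) stores per-cell relative velocities. A minimal single-channel sketch for one human; the coordinates and the cell_num / cell_size values below are toy numbers, not taken from the configs.

# Illustrative single-human occupancy grid (toy values, not part of the repository).
import numpy as np

cell_num, cell_size = 4, 1.0
human = np.array([0.0, 0.0, 1.0, 0.0])            # px, py, vx, vy of the centre human
others = np.array([[1.5, 0.5, 0.0, -1.0],
                   [-0.5, 1.0, 0.5, 0.5]])        # the remaining humans

# rotate the others into a frame aligned with the centre human's velocity
dx, dy = others[:, 0] - human[0], others[:, 1] - human[1]
rotation = np.arctan2(dy, dx) - np.arctan2(human[3], human[2])
distance = np.hypot(dx, dy)
local_x, local_y = np.cos(rotation) * distance, np.sin(rotation) * distance

# bin into a cell_num x cell_num grid centred on the human
ix = np.floor(local_x / cell_size + cell_num / 2)
iy = np.floor(local_y / cell_size + cell_num / 2)
valid = (ix >= 0) & (ix < cell_num) & (iy >= 0) & (iy < cell_num)
grid_indices = (cell_num * iy + ix)[valid].astype(int)

occupancy_map = np.isin(np.arange(cell_num ** 2), grid_indices).astype(int)
print(occupancy_map.reshape(cell_num, cell_num))
--------------------------------------------------------------------------------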
/crowd_nav/policy/policy_factory.py:
--------------------------------------------------------------------------------
1 | from crowd_sim.envs.policy.policy_factory import policy_factory
2 | from crowd_nav.policy.cadrl import CADRL
3 | from crowd_nav.policy.lstm_rl import LstmRL
4 | from crowd_nav.policy.sarl import SARL
5 | from crowd_nav.policy.gcn import GCN
6 | from crowd_nav.policy.model_predictive_rl import ModelPredictiveRL
7 |
8 |
9 | policy_factory['cadrl'] = CADRL
10 | policy_factory['lstm_rl'] = LstmRL
11 | policy_factory['sarl'] = SARL
12 | policy_factory['gcn'] = GCN
13 | policy_factory['model_predictive_rl'] = ModelPredictiveRL
14 |
--------------------------------------------------------------------------------
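The factory maps a config name to a policy class, so a policy can be constructed from the string stored in the policy config. A minimal usage sketch, assuming the package and its dependencies are importable:

from crowd_nav.policy.policy_factory import policy_factory

policy = policy_factory['model_predictive_rl']()   # -> ModelPredictiveRL instance
print(policy.name, policy.trainable)               # ModelPredictiveRL True
--------------------------------------------------------------------------------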
/crowd_nav/policy/sarl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.utils.rnn as rnn_utils
4 | import logging
5 | from crowd_nav.policy.cadrl import mlp
6 | from crowd_nav.policy.multi_human_rl import MultiHumanRL
7 |
8 |
9 | class ValueNetwork(nn.Module):
10 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims, attention_dims, with_global_state,
11 | cell_size, cell_num):
12 | super().__init__()
13 | self.self_state_dim = self_state_dim
14 | self.global_state_dim = mlp1_dims[-1]
15 | self.mlp1 = mlp(input_dim, mlp1_dims, last_relu=True)
16 | self.mlp2 = mlp(mlp1_dims[-1], mlp2_dims)
17 | self.with_global_state = with_global_state
18 | if with_global_state:
19 | self.attention = mlp(mlp1_dims[-1] * 2, attention_dims)
20 | else:
21 | self.attention = mlp(mlp1_dims[-1], attention_dims)
22 | self.cell_size = cell_size
23 | self.cell_num = cell_num
24 | mlp3_input_dim = mlp2_dims[-1] + self.self_state_dim
25 | self.mlp3 = mlp(mlp3_input_dim, mlp3_dims)
26 | self.attention_weights = None
27 |
28 | def forward(self, state_input):
29 | """
30 | First transform the world coordinates to self-centric coordinates and then do forward computation
31 |
32 | :param state_input: tensor of shape (batch_size, # of humans, length of a rotated state)
33 | :return:
34 | """
35 | if isinstance(state_input, tuple):
36 | state, lengths = state_input
37 | else:
38 | state = state_input
39 | lengths = torch.IntTensor([state.size()[1]])
40 |
41 | size = state.shape
42 | self_state = state[:, 0, :self.self_state_dim]
43 | mlp1_output = self.mlp1(state.reshape((-1, size[2])))
44 | mlp2_output = self.mlp2(mlp1_output)
45 |
46 | if self.with_global_state:
47 | # compute attention scores
48 | global_state = torch.mean(mlp1_output.view(size[0], size[1], -1), 1, keepdim=True)
49 | global_state = global_state.expand((size[0], size[1], self.global_state_dim)).\
50 | contiguous().view(-1, self.global_state_dim)
51 | attention_input = torch.cat([mlp1_output, global_state], dim=1)
52 | else:
53 | attention_input = mlp1_output
54 | scores = self.attention(attention_input).view(size[0], size[1], 1).squeeze(dim=2)
55 |
56 | # masked softmax
57 | mask = rnn_utils.pad_sequence([torch.ones(length.item()) for length in lengths], batch_first=True)
58 | masked_scores = scores * mask.float()
59 | max_scores = torch.max(masked_scores, dim=1, keepdim=True)[0]
60 | exps = torch.exp(masked_scores - max_scores)
61 | masked_exps = exps * mask.float()
62 | masked_sums = masked_exps.sum(1, keepdim=True)
63 | weights = (masked_exps / masked_sums).unsqueeze(2)
64 | self.attention_weights = weights[0, :, 0].data.cpu().numpy()
65 |
66 | # output feature is a linear combination of input features
67 | features = mlp2_output.view(size[0], size[1], -1)
68 | weighted_feature = torch.sum(torch.mul(weights, features), dim=1)
69 |
70 | # concatenate agent's state with global weighted humans' state
71 | joint_state = torch.cat([self_state, weighted_feature], dim=1)
72 | value = self.mlp3(joint_state)
73 | return value
74 |
75 |
76 | class SARL(MultiHumanRL):
77 | def __init__(self):
78 | super().__init__()
79 | self.name = 'SARL'
80 | self.attention_weights = None
81 |
82 | def configure(self, config):
83 | self.set_common_parameters(config)
84 | self.with_om = config.sarl.with_om
85 | self.multiagent_training = config.sarl.multiagent_training
86 |
87 | mlp1_dims = config.sarl.mlp1_dims
88 | mlp2_dims = config.sarl.mlp2_dims
89 | mlp3_dims = config.sarl.mlp3_dims
90 | attention_dims = config.sarl.attention_dims
91 | with_global_state = config.sarl.with_global_state
92 | self.model = ValueNetwork(self.input_dim(), self.self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims,
93 | attention_dims, with_global_state, self.cell_size, self.cell_num)
94 | if self.with_om:
95 | self.name = 'OM-SARL'
96 | logging.info('Policy: {} {} global state'.format(self.name, 'w/' if with_global_state else 'w/o'))
97 |
98 | def get_attention_weights(self):
99 | return self.attention_weights
100 |
--------------------------------------------------------------------------------
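The attention in ValueNetwork.forward above scores each human, applies a masked softmax so that padded human slots receive zero weight, and uses the resulting weights to pool per-human features into a single crowd feature. A minimal sketch of just the masked-softmax step, with toy scores and lengths:

# Illustrative masked softmax over a padded batch of humans (toy values).
import torch

scores = torch.tensor([[1.0, 2.0, 0.5, 0.0],    # batch of 2, padded to 4 humans
                       [0.3, 0.7, 0.0, 0.0]])
lengths = torch.tensor([3, 2])                   # actual number of humans per sample

mask = (torch.arange(scores.size(1)).unsqueeze(0) < lengths.unsqueeze(1)).float()
masked_scores = scores * mask
max_scores = masked_scores.max(dim=1, keepdim=True)[0]   # subtract max for stability
exps = torch.exp(masked_scores - max_scores) * mask
weights = exps / exps.sum(dim=1, keepdim=True)

print(weights)             # padded humans get exactly zero attention
print(weights.sum(dim=1))  # each row sums to 1
--------------------------------------------------------------------------------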
/crowd_nav/policy/state_predictor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from crowd_nav.policy.helpers import mlp
5 |
6 |
7 | class StatePredictor(nn.Module):
8 | def __init__(self, config, graph_model, time_step):
9 | """
10 | This module predicts the next state given the current state as input.
11 | It uses a graph model to encode the state into a latent space and an MLP head to predict each human's next state.
12 | """
13 | super().__init__()
14 | self.trainable = True
15 | self.kinematics = config.action_space.kinematics
16 | self.graph_model = graph_model
17 | self.human_motion_predictor = mlp(config.gcn.X_dim, config.model_predictive_rl.motion_predictor_dims)
18 | self.time_step = time_step
19 |
20 | def forward(self, state, action, detach=False):
21 | """ Predict the next state tensor given current state as input.
22 |
23 | :return: tensor of shape (batch_size, # of agents, feature_size)
24 | """
25 | assert len(state[0].shape) == 3
26 | assert len(state[1].shape) == 3
27 |
28 | state_embedding = self.graph_model(state)
29 | if detach:
30 | state_embedding = state_embedding.detach()
31 | if action is None:
32 | # for training purpose
33 | next_robot_state = None
34 | else:
35 | next_robot_state = self.compute_next_state(state[0], action)
36 | next_human_states = self.human_motion_predictor(state_embedding)[:, 1:, :]
37 |
38 | next_observation = [next_robot_state, next_human_states]
39 | return next_observation
40 |
41 | def compute_next_state(self, robot_state, action):
42 | # currently it can not perform parallel computation
43 | if robot_state.shape[0] != 1:
44 | raise NotImplementedError
45 |
46 | # px, py, vx, vy, radius, gx, gy, v_pref, theta
47 | next_state = robot_state.clone().squeeze()
48 | if self.kinematics == 'holonomic':
49 | next_state[0] = next_state[0] + action.vx * self.time_step
50 | next_state[1] = next_state[1] + action.vy * self.time_step
51 | next_state[2] = action.vx
52 | next_state[3] = action.vy
53 | else:
54 | next_state[8] = next_state[8] + action.r
55 | next_state[0] = next_state[0] + np.cos(next_state[8]) * action.v * self.time_step
56 | next_state[1] = next_state[1] + np.sin(next_state[8]) * action.v * self.time_step
57 | next_state[2] = np.cos(next_state[8]) * action.v
58 | next_state[3] = np.sin(next_state[8]) * action.v
59 |
60 | return next_state.unsqueeze(0).unsqueeze(0)
61 |
62 |
63 | class LinearStatePredictor(object):
64 | def __init__(self, config, time_step):
65 | """
66 | This module predicts the next state given the current state as input.
67 | It propagates the robot with the given action and approximates human motion with a constant-velocity (linear) model.
68 | """
69 | super().__init__()
70 | self.trainable = False
71 | self.kinematics = config.action_space.kinematics
72 | self.time_step = time_step
73 |
74 | def __call__(self, state, action):
75 | """ Predict the next state tensor given current state as input.
76 |
77 | :return: tensor of shape (batch_size, # of agents, feature_size)
78 | """
79 | assert len(state[0].shape) == 3
80 | assert len(state[1].shape) == 3
81 |
82 | next_robot_state = self.compute_next_state(state[0], action)
83 | next_human_states = self.linear_motion_approximator(state[1])
84 |
85 | next_observation = [next_robot_state, next_human_states]
86 | return next_observation
87 |
88 | def compute_next_state(self, robot_state, action):
89 | # currently it can not perform parallel computation
90 | if robot_state.shape[0] != 1:
91 | raise NotImplementedError
92 |
93 | # px, py, vx, vy, radius, gx, gy, v_pref, theta
94 | next_state = robot_state.clone().squeeze()
95 | if self.kinematics == 'holonomic':
96 | next_state[0] = next_state[0] + action.vx * self.time_step
97 | next_state[1] = next_state[1] + action.vy * self.time_step
98 | next_state[2] = action.vx
99 | next_state[3] = action.vy
100 | else:
101 | next_state[8] = next_state[8] + action.r
102 | next_state[0] = next_state[0] + np.cos(next_state[8]) * action.v * self.time_step
103 | next_state[1] = next_state[1] + np.sin(next_state[8]) * action.v * self.time_step
104 | next_state[2] = np.cos(next_state[8]) * action.v
105 | next_state[3] = np.sin(next_state[8]) * action.v
106 |
107 | return next_state.unsqueeze(0).unsqueeze(0)
108 |
109 | @staticmethod
110 | def linear_motion_approximator(human_states):
111 | """ approximate human states with linear motion, input shape : (batch_size, human_num, human_state_size)
112 | """
113 | # px, py, vx, vy, radius
114 | next_state = human_states.clone().squeeze()
115 | next_state[:, 0] = next_state[:, 0] + next_state[:, 2]
116 | next_state[:, 1] = next_state[:, 1] + next_state[:, 3]
117 |
118 | return next_state.unsqueeze(0)
119 |
120 |
--------------------------------------------------------------------------------
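Both predictors above propagate the robot state with simple kinematics over one time step: holonomic actions overwrite the velocity directly, while rotational actions turn the heading first and then move along it. A small numeric check of the holonomic branch, using the state layout given in the in-code comment (px, py, vx, vy, radius, gx, gy, v_pref, theta) and toy values:

# Illustrative holonomic one-step update (toy values, not part of the repository).
import torch

time_step = 0.25
robot_state = torch.tensor([[[0.0, 0.0, 0.0, 0.0, 0.3, 4.0, 0.0, 1.0, 0.0]]])
vx, vy = 1.0, 0.5                       # commanded holonomic velocity

next_state = robot_state.clone().squeeze()
next_state[0] += vx * time_step         # px: 0.0 -> 0.25
next_state[1] += vy * time_step         # py: 0.0 -> 0.125
next_state[2], next_state[3] = vx, vy   # velocity is overwritten with the command
print(next_state[:4])                   # tensor([0.2500, 0.1250, 1.0000, 0.5000])
--------------------------------------------------------------------------------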
/crowd_nav/policy/value_estimator.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from crowd_nav.policy.helpers import mlp
3 |
4 |
5 | class ValueEstimator(nn.Module):
6 | def __init__(self, config, graph_model):
7 | super().__init__()
8 | self.graph_model = graph_model
9 | self.value_network = mlp(config.gcn.X_dim, config.model_predictive_rl.value_network_dims)
10 |
11 | def forward(self, state):
12 | """ Embed state into a latent space. Take the first row of the feature matrix as state representation.
13 | """
14 | assert len(state[0].shape) == 3
15 | assert len(state[1].shape) == 3
16 |
17 | # only use the feature of robot node as state representation
18 | state_embedding = self.graph_model(state)[:, 0, :]
19 | value = self.value_network(state_embedding)
20 | return value
21 |
--------------------------------------------------------------------------------
/crowd_nav/test.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import argparse
3 | import importlib.util
4 | import os
5 | import torch
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | import gym
9 | from crowd_nav.utils.explorer import Explorer
10 | from crowd_nav.policy.policy_factory import policy_factory
11 | from crowd_sim.envs.utils.robot import Robot
12 | from crowd_sim.envs.policy.orca import ORCA
13 |
14 |
15 | def main(args):
16 | # configure logging and device
17 | level = logging.DEBUG if args.debug else logging.INFO
18 | logging.basicConfig(level=level, format='%(asctime)s, %(levelname)s: %(message)s',
19 | datefmt="%Y-%m-%d %H:%M:%S")
20 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
21 | logging.info('Using device: %s', device)
22 |
23 | if args.model_dir is not None:
24 | if args.config is not None:
25 | config_file = args.config
26 | else:
27 | config_file = os.path.join(args.model_dir, 'config.py')
28 | if args.il:
29 | model_weights = os.path.join(args.model_dir, 'il_model.pth')
30 | logging.info('Loaded IL weights')
31 | elif args.rl:
32 | if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
33 | model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
34 | else:
35 | print(os.listdir(args.model_dir))
36 | model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1])
37 | logging.info('Loaded RL weights')
38 | else:
39 | model_weights = os.path.join(args.model_dir, 'best_val.pth')
40 | logging.info('Loaded RL weights with best VAL')
41 |
42 | else:
43 | config_file = args.config
44 |
45 | spec = importlib.util.spec_from_file_location('config', config_file)
46 | if spec is None:
47 | parser.error('Config file not found.')
48 | config = importlib.util.module_from_spec(spec)
49 | spec.loader.exec_module(config)
50 |
51 | # configure policy
52 | policy_config = config.PolicyConfig(args.debug)
53 | policy = policy_factory[policy_config.name]()
54 | if args.planning_depth is not None:
55 | policy_config.model_predictive_rl.do_action_clip = True
56 | policy_config.model_predictive_rl.planning_depth = args.planning_depth
57 | if args.planning_width is not None:
58 | policy_config.model_predictive_rl.do_action_clip = True
59 | policy_config.model_predictive_rl.planning_width = args.planning_width
60 | if args.sparse_search:
61 | policy_config.model_predictive_rl.sparse_search = True
62 |
63 | policy.configure(policy_config)
64 | if policy.trainable:
65 | if args.model_dir is None:
66 | parser.error('Trainable policy must be specified with a model weights directory')
67 | policy.load_model(model_weights)
68 |
69 | # configure environment
70 | env_config = config.EnvConfig(args.debug)
71 |
72 | if args.human_num is not None:
73 | env_config.sim.human_num = args.human_num
74 | env = gym.make('CrowdSim-v0')
75 | env.configure(env_config)
76 |
77 | if args.square:
78 | env.test_scenario = 'square_crossing'
79 | if args.circle:
80 | env.test_scenario = 'circle_crossing'
81 | if args.test_scenario is not None:
82 | env.test_scenario = args.test_scenario
83 |
84 | robot = Robot(env_config, 'robot')
85 | env.set_robot(robot)
86 | robot.time_step = env.time_step
87 | robot.set_policy(policy)
88 | explorer = Explorer(env, robot, device, None, gamma=0.9)
89 |
90 | train_config = config.TrainConfig(args.debug)
91 | epsilon_end = train_config.train.epsilon_end
92 | if not isinstance(robot.policy, ORCA):
93 | robot.policy.set_epsilon(epsilon_end)
94 |
95 | policy.set_phase(args.phase)
96 | policy.set_device(device)
97 | # set safety space for ORCA in non-cooperative simulation
98 | if isinstance(robot.policy, ORCA):
99 | if robot.visible:
100 | robot.policy.safety_space = args.safety_space
101 | else:
102 | robot.policy.safety_space = args.safety_space
103 | logging.info('ORCA agent buffer: %f', robot.policy.safety_space)
104 |
105 | policy.set_env(env)
106 | robot.print_info()
107 |
108 | if args.visualize:
109 | rewards = []
110 | ob = env.reset(args.phase, args.test_case)
111 | done = False
112 | last_pos = np.array(robot.get_position())
113 | while not done:
114 | action = robot.act(ob)
115 | ob, reward, done, info = env.step(action)
116 | rewards.append(reward)
117 | current_pos = np.array(robot.get_position())
118 | logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
119 | last_pos = current_pos
120 | gamma = 0.9
121 | cumulative_reward = sum([pow(gamma, t * robot.time_step * robot.v_pref)
122 | * reward for t, reward in enumerate(rewards)])
123 |
124 | if args.traj:
125 | env.render('traj', args.video_file)
126 | else:
127 | if args.video_dir is not None:
128 | if policy_config.name == 'gcn':
129 | args.video_file = os.path.join(args.video_dir, policy_config.name + '_' + policy_config.gcn.similarity_function)
130 | else:
131 | args.video_file = os.path.join(args.video_dir, policy_config.name)
132 | args.video_file = args.video_file + '_' + args.phase + '_' + str(args.test_case) + '.mp4'
133 | env.render('video', args.video_file)
134 | logging.info('It takes %.2f seconds to finish. Final status is %s, cumulative_reward is %f', env.global_time, info, cumulative_reward)
135 | if robot.visible and info == 'reach goal':
136 | human_times = env.get_human_times()
137 | logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times))
138 | else:
139 | explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
140 | if args.plot_test_scenarios_hist:
141 | test_angle_seeds = np.array(env.test_scene_seeds)
142 | b = [i * 0.01 for i in range(101)]
143 | n, bins, patches = plt.hist(test_angle_seeds, b, facecolor='g')
144 | plt.savefig(os.path.join(args.model_dir, 'test_scene_hist.png'))
145 | plt.close()
146 |
147 |
148 | if __name__ == '__main__':
149 | parser = argparse.ArgumentParser('Parse configuration file')
150 | parser.add_argument('--config', type=str, default=None)
151 | parser.add_argument('--policy', type=str, default='model_predictive_rl')
152 | parser.add_argument('-m', '--model_dir', type=str, default=None)
153 | parser.add_argument('--il', default=False, action='store_true')
154 | parser.add_argument('--rl', default=False, action='store_true')
155 | parser.add_argument('--gpu', default=False, action='store_true')
156 | parser.add_argument('-v', '--visualize', default=False, action='store_true')
157 | parser.add_argument('--phase', type=str, default='test')
158 | parser.add_argument('-c', '--test_case', type=int, default=None)
159 | parser.add_argument('--square', default=False, action='store_true')
160 | parser.add_argument('--circle', default=False, action='store_true')
161 | parser.add_argument('--video_file', type=str, default=None)
162 | parser.add_argument('--video_dir', type=str, default=None)
163 | parser.add_argument('--traj', default=False, action='store_true')
164 | parser.add_argument('--debug', default=False, action='store_true')
165 | parser.add_argument('--human_num', type=int, default=None)
166 | parser.add_argument('--safety_space', type=float, default=0.2)
167 | parser.add_argument('--test_scenario', type=str, default=None)
168 | parser.add_argument('--plot_test_scenarios_hist', default=True, action='store_true')
169 | parser.add_argument('-d', '--planning_depth', type=int, default=None)
170 | parser.add_argument('-w', '--planning_width', type=int, default=None)
171 | parser.add_argument('--sparse_search', default=False, action='store_true')
172 |
173 | sys_args = parser.parse_args()
174 |
175 | main(sys_args)
176 |
--------------------------------------------------------------------------------
/crowd_nav/tests/test_polices.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from crowd_nav.train import main
3 |
4 |
5 | def get_default_args():
6 | parser = argparse.ArgumentParser('Parse configuration file')
7 | parser.add_argument('--policy', type=str, default='cadrl')
8 | parser.add_argument('--config', type=str, default='config.py')
9 | parser.add_argument('--output_dir', type=str, default='data/output')
10 | parser.add_argument('--overwrite', default=False, action='store_true')
11 | parser.add_argument('--weights', type=str)
12 | parser.add_argument('--resume', default=False, action='store_true')
13 | parser.add_argument('--gpu', default=False, action='store_true')
14 | parser.add_argument('--debug', default=False, action='store_true')
15 | args = parser.parse_args(args=[])
16 | return args
17 |
18 |
19 | def test_sarl():
20 | args = get_default_args()
21 | args.policy = 'sarl'
22 | args.config = 'configs/tests_config.py'
23 | args.debug = True
24 | args.overwrite = True
25 | main(args)
26 |
27 |
28 | def test_cadrl():
29 | args = get_default_args()
30 | args.policy = 'cadrl'
31 | args.config = 'configs/tests_config.py'
32 | args.debug = True
33 | args.overwrite = True
34 | main(args)
35 |
36 |
37 | def test_lstm_rl():
38 | args = get_default_args()
39 | args.policy = 'lstm_rl'
40 | args.config = 'configs/tests_config.py'
41 | args.debug = True
42 | args.overwrite = True
43 | main(args)
44 |
45 |
46 | def test_gcn():
47 | args = get_default_args()
48 | args.policy = 'gcn'
49 | args.config = 'configs/tests_config.py'
50 | args.debug = True
51 | args.overwrite = True
52 | main(args)
53 |
--------------------------------------------------------------------------------
/crowd_nav/train.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import logging
3 | import argparse
4 | import os
5 | import shutil
6 | import importlib.util
7 | import torch
8 | import gym
9 | import copy
10 | import git
11 | import re
12 | from tensorboardX import SummaryWriter
13 | from crowd_sim.envs.utils.robot import Robot
14 | from crowd_nav.utils.trainer import VNRLTrainer, MPRLTrainer
15 | from crowd_nav.utils.memory import ReplayMemory
16 | from crowd_nav.utils.explorer import Explorer
17 | from crowd_nav.policy.policy_factory import policy_factory
18 |
19 |
20 | def set_random_seeds(seed):
21 | """
22 | Sets the random seeds for pytorch cpu and gpu
23 | """
24 | torch.manual_seed(seed)
25 | torch.cuda.manual_seed_all(seed)
26 | return None
27 |
28 |
29 | def main(args):
30 | set_random_seeds(args.randomseed)
31 | # configure paths
32 | make_new_dir = True
33 | if os.path.exists(args.output_dir):
34 | if args.overwrite:
35 | shutil.rmtree(args.output_dir)
36 | else:
37 | key = input('Output directory already exists! Overwrite the folder? (y/n)')
38 | if key == 'y' and not args.resume:
39 | shutil.rmtree(args.output_dir)
40 | else:
41 | make_new_dir = False
42 | if make_new_dir:
43 | os.makedirs(args.output_dir)
44 | shutil.copy(args.config, os.path.join(args.output_dir, 'config.py'))
45 |
46 | # # insert the arguments from command line to the config file
47 | # with open(os.path.join(args.output_dir, 'config.py'), 'r') as fo:
48 | # config_text = fo.read()
49 | # search_pairs = {r"gcn.X_dim = \d*": "gcn.X_dim = {}".format(args.X_dim),
50 | # r"gcn.num_layer = \d": "gcn.num_layer = {}".format(args.layers),
51 | # r"gcn.similarity_function = '\w*'": "gcn.similarity_function = '{}'".format(args.sim_func),
52 | # r"gcn.layerwise_graph = \w*": "gcn.layerwise_graph = {}".format(args.layerwise_graph),
53 | # r"gcn.skip_connection = \w*": "gcn.skip_connection = {}".format(args.skip_connection)}
54 | #
55 | # for find, replace in search_pairs.items():
56 | # config_text = re.sub(find, replace, config_text)
57 | #
58 | # with open(os.path.join(args.output_dir, 'config.py'), 'w') as fo:
59 | # fo.write(config_text)
60 |
61 | args.config = os.path.join(args.output_dir, 'config.py')
62 | log_file = os.path.join(args.output_dir, 'output.log')
63 | il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
64 | rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')
65 |
66 | spec = importlib.util.spec_from_file_location('config', args.config)
67 | if spec is None:
68 | parser.error('Config file not found.')
69 | config = importlib.util.module_from_spec(spec)
70 | spec.loader.exec_module(config)
71 |
72 | # configure logging
73 | mode = 'a' if args.resume else 'w'
74 | file_handler = logging.FileHandler(log_file, mode=mode)
75 | stdout_handler = logging.StreamHandler(sys.stdout)
76 | level = logging.INFO if not args.debug else logging.DEBUG
77 | logging.basicConfig(level=level, handlers=[stdout_handler, file_handler],
78 | format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
79 | repo = git.Repo(search_parent_directories=True)
80 | logging.info('Current git head hash code: {}'.format(repo.head.object.hexsha))
81 | logging.info('Current config content is: {}'.format(config))
82 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
83 | logging.info('Using device: %s', device)
84 | writer = SummaryWriter(log_dir=args.output_dir)
85 |
86 | # configure policy
87 | policy_config = config.PolicyConfig()
88 | policy = policy_factory[policy_config.name]()
89 | if not policy.trainable:
90 | parser.error('Policy has to be trainable')
91 | policy.configure(policy_config)
92 | policy.set_device(device)
93 |
94 | # configure environment
95 | env_config = config.EnvConfig(args.debug)
96 | env = gym.make('CrowdSim-v0')
97 | env.configure(env_config)
98 | robot = Robot(env_config, 'robot')
99 | robot.time_step = env.time_step
100 | env.set_robot(robot)
101 |
102 | # read training parameters
103 | train_config = config.TrainConfig(args.debug)
104 | rl_learning_rate = train_config.train.rl_learning_rate
105 | train_batches = train_config.train.train_batches
106 | train_episodes = train_config.train.train_episodes
107 | sample_episodes = train_config.train.sample_episodes
108 | target_update_interval = train_config.train.target_update_interval
109 | evaluation_interval = train_config.train.evaluation_interval
110 | capacity = train_config.train.capacity
111 | epsilon_start = train_config.train.epsilon_start
112 | epsilon_end = train_config.train.epsilon_end
113 | epsilon_decay = train_config.train.epsilon_decay
114 | checkpoint_interval = train_config.train.checkpoint_interval
115 |
116 | # configure trainer and explorer
117 | memory = ReplayMemory(capacity)
118 | model = policy.get_model()
119 | batch_size = train_config.trainer.batch_size
120 | optimizer = train_config.trainer.optimizer
121 | if policy_config.name == 'model_predictive_rl':
122 | trainer = MPRLTrainer(model, policy.state_predictor, memory, device, policy, writer, batch_size, optimizer, env.human_num,
123 | reduce_sp_update_frequency=train_config.train.reduce_sp_update_frequency,
124 | freeze_state_predictor=train_config.train.freeze_state_predictor,
125 | detach_state_predictor=train_config.train.detach_state_predictor,
126 | share_graph_model=policy_config.model_predictive_rl.share_graph_model)
127 | else:
128 | trainer = VNRLTrainer(model, memory, device, policy, batch_size, optimizer, writer)
129 | explorer = Explorer(env, robot, device, writer, memory, policy.gamma, target_policy=policy)
130 |
131 | # imitation learning
132 | if args.resume:
133 | if not os.path.exists(rl_weight_file):
134 | logging.error('RL weights do not exist')
135 | model.load_state_dict(torch.load(rl_weight_file))
136 | rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
137 | logging.info('Load reinforcement learning trained weights. Resume training')
138 | elif os.path.exists(il_weight_file):
139 | model.load_state_dict(torch.load(il_weight_file))
140 | logging.info('Load imitation learning trained weights.')
141 | else:
142 | il_episodes = train_config.imitation_learning.il_episodes
143 | il_policy = train_config.imitation_learning.il_policy
144 | il_epochs = train_config.imitation_learning.il_epochs
145 | il_learning_rate = train_config.imitation_learning.il_learning_rate
146 | trainer.set_learning_rate(il_learning_rate)
147 | if robot.visible:
148 | safety_space = 0
149 | else:
150 | safety_space = train_config.imitation_learning.safety_space
151 | il_policy = policy_factory[il_policy]()
152 | il_policy.multiagent_training = policy.multiagent_training
153 | il_policy.safety_space = safety_space
154 | robot.set_policy(il_policy)
155 | explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True)
156 | trainer.optimize_epoch(il_epochs)
157 | policy.save_model(il_weight_file)
158 | logging.info('Finish imitation learning. Weights saved.')
159 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
160 |
161 | trainer.update_target_model(model)
162 |
163 | # reinforcement learning
164 | policy.set_env(env)
165 | robot.set_policy(policy)
166 | robot.print_info()
167 | trainer.set_learning_rate(rl_learning_rate)
168 | # fill the memory pool with some RL experience
169 | if args.resume:
170 | robot.policy.set_epsilon(epsilon_end)
171 | explorer.run_k_episodes(100, 'train', update_memory=True, episode=0)
172 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
173 | episode = 0
174 | best_val_reward = -1
175 | best_val_model = None
176 | # evaluate the model after imitation learning
177 |
178 | if episode % evaluation_interval == 0:
179 | logging.info('Evaluate the model instantly after imitation learning on the validation cases')
180 | explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)
181 | explorer.log('val', episode // evaluation_interval)
182 |
183 | if args.test_after_every_eval:
184 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True)
185 | explorer.log('test', episode // evaluation_interval)
186 |
187 | episode = 0
188 | while episode < train_episodes:
189 | if args.resume:
190 | epsilon = epsilon_end
191 | else:
192 | if episode < epsilon_decay:
193 | epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
194 | else:
195 | epsilon = epsilon_end
196 | robot.policy.set_epsilon(epsilon)
197 |
198 | # sample k episodes into memory and optimize over the generated memory
199 | explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode)
200 | explorer.log('train', episode)
201 |
202 | trainer.optimize_batch(train_batches, episode)
203 | episode += 1
204 |
205 | if episode % target_update_interval == 0:
206 | trainer.update_target_model(model)
207 | # evaluate the model
208 | if episode % evaluation_interval == 0:
209 | _, _, _, reward, _ = explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)
210 | explorer.log('val', episode // evaluation_interval)
211 |
212 | if episode % checkpoint_interval == 0 and reward > best_val_reward:
213 | best_val_reward = reward
214 | best_val_model = copy.deepcopy(policy.get_state_dict())
215 | # test after every evaluation to check how the generalization performance evolves
216 | if args.test_after_every_eval:
217 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True)
218 | explorer.log('test', episode // evaluation_interval)
219 |
220 | if episode != 0 and episode % checkpoint_interval == 0:
221 | current_checkpoint = episode // checkpoint_interval - 1
222 | save_every_checkpoint_rl_weight_file = rl_weight_file.split('.')[0] + '_' + str(current_checkpoint) + '.pth'
223 | policy.save_model(save_every_checkpoint_rl_weight_file)
224 |
225 | # # test with the best val model
226 | if best_val_model is not None:
227 | policy.load_state_dict(best_val_model)
228 | torch.save(best_val_model, os.path.join(args.output_dir, 'best_val.pth'))
229 | logging.info('Save the best val model with the reward: {}'.format(best_val_reward))
230 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, print_failure=True)
231 |
232 |
233 | if __name__ == '__main__':
234 | parser = argparse.ArgumentParser('Parse configuration file')
235 | parser.add_argument('--policy', type=str, default='model_predictive_rl')
236 | parser.add_argument('--config', type=str, default='configs/icra_benchmark/mp_separate.py')
237 | parser.add_argument('--output_dir', type=str, default='data/output')
238 | parser.add_argument('--overwrite', default=False, action='store_true')
239 | parser.add_argument('--weights', type=str)
240 | parser.add_argument('--resume', default=False, action='store_true')
241 | parser.add_argument('--gpu', default=False, action='store_true')
242 | parser.add_argument('--debug', default=False, action='store_true')
243 | parser.add_argument('--test_after_every_eval', default=False, action='store_true')
244 | parser.add_argument('--randomseed', type=int, default=17)
245 |
246 | # arguments for GCN
247 | # parser.add_argument('--X_dim', type=int, default=32)
248 | # parser.add_argument('--layers', type=int, default=2)
249 | # parser.add_argument('--sim_func', type=str, default='embedded_gaussian')
250 | # parser.add_argument('--layerwise_graph', default=False, action='store_true')
251 | # parser.add_argument('--skip_connection', default=True, action='store_true')
252 |
253 | sys_args = parser.parse_args()
254 |
255 | main(sys_args)
256 |
--------------------------------------------------------------------------------
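The exploration schedule in the training loop above decays epsilon linearly from epsilon_start to epsilon_end over the first epsilon_decay episodes and holds it constant afterwards. A small sketch of that schedule; the numbers below are illustrative, since the config defaults are not shown in this listing.

# Illustrative epsilon-greedy schedule matching the branch in main() (toy values).
epsilon_start, epsilon_end, epsilon_decay = 0.5, 0.1, 4000

def epsilon_at(episode):
    if episode < epsilon_decay:
        return epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
    return epsilon_end

print([round(epsilon_at(e), 3) for e in (0, 1000, 2000, 4000, 8000)])
# [0.5, 0.4, 0.3, 0.1, 0.1]
--------------------------------------------------------------------------------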
/crowd_nav/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChanganVR/RelationalGraphLearning/8e87aa5ed8221efd688f8e6857ba4c38637bf6e1/crowd_nav/utils/__init__.py
--------------------------------------------------------------------------------
/crowd_nav/utils/explorer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import copy
4 | import torch
5 | from tqdm import tqdm
6 | from crowd_sim.envs.utils.info import *
7 |
8 |
9 | class Explorer(object):
10 | def __init__(self, env, robot, device, writer, memory=None, gamma=None, target_policy=None):
11 | self.env = env
12 | self.robot = robot
13 | self.device = device
14 | self.writer = writer
15 | self.memory = memory
16 | self.gamma = gamma
17 | self.target_policy = target_policy
18 | self.statistics = None
19 |
20 | # @profile
21 | def run_k_episodes(self, k, phase, update_memory=False, imitation_learning=False, episode=None, epoch=None,
22 | print_failure=False):
23 | self.robot.policy.set_phase(phase)
24 | success_times = []
25 | collision_times = []
26 | timeout_times = []
27 | success = 0
28 | collision = 0
29 | timeout = 0
30 | discomfort = 0
31 | min_dist = []
32 | cumulative_rewards = []
33 | average_returns = []
34 | collision_cases = []
35 | timeout_cases = []
36 |
37 | if k != 1:
38 | pbar = tqdm(total=k)
39 | else:
40 | pbar = None
41 |
42 | for i in range(k):
43 | ob = self.env.reset(phase)
44 | done = False
45 | states = []
46 | actions = []
47 | rewards = []
48 | while not done:
49 | action = self.robot.act(ob)
50 | ob, reward, done, info = self.env.step(action)
51 | states.append(self.robot.policy.last_state)
52 | actions.append(action)
53 | rewards.append(reward)
54 |
55 | if isinstance(info, Discomfort):
56 | discomfort += 1
57 | min_dist.append(info.min_dist)
58 |
59 | if isinstance(info, ReachGoal):
60 | success += 1
61 | success_times.append(self.env.global_time)
62 | elif isinstance(info, Collision):
63 | collision += 1
64 | collision_cases.append(i)
65 | collision_times.append(self.env.global_time)
66 | elif isinstance(info, Timeout):
67 | timeout += 1
68 | timeout_cases.append(i)
69 | timeout_times.append(self.env.time_limit)
70 | else:
71 | raise ValueError('Invalid end signal from environment')
72 |
73 | if update_memory:
74 | if isinstance(info, ReachGoal) or isinstance(info, Collision):
75 | # only add positive(success) or negative(collision) experience in experience set
76 | self.update_memory(states, actions, rewards, imitation_learning)
77 |
78 | cumulative_rewards.append(sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref)
79 | * reward for t, reward in enumerate(rewards)]))
80 | returns = []
81 | for step in range(len(rewards)):
82 | step_return = sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref)
83 | * reward for t, reward in enumerate(rewards[step:])])
84 | returns.append(step_return)
85 | average_returns.append(average(returns))
86 |
87 | if pbar:
88 | pbar.update(1)
89 | success_rate = success / k
90 | collision_rate = collision / k
91 | assert success + collision + timeout == k
92 | avg_nav_time = sum(success_times) / len(success_times) if success_times else self.env.time_limit
93 |
94 | extra_info = '' if episode is None else 'in episode {} '.format(episode)
95 | extra_info = extra_info if epoch is None else extra_info + ' in epoch {} '.format(epoch)
96 | logging.info('{:<5} {}has success rate: {:.2f}, collision rate: {:.2f}, nav time: {:.2f}, total reward: {:.4f},'
97 | ' average return: {:.4f}'. format(phase.upper(), extra_info, success_rate, collision_rate,
98 | avg_nav_time, average(cumulative_rewards),
99 | average(average_returns)))
100 | if phase in ['val', 'test']:
101 | total_time = sum(success_times + collision_times + timeout_times)
102 | logging.info('Frequency of being in danger: %.2f and average min separate distance in danger: %.2f',
103 | discomfort / total_time, average(min_dist))
104 |
105 | if print_failure:
106 | logging.info('Collision cases: ' + ' '.join([str(x) for x in collision_cases]))
107 | logging.info('Timeout cases: ' + ' '.join([str(x) for x in timeout_cases]))
108 |
109 | self.statistics = success_rate, collision_rate, avg_nav_time, average(cumulative_rewards), average(average_returns)
110 |
111 | return self.statistics
112 |
113 | def update_memory(self, states, actions, rewards, imitation_learning=False):
114 | if self.memory is None or self.gamma is None:
115 | raise ValueError('Memory or gamma value is not set!')
116 |
117 | for i, state in enumerate(states[:-1]):
118 | reward = rewards[i]
119 |
120 | # VALUE UPDATE
121 | if imitation_learning:
122 | # define the value of states in IL as cumulative discounted rewards, which is the same in RL
123 | state = self.target_policy.transform(state)
124 | next_state = self.target_policy.transform(states[i+1])
125 | value = sum([pow(self.gamma, (t - i) * self.robot.time_step * self.robot.v_pref) * reward *
126 | (1 if t >= i else 0) for t, reward in enumerate(rewards)])
127 | else:
128 | next_state = states[i+1]
129 | if i == len(states) - 1:
130 | # terminal state
131 | value = reward
132 | else:
133 | value = 0
134 | value = torch.Tensor([value]).to(self.device)
135 | reward = torch.Tensor([rewards[i]]).to(self.device)
136 |
137 | if self.target_policy.name == 'ModelPredictiveRL':
138 | self.memory.push((state[0], state[1], value, reward, next_state[0], next_state[1]))
139 | else:
140 | self.memory.push((state, value, reward, next_state))
141 |
142 | def log(self, tag_prefix, global_step):
143 | sr, cr, time, reward, avg_return = self.statistics
144 | self.writer.add_scalar(tag_prefix + '/success_rate', sr, global_step)
145 | self.writer.add_scalar(tag_prefix + '/collision_rate', cr, global_step)
146 | self.writer.add_scalar(tag_prefix + '/time', time, global_step)
147 | self.writer.add_scalar(tag_prefix + '/reward', reward, global_step)
148 | self.writer.add_scalar(tag_prefix + '/avg_return', avg_return, global_step)
149 |
150 |
151 | def average(input_list):
152 | if input_list:
153 | return sum(input_list) / len(input_list)
154 | else:
155 | return 0
156 |
--------------------------------------------------------------------------------
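The returns logged by the explorer above discount each reward by gamma ** (t * time_step * v_pref), i.e. per unit of simulated time scaled by the preferred speed, rather than per decision step. A small worked example with a toy reward sequence and illustrative constants:

# Illustrative time-scaled discounting as used in run_k_episodes (toy values).
gamma, time_step, v_pref = 0.9, 0.25, 1.0
rewards = [0.0, 0.0, -0.05, 1.0]

cumulative = sum(pow(gamma, t * time_step * v_pref) * r for t, r in enumerate(rewards))
print(round(cumulative, 4))   # 1.0 * 0.9**0.75 - 0.05 * 0.9**0.5 ≈ 0.8766
--------------------------------------------------------------------------------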
/crowd_nav/utils/memory.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 |
3 |
4 | class ReplayMemory(Dataset):
5 | def __init__(self, capacity):
6 | self.capacity = capacity
7 | self.memory = list()
8 | self.position = 0
9 |
10 | def push(self, item):
11 | # replace old experience with new experience
12 | if len(self.memory) < self.position + 1:
13 | self.memory.append(item)
14 | else:
15 | self.memory[self.position] = item
16 | self.position = (self.position + 1) % self.capacity
17 |
18 | def is_full(self):
19 | return len(self.memory) == self.capacity
20 |
21 | def __getitem__(self, item):
22 | return self.memory[item]
23 |
24 | def __len__(self):
25 | return len(self.memory)
26 |
27 | def clear(self):
28 | self.memory = list()
29 |
--------------------------------------------------------------------------------
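ReplayMemory above is a fixed-capacity circular buffer exposed as a torch Dataset, so the trainers can wrap it in a DataLoader; once full, new experience overwrites the oldest slot. A minimal usage sketch with toy integer items, assuming the package is on the Python path:

from crowd_nav.utils.memory import ReplayMemory

memory = ReplayMemory(capacity=3)
for item in range(5):
    memory.push(item)                            # 3 and 4 overwrite the oldest slots

print(len(memory), memory.is_full())             # 3 True
print([memory[i] for i in range(len(memory))])   # [3, 4, 2]
--------------------------------------------------------------------------------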
/crowd_nav/utils/plot.py:
--------------------------------------------------------------------------------
1 | import re
2 | import argparse
3 | import os
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 |
7 |
8 | def running_mean(x, n):
9 | cumsum = np.cumsum(np.insert(x, 0, 0))
10 | return (cumsum[n:] - cumsum[:-n]) / float(n)
11 |
12 |
13 | def main():
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('log_files', type=str, nargs='+')
16 | parser.add_argument('--plot_sr', default=False, action='store_true')
17 | parser.add_argument('--plot_cr', default=False, action='store_true')
18 | parser.add_argument('--plot_time', default=False, action='store_true')
19 | parser.add_argument('--plot_reward', default=True, action='store_true')
20 | parser.add_argument('--plot_train', default=True, action='store_true')
21 | parser.add_argument('--plot_val', default=False, action='store_true')
22 | parser.add_argument('--plot_all', default=False, action='store_true')
23 | parser.add_argument('--window_size', type=int, default=100)
24 | args = parser.parse_args()
25 |
26 | models = []
27 | max_episodes = None
28 |
29 | ax1 = ax2 = ax3 = ax4 = None
30 | ax1_legends = []
31 | ax2_legends = []
32 | ax3_legends = []
33 | ax4_legends = []
34 |
35 | if args.plot_all:
36 | log_dir = args.log_files[0]
37 | if not os.path.isdir(log_dir):
38 | parser.error('Input argument should be the directory containing all experiment folders')
39 | # args.log_files = [os.path.join(log_dir, exp_dir, 'output.log') for exp_dir in os.listdir(log_dir)]
40 | args.log_files = [os.path.join(log_dir, exp_dir, 'output.log') for exp_dir in
41 | ['sarl_linear_adam', 'mp_detach_skip', 'rgl_linear_adam', 'rgl_no_transformation',
42 | 'mp_separate_graph']]
43 |
44 | args.log_files = sorted(args.log_files)
45 | if not models:
46 | models = [os.path.basename(log_file[:-11]) for log_file in args.log_files]
47 | for i, log_file in enumerate(args.log_files):
48 | with open(log_file, 'r') as file:
49 | log = file.read()
50 |
51 | val_pattern = r"VAL in episode (?P\d+) has success rate: (?P[0-1].\d+), " \
52 | r"collision rate: (?P[0-1].\d+), nav time: (?P