├── .gitignore ├── LICENSE ├── README.md ├── control ├── __init__.py ├── compare.py ├── configs │ ├── README.md │ ├── env-noisy.config │ ├── env-policies.config │ ├── env-train.config │ ├── model │ │ ├── policy.config │ │ └── train.config │ ├── reward │ │ ├── policy.config │ │ └── train.config │ ├── sarl │ │ ├── env.config │ │ ├── policy.config │ │ └── train.config │ └── training │ │ ├── env.config │ │ ├── policy.config │ │ └── train.config ├── policy │ ├── __init__.py │ ├── policy_factory.py │ ├── sarl.py │ ├── uncertain_sarl.py │ └── value_network.py ├── test.py ├── train.py └── utils │ ├── __init__.py │ ├── explorer.py │ ├── memory.py │ ├── plot.py │ └── trainer.py ├── hardware ├── __init__.py ├── demo.py └── utils │ ├── __init__.py │ ├── turtlebot_control.py │ ├── turtlebot_hardware.py │ └── utils.py ├── setup.py ├── simulation ├── __init__.py └── envs │ ├── __init__.py │ ├── crowd_sim.py │ ├── policy │ ├── __init__.py │ ├── cadrl.py │ ├── cadrl │ │ └── rl_model.pth │ ├── linear.py │ ├── orca.py │ ├── policy.py │ └── policy_factory.py │ └── utils │ ├── __init__.py │ ├── action.py │ ├── agent.py │ ├── functions.py │ ├── human.py │ ├── info.py │ ├── robot.py │ ├── scenarios.py │ └── state.py └── uncertainty ├── __init__.py ├── collect_data.py ├── configs └── train.config ├── estimate_epsilons.py ├── network.py ├── preprocess_data.py ├── train.py └── train_models.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac 2 | .DS_Store 3 | */.DS_Store 4 | 5 | # editors 6 | .idea/ 7 | .vscode/ 8 | 9 | # Python 10 | *.egg-info/ 11 | .pytest_cache/ 12 | __pycache__ 13 | .cache/ 14 | venv*/ 15 | 16 | # data 17 | *.mp4 18 | *.png 19 | *.log 20 | *.csv 21 | *.pkl 22 | data/ 23 | models/ 24 | output/ 25 | videos/ 26 | 27 | # jupyter checkpoints 28 | .ipynb_checkpoints 29 | *.ipynb 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 dbl-blnd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stranger Danger 2 | 3 | This is the codebase for the paper titled ["Stranger Danger! 
Identifying and Avoiding Unpredictable Pedestrians in RL-based Social Robot Navigation,"](https://ieeexplore.ieee.org/document/10610413) which was presented at the 2024 IEEE International Conference on Robotics and Automation (ICRA). This README describes how to reproduce the results achieved in this paper. An extended version of our paper is available on [arXiv](https://arxiv.org/abs/2407.06056), and a video showcasing our methods and results is available on [YouTube](https://youtu.be/9IDhXvCC58w?si=Y0Di3d5NjWj-3nvl). If you find this work useful, please cite our paper using the citation provided at end of this [README](https://github.com/sarapohland/stranger-danger#citing-our-work). 4 | 5 | ## 0) Setup 6 | 7 | 1. Create an environment with Python 3.6 on Ubuntu Linux. 8 | 2. Install the [Python-RVO2](https://github.com/sybrenstuvel/Python-RVO2) library. 9 | 10 | Note: Make sure that CMake and the tested version of Cython are installed. If the build fails, you may need to delete the build folder and try building again. 11 | 12 | 3. Within the main `stranger-danger` directory, run the following command: 13 | ``` 14 | pip install -e . 15 | ``` 16 | 17 | ## 1) Train Uncertainty Estimation Networks 18 | 19 | The following section details how to train the set of uncertainty estimation networks utilized by two variations of the uncertainty-aware RL policy. 20 | 21 | ### 1.1) Collect pedestrian walking data 22 | 23 | From the `uncertainty` folder, run the following command to collect data of randomized ORCA pedestrians navigating through 100 episodes of six different scenarios: 24 | ``` 25 | python collect_data.py --vary_param epsilon --num_episodes 100 --output_dir data/ 26 | ``` 27 | 28 | This will create CSV files of relevant data in a folder called `data` within the `uncertainty` folder. 29 | 30 | ### 1.2) Preprocess pedestrian walking data 31 | 32 | From the `uncertainty` folder, run the following command to preprocess the collected pedestrian walking data to prepare it to be used as input to the uncertainty network: 33 | ``` 34 | python preprocess_data.py --data_dir data/ 35 | ``` 36 | 37 | This will generate several folders of CSV files within a folder called `preprocessed_data` in the `data` folder you created in the previous step. 38 | 39 | ### 1.3) Train networks with preprocessed data 40 | 41 | From the `uncertainty` folder, run the following script to train and save 20 uncertainty prediction networks with 20 different numbers of time steps: 42 | ``` 43 | chmod +x train_models.sh 44 | ./train_models.sh 45 | ``` 46 | 47 | This will train 20 models using the preprocessed data from the previous step and store each model within its own folder in a larger folder called `models`. The model folders are named such that `uncertain_T` is the model trained using T time steps. Within this folder, there is also a plot of the loss curve and a plot of the model prediction accuracy. *Note: It may take several hours or days to train all 20 models.* 48 | 49 | ## 2) Train Socially-Aware RL Policies 50 | 51 | The following section details how to train the baseline SARL policy and the three variations of this RL policy. 
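The `--policy` argument in the commands below selects a controller by name through the project's policy factory. As a point of reference, the sketch below shows roughly how a policy name might be resolved and configured: the registration of `sarl` and `uncertain_sarl` comes from `control/policy/policy_factory.py`, and the `configparser` accessors mirror the ones used in `control/policy/sarl.py`. Treat it as an illustrative sketch rather than the exact logic of `train.py`.

```
import configparser

from control.policy.policy_factory import policy_factory

# Parse a policy configuration file (path assumed relative to the control/ directory).
policy_config = configparser.RawConfigParser()
policy_config.read('configs/sarl/policy.config')

# 'sarl' and 'uncertain_sarl' are registered in control/policy/policy_factory.py,
# alongside the simulation policies (e.g. 'orca') from simulation/envs/policy/policy_factory.py.
policy = policy_factory['sarl']()
policy.configure(policy_config)
```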
52 | 53 | ### 2.1) Train baseline SARL policy (*SARL*) 54 | 55 | From the `control` folder, run the following command to train the baseline SARL policy without uncertainty-awareness using standard ORCA pedestrians: 56 | ``` 57 | python train.py --policy sarl --output_dir models/sarl/ --env_config configs/env-train.config --policy_config configs/sarl/policy.config --train_config configs/sarl/train.config 58 | ``` 59 | 60 | This will save the trained RL policy to the folder `models/sarl/` in the `control` directory. The model will be trained using the parameters specified in the configuration files (`env_config`, `policy_config`, and `train_config`). More information on the parameters in these configuration files can be found in the configs [README](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/README.md). You can try training with different discomfort distances by changing the `discomfort_dist` value under `reward` in the [training environment config file](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/env-train.config). *Note: It may take several hours or days to train a single model.* 61 | 62 | ### 2.2) Train policy with modified training process (*Training*) 63 | 64 | From the `control` folder, run the following command to train the RL policy without uncertainty-awareness using Noisy ORCA pedestrians: 65 | ``` 66 | python train.py --policy sarl --output_dir models/training/ --env_config configs/env-train.config --policy_config configs/training/policy.config --train_config configs/training/train.config 67 | ``` 68 | 69 | This will save the trained RL policy to the folder `models/training/` in the `control` directory. The model will be trained using the parameters specified in the configuration files (`env_config`, `policy_config`, and `train_config`). More information on the parameters in these configuration files can be found in the configs [README](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/README.md). *Note: It may take several hours or days to train a single model.* 70 | 71 | ### 2.3) Train policy with modified model architecture (*Model*) 72 | 73 | From the `control` folder, run the following command to train the uncertainty-aware RL policy with only the modified model architecture using Noisy ORCA pedestrians: 74 | ``` 75 | python train.py --policy uncertain_sarl --output_dir models/model/ --env_config configs/env-train.config --policy_config configs/model/policy.config --train_config configs/model/train.config 76 | ``` 77 | 78 | This will save the trained RL policy to the folder `models/model/` in the `control` directory. The model will be trained using the parameters specified in the configuration files (`env_config`, `policy_config`, and `train_config`). More information on the parameters in these configuration files can be found in the configs [README](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/README.md). 
*Note: It may take several hours or days to train a single model.* 79 | 80 | ### 2.4) Train policy with modified reward function (*Reward*) 81 | 82 | From the `control` folder, run the following command to train the uncertainty-aware RL policy with the uncertainty-dependent reward function using Noisy ORCA pedestrians: 83 | ``` 84 | python train.py --policy uncertain_sarl --output_dir models/reward/ --env_config configs/env-train.config --policy_config configs/reward/policy.config --train_config configs/reward/train.config 85 | ``` 86 | 87 | This will save the trained RL policy to the folder `models/reward/` in the `control` directory. The model will be trained using the parameters specified in the configuration files (`env_config`, `policy_config`, and `train_config`). More information on the parameters in these configuration files can be found in the configs [README](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/README.md). *Note: It may take several hours or days to train a single model.* 88 | 89 | ## 3) Ablation Study on Noisy ORCA Pedestrians 90 | 91 | The following section describes how to reproduce the results provided in the ablation study on Noisy ORCA pedestrians. Evaluation parameters can be set in the configuration file `configs/env-noisy.config`. More information on the parameters in this configuration file can be found in the configs [README](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/README.md). 92 | 93 | ### 3.1) Evaluate baseline SARL policy (*SARL*) 94 | 95 | The following command will run 500 random trials evaluating the performance of the baseline SARL policy on Noisy ORCA pedestrians with a maximum unpredictability score of 0.5: 96 | ``` 97 | python test.py --policy sarl --model_dir models/sarl/ --estimate_eps --max_epsilon 0.5 --stats_file models/sarl/stats-noisy.csv --env_config configs/env-noisy.config 98 | ``` 99 | 100 | This script will save a CSV file called `stats-noisy.csv` within the `models/sarl/` folder. 101 | 102 | ### 3.2) Evaluate policy with modified training process (*Training*) 103 | 104 | The following command will run 500 random trials evaluating the performance of the *Training* policy on Noisy ORCA pedestrians with a maximum unpredictability score of 0.5: 105 | ``` 106 | python test.py --policy sarl --model_dir models/training/ --estimate_eps --max_epsilon 0.5 --stats_file models/training/stats-noisy.csv --env_config configs/env-noisy.config 107 | ``` 108 | 109 | This script will save a CSV file called `stats-noisy.csv` within the `models/training/` folder. 110 | 111 | ### 3.3) Evaluate policy with modified model architecture (*Model*) 112 | 113 | The following command will run 500 random trials evaluating the performance of the *Model* policy on Noisy ORCA pedestrians with a maximum unpredictability score of 0.5: 114 | ``` 115 | python test.py --policy uncertain_sarl --model_dir models/model/ --estimate_eps --max_epsilon 0.5 --stats_file models/model/stats-noisy.csv --env_config configs/env-noisy.config 116 | ``` 117 | 118 | This script will save a CSV file called `stats-noisy.csv` within the `models/model/` folder. 
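If you want a quick look at one of these statistics files before running the comparison step in Section 3.6, you can load it with pandas. The sketch below relies only on the `result` and `navigation_time` columns, which are the ones read by `control/compare.py`; the file path is just an example.

```
import pandas as pd

# Per-trial statistics written by test.py (example path from Section 3.3).
stats = pd.read_csv('models/model/stats-noisy.csv')

# Success/timeout/collision rates, based on the 'result' labels used by compare.py.
print(stats['result'].value_counts(normalize=True) * 100)

# Mean navigation time over successful trials only.
success = stats[stats['result'] == 'ReachGoal']
print('Mean navigation time (s):', success['navigation_time'].mean())
```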
119 | 120 | ### 3.4) Evaluate policy with modified reward function (*Reward*) 121 | 122 | The following command will run 500 random trials evaluating the performance of the *Reward* policy on Noisy ORCA pedestrians with a maximum unpredictability score of 0.5: 123 | ``` 124 | python test.py --policy uncertain_sarl --model_dir models/reward/ --estimate_eps --max_epsilon 0.5 --stats_file models/reward/stats-noisy.csv --env_config configs/env-noisy.config 125 | ``` 126 | 127 | This script will save a CSV file called `stats-noisy.csv` within the `models/reward/` folder. 128 | 129 | ### 3.5) Evaluate baseline ORCA policy (*ORCA*) 130 | 131 | The following command will run 500 random trials evaluating the performance of the baseline ORCA policy on Noisy ORCA pedestrians with a maximum unpredictability score of 0.5: 132 | ``` 133 | python test.py --policy orca --estimate_eps --max_epsilon 0.5 --stats_file models/orca/stats-noisy.csv --env_config configs/env-noisy.config 134 | ``` 135 | 136 | This script will save a CSV file called `stats-noisy.csv` within the `models/orca/` folder. 137 | 138 | ### 3.6) Create table comparing policy performance 139 | 140 | To compare the performance of these four RL policies (plus the ORCA policy), you can use the compare script in the `control` directory: 141 | ``` 142 | python compare.py --files models/orca/stats-noisy.csv models/sarl/stats-noisy.csv models/training/stats-noisy.csv models/model/stats-noisy.csv models/reward/stats-noisy.csv --names ORCA SARL Training Model Reward 143 | ``` 144 | 145 | This command will print a table of results for the ablation study with LaTeX formatting. 146 | 147 | ## 4) Ablation Study on Diverse, Realistic Pedestrians 148 | 149 | The following section describes how to reproduce the results provided in the ablation study on diverse, realistic pedestrians. Evaluation parameters can be set in the configuration file `configs/env-policies.config`. More information on the parameters in this configuration file can be found in the configs [README](https://github.com/sarapohland/stranger-danger/blob/main/control/configs/README.md). 150 | 151 | ### 4.1) Evaluate baseline SARL policy (*SARL*) 152 | 153 | The following command will run 100 random trials evaluating the performance of the baseline SARL policy on pedestrians operating under various policies (standard ORCA, CADRL, and Linear with randomized parameters): 154 | ``` 155 | python test.py --policy sarl --model_dir models/sarl/ --estimate_eps --stats_file models/sarl/stats-policies.csv --env_config configs/env-policies.config 156 | ``` 157 | 158 | This script will save a CSV file called `stats-policies.csv` within the `models/sarl/` folder. 159 | 160 | ### 4.2) Evaluate policy with modified training process (*Training*) 161 | 162 | The following command will run 100 random trials evaluating the performance of the *Training* policy on pedestrians operating under various policies (standard ORCA, CADRL, and Linear with randomized parameters): 163 | ``` 164 | python test.py --policy sarl --model_dir models/training/ --estimate_eps --stats_file models/training/stats-policies.csv --env_config configs/env-policies.config 165 | ``` 166 | 167 | This script will save a CSV file called `stats-policies.csv` within the `models/training/` folder. 
168 | 169 | ### 4.3) Evaluate policy with modified model architecture (*Model*) 170 | 171 | The following command will run 100 random trials evaluating the performance of the *Model* policy on pedestrians operating under various policies (standard ORCA, CADRL, and Linear with randomized parameters): 172 | ``` 173 | python test.py --policy uncertain_sarl --model_dir models/model/ --estimate_eps --stats_file models/model/stats-policies.csv --env_config configs/env-policies.config 174 | ``` 175 | 176 | This script will save a CSV file called `stats-policies.csv` within the `models/model/` folder. 177 | 178 | ### 4.4) Evaluate policy with modified reward function (*Reward*) 179 | 180 | The following command will run 100 random trials evaluating the performance of the *Reward* policy on pedestrians operating under various policies (standard ORCA, CADRL, and Linear with randomized parameters): 181 | ``` 182 | python test.py --policy uncertain_sarl --model_dir models/reward/ --estimate_eps --stats_file models/reward/stats-policies.csv --env_config configs/env-policies.config 183 | ``` 184 | 185 | This script will save a CSV file called `stats-policies.csv` within the `models/reward/` folder. 186 | 187 | ### 4.5) Evaluate baseline ORCA policy (*ORCA*) 188 | 189 | The following command will run 100 random trials evaluating the performance of the baseline ORCA policy on pedestrians operating under various policies (standard ORCA, CADRL, and Linear with randomized parameters): 190 | ``` 191 | python test.py --policy orca --estimate_eps --stats_file models/orca/stats-policies.csv --env_config configs/env-policies.config 192 | ``` 193 | 194 | This script will save a CSV file called `stats-policies.csv` within the `models/orca/` folder. 195 | 196 | ### 4.6) Create table comparing policy performance 197 | 198 | To compare the performance of these four RL policies (plus the ORCA policy), you can use the compare script in the `control` directory: 199 | ``` 200 | python compare.py --files models/orca/stats-policies.csv models/sarl/stats-policies.csv models/training/stats-policies.csv models/model/stats-policies.csv models/reward/stats-policies.csv --names ORCA SARL Training Model Reward 201 | ``` 202 | 203 | This command will print a table of results for the ablation study with LaTeX formatting. 204 | 205 | ## 5) Visualize the Trained RL Policies 206 | 207 | ### 5.1) Visualize trials of baseline SARL policy (*SARL*) 208 | 209 | The following command will allow you to visualize a single evaluation trial of the baseline SARL policy: 210 | ``` 211 | python test.py --policy sarl --model_dir models/sarl/ --estimate_eps --max_epsilon --visualize --test_case 1 --video_file videos/sarl/test1.mp4 --env_config 212 | ``` 213 | 214 | This will save a video called `test1.mp4` in the `videos/sarl/` folder showing trial number 1 with the environment configurations specified by `config_file` and the maximum epsilon specified by `epsilon_value`. 
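For reference, a fully specified version of this command might look like the following, using the Noisy ORCA evaluation settings from Section 3. The config file and epsilon value here are just example choices, and the same pattern applies to the commands in Sections 5.2 through 5.4:

```
python test.py --policy sarl --model_dir models/sarl/ --estimate_eps --max_epsilon 0.5 --visualize --test_case 1 --video_file videos/sarl/test1.mp4 --env_config configs/env-noisy.config
```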
215 | 216 | ### 5.2) Visualize trials of policy with modified training process (*Training*) 217 | 218 | The following command will allow you to visualize a single evaluation trial of the *Training* policy: 219 | ``` 220 | python test.py --policy sarl --model_dir models/training/ --estimate_eps --max_epsilon --visualize --test_case 1 --video_file videos/training/test1.mp4 --env_config 221 | ``` 222 | 223 | This will save a video called `test1.mp4` in the `videos/training/` folder showing trial number 1 with the environment configurations specified by `config_file` and the maximum epsilon specified by `epsilon_value`. 224 | 225 | ### 5.3) Visualize trials of policy with modified model architecture (*Model*) 226 | 227 | The following command will allow you to visualize a single evaluation trial of the *Model* policy: 228 | ``` 229 | python test.py --policy uncertain_sarl --model_dir models/model/ --estimate_eps --max_epsilon --visualize --test_case 1 --video_file videos/model/test1.mp4 --env_config 230 | ``` 231 | 232 | This will save a video called `test1.mp4` in the `videos/model/` folder showing trial number 1 with the environment configurations specified by `config_file` and the maximum epsilon specified by `epsilon_value`. 233 | 234 | ### 5.4) Visualize trials of policy with modified reward function (*Reward*) 235 | 236 | The following command will allow you to visualize a single evaluation trial of the *Reward* policy: 237 | ``` 238 | python test.py --policy uncertain_sarl --model_dir models/reward/ --estimate_eps --max_epsilon --visualize --test_case 1 --video_file videos/reward/test1.mp4 --env_config 239 | ``` 240 | 241 | This will save a video called `test1.mp4` in the `videos/reward/` folder showing trial number 1 with the environment configurations specified by `config_file` and the maximum epsilon specified by `epsilon_value`. 242 | 243 | ## Citing Our Work 244 | 245 | If you find this codebase useful, please cite the paper associated with this repository: 246 | 247 | S. Pohland, A. Tan, P. Dutta and C. Tomlin, "Stranger Danger! Identifying and Avoiding Unpredictable Pedestrians in RL-based Social Robot Navigation," 2024 IEEE International Conference on Robotics and Automation (ICRA), Yokohama, Japan, 2024, pp. 15217-15224, doi: 10.1109/ICRA57147.2024.10610413. 248 | 249 | @InProceedings{stranger-danger, \ 250 |   author="Pohland, Sara and Tan, Alvin and Dutta, Prabal and Tomlin, Claire", \ 251 |   title="Stranger Danger! 
Identifying and Avoiding Unpredictable Pedestrians in RL-based Social Robot Navigation", \ 252 |   booktitle="2024 IEEE International Conference on Robotics and Automation (ICRA)", \ 253 |   year="2024", \ 254 |   month="May", \ 255 |   publisher="IEEE", \ 256 |   pages="15217--15224", \ 257 |   doi="10.1109/ICRA57147.2024.10610413" \ 258 |   } -------------------------------------------------------------------------------- /control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarapohland/stranger-danger/84a6bad96de4f117d6192b5111eba5531e766e26/control/__init__.py -------------------------------------------------------------------------------- /control/compare.py: -------------------------------------------------------------------------------- 1 | import math 2 | import argparse 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from ast import literal_eval 7 | from tabulate import tabulate 8 | from matplotlib import pyplot as plt 9 | 10 | 11 | def get_result_string(a_series): 12 | a_result = "{:.2f}".format(np.mean(a_series)) 13 | for a_quant in [0.9, 0.95]: 14 | # Quantile (i.e. Value at risk.) 15 | #a_result += "/{:.3f}".format(a_series.quantile(a_quant)) 16 | 17 | # Expected value above the quantile (i.e. Conditional Value at Risk) 18 | a_result += "/{:.2f}".format(np.mean(a_series.loc[(a_series >= a_series.quantile(a_quant))])) 19 | return a_result 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser('Parse configuration file') 23 | parser.add_argument('--files', action='append', nargs='+') 24 | parser.add_argument('--names', action='append', nargs='+') 25 | args = parser.parse_args() 26 | 27 | metric_files = args.files[0] 28 | policy_names = args.names[0] 29 | 30 | # Ignore collisions within the first time step 31 | metrics_dfs = [] 32 | for metric_file in metric_files: 33 | metric_df = pd.read_csv(metric_file) 34 | for index, row in metric_df.iterrows(): 35 | collision_times = literal_eval(row['collision_times']) 36 | collision_positions = literal_eval(row['collision_positions']) 37 | collision_blames = literal_eval(row['rob_collision_blames']) 38 | metric_df.at[index, 'collision_times'] = [] 39 | metric_df.at[index, 'collision_positions'] = [] 40 | metric_df.at[index, 'rob_collision_blames'] = [] 41 | for j, collision_time in enumerate(collision_times): 42 | if collision_time > 0.5: 43 | metric_df.at[index, 'collision_times'].append(collision_time) 44 | metric_df.at[index, 'collision_positions'].append(collision_positions[j]) 45 | metric_df.at[index, 'rob_collision_blames'].append(collision_blames[j]) 46 | num_collisions = len(metric_df.at[index, 'collision_times']) 47 | metric_df.at[index, 'num_collisions'] = num_collisions 48 | if num_collisions == 0 and row['result'] == 'HumanCollision': 49 | metric_df.at[index, 'result'] = 'Timeout' if row['navigation_time'] > 29.0 else 'ReachGoal' 50 | metrics_dfs.append(metric_df) 51 | 52 | # Calculate path lengths 53 | for metrics_df in metrics_dfs: 54 | metrics_df['path_length'] = metrics_df['navigation_time'] * metrics_df['vel_mean'] 55 | 56 | # Define nominal path length and navigation time 57 | V_PREF = 1 # m/s 58 | ROOM_HEIGHT = 15 # meters 59 | GOAL_RADIUS = 0.3 # meters 60 | ROBOT_RADIUS = 0.3 # meters 61 | OPT_PATH_LENGTH = 2 * (ROOM_HEIGHT/2 - 1) - GOAL_RADIUS - ROBOT_RADIUS 62 | OPT_NAV_TIME = OPT_PATH_LENGTH / V_PREF 63 | 64 | results = [[name] for name in policy_names] 65 | for policy_index, metrics_df in enumerate(metrics_dfs): 66 | # 
success/collision/timeout rates 67 | total_trials = len(metrics_df) 68 | results[policy_index].append(sum(metrics_df['result'] == 'ReachGoal') / total_trials * 100) # Success Rate 69 | results[policy_index].append(sum(metrics_df['result'] == 'Timeout') / total_trials * 100) # Timeout Rate 70 | results[policy_index].append(sum(metrics_df['result'] == 'HumanCollision') / total_trials * 100) # Collision Rate 71 | 72 | # Only consider successful runs so we don't skew our navigation times or 73 | # minimum distance values with collisions and timeouts 74 | smol_metrics = metrics_df.loc[(metrics_df['result'] == 'ReachGoal') | (metrics_df['result'] == 'HumanCollision')] 75 | 76 | # normalized navigation time 77 | normed_times = smol_metrics['navigation_time'] / OPT_NAV_TIME 78 | 79 | results[policy_index].append(get_result_string(normed_times)) 80 | # results[policy_index].append(np.mean(normed_times)) 81 | # results[policy_index].append("{:.4f} \u00B1 {:.4f}".format(np.mean(normed_times), np.std(normed_times))) 82 | 83 | # normalized path lengths 84 | normed_lengths = smol_metrics['path_length'] / OPT_PATH_LENGTH 85 | results[policy_index].append(get_result_string(normed_lengths)) 86 | # results[policy_index].append(np.mean(normed_lengths)) 87 | # results[policy_index].append("{:.4f} \u00B1 {:.4f}".format(np.mean(normed_lengths), np.std(normed_lengths))) 88 | 89 | # total number of collisions between the robot and a pedestrian across all trials 90 | sum_collisions = sum(metrics_df['num_collisions']) 91 | results[policy_index].append(sum_collisions) 92 | 93 | # personal space cost 94 | costs = metrics_df['avg_max_cost']*1000 95 | results[policy_index].append(get_result_string(costs)) 96 | # results[policy_index].append(np.mean(costs)) 97 | # results[policy_index].append("{:.4f} \u00B1 {:.4f}".format(np.mean(costs), np.std(costs))) 98 | 99 | # proportion of time spent in someone's personal space 100 | intruded_props = metrics_df['pers_time_intruded'] / metrics_df['navigation_time'] * 100 101 | results[policy_index].append(get_result_string(intruded_props)) 102 | # results[policy_index].append(np.mean(intruded_props)) 103 | # results[policy_index].append("{:.4f} \u00B1 {:.4f}".format(np.mean(intruded_props), np.std(intruded_props))) 104 | 105 | # proportion of time spent in someone's intimate space 106 | intruded_props = metrics_df['int_time_intruded'] / metrics_df['navigation_time'] * 100 107 | results[policy_index].append(get_result_string(intruded_props)) 108 | # results[policy_index].append(np.mean(intruded_props)) 109 | # results[policy_index].append("{:.4f} \u00B1 {:.4f}".format(np.mean(intruded_props), np.std(intruded_props))) 110 | 111 | # # "accountability" of the robot when a collision occurs 112 | # collision_blames = [] 113 | # for row in metrics_df['rob_collision_blames']: 114 | # if row: 115 | # collision_blames += [val for val in row] 116 | # if collision_blames: 117 | # avg_accountability = np.mean(collision_blames) 118 | # std_accountability = np.std(collision_blames) 119 | # results[policy_index].append(avg_accountability) 120 | # # results[policy_index].append("{:.4f} \u00B1 {:.4f}".format(avg_accountability, std_accountability)) 121 | 122 | # Print the table so it looks nice-ish 123 | headers = ['Navigation Policy', 'Success Rate', 'Timeout Rate', 'Collision Rate', 124 | 'Relative Navigation Time', 'Relative Path Length', 'Number of Collisions', 125 | 'Personal Space Cost', 'Personal Space Violation', 'Intimate Space Violation'] 126 | 127 | # print(tabulate(results, 
headers=headers, tablefmt="github", numalign='center', floatfmt=".4f")) 128 | print(tabulate(results, headers=headers, tablefmt="latex", numalign='center', floatfmt=".3f")) 129 | 130 | 131 | if __name__ == '__main__': 132 | main() 133 | -------------------------------------------------------------------------------- /control/configs/README.md: -------------------------------------------------------------------------------- 1 | # Configuration Files 2 | 3 | 4 | ## Environment Configuration 5 | 6 | ### Environment 7 | - **Time limit** – The time limit is a float value corresponding to the number of seconds the robot is allowed to take in navigating to its goal before the simulation is ended. This value should be adjusted based on the simulation configurations. 8 | - **Time step** – The time step is a float value corresponding to the number of seconds between each simulation update. At each time step, the robot and each of the humans chooses a new action according to its policy. It is recommended that this value be set to 0.25 seconds. 9 | - **Validation size** – The validation size is an integer value corresponding to the number of episodes that are run in the validation stage while training the RL policy. 10 | - **Test size** – The test size is an integer value corresponding to the number of episode that are used to evaluate the trained policy. 11 | 12 | ### Reward 13 | - **Success reward** – The success reward is the positive reward the robot receives once it reaches its goal. 14 | - **Collision penalty** – The collision penalty is the negative reward the robot receives for colliding with a human. 15 | - **Discomfort distance** – The discomfort distance is the minimum distance between the robot and the human that is considered comfortable. 16 | - **Discomfort penalty factor** – When the robot is within the discomfort distance, it receives a negative reward based on how close the robot is to the human. 17 | - **Time penalty** – The time penalty is the negative reward the robot receives at each time step. 18 | - **Progress reward** – The progress reward is the reward the robot receives for making progress towards its goal at each step. 19 | - **Goal radius** – The robot is considered to have reached the goal once it is within the goal radius. 20 | 21 | ### Simulation 22 | - **Room width** – Width of the simulated room in meters. 23 | - **Room height** – Height of the simulated room in meters. 24 | - **Default number of humans** – If the number of humans is not being randomized, the default number of humans are placed according to the human generating scheme. 25 | - **Maximum number of humans** – If the number of humans is being randomized, the number of humans is sampled from a uniform distribution with the minimum and maximum values specified. 26 | - **Minimum number of humans** – If the number of humans is being randomized, the number of humans is sampled from a uniform distribution with the minimum and maximum values specified. 27 | - **Maximum density of humans** – If the number of humans is being randomized based on the amount of open space, the number of humans is sampled from a uniform distribution whose minimum and maximum values are determined by the amount of open space and the minimum and maximum densities specified. 
28 | - **Minimum density of humans** – If the number of humans is being randomized based on the amount of open space, the number of humans is sampled from a uniform distribution whose minimum and maximum values are determined by the amount of open space and the minimum and maximum densities specified. 29 | - **Randomize number of humans** – If this boolean value is set to true, the number of humans will be sampled from a uniform distribution based on the minimum and maximum number of humans specified. 30 | - **Randomize density of humans** – If randomize number of humans is set to false and this boolean value is set to true, the number of humans will be sampled from a uniform distribution based on the minimum and maximum density of humans specified. If both boolean values are set to false, the default number of humans will be used. 31 | - **End on collision** – The simulation will end when the robot collides with a human or obstacle if this boolean value is set to true. 32 | - **Plan human path** – A global path will be generated for all of the humans to follow from their initial position to their goal position if this boolean value is set to true. Note that this can significantly increase the time required for training/evaluation. 33 | - **Perpetual** – The humans will be given a new goal position after reaching their goal to allow for perpetual motion if this boolean value is set to true. Note that perpetual motion is not intended to be used in combination with human path planning. 34 | - **Pedestrian crossing scenario** – Default pedestrian-robot interaction scenario to be used when randomness is set to false. Available scenarios are circle, following, passing, crossing, and random. 35 | - **Randomness** – If this boolean value is set to true, the pedestrian crossing scenario will be randomly chosen. 36 | 37 | ### Waypoints 38 | This section is only used for the hardware experiments and can be ignored. 39 | 40 | ### Humans 41 | - **Visible** – Each human is visible to the robot and all other humans if this boolean value is set to true. 42 | - **Policy** – The actions of the humans are determined by its policy. 43 | - **Sensor** – The humans perceive the environment based on their sensor. The only sensor that is currently implemented is called "coordinates" and allows the humans to view the position, velocity, and radius of visible agents. 44 | - **Observability** – The humans can either have full observability of all visible agents in the environment (if observability is specified as 'full'), or they can only view unobstructed agents within their detection range (if observability is specified as 'partial'). 45 | - **Detection range** – The detection range is how far the humans can view another agent. 46 | - **Default radius** – If the radius of the humans is not being randomized, all of the humans have the default radius. 47 | - **Minimum radius** – If the radius of the humans is being randomized, the radius of each human is sampled from a uniform distribution with the minimum and maximum values specified. 48 | - **Maximum radius** – If the radius of the humans is being randomized, the radius of each human is sampled from a uniform distribution with the minimum and maximum values specified. 49 | - **Default preferred velocity** – If the preferred velocity of the humans is not being randomized, all of the humans have the default preferred velocity. 
50 | - **Minimum preferred velocity** – If the preferred velocity of the humans is being randomized, the preferred velocity of each human is sampled from a uniform distribution with the minimum and maximum values specified. 51 | - **Maximum preferred velocity** – If the preferred velocity of the humans is being randomized, the preferred velocity of each human is sampled from a uniform distribution with the minimum and maximum values specified. 52 | - **Default neighbor distance** – If the neighbor distance (ORCA parameter) of the humans is not being randomized, all of the humans have the default neighbor distance. 53 | - **Minimum neighbor distance** – If the neighbor distance of the humans is being randomized, the neighbor distance of each human is sampled from a uniform distribution with the minimum and maximum values specified. 54 | - **Maximum neighbor distance** – If the neighbor distance of the humans is being randomized, the neighbor distance of each human is sampled from a uniform distribution with the minimum and maximum values specified. 55 | - **Default time horizon** – If the time horizon (ORCA parameter) of the humans is not being randomized, all of the humans have the default time horizon. 56 | - **Minimum time horizon** – If the time horizon of the humans is being randomized, the time horizon of each human is sampled from a uniform distribution with the minimum and maximum values specified. 57 | - **Maximum time horizon** – If the time horizon of the humans is being randomized, the time horizon of each human is sampled from a uniform distribution with the minimum and maximum values specified. 58 | - **Randomize radius** – If this boolean value is set to true, the radius of each human will be sampled from a uniform distribution. If it is set to false, the default radius will be assigned to each human. 59 | - **Randomize preferred velocity** – If this boolean value is set to true, the preferred velocity of each human will be sampled from a uniform distribution. If it is set to false, the default preferred velocity will be assigned to each human. 60 | - **Randomize neighbor distance** – If this boolean value is set to true, the neighbor distance of each human will be sampled from a uniform distribution. If it is set to false, the default neighbor distance will be assigned to each human. 61 | - **Randomize time horizon** – If this boolean value is set to true, the time horizon of each human will be sampled from a uniform distribution. If it is set to false, the default time horizon will be assigned to each human. 62 | 63 | ### Robot 64 | - **Visible** – The robot is visible to the humans if this boolean value is set to true. 65 | - **Policy** – If the robot is using the reinforcement learning policy, this parameter can be set to none. The robot can also be controlled using the ORCA policy. 66 | - **Sensor** – The robot perceives the environment based on its sensor. The only sensor that is currently implemented is called "coordinates" and allows the robot to view the position, velocity, and radius of visible agents. 67 | - **Observability** – The robot can either have full observability of all visible agents in the environment (if observability is specified as 'full'), or they can only view unobstructed agents within their detection range (if observability is specified as 'partial'). 68 | - **Detection range** – The detection range is how far the robot can view another agent. 
69 | - **Default radius** – If the radius of the robot is not being randomized, it has the default radius. 70 | - **Minimum radius** – If the radius of the robot is being randomized, its radius is sampled from a uniform distribution with the minimum and maximum values specified. 71 | - **Maximum radius** – If the radius of the robot is being randomized, its radius is sampled from a uniform distribution with the minimum and maximum values specified. 72 | - **Default preferred velocity** – If the preferred velocity of the robot is not being randomized, it has the default preferred velocity. 73 | - **Minimum preferred velocity** – If the preferred velocity of the robot is being randomized, its preferred velocity is sampled from a uniform distribution with the minimum and maximum values specified. 74 | - **Maximum preferred velocity** – If the preferred velocity of the robot is being randomized, its preferred velocity is sampled from a uniform distribution with the minimum and maximum values specified. 75 | - **Randomize radius** – If this boolean value is set to true, the radius of the robot will be sampled from a uniform distribution. If it is set to false, the default radius will be used. 76 | - **Randomize preferred velocity** – If this boolean value is set to true, the preferred velocity of the robot will be sampled from a uniform distribution. If it is set to false, the default preferred velocity will be used. 77 | 78 | 79 | ## Policy Configuration 80 | ### Reinforcement Learning (RL) 81 | - **Gamma** – Gamma is the discount factor used in computing the value function. 82 | 83 | ### Action Space 84 | - **Kinematics** – The kinematics model used to control simulated model determines the format of the action provided by policy. The model must either be "holonomic" or "non-holonomic." A holonomic robot receives actions as x and y velocites, while a non-holonomic robot receives actions as linear and angular velocities. 85 | - **Speed samples** – The action space is discretized into n speeds exponentially spaced in the range (0, v_pref] and m rotations evenly spaced in the range [0,2pi), where n is the number of speed samples and m is the number of rotation samples. 86 | - **Rotation samples** – The action space is discretized into n speeds exponentially spaced in the range (0, v_pref] and m rotations evenly spaced in the range [0,2pi), where n is the number of speed samples and m is the number of rotation samples. 87 | - **Query environment** – If set to true, the robot predicts the next state of the humans/objects and the rewards by querying the simulation environment. Otherwise, the next state and reward are estimated without using the simulation environment. 88 | 89 | ### Reward Function 90 | - **Adjust discomfort distance** – If this boolean value is set to true, an uncertainty-dependent discomfort distance is used in the reward function of the RL policy. 91 | - **Discomfort distance slope** – If eps is the uncertainty associated with a pedestrian, the discomfort distance assigned to this person is d = a * eps + b, where a is the discomfort distance slope and b is the intercept. 92 | - **Discomfort distance intercept** – If eps is the uncertainty associated with a pedestrian, the discomfort distance assigned to this person is d = a * eps + b, where a is the discomfort distance slope and b is the intercept. 93 | 94 | ### Value Network 95 | - **MLP1 dimensions** – Hidden layer sizes of the first set of multilayer perecptrons (MLPs) used by our policy. 
96 | - **MLP2 dimensions** – Hidden layer sizes of the second set of multilayer perceptrons (MLPs) used by our policy. 97 | - **MLP3 dimensions** – Hidden layer sizes of the third set of multilayer perceptrons (MLPs) used by our policy. 98 | - **Attention dimensions** – Hidden layer sizes of the set of multilayer perceptrons (MLPs) that compute the attention scores used by our policy. 99 | - **With global state** – If the global state is used, the mean of the first set of MLPs will be used in computing the attention score. 100 | 101 | ### Safety Policy 102 | This section is only used for the hardware experiments and can be ignored. 103 | 104 | 105 | ## Training Configuration 106 | ### Trainer 107 | - **Batch size** – Number of state transitions in each batch used for training the reinforcement learning (RL) policy. 108 | - **Maximum agents** – If max agents is set to -1, there is no maximum allowable number of agents. 109 | 110 | #### Imitation Learning 111 | - **IL episodes** – Number of imitation learning (IL) episodes run before training the RL policy in order to initialize the data buffer. 112 | - **IL policy** – Policy used to populate the data buffer with state transitions. 113 | - **IL epochs** – Number of epochs used to train the policy on the data collected during imitation learning (IL). 114 | - **IL learning rate** – Learning rate used when training the policy with imitation learning (IL). 115 | - **Safety space** – If the ORCA policy is used for imitation learning and the robot is not visible to the humans, adding safety space improves ORCA performance. 116 | - **Human randomness** – This float indicates the maximum epsilon value of the randomized ORCA policy used to train the imitation learning policy. 117 | 118 | ### Reinforcement Learning 119 | - **RL learning rate** – Learning rate used when training the reinforcement learning (RL) policy. 120 | - **Training batches** – Number of batches to train at the end of each training episode. 121 | - **Training episodes** – Number of training episodes used in the outer loop. 122 | - **Sample episodes** – Number of episodes of experience sampled in each training iteration. 123 | - **Target update interval** – The target network used to update the value network is updated every n episodes, where n is the target update interval. 124 | - **Evaluation interval** – Validation is performed every n episodes, where n is the evaluation interval. 125 | - **Capacity** – The capacity is the maximum number of state transitions that the data buffer can hold. 126 | - **Epsilon start** – The initial epsilon value used in the epsilon-greedy policy. 127 | - **Epsilon end** – The final epsilon value used in the epsilon-greedy policy. 128 | - **Epsilon decay** – The epsilon value used in the epsilon-greedy policy decays from the initial value to the final value over n episodes, where n is the epsilon decay. 129 | - **Checkpoint interval** – The model is saved every n episodes, where n is the checkpoint interval. Training can be resumed from this checkpoint. 130 | - **Human randomness start** – This float indicates the initial maximum epsilon value of the randomized ORCA policy used to train the RL policy. 131 | - **Human randomness end** – This float indicates the final maximum epsilon value of the randomized ORCA policy used to train the RL policy. 132 | - **Human randomness step** – This float indicates how much the maximum epsilon value is incremented each time the value is increased during training. 
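All of these settings are stored in plain INI-style files, so they can also be inspected programmatically with Python's standard `configparser` module. The `getfloat`/`getboolean` accessors below mirror the ones used in `control/policy/sarl.py`; the file path is just an example, assumed relative to the repository root.

```
import configparser

config = configparser.RawConfigParser()
config.read('control/configs/env-train.config')

# A few of the parameters described above.
time_step = config.getfloat('env', 'time_step')            # seconds between simulation updates
discomfort = config.getfloat('reward', 'discomfort_dist')  # comfortable robot-human distance (m)
random_num = config.getboolean('sim', 'random_human_num')  # randomize the number of humans?
print(time_step, discomfort, random_num)
```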
133 | -------------------------------------------------------------------------------- /control/configs/env-noisy.config: -------------------------------------------------------------------------------- 1 | [env] 2 | time_limit = 30 3 | time_step = 0.25 4 | val_size = 100 5 | test_size = 500 6 | 7 | 8 | [reward] 9 | success_reward = 1 10 | collision_penalty = -0.25 11 | discomfort_dist = 0.1 12 | discomfort_penalty_factor = 0.5 13 | time_penalty = 0 14 | progress_reward = 0 15 | goal_radius = 0.3 16 | 17 | 18 | [sim] 19 | room_width = 15 20 | room_height = 15 21 | num_humans = 20 22 | max_human_num = 20 23 | min_human_num = 5 24 | max_human_dens = 0.05 25 | min_human_dens = 0.02 26 | random_human_num = true 27 | random_human_dens = false 28 | end_on_collision = false 29 | plan_human_path = false 30 | perpetual = false 31 | scenario = random 32 | randomness = true 33 | 34 | 35 | [humans] 36 | visible = true 37 | policy = orca 38 | sensor = coordinates 39 | observability = full 40 | range = 100 41 | radius = 0.3 42 | min_radius = 0.2 43 | max_radius = 0.8 44 | v_pref = 1.0 45 | min_v_pref = 0.5 46 | max_v_pref = 2.0 47 | neigh_dist = 10.0 48 | min_neigh_dist = 2.0 49 | max_neigh_dist = 20.0 50 | horizon = 1.5 51 | min_horizon = 0.1 52 | max_horizon = 5.0 53 | randomize_radius = false 54 | randomize_v_pref = false 55 | randomize_neigh_dist = false 56 | randomize_horizon = false 57 | 58 | 59 | [robot] 60 | visible = true 61 | policy = none 62 | sensor = coordinates 63 | observability = full 64 | range = 4 65 | radius = 0.3 66 | max_radius = 0.3 67 | min_radius = 0.3 68 | v_pref = 1 69 | max_v_pref = 1 70 | min_v_pref = 1 71 | randomize_radius = false 72 | randomize_v_pref = false -------------------------------------------------------------------------------- /control/configs/env-policies.config: -------------------------------------------------------------------------------- 1 | [env] 2 | time_limit = 30 3 | time_step = 0.25 4 | val_size = 100 5 | test_size = 100 6 | 7 | 8 | [reward] 9 | success_reward = 1 10 | collision_penalty = -0.25 11 | discomfort_dist = 0.1 12 | discomfort_penalty_factor = 0.5 13 | time_penalty = 0 14 | progress_reward = 0 15 | goal_radius = 0.3 16 | 17 | 18 | [sim] 19 | room_width = 15 20 | room_height = 15 21 | num_humans = 20 22 | max_human_num = 20 23 | min_human_num = 5 24 | max_human_dens = 0.05 25 | min_human_dens = 0.02 26 | random_human_num = true 27 | random_human_dens = false 28 | end_on_collision = false 29 | plan_human_path = false 30 | perpetual = false 31 | scenario = random 32 | randomness = true 33 | 34 | 35 | [humans] 36 | visible = true 37 | policy = random 38 | sensor = coordinates 39 | observability = full 40 | range = 100 41 | radius = 0.3 42 | min_radius = 0.2 43 | max_radius = 0.8 44 | v_pref = 1.0 45 | min_v_pref = 0.5 46 | max_v_pref = 2.0 47 | neigh_dist = 10.0 48 | min_neigh_dist = 2.0 49 | max_neigh_dist = 20.0 50 | horizon = 1.5 51 | min_horizon = 0.1 52 | max_horizon = 5.0 53 | randomize_radius = true 54 | randomize_v_pref = true 55 | randomize_neigh_dist = true 56 | randomize_horizon = true 57 | 58 | 59 | [robot] 60 | visible = true 61 | policy = none 62 | sensor = coordinates 63 | observability = full 64 | range = 4 65 | radius = 0.3 66 | max_radius = 0.3 67 | min_radius = 0.3 68 | v_pref = 1 69 | max_v_pref = 1 70 | min_v_pref = 1 71 | randomize_radius = false 72 | randomize_v_pref = false -------------------------------------------------------------------------------- /control/configs/env-train.config: 
-------------------------------------------------------------------------------- 1 | [env] 2 | time_limit = 30 3 | time_step = 0.25 4 | val_size = 100 5 | test_size = 500 6 | 7 | 8 | [reward] 9 | success_reward = 1 10 | collision_penalty = -0.25 11 | discomfort_dist = 0.1 12 | discomfort_penalty_factor = 0.5 13 | time_penalty = 0 14 | progress_reward = 0 15 | goal_radius = 0.3 16 | 17 | 18 | [sim] 19 | room_width = 15 20 | room_height = 15 21 | num_humans = 20 22 | max_human_num = 20 23 | min_human_num = 5 24 | max_human_dens = 0.05 25 | min_human_dens = 0.02 26 | random_human_num = false 27 | random_human_dens = false 28 | end_on_collision = false 29 | plan_human_path = false 30 | perpetual = false 31 | scenario = random 32 | randomness = true 33 | 34 | 35 | [humans] 36 | visible = true 37 | policy = orca 38 | sensor = coordinates 39 | observability = full 40 | range = 100 41 | radius = 0.3 42 | min_radius = 0.2 43 | max_radius = 0.8 44 | v_pref = 1.0 45 | min_v_pref = 0.5 46 | max_v_pref = 2.0 47 | neigh_dist = 10.0 48 | min_neigh_dist = 2.0 49 | max_neigh_dist = 20.0 50 | horizon = 1.5 51 | min_horizon = 0.1 52 | max_horizon = 5.0 53 | randomize_radius = false 54 | randomize_v_pref = false 55 | randomize_neigh_dist = false 56 | randomize_horizon = false 57 | 58 | 59 | [robot] 60 | visible = false 61 | policy = none 62 | sensor = coordinates 63 | observability = full 64 | range = 4 65 | radius = 0.3 66 | max_radius = 0.3 67 | min_radius = 0.3 68 | v_pref = 1 69 | max_v_pref = 1 70 | min_v_pref = 1 71 | randomize_radius = false 72 | randomize_v_pref = false -------------------------------------------------------------------------------- /control/configs/model/policy.config: -------------------------------------------------------------------------------- 1 | [rl] 2 | gamma = 0.9 3 | 4 | 5 | [action_space] 6 | kinematics = nonholonomic 7 | speed_samples = 5 8 | rotation_samples = 16 9 | query_env = false 10 | 11 | 12 | [reward] 13 | adjust_dist = false 14 | dist_slope = 0.0 15 | dist_intercept = 0.2 16 | 17 | 18 | [network] 19 | mlp1_dims = 150, 100 20 | mlp2_dims = 100, 50 21 | mlp3_dims = 150, 100, 100, 1 22 | attention_dims = 100, 100, 1 23 | with_global_state = true 24 | 25 | 26 | [safety] 27 | safety = false 28 | slow = false 29 | margin = 0.75 30 | spread = 3 -------------------------------------------------------------------------------- /control/configs/model/train.config: -------------------------------------------------------------------------------- 1 | [trainer] 2 | batch_size = 100 3 | max_agents = -1 4 | 5 | 6 | [imitation_learning] 7 | il_episodes = 3000 8 | il_policy = orca 9 | il_epochs = 50 10 | il_learning_rate = 0.01 11 | safety_space = 0.15 12 | human_randomness = 0.0 13 | 14 | 15 | [train] 16 | rl_learning_rate = 0.001 17 | train_batches = 100 18 | train_episodes = 12000 19 | sample_episodes = 1 20 | target_update_interval = 50 21 | evaluation_interval = 1000 22 | capacity = 100000 23 | epsilon_start = 0.5 24 | epsilon_end = 0.1 25 | epsilon_decay = 4000 26 | checkpoint_interval = 1000 27 | randomness_start = 0.0 28 | randomness_end = 0.5 29 | randomness_step = 0.1 30 | -------------------------------------------------------------------------------- /control/configs/reward/policy.config: -------------------------------------------------------------------------------- 1 | [rl] 2 | gamma = 0.9 3 | 4 | 5 | [action_space] 6 | kinematics = nonholonomic 7 | speed_samples = 5 8 | rotation_samples = 16 9 | query_env = false 10 | 11 | 12 | [reward] 13 | adjust_dist = true 14 | 
dist_slope = 1.0 15 | dist_intercept = 0.2 16 | 17 | 18 | [network] 19 | mlp1_dims = 150, 100 20 | mlp2_dims = 100, 50 21 | mlp3_dims = 150, 100, 100, 1 22 | attention_dims = 100, 100, 1 23 | with_global_state = true 24 | 25 | 26 | [safety] 27 | safety = false 28 | slow = false 29 | margin = 0.75 30 | spread = 3 -------------------------------------------------------------------------------- /control/configs/reward/train.config: -------------------------------------------------------------------------------- 1 | [trainer] 2 | batch_size = 100 3 | max_agents = -1 4 | 5 | 6 | [imitation_learning] 7 | il_episodes = 3000 8 | il_policy = orca 9 | il_epochs = 50 10 | il_learning_rate = 0.01 11 | safety_space = 0.15 12 | human_randomness = 0.0 13 | 14 | 15 | [train] 16 | rl_learning_rate = 0.001 17 | train_batches = 100 18 | train_episodes = 12000 19 | sample_episodes = 1 20 | target_update_interval = 50 21 | evaluation_interval = 1000 22 | capacity = 100000 23 | epsilon_start = 0.5 24 | epsilon_end = 0.1 25 | epsilon_decay = 4000 26 | checkpoint_interval = 1000 27 | randomness_start = 0.0 28 | randomness_end = 0.5 29 | randomness_step = 0.1 30 | -------------------------------------------------------------------------------- /control/configs/sarl/env.config: -------------------------------------------------------------------------------- 1 | [env] 2 | time_limit = 30 3 | time_step = 0.25 4 | val_size = 100 5 | test_size = 500 6 | 7 | 8 | [reward] 9 | success_reward = 1 10 | collision_penalty = -0.25 11 | discomfort_dist = 0.1 12 | discomfort_penalty_factor = 0.5 13 | time_penalty = 0 14 | progress_reward = 0 15 | goal_radius = 0.3 16 | 17 | 18 | [sim] 19 | room_width = 15 20 | room_height = 15 21 | num_humans = 20 22 | max_human_num = 50 23 | min_human_num = 5 24 | max_human_dens = 0.05 25 | min_human_dens = 0.02 26 | random_human_num = false 27 | random_human_dens = false 28 | end_on_collision = false 29 | plan_human_path = false 30 | perpetual = false 31 | scenario = random 32 | randomness = true 33 | 34 | 35 | [humans] 36 | visible = true 37 | policy = orca 38 | sensor = coordinates 39 | observability = full 40 | range = 100 41 | radius = 0.3 42 | min_radius = 0.2 43 | max_radius = 0.8 44 | v_pref = 1.0 45 | min_v_pref = 0.5 46 | max_v_pref = 2.0 47 | neigh_dist = 10.0 48 | min_neigh_dist = 2.0 49 | max_neigh_dist = 20.0 50 | horizon = 1.5 51 | min_horizon = 0.1 52 | max_horizon = 5.0 53 | randomize_radius = false 54 | randomize_v_pref = false 55 | randomize_neigh_dist = false 56 | randomize_horizon = false 57 | 58 | 59 | [robot] 60 | visible = false 61 | policy = none 62 | sensor = coordinates 63 | observability = full 64 | range = 4 65 | radius = 0.3 66 | max_radius = 0.3 67 | min_radius = 0.3 68 | v_pref = 1 69 | max_v_pref = 1 70 | min_v_pref = 1 71 | randomize_radius = false 72 | randomize_v_pref = false -------------------------------------------------------------------------------- /control/configs/sarl/policy.config: -------------------------------------------------------------------------------- 1 | [rl] 2 | gamma = 0.9 3 | 4 | 5 | [action_space] 6 | kinematics = nonholonomic 7 | speed_samples = 5 8 | rotation_samples = 16 9 | query_env = false 10 | 11 | 12 | [reward] 13 | adjust_dist = false 14 | dist_slope = 0.0 15 | dist_intercept = 0.2 16 | 17 | 18 | [network] 19 | mlp1_dims = 150, 100 20 | mlp2_dims = 100, 50 21 | mlp3_dims = 150, 100, 100, 1 22 | attention_dims = 100, 100, 1 23 | with_global_state = true 24 | 25 | 26 | [safety] 27 | safety = false 28 | slow = false 29 | margin 
= 0.75 30 | spread = 3 -------------------------------------------------------------------------------- /control/configs/sarl/train.config: -------------------------------------------------------------------------------- 1 | [trainer] 2 | batch_size = 100 3 | max_agents = -1 4 | 5 | 6 | [imitation_learning] 7 | il_episodes = 3000 8 | il_policy = orca 9 | il_epochs = 50 10 | il_learning_rate = 0.01 11 | safety_space = 0.15 12 | human_randomness = 0.0 13 | 14 | 15 | [train] 16 | rl_learning_rate = 0.001 17 | train_batches = 100 18 | train_episodes = 12000 19 | sample_episodes = 1 20 | target_update_interval = 50 21 | evaluation_interval = 1000 22 | capacity = 100000 23 | epsilon_start = 0.5 24 | epsilon_end = 0.1 25 | epsilon_decay = 4000 26 | checkpoint_interval = 1000 27 | randomness_start = 0.0 28 | randomness_end = 0.0 29 | randomness_step = 0.1 30 | -------------------------------------------------------------------------------- /control/configs/training/env.config: -------------------------------------------------------------------------------- 1 | [env] 2 | time_limit = 30 3 | time_step = 0.25 4 | val_size = 100 5 | test_size = 500 6 | 7 | 8 | [reward] 9 | success_reward = 1 10 | collision_penalty = -0.25 11 | discomfort_dist = 0.1 12 | discomfort_penalty_factor = 0.5 13 | time_penalty = 0 14 | progress_reward = 0 15 | goal_radius = 0.3 16 | 17 | 18 | [sim] 19 | room_width = 15 20 | room_height = 15 21 | num_humans = 20 22 | max_human_num = 50 23 | min_human_num = 5 24 | max_human_dens = 0.05 25 | min_human_dens = 0.02 26 | random_human_num = false 27 | random_human_dens = false 28 | end_on_collision = false 29 | plan_human_path = false 30 | perpetual = false 31 | scenario = random 32 | randomness = true 33 | 34 | 35 | [humans] 36 | visible = true 37 | policy = orca 38 | sensor = coordinates 39 | observability = full 40 | range = 100 41 | radius = 0.3 42 | min_radius = 0.2 43 | max_radius = 0.8 44 | v_pref = 1.0 45 | min_v_pref = 0.5 46 | max_v_pref = 2.0 47 | neigh_dist = 10.0 48 | min_neigh_dist = 2.0 49 | max_neigh_dist = 20.0 50 | horizon = 1.5 51 | min_horizon = 0.1 52 | max_horizon = 5.0 53 | randomize_radius = false 54 | randomize_v_pref = false 55 | randomize_neigh_dist = false 56 | randomize_horizon = false 57 | 58 | 59 | [robot] 60 | visible = false 61 | policy = none 62 | sensor = coordinates 63 | observability = full 64 | range = 4 65 | radius = 0.3 66 | max_radius = 0.3 67 | min_radius = 0.3 68 | v_pref = 1 69 | max_v_pref = 1 70 | min_v_pref = 1 71 | randomize_radius = false 72 | randomize_v_pref = false -------------------------------------------------------------------------------- /control/configs/training/policy.config: -------------------------------------------------------------------------------- 1 | [rl] 2 | gamma = 0.9 3 | 4 | 5 | [action_space] 6 | kinematics = nonholonomic 7 | speed_samples = 5 8 | rotation_samples = 16 9 | query_env = false 10 | 11 | 12 | [reward] 13 | adjust_dist = false 14 | dist_slope = 0.0 15 | dist_intercept = 0.2 16 | 17 | 18 | [network] 19 | mlp1_dims = 150, 100 20 | mlp2_dims = 100, 50 21 | mlp3_dims = 150, 100, 100, 1 22 | attention_dims = 100, 100, 1 23 | with_global_state = true 24 | 25 | 26 | [safety] 27 | safety = false 28 | slow = false 29 | margin = 0.75 30 | spread = 3 -------------------------------------------------------------------------------- /control/configs/training/train.config: -------------------------------------------------------------------------------- 1 | [trainer] 2 | batch_size = 100 3 | max_agents = -1 4 | 5 
| 6 | [imitation_learning] 7 | il_episodes = 3000 8 | il_policy = orca 9 | il_epochs = 50 10 | il_learning_rate = 0.01 11 | safety_space = 0.15 12 | human_randomness = 0.0 13 | 14 | 15 | [train] 16 | rl_learning_rate = 0.001 17 | train_batches = 100 18 | train_episodes = 12000 19 | sample_episodes = 1 20 | target_update_interval = 50 21 | evaluation_interval = 1000 22 | capacity = 100000 23 | epsilon_start = 0.5 24 | epsilon_end = 0.1 25 | epsilon_decay = 4000 26 | checkpoint_interval = 1000 27 | randomness_start = 0.0 28 | randomness_end = 0.5 29 | randomness_step = 0.1 30 | -------------------------------------------------------------------------------- /control/policy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarapohland/stranger-danger/84a6bad96de4f117d6192b5111eba5531e766e26/control/policy/__init__.py -------------------------------------------------------------------------------- /control/policy/policy_factory.py: -------------------------------------------------------------------------------- 1 | from control.policy.sarl import SARL 2 | from control.policy.uncertain_sarl import UNCERTAIN_SARL 3 | from simulation.envs.policy.policy_factory import policy_factory 4 | 5 | policy_factory['sarl'] = SARL 6 | policy_factory['uncertain_sarl'] = UNCERTAIN_SARL 7 | -------------------------------------------------------------------------------- /control/policy/sarl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | import itertools 4 | import numpy as np 5 | import torch.nn as nn 6 | import numpy.linalg as la 7 | 8 | from torch.nn.functional import softmax 9 | from shapely.geometry import Polygon 10 | 11 | from simulation.envs.policy.policy import Policy 12 | from simulation.envs.utils.action import ActionRot, ActionXY 13 | from simulation.envs.utils.state import ObservableState, FullState 14 | from control.policy.value_network import ValueNetwork 15 | 16 | 17 | class SARL(Policy): 18 | def __init__(self): 19 | super().__init__() 20 | self.name = 'SARL' 21 | self.trainable = True 22 | self.kinematics = None 23 | self.epsilon = None 24 | self.gamma = None 25 | self.speed_samples = None 26 | self.rotation_samples = None 27 | self.query_env = None 28 | self.action_space = None 29 | self.speeds = None 30 | self.rotations = None 31 | self.action_values = None 32 | self.self_state_dim = 6 33 | self.human_state_dim = 7 34 | self.joint_state_dim = self.self_state_dim + self.human_state_dim 35 | self.multiagent_training = True 36 | 37 | # Safety features 38 | self.safety = None 39 | self.walls = None 40 | 41 | def configure(self, config): 42 | self.gamma = config.getfloat('rl', 'gamma') 43 | 44 | self.kinematics = config.get('action_space', 'kinematics') 45 | self.speed_samples = config.getint('action_space', 'speed_samples') 46 | self.rotation_samples = config.getint('action_space', 'rotation_samples') 47 | self.query_env = config.getboolean('action_space', 'query_env') 48 | 49 | mlp1_dims = [int(x) for x in config.get('network', 'mlp1_dims').split(', ')] 50 | mlp2_dims = [int(x) for x in config.get('network', 'mlp2_dims').split(', ')] 51 | mlp3_dims = [int(x) for x in config.get('network', 'mlp3_dims').split(', ')] 52 | attention_dims = [int(x) for x in config.get('network', 'attention_dims').split(', ')] 53 | with_global_state = config.getboolean('network', 'with_global_state') 54 | 55 | self.model = ValueNetwork(self.joint_state_dim, 
self.self_state_dim, mlp1_dims, 56 | mlp2_dims, mlp3_dims, attention_dims, with_global_state) 57 | logging.info('Policy: Baseline SARL') 58 | 59 | # Safety features 60 | self.safety = config.getboolean('safety', 'safety') 61 | self.slow = config.getboolean('safety', 'slow') 62 | self.margin = config.getfloat('safety', 'margin') 63 | self.spread = config.getint('safety', 'spread') 64 | 65 | if self.safety: 66 | logging.info('Safety controller is activated.') 67 | else: 68 | logging.info('Safety controller is unactivated.') 69 | 70 | def set_device(self, device): 71 | self.device = device 72 | self.model.to(device) 73 | 74 | def set_epsilon(self, epsilon): 75 | self.epsilon = epsilon 76 | 77 | def set_walls(self, walls): 78 | self.walls = walls 79 | 80 | def get_attention_weights(self): 81 | return self.model.attention_weights 82 | 83 | def build_action_space(self, v_pref): 84 | holonomic = self.kinematics == 'holonomic' 85 | speeds = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * v_pref for i in range(self.speed_samples)] 86 | if holonomic: 87 | rotations = np.linspace(0, 2 * np.pi, self.rotation_samples, endpoint=False) 88 | else: 89 | rotations = np.linspace(-np.pi / 4, np.pi / 4, self.rotation_samples) 90 | 91 | action_space = [ActionXY(0, 0) if holonomic else ActionRot(0, -np.pi/4)] 92 | for rotation, speed in itertools.product(rotations, speeds): 93 | if holonomic: 94 | action_space.append(ActionXY(speed * np.cos(rotation), speed * np.sin(rotation))) 95 | else: 96 | action_space.append(ActionRot(speed, rotation)) 97 | 98 | self.speeds = speeds 99 | self.rotations = rotations 100 | self.action_space = action_space 101 | self.free_directions = np.full(len(action_space), True) 102 | 103 | def propagate(self, state, action): 104 | if isinstance(state, ObservableState): 105 | # propagate state of humans 106 | next_px = state.px + action.vx * self.time_step 107 | next_py = state.py + action.vy * self.time_step 108 | next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius) 109 | elif isinstance(state, FullState): 110 | # propagate state of current agent 111 | if self.kinematics == 'holonomic': 112 | next_px = state.px + action.vx * self.time_step 113 | next_py = state.py + action.vy * self.time_step 114 | next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius, 115 | state.wx, state.wy, state.v_pref, state.theta) 116 | else: 117 | next_theta = state.theta + action.r 118 | next_vx = action.v * np.cos(next_theta) 119 | next_vy = action.v * np.sin(next_theta) 120 | next_px = state.px + next_vx * self.time_step 121 | next_py = state.py + next_vy * self.time_step 122 | next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, 123 | state.wx, state.wy, state.v_pref, next_theta) 124 | else: 125 | raise ValueError('State to be propagated is not of a known type.') 126 | return next_state 127 | 128 | def compute_reward(self, nav, obs): 129 | # check for collision 130 | dmin = float('inf') 131 | collision = False 132 | for i, ob in enumerate(obs): 133 | dist = la.norm((nav.px - ob.px, nav.py - ob.py)) - nav.radius - ob.radius 134 | if dist < 0: 135 | collision = True 136 | if dist < dmin: 137 | dmin = dist 138 | 139 | # check if reaching the goal 140 | reaching_goal = la.norm((nav.px - nav.gx, nav.py - nav.gy)) < nav.radius 141 | 142 | # compute reward 143 | if reaching_goal: 144 | reward = 1 145 | elif collision: 146 | reward = -0.25 147 | elif dmin < 0.2: 148 | reward = (dmin - 0.2) * 0.5 * self.time_step 149 | else: 150 | reward 
= 0 151 | return reward 152 | 153 | def transform(self, state, eps=None): 154 | state_tensor = torch.cat([torch.Tensor([state.self_state + human_state]).to(self.device) 155 | for human_state in state.human_states], dim=0) 156 | state_tensor = self.rotate(state_tensor) 157 | return state_tensor 158 | 159 | def rotate(self, state): 160 | # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta', 'px1', 'py1', 'vx1', 'vy1', 'radius1' 161 | # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 162 | batch = state.shape[0] 163 | dx = (state[:, 5] - state[:, 0]).reshape((batch, -1)) 164 | dy = (state[:, 6] - state[:, 1]).reshape((batch, -1)) 165 | rot = torch.atan2(state[:, 6] - state[:, 1], state[:, 5] - state[:, 0]) 166 | 167 | dg = torch.norm(torch.cat([dx, dy], dim=1), 2, dim=1, keepdim=True) 168 | v_pref = state[:, 7].reshape((batch, -1)) 169 | vx = (state[:, 2] * torch.cos(rot) + state[:, 3] * torch.sin(rot)).reshape((batch, -1)) 170 | vy = (state[:, 3] * torch.cos(rot) - state[:, 2] * torch.sin(rot)).reshape((batch, -1)) 171 | 172 | radius = state[:, 4].reshape((batch, -1)) 173 | if self.kinematics == 'unicycle': 174 | theta = (state[:, 8] - rot).reshape((batch, -1)) 175 | else: 176 | theta = torch.zeros_like(v_pref) 177 | 178 | vx1 = (state[:, 11] * torch.cos(rot) + state[:, 12] * torch.sin(rot)).reshape((batch, -1)) 179 | vy1 = (state[:, 12] * torch.cos(rot) - state[:, 11] * torch.sin(rot)).reshape((batch, -1)) 180 | px1 = (state[:, 9] - state[:, 0]) * torch.cos(rot) + (state[:, 10] - state[:, 1]) * torch.sin(rot) 181 | px1 = px1.reshape((batch, -1)) 182 | py1 = (state[:, 10] - state[:, 1]) * torch.cos(rot) - (state[:, 9] - state[:, 0]) * torch.sin(rot) 183 | py1 = py1.reshape((batch, -1)) 184 | radius1 = state[:, 13].reshape((batch, -1)) 185 | radius_sum = radius + radius1 186 | da = torch.norm(torch.cat([(state[:, 0] - state[:, 9]).reshape((batch, -1)), (state[:, 1] - state[:, 10]). 
187 | reshape((batch, -1))], dim=1), 2, dim=1, keepdim=True) 188 | 189 | new_state = torch.cat([dg, v_pref, theta, radius, vx, vy, px1, py1, vx1, vy1, radius1, da, radius_sum], dim=1) 190 | return new_state 191 | 192 | def predict(self, state): 193 | if self.phase is None or self.device is None: 194 | raise AttributeError('Phase and device attributes have to be set.') 195 | if self.phase == 'train' and self.epsilon is None: 196 | raise AttributeError('Epsilon attribute has to be set in training phase.') 197 | 198 | # Stop moving once the goal is reached 199 | if self.reach_destination(state): 200 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) 201 | 202 | # Build the action space at the start 203 | if self.action_space is None: 204 | self.build_action_space(state.self_state.v_pref) 205 | if self.safety: 206 | self.build_radar(state.self_state.v_pref, state.self_state.radius) 207 | 208 | # Update free_directions 209 | self.find_free_directions(state.self_state, state.human_states) 210 | 211 | # Robot has observed human 212 | if state.human_states: 213 | # Select action according to epsilon greedy policy 214 | probability = np.random.random() 215 | if self.phase == 'train' and probability < self.epsilon: 216 | max_action = self.action_space[np.random.choice(len(self.action_space))] 217 | else: 218 | self.action_values = list() 219 | max_value = float('-inf') 220 | max_action = None 221 | naive_value = float('-inf') 222 | for free, action in zip(self.free_directions, self.action_space): 223 | # Get input to value network for given action 224 | next_self_state = self.propagate(state.self_state, action) 225 | if self.query_env: 226 | next_human_states, reward, done, info = self.env.onestep_lookahead(action) 227 | else: 228 | next_human_states = [self.propagate(human_state, ActionXY(human_state.vx, human_state.vy)) 229 | for human_state in state.human_states] 230 | reward = self.compute_reward(next_self_state, next_human_states) 231 | batch_next_states = torch.cat([torch.Tensor([next_self_state + next_human_state]).to(self.device) 232 | for next_human_state in next_human_states], dim=0) 233 | rotated_batch_input = self.rotate(batch_next_states).unsqueeze(0) 234 | 235 | # Compute value of action using value network 236 | next_state_value = self.model(rotated_batch_input).data.item() 237 | value = reward + pow(self.gamma, self.time_step * state.self_state.v_pref) * next_state_value 238 | 239 | if value > naive_value: 240 | naive_value = value 241 | if free: 242 | self.action_values.append(value) 243 | if value > max_value: 244 | max_value = value 245 | max_action = action 246 | else: 247 | self.action_values.append(np.nan) 248 | 249 | if max_action is None: 250 | raise ValueError('Value network is not well trained.') 251 | 252 | # if there was a "better" action without safety, then the safety is active 253 | self.safety_active = max_value != naive_value 254 | 255 | if self.phase == 'train': 256 | self.last_state = self.transform(state) 257 | 258 | return max_action 259 | 260 | # Robot has not observed human 261 | else: 262 | # Select action towards goal position 263 | px, py = state.self_state.px, state.self_state.py 264 | wx, wy = state.self_state.wx, state.self_state.wy 265 | theta = state.self_state.theta 266 | speed = min(np.linalg.norm((wy-py, wx-px)), state.self_state.v_pref) 267 | angle = np.arctan2(wy-py, wx-px) 268 | if self.kinematics == 'holonomic': 269 | action = ActionXY(speed * np.cos(angle), speed * np.sin(angle)) 270 | else: 271 | rot = angle - theta 
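# The next line wraps the heading error into [-pi, pi) so the robot turns the short way
# toward its waypoint; for example, rot = 6.5 rad becomes (6.5 + pi) % (2*pi) - pi ~= 0.22 rad.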
272 | rot = (rot + np.pi) % (2 * np.pi) - np.pi 273 | action = ActionRot(speed, rot) 274 | return self.review_action(action, state) 275 | 276 | def build_radar(self, v_pref, robot_radius): 277 | if self.slow: 278 | # Use positive margin for safety buffer. Use 0 margin to disable safety buffer. 279 | if self.margin >= 0: 280 | margins = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * self.margin + robot_radius for i in range(self.speed_samples)] 281 | # Use negative margin for a default 2-second following space. 282 | else: 283 | margins = [(np.exp((i + 1) / self.speed_samples) - 1) / (np.e - 1) * 2 * v_pref + robot_radius for i in range(self.speed_samples)] 284 | # If we aren't slowing down, we just use widest margins. 285 | else: 286 | if self.margin >= 0: 287 | margins = [self.margin + robot_radius] 288 | else: 289 | margins = [2*v_pref + robot_radius] 290 | 291 | # Rotations are standard. 292 | holonomic = self.kinematics == 'holonomic' 293 | if self.rotations is None: 294 | if holonomic: 295 | rotations = np.linspace(0, 2*np.pi, self.rotation_samples, endpoint=False) 296 | else: 297 | rotations = np.linspace(-np.pi / 4, np.pi / 4, self.rotation_samples) 298 | else: 299 | rotations = self.rotations 300 | 301 | radar_vertices = [] 302 | if holonomic: 303 | theta = np.pi / self.rotation_samples 304 | else: 305 | theta = (np.pi / 4) / self.rotation_samples 306 | tan_theta = np.tan(theta) 307 | plus_minus_one = [1, -1] 308 | for rotation, margin in itertools.product(rotations, margins): 309 | new_vertices = [] 310 | radar_line = [margin * np.cos(rotation), margin * np.sin(rotation)] 311 | margin_width = robot_radius 312 | radar_offset = [margin_width * np.sin(rotation), -margin_width * np.cos(rotation)] 313 | new_vertices += [(radar_line[0] + sign * radar_offset[0], radar_line[1] + sign * radar_offset[1]) for sign in plus_minus_one] 314 | new_vertices += [(-sign * radar_offset[0], -sign * radar_offset[1]) for sign in plus_minus_one] 315 | radar_vertices.append(new_vertices) 316 | 317 | self.num_margins = len(margins) 318 | self.margins = margins 319 | self.rotations = rotations 320 | self.radar_vertices = radar_vertices 321 | 322 | def find_free_directions(self, robo, obs): 323 | # If the safety controller is not active, or we don't have any walls, then all directions are free. 
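# Note that free_directions[0] (the null action: stop for holonomic robots, rotate in place
# for nonholonomic ones) is never masked below, so at least one action remains admissible
# even when every radar wedge intersects a wall.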
324 | if not self.safety or self.walls is None: 325 | self.free_directions[1:] = True 326 | return 327 | holonomic = self.kinematics == 'holonomic' 328 | 329 | free_list = [] 330 | sin_t = np.sin(robo.theta) 331 | cos_t = np.cos(robo.theta) 332 | 333 | for vertices in self.radar_vertices: 334 | if holonomic: 335 | shifted_vertices = [(robo.position[0] + vertex[0], robo.position[1] + vertex[1]) for vertex in vertices] 336 | else: 337 | # rotate counterclockwise by robo.theta radians 338 | rotated_vertices = [(cos_t*vertex[0] - sin_t*vertex[1], sin_t*vertex[0] + cos_t*vertex[1]) for vertex in vertices] 339 | shifted_vertices = [(robo.position[0] + vertex[0], robo.position[1] + vertex[1]) for vertex in rotated_vertices] 340 | radar_wedge = Polygon(shifted_vertices) 341 | free = True 342 | if self.walls.intersects(radar_wedge): 343 | free = False 344 | 345 | # In slow mode, each (heading, margin) wedge gets its own entry; otherwise the 346 | # single wedge result is repeated for every speed at this heading. 347 | 348 | if self.slow: 349 | free_list.append(free) 350 | else: 351 | free_list += [free for i in range(self.speed_samples)] 352 | 353 | free_list = np.array(free_list) 354 | roller = np.ones((len(free_list), 1 + 2*self.spread)) 355 | roller[:, 0] = free_list 356 | for i in range(1, self.spread + 1): 357 | if holonomic: 358 | roller[:, i] = np.roll(free_list, i * self.speed_samples) 359 | roller[:, i + self.spread] = np.roll(free_list, -i * self.speed_samples) 360 | else: # non-holonomic spread does not wrap around 361 | roller[i*self.speed_samples:, i] = free_list[:-i*self.speed_samples] 362 | roller[:-i*self.speed_samples, i + self.spread] = free_list[i*self.speed_samples:] 363 | 364 | # Update our list of free directions 365 | self.free_directions[1:] = roller.all(axis=1) 366 | 367 | # Provide an option to find the nearest safe action 368 | def review_action(self, proposed_action, state): 369 | # If we don't care about safety, just return the proposed action 370 | if not self.safety: 371 | return proposed_action 372 | 373 | # Set things up if necessary.
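# Here, "setting up" means returning the stop action once the goal has been reached and
# building the radar wedges if they have not been constructed yet.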
374 | if self.reach_destination(state): 375 | return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) 376 | if self.safety and self.radar_vertices is None: 377 | self.build_radar(state.self_state.v_pref, state.self_state.radius) 378 | 379 | # Identify areas that have obstacles in the way 380 | self.find_free_directions(state.self_state, state.human_states) 381 | 382 | # Determine x and y components of proposed action 383 | holonomic = self.kinematics == 'holonomic' 384 | if holonomic: 385 | proposed_ax = proposed_action[0] 386 | proposed_ay = proposed_action[1] 387 | else: 388 | # if we want to rotate a bunch, just rotate without moving forward 389 | proposed_ax = proposed_action[0] * np.cos(proposed_action[1]) 390 | proposed_ay = proposed_action[0] * np.sin(proposed_action[1]) 391 | 392 | # We find the closest action to the proposed action that is still safe 393 | closest_action = 0 394 | min_dist = np.linalg.norm([proposed_ax, proposed_ay]) 395 | naive_min_dist = min_dist 396 | if self.action_space is None: 397 | self.build_action_space(state.self_state.v_pref) 398 | for i, action in enumerate(self.action_space): 399 | if holonomic: 400 | ax = action[0] 401 | ay = action[1] 402 | else: 403 | ax = action[0] * np.cos(action[1]) 404 | ay = action[0] * np.sin(action[1]) 405 | new_dist = np.linalg.norm([proposed_ax - ax, proposed_ay - ay]) 406 | if new_dist < min_dist and self.free_directions[i]: 407 | min_dist = new_dist 408 | closest_action = i 409 | if new_dist < naive_min_dist: 410 | naive_min_dist = new_dist 411 | # log when the safety controller is actively preventing an "optimal" path 412 | self.safety_active = min_dist != naive_min_dist 413 | 414 | if self.slow: 415 | return self.action_space[closest_action] 416 | else: 417 | if holonomic: 418 | speed = np.linalg.norm(proposed_action) 419 | action_direction = self.action_space[closest_action] 420 | speed_factor = speed/np.linalg.norm(action_direction) 421 | return ActionXY(action_direction.vx * speed_factor, action_direction.vy * speed_factor) 422 | else: 423 | speed = proposed_action[0] 424 | action_direction = self.action_space[closest_action][1] 425 | return ActionRot(speed, action_direction) 426 | -------------------------------------------------------------------------------- /control/policy/value_network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def mlp(input_dim, mlp_dims, last_relu=False): 5 | layers = [] 6 | mlp_dims = [input_dim] + mlp_dims 7 | for i in range(len(mlp_dims) - 1): 8 | layers.append(nn.Linear(mlp_dims[i], mlp_dims[i + 1])) 9 | if i != len(mlp_dims) - 2 or last_relu: 10 | layers.append(nn.ReLU()) 11 | net = nn.Sequential(*layers) 12 | return net 13 | 14 | class ValueNetwork(nn.Module): 15 | def __init__(self, input_dim, self_state_dim, mlp1_dims, mlp2_dims, mlp3_dims, attention_dims, with_global_state): 16 | super().__init__() 17 | self.self_state_dim = self_state_dim 18 | self.global_state_dim = mlp1_dims[-1] 19 | self.mlp1 = mlp(input_dim, mlp1_dims, last_relu=True) 20 | self.mlp2 = mlp(mlp1_dims[-1], mlp2_dims) 21 | self.with_global_state = with_global_state 22 | if with_global_state: 23 | self.attention = mlp(mlp1_dims[-1] * 2, attention_dims) 24 | else: 25 | self.attention = mlp(mlp1_dims[-1], attention_dims) 26 | mlp3_input_dim = mlp2_dims[-1] + self.self_state_dim 27 | self.mlp3 = mlp(mlp3_input_dim, mlp3_dims) 28 | self.attention_weights = None 29 | 30 | def forward(self, state): 31 | size = 
state.shape 32 | self_state = state[:, 0, :self.self_state_dim] 33 | 34 | # compute embedding vector and pairwise interaction feature 35 | mlp1_input = state.view((-1, size[2])) 36 | mlp1_output = self.mlp1(mlp1_input) 37 | mlp2_output = self.mlp2(mlp1_output) 38 | 39 | # compute attention scores 40 | if self.with_global_state: 41 | global_state = torch.mean(mlp1_output.view(size[0], size[1], -1), 1, keepdim=True) 42 | global_state = global_state.expand((size[0], size[1], self.global_state_dim)).\ 43 | contiguous().view(-1, self.global_state_dim) 44 | attention_input = torch.cat([mlp1_output, global_state], dim=1) 45 | else: 46 | attention_input = mlp1_output 47 | scores = self.attention(attention_input).view(size[0], size[1], 1).squeeze(dim=2) 48 | 49 | # compute normalized weights (masked softmax) 50 | scores_exp = torch.exp(scores) * (scores != 0).float() 51 | weights = (scores_exp / torch.sum(scores_exp, dim=1, keepdim=True)).unsqueeze(2) 52 | self.attention_weights = weights[0, :, 0].data.cpu().numpy() 53 | 54 | # obtain a compact representation of the observed humans 55 | # output feature is a linear combination of input features 56 | features = mlp2_output.view(size[0], size[1], -1) 57 | weighted_feature = torch.sum(torch.mul(weights, features), dim=1) 58 | 59 | # concatenate agent's state with global weighted humans' state 60 | joint_state = torch.cat([self_state, weighted_feature], dim=1) 61 | value = self.mlp3(joint_state) 62 | return value 63 | -------------------------------------------------------------------------------- /control/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gym 3 | import torch 4 | import logging 5 | import argparse 6 | import configparser 7 | import numpy as np 8 | import numpy.linalg as la 9 | 10 | from control.utils.explorer import Explorer, collision_blame 11 | from control.policy.policy_factory import policy_factory 12 | from simulation.envs.utils.robot import Robot 13 | from simulation.envs.policy.orca import ORCA 14 | from simulation.envs.utils.info import * 15 | 16 | # This program throws a lot of RuntimeWarnings that can be ignored, so we suppress warnings. 17 | # If you are trying to troubleshoot, please comment out these lines. Thanks! 
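# Example invocation, assuming a SARL model has already been trained and saved
# (the model directory placeholder is illustrative):
#   python test.py --policy sarl --model_dir <trained-model-dir> --visualize --test_case 0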
18 | import warnings 19 | warnings.filterwarnings('ignore', category=UserWarning) 20 | warnings.filterwarnings('ignore', category=RuntimeWarning) 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser('Parse configuration file') 25 | parser.add_argument('--policy', type=str, default='orca') 26 | parser.add_argument('--phase', type=str, default='test') 27 | parser.add_argument('--model_dir', type=str, default=None) 28 | parser.add_argument('--test_case', type=int, default=None) 29 | parser.add_argument('--max_epsilon', type=float, default=0.0) 30 | parser.add_argument('--num_episodes', type=float, default=0.0) 31 | parser.add_argument('--video_file', type=str, default=None) 32 | parser.add_argument('--stats_file', type=str, default='stats.csv') 33 | parser.add_argument('--env_config', type=str, default='configs/env.config') 34 | parser.add_argument('--policy_config', type=str, default='configs/policy.config') 35 | parser.add_argument('--estimate_eps', default=False, action='store_true') 36 | parser.add_argument('--il', default=False, action='store_true') 37 | parser.add_argument('--gpu', default=False, action='store_true') 38 | parser.add_argument('--traj', default=False, action='store_true') 39 | parser.add_argument('--visualize', default=False, action='store_true') 40 | args = parser.parse_args() 41 | 42 | # configure logging and device 43 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s', 44 | # datefmt="%Y-%m-%d %H:%M:%S", filename='thicc_boi.log', filemode='a') 45 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu") 46 | # logging.info('Using device: %s', device) 47 | 48 | # load model and read config files 49 | env_config_file = args.env_config 50 | if args.model_dir is not None: 51 | policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config)) 52 | if args.il: 53 | model_weights = os.path.join(args.model_dir, 'il_model.pth') 54 | else: 55 | if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')): 56 | model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth') 57 | else: 58 | model_weights = os.path.join(args.model_dir, 'rl_model.pth') 59 | else: 60 | policy_config_file = args.policy_config 61 | 62 | # configure policy 63 | policy = policy_factory[args.policy]() 64 | policy_config = configparser.RawConfigParser() 65 | policy_config.read(policy_config_file) 66 | policy.configure(policy_config) 67 | if policy.trainable: 68 | if args.model_dir is None: 69 | parser.error('Trainable policy must be specified with a model weights directory') 70 | policy.get_model().load_state_dict(torch.load(model_weights)) 71 | 72 | # configure environment 73 | env_config = configparser.RawConfigParser() 74 | env_config.read(env_config_file) 75 | env = gym.make('CrowdSim-v0') 76 | env.configure(env_config) 77 | robot = Robot(env_config, 'robot') 78 | robot.set_policy(policy) 79 | env.set_robot(robot) 80 | 81 | # initialize policy 82 | policy.set_phase(args.phase) 83 | policy.set_device(device) 84 | if isinstance(robot.policy, ORCA): 85 | if robot.visible: 86 | robot.policy.safety_space = 0 87 | else: 88 | robot.policy.safety_space = 0 89 | # logging.info('ORCA agent buffer: %f', robot.policy.safety_space) 90 | policy.set_env(env) 91 | robot.print_info() 92 | 93 | # visualize evaluation 94 | if args.visualize: 95 | ob = env.reset(phase=args.phase, test_case=args.test_case, max_epsilon=args.max_epsilon) 96 | done = False 97 | last_pos = np.array(robot.get_position()) 98 | 
while not done: 99 | eps = env.get_epsilons(args.estimate_eps) 100 | action = robot.act(ob, eps) 101 | ob, _, done, info = env.step(action) 102 | current_pos = np.array(robot.get_position()) 103 | # logging.debug('Speed: %.2f', la.norm(current_pos - last_pos) / robot.time_step) 104 | last_pos = current_pos 105 | 106 | print('Testing: {} scenario with {} pedestrians'.format(env.scenario, len(env.humans))) 107 | print('Result: {}'.format(info)) 108 | 109 | if args.traj: 110 | env.render('traj', args.video_file) 111 | else: 112 | env.render('video', args.video_file) 113 | 114 | # run evaluation without visualization 115 | else: 116 | save_dir = os.path.dirname(args.stats_file) 117 | if not os.path.exists(save_dir): 118 | os.makedirs(save_dir) 119 | explorer = Explorer(env, robot, device, gamma=0.9, stats_file=args.stats_file) 120 | k = int(args.num_episodes if args.num_episodes > 0 else env.case_size[args.phase]) 121 | explorer.run_k_episodes(k, args.phase, print_failure=True, max_epsilon=args.max_epsilon, estimate_eps=args.estimate_eps) 122 | 123 | 124 | if __name__ == '__main__': 125 | main() 126 | -------------------------------------------------------------------------------- /control/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import gym 4 | import torch 5 | import shutil 6 | import logging 7 | import argparse 8 | import configparser 9 | 10 | from simulation.envs.utils.robot import Robot 11 | from control.utils.trainer import Trainer 12 | from control.utils.memory import ReplayMemory 13 | from control.utils.explorer import Explorer 14 | from control.policy.policy_factory import policy_factory 15 | 16 | # This program throws a lot of RuntimeWarnings that can be ignored, so we suppress warnings. 17 | # If you are trying to troubleshoot, please comment out these lines. Thanks! 18 | import warnings 19 | warnings.filterwarnings('ignore', category=RuntimeWarning) 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser('Parse configuration file') 23 | parser.add_argument('--policy', type=str, default='sarl') 24 | parser.add_argument('--env_config', type=str, default='configs/env.config') 25 | parser.add_argument('--policy_config', type=str, default='configs/policy.config') 26 | parser.add_argument('--train_config', type=str, default='configs/train.config') 27 | parser.add_argument('--output_dir', type=str, default='model/') 28 | parser.add_argument('--gpu', default=False, action='store_true') 29 | parser.add_argument('--debug', default=False, action='store_true') 30 | parser.add_argument('--resume', default=False, action='store_true') 31 | args = parser.parse_args() 32 | 33 | # configure paths 34 | make_new_dir = True 35 | if os.path.exists(args.output_dir): 36 | key = input('Output directory already exists! Overwrite the folder? 
(y/n)') 37 | if key == 'y' and not args.resume: 38 | shutil.rmtree(args.output_dir) 39 | else: 40 | make_new_dir = False 41 | args.env_config = os.path.join(args.output_dir, os.path.basename(args.env_config)) 42 | args.policy_config = os.path.join(args.output_dir, os.path.basename(args.policy_config)) 43 | args.train_config = os.path.join(args.output_dir, os.path.basename(args.train_config)) 44 | if make_new_dir: 45 | os.makedirs(args.output_dir) 46 | shutil.copy(args.env_config, args.output_dir) 47 | shutil.copy(args.policy_config, args.output_dir) 48 | shutil.copy(args.train_config, args.output_dir) 49 | log_file = os.path.join(args.output_dir, 'output.log') 50 | il_weight_file = os.path.join(args.output_dir, 'il_model.pth') 51 | rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth') 52 | 53 | # configure logging and device 54 | mode = 'a' if args.resume else 'w' 55 | file_handler = logging.FileHandler(log_file, mode=mode) 56 | stdout_handler = logging.StreamHandler(sys.stdout) 57 | level = logging.INFO if not args.debug else logging.DEBUG 58 | logging.basicConfig(level=level, handlers=[stdout_handler, file_handler], 59 | format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S") 60 | device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu") 61 | logging.info('Using device: %s', device) 62 | 63 | # configure policy 64 | policy = policy_factory[args.policy]() 65 | if not policy.trainable: 66 | parser.error('Policy has to be trainable.') 67 | if args.policy_config is None: 68 | parser.error('Policy config has to be specified for a trainable network.') 69 | policy_config = configparser.RawConfigParser() 70 | policy_config.read(args.policy_config) 71 | policy.configure(policy_config) 72 | policy.set_device(device) 73 | 74 | # configure environment 75 | env_config = configparser.RawConfigParser() 76 | env_config.read(args.env_config) 77 | env = gym.make('CrowdSim-v0') 78 | env.configure(env_config) 79 | robot = Robot(env_config, 'robot') 80 | env.set_robot(robot) 81 | 82 | # read training parameters 83 | if args.train_config is None: 84 | parser.error('Train config file has to be specified for a trainable network.') 85 | train_config = configparser.RawConfigParser() 86 | train_config.read(args.train_config) 87 | rl_learning_rate = train_config.getfloat('train', 'rl_learning_rate') 88 | train_batches = train_config.getint('train', 'train_batches') 89 | train_episodes = train_config.getint('train', 'train_episodes') 90 | sample_episodes = train_config.getint('train', 'sample_episodes') 91 | target_update_interval = train_config.getint('train', 'target_update_interval') 92 | evaluation_interval = train_config.getint('train', 'evaluation_interval') 93 | capacity = train_config.getint('train', 'capacity') 94 | epsilon_start = train_config.getfloat('train', 'epsilon_start') 95 | epsilon_end = train_config.getfloat('train', 'epsilon_end') 96 | epsilon_decay = train_config.getfloat('train', 'epsilon_decay') 97 | checkpoint_interval = train_config.getint('train', 'checkpoint_interval') 98 | randomness_start = train_config.getfloat('train', 'randomness_start') 99 | randomness_end = train_config.getfloat('train', 'randomness_end') 100 | randomness_step = train_config.getfloat('train', 'randomness_step') 101 | 102 | # configure trainer and explorer 103 | model = policy.get_model() 104 | memory = ReplayMemory(capacity) 105 | batch_size = train_config.getint('trainer', 'batch_size') 106 | max_agents = train_config.getint('trainer', 'max_agents') 107 | 
trainer = Trainer(model, memory, device, batch_size) 108 | explorer = Explorer(env, robot, device, memory, policy.gamma, policy, max_agents) 109 | 110 | # imitation learning 111 | if args.resume: 112 | if not os.path.exists(rl_weight_file): 113 | logging.error('RL weights file does not exist; cannot resume training.') 114 | model.load_state_dict(torch.load(rl_weight_file)) 115 | rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth') 116 | logging.info('Resuming training with RL trained weights.') 117 | elif os.path.exists(il_weight_file): 118 | model.load_state_dict(torch.load(il_weight_file)) 119 | logging.info('Loading imitation learning trained weights.') 120 | else: 121 | il_episodes = train_config.getint('imitation_learning', 'il_episodes') 122 | il_policy = train_config.get('imitation_learning', 'il_policy') 123 | il_epochs = train_config.getint('imitation_learning', 'il_epochs') 124 | il_learning_rate = train_config.getfloat('imitation_learning', 'il_learning_rate') 125 | human_randomness = train_config.getfloat('imitation_learning', 'human_randomness') 126 | trainer.set_learning_rate(il_learning_rate) 127 | if robot.visible: 128 | safety_space = 0 129 | else: 130 | safety_space = train_config.getfloat('imitation_learning', 'safety_space') 131 | il_policy = policy_factory[il_policy]() 132 | il_policy.multiagent_training = policy.multiagent_training 133 | il_policy.safety_space = safety_space 134 | robot.set_policy(il_policy) 135 | explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True, max_epsilon=human_randomness) 136 | trainer.optimize_epoch(il_epochs) 137 | torch.save(model.state_dict(), il_weight_file) 138 | logging.info('Finished imitation learning and saved weights.') 139 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity) 140 | explorer.update_target_model(model) 141 | 142 | # reinforcement learning 143 | policy.set_env(env) 144 | robot.set_policy(policy) 145 | robot.print_info() 146 | trainer.set_learning_rate(rl_learning_rate) 147 | # fill the memory pool with some RL experience 148 | if args.resume: 149 | robot.policy.set_epsilon(epsilon_end) 150 | explorer.run_k_episodes(100, 'train', update_memory=True, episode=0) 151 | logging.info('Experience set size: %d/%d', len(memory), memory.capacity) 152 | 153 | episode = 0 154 | max_randomness = randomness_start 155 | randomness_episodes = (randomness_end - randomness_start) / randomness_step + 1 156 | randomness_interval = round(train_episodes / randomness_episodes) 157 | print("\n\nStarting epsilon-greedy RL training with {} stages starting from epsilon = {} and going to epsilon = {}.".format(train_episodes, epsilon_start, epsilon_end)) 158 | print("Note: This includes curriculum training, with maximum pedestrian randomness starting at {} and going up to {}.\nMaximum pedestrian randomness increases by {} every {} RL training stages.".format(randomness_start, randomness_end, randomness_step, randomness_interval)) 159 | while episode < train_episodes: 160 | if args.resume: 161 | epsilon = epsilon_end 162 | else: 163 | if episode < epsilon_decay: 164 | epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode 165 | else: 166 | epsilon = epsilon_end 167 | robot.policy.set_epsilon(epsilon) 168 | 169 | print("\n========================================") 170 | print("Stage {} of {}: epsilon-greedy = {}".format(episode + 1, train_episodes, epsilon)) 171 | print("========================================\n") 172 | 173 | 174 | # update the 
maximum randomness of humans 175 | if episode != 0 and episode % randomness_interval == 0: 176 | max_randomness += randomness_step 177 | 178 | # evaluate the model 179 | if episode % evaluation_interval == 0: 180 | explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode, max_epsilon=0.5) 181 | 182 | # sample k episodes into memory and optimize over the generated memory 183 | explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode, max_epsilon=max_randomness) 184 | trainer.optimize_batch(train_batches) 185 | episode += 1 186 | 187 | if episode % target_update_interval == 0: 188 | explorer.update_target_model(model) 189 | 190 | if episode != 0 and episode % checkpoint_interval == 0: 191 | torch.save(model.state_dict(), rl_weight_file) 192 | 193 | # final test 194 | explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode, max_epsilon=max_randomness) 195 | 196 | 197 | if __name__ == '__main__': 198 | main() 199 | -------------------------------------------------------------------------------- /control/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarapohland/stranger-danger/84a6bad96de4f117d6192b5111eba5531e766e26/control/utils/__init__.py -------------------------------------------------------------------------------- /control/utils/explorer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import time 4 | import copy 5 | import torch 6 | import pickle 7 | import logging 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from simulation.envs.utils.info import * 12 | 13 | 14 | def average(input_list): 15 | if input_list: 16 | return sum(input_list) / len(input_list) 17 | else: 18 | return 0 19 | 20 | def curvature(p0, p1, p2): 21 | area = _get_area(p0, p1, p2) 22 | d0 = _get_dist(p0, p1) 23 | d1 = _get_dist(p1, p2) 24 | d2 = _get_dist(p2, p0) 25 | curv = 4*area/(d0*d1*d2) 26 | return 0 if math.isnan(curv) else curv 27 | 28 | def _get_area(p0, p1, p2): 29 | return (p1[0] - p0[0])*(p2[1] - p0[1]) - (p1[1] - p0[1])*(p2[0] - p0[0]) 30 | 31 | def _get_dist(p0, p1): 32 | return np.sqrt((p0[0] - p1[0])**2 + (p0[1] - p1[1])**2) 33 | 34 | def collision_blame(h_pos, h_vel, r_pos, r_vel): 35 | w = np.array(h_pos) - np.array(r_pos) 36 | w = w / np.linalg.norm(w) 37 | robot_blame = r_vel[0] * w[0] + r_vel[1] * w[1] 38 | human_blame = -(h_vel[0] * w[0] + h_vel[1] * w[1]) 39 | return robot_blame - human_blame 40 | 41 | def discomfort_dist(num_hum): 42 | m = (0.1 - 0.45) / (20 - 6) 43 | b = 0.1 - 20 * m 44 | y = m * num_hum + b 45 | return np.clip(y, 0.1, 0.45) 46 | 47 | 48 | class Explorer(object): 49 | def __init__(self, env, robot, device, memory=None, gamma=None, target_policy=None, max_agents=None, stats_file=None): 50 | self.env = env 51 | self.robot = robot 52 | self.device = device 53 | self.memory = memory 54 | self.gamma = gamma 55 | self.target_policy = target_policy 56 | self.max_agents = max_agents 57 | self.target_model = None 58 | self.stats_file = stats_file 59 | 60 | def update_target_model(self, target_model): 61 | self.target_model = copy.deepcopy(target_model) 62 | 63 | def run_k_episodes(self, k, phase, update_memory=False, episode=None, 64 | imitation_learning=False, print_failure=False, max_epsilon=0, estimate_eps=False): 65 | self.robot.policy.set_phase(phase) 66 | 67 | success_times = [] 68 | timeout_times = [] 69 | collision_times = [] 70 | 71 | min_dist = [] 72 | 
min_dist_danger = [] 73 | cumulative_rewards = [] 74 | 75 | timeout_cases = [] 76 | collision_cases = [] 77 | # collision_positions = [] 78 | 79 | avg_robot_vel = [] 80 | avg_robot_acc = [] 81 | avg_robot_jer = [] 82 | avg_human_vel = [] 83 | avg_human_acc = [] 84 | avg_human_jer = [] 85 | 86 | success = 0 87 | collision = 0 88 | timeout = 0 89 | too_close = 0 90 | 91 | stats = {'test_case':[],'scenario':[],'perpetual':[],'num_humans':[],'open_space':[],'navigation_time':[], 92 | 'result':[],'num_collisions':[],'collision_times':[],'collision_positions':[],'collision_blames':[], 93 | 'inference_time_mean':[],'inference_time_std':[],'inference_time_min':[],'inference_time_max':[], 94 | 'min_dist_mean':[],'min_dist_std':[],'min_dist_min':[], 95 | 'vel_mean':[],'vel_std':[],'acc_mean':[],'acc_std':[],'jerk_mean':[],'jerk_std':[], 96 | 'curvature_mean': [], 'curvature_std': [], 97 | 'time_intruded':[],'intruded_min_dist_mean':[],'intruded_min_dist_std':[], 98 | 'intruded_vel_mean':[],'intruded_vel_std':[],'intruded_acc_mean':[], 99 | 'intruded_acc_std':[],'intruded_jerk_mean':[],'intruded_jerk_std':[]} 100 | 101 | # Some printouts to help explain the following tqdm loading bar 102 | if imitation_learning: 103 | print("Generating data with ORCA for imitation learning ({} episodes with a maximum pedestrian randomness of {}).".format(k, max_epsilon)) 104 | elif phase == 'train': 105 | print("Generating data for training ({} episodes with a maximum pedestrian randomness of {}).".format(k, max_epsilon)) 106 | elif phase == 'val': 107 | print("Validating on {} episodes with a maximum pedestrian randomness of {}.".format(k, max_epsilon)) 108 | elif phase == 'test': 109 | print("Testing on {} episodes with a maximum pedestrian randomness of {}.".format(k, max_epsilon)) 110 | 111 | from tqdm import tqdm 112 | for trial in tqdm(range(k)): 113 | ob = self.env.reset(phase, max_epsilon=max_epsilon) 114 | 115 | done = False 116 | result = None 117 | states = [] 118 | actions = [] 119 | rewards = [] 120 | epsilons = [] 121 | robot_pos = [] 122 | robot_vel = [] 123 | human_vel = [] 124 | perf_time = [] 125 | dmins = [] 126 | intruded_dmins = [] 127 | times_intruded = 0 128 | absolute_minimum_distance = np.inf 129 | vel_intruded = [] 130 | episode_collision_times = [] 131 | episode_collision_pos = [] 132 | episode_collision_blames = [] 133 | 134 | new_eps = None 135 | eps = None 136 | exp_alpha = 0.8 137 | 138 | while not done: 139 | perf_start = time.perf_counter() 140 | new_eps = self.env.get_epsilons(estimate_eps) 141 | if eps is None: 142 | eps = new_eps 143 | else: 144 | eps = exp_alpha*eps + (1 - exp_alpha)*new_eps 145 | action = self.robot.act(ob, eps) 146 | perf_end = time.perf_counter() 147 | perf_time.append(perf_end - perf_start) 148 | 149 | ob, reward, done, info = self.env.step(action) 150 | states.append(self.robot.policy.last_state) 151 | actions.append(action) 152 | rewards.append(reward) 153 | epsilons.append(eps) 154 | 155 | robot_pos.append([self.robot.px, self.robot.py]) 156 | robot_vel.append([self.robot.vx, self.robot.vy]) 157 | for human in self.env.humans: 158 | human_vel.append([human.vx, human.vy]) 159 | 160 | if isinstance(info, Danger): 161 | too_close += 1 162 | min_dist_danger.append(info.min_dist) 163 | 164 | if isinstance(info, HumanCollision): 165 | h_pos = info.human.get_position() 166 | h_vel = info.human.get_velocity() 167 | r_pos = self.robot.get_position() 168 | r_vel = self.robot.get_velocity() 169 | 170 | episode_collision_times.append(self.env.global_time) 171 | 
episode_collision_pos.append([round(coord,4) for coord in info.coll_pos]) 172 | episode_collision_blames.append(collision_blame(h_pos, h_vel, r_pos, r_vel)) 173 | 174 | dmins.append(self.env.dmin) 175 | absolute_minimum_distance = min(absolute_minimum_distance, self.env.dmin) 176 | 177 | # record intrusions into personal space 178 | disc_dist = discomfort_dist(len(self.env.humans)) 179 | if self.env.dmin < disc_dist: 180 | times_intruded += 1 181 | intruded_dmins.append(self.env.dmin) 182 | vel_intruded.append([self.robot.vx, self.robot.vy]) 183 | 184 | num_collisions = len(episode_collision_times) 185 | 186 | if num_collisions > 0: 187 | collision += 1 188 | collision_cases.append(trial) 189 | collision_times.append(episode_collision_times) 190 | result = 'HumanCollision' 191 | 192 | else: 193 | if isinstance(info, ReachGoal): 194 | success += 1 195 | success_times.append(self.env.global_time) 196 | result = 'ReachGoal' 197 | elif isinstance(info, HumanCollision): 198 | collision += 1 199 | collision_cases.append(trial) 200 | collision_times.append(self.env.global_time) 201 | result = 'HumanCollision' 202 | elif isinstance(info, Timeout): 203 | timeout += 1 204 | timeout_cases.append(trial) 205 | timeout_times.append(self.env.time_limit) 206 | result = 'Timeout' 207 | else: 208 | raise ValueError('Invalid end signal from environment') 209 | 210 | timeStep = self.env.time_step 211 | 212 | # Robot navigation metrics 213 | curves = [] 214 | for i in range(len(robot_pos)-4): 215 | p0 = robot_pos[i] 216 | p1 = robot_pos[i+2] 217 | p2 = robot_pos[i+4] 218 | curves.append(np.abs(curvature(p0, p1, p2))) 219 | 220 | robot_vel = np.array(robot_vel) 221 | robotDF = pd.DataFrame(robot_vel, columns=['vx', 'vy']) 222 | robotDF[['ax', 'ay']] = robotDF[['vx', 'vy']].diff(4)#/timeStep 223 | robotDF[['jx', 'jy']] = robotDF[['ax', 'ay']].diff(4)#/timeStep 224 | robotDF['vel'] = np.sqrt(np.square(robotDF[['vx', 'vy']]).sum(axis=1)) 225 | robotDF['acc'] = np.sqrt(np.square(robotDF[['ax', 'ay']]).sum(axis=1)) 226 | robotDF['jer'] = np.sqrt(np.square(robotDF[['jx', 'jy']]).sum(axis=1)) 227 | avg_robot_vel.append(np.mean(robotDF['vel'])) 228 | avg_robot_acc.append(np.mean(robotDF['acc'])) 229 | avg_robot_jer.append(np.mean(robotDF['jer'])) 230 | 231 | # Human navigation metrics 232 | if human_vel: 233 | human_vel = np.array(human_vel) 234 | humanDF = pd.DataFrame(human_vel, columns=['vx', 'vy']) 235 | humanDF[['ax', 'ay']] = humanDF[['vx', 'vy']].diff(4)#/timeStep 236 | humanDF[['jx', 'jy']] = humanDF[['ax', 'ay']].diff(4)#/timeStep 237 | humanDF['vel'] = np.sqrt(np.square(humanDF[['vx', 'vy']]).sum(axis=1)) 238 | humanDF['acc'] = np.sqrt(np.square(humanDF[['ax', 'ay']]).sum(axis=1)) 239 | humanDF['jer'] = np.sqrt(np.square(humanDF[['jx', 'jy']]).sum(axis=1)) 240 | avg_human_vel.append(np.mean(humanDF['vel'])) 241 | avg_human_acc.append(np.mean(humanDF['acc'])) 242 | avg_human_jer.append(np.mean(humanDF['jer'])) 243 | 244 | if len(vel_intruded) > 0: 245 | vel_intruded = np.array(vel_intruded) 246 | intrudedDF = pd.DataFrame(vel_intruded, columns=['vx', 'vy']) 247 | intrudedDF[['ax', 'ay']] = intrudedDF[['vx', 'vy']].diff(4)#/timeStep 248 | intrudedDF[['jx', 'jy']] = intrudedDF[['ax', 'ay']].diff(4)#/timeStep 249 | intrudedDF['vel'] = np.sqrt(np.square(intrudedDF[['vx', 'vy']]).sum(axis=1)) 250 | intrudedDF['acc'] = np.sqrt(np.square(intrudedDF[['ax', 'ay']]).sum(axis=1)) 251 | intrudedDF['jer'] = np.sqrt(np.square(intrudedDF[['jx', 'jy']]).sum(axis=1)) 252 | intrude_list = [] 253 | 
intrude_list.append(np.mean(intrudedDF['vel'])) # intruded_vel_mean 254 | intrude_list.append(np.std(intrudedDF['vel'])) # intruded_vel_std 255 | intrude_list.append(np.mean(intrudedDF['acc'])) # intruded_acc_mean 256 | intrude_list.append(np.std(intrudedDF['acc'])) # intruded_acc_std 257 | intrude_list.append(np.mean(intrudedDF['jer'])) # intruded_jerk_mean 258 | intrude_list.append(np.std(intrudedDF['jer'])) # intruded_jerk_std 259 | else: 260 | intrude_list = [np.nan for _ in range(6)] 261 | 262 | # Record some metrics into the stats file 263 | stats['test_case'].append(trial) 264 | stats['scenario'].append(self.env.scenario) 265 | stats['perpetual'].append(self.env.perpetual) 266 | stats['num_humans'].append(len(self.env.humans)) 267 | stats['open_space'].append(self.env.open_space) 268 | stats['navigation_time'].append(self.env.global_time) 269 | stats['result'].append(result) 270 | stats['num_collisions'].append(num_collisions) 271 | stats['collision_times'].append(episode_collision_times) 272 | stats['collision_positions'].append(episode_collision_pos) 273 | stats['collision_blames'].append(episode_collision_blames) 274 | stats['inference_time_mean'].append(np.mean(perf_time)) 275 | stats['inference_time_std'].append(np.std(perf_time)) 276 | stats['inference_time_min'].append(np.min(perf_time)) 277 | stats['inference_time_max'].append(np.max(perf_time)) 278 | stats['min_dist_mean'].append(np.mean(dmins)) 279 | stats['min_dist_std'].append(np.std(dmins)) 280 | stats['min_dist_min'].append(absolute_minimum_distance) 281 | stats['curvature_mean'].append(np.mean(curves)) 282 | stats['curvature_std'].append(np.std(curves)) 283 | stats['vel_mean'].append(np.mean(robotDF['vel'])) 284 | stats['vel_std'].append(np.std(robotDF['vel'])) 285 | stats['acc_mean'].append(np.mean(robotDF['acc'])) 286 | stats['acc_std'].append(np.std(robotDF['acc'])) 287 | stats['jerk_mean'].append(np.mean(robotDF['jer'])) 288 | stats['jerk_std'].append(np.std(robotDF['jer'])) 289 | stats['time_intruded'].append(timeStep*times_intruded) 290 | stats['intruded_min_dist_mean'].append(np.mean(intruded_dmins)) 291 | stats['intruded_min_dist_std'].append(np.std(intruded_dmins)) 292 | stats['intruded_vel_mean'].append(intrude_list[0]) 293 | stats['intruded_vel_std'].append(intrude_list[1]) 294 | stats['intruded_acc_mean'].append(intrude_list[2]) 295 | stats['intruded_acc_std'].append(intrude_list[3]) 296 | stats['intruded_jerk_mean'].append(intrude_list[4]) 297 | stats['intruded_jerk_std'].append(intrude_list[5]) 298 | 299 | if self.env.dmin != float('inf'): 300 | min_dist.append(self.env.dmin) 301 | 302 | if update_memory: 303 | if isinstance(info, ReachGoal) or isinstance(info, HumanCollision): 304 | # only add positive(success) or negative(collision) experience in experience set 305 | self.update_memory(states, actions, rewards, epsilons, imitation_learning) 306 | 307 | cumulative_rewards.append(sum([pow(self.gamma, t * self.robot.time_step * self.robot.v_pref) 308 | * reward for t, reward in enumerate(rewards)])) 309 | 310 | stats_df = pd.DataFrame(stats) 311 | if self.stats_file is not None: 312 | stats_df.to_csv(self.stats_file, index=False) 313 | 314 | success_rate = success / k 315 | collision_rate = collision / k 316 | timeout_rate = timeout / k 317 | assert success + collision + timeout == k 318 | avg_nav_time = sum(success_times) / len(success_times) if success_times else self.env.time_limit 319 | 320 | extra_info = '' if episode is None else 'in episode {} '.format(episode) 321 | logging.info('{:<5} {} Success 
rate: {:.2f}, Collision rate: {:.2f}, Timeout rate: {:.2f}, Nav time: {:.2f}'. 322 | format(phase.upper(), extra_info, success_rate, collision_rate, timeout_rate, avg_nav_time)) 323 | 324 | if phase in ['val', 'test']: 325 | if min_dist: 326 | logging.info('Average minimum distance between robot and pedestrian: %.2f', average(min_dist)) 327 | else: 328 | logging.info('Average minimum distance between robot and pedestrian: %.2f', -1) 329 | 330 | if min_dist_danger: 331 | logging.info('Average minimum distance when robot is too close: %.2f', average(min_dist_danger)) 332 | else: 333 | logging.info('Average minimum distance when robot is too close: %.2f', -1) 334 | 335 | avg_avg_robot_vel = sum(avg_robot_vel) / len(avg_robot_vel) 336 | avg_avg_robot_acc = sum(avg_robot_acc) / len(avg_robot_acc) 337 | avg_avg_robot_jer = sum(avg_robot_jer) / len(avg_robot_jer) 338 | 339 | logging.info('Avg robot speed: {:.2f}, Avg robot accel: {:.2f}, Avg robot jerk: {:.2f}'. 340 | format(avg_avg_robot_vel, avg_avg_robot_acc, avg_avg_robot_jer)) 341 | 342 | if avg_human_vel: 343 | avg_avg_human_vel = sum(avg_human_vel) / len(avg_human_vel) 344 | avg_avg_human_acc = sum(avg_human_acc) / len(avg_human_acc) 345 | avg_avg_human_jer = sum(avg_human_jer) / len(avg_human_jer) 346 | else: 347 | avg_avg_human_vel = -1 348 | avg_avg_human_acc = -1 349 | avg_avg_human_jer = -1 350 | 351 | logging.info('Avg pedestrian speed: {:.2f}, Avg pedestrian accel: {:.2f}, Avg pedestrian jerk: {:.2f}'. 352 | format(avg_avg_human_vel, avg_avg_human_acc, avg_avg_human_jer)) 353 | 354 | if print_failure: 355 | logging.info('Collision cases: ' + ' '.join([str(x) for x in collision_cases])) 356 | logging.info('Timeout cases: ' + ' '.join([str(x) for x in timeout_cases])) 357 | # logging.info('Collision locations: ' + ' '.join([str([round(x[0], 2), round(x[1], 2)]) for x in collision_positions])) 358 | 359 | def update_memory(self, states, actions, rewards, epsilons, imitation_learning=False): 360 | if self.memory is None or self.gamma is None: 361 | raise ValueError('Memory or gamma value is not set!') 362 | 363 | for i, state in enumerate(states): 364 | reward = rewards[i] 365 | epsilon = epsilons[i] 366 | 367 | # VALUE UPDATE 368 | if imitation_learning: 369 | # define the value of states in IL as cumulative discounted rewards, which is the same in RL 370 | state = self.target_policy.transform(state, epsilon) 371 | value = sum([pow(self.gamma, max(t - i, 0) * self.robot.time_step * self.robot.v_pref) * reward 372 | * (1 if t >= i else 0) for t, reward in enumerate(rewards)]) 373 | else: 374 | if i == len(states) - 1: 375 | value = reward 376 | else: 377 | next_state = states[i + 1] 378 | gamma_bar = pow(self.gamma, self.robot.time_step * self.robot.v_pref) 379 | value = reward + gamma_bar * self.target_model(next_state.unsqueeze(0)).data.item() 380 | value = torch.Tensor([value]).to(self.device) 381 | 382 | # transform state of different human_num into fixed-size tensor 383 | if self.max_agents is not None and self.max_agents > 0: 384 | if len(state.size()) == 1: 385 | human_num = 1 386 | feature_size = state.size()[0] 387 | else: 388 | human_num, feature_size = state.size() 389 | if human_num != self.max_agents: 390 | padding = torch.zeros((self.max_agents - human_num, feature_size)) 391 | state = torch.cat([state, padding]) 392 | self.memory.push((state, value)) 393 | -------------------------------------------------------------------------------- /control/utils/memory.py: 
-------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | 4 | class ReplayMemory(Dataset): 5 | def __init__(self, capacity): 6 | self.capacity = capacity 7 | self.memory = list() 8 | self.position = 0 9 | 10 | def push(self, item): 11 | # replace old experience with new experience 12 | if len(self.memory) < self.position + 1: 13 | self.memory.append(item) 14 | else: 15 | self.memory[self.position] = item 16 | self.position = (self.position + 1) % self.capacity 17 | 18 | def is_full(self): 19 | return len(self.memory) == self.capacity 20 | 21 | def __getitem__(self, item): 22 | return self.memory[item] 23 | 24 | def __len__(self): 25 | return len(self.memory) 26 | 27 | def clear(self): 28 | self.memory = list() 29 | -------------------------------------------------------------------------------- /control/utils/plot.py: -------------------------------------------------------------------------------- 1 | import re 2 | import argparse 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def running_mean(x, n): 8 | cumsum = np.cumsum(np.insert(x, 0, 0)) 9 | return (cumsum[n:] - cumsum[:-n]) / float(n) 10 | 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('model_dir', type=str) 15 | parser.add_argument('--plot_sr', default=False, action='store_true') 16 | parser.add_argument('--plot_cr', default=False, action='store_true') 17 | parser.add_argument('--plot_tr', default=False, action='store_true') 18 | parser.add_argument('--window_size', type=int, default=200) 19 | args = parser.parse_args() 20 | 21 | ax1_legends = ['Training', 'Validation'] 22 | 23 | # Read output log file 24 | with open(args.model_dir + '/output.log', 'r') as file: 25 | log = file.read() 26 | 27 | val_pattern = r"VAL in episode (?P\d+) Success rate: (?P[0-1].\d+), " \ 28 | r"Collision rate: (?P[0-1].\d+), Timeout rate: (?P[0-1].\d+), Nav time: (?P