├── LICENSE ├── README.md ├── datasets ├── __init__.py ├── argoverse │ ├── README.md │ ├── __init__.py │ ├── create_h5_argo.py │ └── dataset.py ├── interaction_dataset │ ├── README.md │ ├── __init__.py │ ├── create_h5_indst.py │ ├── dataset.py │ └── utils.py ├── nuscenes │ ├── README.md │ ├── __init__.py │ ├── create_h5_nusc.py │ ├── dataset.py │ └── raw_dataset.py └── trajnetpp │ ├── README.md │ ├── __init__.py │ ├── create_data_npys.py │ └── dataset.py ├── evaluate.py ├── models ├── __init__.py ├── autobot_ego.py ├── autobot_joint.py └── context_encoders.py ├── process_args.py ├── requirements.txt ├── train.py ├── useful_scripts ├── generate_argoverse_test.py ├── generate_indst_test.py └── generate_nuscene_results.py └── utils ├── __init__.py ├── metric_helpers.py └── train_helpers.py /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Roger Girgis 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoBots Official Repository 2 | 3 | This repository is the official implementation of the [AutoBots](https://arxiv.org/abs/2104.00563) architectures. 4 | We include support for the following datasets: 5 | - [nuScenes](https://www.nuscenes.org/nuscenes) (ego-agent, multi-agent) 6 | - [Argoverse](https://www.argoverse.org/av1.html) (ego-agent) 7 | - [TrajNet++](https://www.aicrowd.com/challenges/trajnet-a-trajectory-forecasting-challenge) Synthetic dataset (multi-agent) 8 | - [Interaction-Dataset](https://interaction-dataset.com/) (multi-agent) 9 | 10 | Visit our [webpage](https://fgolemo.github.io/autobots/) for more information. 11 | 12 | ### Getting Started 13 | 14 | 1. Create a python 3.7 environment. I use Miniconda3 and create with 15 | `conda create --name AutoBots python=3.7` 16 | 2. Run `pip install -r requirements.txt` 17 | 18 | That should be it! 
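To quickly confirm the environment is usable before moving on, a minimal check like the following should run without errors (it assumes `torch`, `h5py` and `numpy` are among the pinned requirements, since `train.py` and the dataset classes import them):

```python
# Minimal environment check; torch, h5py and numpy are assumed to be pinned in
# requirements.txt because train.py and the dataset classes depend on them.
import torch
import h5py
import numpy as np

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("h5py", h5py.__version__, "| numpy", np.__version__)
```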
19 | 20 | ### Setup the datasets 21 | 22 | Follow the instructions in the READMEs of each dataset (found in `datasets`). 23 | 24 | ### Training an AutoBot model 25 | 26 | All experiments were performed locally on a single GTX 1080Ti. 27 | The trained models will be saved in `results/{Dataset}/{exp_name}`. 28 | 29 | #### nuScenes 30 | Training AutoBot-Ego on nuScenes while using the raw road segments in the map: 31 | ``` 32 | python train.py --exp-id test --seed 1 --dataset Nuscenes --model-type Autobot-Ego --num-modes 10 --hidden-size 128 --num-encoder-layers 2 --num-decoder-layers 2 --dropout 0.1 --entropy-weight 40.0 --kl-weight 20.0 --use-FDEADE-aux-loss True --use-map-lanes True --tx-hidden-size 384 --batch-size 64 --learning-rate 0.00075 --learning-rate-sched 10 20 30 40 50 --dataset-path /path/to/root/of/nuscenes_h5_files 33 | ``` 34 | 35 | Training AutoBot-Ego on nuScenes while using the Birds-eye-view image of the road network: 36 | ``` 37 | python train.py --exp-id test --seed 1 --dataset Nuscenes --model-type Autobot-Ego --num-modes 10 --hidden-size 128 --num-encoder-layers 2 --num-decoder-layers 2 --dropout 0.1 --entropy-weight 40.0 --kl-weight 20.0 --use-FDEADE-aux-loss True --use-map-image True --tx-hidden-size 384 --batch-size 64 --learning-rate 0.00075 --learning-rate-sched 10 20 30 40 50 --dataset-path /path/to/root/of/nuscenes_h5_files 38 | ``` 39 | 40 | Training AutoBot-Joint on nuScenes while using the raw road segments in the map: 41 | ``` 42 | python train.py --exp-id test --seed 1 --dataset Nuscenes --model-type Autobot-Joint --num-modes 10 --hidden-size 128 --num-encoder-layers 2 --num-decoder-layers 2 --dropout 0.1 --entropy-weight 40.0 --kl-weight 20.0 --use-FDEADE-aux-loss True --use-map-lanes True --tx-hidden-size 384 --batch-size 64 --learning-rate 0.00075 --learning-rate-sched 10 20 30 40 50 --dataset-path /path/to/root/of/nuscenes_h5_files 43 | ``` 44 | 45 | #### Argoverse 46 | Training AutoBot-Ego on Argoverse while using the raw road segments in the map: 47 | ``` 48 | python train.py --exp-id test --seed 1 --dataset Argoverse --model-type Autobot-Ego --num-modes 6 --hidden-size 128 --num-encoder-layers 2 --num-decoder-layers 2 --dropout 0.1 --entropy-weight 40.0 --kl-weight 20.0 --use-FDEADE-aux-loss True --use-map-lanes True --tx-hidden-size 384 --batch-size 64 --learning-rate 0.00075 --learning-rate-sched 10 20 30 40 50 --dataset-path /path/to/root/of/argoverse_h5_files 49 | ``` 50 | 51 | #### TrajNet++ 52 | Training AutoBot-Joint on TrajNet++: 53 | ``` 54 | python train.py --exp-id test --seed 1 --dataset trajnet++ --model-type Autobot-Joint --num-modes 6 --hidden-size 128 --num-encoder-layers 2 --num-decoder-layers 2 --dropout 0.1 --entropy-weight 40.0 --kl-weight 20.0 --use-FDEADE-aux-loss True --tx-hidden-size 384 --batch-size 64 --learning-rate 0.00075 --learning-rate-sched 10 20 30 40 50 --dataset-path /path/to/root/of/npy_files 55 | ``` 56 | 57 | #### Interaction-Dataset 58 | Training AutoBot-Joint on the Interaction-Dataset while using the raw road segments in the map: 59 | ``` 60 | python train.py --exp-id test --seed 1 --dataset interaction-dataset --model-type Autobot-Joint --num-modes 6 --hidden-size 128 --num-encoder-layers 2 --num-decoder-layers 2 --dropout 0.1 --entropy-weight 40.0 --kl-weight 20.0 --use-FDEADE-aux-loss True --tx-hidden-size 384 --batch-size 64 --learning-rate 0.00075 --learning-rate-sched 10 20 30 40 50 --dataset-path /path/to/root/of/interaction_dataset_h5_files 61 | ``` 62 | 63 | 64 | ### Evaluating an AutoBot model 65 | 
66 | For all experiments, you can evaluate the trained model on the validation dataset by running: 67 | ``` 68 | python evaluate.py --dataset-path /path/to/root/of/interaction_dataset_h5_files --models-path results/{Dataset}/{exp_name}/{model_epoch}.pth --batch-size 64 69 | ``` 70 | Note that the batch-size may need to be reduced for the Interaction-dataset since evaluation is performed on all agent scenes. 71 | 72 | 73 | ### Extra scripts 74 | 75 | We also provide extra scripts that can be used for submitting to the nuScenes, Argoverse and Interaction-Dataset 76 | Evaluation server. 77 | 78 | For nuScenes: 79 | 80 | ``` 81 | python useful_scripts/generate_nuscene_results.py --dataset-path /path/to/root/of/nuscenes_h5_files --models-path results/Nuscenes/{exp_name}/{model_epoch}.pth 82 | ``` 83 | 84 | For Argoverse: 85 | 86 | ``` 87 | python useful_scripts/generate_argoverse_test.py --dataset-path /path/to/root/of/argoverse_h5_files --models-path results/Argoverse/{exp_name}/{model_epoch}.pth 88 | ``` 89 | 90 | For the Interaction-Dataset: 91 | 92 | ``` 93 | python useful_scripts/generate_indst_test.py --dataset-path /path/to/root/of/interaction_dataset_h5_files --models-path results/interaction-dataset/{exp_name}/{model_epoch}.pth 94 | ``` 95 | 96 | ## Reference 97 | 98 | If you use this repository, please cite our work: 99 | 100 | ``` 101 | @inproceedings{ 102 | girgis2022latent, 103 | title={Latent Variable Sequential Set Transformers for Joint Multi-Agent Motion Prediction}, 104 | author={Roger Girgis and Florian Golemo and Felipe Codevilla and Martin Weiss and Jim Aldon D'Souza and Samira Ebrahimi Kahou and Felix Heide and Christopher Pal}, 105 | booktitle={International Conference on Learning Representations}, 106 | year={2022}, 107 | url={https://openreview.net/forum?id=Dup_dDqkZC5} 108 | } 109 | ``` 110 | 111 | 112 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/argoverse/README.md: -------------------------------------------------------------------------------- 1 | # Argoverse Python package - Installation 2 | 3 | Follow the instructions [here](https://github.com/argoai/argoverse-api) to 4 | download the dataset files and install the Argoverse API. 5 | 6 | After downloading the dataset files, 7 | extract the contents of each dataset split file such that the final folder 8 | structure of the dataset looks like this: 9 | ``` 10 | argoverse 11 | └──train 12 | └──Argoverse-Terms_of_Use.txt 13 | └──data 14 | └──1.csv 15 | └──2.csv 16 | └──... 17 | └──val 18 | └──Argoverse-Terms_of_Use.txt 19 | └──data 20 | └──1.csv 21 | └──2.csv 22 | └──... 23 | └──test 24 | └──Argoverse-Terms_of_Use.txt 25 | └──data 26 | └──1.csv 27 | └──2.csv 28 | └──... 29 | ``` 30 | 31 | Afterwards, run the following to create the h5 files of the dataset: 32 | 33 | ``` 34 | python create_h5_argo.py --raw-dataset-path /path/to/argoverse --split-name [train/val/test] --output-h5-path /path/to/output/h5_files 35 | 36 | ``` 37 | for both train and val. 
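Once a split has been converted, a quick sketch like the following can confirm its contents (it assumes the default dataset keys written by `create_h5_argo.py`):

```python
# Print the datasets stored in a converted split (keys assumed to match
# those created by create_h5_argo.py).
import h5py

with h5py.File("/path/to/output/h5_files/val_dataset.hdf5", "r") as f:
    for key in ["ego_trajectories", "agents_trajectories", "road_pts", "extras", "orig_egos"]:
        print(key, f[key].shape, f[key].dtype)
# Trajectories hold 50 timesteps for train/val and 20 (history only) for the test split.
```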
38 | 39 | Time to create and disk space taken: 40 | 41 | | Split | Time | Final H5 size | 42 | | ----------- | ----------- | ------------- | 43 | | train | 4 hours | 4 GB | 44 | | val | 1 hour | 770 MB | 45 | | test | 2 hours | 1 GB | 46 | -------------------------------------------------------------------------------- /datasets/argoverse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/datasets/argoverse/__init__.py -------------------------------------------------------------------------------- /datasets/argoverse/create_h5_argo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import h5py 4 | import os 5 | import time 6 | 7 | import numpy as np 8 | from argoverse.map_representation.map_api import ArgoverseMap 9 | from argoverse.data_loading.argoverse_forecasting_loader import ArgoverseForecastingLoader 10 | from typing import Tuple 11 | 12 | 13 | ''' 14 | Data should be extracted from .tar.gz files into "{split_name}/data", e.g. "{args.raw_dataset_path}/train/data/*.csv" 15 | ''' 16 | 17 | 18 | def angle_of_rotation(yaw: float) -> float: 19 | """ 20 | Given a yaw angle (measured from x axis), find the angle needed to rotate by so that 21 | the yaw is aligned with the y axis (pi / 2). 22 | :param yaw: Radians. Output of quaternion_yaw function. 23 | :return: Angle in radians. 24 | """ 25 | return (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 26 | 27 | 28 | def make_2d_rotation_matrix(angle_in_radians: float) -> np.ndarray: 29 | """ 30 | Makes rotation matrix to rotate point in x-y plane counterclockwise 31 | by angle_in_radians. 32 | """ 33 | 34 | return np.array([[np.cos(angle_in_radians), -np.sin(angle_in_radians)], 35 | [np.sin(angle_in_radians), np.cos(angle_in_radians)]]) 36 | 37 | 38 | def convert_global_coords_to_local(coordinates: np.ndarray, translation: Tuple[float, float], yaw: float) -> np.ndarray: 39 | """ 40 | Converts global coordinates to coordinates in the frame given by the rotation quaternion and 41 | centered at the translation vector. The rotation is meant to be a z-axis rotation. 42 | :param coordinates: x,y locations. array of shape [n_steps, 2]. 43 | :param translation: Tuple of (x, y, z) location that is the center of the new frame. 44 | :param yaw: heading angle of agent. 45 | :return: x,y locations in frame stored in array of share [n_times, 2]. 
46 | """ 47 | 48 | yaw = angle_of_rotation(yaw) 49 | transform = make_2d_rotation_matrix(angle_in_radians=yaw) 50 | 51 | coords = (coordinates - np.atleast_2d(np.array(translation))).T 52 | 53 | return np.dot(transform, coords).T[:, :2] 54 | 55 | 56 | def compute_yaw(ego_input): 57 | diff = ego_input[-1] - ego_input[-10] 58 | return np.arctan2(diff[1], diff[0]) 59 | 60 | 61 | def get_args(): 62 | parser = argparse.ArgumentParser(description="Argoverse H5 Creator") 63 | parser.add_argument("--output-h5-path", type=str, required=True, help="output path to H5 files.") 64 | parser.add_argument("--raw-dataset-path", type=str, required=True, help="raw Dataset path to root of extracted files") 65 | parser.add_argument("--split-name", type=str, default="train", help="split-name to create", choices=["train", "val", "test"]) 66 | args = parser.parse_args() 67 | return args 68 | 69 | 70 | if __name__ == "__main__": 71 | args = get_args() 72 | root_dir = os.path.join(args.raw_dataset_path, args.split_name, 'data') 73 | avm = ArgoverseMap() 74 | afl = ArgoverseForecastingLoader(root_dir) # simply change to your local path of the data 75 | start_time = time.time() 76 | 77 | seq_files = sorted(os.listdir(root_dir)) 78 | num_scenes = len(seq_files) 79 | print("Number of files:", num_scenes) 80 | 81 | if "test" in args.split_name: 82 | num_timesteps = 20 83 | else: 84 | num_timesteps = 50 85 | 86 | max_num_roads = 150 # manually found 87 | max_num_agents = 15 # manually found 88 | 89 | f = h5py.File(os.path.join(args.output_h5_path, args.split_name + '_dataset.hdf5'), 'w') 90 | ego_trajectories = f.create_dataset("ego_trajectories", shape=(num_scenes, num_timesteps, 3), 91 | chunks=(1, num_timesteps, 3), dtype=np.float32) 92 | agent_trajectories = f.create_dataset("agents_trajectories", shape=(num_scenes, num_timesteps, max_num_agents, 3), 93 | chunks=(1, num_timesteps, max_num_agents, 3), dtype=np.float32) 94 | road_pts = f.create_dataset("road_pts", shape=(num_scenes, max_num_roads, 10, 3), 95 | chunks=(1, max_num_roads, 10, 3), dtype=np.float16) 96 | extras = f.create_dataset("extras", shape=(num_scenes, 4), chunks=(1, 4), dtype=np.float32) 97 | orig_egos = f.create_dataset("orig_egos", shape=(num_scenes, num_timesteps, 3), chunks=(1, num_timesteps, 3), 98 | dtype=np.float32) 99 | 100 | for i, seq_file in enumerate(seq_files): 101 | if i % 1000 == 0: 102 | print(i) 103 | fname = os.path.join(root_dir, seq_file) 104 | df = afl.get(fname).seq_df 105 | 106 | # Gather agents' trajectories 107 | ego_x = df[df["OBJECT_TYPE"] == "AGENT"]["X"] 108 | ego_y = df[df["OBJECT_TYPE"] == "AGENT"]["Y"] 109 | ego_traj = np.column_stack((ego_x, ego_y, np.ones_like(ego_x))) 110 | ego_timestamps = df[df["OBJECT_TYPE"] == "AGENT"]["TIMESTAMP"].values 111 | 112 | ego_pred_timestamp = ego_timestamps[19] 113 | others_traj = [] 114 | frames = df.groupby("TRACK_ID") 115 | for group_name, group_data in frames: 116 | object_type = group_data["OBJECT_TYPE"].values[0] 117 | if "AGENT" in object_type: # skip ego agent trajectory 118 | continue 119 | 120 | other_timestamp = group_data["TIMESTAMP"].values 121 | if ego_pred_timestamp not in other_timestamp: 122 | # ignore agents that are not there at the prediction time. 123 | continue 124 | 125 | other_x = group_data["X"].values 126 | other_y = group_data["Y"].values 127 | other_traj = np.column_stack((other_x, other_y, np.ones(len(other_x)))) 128 | if 10 <= len(other_traj) < num_timesteps: # if we have an imcomplete trajectory of an 'other' agent. 
129 | temp_other_traj = np.zeros((num_timesteps, 3)) 130 | 131 | for j, timestamp in enumerate(ego_timestamps): 132 | if timestamp == other_timestamp[0]: 133 | temp_other_traj[j:j+len(other_traj)] = other_traj 134 | break 135 | other_traj = temp_other_traj.copy() 136 | elif len(other_traj) < 10: 137 | continue 138 | 139 | if np.linalg.norm(other_traj[19, :2] - ego_traj[19, :2]) > 40: 140 | continue 141 | 142 | others_traj.append(other_traj) 143 | 144 | # Rotating trajectories so that ego is going up. 145 | ego_yaw = compute_yaw(ego_traj[:20, :2]) 146 | rot_ego_traj = convert_global_coords_to_local(ego_traj[:, :2], ego_traj[19, :2], ego_yaw) 147 | rot_ego_traj = np.concatenate((rot_ego_traj, ego_traj[:, 2:]), axis=-1) 148 | rot_others_traj = [] 149 | for other_traj in others_traj: 150 | rot_other_traj = convert_global_coords_to_local(other_traj[:, :2], ego_traj[19, :2], ego_yaw) 151 | rot_other_traj = rot_other_traj * other_traj[:, 2:] 152 | rot_other_traj = np.column_stack((rot_other_traj[:, 0], rot_other_traj[:, 1], other_traj[:, 2])) 153 | rot_others_traj.append(rot_other_traj) 154 | rot_others_traj = np.array(rot_others_traj) 155 | if len(rot_others_traj) == 0: 156 | rot_others_traj = np.zeros((max_num_agents, num_timesteps, 3)) 157 | elif len(rot_others_traj) <= max_num_agents: 158 | temp_rot_others_traj = np.zeros((max_num_agents, num_timesteps, 3)) 159 | temp_rot_others_traj[:len(rot_others_traj)] = rot_others_traj 160 | rot_others_traj = temp_rot_others_traj.copy() 161 | else: 162 | dists = np.linalg.norm(rot_others_traj[:, 19, :2], axis=-1) 163 | closest_inds = np.argsort(dists)[:max_num_agents] 164 | rot_others_traj = rot_others_traj[closest_inds] 165 | 166 | # Get lane centerlines which lie within the range of trajectories and rotate all lanes 167 | city_name = df["CITY_NAME"].values[0] 168 | seq_lane_props = avm.city_lane_centerlines_dict[city_name] 169 | query_bbox = (ego_traj[19, 0] - 50, ego_traj[19, 0] + 50, ego_traj[19, 1] - 50, ego_traj[19, 1] + 50) 170 | lane_centerlines = [] 171 | for lane_id, lane_props in seq_lane_props.items(): 172 | lane_cl = lane_props.centerline 173 | if len(lane_cl) == 1: 174 | continue 175 | 176 | if (np.min(lane_cl[:, 0]) < query_bbox[1] and np.min(lane_cl[:, 1]) < query_bbox[3] 177 | and np.max(lane_cl[:, 0]) > query_bbox[0] and np.max(lane_cl[:, 1]) > query_bbox[2]): 178 | lane_cl = convert_global_coords_to_local(lane_cl[:, :2], ego_traj[19, :2], ego_yaw) 179 | lane_cl = np.concatenate((lane_cl, np.ones((len(lane_cl), 1))), axis=-1) # adding existence mask 180 | if len(lane_cl) < 10: 181 | temp_lane_cl = np.zeros((10, 3)) 182 | temp_lane_cl[:len(lane_cl)] = lane_cl 183 | lane_cl = temp_lane_cl.copy() 184 | elif len(lane_cl) > 10: 185 | print("found lane with more than 10 pts...") 186 | continue 187 | 188 | lane_centerlines.append(lane_cl) 189 | 190 | lane_centerlines = np.array(lane_centerlines) 191 | if len(lane_centerlines) <= max_num_roads: 192 | temp_lane_centerlines = np.zeros((max_num_roads, 10, 3)) 193 | temp_lane_centerlines[:len(lane_centerlines)] = lane_centerlines 194 | lane_centerlines = temp_lane_centerlines.copy() 195 | 196 | extra = [int(seq_file.split(".")[0]), ego_yaw, ego_traj[19, 0], ego_traj[19, 1]] 197 | 198 | ego_trajectories[i] = rot_ego_traj 199 | agent_trajectories[i] = rot_others_traj.transpose(1, 0, 2) 200 | road_pts[i] = lane_centerlines 201 | orig_egos[i] = ego_traj 202 | extras[i] = np.array(extra) 203 | 204 | if (i+1) % 100 == 0: 205 | print("Time taken", time.time() - start_time, "Number of examples", i+1) 206 | 
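The stored trajectories are rotated and translated into the ego frame at timestep 19, while `extras` keeps `[sequence_id, ego_yaw, x_t19, y_t19]` for each scene. A minimal sketch of the inverse transform, assuming those conventions and reusing the helpers defined above, is:

```python
# Sketch: map ego-frame coordinates stored in the H5 back to the original
# global frame, assuming extras = [seq_id, ego_yaw, x_t19, y_t19] as written above.
def convert_local_coords_to_global(coordinates, translation, yaw):
    transform = make_2d_rotation_matrix(-angle_of_rotation(yaw))  # inverse rotation
    return np.dot(transform, coordinates.T).T + np.atleast_2d(np.array(translation))

# e.g. global_ego = convert_local_coords_to_global(rot_ego_traj[:, :2], extra[2:4], extra[1])
```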
-------------------------------------------------------------------------------- /datasets/argoverse/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from torch.utils.data import Dataset 4 | import numpy as np 5 | 6 | 7 | class ArgoH5Dataset(Dataset): 8 | def __init__(self, dset_path, split_name="train", orig_ego=False, use_map_lanes=True): 9 | self.data_root = dset_path 10 | self.split_name = split_name 11 | self.orig_ego = orig_ego 12 | self.pred_horizon = 30 13 | self.num_others = 5 14 | self.map_attr = 2 15 | self.k_attr = 2 16 | self.use_map_lanes = use_map_lanes 17 | self.scene_context_img = True 18 | self.predict_yaw = False 19 | 20 | dataset = h5py.File(os.path.join(self.data_root, split_name+'_dataset.hdf5'), 'r') 21 | self.dset_len = len(dataset["ego_trajectories"]) 22 | 23 | def get_input_output_seqs(self, ego_data, agents_data): 24 | in_len = 20 25 | 26 | # Ego 27 | ego_in = ego_data[:in_len] 28 | ego_out = ego_data[in_len:] 29 | 30 | # Other agents 31 | agents_in = agents_data[:in_len, :self.num_others] 32 | return ego_in, ego_out, agents_in 33 | 34 | def __getitem__(self, idx: int): 35 | dataset = h5py.File(os.path.join(self.data_root, self.split_name + '_dataset.hdf5'), 'r') 36 | ego_data = dataset['ego_trajectories'][idx] 37 | agents_data = dataset['agents_trajectories'][idx] 38 | ego_in, ego_out, agents_in = self.get_input_output_seqs(ego_data, agents_data) 39 | 40 | if self.use_map_lanes: 41 | roads = dataset['road_pts'][idx] 42 | else: 43 | roads = np.zeros((1, 1)) # dummy 44 | 45 | if "test" in self.split_name: 46 | extra = dataset['extras'][idx] 47 | return ego_in, agents_in, roads, extra 48 | elif self.orig_ego: # for validation with re-rotation to global coordinates 49 | extra = dataset['extras'][idx] 50 | ego_data = dataset['orig_egos'][idx] 51 | ego_out = ego_data[20:] 52 | return ego_in, ego_out, agents_in, roads, extra 53 | 54 | return ego_in, ego_out, agents_in, roads 55 | 56 | def __len__(self): 57 | return self.dset_len 58 | 59 | 60 | if __name__ == '__main__': 61 | dst = ArgoH5Dataset(dset_path="/hdd2/argoverse", split_name="train", use_map_lanes=True) 62 | -------------------------------------------------------------------------------- /datasets/interaction_dataset/README.md: -------------------------------------------------------------------------------- 1 | # Interaction-Dataset Setup 2 | 3 | Before getting started, you need to get access to the dataset from [here](https://interaction-dataset.com/). 4 | Once you receive the email from their team, download and extract the contents of `*multi*.zip` into `multi_agent`. 5 | 6 | Then, run the following to create the h5 files of the dataset: 7 | ``` 8 | python create_h5_indst.py --output-h5-path /path/to/output/interaction_dataset_h5_file/ --raw-dataset-path /path/to/multi_agent/ --split-name [train/val] 9 | ``` 10 | for both train and val. 11 | Finally, ensure that the output folder containing the H5 files (for train and val) also has a copy of the `maps` folder. 12 | To make things easy, I would recommend simply setting `--output-h5-path /path/to/multi_agent`. 
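After both splits are generated, a short sketch like this can verify that the H5 files and the `maps` folder sit together (it assumes the default dataset keys written by `create_h5_indst.py`):

```python
# Check that the converted splits and the lanelet maps share one folder
# (dataset keys assumed to match those created by create_h5_indst.py).
import glob
import os
import h5py

root = "/path/to/multi_agent"
for split in ["train", "val"]:
    with h5py.File(os.path.join(root, split + "_dataset.hdf5"), "r") as f:
        for key in ["agents_trajectories", "agents_types", "metas", "map_paths"]:
            print(split, key, f[key].shape)
print("maps found:", len(glob.glob(os.path.join(root, "maps", "*.osm"))))
```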
13 | 14 | -------------------------------------------------------------------------------- /datasets/interaction_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/datasets/interaction_dataset/__init__.py -------------------------------------------------------------------------------- /datasets/interaction_dataset/create_h5_indst.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import h5py 3 | import glob 4 | import os 5 | import pandas as pd 6 | import numpy as np 7 | from datasets.interaction_dataset.utils import get_minmax_mapfile 8 | 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="Interaction-Dataset H5 Creator") 12 | parser.add_argument("--output-h5-path", type=str, required=True, help="output path to H5 files.") 13 | parser.add_argument("--raw-dataset-path", type=str, required=True, help="raw Dataset path to multi-agent folder.") 14 | parser.add_argument("--split-name", type=str, default="train", help="split-name to create", choices=["train", "val"]) 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | if __name__ == '__main__': 20 | args = get_args() 21 | datafiles = glob.glob(args.raw_dataset_path + args.split_name + "/*.csv") 22 | num_scenes = 0 23 | for datafile in datafiles: 24 | data = pd.read_csv(datafile) 25 | num_scenes += len(list(set(data['case_id'].tolist()))) 26 | 27 | max_num_agents = 50 # found by inspection 28 | f = h5py.File(os.path.join(args.output_h5_path, args.split_name + '_dataset.hdf5'), 'w') 29 | agent_trajectories = f.create_dataset("agents_trajectories", shape=(num_scenes, max_num_agents, 40, 7), chunks=(1, max_num_agents, 40, 7), dtype=np.float32) 30 | agent_types = f.create_dataset("agents_types", shape=(num_scenes, max_num_agents, 2), chunks=(1, max_num_agents, 2), dtype=np.float32) 31 | metas = f.create_dataset("metas", shape=(num_scenes, 5), chunks=(1, 5)) 32 | map_paths = f.create_dataset("map_paths", shape=(num_scenes, 1), chunks=(1, 1), dtype='S200') 33 | 34 | global_scene_id = 0 35 | for datafile_id, datafile in enumerate(datafiles): 36 | data = pd.read_csv(datafile) 37 | current_roads = datafile.split("/")[-1].split(".")[0].replace("_" + args.split_name, "") 38 | lanelet_map_file = os.path.join(args.raw_dataset_path, "maps", current_roads + ".osm") 39 | xmin, ymin, xmax, ymax = get_minmax_mapfile(lanelet_map_file) 40 | 41 | scene_ids = list(set(data['case_id'].tolist())) 42 | for i, scene_id in enumerate(scene_ids): 43 | if i % 10 == 0: 44 | print(scene_id, "/", len(scene_ids)) 45 | scene_data = data.loc[data['case_id'] == scene_id] 46 | 47 | agent_ids = list(set(data['track_id'].tolist())) 48 | scene_trajectories = [] 49 | scene_agent_types = [] 50 | for agent_id in agent_ids: 51 | agent_data = scene_data.loc[scene_data['track_id'] == agent_id] 52 | if len(agent_data['frame_id'].tolist()) < 40: # only take complete trajectories 53 | continue 54 | curr_agent_type = [1.0, 0.0] if 'car' in agent_data['agent_type'].tolist()[0] else [0.0, 1.0] 55 | scene_agent_types.append(curr_agent_type) 56 | scene_trajectories.append(agent_data[['x', 'y', 'vx', 'vy', 'psi_rad', 'length', 'width']].to_numpy()) 57 | 58 | scene_trajectories = np.array(scene_trajectories) 59 | scene_agent_types = np.array(scene_agent_types) 60 | if len(scene_trajectories) < max_num_agents: 61 | temp_scene_trajectories = np.zeros((max_num_agents, 40, 
7)) - 1 62 | temp_scene_trajectories[:len(scene_trajectories)] = scene_trajectories 63 | scene_trajectories = temp_scene_trajectories.copy() 64 | temp_scene_types = np.zeros((max_num_agents, 2)) - 1 65 | temp_scene_types[:len(scene_agent_types)] = scene_agent_types 66 | scene_agent_types = temp_scene_types.copy() 67 | 68 | agent_trajectories[global_scene_id] = scene_trajectories 69 | agent_types[global_scene_id] = scene_agent_types 70 | metas[global_scene_id] = np.array([xmin, ymin, xmax, ymax, datafile_id]) 71 | map_paths[global_scene_id] = lanelet_map_file.encode("ascii", "ignore") 72 | global_scene_id += 1 73 | 74 | 75 | -------------------------------------------------------------------------------- /datasets/interaction_dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as xml 2 | import h5py 3 | import os 4 | import glob 5 | import matplotlib.pyplot as plt 6 | from sklearn.metrics import euclidean_distances 7 | from torch.utils.data import Dataset 8 | import numpy as np 9 | from datasets.interaction_dataset.utils import LL2XYProjector, Point, get_type, get_subtype, get_x_y_lists, \ 10 | get_relation_members 11 | 12 | 13 | class InteractionDataset(Dataset): 14 | def __init__(self, dset_path, split_name="train", evaluation=False, use_map_lanes=True): 15 | self.data_root = dset_path 16 | self.split_name = split_name 17 | self.pred_horizon = 15 18 | self.num_agent_types = 2 19 | self.use_map_lanes = use_map_lanes 20 | self.predict_yaw = True 21 | self.map_attr = 7 22 | self.k_attr = 8 23 | 24 | dataset = h5py.File(os.path.join(self.data_root, split_name + '_dataset.hdf5'), 'r') 25 | self.dset_len = len(dataset["agents_trajectories"]) 26 | 27 | roads_fnames = glob.glob(os.path.join(self.data_root, "maps", "*.osm")) 28 | self.max_num_pts_per_road_seg = 0 29 | self.max_num_road_segs = 0 30 | self.roads = {} 31 | for osm_fname in roads_fnames: 32 | road_info = self.get_map_lanes(osm_fname, 0., 0.) 33 | if len(road_info) > self.max_num_road_segs: 34 | self.max_num_road_segs = len(road_info) 35 | key_fname = osm_fname.split("/")[-1] 36 | self.roads[key_fname] = road_info 37 | 38 | self.max_num_agents = 0 39 | self.evaluation = evaluation 40 | if not evaluation: 41 | self.num_others = 8 # train with 8 agents 42 | else: 43 | self.num_others = 40 # evaluate with 40 agents. 
44 | 45 | def get_map_lanes(self, filename, lat_origin, lon_origin): 46 | projector = LL2XYProjector(lat_origin, lon_origin) 47 | 48 | e = xml.parse(filename).getroot() 49 | 50 | point_dict = dict() 51 | for node in e.findall("node"): 52 | point = Point() 53 | point.x, point.y = projector.latlon2xy(float(node.get('lat')), float(node.get('lon'))) 54 | point_dict[int(node.get('id'))] = point 55 | 56 | unknown_linestring_types = list() 57 | road_lines = [] 58 | 59 | road_lines_dict = {} 60 | exlusion_ids = [] 61 | 62 | min_length = 40 63 | for way in e.findall('way'): 64 | way_type = get_type(way) 65 | 66 | if way_type is None: 67 | raise RuntimeError("Linestring type must be specified") 68 | elif way_type == "curbstone": 69 | mark_type = np.array([1.0, 0.0, 0.0, 1.0]) 70 | elif way_type == "line_thin": 71 | way_subtype = get_subtype(way) 72 | if way_subtype == "dashed": 73 | mark_type = np.array([1.0, 1.0, 0.0, 1.0]) 74 | else: 75 | mark_type = np.array([0.0, 1.0, 0.0, 1.0]) 76 | elif way_type == "line_thick": 77 | way_subtype = get_subtype(way) 78 | if way_subtype == "dashed": 79 | mark_type = np.array([1.0, 1.0, 0.0, 1.0]) 80 | else: 81 | mark_type = np.array([0.0, 1.0, 0.0, 1.0]) 82 | elif way_type == "pedestrian_marking": 83 | mark_type = np.array([0.0, 0.0, 1.0, 1.0]) 84 | elif way_type == "bike_marking": 85 | mark_type = np.array([0.0, 0.0, 1.0, 1.0]) 86 | elif way_type == "stop_line": 87 | mark_type = np.array([1.0, 0.0, 1.0, 1.0]) 88 | elif way_type == "virtual": 89 | # mark_type = np.array([1.0, 1.0, 0.0, 1.0]) 90 | exlusion_ids.append(way.get("id")) 91 | continue 92 | elif way_type == "road_border": 93 | mark_type = np.array([1.0, 1.0, 1.0, 1.0]) 94 | elif way_type == "guard_rail": 95 | mark_type = np.array([1.0, 1.0, 1.0, 1.0]) 96 | elif way_type == "traffic_sign": 97 | exlusion_ids.append(way.get("id")) 98 | continue 99 | else: 100 | if way_type not in unknown_linestring_types: 101 | unknown_linestring_types.append(way_type) 102 | continue 103 | 104 | x_list, y_list = get_x_y_lists(way, point_dict) 105 | if len(x_list) < min_length: 106 | x_list = np.linspace(x_list[0], x_list[-1], min_length).tolist() 107 | y_list = np.linspace(y_list[0], y_list[-1], min_length).tolist() 108 | 109 | lane_pts = np.array([x_list, y_list]).transpose() 110 | mark_type = np.zeros((len(lane_pts), 4)) + mark_type 111 | if len(x_list) > self.max_num_pts_per_road_seg: 112 | self.max_num_pts_per_road_seg = len(x_list) 113 | 114 | lane_pts = np.concatenate((lane_pts, mark_type), axis=1) 115 | road_lines.append(lane_pts) 116 | road_lines_dict[way.get("id")] = lane_pts 117 | 118 | new_roads = np.zeros((160, self.max_num_pts_per_road_seg, 6)) # empirically found max num roads is 157. 
119 | for i in range(len(road_lines)): 120 | new_roads[i, :len(road_lines[i])] = road_lines[i] 121 | 122 | used_keys_all = [] 123 | num_relations = len(e.findall('relation')) 124 | relation_lanes = np.zeros((num_relations + len(road_lines), 80, 8)) 125 | counter = 0 126 | for rel in e.findall('relation'): 127 | rel_lane, used_keys = get_relation_members(rel, road_lines_dict, exlusion_ids) 128 | if rel_lane is None: 129 | continue 130 | used_keys_all += used_keys 131 | new_lanes = np.array(rel_lane).reshape((-1, 8)) 132 | relation_lanes[counter] = new_lanes 133 | counter += 1 134 | 135 | # delete all used keys 136 | used_keys_all = np.unique(used_keys_all) 137 | for used_key in used_keys_all: 138 | del road_lines_dict[used_key] 139 | 140 | # add non-used keys 141 | for k in road_lines_dict.keys(): 142 | relation_lanes[counter, :40, :5] = road_lines_dict[k][:, :5] # rest of state (position (2), and type(3)). 143 | relation_lanes[counter, :40, 5:7] = -1.0 # no left-right relationship 144 | relation_lanes[counter, :40, 7] = road_lines_dict[k][:, -1] # mask 145 | counter += 1 146 | 147 | return relation_lanes[relation_lanes[:, :, -1].sum(1) > 0] 148 | 149 | def split_input_output_normalize(self, agents_data, meta_data, agent_types): 150 | if self.evaluation: 151 | in_horizon = 10 152 | else: 153 | in_horizon = 5 154 | 155 | agent_masks = np.expand_dims(agents_data[:, :, 0] != -1, axis=-1).astype(np.float32) # existence mask 156 | agents_data[:, :, :2] -= np.array([[meta_data[0], meta_data[1]]]) # Normalize with translation only 157 | agents_data = np.nan_to_num(agents_data, nan=-1.0) # pedestrians have nans instead of yaw and size 158 | agents_data = np.concatenate([agents_data, agent_masks], axis=-1) 159 | 160 | dists = euclidean_distances(agents_data[:, in_horizon - 1, :2], agents_data[:, in_horizon - 1, :2]) 161 | agent_masks[agent_masks == 0] = np.nan 162 | dists *= agent_masks[:, in_horizon - 1] 163 | dists *= agent_masks[:, in_horizon - 1].transpose() 164 | ego_idx = np.random.randint(0, int(np.nansum(agent_masks[:, in_horizon - 1]))) 165 | closest_agents = np.argsort(dists[ego_idx]) 166 | agents_data = agents_data[closest_agents[:self.num_others + 1]] 167 | agent_types = agent_types[closest_agents[:self.num_others + 1]] 168 | 169 | agents_in = agents_data[1:(self.num_others + 1), :in_horizon] 170 | agents_out = agents_data[1:(self.num_others + 1), in_horizon:, [0, 1, 4, 7]] # returning positions and yaws 171 | ego_in = agents_data[0, :in_horizon] 172 | ego_out = agents_data[0, in_horizon:] 173 | ego_out = ego_out[:, [0, 1, 4, 7]] # returning positions and yaws 174 | 175 | return ego_in, ego_out, agents_in, agents_out, agent_types 176 | 177 | def copy_agent_roads_across_agents(self, agents_in, roads): 178 | new_roads = np.zeros((self.num_others + 1, *roads.shape)) 179 | new_roads[0] = roads # ego 180 | for n in range(self.num_others): 181 | if agents_in[n, -1, -1]: 182 | new_roads[n + 1] = roads 183 | return new_roads 184 | 185 | def make_2d_rotation_matrix(self, angle_in_radians: float) -> np.ndarray: 186 | """ 187 | Makes rotation matrix to rotate point in x-y plane counterclockwise 188 | by angle_in_radians. 
189 | """ 190 | return np.array([[np.cos(angle_in_radians), -np.sin(angle_in_radians)], 191 | [np.sin(angle_in_radians), np.cos(angle_in_radians)]]) 192 | 193 | def convert_global_coords_to_local(self, coordinates: np.ndarray, yaw: float) -> np.ndarray: 194 | """ 195 | Converts global coordinates to coordinates in the frame given by the rotation quaternion and 196 | centered at the translation vector. The rotation is meant to be a z-axis rotation. 197 | :param coordinates: x,y locations. array of shape [n_steps, 2]. 198 | :param translation: Tuple of (x, y, z) location that is the center of the new frame. 199 | :param rotation: Tuple representation of quaternion of new frame. 200 | Representation - cos(theta / 2) + (xi + yi + zi)sin(theta / 2). 201 | :return: x,y locations in frame stored in array of share [n_times, 2]. 202 | """ 203 | transform = self.make_2d_rotation_matrix(angle_in_radians=yaw) 204 | if len(coordinates.shape) > 2: 205 | coord_shape = coordinates.shape 206 | return np.dot(transform, coordinates.reshape((-1, 2)).T).T.reshape(*coord_shape) 207 | return np.dot(transform, coordinates.T).T[:, :2] 208 | 209 | def rotate_agents(self, ego_in, ego_out, agents_in, agents_out, roads, agent_types): 210 | new_ego_in = np.zeros( 211 | (ego_in.shape[0], ego_in.shape[1] + 3)) # +2 adding three dimensions for original positions and yaw 212 | new_ego_in[:, 3:] = ego_in 213 | new_ego_in[:, 2] = ego_in[:, 4] - ego_in[-1:, 4] # standardized yaw. 214 | 215 | new_ego_out = np.zeros((ego_out.shape[0], ego_out.shape[1] + 2)) # adding two dimensions for original positions 216 | new_ego_out[:, 2:] = ego_out 217 | new_ego_out[:, 4] -= ego_in[-1:, 4] # standardized yaw. 218 | 219 | new_agents_in = np.zeros((agents_in.shape[0], agents_in.shape[1], agents_in.shape[2] + 3)) # + 2 220 | new_agents_in[:, :, 3:] = agents_in 221 | new_agents_in[:, :, 2] = agents_in[:, :, 4] - agents_in[:, -1:, 4] # standardized yaw. 
222 | 223 | new_agents_out = np.zeros((agents_out.shape[0], agents_out.shape[1], agents_out.shape[2] + 2)) 224 | new_agents_out[:, :, 2:] = agents_out 225 | new_agents_out[:, :, 4] -= agents_in[:, -1:, 4] 226 | 227 | new_roads = roads.copy() 228 | 229 | # "ego" stuff 230 | if agent_types[0, 0]: # vehicle 231 | yaw = ego_in[-1, 4] 232 | elif agent_types[0, 1]: # pedestrian/bike 233 | diff = ego_in[-1, :2] - ego_in[-5, :2] 234 | yaw = np.arctan2(diff[1], diff[0]) 235 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 236 | translation = ego_in[-1, :2] 237 | 238 | new_ego_in[:, :2] = self.convert_global_coords_to_local(coordinates=ego_in[:, :2] - translation, 239 | yaw=angle_of_rotation) 240 | # new_ego_in[:, 4:6] = self.convert_global_coords_to_local(coordinates=ego_in[:, 2:4], yaw=angle_of_rotation) 241 | new_ego_in[:, 5:7] = self.convert_global_coords_to_local(coordinates=ego_in[:, 2:4], yaw=angle_of_rotation) 242 | new_ego_out[:, :2] = self.convert_global_coords_to_local(coordinates=ego_out[:, :2] - translation, 243 | yaw=angle_of_rotation) 244 | new_roads[0, :, :, :2] = self.convert_global_coords_to_local(coordinates=new_roads[0, :, :, :2] - translation, 245 | yaw=angle_of_rotation) 246 | new_roads[0][np.where(new_roads[0, :, :, -1] == 0)] = 0.0 247 | 248 | # other agents 249 | for n in range(self.num_others): 250 | if not agents_in[n, -1, -1]: 251 | continue 252 | 253 | if agent_types[n + 1, 0]: # vehicle 254 | yaw = agents_in[n, -1, 4] 255 | elif agent_types[n + 1, 1]: # pedestrian/bike 256 | diff = agents_in[n, -1, :2] - agents_in[n, -5, :2] 257 | yaw = np.arctan2(diff[1], diff[0]) 258 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 259 | translation = agents_in[n, -1, :2] 260 | 261 | new_agents_in[n, :, :2] = self.convert_global_coords_to_local(coordinates=agents_in[n, :, :2] - translation, 262 | yaw=angle_of_rotation) 263 | new_agents_in[n, :, 5:7] = self.convert_global_coords_to_local(coordinates=agents_in[n, :, 2:4], 264 | yaw=angle_of_rotation) 265 | new_agents_out[n, :, :2] = self.convert_global_coords_to_local( 266 | coordinates=agents_out[n, :, :2] - translation, yaw=angle_of_rotation) 267 | new_roads[n + 1, :, :, :2] = self.convert_global_coords_to_local( 268 | coordinates=new_roads[n + 1, :, :, :2] - translation, yaw=angle_of_rotation) 269 | new_roads[n + 1][np.where(new_roads[n + 1, :, :, -1] == 0)] = 0.0 270 | 271 | return new_ego_in, new_ego_out, new_agents_in, new_agents_out, new_roads 272 | 273 | def _plot_debug(self, ego_in, ego_out, agents_in, agents_out, roads): 274 | for n in range(self.num_others + 1): 275 | plt.figure() 276 | if n == 0: 277 | plt.scatter(ego_in[:, 0], ego_in[:, 1], color='k') 278 | plt.scatter(ego_out[:, 0], ego_out[:, 1], color='m') 279 | else: 280 | if agents_in[n - 1, -1, -1]: 281 | plt.scatter(agents_in[n - 1, :, 0], agents_in[n - 1, :, 1], color='k') 282 | plt.scatter(agents_out[n - 1, :, 0], agents_out[n - 1, :, 1], color='m') 283 | for s in range(roads.shape[1]): 284 | for p in range(roads.shape[2]): 285 | if roads[n, s, p, -1]: 286 | plt.scatter(roads[n, s, p, 0], roads[n, s, p, 1], color='g') 287 | plt.show() 288 | exit() 289 | 290 | def __getitem__(self, idx: int): 291 | dataset = h5py.File(os.path.join(self.data_root, self.split_name + '_dataset.hdf5'), 'r') 292 | agents_data = dataset['agents_trajectories'][idx] 293 | agent_types = dataset['agents_types'][idx] 294 | meta_data = dataset['metas'][idx] 295 | if not self.evaluation: 296 | agents_data = agents_data[:, 1::2] # downsampling for efficiency during 
training. 297 | 298 | road_fname_key = dataset['map_paths'][idx][0].decode("utf-8").split("/")[-1] 299 | roads = self.roads[road_fname_key].copy() 300 | roads[:, :, :2] -= np.expand_dims(np.array([meta_data[:2]]), 0) 301 | 302 | original_roads = np.zeros((self.max_num_road_segs, *roads.shape[1:])) 303 | original_roads[:len(roads)] = roads 304 | roads = original_roads.copy() 305 | 306 | ego_in, ego_out, agents_in, agents_out, agent_types = self.split_input_output_normalize(agents_data, meta_data, 307 | agent_types) 308 | roads = self.copy_agent_roads_across_agents(agents_in, roads) 309 | 310 | # normalize scenes so all agents are going up 311 | if self.evaluation: 312 | translations = np.concatenate((ego_in[-1:, :2], agents_in[:, -1, :2]), axis=0) 313 | ego_in, ego_out, agents_in, agents_out, roads = self.rotate_agents(ego_in, ego_out, agents_in, agents_out, 314 | roads, agent_types) 315 | 316 | ''' 317 | Outputs: 318 | ego_in: One agent we are calling ego who's shape is (T x S_i) where 319 | S_i = [x_loc, y_loc, yaw_loc, x_glob, y_glob, vx_loc, vy_loc, yaw_glob, length, width, existence_mask]. 320 | agents_in: all other agents with shape of (T x num_others x S) where S is the same as above. 321 | ego_out: (T x S_o) where S_o = [x_loc, y_loc, x_glob, y_glob, yaw_loc, existence_mask] 322 | agents_out: (T x num_others x S_o) 323 | roads: (num_others x num_road_segs x num_pts_per_road_seg x S_r) where 324 | S_r = [x_loc, y_loc, [type1,type2,type3], [left, right], existence_mask] 325 | 326 | ''' 327 | 328 | if self.evaluation: 329 | ego_in = ego_in[1::2] 330 | agents_in = agents_in[:, 1::2] 331 | model_ego_in = ego_in.copy() 332 | model_ego_in[:, 3:5] = 0.0 333 | ego_in[:, 0:2] = ego_in[:, 3:5] 334 | model_agents_in = agents_in.copy() 335 | model_agents_in[:, :, 3:5] = 0.0 336 | agents_in[:, :, 0:2] = agents_in[:, :, 3:5] 337 | ego_out[:, 0:2] = ego_out[:, 2:4] # putting the original coordinate systems 338 | agents_out[:, :, 0:2] = agents_out[:, :, 2:4] # putting the original coordinate systems 339 | return model_ego_in, ego_out, model_agents_in.transpose(1, 0, 2), agents_out.transpose(1, 0, 2), roads, \ 340 | agent_types, ego_in, agents_in.transpose(1, 0, 2), original_roads, translations 341 | else: 342 | # Experimentally found that global information actually hurts performance. 
343 | ego_in[:, 3:5] = 0.0 344 | agents_in[:, :, 3:5] = 0.0 345 | return ego_in, ego_out, agents_in.transpose(1, 0, 2), agents_out.transpose(1, 0, 2), roads, agent_types 346 | 347 | def __len__(self): 348 | return self.dset_len 349 | -------------------------------------------------------------------------------- /datasets/interaction_dataset/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib 4 | import matplotlib.axes 5 | import matplotlib.pyplot as plt 6 | 7 | import xml.etree.ElementTree as xml 8 | import pyproj 9 | import math 10 | 11 | import sys 12 | import numpy as np 13 | import cv2 14 | 15 | 16 | def get_value_list(d): 17 | assert isinstance(d, dict) 18 | if sys.version_info[0] == 2: 19 | item_list = d.values() 20 | elif sys.version_info[0] == 3: 21 | item_list = list(d.values()) 22 | else: 23 | # should not happen 24 | raise RuntimeError("Only python 2 and 3 supported.") 25 | assert isinstance(item_list, list) 26 | return item_list 27 | 28 | 29 | def get_item_iterator(d): 30 | assert isinstance(d, dict) 31 | if sys.version_info[0] == 2: 32 | item_iter = d.iteritems() 33 | assert hasattr(item_iter, "next") 34 | elif sys.version_info[0] == 3: 35 | item_iter = iter(d.items()) 36 | assert hasattr(item_iter, "__next__") 37 | else: 38 | # should not happen 39 | raise RuntimeError("Only python 2 and 3 supported.") 40 | assert hasattr(item_iter, "__iter__") 41 | return item_iter 42 | 43 | 44 | class Point: 45 | def __init__(self): 46 | self.x = None 47 | self.y = None 48 | 49 | 50 | class LL2XYProjector: 51 | def __init__(self, lat_origin, lon_origin): 52 | self.lat_origin = lat_origin 53 | self.lon_origin = lon_origin 54 | self.zone = math.floor((lon_origin + 180.) 
/ 6) + 1 # works for most tiles, and for all in the dataset 55 | self.p = pyproj.Proj(proj='utm', ellps='WGS84', zone=self.zone, datum='WGS84') 56 | [self.x_origin, self.y_origin] = self.p(lon_origin, lat_origin) 57 | 58 | def latlon2xy(self, lat, lon): 59 | [x, y] = self.p(lon, lat) 60 | return [x - self.x_origin, y - self.y_origin] 61 | 62 | 63 | def get_type(element): 64 | for tag in element.findall("tag"): 65 | if tag.get("k") == "type": 66 | return tag.get("v") 67 | return None 68 | 69 | 70 | def get_subtype(element): 71 | for tag in element.findall("tag"): 72 | if tag.get("k") == "subtype": 73 | return tag.get("v") 74 | return None 75 | 76 | 77 | def get_x_y_lists(element, point_dict): 78 | x_list = list() 79 | y_list = list() 80 | for nd in element.findall("nd"): 81 | pt_id = int(nd.get("ref")) 82 | point = point_dict[pt_id] 83 | x_list.append(point.x) 84 | y_list.append(point.y) 85 | return x_list, y_list 86 | 87 | 88 | def set_visible_area(point_dict, axes): 89 | min_x = 10e9 90 | min_y = 10e9 91 | max_x = -10e9 92 | max_y = -10e9 93 | 94 | for id, point in get_item_iterator(point_dict): 95 | min_x = min(point.x, min_x) 96 | min_y = min(point.y, min_y) 97 | max_x = max(point.x, max_x) 98 | max_y = max(point.y, max_y) 99 | 100 | axes.set_aspect('equal', adjustable='box') 101 | axes.set_xlim([min_x - 10, max_x + 10]) 102 | axes.set_ylim([min_y - 10, max_y + 10]) 103 | 104 | 105 | def get_minmax(point_dict): 106 | min_x = 10e9 107 | min_y = 10e9 108 | max_x = -10e9 109 | max_y = -10e9 110 | for id, point in get_item_iterator(point_dict): 111 | min_x = min(point.x, min_x) 112 | min_y = min(point.y, min_y) 113 | max_x = max(point.x, max_x) 114 | max_y = max(point.y, max_y) 115 | return min_x, min_y, max_x, max_y 116 | 117 | 118 | def get_relation_members(rel, lane_dict, exclusion_ids): 119 | found_lanelet = False 120 | for tag in rel.findall("tag"): 121 | if tag.get("v") == "lanelet": 122 | found_lanelet = True 123 | 124 | if not found_lanelet: 125 | return None, None 126 | 127 | lanes = [] 128 | used_lane_ids = [] 129 | for member in rel.findall("member"): 130 | if member.get("type") == "way": 131 | lane_id = member.get("ref") 132 | if lane_id in exclusion_ids: 133 | return None, None 134 | used_lane_ids.append(lane_id) 135 | 136 | lane_role = member.get("role") 137 | lane_role_np = np.zeros((40, 2)) 138 | if "left" in lane_role: 139 | lane_role_np[:, 0] += 3.0 140 | elif "right" in lane_role: 141 | lane_role_np[:, 1] += 3.0 142 | 143 | curr_lane = np.zeros((40, 8)) 144 | curr_lane[:, :5] = lane_dict[lane_id][:, :5] # state (position (2) and type (3)) 145 | curr_lane[:, 5:7] = lane_role_np # relationship (left, right) 146 | curr_lane[:, -1] = lane_dict[lane_id][:, -1] # existence mask 147 | lanes.append(curr_lane) 148 | 149 | return lanes, used_lane_ids 150 | 151 | 152 | def get_minmax_mapfile(filename): 153 | projector = LL2XYProjector(0.0, 0.0) 154 | 155 | e = xml.parse(filename).getroot() 156 | 157 | point_dict = dict() 158 | for node in e.findall("node"): 159 | point = Point() 160 | point.x, point.y = projector.latlon2xy(float(node.get('lat')), float(node.get('lon'))) 161 | point_dict[int(node.get('id'))] = point 162 | 163 | xmin, ymin, xmax, ymax = get_minmax(point_dict) 164 | return xmin, ymin, xmax, ymax 165 | -------------------------------------------------------------------------------- /datasets/nuscenes/README.md: -------------------------------------------------------------------------------- 1 | # nuScenes Installation and Setup 2 | 3 | First, you need to download the 
dataset and the map expansion (v1.3) from [here](https://www.nuscenes.org/nuscenes#download). 4 | To get setup, follow the instructions [here](https://github.com/nutonomy/nuscenes-devkit) to install the nuscenes devkit. 5 | 6 | Ensure that the final folder structure looks like this: 7 | ``` 8 | v1.0-trainval_full 9 | └──v1.0-trainval 10 | └──attribute.json 11 | └──calibrated_sensor.json 12 | └──category.json 13 | └──ego_pose.json 14 | └──instance.json 15 | └──log.json 16 | └──map.json 17 | └──sample.json 18 | └──sample_annotation.json 19 | └──sample_data.json 20 | └──scene.json 21 | └──sensor.json 22 | └──visibility.json 23 | └──maps 24 | └──basemap 25 | └──boston-seaport.png 26 | └──singapore-hollandvillage.png 27 | └──singapore-onenorth.png 28 | └──singapore-queenstown.png 29 | └──expansion 30 | └──boston-seaport.json 31 | └──singapore-hollandvillage.json 32 | └──singapore-onenorth.json 33 | └──singapore-queenstown.json 34 | └──prediction 35 | └──prediction_scenes.json 36 | └──36092f0b03a857c6a3403e25b4b7aab3.png 37 | └──37819e65e09e5547b8a3ceaefba56bb2.png 38 | └──53992ee3023e5494b90c316c183be829.png 39 | └──93406b464a165eaba6d9de76ca09f5da.png 40 | 41 | ``` 42 | 43 | Once this is done, run the following to create the h5 files of the dataset: 44 | ``` 45 | python create_h5_nusc.py --raw-dataset-path /path/to/v1.0-trainval_full --split-name [train/val] --output-h5-path /path/to/output/nuscenes_h5_file/ 46 | ``` 47 | for both train and val. 48 | -------------------------------------------------------------------------------- /datasets/nuscenes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/datasets/nuscenes/__init__.py -------------------------------------------------------------------------------- /datasets/nuscenes/create_h5_nusc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import h5py 4 | import os 5 | import numpy as np 6 | 7 | from datasets.nuscenes.raw_dataset import NuScenesDataset 8 | 9 | 10 | ''' 11 | Train H5 generation takes about 3 hours and is about 56GBs. 12 | Val H5 generation takes about 1 hour and is about 16GBs. 
13 | ''' 14 | 15 | 16 | def get_args(): 17 | parser = argparse.ArgumentParser(description="Nuscenes H5 Creator") 18 | parser.add_argument("--output-h5-path", type=str, required=True, help="output path to H5 files.") 19 | parser.add_argument("--raw-dataset-path", type=str, required=True, help="raw Dataset path to v1.0-trainval_full.") 20 | parser.add_argument("--split-name", type=str, default="train", help="split-name to create", choices=["train", "val"]) 21 | parser.add_argument("--ego-range", type=int, nargs="+", default=[75, 75, 75, 75], 22 | help="range around ego in meters [left, right, behind, front].") 23 | args = parser.parse_args() 24 | 25 | return args 26 | 27 | 28 | if __name__ == '__main__': 29 | args = get_args() 30 | max_num_agents = 20 31 | 32 | save_dir = os.path.join(args.output_h5_path, args.split_name) 33 | nuscenes = NuScenesDataset(data_root=args.raw_dataset_path, split_name=args.split_name, 34 | version='v1.0-trainval', ego_range=args.ego_range, num_others=max_num_agents) 35 | num_scenes = len(nuscenes) 36 | 37 | f = h5py.File(os.path.join(args.output_h5_path, args.split_name + '_dataset.hdf5'), 'w') 38 | ego_trajectories = f.create_dataset("ego_trajectories", shape=(num_scenes, 18, 3), chunks=(1, 18, 3), dtype=np.float32) 39 | agent_trajectories = f.create_dataset("agents_trajectories", shape=(num_scenes, 18, max_num_agents, 3), chunks=(1, 18, max_num_agents, 3), dtype=np.float32) 40 | scene_ids = f.create_dataset("scene_ids", shape=(num_scenes, 3), chunks=(1, 3), dtype='S50') 41 | scene_translation = f.create_dataset("translation", shape=(num_scenes, 3), chunks=(1, 3)) 42 | scene_rotation = f.create_dataset("rotation", shape=(num_scenes, 4), chunks=(1, 4)) 43 | agent_types = f.create_dataset("agents_types", shape=(num_scenes, max_num_agents+1), chunks=(1, max_num_agents+1), dtype='S50') 44 | road_pts = f.create_dataset("road_pts", shape=(num_scenes, 100, 40, 4), chunks=(1, 100, 40, 4), dtype=np.float16) 45 | road_imgs = f.create_dataset("large_roads", shape=(num_scenes, 750, 750, 3), chunks=(1, 750, 750, 3), dtype=np.uint8) 46 | 47 | for i, data in enumerate(nuscenes): 48 | if i % 10 == 0: 49 | print(i, "/", num_scenes) 50 | ego_trajectories[i] = data[0] 51 | agent_trajectories[i] = data[1] 52 | 53 | road_imgs[i] = data[2] 54 | 55 | curr_scene_id = [n.encode("ascii", "ignore") for n in [data[3][0], data[3][1], data[3][4]]] 56 | scene_ids[i] = curr_scene_id 57 | 58 | scene_translation[i] = data[3][2] 59 | scene_rotation[i] = data[3][3] 60 | 61 | curr_agent_types = data[4] 62 | while len(curr_agent_types) < max_num_agents + 1: 63 | curr_agent_types.append("None") 64 | agent_types_ascii = [n.encode("ascii", "ignore") for n in curr_agent_types] 65 | agent_types[i] = agent_types_ascii 66 | 67 | road_pts[i] = data[5] 68 | 69 | -------------------------------------------------------------------------------- /datasets/nuscenes/dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import h5py 3 | import os 4 | import numpy as np 5 | from torch.utils.data import Dataset 6 | from torchvision.transforms import transforms 7 | import warnings 8 | warnings.filterwarnings("ignore") 9 | 10 | 11 | class NuscenesH5Dataset(Dataset): 12 | def __init__(self, dset_path, split_name="train", rtn_extras=False, model_type="Autobot-Joint", 13 | use_map_img=False, use_map_lanes=True): 14 | self.data_root = dset_path 15 | self.split_name = split_name 16 | self.rtn_extras = rtn_extras 17 | self.use_map_img = use_map_img 18 | 
self.use_map_lanes = use_map_lanes 19 | self.pred_horizon = 12 20 | self.num_others = 7 21 | self.map_attr = 3 22 | self.predict_yaw = False 23 | 24 | if "Joint" in model_type: 25 | self.k_attr = 4 26 | self.use_joint_version = True 27 | else: 28 | self.k_attr = 2 29 | self.use_joint_version = False 30 | 31 | if self.use_joint_version and self.use_map_img: 32 | raise Exception("Cannot use joint version with map image. Use Map lanes instead...") 33 | 34 | dataset = h5py.File(os.path.join(self.data_root, self.split_name + '_dataset.hdf5'), 'r') 35 | self.dset_len = len(dataset["ego_trajectories"]) 36 | 37 | self.transforms = transforms.Compose([ 38 | transforms.ToTensor(), 39 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 40 | ]) 41 | 42 | self.unique_agent_types = [ 43 | ['human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker', 44 | 'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller', 45 | 'human.pedestrian.wheelchair'], 46 | ['vehicle.bicycle'], 47 | ['vehicle.motorcycle'], 48 | ['vehicle.car', 'vehicle.emergency.police'], 49 | ['vehicle.bus.bendy', 'vehicle.bus.rigid'], 50 | ['vehicle.construction', 'vehicle.trailer', 'vehicle.truck'] 51 | ] 52 | self.num_agent_types = len(self.unique_agent_types) 53 | 54 | def get_input_output_seqs(self, ego_data, agents_data): 55 | traj_len = int(np.sum(ego_data[:, 2])) 56 | 57 | # Ego 58 | temp_ego_in = ego_data[:traj_len - self.pred_horizon] # at most shape(6, 3) 59 | ego_out = ego_data[traj_len - self.pred_horizon:traj_len] # guaranteed to be shape(horizon, 3) 60 | ego_in = np.zeros((6, 3)) 61 | ego_in[-len(temp_ego_in):] = temp_ego_in 62 | 63 | # separate agents into in/out. 64 | temp_agents_in = agents_data[:traj_len - self.pred_horizon] # at most shape(6, 3) 65 | agents_in = np.zeros((6, self.num_others, 3)) 66 | agents_in[-len(temp_agents_in):] = temp_agents_in[:, :self.num_others] 67 | agents_out = agents_data[traj_len - self.pred_horizon:traj_len] 68 | 69 | # For competition, only allowed 4 input timesteps at most. 70 | ego_in = ego_in[-4:] 71 | agents_in = agents_in[-4:] 72 | 73 | return ego_in, ego_out, agents_in, agents_out 74 | 75 | def select_valid_others(self, agents_data, agent_types): 76 | # Other agents: we want agents that are not parked and that are the closest to the ego agent. 
77 | dist_travelled = [] 78 | for n in range(agents_data.shape[1]): 79 | agent_traj_len = np.sum(agents_data[:, n, 2]) 80 | idx_first_pt = np.argmax(agents_data[:, n, 2]) 81 | dist_travelled.append(np.linalg.norm(agents_data[idx_first_pt + int(agent_traj_len) - 1, n, :2] - 82 | agents_data[idx_first_pt, n, :2], axis=-1)) 83 | 84 | vehicle_agents = agent_types.argmax(axis=1) >= -1 85 | agents_with_atleast_2_ts = agents_data[2:6, :, 2].sum(axis=0) >= 1 86 | active_agents = (np.array(dist_travelled) > 3.0) # * vehicle_agents[1:] * agents_with_atleast_2_ts # 3.0 87 | if agent_types is not None: 88 | agent_types = agent_types[np.concatenate(([True], active_agents))] 89 | 90 | agents_data = agents_data[:, active_agents] 91 | if np.sum(active_agents) < self.num_others: 92 | temp_agents_data = np.zeros((len(agents_data), self.num_others, 3)) 93 | temp_agents_data[:, :np.sum(active_agents)] = agents_data 94 | agents_data = temp_agents_data.copy() 95 | if agent_types is not None: 96 | temp_agent_types = np.zeros((self.num_others+1, agent_types.shape[1])) - 1 97 | temp_agent_types[:np.sum(active_agents)+1] = agent_types 98 | agent_types = temp_agent_types.copy() 99 | 100 | elif np.sum(active_agents) > self.num_others: 101 | # agents are already sorted from raw dataset creation. 102 | agents_data = agents_data[:, :self.num_others] 103 | if agent_types is not None: 104 | agent_types = agent_types[:(self.num_others+1)] 105 | 106 | return agents_data, agent_types 107 | 108 | def get_agent_types(self, agent_types_strings, num_raw_agents): 109 | type_to_onehot = np.eye(self.num_agent_types) 110 | agent_types_id = [] 111 | for agent_type in agent_types_strings: 112 | for i, dset_types in enumerate(self.unique_agent_types): 113 | if agent_type.decode("utf-8") in dset_types: 114 | agent_types_id.append(type_to_onehot[i]) 115 | agent_types_id = np.array(agent_types_id) 116 | if len(agent_types_id) < num_raw_agents+1: 117 | new_agent_types_id = np.zeros((num_raw_agents+1, self.num_agent_types)) - 1 118 | new_agent_types_id[:len(agent_types_id)] = agent_types_id 119 | agent_types_id = new_agent_types_id.copy() 120 | 121 | return agent_types_id 122 | 123 | def mirror_scene(self, ego_in, ego_out, agents_in, agents_out, roads): 124 | if self.use_map_img: 125 | roads = cv2.flip(roads.transpose((1, 2, 0)), 1).transpose((2, 0, 1)) 126 | elif self.use_map_lanes: 127 | roads[:, :, :, 0] = -roads[:, :, :, 0] 128 | roads[:, :, :, 2] = np.pi - roads[:, :, :, 2] 129 | ego_in[:, 0] = -ego_in[:, 0] 130 | ego_out[:, 0] = -ego_out[:, 0] 131 | agents_in[:, :, 0] = -agents_in[:, :, 0] 132 | agents_out[:, :, 0] = -agents_out[:, :, 0] 133 | if self.use_joint_version: 134 | agents_in[:, :, 2] = -agents_in[:, :, 2] 135 | agents_out[:, :, 2] = -agents_out[:, :, 2] 136 | return ego_in, ego_out, agents_in, agents_out, roads 137 | 138 | def make_2d_rotation_matrix(self, angle_in_radians: float) -> np.ndarray: 139 | """ 140 | Makes rotation matrix to rotate point in x-y plane counterclockwise 141 | by angle_in_radians. 142 | """ 143 | return np.array([[np.cos(angle_in_radians), -np.sin(angle_in_radians)], 144 | [np.sin(angle_in_radians), np.cos(angle_in_radians)]]) 145 | 146 | def convert_global_coords_to_local(self, coordinates: np.ndarray, yaw: float) -> np.ndarray: 147 | """ 148 | Converts global coordinates to coordinates in the frame given by the rotation quaternion and 149 | centered at the translation vector. The rotation is meant to be a z-axis rotation. 150 | :param coordinates: x,y locations. array of shape [n_steps, 2]. 
151 | :param yaw: angle of rotation (in radians) about the z-axis; points are rotated counterclockwise by this angle (see make_2d_rotation_matrix). 152 | Any translation into the target frame is expected to have been applied by the caller 153 | before this method is called. 154 | :return: x,y locations in the rotated frame, stored in an array of shape [n_steps, 2]. 155 | """ 156 | transform = self.make_2d_rotation_matrix(angle_in_radians=yaw) 157 | if len(coordinates.shape) > 2: 158 | coord_shape = coordinates.shape 159 | return np.dot(transform, coordinates.reshape((-1, 2)).T).T.reshape(*coord_shape) 160 | return np.dot(transform, coordinates.T).T[:, :2] 161 | 162 | def get_agent_roads(self, roads, agents_in): 163 | N = 100 164 | curr_roads = roads.copy() 165 | curr_roads[np.where(curr_roads[:, :, -1] == 0)] = np.nan 166 | mean_roads = np.nanmean(curr_roads, axis=1)[:, :2] 167 | 168 | # Ego Agent 169 | args_closest_roads = np.argsort(np.linalg.norm(np.array([[0.0, 0.0]]) - mean_roads, axis=-1)) 170 | if len(args_closest_roads) >= N: 171 | per_agent_roads = [roads[args_closest_roads[:N]]] 172 | else: 173 | ego_roads = np.zeros((N, 40, 4)) 174 | ego_roads[:len(args_closest_roads)] = roads[args_closest_roads] 175 | per_agent_roads = [ego_roads] 176 | 177 | # Other Agents 178 | for n in range(self.num_others): 179 | if agents_in[-1, n, 2]: 180 | args_closest_roads = np.argsort(np.linalg.norm(agents_in[-1:, n, :2] - mean_roads, axis=-1)) 181 | if len(args_closest_roads) >= N: 182 | per_agent_roads.append(roads[args_closest_roads[:N]]) 183 | else: 184 | agent_roads = np.zeros((N, 40, 4)) 185 | agent_roads[:len(args_closest_roads)] = roads[args_closest_roads] 186 | per_agent_roads.append(agent_roads) 187 | else: 188 | per_agent_roads.append(np.zeros((N, 40, 4))) 189 | 190 | roads = np.array(per_agent_roads) 191 | roads[:, :, :, 2][np.where(roads[:, :, :, 2] < 0.0)] += 2 * np.pi # making all orientations between 0 and 2pi 192 | 193 | # ensure pt closest to ego has an angle of pi/2 194 | temp_ego_roads = roads[0].copy() 195 | temp_ego_roads[np.where(temp_ego_roads[:, :, -1] == 0)] = np.nan 196 | dist = np.linalg.norm(temp_ego_roads[:, :, :2] - np.array([[[0.0, 0.0]]]), axis=-1) 197 | closest_pt = temp_ego_roads[np.where(dist == np.nanmin(dist))] 198 | angle_diff = closest_pt[0, 2] - (np.pi/2) 199 | roads[:, :, :, 2] -= angle_diff 200 | return roads 201 | 202 | def rotate_agent_datas(self, ego_in, ego_out, agents_in, agents_out, roads): 203 | new_ego_in = np.zeros((len(ego_in), ego_in.shape[1]+2)) 204 | new_ego_out = np.zeros((len(ego_out), ego_out.shape[1] + 2)) 205 | new_agents_in = np.zeros((len(agents_in), self.num_others, agents_in.shape[2]+2)) 206 | new_agents_out = np.zeros((len(agents_out), self.num_others, agents_out.shape[2] + 2)) 207 | new_roads = roads.copy() 208 | 209 | # Ego trajectories 210 | new_ego_in[:, :2] = ego_in[:, :2] 211 | new_ego_in[:, 2:] = ego_in 212 | new_ego_out[:, :2] = ego_out[:, :2] 213 | new_ego_out[:, 2:] = ego_out 214 | 215 | for n in range(self.num_others): 216 | new_agents_in[:, n, 2:] = agents_in[:, n] 217 | new_agents_out[:, n, 2:] = agents_out[:, n] 218 | if agents_in[:, n, -1].sum() >= 2: 219 | # then we can use the past to compute the angle to +y-axis 220 | diff = agents_in[-1, n, :2] - agents_in[-2, n, :2] 221 | yaw = np.arctan2(diff[1], diff[0]) 222 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 223 | translation = agents_in[-1, n, :2] 224 | elif agents_in[:, n, -1].sum() == 1: 225 | # then we need to use the future to compute angle to +y-axis 226 | diff = agents_out[0, n, :2] - agents_in[-1, n,
:2] 227 | yaw = np.arctan2(diff[1], diff[0]) 228 | translation = agents_in[-1, n, :2] 229 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 230 | else: 231 | # the agent does not exist... 232 | angle_of_rotation = None 233 | translation = None 234 | 235 | if angle_of_rotation is not None: 236 | new_agents_in[:, n, :2] = self.convert_global_coords_to_local(coordinates=agents_in[:, n, :2] - translation, yaw=angle_of_rotation) 237 | new_agents_out[:, n, :2] = self.convert_global_coords_to_local(coordinates=agents_out[:, n, :2] - translation, yaw=angle_of_rotation) 238 | new_agents_in[:, n, :2][np.where(new_agents_in[:, n, -1] == 0)] = 0.0 239 | new_agents_out[:, n, :2][np.where(new_agents_out[:, n, -1] == 0)] = 0.0 240 | if self.use_map_lanes: 241 | new_roads[n+1, :, :, :2] = self.convert_global_coords_to_local(coordinates=new_roads[n+1, :, :, :2] - translation, yaw=angle_of_rotation) 242 | new_roads[n+1, :, :, 2] -= angle_of_rotation 243 | new_roads[n+1][np.where(new_roads[n+1, :, :, -1] == 0)] = 0.0 244 | 245 | return new_ego_in, new_ego_out, new_agents_in, new_agents_out, new_roads 246 | 247 | def __getitem__(self, idx: int): 248 | dataset = h5py.File(os.path.join(self.data_root, self.split_name + '_dataset.hdf5'), 'r') 249 | 250 | ego_data = dataset['ego_trajectories'][idx] 251 | agents_data = dataset['agents_trajectories'][idx] 252 | 253 | agent_types = self.get_agent_types(dataset['agents_types'][idx], num_raw_agents=agents_data.shape[1]) 254 | agents_data, agent_types = self.select_valid_others(agents_data, agent_types) 255 | 256 | in_ego, out_ego, in_agents, out_agents = self.get_input_output_seqs(ego_data, agents_data) 257 | 258 | if self.use_map_img: 259 | # original image is 75m behind, 75m in front, 75m left, 75m right @ 0.2m/px resolution. 
260 | # below recovers an image with 0m behind, 75m in front, 30m left, 30m right 261 | road_img_data = dataset['large_roads'][idx][0:375, 225:525] 262 | road_img_data = cv2.resize(road_img_data, dsize=(128, 128)) 263 | roads = self.transforms(road_img_data).numpy() 264 | elif self.use_map_lanes: 265 | roads = dataset['road_pts'][idx] 266 | roads = self.get_agent_roads(roads, in_agents) 267 | else: 268 | roads = np.ones((1, 1)) 269 | 270 | # Normalize all other agents futures 271 | # make agents have 2 sets of x,y positions (one centered @(0,0) and pointing up, and the other being raw 272 | if self.use_joint_version: 273 | in_ego, out_ego, in_agents, out_agents, roads = \ 274 | self.rotate_agent_datas(in_ego, out_ego, in_agents, out_agents, roads) 275 | 276 | city_name = dataset['scene_ids'][idx][-1].decode('utf-8') 277 | if "train" in self.split_name: 278 | should_we_mirror = np.random.choice([0, 1]) 279 | if should_we_mirror: 280 | in_ego, out_ego, in_agents, out_agents, roads = self.mirror_scene(in_ego, out_ego, in_agents, out_agents, roads) 281 | 282 | if self.use_joint_version: 283 | if self.rtn_extras: 284 | extras = [dataset['translation'][idx], dataset['rotation'][idx], dataset['scene_ids'][idx][2].decode("utf-8")] 285 | return in_ego, out_ego, in_agents, out_agents, roads, agent_types, extras, dataset['large_roads'][idx] 286 | 287 | return in_ego, out_ego, in_agents, out_agents, roads, agent_types 288 | else: 289 | if self.use_map_lanes: 290 | ego_roads = roads[0] 291 | else: 292 | ego_roads = roads 293 | 294 | if self.rtn_extras: 295 | # translation, rotation, instance_token, sample_token 296 | extras = [dataset['translation'][idx], dataset['rotation'][idx], 297 | dataset['scene_ids'][idx][0].decode("utf-8"), dataset['scene_ids'][idx][1].decode("utf-8")] 298 | return in_ego, out_ego, in_agents, ego_roads, extras 299 | 300 | return in_ego, out_ego, in_agents, ego_roads 301 | 302 | def __len__(self): 303 | return self.dset_len 304 | 305 | -------------------------------------------------------------------------------- /datasets/nuscenes/raw_dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import math 4 | 5 | from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory 6 | from nuscenes.prediction.input_representation.combinators import Rasterizer 7 | from nuscenes.prediction.input_representation.interface import InputRepresentation 8 | from pyquaternion import Quaternion 9 | from torch.utils.data import Dataset 10 | from typing import Dict 11 | 12 | 13 | from nuscenes import NuScenes 14 | from nuscenes.prediction import PredictHelper 15 | from nuscenes.eval.prediction.splits import get_prediction_challenge_split 16 | from nuscenes.map_expansion.map_api import NuScenesMap 17 | from nuscenes.prediction.input_representation.static_layers import get_lanes_in_radius, StaticLayerRasterizer, correct_yaw 18 | from nuscenes.prediction.helper import quaternion_yaw, convert_global_coords_to_local 19 | 20 | import matplotlib.pyplot as plt 21 | import numpy as np 22 | 23 | 24 | def is_insquare(points, car_pos, ego_range): 25 | if (car_pos[0] - ego_range[0] < points[0] < car_pos[0] + ego_range[1]) and \ 26 | (car_pos[1] - ego_range[2] < points[1] < car_pos[1] + ego_range[3]): 27 | return True 28 | return False 29 | 30 | 31 | def distance(t1, t2) -> float: 32 | 33 | return math.sqrt((t1[0] - t2[0]) ** 2 + (t1[1] - t2[1]) ** 2) 34 | 35 | 36 | def load_all_maps(helper: PredictHelper, verbose: bool 
= False) -> Dict[str, NuScenesMap]: 37 | """ 38 | Loads all NuScenesMap instances for all available maps. 39 | :param helper: Instance of PredictHelper. 40 | :param verbose: Whether to print to stdout. 41 | :return: Mapping from map-name to the NuScenesMap api instance. 42 | """ 43 | dataroot = helper.data.dataroot 44 | 45 | json_files = filter( 46 | lambda f: "json" in f and "prediction_scenes" not in f, os.listdir(os.path.join(dataroot, "maps", "expansion")) 47 | ) 48 | maps = {} 49 | 50 | for map_file in json_files: 51 | 52 | map_name = str(map_file.split(".")[0]) 53 | if verbose: 54 | print(f"static_layers.py - Loading Map: {map_name}") 55 | 56 | maps[map_name] = NuScenesMap(dataroot, map_name=map_name) 57 | 58 | return maps 59 | 60 | 61 | class NuScenesDataset(Dataset): 62 | def __init__(self, data_root, split_name, version, ego_range=(25, 25, 10, 50), debug=False, num_others=10): 63 | 64 | super(NuScenesDataset).__init__() 65 | nusc = NuScenes(version=version, dataroot=data_root) 66 | self._helper = PredictHelper(nusc) 67 | self._dataset = get_prediction_challenge_split(split_name, dataroot=data_root) 68 | self._use_pedestrians = False 69 | self._use_only_moving_vehicles = True 70 | self._debug = debug 71 | self.ego_range = ego_range # (left, right, behind, front) 72 | # parameters 73 | self.future_secs = 6 74 | self.past_secs = 3 75 | self.freq_hz = 2 76 | self._number_closest_agents = num_others # We always take this number of closest objects 77 | self._max_number_roads = 100 78 | self._number_future_road_points = 40 # the number of points to include along the current lane. 79 | self._map_dict = load_all_maps(self._helper, verbose=True) 80 | self._static_layer_rasterizer = StaticLayerRasterizer( 81 | self._helper, 82 | layer_names=['drivable_area', 'ped_crossing', 'walkway'], 83 | resolution=0.2, 84 | meters_ahead=ego_range[3], 85 | meters_behind=ego_range[2], 86 | meters_left=ego_range[0], 87 | meters_right=ego_range[1], 88 | ) 89 | 90 | if self._debug: 91 | self._static_layer_rasterizer_1 = StaticLayerRasterizer( 92 | self._helper, 93 | resolution=0.1, 94 | meters_ahead=100, 95 | meters_behind=50, 96 | meters_left=75, 97 | meters_right=75, 98 | ) 99 | self._agent_rasterizer = AgentBoxesWithFadedHistory( 100 | self._helper, 101 | seconds_of_history=1, 102 | resolution=0.1, 103 | meters_ahead=100, 104 | meters_behind=50, 105 | meters_left=75, 106 | meters_right=75, 107 | ) 108 | self._mtp_input_representation = InputRepresentation( 109 | self._static_layer_rasterizer_1, self._agent_rasterizer, Rasterizer() 110 | ) 111 | 112 | def debug_draw_full_map_from_position(self, data_root, instance, sample): 113 | 114 | image_folder = os.path.join(data_root, instance) 115 | if not os.path.exists(image_folder): 116 | os.mkdir(image_folder) 117 | 118 | future = self._helper.get_future_for_agent(instance, sample, seconds=200, in_agent_frame=True, just_xy=False) 119 | past = self._helper.get_past_for_agent(instance, sample, seconds=200, in_agent_frame=True, just_xy=False) 120 | 121 | count = 0 122 | 123 | for token in reversed(past): 124 | img = self._mtp_input_representation.make_input_representation(instance, token["sample_token"]) 125 | plt.imshow(img) 126 | plt.savefig(os.path.join(image_folder, str(count) + ".png")) 127 | count += 1 128 | 129 | for token in future: 130 | img = self._mtp_input_representation.make_input_representation(instance, token["sample_token"]) 131 | plt.imshow(img) 132 | plt.savefig(os.path.join(image_folder, str(count) + ".png")) 133 | count += 1 134 | 135 | def 
_get_map_features(self, nusc_map, x, y, yaw, radius, reference_position): 136 | curr_map = np.zeros((self._max_number_roads, self._number_future_road_points, 4)) 137 | lanes = get_lanes_in_radius(x=x, y=y, radius=200, map_api=nusc_map, discretization_meters=2.0) 138 | 139 | # need to combine lanes that are connected to avoid random gaps. 140 | combined_lane_ids = [] # list of connected lane ids. 141 | ignore_lane_ids = [] # List of lane ids to ignore because they are connected to prior lane ids. 142 | for lane_id in lanes.keys(): 143 | if lane_id in ignore_lane_ids: 144 | continue 145 | curr_lane_ids = [lane_id] 146 | out_lane_ids = nusc_map.get_outgoing_lane_ids(lane_id) 147 | for out_lane_id in out_lane_ids: 148 | if out_lane_id in lanes.keys(): 149 | curr_lane_ids.append(out_lane_id) 150 | ignore_lane_ids.append(out_lane_id) 151 | 152 | outout_lane_ids = nusc_map.get_outgoing_lane_ids(out_lane_id) 153 | for outout_lane_id in outout_lane_ids: 154 | if outout_lane_id in lanes.keys(): 155 | curr_lane_ids.append(outout_lane_id) 156 | ignore_lane_ids.append(outout_lane_id) 157 | 158 | combined_lane_ids.append(curr_lane_ids) 159 | 160 | relevant_pts = [] 161 | for i in range(len(combined_lane_ids)): 162 | for lane_id in combined_lane_ids[i]: 163 | pts = lanes[lane_id] 164 | road_pts = [] 165 | for pt in pts: 166 | pt = list(pt) 167 | pt.append(1.0) 168 | pt = np.array(pt) 169 | road_pts.append(pt) 170 | if len(road_pts) > 0: 171 | relevant_pts.append(np.array(road_pts)) 172 | 173 | # relevant_pts is a list of lists, which each sublist containing the points on a road in the fov. 174 | if len(relevant_pts) > 0: 175 | # relevant_pts is a list of arrays each with [numb_pts, 3] 176 | # need to sort each one according to its closeness to the agent position 177 | first_pts = [] 178 | for i in range(len(relevant_pts)): 179 | indices = np.argsort(np.linalg.norm(np.array([[x, y]]) - relevant_pts[i][:, :2], axis=1)) 180 | relevant_pts[i] = relevant_pts[i][indices] 181 | first_pts.append(relevant_pts[i][0]) 182 | 183 | # sort using the first point of each road 184 | first_pts = np.array(first_pts) 185 | indices = np.argsort(np.linalg.norm(np.array([[x, y]]) - first_pts[:, :2], axis=1)).tolist() 186 | mymap = np.array(relevant_pts, dtype=object)[indices[:self._max_number_roads]] 187 | for i in range(min(len(mymap), self._max_number_roads)): 188 | curr_map[i, :min(len(mymap[i]), self._number_future_road_points)] = \ 189 | mymap[i][:min(len(mymap[i]), self._number_future_road_points)] 190 | else: 191 | raise Exception("Did not find any lanes in the map...") 192 | 193 | return curr_map 194 | 195 | def _rotate_sample_points(self, annotation, sample_info): 196 | # Rotate all points using ego translation and rotation 197 | del sample_info[annotation['instance_token']] # Remove ego data from here 198 | for key, val in list(sample_info.items()): 199 | if val.size == 0: 200 | # delete empty entries. 
201 | del sample_info[key] 202 | continue 203 | sample_info[key] = convert_global_coords_to_local(val, annotation['translation'], annotation['rotation']) 204 | return sample_info 205 | 206 | def choose_agents(self, past_samples, future_samples, sample_token): 207 | # Rectangle around ego at the prediction time 208 | x_left, x_right = -self.ego_range[0], self.ego_range[1] # in meters 209 | y_behind, y_infront = -self.ego_range[2], self.ego_range[3] # in meters 210 | # x_left, x_right = -25, 25 # in meters 211 | # y_behind, y_infront = -10, 30 # in meters 212 | 213 | agent_types = {} 214 | # we only want to consider at the prediction timetep, last point from the past(first in array). 215 | valid_agent_ids = {} 216 | for key, value in past_samples.items(): 217 | info = self._helper.get_sample_annotation(key, sample_token) 218 | agent_type = info['category_name'] 219 | useful_agent_bool = "vehicle" in agent_type or "human" in agent_type 220 | if x_left <= value[0, 0] <= x_right and y_behind <= value[0, 1] <= y_infront and \ 221 | key in future_samples.keys() and useful_agent_bool: 222 | dist_to_ego_at_t = distance(value[0], [0,0]) 223 | valid_agent_ids[key] = dist_to_ego_at_t 224 | agent_types[key] = agent_type 225 | 226 | # sort according to distance to ego at t. 227 | valid_agent_ids = [k for k, v in sorted(valid_agent_ids.items(), key=lambda item: item[1])] 228 | if len(valid_agent_ids) > self._number_closest_agents: 229 | final_valid_keys = valid_agent_ids[:self._number_closest_agents] 230 | else: 231 | final_valid_keys = valid_agent_ids 232 | 233 | # construct numpy array for all agent points 234 | agents_types_list = [] 235 | agents_array = np.zeros((self._number_closest_agents, (self.past_secs+self.future_secs)*2, 3)) 236 | for i, key in enumerate(final_valid_keys): 237 | # flipped past samples like before 238 | curr_agent = np.concatenate((past_samples[key][::-1], future_samples[key]), axis=0) 239 | if len(curr_agent) > 18: 240 | print("too many timesteps...") 241 | curr_agent = curr_agent[:(self.future_secs + self.past_secs) * self.freq_hz] 242 | agents_array[i, :len(curr_agent), :2] = curr_agent 243 | agents_array[i, :len(curr_agent), 2] = 1 244 | agents_types_list.append(agent_types[key]) 245 | 246 | return agents_array, agents_types_list 247 | 248 | def __getitem__(self, idx: int): 249 | instance_token, sample_token = self._dataset[idx].split("_") 250 | annotation = self._helper.get_sample_annotation(instance_token, sample_token) 251 | ego_type = [annotation['category_name']] 252 | road_img = self._static_layer_rasterizer.make_representation(instance_token, sample_token) 253 | road_img = cv2.resize(road_img, (750, 750)) 254 | 255 | # Ego-Agent stuff 256 | p_all_positions = self._helper.get_past_for_agent( 257 | instance_token, sample_token, seconds=self.past_secs, in_agent_frame=False, just_xy=True 258 | ) 259 | f_all_positions = self._helper.get_future_for_agent( 260 | instance_token, sample_token, seconds=self.future_secs, in_agent_frame=False, just_xy=True 261 | ) 262 | 263 | if self._debug: 264 | self.debug_draw_full_map_from_position( 265 | data_root=".", instance=instance_token, sample=sample_token 266 | ) 267 | 268 | all_ego_positions = np.concatenate((p_all_positions[::-1], f_all_positions), axis=0) # need to flip past. 
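# Each global (x, y) point below is mapped into the ego frame at the prediction time using the
# annotation's translation and rotation (via the nuScenes helper convert_global_coords_to_local).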
269 | rotated_ego_positions = [] 270 | for coords in all_ego_positions: 271 | rotated_ego_positions.append( 272 | convert_global_coords_to_local(coords, annotation['translation'], annotation['rotation']).squeeze()) 273 | rotated_ego_positions = np.array(rotated_ego_positions) 274 | ego_array = np.zeros(((self.future_secs+self.past_secs)*self.freq_hz, 3)) # 3 for x,y,mask 275 | if len(rotated_ego_positions) > 18: 276 | print("too many timesteps...") 277 | rotated_ego_positions = rotated_ego_positions[:(self.future_secs+self.past_secs)*self.freq_hz] 278 | ego_array[:len(rotated_ego_positions), :2] = rotated_ego_positions 279 | ego_array[:len(rotated_ego_positions), 2] = 1 280 | 281 | # Other agents stuff 282 | p_sample_info = self._helper.get_past_for_sample( 283 | sample_token, seconds=self.past_secs, in_agent_frame=False, just_xy=True 284 | ) 285 | f_sample_info = self._helper.get_future_for_sample( 286 | sample_token, seconds=self.future_secs, in_agent_frame=False, just_xy=True 287 | ) 288 | p_sample_info = self._rotate_sample_points(annotation, p_sample_info) 289 | f_sample_info = self._rotate_sample_points(annotation, f_sample_info) 290 | agents_array, agent_types = self.choose_agents(p_sample_info, f_sample_info, sample_token) 291 | agent_types = ego_type + agent_types 292 | 293 | # Map stuff 294 | map_name = self._helper.get_map_name_from_sample_token(sample_token) 295 | theta = correct_yaw(quaternion_yaw(Quaternion(annotation['rotation']))) 296 | raw_map = self._get_map_features(self._map_dict[map_name], annotation["translation"][0], annotation["translation"][1], theta, 100, [0, 0]) 297 | 298 | rotated_map = np.zeros_like(raw_map) 299 | for road_idx in range(len(raw_map)): 300 | for pt_idx in range(len(raw_map[road_idx])): 301 | if raw_map[road_idx, pt_idx, -1] == 1: 302 | rotated_pt = convert_global_coords_to_local(raw_map[road_idx, pt_idx, :2], annotation['translation'], 303 | annotation['rotation']) 304 | if is_insquare(rotated_pt[0], [0., 0.], self.ego_range): 305 | rotated_map[road_idx, pt_idx, :2] = rotated_pt 306 | rotated_map[road_idx, pt_idx, 2] = theta - raw_map[road_idx, pt_idx, 2] 307 | rotated_map[road_idx, pt_idx, -1] = 1 308 | 309 | extras = [instance_token, sample_token, annotation['translation'], annotation['rotation'], map_name] 310 | return ego_array, agents_array.transpose((1, 0, 2)), road_img, extras, agent_types, rotated_map 311 | 312 | def __len__(self): 313 | return len(self._dataset) 314 | 315 | -------------------------------------------------------------------------------- /datasets/trajnetpp/README.md: -------------------------------------------------------------------------------- 1 | # TrajNet++ Setup 2 | 3 | Download the train.zip dataset from [here](https://github.com/vita-epfl/trajnetplusplusdata/releases/tag/v4.0). 4 | Extract the data 5 | Then run: 6 | 7 | ``` 8 | python create_data_npys.py --raw-dataset-path /path/to/synth_data/ --output-npy-path /path/to/output_npys 9 | ``` 10 | 11 | This script will split the training data into train and validation numpy files called `{split}_orca_synth.npy`. 
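For reference, each saved array stacks whole scenes of 21 timesteps (9 observed + 12 future) for up to 6 agents, with NaN padding when a scene contains fewer agents; this is the layout that `datasets/trajnetpp/dataset.py` consumes. A quick sanity check (the path below is a placeholder) could look like:

```
import numpy as np

scenes = np.load("/path/to/output_npys/train_orca_synth.npy")
print(scenes.shape)  # (num_scenes, 21, 6, 2)
past, future = scenes[:, :9], scenes[:, 9:]  # observed vs. future timesteps
```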
12 | -------------------------------------------------------------------------------- /datasets/trajnetpp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/datasets/trajnetpp/__init__.py -------------------------------------------------------------------------------- /datasets/trajnetpp/create_data_npys.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import trajnetplusplustools 5 | import math 6 | 7 | 8 | ''' 9 | This is only for the synthetic portion of the TrajNet++ dataset. It can be modified to also create files for the real 10 | portion. 11 | ''' 12 | 13 | 14 | def drop_distant(xy, max_num_peds=5): 15 | """ 16 | Only Keep the max_num_peds closest pedestrians 17 | """ 18 | distance_2 = np.sum(np.square(xy - xy[:, 0:1]), axis=2) 19 | smallest_dist_to_ego = np.nanmin(distance_2, axis=0) 20 | return xy[:, np.argsort(smallest_dist_to_ego)[:(max_num_peds)]] 21 | 22 | 23 | def drop_inactive(xy, obs_horizon=9): 24 | """ 25 | Only keep agents that are active at the last timestep in the past. 26 | """ 27 | return xy[:, ~np.isnan(xy[obs_horizon-1, :, 0])] 28 | 29 | 30 | def shift(xy, center): 31 | # theta = random.random() * 2.0 * math.pi 32 | xy = xy - center[np.newaxis, np.newaxis, :] 33 | return xy 34 | 35 | 36 | def theta_rotation(xy, theta): 37 | # theta = random.random() * 2.0 * math.pi 38 | ct = math.cos(theta) 39 | st = math.sin(theta) 40 | 41 | r = np.array([[ct, st], [-st, ct]]) 42 | return np.einsum('ptc,ci->pti', xy, r) 43 | 44 | 45 | def center_scene(xy, obs_length=9, ped_id=0): 46 | ## Center 47 | center = xy[obs_length - 1, ped_id] ## Last Observation 48 | xy = shift(xy, center) 49 | 50 | ## Rotate 51 | last_obs = xy[obs_length - 1, ped_id] 52 | second_last_obs = xy[obs_length - 2, ped_id] 53 | diff = np.array([last_obs[0] - second_last_obs[0], last_obs[1] - second_last_obs[1]]) 54 | thet = np.arctan2(diff[1], diff[0]) 55 | rotation = -thet + np.pi / 2 56 | xy = theta_rotation(xy, rotation) 57 | return xy, rotation, center 58 | 59 | 60 | def inverse_scene(xy, rotation, center): 61 | xy = theta_rotation(xy, -rotation) 62 | xy = shift(xy, -center) 63 | return xy 64 | 65 | 66 | def get_args(): 67 | parser = argparse.ArgumentParser(description="TrajNet++ NPY Creator") 68 | parser.add_argument("--output-npy-path", type=str, required=True, help="output path to H5 files.") 69 | parser.add_argument("--raw-dataset-path", type=str, required=True, help="raw Dataset path to .../synth_data.") 70 | args = parser.parse_args() 71 | return args 72 | 73 | 74 | def prepare_data(raw_path, out_path): 75 | N = 6 76 | files = [f.split('.')[-2] for f in os.listdir(raw_path) if f.endswith('.ndjson')] 77 | for file in files: 78 | reader = trajnetplusplustools.Reader(raw_path + file + '.ndjson', scene_type='tags') 79 | scene = [(file, s_id, s_tag, xypaths) for s_id, s_tag, xypaths in reader.scenes(sample=1.0)] 80 | 81 | val_len = int(len(scene)*0.1) 82 | train_len = len(scene) - val_len 83 | val_file_data = np.zeros((val_len, 21, N, 2)) 84 | train_file_data = np.zeros((train_len, 21, N, 2)) 85 | largest_num_agents = 0.0 86 | for scene_i, (filename, scene_id, s_tag, curr_scene) in enumerate(scene): 87 | curr_scene = drop_distant(curr_scene, max_num_peds=N) 88 | curr_scene, _, _ = center_scene(curr_scene) 89 | 90 | if curr_scene.shape[1] > largest_num_agents: 91 | 
largest_num_agents = curr_scene.shape[1] 92 | 93 | if curr_scene.shape[1] < N: 94 | # Need to pad array to have shape 21xNx2 95 | temp_curr_scene = np.zeros((21, N, 2)) 96 | temp_curr_scene[:, :, :] = np.nan 97 | temp_curr_scene[:, :curr_scene.shape[1], :] = curr_scene 98 | curr_scene = temp_curr_scene.copy() 99 | 100 | if scene_i < val_len: 101 | val_file_data[scene_i] = curr_scene 102 | else: 103 | train_file_data[scene_i - val_len] = curr_scene 104 | 105 | np.save(os.path.join(out_path, "val_"+file+".npy"), val_file_data) 106 | np.save(os.path.join(out_path, "train_"+file+".npy"), train_file_data) 107 | del val_file_data 108 | del train_file_data 109 | del scene 110 | del reader 111 | print("FILE", file, "Largest_num_agents", largest_num_agents) 112 | 113 | 114 | if __name__ == "__main__": 115 | args = get_args() 116 | prepare_data(raw_path=args.raw_dataset_path, out_path=args.output_npy_path) 117 | -------------------------------------------------------------------------------- /datasets/trajnetpp/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import glob 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class TrajNetPPDataset(Dataset): 8 | def __init__(self, dset_path, split_name="train"): 9 | self.num_others = 5 10 | self.pred_horizon = 12 11 | self.num_agent_types = 1 # code assuming only one type of agent (pedestrians). 12 | self.in_seq_len = 9 13 | self.predict_yaw = False 14 | self.map_attr = 0 # dummy 15 | self.k_attr = 2 16 | 17 | dset_fnames = sorted(glob.glob(os.path.join(dset_path, split_name+"_*.npy"))) 18 | agents_dataset = [] 19 | for dset_fname in dset_fnames: 20 | agents_dataset.append(np.load(dset_fname)) 21 | self.agents_dataset = np.concatenate(agents_dataset)[:, :, :self.num_others+1] 22 | del agents_dataset 23 | 24 | def __getitem__(self, idx: int): 25 | data = self.agents_dataset[idx] 26 | 27 | # Remove nan values and add mask column to state 28 | data_mask = np.ones((data.shape[0], data.shape[1], 3)) 29 | data_mask[:, :, :2] = data 30 | nan_indices = np.where(np.isnan(data[:, :, 0])) 31 | data_mask[nan_indices] = [0, 0, 0] 32 | 33 | # Separate past and future. 34 | agents_in = data_mask[:self.in_seq_len] 35 | agents_out = data_mask[self.in_seq_len:] 36 | 37 | ego_in = agents_in[:, 0] 38 | ego_out = agents_out[:, 0] 39 | 40 | agent_types = np.ones((self.num_others + 1, self.num_agent_types)) 41 | roads = np.ones((1, 1)) # for dataloading to work with other datasets that have images. 
42 | 43 | return ego_in, ego_out, agents_in[:, 1:], agents_out[:, 1:], roads, agent_types 44 | 45 | def __len__(self): 46 | return len(self.agents_dataset) 47 | 48 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | from datasets.argoverse.dataset import ArgoH5Dataset 5 | from datasets.interaction_dataset.dataset import InteractionDataset 6 | from datasets.nuscenes.dataset import NuscenesH5Dataset 7 | from datasets.trajnetpp.dataset import TrajNetPPDataset 8 | from models.autobot_ego import AutoBotEgo 9 | from models.autobot_joint import AutoBotJoint 10 | from process_args import get_eval_args 11 | from utils.metric_helpers import min_xde_K, yaw_from_predictions, interpolate_trajectories, collisions_for_inter_dataset 12 | 13 | 14 | class Evaluator: 15 | def __init__(self, args, model_config, model_dirname): 16 | self.args = args 17 | self.model_config = model_config 18 | self.model_dirname = model_dirname 19 | random.seed(self.model_config.seed) 20 | np.random.seed(self.model_config.seed) 21 | torch.manual_seed(self.model_config.seed) 22 | if torch.cuda.is_available() and not self.args.disable_cuda: 23 | self.device = torch.device("cuda") 24 | torch.cuda.manual_seed(self.model_config.seed) 25 | else: 26 | self.device = torch.device("cpu") 27 | 28 | self.interact_eval = False # for evaluating on the interaction dataset, we need this bool. 29 | self.initialize_dataloader() 30 | self.initialize_model() 31 | 32 | def initialize_dataloader(self): 33 | if "Nuscenes" in self.model_config.dataset: 34 | val_dset = NuscenesH5Dataset(dset_path=self.args.dataset_path, split_name="val", 35 | model_type=self.model_config.model_type, 36 | use_map_img=self.model_config.use_map_image, 37 | use_map_lanes=self.model_config.use_map_lanes) 38 | 39 | elif "interaction-dataset" in self.model_config.dataset: 40 | val_dset = InteractionDataset(dset_path=self.args.dataset_path, split_name="val", 41 | use_map_lanes=self.model_config.use_map_lanes, evaluation=True) 42 | self.interact_eval = True 43 | 44 | elif "trajnet++" in self.model_config.dataset: 45 | val_dset = TrajNetPPDataset(dset_path=self.model_config.dataset_path, split_name="val") 46 | 47 | elif "Argoverse" in self.model_config.dataset: 48 | val_dset = ArgoH5Dataset(dset_path=self.args.dataset_path, split_name="val", 49 | use_map_lanes=self.model_config.use_map_lanes) 50 | 51 | else: 52 | raise NotImplementedError 53 | 54 | self.num_other_agents = val_dset.num_others 55 | self.pred_horizon = val_dset.pred_horizon 56 | self.k_attr = val_dset.k_attr 57 | self.map_attr = val_dset.map_attr 58 | self.predict_yaw = val_dset.predict_yaw 59 | if "Joint" in self.model_config.model_type: 60 | self.num_agent_types = val_dset.num_agent_types 61 | 62 | self.val_loader = torch.utils.data.DataLoader( 63 | val_dset, batch_size=self.args.batch_size, shuffle=True, num_workers=12, drop_last=False, 64 | pin_memory=False 65 | ) 66 | 67 | print("Val dataset loaded with length", len(val_dset)) 68 | 69 | def initialize_model(self): 70 | if "Ego" in self.model_config.model_type: 71 | self.autobot_model = AutoBotEgo(k_attr=self.k_attr, 72 | d_k=self.model_config.hidden_size, 73 | _M=self.num_other_agents, 74 | c=self.model_config.num_modes, 75 | T=self.pred_horizon, 76 | L_enc=self.model_config.num_encoder_layers, 77 | dropout=self.model_config.dropout, 78 | 
num_heads=self.model_config.tx_num_heads, 79 | L_dec=self.model_config.num_decoder_layers, 80 | tx_hidden_size=self.model_config.tx_hidden_size, 81 | use_map_img=self.model_config.use_map_image, 82 | use_map_lanes=self.model_config.use_map_lanes, 83 | map_attr=self.map_attr).to(self.device) 84 | 85 | elif "Joint" in self.model_config.model_type: 86 | self.autobot_model = AutoBotJoint(k_attr=self.k_attr, 87 | d_k=self.model_config.hidden_size, 88 | _M=self.num_other_agents, 89 | c=self.model_config.num_modes, 90 | T=self.pred_horizon, 91 | L_enc=self.model_config.num_encoder_layers, 92 | dropout=self.model_config.dropout, 93 | num_heads=self.model_config.tx_num_heads, 94 | L_dec=self.model_config.num_decoder_layers, 95 | tx_hidden_size=self.model_config.tx_hidden_size, 96 | use_map_lanes=self.model_config.use_map_lanes, 97 | map_attr=self.map_attr, 98 | num_agent_types=self.num_agent_types, 99 | predict_yaw=self.predict_yaw).to(self.device) 100 | else: 101 | raise NotImplementedError 102 | 103 | model_dicts = torch.load(self.args.models_path, map_location=self.device) 104 | self.autobot_model.load_state_dict(model_dicts["AutoBot"]) 105 | self.autobot_model.eval() 106 | 107 | model_parameters = filter(lambda p: p.requires_grad, self.autobot_model.parameters()) 108 | num_params = sum([np.prod(p.size()) for p in model_parameters]) 109 | print("Number of Model Parameters:", num_params) 110 | 111 | def _data_to_device(self, data): 112 | if "Joint" in self.model_config.model_type: 113 | ego_in, ego_out, agents_in, agents_out, context_img, agent_types = data 114 | ego_in = ego_in.float().to(self.device) 115 | ego_out = ego_out.float().to(self.device) 116 | agents_in = agents_in.float().to(self.device) 117 | agents_out = agents_out.float().to(self.device) 118 | context_img = context_img.float().to(self.device) 119 | agent_types = agent_types.float().to(self.device) 120 | return ego_in, ego_out, agents_in, agents_out, context_img, agent_types 121 | 122 | elif "Ego" in self.model_config.model_type: 123 | ego_in, ego_out, agents_in, roads = data 124 | ego_in = ego_in.float().to(self.device) 125 | ego_out = ego_out.float().to(self.device) 126 | agents_in = agents_in.float().to(self.device) 127 | roads = roads.float().to(self.device) 128 | return ego_in, ego_out, agents_in, roads 129 | 130 | def _compute_ego_errors(self, ego_preds, ego_gt): 131 | ego_gt = ego_gt.transpose(0, 1).unsqueeze(0) 132 | ade_losses = torch.mean(torch.norm(ego_preds[:, :, :, :2] - ego_gt[:, :, :, :2], 2, dim=-1), dim=1).transpose(0, 1).cpu().numpy() 133 | fde_losses = torch.norm(ego_preds[:, -1, :, :2] - ego_gt[:, -1, :, :2], 2, dim=-1).transpose(0, 1).cpu().numpy() 134 | return ade_losses, fde_losses 135 | 136 | def _compute_marginal_errors(self, preds, ego_gt, agents_gt, agents_in): 137 | agent_masks = torch.cat((torch.ones((len(agents_in), 1)).to(self.device), agents_in[:, -1, :, -1]), dim=-1).view(1, 1, len(agents_in), -1) 138 | agent_masks[agent_masks == 0] = float('nan') 139 | agents_gt = torch.cat((ego_gt.unsqueeze(2), agents_gt), dim=2).unsqueeze(0).permute(0, 2, 1, 3, 4) 140 | error = torch.norm(preds[:, :, :, :, :2] - agents_gt[:, :, :, :, :2], 2, dim=-1) * agent_masks 141 | ade_losses = np.nanmean(error.cpu().numpy(), axis=1).transpose(1, 2, 0) 142 | fde_losses = error[:, -1].cpu().numpy().transpose(1, 2, 0) 143 | return ade_losses, fde_losses 144 | 145 | def _compute_joint_errors(self, preds, ego_gt, agents_gt): 146 | agents_gt = torch.cat((ego_gt.unsqueeze(2), agents_gt), dim=2) 147 | agents_masks = agents_gt[:, :, 
:, -1] 148 | agents_masks[agents_masks == 0] = float('nan') 149 | 150 | ade_losses = [] 151 | for k in range(self.model_config.num_modes): 152 | ade_error = (torch.norm(preds[k, :, :, :, :2].transpose(0, 1) - agents_gt[:, :, :, :2], 2, dim=-1) 153 | * agents_masks).cpu().numpy() 154 | ade_error = np.nanmean(ade_error, axis=(1, 2)) 155 | ade_losses.append(ade_error) 156 | ade_losses = np.array(ade_losses).transpose() 157 | 158 | fde_losses = [] 159 | for k in range(self.model_config.num_modes): 160 | fde_error = (torch.norm(preds[k, -1, :, :, :2] - agents_gt[:, -1, :, :2], 2, dim=-1) * agents_masks[:, -1]).cpu().numpy() 161 | fde_error = np.nanmean(fde_error, axis=1) 162 | fde_losses.append(fde_error) 163 | fde_losses = np.array(fde_losses).transpose() 164 | 165 | return ade_losses, fde_losses 166 | 167 | def autobotjoint_evaluate(self): 168 | with torch.no_grad(): 169 | val_marg_ade_losses = [] 170 | val_marg_fde_losses = [] 171 | val_marg_mode_probs = [] 172 | val_scene_ade_losses = [] 173 | val_scene_fde_losses = [] 174 | val_mode_probs = [] 175 | if self.interact_eval: 176 | total_collisions = [] 177 | for i, data in enumerate(self.val_loader): 178 | if i % 50 == 0: 179 | print(i, "/", len(self.val_loader.dataset) // self.args.batch_size) 180 | 181 | if self.interact_eval: 182 | # for the interaction dataset, we have multiple outputs that we use to interpolate, rotate and 183 | # compute scene collisions almost like they do. 184 | orig_ego_in, orig_agents_in, original_roads, translations = data[6:] 185 | data = data[:6] 186 | orig_ego_in = orig_ego_in.float().to(self.device) 187 | orig_agents_in = orig_agents_in.float().to(self.device) 188 | 189 | ego_in, ego_out, agents_in, agents_out, context_img, agent_types = self._data_to_device(data) 190 | pred_obs, mode_probs = self.autobot_model(ego_in, agents_in, context_img, agent_types) 191 | 192 | if self.interact_eval: 193 | pred_obs = interpolate_trajectories(pred_obs) 194 | pred_obs = yaw_from_predictions(pred_obs, orig_ego_in, orig_agents_in) 195 | scene_collisions, pred_obs, vehicles_only = collisions_for_inter_dataset(pred_obs.cpu().numpy(), 196 | agent_types.cpu().numpy(), 197 | orig_ego_in.cpu().numpy(), 198 | orig_agents_in.cpu().numpy(), 199 | translations.cpu().numpy(), 200 | device=self.device) 201 | total_collisions.append(scene_collisions) 202 | 203 | # Marginal metrics 204 | ade_losses, fde_losses = self._compute_marginal_errors(pred_obs, ego_out, agents_out, agents_in) 205 | val_marg_ade_losses.append(ade_losses.reshape(-1, self.model_config.num_modes)) 206 | val_marg_fde_losses.append(fde_losses.reshape(-1, self.model_config.num_modes)) 207 | val_marg_mode_probs.append( 208 | mode_probs.unsqueeze(1).repeat(1, self.num_other_agents + 1, 1).detach().cpu().numpy().reshape( 209 | -1, self.model_config.num_modes)) 210 | 211 | # Joint metrics 212 | scene_ade_losses, scene_fde_losses = self._compute_joint_errors(pred_obs, ego_out, agents_out) 213 | val_scene_ade_losses.append(scene_ade_losses) 214 | val_scene_fde_losses.append(scene_fde_losses) 215 | val_mode_probs.append(mode_probs.detach().cpu().numpy()) 216 | 217 | val_marg_ade_losses = np.concatenate(val_marg_ade_losses) 218 | val_marg_fde_losses = np.concatenate(val_marg_fde_losses) 219 | val_marg_mode_probs = np.concatenate(val_marg_mode_probs) 220 | 221 | val_scene_ade_losses = np.concatenate(val_scene_ade_losses) 222 | val_scene_fde_losses = np.concatenate(val_scene_fde_losses) 223 | val_mode_probs = np.concatenate(val_mode_probs) 224 | 225 | val_minade_c = 
min_xde_K(val_marg_ade_losses, val_marg_mode_probs, K=self.model_config.num_modes) 226 | val_minfde_c = min_xde_K(val_marg_fde_losses, val_marg_mode_probs, K=self.model_config.num_modes) 227 | val_sminade_c = min_xde_K(val_scene_ade_losses, val_mode_probs, K=self.model_config.num_modes) 228 | val_sminfde_c = min_xde_K(val_scene_fde_losses, val_mode_probs, K=self.model_config.num_modes) 229 | 230 | print("Marg. minADE c:", val_minade_c[0], "Marg. minFDE c:", val_minfde_c[0]) 231 | print("Scene minADE c:", val_sminade_c[0], "Scene minFDE c:", val_sminfde_c[0]) 232 | 233 | if self.interact_eval: 234 | total_collisions = np.concatenate(total_collisions).mean() 235 | print("Scene Collision Rate", total_collisions) 236 | 237 | def autobotego_evaluate(self): 238 | with torch.no_grad(): 239 | val_ade_losses = [] 240 | val_fde_losses = [] 241 | val_mode_probs = [] 242 | for i, data in enumerate(self.val_loader): 243 | if i % 50 == 0: 244 | print(i, "/", len(self.val_loader.dataset) // self.args.batch_size) 245 | ego_in, ego_out, agents_in, roads = self._data_to_device(data) 246 | 247 | # encode observations 248 | pred_obs, mode_probs = self.autobot_model(ego_in, agents_in, roads) 249 | 250 | ade_losses, fde_losses = self._compute_ego_errors(pred_obs, ego_out) 251 | val_ade_losses.append(ade_losses) 252 | val_fde_losses.append(fde_losses) 253 | val_mode_probs.append(mode_probs.detach().cpu().numpy()) 254 | 255 | val_ade_losses = np.concatenate(val_ade_losses) 256 | val_fde_losses = np.concatenate(val_fde_losses) 257 | val_mode_probs = np.concatenate(val_mode_probs) 258 | val_minade_c = min_xde_K(val_ade_losses, val_mode_probs, K=self.model_config.num_modes) 259 | val_minade_10 = min_xde_K(val_ade_losses, val_mode_probs, K=min(self.model_config.num_modes, 10)) 260 | val_minade_5 = min_xde_K(val_ade_losses, val_mode_probs, K=5) 261 | val_minfde_c = min_xde_K(val_fde_losses, val_mode_probs, K=self.model_config.num_modes) 262 | val_minfde_1 = min_xde_K(val_fde_losses, val_mode_probs, K=1) 263 | 264 | print("minADE_{}:".format(self.model_config.num_modes), val_minade_c[0], 265 | "minADE_10", val_minade_10[0], "minADE_5", val_minade_5[0], 266 | "minFDE_{}:".format(self.model_config.num_modes), val_minfde_c[0], "minFDE_1:", val_minfde_1[0]) 267 | 268 | def evaluate(self): 269 | if "Joint" in self.model_config.model_type: 270 | self.autobotjoint_evaluate() 271 | elif "Ego" in self.model_config.model_type: 272 | self.autobotego_evaluate() 273 | else: 274 | raise NotImplementedError 275 | 276 | 277 | if __name__ == '__main__': 278 | args, config, model_dirname = get_eval_args() 279 | evaluator = Evaluator(args, config, model_dirname) 280 | evaluator.evaluate() 281 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/models/__init__.py -------------------------------------------------------------------------------- /models/autobot_ego.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from models.context_encoders import MapEncoderCNN, MapEncoderPts 8 | 9 | 10 | def init(module, weight_init, bias_init, gain=1): 11 | ''' 12 | This function provides weight and bias initializations for linear layers. 
13 | ''' 14 | weight_init(module.weight.data, gain=gain) 15 | bias_init(module.bias.data) 16 | return module 17 | 18 | 19 | class PositionalEncoding(nn.Module): 20 | ''' 21 | Standard positional encoding. 22 | ''' 23 | def __init__(self, d_model, dropout=0.1, max_len=20): 24 | super(PositionalEncoding, self).__init__() 25 | self.dropout = nn.Dropout(p=dropout) 26 | pe = torch.zeros(max_len, d_model) 27 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 28 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 29 | pe[:, 0::2] = torch.sin(position * div_term) 30 | pe[:, 1::2] = torch.cos(position * div_term) 31 | pe = pe.unsqueeze(0).transpose(0, 1) 32 | self.register_buffer('pe', pe) 33 | 34 | def forward(self, x): 35 | ''' 36 | :param x: must be (T, B, H) 37 | :return: 38 | ''' 39 | x = x + self.pe[:x.size(0), :] 40 | return self.dropout(x) 41 | 42 | 43 | class OutputModel(nn.Module): 44 | ''' 45 | This class operates on the output of AutoBot-Ego's decoder representation. It produces the parameters of a 46 | bivariate Gaussian distribution. 47 | ''' 48 | def __init__(self, d_k=64): 49 | super(OutputModel, self).__init__() 50 | self.d_k = d_k 51 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 52 | self.observation_model = nn.Sequential( 53 | init_(nn.Linear(d_k, d_k)), nn.ReLU(), 54 | init_(nn.Linear(d_k, d_k)), nn.ReLU(), 55 | init_(nn.Linear(d_k, 5)) 56 | ) 57 | self.min_stdev = 0.01 58 | 59 | def forward(self, agent_decoder_state): 60 | T = agent_decoder_state.shape[0] 61 | BK = agent_decoder_state.shape[1] 62 | pred_obs = self.observation_model(agent_decoder_state.reshape(-1, self.d_k)).reshape(T, BK, -1) 63 | 64 | x_mean = pred_obs[:, :, 0] 65 | y_mean = pred_obs[:, :, 1] 66 | x_sigma = F.softplus(pred_obs[:, :, 2]) + self.min_stdev 67 | y_sigma = F.softplus(pred_obs[:, :, 3]) + self.min_stdev 68 | rho = torch.tanh(pred_obs[:, :, 4]) * 0.9 # for stability 69 | return torch.stack([x_mean, y_mean, x_sigma, y_sigma, rho], dim=2) 70 | 71 | 72 | class AutoBotEgo(nn.Module): 73 | ''' 74 | AutoBot-Ego Class. 
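Rough usage sketch (not part of the original code; example shapes follow the forward() docstring,
with a dummy batch of 8, 4 observed timesteps, the last input channel as the existence mask, and
no map inputs):

    model = AutoBotEgo(d_k=128, _M=5, c=5, T=30, k_attr=2)
    ego_in = torch.cat([torch.randn(8, 4, 2), torch.ones(8, 4, 1)], dim=-1)          # [B, T_obs, k_attr+1]
    agents_in = torch.cat([torch.randn(8, 4, 5, 2), torch.ones(8, 4, 5, 1)], dim=-1)  # [B, T_obs, _M, k_attr+1]
    roads = torch.ones(8, 1, 1)                                                       # no map information
    pred_dists, mode_probs = model(ego_in, agents_in, roads)                          # [c, T, B, 5], [B, c]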
75 | ''' 76 | def __init__(self, d_k=128, _M=5, c=5, T=30, L_enc=1, dropout=0.0, k_attr=2, map_attr=3, 77 | num_heads=16, L_dec=1, tx_hidden_size=384, use_map_img=False, use_map_lanes=False): 78 | super(AutoBotEgo, self).__init__() 79 | 80 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 81 | 82 | self.map_attr = map_attr 83 | self.k_attr = k_attr 84 | self.d_k = d_k 85 | self._M = _M # num agents without the ego-agent 86 | self.c = c 87 | self.T = T 88 | self.L_enc = L_enc 89 | self.dropout = dropout 90 | self.num_heads = num_heads 91 | self.L_dec= L_dec 92 | self.tx_hidden_size = tx_hidden_size 93 | self.use_map_img = use_map_img 94 | self.use_map_lanes = use_map_lanes 95 | 96 | # INPUT ENCODERS 97 | self.agents_dynamic_encoder = nn.Sequential(init_(nn.Linear(k_attr, d_k))) 98 | 99 | # ============================== AutoBot-Ego ENCODER ============================== 100 | self.social_attn_layers = [] 101 | self.temporal_attn_layers = [] 102 | for _ in range(self.L_enc): 103 | tx_encoder_layer = nn.TransformerEncoderLayer(d_model=d_k, nhead=self.num_heads, dropout=self.dropout, 104 | dim_feedforward=self.tx_hidden_size) 105 | self.social_attn_layers.append(nn.TransformerEncoder(tx_encoder_layer, num_layers=1)) 106 | 107 | tx_encoder_layer = nn.TransformerEncoderLayer(d_model=d_k, nhead=self.num_heads, dropout=self.dropout, 108 | dim_feedforward=self.tx_hidden_size) 109 | self.temporal_attn_layers.append(nn.TransformerEncoder(tx_encoder_layer, num_layers=1)) 110 | 111 | self.temporal_attn_layers = nn.ModuleList(self.temporal_attn_layers) 112 | self.social_attn_layers = nn.ModuleList(self.social_attn_layers) 113 | 114 | # ============================== MAP ENCODER ========================== 115 | if self.use_map_img: 116 | self.map_encoder = MapEncoderCNN(d_k=d_k, dropout=self.dropout) 117 | self.emb_state_map = nn.Sequential( 118 | init_(nn.Linear(2 * d_k, d_k)), nn.ReLU(), 119 | init_(nn.Linear(d_k, d_k)) 120 | ) 121 | elif self.use_map_lanes: 122 | self.map_encoder = MapEncoderPts(d_k=d_k, map_attr=map_attr, dropout=self.dropout) 123 | self.map_attn_layers = nn.MultiheadAttention(self.d_k, num_heads=self.num_heads, dropout=0.3) 124 | 125 | # ============================== AutoBot-Ego DECODER ============================== 126 | self.Q = nn.Parameter(torch.Tensor(self.T, 1, self.c, self.d_k), requires_grad=True) 127 | nn.init.xavier_uniform_(self.Q) 128 | 129 | self.tx_decoder = [] 130 | for _ in range(self.L_dec): 131 | self.tx_decoder.append(nn.TransformerDecoderLayer(d_model=self.d_k, nhead=self.num_heads, 132 | dropout=self.dropout, 133 | dim_feedforward=self.tx_hidden_size)) 134 | self.tx_decoder = nn.ModuleList(self.tx_decoder) 135 | 136 | # ============================== Positional encoder ============================== 137 | self.pos_encoder = PositionalEncoding(d_k, dropout=0.0) 138 | 139 | # ============================== OUTPUT MODEL ============================== 140 | self.output_model = OutputModel(d_k=self.d_k) 141 | 142 | # ============================== Mode Prob prediction (P(z|X_1:t)) ============================== 143 | self.P = nn.Parameter(torch.Tensor(c, 1, d_k), requires_grad=True) # Appendix C.2. 
144 | nn.init.xavier_uniform_(self.P) 145 | 146 | if self.use_map_img: 147 | self.modemap_net = nn.Sequential( 148 | init_(nn.Linear(2*self.d_k, self.d_k)), nn.ReLU(), 149 | init_(nn.Linear(self.d_k, self.d_k)) 150 | ) 151 | elif self.use_map_lanes: 152 | self.mode_map_attn = nn.MultiheadAttention(self.d_k, num_heads=self.num_heads) 153 | 154 | self.prob_decoder = nn.MultiheadAttention(self.d_k, num_heads=self.num_heads, dropout=self.dropout) 155 | self.prob_predictor = init_(nn.Linear(self.d_k, 1)) 156 | 157 | self.train() 158 | 159 | def generate_decoder_mask(self, seq_len, device): 160 | ''' For masking out the subsequent info. ''' 161 | subsequent_mask = (torch.triu(torch.ones((seq_len, seq_len), device=device), diagonal=1)).bool() 162 | return subsequent_mask 163 | 164 | def process_observations(self, ego, agents): 165 | ''' 166 | :param observations: (B, T, N+2, A+1) where N+2 is [ego, other_agents, env] 167 | :return: a tensor of only the agent dynamic states, active_agent masks and env masks. 168 | ''' 169 | # ego stuff 170 | ego_tensor = ego[:, :, :self.k_attr] 171 | env_masks_orig = ego[:, :, -1] 172 | env_masks = (1.0 - env_masks_orig).type(torch.BoolTensor).to(env_masks_orig.device) 173 | env_masks = env_masks.unsqueeze(1).repeat(1, self.c, 1).view(ego.shape[0] * self.c, -1) 174 | 175 | # Agents stuff 176 | temp_masks = torch.cat((torch.ones_like(env_masks_orig.unsqueeze(-1)), agents[:, :, :, -1]), dim=-1) 177 | opps_masks = (1.0 - temp_masks).type(torch.BoolTensor).to(agents.device) # only for agents. 178 | opps_tensor = agents[:, :, :, :self.k_attr] # only opponent states 179 | 180 | return ego_tensor, opps_tensor, opps_masks, env_masks 181 | 182 | def temporal_attn_fn(self, agents_emb, agent_masks, layer): 183 | ''' 184 | :param agents_emb: (T, B, N, H) 185 | :param agent_masks: (B, T, N) 186 | :return: (T, B, N, H) 187 | ''' 188 | T_obs = agents_emb.size(0) 189 | B = agent_masks.size(0) 190 | num_agents = agent_masks.size(2) 191 | temp_masks = agent_masks.permute(0, 2, 1).reshape(-1, T_obs) 192 | temp_masks[:, -1][temp_masks.sum(-1) == T_obs] = False # Ensure that agent's that don't exist don't make NaN. 193 | agents_temp_emb = layer(self.pos_encoder(agents_emb.reshape(T_obs, B * (num_agents), -1)), 194 | src_key_padding_mask=temp_masks) 195 | return agents_temp_emb.view(T_obs, B, num_agents, -1) 196 | 197 | def social_attn_fn(self, agents_emb, agent_masks, layer): 198 | ''' 199 | :param agents_emb: (T, B, N, H) 200 | :param agent_masks: (B, T, N) 201 | :return: (T, B, N, H) 202 | ''' 203 | T_obs = agents_emb.size(0) 204 | B = agent_masks.size(0) 205 | agents_emb = agents_emb.permute(2, 1, 0, 3).reshape(self._M + 1, B * T_obs, -1) 206 | agents_soc_emb = layer(agents_emb, src_key_padding_mask=agent_masks.view(-1, self._M+1)) 207 | agents_soc_emb = agents_soc_emb.view(self._M+1, B, T_obs, -1).permute(2, 1, 0, 3) 208 | return agents_soc_emb 209 | 210 | def forward(self, ego_in, agents_in, roads): 211 | ''' 212 | :param ego_in: [B, T_obs, k_attr+1] with last values being the existence mask. 213 | :param agents_in: [B, T_obs, M-1, k_attr+1] with last values being the existence mask. 214 | :param roads: [B, S, P, map_attr+1] representing the road network if self.use_map_lanes or 215 | [B, 3, 128, 128] image representing the road network if self.use_map_img or 216 | [B, 1, 1] if self.use_map_lanes and self.use_map_img are False. 
217 | :return: 218 | pred_obs: shape [c, T, B, 5] c trajectories for the ego agents with every point being the params of 219 | Bivariate Gaussian distribution. 220 | mode_probs: shape [B, c] mode probability predictions P(z|X_{1:T_obs}) 221 | ''' 222 | B = ego_in.size(0) 223 | 224 | # Encode all input observations (k_attr --> d_k) 225 | ego_tensor, _agents_tensor, opps_masks, env_masks = self.process_observations(ego_in, agents_in) 226 | agents_tensor = torch.cat((ego_tensor.unsqueeze(2), _agents_tensor), dim=2) 227 | agents_emb = self.agents_dynamic_encoder(agents_tensor).permute(1, 0, 2, 3) 228 | 229 | # Process through AutoBot's encoder 230 | for i in range(self.L_enc): 231 | agents_emb = self.temporal_attn_fn(agents_emb, opps_masks, layer=self.temporal_attn_layers[i]) 232 | agents_emb = self.social_attn_fn(agents_emb, opps_masks, layer=self.social_attn_layers[i]) 233 | ego_soctemp_emb = agents_emb[:, :, 0] # take ego-agent encodings only. 234 | 235 | # Process map information 236 | if self.use_map_img: 237 | orig_map_features = self.map_encoder(roads) 238 | map_features = orig_map_features.view(B * self.c, -1).unsqueeze(0).repeat(self.T, 1, 1) 239 | elif self.use_map_lanes: 240 | orig_map_features, orig_road_segs_masks = self.map_encoder(roads, ego_soctemp_emb) 241 | map_features = orig_map_features.unsqueeze(2).repeat(1, 1, self.c, 1).view(-1, B*self.c, self.d_k) 242 | road_segs_masks = orig_road_segs_masks.unsqueeze(1).repeat(1, self.c, 1).view(B*self.c, -1) 243 | 244 | # Repeat the tensors for the number of modes for efficient forward pass. 245 | context = ego_soctemp_emb.unsqueeze(2).repeat(1, 1, self.c, 1) 246 | context = context.view(-1, B*self.c, self.d_k) 247 | 248 | # AutoBot-Ego Decoding 249 | out_seq = self.Q.repeat(1, B, 1, 1).view(self.T, B*self.c, -1) 250 | time_masks = self.generate_decoder_mask(seq_len=self.T, device=ego_in.device) 251 | for d in range(self.L_dec): 252 | if self.use_map_img and d == 1: 253 | ego_dec_emb_map = torch.cat((out_seq, map_features), dim=-1) 254 | out_seq = self.emb_state_map(ego_dec_emb_map) + out_seq 255 | elif self.use_map_lanes and d == 1: 256 | ego_dec_emb_map = self.map_attn_layers(query=out_seq, key=map_features, value=map_features, 257 | key_padding_mask=road_segs_masks)[0] 258 | out_seq = out_seq + ego_dec_emb_map 259 | out_seq = self.tx_decoder[d](out_seq, context, tgt_mask=time_masks, memory_key_padding_mask=env_masks) 260 | out_dists = self.output_model(out_seq).reshape(self.T, B, self.c, -1).permute(2, 0, 1, 3) 261 | 262 | # Mode prediction 263 | mode_params_emb = self.P.repeat(1, B, 1) 264 | mode_params_emb = self.prob_decoder(query=mode_params_emb, key=ego_soctemp_emb, value=ego_soctemp_emb)[0] 265 | if self.use_map_img: 266 | mode_params_emb = self.modemap_net(torch.cat((mode_params_emb, orig_map_features.transpose(0, 1)), dim=-1)) 267 | elif self.use_map_lanes: 268 | mode_params_emb = self.mode_map_attn(query=mode_params_emb, key=orig_map_features, value=orig_map_features, 269 | key_padding_mask=orig_road_segs_masks)[0] + mode_params_emb 270 | mode_probs = F.softmax(self.prob_predictor(mode_params_emb).squeeze(-1), dim=0).transpose(0, 1) 271 | 272 | # return [c, T, B, 5], [B, c] 273 | return out_dists, mode_probs 274 | 275 | -------------------------------------------------------------------------------- /models/autobot_joint.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as 
F 7 | 8 | from models.context_encoders import MapEncoderPtsMA 9 | 10 | 11 | def init(module, weight_init, bias_init, gain=1): 12 | weight_init(module.weight.data, gain=gain) 13 | bias_init(module.bias.data) 14 | return module 15 | 16 | 17 | class PositionalEncoding(nn.Module): 18 | def __init__(self, d_model, dropout=0.1, max_len=20): 19 | super(PositionalEncoding, self).__init__() 20 | self.dropout = nn.Dropout(p=dropout) 21 | pe = torch.zeros(max_len, d_model) 22 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 23 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 24 | pe[:, 0::2] = torch.sin(position * div_term) 25 | pe[:, 1::2] = torch.cos(position * div_term) 26 | pe = pe.unsqueeze(0).transpose(0, 1) 27 | self.register_buffer('pe', pe) 28 | 29 | def forward(self, x): 30 | ''' 31 | :param x: must be (T, B, H) 32 | :return: 33 | ''' 34 | x = x + self.pe[:x.size(0), :] 35 | return self.dropout(x) 36 | 37 | 38 | class OutputModel(nn.Module): 39 | ''' 40 | This class operates on the output of AutoBot-Joint's decoder representation. It produces the parameters of a 41 | bivariate Gaussian distribution and possibly predicts the yaw. 42 | ''' 43 | def __init__(self, d_k=64, predict_yaw=False): 44 | super(OutputModel, self).__init__() 45 | self.d_k = d_k 46 | self.predict_yaw = predict_yaw 47 | out_len = 5 48 | if predict_yaw: 49 | out_len = 6 50 | 51 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 52 | self.observation_model = nn.Sequential( 53 | init_(nn.Linear(self.d_k, self.d_k)), nn.ReLU(), 54 | init_(nn.Linear(self.d_k, self.d_k)), nn.ReLU(), 55 | init_(nn.Linear(self.d_k, out_len)) 56 | ) 57 | self.min_stdev = 0.01 58 | 59 | def forward(self, agent_latent_state): 60 | T = agent_latent_state.shape[0] 61 | BK = agent_latent_state.shape[1] 62 | pred_obs = self.observation_model(agent_latent_state.reshape(-1, self.d_k)).reshape(T, BK, -1) 63 | x_mean = pred_obs[:, :, 0] 64 | y_mean = pred_obs[:, :, 1] 65 | x_sigma = F.softplus(pred_obs[:, :, 2]) + self.min_stdev 66 | y_sigma = F.softplus(pred_obs[:, :, 3]) + self.min_stdev 67 | rho = torch.tanh(pred_obs[:, :, 4]) * 0.9 # for stability 68 | if self.predict_yaw: 69 | yaws = pred_obs[:, :, 5] # for stability 70 | return torch.stack([x_mean, y_mean, x_sigma, y_sigma, rho, yaws], dim=2) 71 | else: 72 | return torch.stack([x_mean, y_mean, x_sigma, y_sigma, rho], dim=2) 73 | 74 | 75 | class AutoBotJoint(nn.Module): 76 | ''' 77 | AutoBot-Joint Class. 78 | ''' 79 | def __init__(self, d_k=128, _M=5, c=5, T=30, L_enc=1, dropout=0.0, k_attr=2, map_attr=3, num_heads=16, L_dec=1, 80 | tx_hidden_size=384, use_map_lanes=False, num_agent_types=None, predict_yaw=False): 81 | super(AutoBotJoint, self).__init__() 82 | 83 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 84 | 85 | self.k_attr = k_attr 86 | self.map_attr = map_attr 87 | self.d_k = d_k 88 | self._M = _M # num agents other then the main agent. 
89 | self.c = c 90 | self.T = T 91 | self.L_enc = L_enc 92 | self.dropout = dropout 93 | self.num_heads = num_heads 94 | self.L_dec = L_dec 95 | self.tx_hidden_size = tx_hidden_size 96 | self.use_map_lanes = use_map_lanes 97 | self.predict_yaw = predict_yaw 98 | 99 | # INPUT ENCODERS 100 | self.agents_dynamic_encoder = nn.Sequential(init_(nn.Linear(self.k_attr, self.d_k))) 101 | 102 | # ============================== AutoBot-Joint ENCODER ============================== 103 | self.social_attn_layers = [] 104 | self.temporal_attn_layers = [] 105 | for _ in range(self.L_enc): 106 | tx_encoder_layer = nn.TransformerEncoderLayer(d_model=self.d_k, nhead=self.num_heads, 107 | dropout=self.dropout, dim_feedforward=self.tx_hidden_size) 108 | self.temporal_attn_layers.append(nn.TransformerEncoder(tx_encoder_layer, num_layers=2)) 109 | 110 | tx_encoder_layer = nn.TransformerEncoderLayer(d_model=self.d_k, nhead=self.num_heads, 111 | dropout=self.dropout, dim_feedforward=self.tx_hidden_size) 112 | self.social_attn_layers.append(nn.TransformerEncoder(tx_encoder_layer, num_layers=1)) 113 | 114 | self.temporal_attn_layers = nn.ModuleList(self.temporal_attn_layers) 115 | self.social_attn_layers = nn.ModuleList(self.social_attn_layers) 116 | 117 | # ============================== MAP ENCODER ========================== 118 | if self.use_map_lanes: 119 | self.map_encoder = MapEncoderPtsMA(d_k=self.d_k, map_attr=self.map_attr, dropout=self.dropout) 120 | self.map_attn_layers = nn.MultiheadAttention(self.d_k, num_heads=self.num_heads, dropout=self.dropout) 121 | 122 | # ============================== AGENT TYPES Encoders ============================== 123 | self.emb_agent_types = nn.Sequential(init_(nn.Linear(num_agent_types, self.d_k))) 124 | self.dec_agenttypes_encoder = nn.Sequential( 125 | init_(nn.Linear(2 * self.d_k, self.d_k)), nn.ReLU(), 126 | init_(nn.Linear(self.d_k, self.d_k)) 127 | ) 128 | 129 | # ============================== AutoBot-Joint DECODER ============================== 130 | self.Q = nn.Parameter(torch.Tensor(self.T, 1, self.c, 1, self.d_k), requires_grad=True) 131 | nn.init.xavier_uniform_(self.Q) 132 | 133 | self.social_attn_decoder_layers = [] 134 | self.temporal_attn_decoder_layers = [] 135 | for _ in range(self.L_dec): 136 | tx_decoder_layer = nn.TransformerDecoderLayer(d_model=self.d_k, nhead=self.num_heads, 137 | dropout=self.dropout, dim_feedforward=self.tx_hidden_size) 138 | self.temporal_attn_decoder_layers.append(nn.TransformerDecoder(tx_decoder_layer, num_layers=2)) 139 | tx_encoder_layer = nn.TransformerEncoderLayer(d_model=self.d_k, nhead=self.num_heads, 140 | dropout=self.dropout, dim_feedforward=self.tx_hidden_size) 141 | self.social_attn_decoder_layers.append(nn.TransformerEncoder(tx_encoder_layer, num_layers=1)) 142 | 143 | self.temporal_attn_decoder_layers = nn.ModuleList(self.temporal_attn_decoder_layers) 144 | self.social_attn_decoder_layers = nn.ModuleList(self.social_attn_decoder_layers) 145 | 146 | # ============================== Positional encoder ============================== 147 | self.pos_encoder = PositionalEncoding(self.d_k, dropout=0.0) 148 | 149 | # ============================== OUTPUT MODEL ============================== 150 | self.output_model = OutputModel(d_k=self.d_k, predict_yaw=self.predict_yaw) 151 | 152 | # ============================== Mode Prob prediction (P(z|X_1:t)) ============================== 153 | self.P = nn.Parameter(torch.Tensor(c, 1, 1, d_k), requires_grad=True) # Appendix C.2. 
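        # P provides one learnable seed per mode; in forward() it is repeated across the batch and
        # agents and attends over the encoder output to produce the mode probabilities.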
154 | nn.init.xavier_uniform_(self.P) 155 | 156 | if self.use_map_lanes: 157 | self.mode_map_attn = nn.MultiheadAttention(self.d_k, num_heads=self.num_heads, dropout=self.dropout) 158 | 159 | self.prob_decoder = nn.MultiheadAttention(self.d_k, num_heads=self.num_heads, dropout=self.dropout) 160 | self.prob_predictor = init_(nn.Linear(self.d_k, 1)) 161 | 162 | self.train() 163 | 164 | def generate_decoder_mask(self, seq_len, device): 165 | ''' For masking out the subsequent info. ''' 166 | subsequent_mask = (torch.triu(torch.ones((seq_len, seq_len), device=device), diagonal=1)).bool() 167 | return subsequent_mask 168 | 169 | def process_observations(self, ego, agents): 170 | # ego stuff 171 | ego_tensor = ego[:, :, :self.k_attr] 172 | env_masks = ego[:, :, -1] 173 | 174 | # Agents stuff 175 | temp_masks = torch.cat((torch.ones_like(env_masks.unsqueeze(-1)), agents[:, :, :, -1]), dim=-1) 176 | opps_masks = (1.0 - temp_masks).type(torch.BoolTensor).to(agents.device) # only for agents. 177 | opps_tensor = agents[:, :, :, :self.k_attr] # only opponent states 178 | 179 | return ego_tensor, opps_tensor, opps_masks, env_masks 180 | 181 | def temporal_attn_fn(self, agents_emb, agent_masks, layer): 182 | ''' 183 | :param agents_emb: (T, B, N, H) 184 | :param agent_masks: (B, T, N) 185 | :return: (T, B, N, H) 186 | ''' 187 | T_obs = agents_emb.size(0) 188 | B = agent_masks.size(0) 189 | agent_masks = agent_masks.permute(0, 2, 1).reshape(-1, T_obs) 190 | agent_masks[:, -1][agent_masks.sum(-1) == T_obs] = False # Ensure agent's that don't exist don't throw NaNs. 191 | agents_temp_emb = layer(self.pos_encoder(agents_emb.reshape(T_obs, B * (self._M + 1), -1)), 192 | src_key_padding_mask=agent_masks) 193 | return agents_temp_emb.view(T_obs, B, self._M+1, -1) 194 | 195 | def social_attn_fn(self, agents_emb, agent_masks, layer): 196 | ''' 197 | :param agents_emb: (T, B, N, H) 198 | :param agent_masks: (B, T, N) 199 | :return: (T, B, N, H) 200 | ''' 201 | T_obs = agents_emb.size(0) 202 | B = agent_masks.size(0) 203 | agents_emb = agents_emb.permute(2, 1, 0, 3).reshape(self._M + 1, B * T_obs, -1) 204 | agents_soc_emb = layer(agents_emb, src_key_padding_mask=agent_masks.view(-1, self._M+1)) 205 | agents_soc_emb = agents_soc_emb.view(self._M+1, B, T_obs, -1).permute(2, 1, 0, 3) 206 | return agents_soc_emb 207 | 208 | def temporal_attn_decoder_fn(self, agents_emb, context, agent_masks, layer): 209 | ''' 210 | :param agents_emb: (T, BK, N, H) 211 | :param context: (T_in, BK, N, H) 212 | :param agent_masks: (BK, T, N) 213 | :return: (T, BK, N, H) 214 | ''' 215 | T_obs = context.size(0) 216 | BK = agent_masks.size(0) 217 | time_masks = self.generate_decoder_mask(seq_len=self.T, device=agents_emb.device) 218 | agent_masks = agent_masks.permute(0, 2, 1).reshape(-1, T_obs) 219 | agent_masks[:, -1][agent_masks.sum(-1) == T_obs] = False # Ensure that agent's that don't exist don't make NaN. 
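        # (A fully padded row in key_padding_mask would make the attention softmax return NaNs, so
        #  one timestep is un-masked for agents that are absent throughout the scene.)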
220 | agents_emb = agents_emb.reshape(self.T, -1, self.d_k) # [T, BxKxN, H] 221 | context = context.view(-1, BK*(self._M+1), self.d_k) 222 | 223 | agents_temp_emb = layer(agents_emb, context, tgt_mask=time_masks, memory_key_padding_mask=agent_masks) 224 | agents_temp_emb = agents_temp_emb.view(self.T, BK, self._M+1, -1) 225 | 226 | return agents_temp_emb 227 | 228 | def social_attn_decoder_fn(self, agents_emb, agent_masks, layer): 229 | ''' 230 | :param agents_emb: (T, BK, N, H) 231 | :param agent_masks: (BK, T, N) 232 | :return: (T, BK, N, H) 233 | ''' 234 | B = agent_masks.size(0) 235 | agent_masks = agent_masks[:, -1:].repeat(1, self.T, 1).view(-1, self._M + 1) # take last timestep of all agents. 236 | agents_emb = agents_emb.permute(2, 1, 0, 3).reshape(self._M + 1, B * self.T, -1) 237 | agents_soc_emb = layer(agents_emb, src_key_padding_mask=agent_masks) 238 | agents_soc_emb = agents_soc_emb.view(self._M + 1, B, self.T, -1).permute(2, 1, 0, 3) 239 | return agents_soc_emb 240 | 241 | def forward(self, ego_in, agents_in, roads, agent_types): 242 | ''' 243 | :param ego_in: one agent called ego, shape [B, T_obs, k_attr+1] with last values being the existence mask. 244 | :param agents_in: other scene agents, shape [B, T_obs, M-1, k_attr+1] with last values being the existence mask. 245 | :param roads: [B, M, S, P, map_attr+1] representing the road network or 246 | [B, 1, 1] if self.use_map_lanes is False. 247 | :param agent_types: [B, M, num_agent_types] one-hot encoding of agent types, with the first agent idx being ego. 248 | :return: 249 | pred_obs: shape [c, T, B, M, 5(6)] c trajectories for all agents with every point being the params of 250 | Bivariate Gaussian distribution (and the yaw prediction if self.predict_yaw). 251 | mode_probs: shape [B, c] mode probability predictions P(z|X_{1:T_obs}) 252 | ''' 253 | B = ego_in.size(0) 254 | 255 | # Encode all input observations 256 | ego_tensor, _agents_tensor, opps_masks, env_masks = self.process_observations(ego_in, agents_in) 257 | agents_tensor = torch.cat((ego_tensor.unsqueeze(2), _agents_tensor), dim=2) 258 | agents_emb = self.agents_dynamic_encoder(agents_tensor).permute(1, 0, 2, 3) 259 | 260 | # Process through AutoBot's encoder 261 | for i in range(self.L_enc): 262 | agents_emb = self.temporal_attn_fn(agents_emb, opps_masks, layer=self.temporal_attn_layers[i]) 263 | agents_emb = self.social_attn_fn(agents_emb, opps_masks, layer=self.social_attn_layers[i]) 264 | 265 | # Process map information 266 | if self.use_map_lanes: 267 | orig_map_features, orig_road_segs_masks = self.map_encoder(roads, agents_emb) 268 | map_features = orig_map_features.unsqueeze(2).repeat(1, 1, self.c, 1, 1).view(-1, B * self.c * (self._M+1), self.d_k) 269 | road_segs_masks = orig_road_segs_masks.unsqueeze(2).repeat(1, self.c, 1, 1).view(B * self.c * (self._M+1), -1) 270 | 271 | # Repeat the tensors for the number of modes. 
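        # After this block, opps_masks_modes has shape (B*c, T_obs, _M+1) and context has shape
        # (T_obs, B*c, _M+1, d_k), so all c modes are decoded in a single batched pass.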
272 | opps_masks_modes = opps_masks.unsqueeze(1).repeat(1, self.c, 1, 1).view(B*self.c, ego_in.shape[1], -1) 273 | context = agents_emb.unsqueeze(2).repeat(1, 1, self.c, 1, 1) 274 | context = context.view(ego_in.shape[1], B*self.c, self._M+1, self.d_k) 275 | 276 | # embed agent types 277 | agent_types_features = self.emb_agent_types(agent_types).unsqueeze(1).\ 278 | repeat(1, self.c, 1, 1).view(-1, self._M+1, self.d_k) 279 | agent_types_features = agent_types_features.unsqueeze(0).repeat(self.T, 1, 1, 1) 280 | 281 | # AutoBot-Joint Decoding 282 | dec_parameters = self.Q.repeat(1, B, 1, self._M+1, 1).view(self.T, B*self.c, self._M+1, -1) 283 | dec_parameters = torch.cat((dec_parameters, agent_types_features), dim=-1) 284 | dec_parameters = self.dec_agenttypes_encoder(dec_parameters) 285 | agents_dec_emb = dec_parameters 286 | 287 | for d in range(self.L_dec): 288 | if self.use_map_lanes and d == 1: 289 | agents_dec_emb = agents_dec_emb.reshape(self.T, -1, self.d_k) 290 | agents_dec_emb_map = self.map_attn_layers(query=agents_dec_emb, key=map_features, value=map_features, 291 | key_padding_mask=road_segs_masks)[0] 292 | agents_dec_emb = agents_dec_emb + agents_dec_emb_map 293 | agents_dec_emb = agents_dec_emb.reshape(self.T, B*self.c, self._M+1, -1) 294 | 295 | agents_dec_emb = self.temporal_attn_decoder_fn(agents_dec_emb, context, opps_masks_modes, layer=self.temporal_attn_decoder_layers[d]) 296 | agents_dec_emb = self.social_attn_decoder_fn(agents_dec_emb, opps_masks_modes, layer=self.social_attn_decoder_layers[d]) 297 | 298 | out_dists = self.output_model(agents_dec_emb.reshape(self.T, -1, self.d_k)) 299 | out_dists = out_dists.reshape(self.T, B, self.c, self._M+1, -1).permute(2, 0, 1, 3, 4) 300 | 301 | # Mode prediction 302 | mode_params_emb = self.P.repeat(1, B, self._M+1, 1).view(self.c, -1, self.d_k) 303 | mode_params_emb = self.prob_decoder(query=mode_params_emb, key=agents_emb.reshape(-1, B*(self._M+1), self.d_k), 304 | value=agents_emb.reshape(-1, B*(self._M+1), self.d_k))[0] 305 | if self.use_map_lanes: 306 | orig_map_features = orig_map_features.view(-1, B*(self._M+1), self.d_k) 307 | orig_road_segs_masks = orig_road_segs_masks.view(B*(self._M+1), -1) 308 | mode_params_emb = self.mode_map_attn(query=mode_params_emb, key=orig_map_features, value=orig_map_features, 309 | key_padding_mask=orig_road_segs_masks)[0] + mode_params_emb 310 | 311 | mode_probs = self.prob_predictor(mode_params_emb).squeeze(-1).view(self.c, B, self._M+1).sum(2).transpose(0, 1) 312 | mode_probs = F.softmax(mode_probs, dim=1) 313 | 314 | # return # [c, T, B, M, 5], [B, c] 315 | return out_dists, mode_probs 316 | 317 | -------------------------------------------------------------------------------- /models/context_encoders.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | def init(module, weight_init, bias_init, gain=1): 7 | ''' 8 | This function provides weight and bias initializations for linear layers. 9 | ''' 10 | weight_init(module.weight.data, gain=gain) 11 | bias_init(module.bias.data) 12 | return module 13 | 14 | 15 | class MapEncoderCNN(nn.Module): 16 | ''' 17 | Regular CNN encoder for road image. 
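    Produces one road feature vector per mode, i.e. an output of shape (B, c, d_k); see forward().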
18 | ''' 19 | def __init__(self, d_k=64, dropout=0.1, c=10): 20 | super(MapEncoderCNN, self).__init__() 21 | self.dropout = dropout 22 | self.c = c 23 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 24 | # MAP ENCODER 25 | fm_size = 7 26 | self.map_encoder = nn.Sequential( 27 | init_(nn.Conv2d(3, 32, kernel_size=4, stride=1)), nn.ReLU(), 28 | init_(nn.Conv2d(32, 32, kernel_size=4, stride=2)), nn.ReLU(), 29 | init_(nn.Conv2d(32, 32, kernel_size=3, stride=2)), nn.ReLU(), 30 | init_(nn.Conv2d(32, 32, kernel_size=3, stride=2)), nn.ReLU(), 31 | init_(nn.Conv2d(32, fm_size*self.c, kernel_size=2, stride=2)), nn.ReLU(), 32 | nn.Dropout2d(p=self.dropout) 33 | ) 34 | self.map_feats = nn.Sequential( 35 | init_(nn.Linear(7*7*fm_size, d_k)), nn.ReLU(), 36 | init_(nn.Linear(d_k, d_k)), nn.ReLU(), 37 | ) 38 | 39 | def forward(self, roads): 40 | ''' 41 | :param roads: road image with size (B, 128, 128, 3) 42 | :return: road features, with one for every mode (B, c, d_k) 43 | ''' 44 | B = roads.size(0) # batch size 45 | return self.map_feats(self.map_encoder(roads).view(B, self.c, -1)) 46 | 47 | 48 | class MapEncoderPts(nn.Module): 49 | ''' 50 | This class operates on the road lanes provided as a tensor with shape 51 | (B, num_road_segs, num_pts_per_road_seg, k_attr+1) 52 | ''' 53 | def __init__(self, d_k, map_attr=3, dropout=0.1): 54 | super(MapEncoderPts, self).__init__() 55 | self.dropout = dropout 56 | self.d_k = d_k 57 | self.map_attr = map_attr 58 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 59 | 60 | self.road_pts_lin = nn.Sequential(init_(nn.Linear(map_attr, self.d_k))) 61 | self.road_pts_attn_layer = nn.MultiheadAttention(self.d_k, num_heads=8, dropout=self.dropout) 62 | self.norm1 = nn.LayerNorm(self.d_k, eps=1e-5) 63 | self.norm2 = nn.LayerNorm(self.d_k, eps=1e-5) 64 | self.map_feats = nn.Sequential( 65 | init_(nn.Linear(self.d_k, self.d_k)), nn.ReLU(), nn.Dropout(self.dropout), 66 | init_(nn.Linear(self.d_k, self.d_k)), 67 | ) 68 | 69 | def get_road_pts_mask(self, roads): 70 | road_segment_mask = torch.sum(roads[:, :, :, -1], dim=2) == 0 71 | road_pts_mask = (1.0 - roads[:, :, :, -1]).type(torch.BoolTensor).to(roads.device).view(-1, roads.shape[2]) 72 | road_pts_mask[:, 0][road_pts_mask.sum(-1) == roads.shape[2]] = False # Ensures no NaNs due to empty rows. 73 | return road_segment_mask, road_pts_mask 74 | 75 | def forward(self, roads, agents_emb): 76 | ''' 77 | :param roads: (B, S, P, k_attr+1) where B is batch size, S is num road segments, P is 78 | num pts per road segment. 79 | :param agents_emb: (T_obs, B, d_k) where T_obs is the observation horizon. THis tensor is obtained from 80 | AutoBot's encoder, and basically represents the observed socio-temporal context of agents. 81 | :return: embedded road segments with shape (S) 82 | ''' 83 | B = roads.shape[0] 84 | S = roads.shape[1] 85 | P = roads.shape[2] 86 | road_segment_mask, road_pts_mask = self.get_road_pts_mask(roads) 87 | road_pts_feats = self.road_pts_lin(roads[:, :, :, :self.map_attr]).view(B*S, P, -1).permute(1, 0, 2) 88 | 89 | # Combining information from each road segment using attention with agent contextual embeddings as queries. 
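        # The encoding of the last observed timestep is used as the query (one query per batch
        # element and road segment); the road points act as keys and values.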
90 | agents_emb = agents_emb[-1].unsqueeze(2).repeat(1, 1, S, 1).view(-1, self.d_k).unsqueeze(0) 91 | road_seg_emb = self.road_pts_attn_layer(query=agents_emb, key=road_pts_feats, value=road_pts_feats, 92 | key_padding_mask=road_pts_mask)[0] 93 | road_seg_emb = self.norm1(road_seg_emb) 94 | road_seg_emb2 = road_seg_emb + self.map_feats(road_seg_emb) 95 | road_seg_emb2 = self.norm2(road_seg_emb2) 96 | road_seg_emb = road_seg_emb2.view(B, S, -1) 97 | 98 | return road_seg_emb.permute(1, 0, 2), road_segment_mask 99 | 100 | 101 | class MapEncoderPtsMA(nn.Module): 102 | ''' 103 | This class operates on the multi-agent road lanes provided as a tensor with shape 104 | (B, num_agents, num_road_segs, num_pts_per_road_seg, k_attr+1) 105 | ''' 106 | def __init__(self, d_k, map_attr=3, dropout=0.1): 107 | super(MapEncoderPtsMA, self).__init__() 108 | self.dropout = dropout 109 | self.d_k = d_k 110 | init_ = lambda m: init(m, nn.init.xavier_normal_, lambda x: nn.init.constant_(x, 0), np.sqrt(2)) 111 | 112 | self.map_attr = map_attr 113 | 114 | # Seed parameters for the map 115 | self.map_seeds = nn.Parameter(torch.Tensor(1, 1, self.d_k), requires_grad=True) 116 | nn.init.xavier_uniform_(self.map_seeds) 117 | 118 | self.road_pts_lin = nn.Sequential(init_(nn.Linear(self.map_attr, self.d_k))) 119 | self.road_pts_attn_layer = nn.MultiheadAttention(self.d_k, num_heads=8, dropout=self.dropout) 120 | self.norm1 = nn.LayerNorm(self.d_k, eps=1e-5) 121 | self.norm2 = nn.LayerNorm(self.d_k, eps=1e-5) 122 | self.map_feats = nn.Sequential( 123 | init_(nn.Linear(self.d_k, self.d_k*3)), nn.ReLU(), nn.Dropout(self.dropout), 124 | init_(nn.Linear(self.d_k*3, self.d_k)), 125 | ) 126 | 127 | def get_road_pts_mask(self, roads): 128 | road_segment_mask = torch.sum(roads[:, :, :, :, -1], dim=3) == 0 129 | road_pts_mask = (1.0 - roads[:, :, :, :, -1]).type(torch.BoolTensor).to(roads.device).view(-1, roads.shape[3]) 130 | 131 | # The next lines ensure that we do not obtain NaNs during training for missing agents or for empty roads. 132 | road_pts_mask[:, 0][road_pts_mask.sum(-1) == roads.shape[3]] = False # for empty agents 133 | road_segment_mask[:, :, 0][road_segment_mask.sum(-1) == road_segment_mask.shape[2]] = False # for empty roads 134 | return road_segment_mask, road_pts_mask 135 | 136 | def forward(self, roads, agents_emb): 137 | ''' 138 | :param roads: (B, M, S, P, k_attr+1) where B is batch size, M is num_agents, S is num road segments, P is 139 | num pts per road segment. 140 | :param agents_emb: (T_obs, B, M, d_k) where T_obs is the observation horizon. THis tensor is obtained from 141 | AutoBot's encoder, and basically represents the observed socio-temporal context of agents. 142 | :return: embedded road segments with shape (S) 143 | ''' 144 | B = roads.shape[0] 145 | M = roads.shape[1] 146 | S = roads.shape[2] 147 | P = roads.shape[3] 148 | road_segment_mask, road_pts_mask = self.get_road_pts_mask(roads) 149 | road_pts_feats = self.road_pts_lin(roads[:, :, :, :, :self.map_attr]).view(B*M*S, P, -1).permute(1, 0, 2) 150 | 151 | # Combining information from each road segment using attention with agent contextual embeddings as queries. 
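        # In this multi-agent variant the queries are learnable map seeds (one per batch element,
        # agent and segment); the commented-out line below shows the agent-conditioned alternative.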
152 | map_seeds = self.map_seeds.repeat(1, B * M * S, 1) 153 | # agents_emb = agents_emb[-1].detach().unsqueeze(2).repeat(1, 1, S, 1).view(-1, self.d_k).unsqueeze(0) 154 | road_seg_emb = self.road_pts_attn_layer(query=map_seeds, key=road_pts_feats, value=road_pts_feats, 155 | key_padding_mask=road_pts_mask)[0] 156 | road_seg_emb = self.norm1(road_seg_emb) 157 | road_seg_emb2 = road_seg_emb + self.map_feats(road_seg_emb) 158 | road_seg_emb2 = self.norm2(road_seg_emb2) 159 | road_seg_emb = road_seg_emb2.view(B, M, S, -1) 160 | 161 | return road_seg_emb.permute(2, 0, 1, 3), road_segment_mask 162 | 163 | -------------------------------------------------------------------------------- /process_args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from collections import namedtuple 5 | 6 | 7 | def get_train_args(): 8 | parser = argparse.ArgumentParser(description="AutoBots") 9 | # Section: General Configuration 10 | parser.add_argument("--exp-id", type=str, default=None, help="Experiment identifier") 11 | parser.add_argument("--seed", type=int, default=0, help="Random seed") 12 | parser.add_argument("--disable-cuda", action="store_true", help="Disable CUDA") 13 | parser.add_argument("--save-dir", type=str, default=".", help="Directory for saving results") 14 | 15 | # Section: Dataset 16 | parser.add_argument("--dataset", type=str, required=True, choices=["Argoverse", "Nuscenes", "trajnet++", 17 | "interaction-dataset"], 18 | help="Dataset to train on.") 19 | parser.add_argument("--dataset-path", type=str, required=True, help="Path to dataset files.") 20 | parser.add_argument("--use-map-image", type=bool, default=False, help="Use map image if applicable.") 21 | parser.add_argument("--use-map-lanes", type=bool, default=False, help="Use map lanes if applicable.") 22 | 23 | # Section: Algorithm 24 | parser.add_argument("--model-type", type=str, required=True, choices=["Autobot-Joint", "Autobot-Ego"], 25 | help="Whether to train for joint prediction or ego-only prediction.") 26 | parser.add_argument("--num-modes", type=int, default=5, help="Number of discrete latent variables for Autobot.") 27 | parser.add_argument("--hidden-size", type=int, default=128, help="Model's hidden size.") 28 | parser.add_argument("--num-encoder-layers", type=int, default=1, 29 | help="Number of social-temporal layers in Autobot's encoder.") 30 | parser.add_argument("--num-decoder-layers", type=int, default=1, 31 | help="Number of social-temporal layers in Autobot's decoder.") 32 | parser.add_argument("--tx-hidden-size", type=int, default=384, 33 | help="hidden size of transformer layers' feedforward network.") 34 | parser.add_argument("--tx-num-heads", type=int, default=16, help="Transformer number of heads.") 35 | parser.add_argument("--dropout", type=float, default=0.1, help="Dropout strenght used throughout model.") 36 | 37 | # Section: Loss Function 38 | parser.add_argument("--entropy-weight", type=float, default=1.0, metavar="lamda", help="Weight of entropy loss.") 39 | parser.add_argument("--kl-weight", type=float, default=1.0, metavar="lamda", help="Weight of entropy loss.") 40 | parser.add_argument("--use-FDEADE-aux-loss", type=bool, default=True, 41 | help="Whether to use FDE/ADE auxiliary loss in addition to NLL (accelerates learning).") 42 | 43 | # Section: Training params: 44 | parser.add_argument("--batch-size", type=int, default=100, help="Batch size") 45 | parser.add_argument("--learning-rate", type=float, default=1e-4, 
help="Learning rate") 46 | parser.add_argument("--adam-epsilon", type=float, default=1e-4, help="Adam optimiser epsilon value") 47 | parser.add_argument("--learning-rate-sched", type=int, nargs='+', default=[5, 10, 15, 20], 48 | help="Learning rate Schedule.") 49 | parser.add_argument("--grad-clip-norm", type=float, default=5, metavar="C", help="Gradient clipping norm") 50 | parser.add_argument("--num-epochs", type=int, default=150, metavar="I", help="number of iterations through the dataset.") 51 | args = parser.parse_args() 52 | 53 | if args.use_map_image and args.use_map_lanes: 54 | raise Exception('We do not support having both the map image and the map lanes...') 55 | 56 | # Perform config checks 57 | if "trajnet" in args.dataset: 58 | assert "Joint" in args.model_type, "Can't run AutoBot-Ego on TrajNet..." 59 | assert not args.use_map_image and not args.use_map_lanes, "TrajNet++ has no scene map information..." 60 | elif "Argoverse" in args.dataset: 61 | assert "Ego" in args.model_type, "Can't run AutoBot-Joint on Argoverse..." 62 | elif "interaction-dataset" in args.dataset: 63 | assert "Ego" not in args.model_type, "Can't run AutoBot-Ego on Interaction Dataset..." 64 | assert not args.use_map_image, "Interaction-dataset does not have image-based scene information..." 65 | 66 | results_dirname = create_results_folder(args) 67 | save_config(args, results_dirname) 68 | 69 | return args, results_dirname 70 | 71 | 72 | def get_eval_args(): 73 | parser = argparse.ArgumentParser(description="AutoBot") 74 | parser.add_argument("--models-path", type=str, required=True, help="Load model checkpoint") 75 | parser.add_argument("--dataset-path", type=str, required=True, help="Dataset path.") 76 | parser.add_argument("--batch-size", type=int, default=100, help="Batch size") 77 | parser.add_argument("--disable-cuda", action="store_true", help="Disable CUDA") 78 | args = parser.parse_args() 79 | 80 | config, model_dirname = load_config(args.models_path) 81 | config = namedtuple("config", config.keys())(*config.values()) 82 | return args, config, model_dirname 83 | 84 | 85 | def create_results_folder(args): 86 | model_configname = "" 87 | model_configname += "Autobot_joint" if "Joint" in args.model_type else "Autobot_ego" 88 | model_configname += "_C"+str(args.num_modes) + "_H"+str(args.hidden_size) + "_E"+str(args.num_encoder_layers) 89 | model_configname += "_D"+str(args.num_decoder_layers) + "_TXH"+str(args.tx_hidden_size) + "_NH"+str(args.tx_num_heads) 90 | model_configname += "_EW"+str(int(args.entropy_weight)) + "_KLW"+str(int(args.kl_weight)) 91 | model_configname += "_NormLoss" if args.use_FDEADE_aux_loss else "" 92 | model_configname += "_roadImg" if args.use_map_image else "" 93 | model_configname += "_roadLanes" if args.use_map_lanes else "" 94 | if args.exp_id is not None: 95 | model_configname += ("_" + args.exp_id) 96 | model_configname += "_s"+str(args.seed) 97 | 98 | result_dirname = os.path.join(args.save_dir, "results", args.dataset, model_configname) 99 | if os.path.isdir(result_dirname): 100 | answer = input(result_dirname + " exists. \n Do you wish to overwrite? 
(y/n)") 101 | if 'y' in answer: 102 | if os.path.isdir(os.path.join(result_dirname, "tb_files")): 103 | for f in os.listdir(os.path.join(result_dirname, "tb_files")): 104 | os.remove(os.path.join(result_dirname, "tb_files", f)) 105 | else: 106 | exit() 107 | os.makedirs(result_dirname, exist_ok=True) 108 | return result_dirname 109 | 110 | 111 | def save_config(args, results_dirname): 112 | argparse_dict = vars(args) 113 | with open(os.path.join(results_dirname, 'config.json'), 'w') as fp: 114 | json.dump(argparse_dict, fp) 115 | 116 | 117 | def load_config(model_path): 118 | model_dirname = os.path.join(*model_path.split("/")[:-1]) 119 | assert os.path.isdir(model_dirname) 120 | with open(os.path.join(model_dirname, 'config.json'), 'r') as fp: 121 | config = json.load(fp) 122 | return config, model_dirname 123 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==3.3.0 2 | matplotlib==3.4.2 3 | numpy==1.19.0 4 | opencv_python==4.5.3.56 5 | pandas==1.3.1 6 | pyproj==3.2.1 7 | pyquaternion==0.9.9 8 | scikit_learn==1.0.2 9 | scipy==1.7.1 10 | torch==1.9.0 11 | torchvision==0.10.0 12 | trajnetplusplustools==0.3.0 13 | -------------------------------------------------------------------------------- /useful_scripts/generate_argoverse_test.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | from argoverse.evaluation.competition_util import generate_forecasting_h5 5 | import torch 6 | from sklearn.cluster import AgglomerativeClustering 7 | 8 | from datasets.argoverse.dataset import ArgoH5Dataset 9 | from models.autobot_ego import AutoBotEgo 10 | from process_args import get_eval_args 11 | 12 | 13 | def load_model(args, config, k_attr, num_other_agents, pred_horizon, map_attr): 14 | if torch.cuda.is_available() and not args.disable_cuda: 15 | device = torch.device("cuda") 16 | else: 17 | device = torch.device("cpu") 18 | 19 | autobot_model = AutoBotEgo(k_attr=k_attr, 20 | d_k=config['hidden_size'], 21 | _M=num_other_agents, 22 | c=config['num_modes'], 23 | T=pred_horizon, 24 | L_enc=config['num_encoder_layers'], 25 | dropout=config['dropout'], 26 | num_heads=config['tx_num_heads'], 27 | L_dec=config['num_decoder_layers'], 28 | tx_hidden_size=config['tx_hidden_size'], 29 | use_map_img=config['use_map_image'], 30 | use_map_lanes=config['use_map_lanes'], 31 | map_attr=map_attr).to(device) 32 | 33 | model_dicts = torch.load(args.models_path, map_location={'cuda:1': 'cuda:0'}) 34 | autobot_model.load_state_dict(model_dicts["AutoBot"]) 35 | autobot_model.eval() 36 | 37 | return autobot_model, device 38 | 39 | 40 | def angle_of_rotation(yaw: float) -> float: 41 | """ 42 | Given a yaw angle (measured from x axis), find the angle needed to rotate by so that 43 | the yaw is aligned with the y axis (pi / 2). 44 | :param yaw: Radians. Output of quaternion_yaw function. 45 | :return: Angle in radians. 46 | """ 47 | return (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 48 | 49 | 50 | def make_2d_rotation_matrix(angle_in_radians: float) -> np.ndarray: 51 | """ 52 | Makes rotation matrix to rotate point in x-y plane counterclockwise 53 | by angle_in_radians. 
54 | """ 55 | 56 | return np.array([[np.cos(angle_in_radians), -np.sin(angle_in_radians)], 57 | [np.sin(angle_in_radians), np.cos(angle_in_radians)]]) 58 | 59 | 60 | def convert_local_coords_to_global(coordinates: np.ndarray, translation: Tuple[float, float], yaw: float) -> np.ndarray: 61 | """ 62 | Converts local coordinates to global coordinates. 63 | :param coordinates: x,y locations. array of shape [n_steps, 2] 64 | :param translation: Tuple of (x, y, z) location that is the center of the new frame 65 | :param rotation: Tuple representation of quaternion of new frame. 66 | Representation - cos(theta / 2) + (xi + yi + zi)sin(theta / 2). 67 | :return: x,y locations stored in array of share [n_times, 2]. 68 | """ 69 | yaw = angle_of_rotation(yaw) 70 | transform = make_2d_rotation_matrix(angle_in_radians=-yaw) 71 | 72 | return np.dot(transform, coordinates.T).T[:, :2] + np.atleast_2d(np.array(translation)[:2]) 73 | 74 | 75 | def recompute_probs(pred_trajs, probs): 76 | distances = [] 77 | for k in range(len(pred_trajs)): 78 | distances.append(np.mean(np.linalg.norm(pred_trajs[k] - pred_trajs, axis=-1), axis=-1)) 79 | distances = np.array(distances) 80 | 81 | agg = AgglomerativeClustering(affinity='precomputed', linkage='complete', distance_threshold=None, n_clusters=4) 82 | output = agg.fit_predict(distances) # Returns class labels. 83 | temp_probs = probs.copy() 84 | for element in np.unique(output): 85 | similar_ks = np.where(output == element)[0].tolist() 86 | if len(similar_ks) > 1: 87 | best_k = similar_ks[np.argmax(temp_probs[similar_ks])] 88 | similar_ks.remove(best_k) 89 | for similar_k in similar_ks: 90 | temp_probs[best_k] += temp_probs[similar_k] 91 | temp_probs[similar_k] = 0.0 92 | 93 | return temp_probs 94 | 95 | 96 | if __name__ == "__main__": 97 | args, config, model_dirname = get_eval_args() 98 | test_argoverse = ArgoH5Dataset(args.dataset_path, split_name="test", use_map_lanes=config['use_map_lanes']) 99 | test_loader = torch.utils.data.DataLoader( 100 | test_argoverse, batch_size=args.batch_size, shuffle=False, num_workers=12, drop_last=False, pin_memory=False 101 | ) 102 | print("Test dataset loaded with length", len(test_argoverse)) 103 | 104 | autobot_model, device = load_model(args, config, num_other_agents=test_argoverse.num_others, 105 | pred_horizon=test_argoverse.pred_horizon, k_attr=test_argoverse.k_attr, 106 | map_attr=test_argoverse.map_attr) 107 | 108 | trajectories = {} 109 | probabilities = {} 110 | with torch.no_grad(): 111 | for i, data in enumerate(test_loader): 112 | if i % 25 == 0: 113 | print(i, "/", len(test_argoverse) // args.batch_size) 114 | 115 | ego_in, agents_in, roads, extra = data 116 | ego_in = ego_in.float().to(device) 117 | agents_in = agents_in.float().to(device) 118 | roads = roads.float().to(device) 119 | 120 | pred_obs, mode_probs = autobot_model(ego_in, agents_in, roads) 121 | pred_obs = pred_obs.cpu().numpy() 122 | mode_probs = mode_probs.cpu().numpy() 123 | 124 | for b in range(len(mode_probs)): 125 | glob_pred_obs = [] 126 | for k in range(len(pred_obs)): 127 | glob_pred_obs.append(convert_local_coords_to_global(pred_obs[k, :, b, :2], extra[b, 2:].numpy(), extra[b, 1].item())) 128 | glob_pred_obs = np.array(glob_pred_obs) 129 | new_probs = recompute_probs(glob_pred_obs, mode_probs[b]) 130 | trajectories[int(extra[b, 0].item())] = glob_pred_obs 131 | probabilities[int(extra[b, 0].item())] = new_probs 132 | 133 | fname = args.models_path.split("/")[-1].split(".")[0] 134 | generate_forecasting_h5(data=trajectories, 
output_path=model_dirname, probabilities=probabilities, filename=fname) 135 | -------------------------------------------------------------------------------- /useful_scripts/generate_indst_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | import os 4 | import glob 5 | from collections import namedtuple 6 | 7 | import pandas as pd 8 | import xml.etree.ElementTree as xml 9 | 10 | from datasets.interaction_dataset.utils import get_x_y_lists, get_subtype, get_type, Point, LL2XYProjector, \ 11 | get_relation_members 12 | from models.autobot_joint import AutoBotJoint 13 | from process_args import load_config 14 | 15 | import numpy as np 16 | import torch 17 | 18 | from utils.metric_helpers import collisions_for_inter_dataset, interpolate_trajectories, yaw_from_predictions 19 | 20 | 21 | def load_model(model_config, models_path, device): 22 | autobot_model = AutoBotJoint(k_attr=8, 23 | d_k=model_config.hidden_size, 24 | _M=8, 25 | c=model_config.num_modes, 26 | T=15, 27 | L_enc=model_config.num_encoder_layers, 28 | dropout=model_config.dropout, 29 | num_heads=model_config.tx_num_heads, 30 | L_dec=model_config.num_decoder_layers, 31 | tx_hidden_size=model_config.tx_hidden_size, 32 | use_map_lanes=model_config.use_map_lanes, 33 | map_attr=7, 34 | num_agent_types=2, 35 | predict_yaw=True).to(device) 36 | 37 | model_dicts = torch.load(models_path, map_location=device) 38 | autobot_model.load_state_dict(model_dicts["AutoBot"]) 39 | autobot_model.eval() 40 | 41 | model_parameters = filter(lambda p: p.requires_grad, autobot_model.parameters()) 42 | num_params = sum([np.prod(p.size()) for p in model_parameters]) 43 | print("Number of Model Parameters:", num_params) 44 | 45 | return autobot_model 46 | 47 | 48 | def get_dataset_files(dataset_root): 49 | datafiles = glob.glob(os.path.join(dataset_root, "test_multi-agent") + "/*.csv") 50 | return datafiles 51 | 52 | 53 | def get_map_lanes(filename): 54 | print(filename) 55 | projector = LL2XYProjector(0.0, 0.0) 56 | 57 | e = xml.parse(filename).getroot() 58 | 59 | point_dict = dict() 60 | for node in e.findall("node"): 61 | point = Point() 62 | point.x, point.y = projector.latlon2xy(float(node.get('lat')), float(node.get('lon'))) 63 | point_dict[int(node.get('id'))] = point 64 | 65 | unknown_linestring_types = list() 66 | road_lines = [] 67 | 68 | road_lines_dict = {} 69 | exlusion_ids = [] 70 | 71 | min_length = 40 72 | for way in e.findall('way'): 73 | way_type = get_type(way) 74 | 75 | if way_type is None: 76 | raise RuntimeError("Linestring type must be specified") 77 | elif way_type == "curbstone": 78 | mark_type = np.array([1.0, 0.0, 0.0, 1.0]) 79 | elif way_type == "line_thin": 80 | way_subtype = get_subtype(way) 81 | if way_subtype == "dashed": 82 | mark_type = np.array([1.0, 1.0, 0.0, 1.0]) 83 | else: 84 | mark_type = np.array([0.0, 1.0, 0.0, 1.0]) 85 | elif way_type == "line_thick": 86 | way_subtype = get_subtype(way) 87 | if way_subtype == "dashed": 88 | mark_type = np.array([1.0, 1.0, 0.0, 1.0]) 89 | else: 90 | mark_type = np.array([0.0, 1.0, 0.0, 1.0]) 91 | elif way_type == "pedestrian_marking": 92 | mark_type = np.array([0.0, 0.0, 1.0, 1.0]) 93 | elif way_type == "bike_marking": 94 | mark_type = np.array([0.0, 0.0, 1.0, 1.0]) 95 | elif way_type == "stop_line": 96 | mark_type = np.array([1.0, 0.0, 1.0, 1.0]) 97 | elif way_type == "virtual": 98 | exlusion_ids.append(way.get("id")) 99 | continue 100 | elif way_type == "road_border": 101 | mark_type = np.array([1.0, 
1.0, 1.0, 1.0]) 102 | elif way_type == "guard_rail": 103 | mark_type = np.array([1.0, 1.0, 1.0, 1.0]) 104 | elif way_type == "traffic_sign": 105 | exlusion_ids.append(way.get("id")) 106 | continue 107 | else: 108 | if way_type not in unknown_linestring_types: 109 | unknown_linestring_types.append(way_type) 110 | continue 111 | 112 | x_list, y_list = get_x_y_lists(way, point_dict) 113 | if len(x_list) < min_length: 114 | x_list = np.linspace(x_list[0], x_list[-1], min_length).tolist() 115 | y_list = np.linspace(y_list[0], y_list[-1], min_length).tolist() 116 | 117 | lane_pts = np.array([x_list, y_list]).transpose() 118 | mark_type = np.zeros((len(lane_pts), 4)) + mark_type 119 | 120 | lane_pts = np.concatenate((lane_pts, mark_type), axis=1) 121 | road_lines.append(lane_pts) 122 | road_lines_dict[way.get("id")] = lane_pts 123 | 124 | new_roads = np.zeros((160, 40, 6)) # empirically found max num roads is 157. 125 | for i in range(len(road_lines)): 126 | new_roads[i, :len(road_lines[i])] = road_lines[i] 127 | 128 | used_keys_all = [] 129 | num_relations = len(e.findall('relation')) 130 | relation_lanes = np.zeros((num_relations + len(road_lines), 80, 8)) 131 | counter = 0 132 | for rel in e.findall('relation'): 133 | rel_lane, used_keys = get_relation_members(rel, road_lines_dict, exlusion_ids) 134 | if rel_lane is None: 135 | continue 136 | used_keys_all += used_keys 137 | new_lanes = np.array(rel_lane).reshape((-1, 8)) 138 | relation_lanes[counter] = new_lanes 139 | counter += 1 140 | 141 | # delete all used keys 142 | used_keys_all = np.unique(used_keys_all) 143 | for used_key in used_keys_all: 144 | del road_lines_dict[used_key] 145 | 146 | # add non-used keys 147 | for k in road_lines_dict.keys(): 148 | relation_lanes[counter, :40, :5] = road_lines_dict[k][:, :5] # rest of state (position (2), and type(3)). 149 | relation_lanes[counter, :40, 5:7] = -1.0 # no left-right relationship 150 | relation_lanes[counter, :40, 7] = road_lines_dict[k][:, -1] # mask 151 | counter += 1 152 | 153 | return relation_lanes[relation_lanes[:, :, -1].sum(1) > 0] 154 | 155 | 156 | def get_ego_and_agents(agents_data): 157 | agent_masks = np.ones((*agents_data.shape[:2], 1)) 158 | agents_data = np.concatenate((agents_data, agent_masks), axis=-1) 159 | ego_in = agents_data[0] 160 | agents_in = agents_data[1:] 161 | return ego_in, agents_in 162 | 163 | 164 | def copy_agent_roads_across_agents(agents_in, roads): 165 | num_agents = agents_in.shape[0] + 1 166 | new_roads = np.zeros((num_agents, *roads.shape)) 167 | new_roads[0] = roads # ego 168 | for n in range(num_agents-1): 169 | if agents_in[n, -1, -1]: 170 | new_roads[n+1] = roads 171 | return new_roads 172 | 173 | 174 | def make_2d_rotation_matrix(angle_in_radians: float) -> np.ndarray: 175 | """ 176 | Makes rotation matrix to rotate point in x-y plane counterclockwise 177 | by angle_in_radians. 178 | """ 179 | return np.array([[np.cos(angle_in_radians), -np.sin(angle_in_radians)], 180 | [np.sin(angle_in_radians), np.cos(angle_in_radians)]]) 181 | 182 | 183 | def convert_global_coords_to_local(coordinates: np.ndarray, yaw: float) -> np.ndarray: 184 | """ 185 | Converts global coordinates to coordinates in the frame given by the rotation quaternion and 186 | centered at the translation vector. The rotation is meant to be a z-axis rotation. 187 | :param coordinates: x,y locations. array of shape [n_steps, 2]. 188 | :param translation: Tuple of (x, y, z) location that is the center of the new frame. 
189 | :param rotation: Tuple representation of quaternion of new frame. 190 | Representation - cos(theta / 2) + (xi + yi + zi)sin(theta / 2). 191 | :return: x,y locations in frame stored in array of share [n_times, 2]. 192 | """ 193 | transform = make_2d_rotation_matrix(angle_in_radians=yaw) 194 | if len(coordinates.shape) > 2: 195 | coord_shape = coordinates.shape 196 | return np.dot(transform, coordinates.reshape((-1, 2)).T).T.reshape(*coord_shape) 197 | return np.dot(transform, coordinates.T).T[:, :2] 198 | 199 | 200 | def rotate_agents(ego_in, agents_in, roads, agent_types): 201 | num_others = agents_in.shape[0] 202 | new_ego_in = np.zeros((ego_in.shape[0], ego_in.shape[1] + 3)) 203 | new_ego_in[:, 3:] = ego_in 204 | new_ego_in[:, 2] = ego_in[:, 4] - ego_in[-1:, 4] # standardized yaw. 205 | 206 | new_agents_in = np.zeros((agents_in.shape[0], agents_in.shape[1], agents_in.shape[2] + 3)) # + 2 207 | new_agents_in[:, :, 3:] = agents_in 208 | new_agents_in[:, :, 2] = agents_in[:, :, 4] - agents_in[:, -1:, 4] # standardized yaw. 209 | 210 | new_roads = roads.copy() 211 | 212 | # "ego" stuff 213 | if agent_types[0, 0]: # vehicle 214 | yaw = ego_in[-1, 4] 215 | elif agent_types[0, 1]: # pedestrian/bike 216 | diff = ego_in[-1, :2] - ego_in[-5, :2] 217 | yaw = np.arctan2(diff[1], diff[0]) 218 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 219 | translation = ego_in[-1, :2] 220 | 221 | new_ego_in[:, :2] = convert_global_coords_to_local(coordinates=ego_in[:, :2] - translation, yaw=angle_of_rotation) 222 | new_ego_in[:, 5:7] = convert_global_coords_to_local(coordinates=ego_in[:, 2:4], yaw=angle_of_rotation) 223 | new_roads[0, :, :, :2] = convert_global_coords_to_local(coordinates=new_roads[0, :, :, :2] - translation, yaw=angle_of_rotation) 224 | new_roads[0][np.where(new_roads[0, :, :, -1] == 0)] = 0.0 225 | 226 | # other agents 227 | for n in range(num_others): 228 | if not agents_in[n, -1, -1]: 229 | continue 230 | 231 | if agent_types[n+1, 0]: # vehicle 232 | yaw = agents_in[n, -1, 4] 233 | elif agent_types[n+1, 1]: # pedestrian/bike 234 | diff = agents_in[n, -1, :2] - agents_in[n, -5, :2] 235 | yaw = np.arctan2(diff[1], diff[0]) 236 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 237 | translation = agents_in[n, -1, :2] 238 | 239 | new_agents_in[n, :, :2] = convert_global_coords_to_local(coordinates=agents_in[n, :, :2] - translation, yaw=angle_of_rotation) 240 | new_agents_in[n, :, 5:7] = convert_global_coords_to_local(coordinates=agents_in[n, :, 2:4], yaw=angle_of_rotation) 241 | new_roads[n + 1, :, :, :2] = convert_global_coords_to_local(coordinates=new_roads[n + 1, :, :, :2] - translation, yaw=angle_of_rotation) 242 | new_roads[n + 1][np.where(new_roads[n + 1, :, :, -1] == 0)] = 0.0 243 | 244 | return new_ego_in, new_agents_in, new_roads 245 | 246 | 247 | def data_to_tensor(ego_in, agents_in, agent_roads, agent_types): 248 | return torch.from_numpy(ego_in).float().to(device).unsqueeze(0), \ 249 | torch.from_numpy(agents_in).float().to(device).unsqueeze(0), \ 250 | torch.from_numpy(agent_roads).float().to(device).unsqueeze(0), \ 251 | torch.from_numpy(agent_types).float().to(device).unsqueeze(0) 252 | 253 | 254 | def get_args(): 255 | parser = argparse.ArgumentParser(description="AutoBot") 256 | parser.add_argument("--models-path", type=str, required=True, help="Load model checkpoint") 257 | parser.add_argument("--dataset-root", type=str, required=True, help="Dataset path.") 258 | args = parser.parse_args() 259 | 260 | config, model_dirname = 
load_config(args.models_path) 261 | config = namedtuple("config", config.keys())(*config.values()) 262 | return args, config, model_dirname 263 | 264 | 265 | if __name__ == '__main__': 266 | args, config, model_dirname = get_args() 267 | save_dir = os.path.join(model_dirname, "autobot_submission_files") 268 | os.makedirs(save_dir, exist_ok=True) 269 | 270 | if torch.cuda.is_available(): 271 | device = torch.device("cuda") 272 | else: 273 | device = torch.device("cpu") 274 | 275 | # load model 276 | autobot_model = load_model(config, args.models_path, device) 277 | 278 | # load dataset and process 279 | total_scene_collisions = [] 280 | max_num_agents = 0 281 | datafiles = get_dataset_files(args.dataset_root) 282 | for dataf in datafiles: 283 | # create dataframe 284 | sub_file_name = os.path.join(save_dir, dataf.split("/")[-1].split(".")[0].replace("_obs", "_sub.csv")) 285 | headers_name = ['case_id', 'track_id', 'frame_id', 'timestamp_ms', 'track_to_predict', 'interesting_agent'] 286 | for i in range(1, 7): 287 | headers_name += ['x'+str(i), 'y'+str(i), 'psi_rad'+str(i)] 288 | sub_file_csv = [headers_name] 289 | 290 | # load map 291 | map_fname = os.path.join(args.dataset_root, "maps", dataf.split("/")[-1].split(".")[0].replace("_obs", ".osm")) 292 | roads = get_map_lanes(map_fname) 293 | 294 | data = pd.read_csv(dataf) 295 | scene_ids = list(set(data['case_id'].tolist())) 296 | for i, scene_id in enumerate(scene_ids): 297 | if i % 10 == 0: 298 | print(scene_id, "/", len(scene_ids)) 299 | scene_data = data.loc[data['case_id'] == scene_id] 300 | 301 | agent_ids = list(set(data['track_id'].tolist())) 302 | scene_trajectories = [] 303 | scene_agent_types = [] 304 | scene_agent_ids = [] 305 | interest_agent_id = -1 306 | for agent_id in agent_ids: 307 | agent_data = scene_data.loc[scene_data['track_id'] == agent_id] 308 | if len(agent_data['track_to_predict'].tolist()) == 0 or not agent_data['track_to_predict'].tolist()[0]: 309 | continue 310 | if agent_data['interesting_agent'].tolist()[0]: 311 | interest_agent_id = agent_id 312 | scene_agent_ids.append(agent_id) 313 | curr_agent_type = [1.0, 0.0] if 'car' in agent_data['agent_type'].tolist()[0] else [0.0, 1.0] 314 | scene_agent_types.append(curr_agent_type) 315 | assert np.array_equal(agent_data[['frame_id']].to_numpy()[:, 0], np.arange(1, 11).astype(np.int64)) 316 | scene_trajectories.append(agent_data[['x', 'y', 'vx', 'vy', 'psi_rad', 'length', 'width']].to_numpy()) 317 | 318 | assert interest_agent_id >= 0 319 | scene_trajectories = np.array(scene_trajectories) 320 | scene_agent_types = np.array(scene_agent_types) 321 | 322 | ego_in, agents_in = get_ego_and_agents(scene_trajectories) 323 | agent_roads = copy_agent_roads_across_agents(agents_in, roads) 324 | translations = np.expand_dims(np.concatenate((ego_in[-1:, :2], agents_in[:, -1, :2]), axis=0), axis=0) 325 | ego_in, agents_in, agent_roads = rotate_agents(ego_in, agents_in, agent_roads, scene_agent_types) 326 | 327 | # downsample inputs across time 328 | ego_in = ego_in[1::2] 329 | agents_in = agents_in[:, 1::2].transpose(1, 0, 2) 330 | 331 | # prepare inputs for model 332 | ego_in, agents_in, agent_roads, agent_types = data_to_tensor(ego_in, agents_in, agent_roads, scene_agent_types) 333 | model_ego_in = ego_in.clone() 334 | model_ego_in[:, :, 3:5] = 0 335 | model_agents_in = agents_in.clone() 336 | model_agents_in[:, :, :, 3:5] = 0 337 | 338 | # These are used for rotating the trajectories of pedestrians. 
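            # Columns 3:5 hold the original (untranslated) global x/y kept by rotate_agents; the
            # model inputs above zero them out, while these copies are used further below to
            # recover pedestrian headings and check collisions in the global frame.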
339 | ego_in[:, :, 0:2] = ego_in[:, :, 3:5] 340 | agents_in[:, :, :, 0:2] = agents_in[:, :, :, 3:5] 341 | 342 | # generate predictions using model' 343 | with torch.no_grad(): 344 | autobot_model._M = model_agents_in.shape[2] 345 | pred_obs, mode_probs = autobot_model(model_ego_in, model_agents_in, agent_roads, agent_types) 346 | pred_obs = interpolate_trajectories(pred_obs) 347 | pred_obs = yaw_from_predictions(pred_obs, ego_in, agents_in) 348 | scene_collisions, new_preds, vehicles_only = collisions_for_inter_dataset(pred_obs.cpu().numpy(), 349 | agent_types.cpu().numpy(), 350 | ego_in.cpu().numpy(), 351 | agents_in.cpu().numpy(), 352 | translations, device=device) 353 | total_scene_collisions.append(scene_collisions) 354 | 355 | for n, agent_id in enumerate(scene_agent_ids): 356 | for t in range(new_preds.shape[1]): 357 | row_data = [scene_id, agent_id, t+11, ((t+1)*100)+1000, 1, int(agent_id == interest_agent_id)] 358 | for k in range(new_preds.shape[0]): 359 | curr_pred = new_preds[k, t, 0, n, :3].tolist() 360 | row_data += curr_pred 361 | 362 | sub_file_csv.append(row_data) 363 | 364 | with open(sub_file_name, 'w', newline='') as myfile: 365 | wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) 366 | wr.writerows(sub_file_csv) 367 | 368 | print("Scene collision rate", np.mean(total_scene_collisions)) 369 | 370 | 371 | 372 | -------------------------------------------------------------------------------- /useful_scripts/generate_nuscene_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from nuscenes.prediction import convert_local_coords_to_global 5 | from sklearn.cluster import AgglomerativeClustering 6 | 7 | from datasets.nuscenes.dataset import NuscenesH5Dataset 8 | from models.autobot_ego import AutoBotEgo 9 | from process_args import get_eval_args 10 | 11 | 12 | def load_model(args, model_config, k_attr, num_other_agents, pred_horizon, map_attr): 13 | if torch.cuda.is_available() and not args.disable_cuda: 14 | device = torch.device("cuda") 15 | else: 16 | device = torch.device("cpu") 17 | 18 | autobot_model = AutoBotEgo(k_attr=k_attr, 19 | d_k=model_config.hidden_size, 20 | _M=num_other_agents, 21 | c=model_config.num_modes, 22 | T=pred_horizon, 23 | L_enc=model_config.num_encoder_layers, 24 | dropout=model_config.dropout, 25 | num_heads=model_config.tx_num_heads, 26 | L_dec=model_config.num_decoder_layers, 27 | tx_hidden_size=model_config.tx_hidden_size, 28 | use_map_img=model_config.use_map_image, 29 | use_map_lanes=model_config.use_map_lanes, 30 | map_attr=map_attr).to(device) 31 | 32 | model_dicts = torch.load(args.models_path, map_location={'cuda:1': 'cuda:0'}) 33 | autobot_model.load_state_dict(model_dicts["AutoBot"]) 34 | autobot_model.eval() 35 | 36 | return autobot_model, device 37 | 38 | 39 | def recompute_probs(pred_trajs, probs): 40 | distances = [] 41 | for k in range(len(pred_trajs)): 42 | distances.append(np.mean(np.linalg.norm(pred_trajs[k] - pred_trajs, axis=-1), axis=-1)) 43 | distances = np.array(distances) 44 | 45 | agg = AgglomerativeClustering(affinity='precomputed', linkage='complete', distance_threshold=None, n_clusters=6) 46 | output = agg.fit_predict(distances) # Returns class labels. 
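    # Modes that land in the same cluster are treated as near-duplicates: the most probable
    # member of each cluster absorbs the probability mass of the others, which are zeroed.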
47 | temp_probs = probs.copy() 48 | for element in np.unique(output): 49 | similar_ks = np.where(output == element)[0].tolist() 50 | if len(similar_ks) > 1: 51 | best_k = similar_ks[np.argmax(temp_probs[similar_ks])] 52 | similar_ks.remove(best_k) 53 | for similar_k in similar_ks: 54 | temp_probs[best_k] += temp_probs[similar_k] 55 | temp_probs[similar_k] = 0.0 56 | 57 | return temp_probs 58 | 59 | 60 | if __name__ == "__main__": 61 | args, model_config, model_dirname = get_eval_args() 62 | 63 | val_dset = NuscenesH5Dataset(dset_path=args.dataset_path, split_name="val", 64 | model_type=model_config.model_type, use_map_img=model_config.use_map_image, 65 | use_map_lanes=model_config.use_map_lanes, rtn_extras=True) 66 | 67 | val_loader = torch.utils.data.DataLoader( 68 | val_dset, batch_size=args.batch_size, shuffle=False, num_workers=12, drop_last=False, pin_memory=False 69 | ) 70 | print("Val dataset loaded with length", len(val_dset)) 71 | 72 | autobot_model, device = load_model(args, model_config, val_dset.k_attr, val_dset.num_others, val_dset.pred_horizon, 73 | val_dset.map_attr) 74 | 75 | preds = [] 76 | with torch.no_grad(): 77 | for i, data in enumerate(val_loader): 78 | if i % 25 == 0: 79 | print(i, "/", len(val_dset) // args.batch_size) 80 | 81 | ego_in, ego_out, agents_in, roads, extras = data 82 | ego_in = ego_in.float().to(device) 83 | ego_out = ego_out.float().to(device) 84 | agents_in = agents_in.float().to(device) 85 | roads = roads.float().to(device) 86 | 87 | pred_obs, mode_preds = autobot_model(ego_in, agents_in, roads) 88 | pred_seqs = pred_obs[:, :, :, :2].cpu().numpy().transpose((2, 0, 1, 3)) 89 | mode_preds = mode_preds.cpu().numpy() 90 | 91 | # Process extras 92 | translation = extras[0].cpu().numpy() 93 | rotation = extras[1].cpu().numpy() 94 | instance_tokens = list(extras[2]) 95 | sample_tokens = list(extras[3]) 96 | 97 | for b in range(min(args.batch_size, len(pred_seqs))): 98 | curr_out = {} 99 | curr_out["instance"] = instance_tokens[b] 100 | curr_out["sample"] = sample_tokens[b] 101 | mode_preds[b] = recompute_probs(pred_seqs[b], mode_preds[b]) 102 | curr_out["probabilities"] = mode_preds[b].tolist() 103 | curr_out["prediction"] = [] 104 | for k in range(model_config.num_modes): 105 | pred_seq = pred_seqs[b, k] 106 | curr_out["prediction"].append( 107 | convert_local_coords_to_global(pred_seq, translation[b], rotation[b]).tolist() 108 | ) 109 | 110 | preds.append(curr_out) 111 | 112 | with open(model_dirname + '/autobot_preds.json', 'w') as fout: 113 | json.dump(preds, fout) 114 | 115 | 116 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roggirg/AutoBots/0ceb8f7f4c6e011ed0af879bc9cc3e9c1ad53351/utils/__init__.py -------------------------------------------------------------------------------- /utils/metric_helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def min_xde_K(xdes, probs, K): 6 | best_ks = probs.argsort(axis=1)[:, -K:] 7 | dummy_rows = np.expand_dims(np.arange(len(xdes)), 1) 8 | new_xdes = xdes[dummy_rows, best_ks] 9 | return np.nanmean(np.sort(new_xdes), axis=0) 10 | 11 | 12 | def yaw_from_predictions(preds, ego_in, agents_in): 13 | ''' 14 | For collision detection in the interaction dataset. This function computes the final yaw based on the predicted 15 | delta yaw. 
16 | ''' 17 | device = preds.device 18 | new_preds = preds.clone() 19 | last_obs_yaws = torch.cat((ego_in[:, -1, 7].unsqueeze(-1), agents_in[:, -1, :, 7]), dim=-1) 20 | T = preds.shape[1] 21 | yaws = torch.zeros((preds.shape[:4])).to(device) 22 | for t in range(T): 23 | yaws[:, t] = last_obs_yaws.unsqueeze(0) + preds[:, t, :, :, 2] 24 | new_preds[:, :, :, :, 2] = yaws 25 | 26 | return new_preds 27 | 28 | 29 | def interpolate_trajectories(preds): 30 | ''' 31 | This function is used for the Interaction dataset. Since we downsample the trajectories during training for 32 | efficiency, we now interpolate the trajectories to bring it back to the original number of timesteps. 33 | for evaluation on the test server. 34 | ''' 35 | device = preds.device 36 | K = preds.shape[0] 37 | T_in = preds.shape[1] 38 | B = preds.shape[2] 39 | N = preds.shape[3] 40 | out = preds.shape[4] 41 | 42 | new_preds = torch.zeros((K, 2*T_in, B, N, out)).to(device) 43 | T_in = preds.shape[1] 44 | preds = preds.permute(0, 2, 3, 1, 4).reshape(-1, T_in, out) 45 | new_idx = 0 46 | for t in range(T_in): 47 | if t == 0: 48 | new_pred = (preds[:, t, :2] - torch.tensor([[0.0, 0.0]]).to(device)) / 2.0 49 | new_pred = new_pred.view(K, B, N, 2) 50 | pred_t = preds[:, t, :2].view(K, B, N, 2) 51 | new_pred_yaw = (preds[:, t, -1] - torch.tensor([0.0]).to(device)) / 2.0 52 | new_pred_yaw = new_pred_yaw.view(K, B, N) 53 | pred_t_yaw = preds[:, t, -1].view(K, B, N) 54 | 55 | new_preds[:, new_idx, :, :, :2] = new_pred 56 | new_preds[:, new_idx, :, :, 2] = new_pred_yaw 57 | new_idx += 1 58 | new_preds[:, new_idx, :, :, :2] = pred_t 59 | new_preds[:, new_idx, :, :, 2] = pred_t_yaw 60 | new_idx += 1 61 | else: 62 | new_pred = ((preds[:, t, :2] - preds[:, t-1, :2]) / 2.0) + preds[:, t-1, :2] 63 | new_pred = new_pred.view(K, B, N, 2) 64 | pred_t = preds[:, t, :2].view(K, B, N, 2) 65 | new_pred_yaw = ((preds[:, t, -1] - preds[:, t-1, -1]) / 2.0) + preds[:, t-1, -1] 66 | new_pred_yaw = new_pred_yaw.view(K, B, N) 67 | pred_t_yaw = preds[:, t, -1].view(K, B, N) 68 | 69 | new_preds[:, new_idx, :, :, :2] = new_pred 70 | new_preds[:, new_idx, :, :, 2] = new_pred_yaw 71 | new_idx += 1 72 | new_preds[:, new_idx, :, :, :2] = pred_t 73 | new_preds[:, new_idx, :, :, 2] = pred_t_yaw 74 | new_idx += 1 75 | 76 | return new_preds 77 | 78 | 79 | def make_2d_rotation_matrix(angle_in_radians: float) -> np.ndarray: 80 | """ 81 | Makes rotation matrix to rotate point in x-y plane counterclockwise 82 | by angle_in_radians. 83 | """ 84 | return np.array([[np.cos(angle_in_radians), -np.sin(angle_in_radians)], 85 | [np.sin(angle_in_radians), np.cos(angle_in_radians)]]) 86 | 87 | 88 | def convert_local_coords_to_global(coordinates: np.ndarray, yaw: float) -> np.ndarray: 89 | """ 90 | Converts global coordinates to coordinates in the frame given by the rotation quaternion and 91 | centered at the translation vector. The rotation is meant to be a z-axis rotation. 92 | :param coordinates: x,y locations. array of shape [n_steps, 2]. 93 | :param translation: Tuple of (x, y, z) location that is the center of the new frame. 94 | :param rotation: Tuple representation of quaternion of new frame. 95 | Representation - cos(theta / 2) + (xi + yi + zi)sin(theta / 2). 96 | :return: x,y locations in frame stored in array of share [n_times, 2]. 
97 | """ 98 | transform = make_2d_rotation_matrix(angle_in_radians=-yaw) 99 | if len(coordinates.shape) > 2: 100 | coord_shape = coordinates.shape 101 | return np.dot(transform, coordinates.reshape((-1, 2)).T).T.reshape(*coord_shape) 102 | return np.dot(transform, coordinates.T).T[:, :2] 103 | 104 | 105 | # the next two functions are taken from the Interaction dataset gihub page 106 | # https://github.com/interaction-dataset/INTERPRET_challenge_multi-agent/blob/main/calculate_collision.py 107 | def return_circle_list(x, y, l, w, yaw): 108 | """ 109 | This function returns the list of origins of circles for the given vehicle at all 110 | predicted timestamps and modalities. x, y, and yaw have the same shape (T, Modality). 111 | l, w are scalars represents the length and width of the vehicle. 112 | The output has the shape (T, Modality, c, 2) where c is the number of circles 113 | determined by the length of the given vehicle. 114 | """ 115 | r = w/np.sqrt(2) 116 | cos_yaw = np.cos(yaw) 117 | sin_yaw = np.sin(yaw) 118 | if l < 4.0: 119 | c1 = [x-(l-w)/2*cos_yaw, y-(l-w)/2*sin_yaw] 120 | c2 = [x+(l-w)/2*cos_yaw, y+(l-w)/2*sin_yaw] 121 | c = [c1, c2] 122 | elif l >= 4.0 and l < 8.0: 123 | c0 = [x, y] 124 | c1 = [x-(l-w)/2*cos_yaw, y-(l-w)/2*sin_yaw] 125 | c2 = [x+(l-w)/2*cos_yaw, y+(l-w)/2*sin_yaw] 126 | c = [c0, c1, c2] 127 | else: 128 | c0 = [x, y] 129 | c1 = [x-(l-w)/2*cos_yaw, y-(l-w)/2*sin_yaw] 130 | c2 = [x+(l-w)/2*cos_yaw, y+(l-w)/2*sin_yaw] 131 | c3 = [x-(l-w)/2*cos_yaw/2, y-(l-w)/2*sin_yaw/2] 132 | c4 = [x+(l-w)/2*cos_yaw/2, y+(l-w)/2*sin_yaw/2] 133 | c = [c0, c1, c2, c3, c4] 134 | for i in range(len(c)): 135 | c[i] = np.stack(c[i], axis=-1) 136 | c = np.stack(c, axis=-2) 137 | return c 138 | 139 | 140 | def return_collision_threshold(w1, w2): 141 | """ 142 | This function returns the threshold for collision. 143 | If any of two circles' origins' distance between two vehicles is lower than this threshold, 144 | it is considered as a collision at that timestamp. 145 | w1, w2 are scalar values which represents the width of vehicle 1 and vehicle 2. 146 | """ 147 | return (w1 + w2) / np.sqrt(3.8) 148 | 149 | 150 | def collisions_for_inter_dataset(preds, agent_types, ego_in, agents_in, translations, device="cpu"): 151 | ''' 152 | 1. Rotate and Translate all agents to the same coordinate system. 153 | 2. Get all agent width and length (if they are vehicles only). 154 | 3. Use provided functions to compute the circles. 155 | 4. Check for collisions between vehicles. 156 | :return 157 | batch_collisions: collisions per-item in batch. 
158 | new_preds: the predictions, rotated and translated back into the global (map) frame. 159 | ''' 160 | new_preds = preds.copy() 161 | last_obs_yaws = np.concatenate((ego_in[:, -1, 7:8], agents_in[:, -1, :, 7]), axis=-1) 162 | angles_of_rotation = (np.pi / 2) + np.sign(-last_obs_yaws) * np.abs(last_obs_yaws) 163 | lengths = np.concatenate((ego_in[:, -1, 8:9], agents_in[:, -1, :, 8]), axis=-1) 164 | widths = np.concatenate((ego_in[:, -1, 9:10], agents_in[:, -1, :, 9]), axis=-1) 165 | 166 | vehicles_only = agent_types[:, :, 0] == 1.0  # first agent-type channel marks vehicles 167 | K = preds.shape[0] 168 | N = preds.shape[3] 169 | B = ego_in.shape[0] 170 | batch_collisions = np.zeros(B) 171 | for b in range(B): 172 | agents_circles = [] 173 | agents_widths = [] 174 | curr_preds = preds[:, :, b] 175 | for n in range(N): 176 | if not vehicles_only[b, n]: 177 | if agent_types[b, n, 1]:  # agent is present but not a vehicle: estimate its heading from its recent displacement 178 | if n == 0: 179 | diff = ego_in[b, -1, 0:2] - ego_in[b, -5, 0:2] 180 | yaw = np.arctan2(diff[1], diff[0]) 181 | else: 182 | diff = agents_in[b, -1, n-1, 0:2] - agents_in[b, -5, n-1, 0:2] 183 | yaw = np.arctan2(diff[1], diff[0]) 184 | angle_of_rotation = (np.pi / 2) + np.sign(-yaw) * np.abs(yaw) 185 | new_preds[:, :, b, n, :2] = convert_local_coords_to_global(coordinates=curr_preds[:, :, n, :2], 186 | yaw=angle_of_rotation) + \ 187 | translations[b, n].reshape(1, 1, -1) 188 | continue  # non-vehicle agents are excluded from the circle-based collision check 189 | yaw = angles_of_rotation[b, n] 190 | new_preds[:, :, b, n, :2] = convert_local_coords_to_global(coordinates=curr_preds[:, :, n, :2], yaw=yaw) + \ 191 | translations[b, n].reshape(1, 1, -1) 192 | preds_for_circle = new_preds[:, :, b, n, :3].transpose(1, 0, 2) 193 | circles_list = return_circle_list(x=preds_for_circle[:, :, 0], 194 | y=preds_for_circle[:, :, 1], 195 | l=lengths[b, n], w=widths[b, n], 196 | yaw=preds_for_circle[:, :, 2]) 197 | agents_circles.append(circles_list) 198 | agents_widths.append(widths[b, n]) 199 | 200 | # We now have all rotated agents.
Now we need to compute the circles using the lengths and widths of all agents 201 | collisions = np.zeros(K) 202 | for n in range(len(agents_circles)): 203 | curr_agent_circles = agents_circles[n] 204 | for _n in range(len(agents_circles)): 205 | if n == _n: 206 | continue 207 | other_agent_circles = agents_circles[_n] 208 | threshold_between_agents = return_collision_threshold(agents_widths[n], agents_widths[_n]) 209 | 210 | for curr_circle_idx in range(curr_agent_circles.shape[2]): 211 | for other_circle_idx in range(other_agent_circles.shape[2]): 212 | dists = np.linalg.norm(curr_agent_circles[:, :, curr_circle_idx] - other_agent_circles[:, :, other_circle_idx], axis=-1) 213 | collisions += (dists <= threshold_between_agents).sum(0) 214 | 215 | batch_collisions[b] = (collisions > 0.0).sum() / K 216 | 217 | return batch_collisions, torch.from_numpy(new_preds).to(device), vehicles_only 218 | -------------------------------------------------------------------------------- /utils/train_helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy import special 3 | import numpy as np 4 | import torch.distributions as D 5 | from torch.distributions import MultivariateNormal, Laplace 6 | 7 | 8 | # ==================================== AUTOBOT-EGO STUFF ==================================== 9 | 10 | def get_BVG_distributions(pred): 11 | B = pred.size(0) 12 | T = pred.size(1) 13 | mu_x = pred[:, :, 0].unsqueeze(2) 14 | mu_y = pred[:, :, 1].unsqueeze(2) 15 | sigma_x = pred[:, :, 2] 16 | sigma_y = pred[:, :, 3] 17 | rho = pred[:, :, 4] 18 | 19 | cov = torch.zeros((B, T, 2, 2)).to(pred.device) 20 | cov[:, :, 0, 0] = sigma_x ** 2 21 | cov[:, :, 1, 1] = sigma_y ** 2 22 | cov[:, :, 0, 1] = rho * sigma_x * sigma_y 23 | cov[:, :, 1, 0] = rho * sigma_x * sigma_y 24 | 25 | biv_gauss_dist = MultivariateNormal(loc=torch.cat((mu_x, mu_y), dim=-1), covariance_matrix=cov) 26 | return biv_gauss_dist 27 | 28 | 29 | def get_Laplace_dist(pred): 30 | return Laplace(pred[:, :, :2], pred[:, :, 2:4]) 31 | 32 | 33 | def nll_pytorch_dist(pred, data, rtn_loss=True): 34 | # biv_gauss_dist = get_BVG_distributions(pred) 35 | biv_gauss_dist = get_Laplace_dist(pred) 36 | if rtn_loss: 37 | # return (-biv_gauss_dist.log_prob(data)).sum(1) # Gauss 38 | return (-biv_gauss_dist.log_prob(data)).sum(-1).sum(1) # Laplace 39 | else: 40 | # return (-biv_gauss_dist.log_prob(data)).sum(-1) # Gauss 41 | return (-biv_gauss_dist.log_prob(data)).sum(dim=(1, 2)) # Laplace 42 | 43 | 44 | def nll_loss_multimodes(pred, data, modes_pred, entropy_weight=1.0, kl_weight=1.0, use_FDEADE_aux_loss=True): 45 | """NLL loss multimodes for training. MFP Loss function 46 | Args: 47 | pred: [K, T, B, 5] 48 | data: [B, T, 5] 49 | modes_pred: [B, K], prior prob over modes 50 | noise is optional 51 | """ 52 | modes = len(pred) 53 | nSteps, batch_sz, dim = pred[0].shape 54 | 55 | # compute posterior probability based on predicted prior and likelihood of predicted trajectory. 
56 | log_lik = np.zeros((batch_sz, modes)) 57 | with torch.no_grad(): 58 | for kk in range(modes): 59 | nll = nll_pytorch_dist(pred[kk].transpose(0, 1), data, rtn_loss=False) 60 | log_lik[:, kk] = -nll.cpu().numpy() 61 | 62 | priors = modes_pred.detach().cpu().numpy() 63 | log_posterior_unnorm = log_lik + np.log(priors) 64 | log_posterior = log_posterior_unnorm - special.logsumexp(log_posterior_unnorm, axis=-1).reshape((batch_sz, -1)) 65 | post_pr = np.exp(log_posterior) 66 | post_pr = torch.tensor(post_pr).float().to(data.device) 67 | post_entropy = torch.mean(D.Categorical(post_pr).entropy()).item() 68 | 69 | # Compute loss. 70 | loss = 0.0 71 | for kk in range(modes): 72 | nll_k = nll_pytorch_dist(pred[kk].transpose(0, 1), data, rtn_loss=True) * post_pr[:, kk] 73 | loss += nll_k.mean() 74 | 75 | # Adding entropy loss term to ensure that individual predictions do not try to cover multiple modes. 76 | entropy_vals = [] 77 | for kk in range(modes): 78 | entropy_vals.append(get_BVG_distributions(pred[kk]).entropy()) 79 | entropy_vals = torch.stack(entropy_vals).permute(2, 0, 1) 80 | entropy_loss = torch.mean((entropy_vals).sum(2).max(1)[0]) 81 | loss += entropy_weight * entropy_loss 82 | 83 | # KL divergence between the prior and the posterior distributions. 84 | kl_loss_fn = torch.nn.KLDivLoss(reduction='batchmean') # type: ignore 85 | kl_loss = kl_weight*kl_loss_fn(torch.log(modes_pred), post_pr) 86 | 87 | # compute ADE/FDE loss - L2 norms between best predictions and GT. 88 | if use_FDEADE_aux_loss: 89 | adefde_loss = l2_loss_fde(pred, data) 90 | else: 91 | adefde_loss = torch.tensor(0.0).to(data.device) 92 | 93 | return loss, kl_loss, post_entropy, adefde_loss 94 | 95 | 96 | def l2_loss_fde(pred, data): 97 | fde_loss = torch.norm((pred[:, -1, :, :2].transpose(0, 1) - data[:, -1, :2].unsqueeze(1)), 2, dim=-1) 98 | ade_loss = torch.norm((pred[:, :, :, :2].transpose(1, 2) - data[:, :, :2].unsqueeze(0)), 2, dim=-1).mean(dim=2).transpose(0, 1) 99 | loss, min_inds = (fde_loss + ade_loss).min(dim=1) 100 | return 100.0 * loss.mean() 101 | 102 | 103 | # ==================================== AUTOBOT-JOINT STUFF ==================================== 104 | 105 | 106 | def get_BVG_distributions_joint(pred): 107 | B = pred.size(0) 108 | T = pred.size(1) 109 | N = pred.size(2) 110 | mu_x = pred[:, :, :, 0].unsqueeze(3) 111 | mu_y = pred[:, :, :, 1].unsqueeze(3) 112 | sigma_x = pred[:, :, :, 2] 113 | sigma_y = pred[:, :, :, 3] 114 | rho = pred[:, :, :, 4] 115 | 116 | cov = torch.zeros((B, T, N, 2, 2)).to(pred.device) 117 | cov[:, :, :, 0, 0] = sigma_x ** 2 118 | cov[:, :, :, 1, 1] = sigma_y ** 2 119 | cov_val = rho * sigma_x * sigma_y 120 | cov[:, :, :, 0, 1] = cov_val 121 | cov[:, :, :, 1, 0] = cov_val 122 | 123 | biv_gauss_dist = MultivariateNormal(loc=torch.cat((mu_x, mu_y), dim=-1), covariance_matrix=cov) 124 | return biv_gauss_dist 125 | 126 | 127 | def get_Laplace_dist_joint(pred): 128 | return Laplace(pred[:, :, :, :2], pred[:, :, :, 2:4]) 129 | 130 | 131 | def nll_pytorch_dist_joint(pred, data, agents_masks): 132 | # biv_gauss_dist = get_BVG_distributions_joint(pred) 133 | biv_gauss_dist = get_Laplace_dist_joint(pred) 134 | num_active_agents_per_timestep = agents_masks.sum(2) 135 | loss = (((-biv_gauss_dist.log_prob(data).sum(-1) * agents_masks).sum(2)) / num_active_agents_per_timestep).sum(1) 136 | return loss 137 | 138 | 139 | def nll_loss_multimodes_joint(pred, ego_data, agents_data, mode_probs, entropy_weight=1.0, kl_weight=1.0, 140 | use_FDEADE_aux_loss=True, agent_types=None,
predict_yaw=False): 141 | """ 142 | Args: 143 | pred: [c, T, B, M, 5] 144 | ego_data: [B, T, 5] 145 | agents_data: [B, T, M, 5] 146 | mode_probs: [B, c], prior prob over modes 147 | """ 148 | gt_agents = torch.cat((ego_data.unsqueeze(2), agents_data), dim=2) 149 | modes = len(pred) 150 | nSteps, batch_sz, N, dim = pred[0].shape 151 | agents_masks = torch.cat((torch.ones(batch_sz, nSteps, 1).to(ego_data.device), agents_data[:, :, :, -1]), dim=-1) 152 | 153 | # compute posterior probability based on predicted prior and likelihood of predicted scene. 154 | log_lik = np.zeros((batch_sz, modes)) 155 | with torch.no_grad(): 156 | for kk in range(modes): 157 | nll = nll_pytorch_dist_joint(pred[kk].transpose(0, 1), gt_agents[:, :, :, :2], agents_masks) 158 | log_lik[:, kk] = -nll.cpu().numpy() 159 | 160 | priors = mode_probs.detach().cpu().numpy() 161 | log_posterior_unnorm = log_lik + np.log(priors) 162 | log_posterior = log_posterior_unnorm - special.logsumexp(log_posterior_unnorm, axis=1).reshape((batch_sz, 1)) 163 | post_pr = np.exp(log_posterior) 164 | post_pr = torch.tensor(post_pr).float().to(gt_agents.device) 165 | post_entropy = torch.mean(D.Categorical(post_pr).entropy()).item() 166 | 167 | # Compute loss. 168 | loss = 0.0 169 | for kk in range(modes): 170 | nll_k = nll_pytorch_dist_joint(pred[kk].transpose(0, 1), gt_agents[:, :, :, :2], agents_masks) * post_pr[:, kk] 171 | loss += nll_k.mean() 172 | 173 | # Adding entropy loss term to ensure that individual predictions do not try to cover multiple modes. 174 | entropy_vals = [] 175 | for kk in range(modes): 176 | entropy_vals.append(get_BVG_distributions_joint(pred[kk]).entropy()) 177 | entropy_loss = torch.mean(torch.stack(entropy_vals).permute(2, 0, 3, 1).sum(3).mean(2).max(1)[0]) 178 | loss += entropy_weight * entropy_loss 179 | 180 | # KL divergence between the prior and the posterior distributions. 181 | kl_loss_fn = torch.nn.KLDivLoss(reduction='batchmean') # type: ignore 182 | kl_loss = kl_weight*kl_loss_fn(torch.log(mode_probs), post_pr) 183 | 184 | # compute ADE/FDE loss - L2 norms between best predictions and GT. 185 | if use_FDEADE_aux_loss: 186 | adefde_loss = l2_loss_fde_joint(pred, gt_agents, agents_masks, agent_types, predict_yaw) 187 | else: 188 | adefde_loss = torch.tensor(0.0).to(gt_agents.device) 189 | 190 | return loss, kl_loss, post_entropy, adefde_loss 191 | 192 | 193 | def l2_loss_fde_joint(pred, data, agent_masks, agent_types, predict_yaw): 194 | fde_loss = (torch.norm((pred[:, -1, :, :, :2].transpose(0, 1) - data[:, -1, :, :2].unsqueeze(1)), 2, dim=-1) * 195 | agent_masks[:, -1:, :]).mean(-1) 196 | ade_loss = (torch.norm((pred[:, :, :, :, :2].transpose(1, 2) - data[:, :, :, :2].unsqueeze(0)), 2, dim=-1) * 197 | agent_masks.unsqueeze(0)).mean(-1).mean(dim=2).transpose(0, 1) 198 | 199 | yaw_loss = torch.tensor(0.0).to(pred.device) 200 | if predict_yaw: 201 | vehicles_only = (agent_types[:, :, 0] == 1.0).unsqueeze(0) 202 | yaw_loss = torch.norm(pred[:, :, :, :, 5:].transpose(1, 2) - data[:, :, :, 4:5].unsqueeze(0), dim=-1).mean(2) # across time 203 | yaw_loss = (yaw_loss * vehicles_only).mean(-1).transpose(0, 1) 204 | 205 | loss, min_inds = (fde_loss + ade_loss + yaw_loss).min(dim=1) 206 | return 100.0 * loss.mean() 207 | --------------------------------------------------------------------------------
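
For reference, the snippet below is a minimal, illustrative sketch (not part of the repository) of how the two collision helpers in `utils/metric_helpers.py` can be combined to test whether two predicted vehicle trajectories collide. The trajectories, vehicle dimensions and mode count are made-up example values, and the import path assumes the snippet is run from the repository root.

```
import numpy as np

from utils.metric_helpers import return_circle_list, return_collision_threshold

# Two straight, parallel trajectories 1.5 m apart, with a single mode (K = 1).
T, K = 10, 1
t = np.arange(T, dtype=float)
x_a, y_a = np.tile(t[:, None], (1, K)), np.zeros((T, K))
x_b, y_b = np.tile(t[:, None], (1, K)), np.full((T, K), 1.5)
yaw = np.zeros((T, K))  # both vehicles heading along +x

# Circle approximations of the two vehicle footprints: arrays of shape (T, K, c, 2).
circles_a = return_circle_list(x_a, y_a, l=4.5, w=1.8, yaw=yaw)
circles_b = return_circle_list(x_b, y_b, l=4.5, w=1.8, yaw=yaw)
threshold = return_collision_threshold(1.8, 1.8)  # ~1.85 m for two 1.8 m wide vehicles

# Pairwise distances between every circle of A and every circle of B, per timestep and mode.
dists = np.linalg.norm(circles_a[:, :, :, None, :] - circles_b[:, :, None, :, :], axis=-1)
collided_modes = (dists <= threshold).any(axis=(0, 2, 3))  # (K,) booleans, one per mode
print(collided_modes)  # expected: [ True], since the 1.5 m gap is below the threshold
```

This mirrors what `collisions_for_inter_dataset` does per scene, after it has first mapped every agent's predictions back to the global frame.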