├── .vscode ├── extensions.json └── settings.json ├── human_scene_transformer ├── images │ ├── hero.png │ └── architecture.png ├── config │ ├── pedestrians │ │ ├── dataset_params.gin │ │ ├── training_params.gin │ │ ├── model_params.gin │ │ └── metrics.gin │ ├── jrdb_challenge │ │ ├── training_params.gin │ │ ├── model_params.gin │ │ ├── metrics.gin │ │ └── dataset_params.gin │ └── jrdb │ │ ├── training_params.gin │ │ ├── model_params.gin │ │ ├── dataset_params.gin │ │ └── metrics.gin ├── __init__.py ├── model │ ├── README.md │ ├── embedding.py │ ├── head.py │ ├── output_distributions.py │ ├── multimodality_induction.py │ ├── agent_self_alignment.py │ ├── scene_encoder.py │ ├── model_params.py │ ├── preprocess.py │ ├── agent_feature_encoder.py │ ├── agent_encoder.py │ └── model.py ├── metrics │ ├── metrics.py │ ├── pos_nll.py │ ├── mean_angle_error.py │ └── ade.py ├── jrdb │ ├── torch_dataset.py │ ├── dataset_params.py │ ├── eval.py │ └── eval_keypoints.py ├── pedestrians │ ├── dataset_params.py │ └── eval.py ├── training_params.py ├── data │ ├── extract_robot_odometry_from_rosbag.py │ ├── README.md │ ├── math │ │ ├── math_types.py │ │ └── data_types.py │ ├── jrdb_train_detections_to_tracks.py │ ├── jrdb_preprocess_test.py │ ├── jrdb_preprocess_train.py │ └── utils.py ├── is_hidden_generators.py └── train.py ├── requirements.txt ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── .github └── workflows │ └── pytest_and_autopublish.yml ├── pyproject.toml ├── README.md └── LICENSE /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-python.black-formatter" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /human_scene_transformer/images/hero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/human-scene-transformer/HEAD/human_scene_transformer/images/hero.png -------------------------------------------------------------------------------- /human_scene_transformer/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/human-scene-transformer/HEAD/human_scene_transformer/images/architecture.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py>=1.4 2 | gin-config>=0.5 3 | numpy>=1.24 4 | open3d>=0.17 5 | pandas>=2.0 6 | scipy>=1.10 7 | tensorflow>=2.13 8 | tensorflow-datasets>=4.9 9 | tensorflow-graphics>=2021.12 10 | tensorflow-probability>=0.21 11 | tf-models-official>=2.5 12 | tqdm>=4.66 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Byte-compiled 5 | _pycache__/ 6 | .cache/ 7 | 8 | # Poetry, setuptools, PyPI distribution artifacts. 
9 | /*.egg-info 10 | .eggs/ 11 | build/ 12 | dist/ 13 | poetry.lock 14 | 15 | # Tests 16 | .pytest_cache/ 17 | 18 | # Type checking 19 | .pytype/ 20 | 21 | # Other 22 | *.DS_Store 23 | 24 | # PyCharm 25 | .idea 26 | -------------------------------------------------------------------------------- /human_scene_transformer/config/pedestrians/dataset_params.gin: -------------------------------------------------------------------------------- 1 | PedestriansDatasetParams.path = '' 2 | PedestriansDatasetParams.dataset = 'eth' 3 | PedestriansDatasetParams.train_config = 'train' # train, trainval 4 | PedestriansDatasetParams.eval_config = 'val' # val, test 5 | 6 | PedestriansDatasetParams.num_history_steps = 7 7 | PedestriansDatasetParams.num_steps = 20 8 | PedestriansDatasetParams.num_agents = 14 9 | PedestriansDatasetParams.timestep = 0.4 -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb_challenge/training_params.gin: -------------------------------------------------------------------------------- 1 | TrainingParams.batch_size = 64 2 | TrainingParams.shuffle_buffer_size = 10000 3 | TrainingParams.total_train_steps = 2e6 4 | TrainingParams.warmup_steps = 5e4 5 | TrainingParams.peak_learning_rate = 1e-4 6 | #TrainingParams.global_clipnorm = 1. 7 | TrainingParams.batches_per_train_step = 25000 8 | TrainingParams.batches_per_eval_step = 2000 9 | TrainingParams.eval_every_n_step = 1e4 10 | TrainingParams.loss = @MultimodalPositionNLLLoss -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb/training_params.gin: -------------------------------------------------------------------------------- 1 | TrainingParams.batch_size = 64 2 | TrainingParams.shuffle_buffer_size = 10000 3 | TrainingParams.total_train_steps = 5e6 4 | TrainingParams.warmup_steps = 5e4 5 | TrainingParams.peak_learning_rate = 1e-4 6 | #TrainingParams.global_clipnorm = 1. 7 | TrainingParams.batches_per_train_step = 25000 8 | TrainingParams.batches_per_eval_step = 2000 9 | TrainingParams.eval_every_n_step = 1e4 10 | TrainingParams.loss = @MinNLLPositionMixtureCategoricalCrossentropyLoss -------------------------------------------------------------------------------- /human_scene_transformer/config/pedestrians/training_params.gin: -------------------------------------------------------------------------------- 1 | TrainingParams.batch_size = 16 2 | TrainingParams.shuffle_buffer_size = 10000 3 | TrainingParams.total_train_steps = 1e6 4 | TrainingParams.warmup_steps = 5e4 5 | TrainingParams.peak_learning_rate = 5e-4 6 | #TrainingParams.global_clipnorm = 1. 7 | TrainingParams.batches_per_train_step = 25000 8 | TrainingParams.batches_per_eval_step = 2000 9 | TrainingParams.eval_every_n_step = 1e4 10 | TrainingParams.loss = @MinNLLPositionMixtureCategoricalCrossentropyLoss -------------------------------------------------------------------------------- /human_scene_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """human_scene_transformer API.""" 16 | 17 | # A new PyPI release will be pushed everytime `__version__` is increased 18 | # When changing this, also update the CHANGELOG.md 19 | __version__ = '0.1.0' 20 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.insertFinalNewline": true, 3 | "files.trimFinalNewlines": true, 4 | "files.trimTrailingWhitespace": true, 5 | "files.associations": { 6 | ".pylintrc": "ini" 7 | }, 8 | "python.testing.unittestEnabled": false, 9 | "python.testing.nosetestsEnabled": false, 10 | "python.testing.pytestEnabled": true, 11 | "python.linting.pylintUseMinimalCheckers": false, 12 | "[python]": { 13 | "editor.rulers": [80], 14 | "editor.tabSize": 2, 15 | "editor.defaultFormatter": "ms-python.black-formatter", 16 | "editor.formatOnSave": true, 17 | "editor.detectIndentation": false 18 | }, 19 | "python.formatting.provider": "none", 20 | "black-formatter.path": ["pyink"], 21 | "files.watcherExclude": { 22 | "**/.git/**": true 23 | }, 24 | "files.exclude": { 25 | "**/__pycache__": true, 26 | "**/.pytest_cache": true, 27 | "**/*.egg-info": true 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /human_scene_transformer/config/pedestrians/model_params.gin: -------------------------------------------------------------------------------- 1 | ModelParams.agents_position_key = 'agents/position' 2 | ModelParams.agents_feature_config = { 3 | 'agents/position': @AgentPositionEncoder, 4 | } 5 | ModelParams.hidden_size = 64 6 | ModelParams.feature_embedding_size = 128 7 | ModelParams.transformer_ff_dim = 64 8 | 9 | ModelParams.num_heads = 4 10 | ModelParams.num_modes = 20 11 | ModelParams.scene_encoder = None 12 | ModelParams.attn_architecture = ( 13 | 'self-attention', 14 | 'self-attention', 15 | 'multimodality_induction', 16 | 'self-attention', 17 | 'self-attention-mode', 18 | 'self-attention', 19 | 'self-attention-mode', 20 | ) 21 | ModelParams.mask_style = "has_historic_data" 22 | ModelParams.drop_prob = 0.2 23 | ModelParams.prediction_head = @Prediction2DPositionHeadLayer 24 | 25 | ModelParams.num_history_steps = 7 26 | ModelParams.num_steps = 20 27 | ModelParams.timestep = 0.4 28 | # Must be one of the classes in is_hidden_generators.py. 
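# @BPIsHiddenGenerator hides every agent's future timesteps (plain behavior
# prediction). For prediction conditioned on the 0th (robot) agent's future,
# is_hidden_generators.py also provides @CBPIsHiddenGenerator, e.g.:
# ModelParams.is_hidden_generator = @CBPIsHiddenGenerator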
29 | ModelParams.is_hidden_generator = @BPIsHiddenGenerator -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 23 | 24 | ## [Unreleased] 25 | 26 | ## [0.1.0] - 2022-09-26 27 | 28 | * Initial release 29 | 30 | [Unreleased]: https://github.com/google-research/human-scene-transformer/compare/v0.1.0...HEAD 31 | [0.1.0]: https://github.com/google-research/human-scene-transformer/releases/tag/v0.1.0 32 | -------------------------------------------------------------------------------- /human_scene_transformer/model/README.md: -------------------------------------------------------------------------------- 1 | # Human Scene Transformer Architecture 2 | 3 | ![Human Scene Transformer Architecture](../images/architecture.png) 4 | 5 | Overview of the HST architecture: From the robot’s sensors we extract the scene context, the historic position tracks of each agent, and vision-based skeletal keypoints/head orientation when feasible. All features are encoded individually before the agent features are combined via cross-attention (XA) using a learned query tensor. The resulting agent-timestep tokens are passed to our Agent Self-Alignment layer, which enables the use of subsequent full self-attention (FSA) layers. Embedded scene context is attended to via cross-attention (XA). After multimodality is induced and further FSA layers are applied, the model outputs the parameters of a Normal distribution for each agent at each prediction timestep. We can represent the full output structure as a Gaussian Mixture Model (formula in bottom right) over all possible futures, where the mixture coefficients w come from the Multimodality Induction. Both cross-attention (XA) and full self-attention layers use the Transformer layer (top right) with different input configurations for Query (Q), Key (K), and Value (V). -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb_challenge/model_params.gin: -------------------------------------------------------------------------------- 1 | ModelParams.agents_position_key = 'agents/position' 2 | ModelParams.agents_feature_config = { 3 | 'agents/position': @AgentPositionEncoder, 4 | 'agents/keypoints': @AgentKeypointsEncoder, 5 | #'agents/gaze': @Agent2DOrientationEncoder, 6 | } 7 | ModelParams.hidden_size = 128 8 | ModelParams.feature_embedding_size = 128 9 | ModelParams.transformer_ff_dim = 128 10 | 11 | ModelParams.num_heads = 4 12 | ModelParams.num_modes = 4 13 | ModelParams.scene_encoder = @PointCloudEncoderLayer 14 | ModelParams.attn_architecture = ( 15 | 'self-attention', 16 | 'self-attention', 17 | 'cross-attention', 18 | 'multimodality_induction', 19 | 'self-attention', 20 | 'self-attention-mode', 21 | 'self-attention', 22 | 'self-attention-mode', 23 | ) 24 | ModelParams.mask_style = "has_historic_data" 25 | ModelParams.drop_prob = 0.1 26 | ModelParams.prediction_head = @Prediction2DPositionHeadLayer 27 | 28 | ModelParams.num_history_steps = 11 29 | ModelParams.num_steps = 24 30 | ModelParams.timestep = 0.4 31 | # Must be one of the classes in is_hidden_generators.py.
32 | ModelParams.is_hidden_generator = @BPIsHiddenGenerator -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb/model_params.gin: -------------------------------------------------------------------------------- 1 | ModelParams.agents_position_key = 'agents/position' 2 | ModelParams.agents_feature_config = { 3 | 'agents/position': @AgentPositionEncoder, 4 | 'agents/keypoints': @AgentKeypointsEncoder, 5 | #'agents/gaze': @Agent2DOrientationEncoder, 6 | } 7 | ModelParams.hidden_size = 128 8 | ModelParams.feature_embedding_size = 128 9 | ModelParams.transformer_ff_dim = 128 10 | 11 | ModelParams.num_heads = 4 12 | ModelParams.num_modes = 6 13 | #ModelParams.scene_encoder = @PointCloudEncoderLayer 14 | ModelParams.attn_architecture = ( 15 | 'self-attention', 16 | 'self-attention', 17 | #'cross-attention', 18 | 'multimodality_induction', 19 | 'self-attention', 20 | 'self-attention-mode', 21 | 'self-attention', 22 | 'self-attention-mode', 23 | ) 24 | ModelParams.mask_style = "has_historic_data" 25 | ModelParams.drop_prob = 0.1 26 | ModelParams.prediction_head = @Prediction2DPositionHeadLayer 27 | 28 | ModelParams.num_history_steps = 6 29 | ModelParams.num_steps = 19 30 | ModelParams.timestep = 0.33333333333334 31 | # Must be one of the classes in is_hidden_generators.py. 32 | ModelParams.is_hidden_generator = @BPIsHiddenGenerator -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | The primary purpose of this repository is to provide a record of published work 4 | and we do not expect to receive pull requests. If you'd nonetheless like to 5 | contribute, there are just a few small guidelines you need to follow. 6 | 7 | ## Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a Contributor License 10 | Agreement (CLA). You (or your employer) retain the copyright to your 11 | contribution; this simply gives us permission to use and redistribute your 12 | contributions as part of the project. Head over to 13 | https://cla.developers.google.com/ to see your current agreements on file or 14 | to sign a new one. 15 | 16 | You generally only need to submit a CLA once, so if you've already submitted one 17 | (even if it was for a different project), you probably don't need to do it 18 | again. 19 | 20 | ## Code Reviews 21 | 22 | All submissions, including submissions by project members, require review. We 23 | use GitHub pull requests for this purpose. Consult 24 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 25 | information on using pull requests. 26 | 27 | ## Community Guidelines 28 | 29 | This project follows 30 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 31 | -------------------------------------------------------------------------------- /human_scene_transformer/metrics/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Entry point file for metrics.""" 16 | 17 | import gin 18 | 19 | from human_scene_transformer.metrics import ade # pylint: disable=unused-import 20 | from human_scene_transformer.metrics import mean_angle_error # pylint: disable=unused-import 21 | from human_scene_transformer.metrics import pos_nll # pylint: disable=unused-import 22 | 23 | import tensorflow as tf 24 | 25 | 26 | # Create a "stock" subclass from Keras' Mean for gin. 27 | # Somehow, calling gin.external_configurable() did not work. 28 | @gin.configurable 29 | class Mean(tf.keras.metrics.Mean): 30 | 31 | def __init__(self, name="mean", dtype=None): 32 | super().__init__(name=name, dtype=dtype) 33 | -------------------------------------------------------------------------------- /human_scene_transformer/jrdb/torch_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """PyTorch Dataset Wrapper for JRDB Dataset.""" 16 | 17 | from human_scene_transformer.jrdb import dataset_params as jrdb_dataset_params 18 | from human_scene_transformer.jrdb import input_fn 19 | 20 | import tensorflow_datasets as tfds 21 | 22 | import torch 23 | 24 | 25 | class JRDBPredictionDataset(torch.utils.data.IterableDataset): 26 | """PyTorch Dataset Wrapper for JRDB Dataset.""" 27 | 28 | def __init__(self, 29 | dataset_params: jrdb_dataset_params.JRDBDatasetParams, 30 | train: bool = True): 31 | self.dataset_params = dataset_params 32 | self.train = train 33 | 34 | def __iter__(self): 35 | if self.train: 36 | tfds_iter = iter( 37 | tfds.as_numpy(input_fn.get_train_dataset(self.dataset_params))) 38 | else: 39 | tfds_iter = iter( 40 | tfds.as_numpy(input_fn.get_eval_dataset(self.dataset_params))) 41 | 42 | return tfds_iter 43 | -------------------------------------------------------------------------------- /human_scene_transformer/pedestrians/dataset_params.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """JRDB dataset parameter class. 16 | 17 | The module contains a dataclasses used to configure the dataset. 18 | """ 19 | 20 | import gin 21 | 22 | 23 | @gin.configurable 24 | class PedestriansDatasetParams(object): 25 | """This object describes parameters of the dataset. 26 | 27 | The default values represent the proxy dataset. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | path=None, 33 | dataset='eth', 34 | train_config='train', 35 | eval_config='val', 36 | num_history_steps=7, 37 | num_steps=20, 38 | num_agents=16, 39 | timestep=0.4, 40 | ): 41 | 42 | self.path = path 43 | self.dataset = dataset 44 | self.train_config = train_config 45 | self.eval_config = eval_config 46 | self.num_history_steps = num_history_steps 47 | self.num_steps = num_steps 48 | self.num_agents = num_agents 49 | self.timestep = timestep 50 | -------------------------------------------------------------------------------- /.github/workflows/pytest_and_autopublish.yml: -------------------------------------------------------------------------------- 1 | name: Unittests & Auto-publish 2 | 3 | # Allow to trigger the workflow manually (e.g. when deps changes) 4 | on: [push, workflow_dispatch] 5 | 6 | jobs: 7 | pytest-job: 8 | runs-on: ubuntu-latest 9 | timeout-minutes: 30 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: true 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | # Install deps 19 | - uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.10' 22 | # Uncomment to cache of pip dependencies (if tests too slow) 23 | # cache: pip 24 | # cache-dependency-path: '**/pyproject.toml' 25 | 26 | - run: pip install --upgrade pip 27 | - run: pip --version 28 | - run: pip install -e .[dev] 29 | - run: pip freeze 30 | 31 | # Run tests (in parallel) 32 | # - name: Run core tests 33 | # run: pytest -vv -n auto 34 | 35 | # # Auto-publish when version is increased 36 | # publish-job: 37 | # # Only try to publish if: 38 | # # * Repo is self (prevents running from forks) 39 | # # * Branch is `main` 40 | # if: | 41 | # github.repository == 'google-research/human-scene-transformer' 42 | # && github.ref == 'refs/heads/main' 43 | # needs: pytest-job # Only publish after tests are successful 44 | # runs-on: ubuntu-latest 45 | # permissions: 46 | # contents: write 47 | # timeout-minutes: 30 48 | 49 | # steps: 50 | # # Publish the package (if local `__version__` > pip version) 51 | # - uses: etils-actions/pypi-auto-publish@v1 52 | # with: 53 | # pypi-token: ${{ secrets.PYPI_API_TOKEN }} 54 | # gh-token: ${{ secrets.GITHUB_TOKEN }} 55 | # parse-changelog: true 56 | -------------------------------------------------------------------------------- /human_scene_transformer/model/embedding.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains embedding layers.""" 16 | 17 | import tensorflow as tf 18 | 19 | 20 | class SinusoidalEmbeddingLayer(tf.keras.layers.Layer): 21 | """Sinusoidal Positional Embedding for xyz and time.""" 22 | 23 | def __init__(self, min_freq=4, max_freq=256, hidden_size=256): 24 | super().__init__() 25 | self.min_freq = float(min_freq) 26 | self.max_freq = float(max_freq) 27 | self.hidden_size = hidden_size 28 | if hidden_size % 2 != 0: 29 | raise ValueError(f'hidden_size ({hidden_size}) must be divisible by 2.') 30 | self.num_freqs_int32 = hidden_size // 2 31 | self.num_freqs = tf.cast(self.num_freqs_int32, dtype=tf.float32) 32 | 33 | def build(self, input_shape): 34 | log_freq_increment = ( 35 | tf.math.log(float(self.max_freq) / float(self.min_freq)) / 36 | tf.maximum(1.0, self.num_freqs - 1)) 37 | # [num_freqs] 38 | self.inv_freqs = self.min_freq * tf.exp( 39 | tf.range(self.num_freqs, dtype=tf.float32) * -log_freq_increment) 40 | 41 | def call(self, input_tensor): 42 | # [..., num_freqs] 43 | input_tensor = tf.repeat( 44 | input_tensor[..., tf.newaxis], self.num_freqs_int32, axis=-1) 45 | # [..., h] 46 | embedded = tf.concat([ 47 | tf.sin(input_tensor * self.inv_freqs), 48 | tf.cos(input_tensor * self.inv_freqs) 49 | ], 50 | axis=-1) 51 | return embedded 52 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Project metadata. Available keys are documented at: 3 | # https://packaging.python.org/en/latest/specifications/declaring-project-metadata 4 | name = "human_scene_transformer" 5 | description = "Human Scene Transformer: Trajectory Prediction in Human-Centered Environments."
6 | readme = "README.md" 7 | requires-python = ">=3.10" 8 | license = {file = "LICENSE"} 9 | authors = [ 10 | {name = "Tim Salzmann", email="Tim.Salzmann@tum.de"}, 11 | {name = "Lewis Chiang"}, 12 | {name = "Markus Ryll"}, 13 | {name = "Dorsa Sadigh"}, 14 | {name = "Carolina Parada"}, 15 | {name = "Alex Bewley", email="bewley@google.com"}] 16 | classifiers = [ # List of https://pypi.org/classifiers/ 17 | "License :: OSI Approved :: Apache Software License", 18 | "Intended Audience :: Science/Research", 19 | ] 20 | keywords = [] 21 | 22 | # pip dependencies of the project 23 | # Installed locally with `pip install -e .` 24 | dependencies = [ 25 | "absl-py>=1.4", 26 | "gin-config>=0.5", 27 | "numpy>=1.24", 28 | "open3d>=0.17", 29 | "pandas>=2.0", 30 | "scipy>=1.10", 31 | "tensorflow>=2.13", 32 | "tensorflow-datasets>=4.9", 33 | "tensorflow-graphics>=2021.12", 34 | "tensorflow-probability>=0.21", 35 | "tf-models-official>=2.5", 36 | "tqdm>=4.66" 37 | ] 38 | 39 | # `version` is automatically set by flit to use `human_scene_transformer.__version__` 40 | dynamic = ["version"] 41 | 42 | [project.urls] 43 | homepage = "https://human-scene-transformer.github.io/" 44 | repository = "https://github.com/google-research/human-scene-transformer" 45 | changelog = "https://github.com/google-research/human-scene-transformer/blob/main/CHANGELOG.md" 46 | # documentation = "" 47 | 48 | [project.optional-dependencies] 49 | # Development deps (unittest, linting, formating,...) 50 | # Installed through `pip install -e .[dev]` 51 | dev = [ 52 | "pytest", 53 | "pytest-xdist", 54 | "pylint>=2.6.0", 55 | "pyink", 56 | ] 57 | 58 | [tool.pyink] 59 | # Formatting configuration to follow Google style-guide 60 | line-length = 80 61 | preview = true 62 | pyink-indentation = 2 63 | pyink-use-majority-quotes = true 64 | 65 | [build-system] 66 | # Build system specify which backend is used to build/install the project (flit, 67 | # poetry, setuptools,...). All backends are supported by `pip install` 68 | requires = ["flit_core >=3.8,<4"] 69 | build-backend = "flit_core.buildapi" 70 | 71 | [tool.flit.sdist] 72 | # Flit specific options (files to exclude from the PyPI package). 73 | # If using another build backend (setuptools, poetry), you can remove this 74 | # section. 75 | exclude = [ 76 | # Do not release tests files on PyPI 77 | "**/*_test.py", 78 | ] 79 | -------------------------------------------------------------------------------- /human_scene_transformer/jrdb/dataset_params.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """JRDB dataset parameter class. 16 | 17 | The module contains a dataclasses used to configure the dataset. 18 | """ 19 | 20 | import gin 21 | 22 | 23 | @gin.configurable 24 | class JRDBDatasetParams(object): 25 | """This object describes parameters of the dataset. 
26 | 27 | The default values represent the proxy dataset. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | path=None, 33 | train_scenes=None, 34 | eval_scenes=None, 35 | features=None, 36 | train_split=(0., 1.), 37 | eval_split=(0., 1.), 38 | num_history_steps=6, 39 | num_steps=19, 40 | num_agents=8, 41 | timestep=0.33333333333334, 42 | subsample=5, 43 | min_distance_to_robot=7.0, 44 | num_pointcloud_points=512, 45 | ): 46 | """Initialize the DatasetParam object. 47 | 48 | Args: 49 | path: Location of dataset(s). 50 | train_scenes: Scenes to use in the train dataset. 51 | eval_scenes: Scenes to use in the eval dataset. 52 | features: The features to load from dataset file. 53 | train_split: Split to use from the train scenes in the train dataset. 54 | eval_split: Split to use from the eval scenes in the eval dataset. 55 | num_history_steps: The number of history timesteps. 56 | num_steps: The total number of timesteps. 57 | num_agents: The max number of agents in the dataset. 58 | timestep: The interval between timesteps. 59 | subsample: Subsample JRDB dataset by this frequency. 60 | min_distance_to_robot: Filter out agents with larger 61 | minimum distance to robot in selected window. 62 | num_pointcloud_points: Number of sampled pointcloud points. 63 | """ 64 | 65 | self.path = path 66 | self.train_scenes = train_scenes 67 | self.eval_scenes = eval_scenes 68 | self.features = features 69 | self.train_split = train_split 70 | self.eval_split = eval_split 71 | self.num_history_steps = num_history_steps 72 | self.num_steps = num_steps 73 | self.num_agents = num_agents 74 | self.timestep = timestep 75 | self.subsample = subsample 76 | self.min_distance_to_robot = min_distance_to_robot 77 | self.num_pointcloud_points = num_pointcloud_points 78 | -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb/dataset_params.gin: -------------------------------------------------------------------------------- 1 | TRAIN_SCENES = ['bytes-cafe-2019-02-07_0', 2 | 'clark-center-2019-02-28_0', 3 | 'clark-center-intersection-2019-02-28_0', 4 | 'cubberly-auditorium-2019-04-22_0', 5 | 'gates-159-group-meeting-2019-04-03_0', 6 | 'gates-ai-lab-2019-02-08_0', 7 | 'gates-to-clark-2019-02-28_1', 8 | 'hewlett-packard-intersection-2019-01-24_0', 9 | 'huang-basement-2019-01-25_0', 10 | 'huang-lane-2019-02-12_0', 11 | 'memorial-court-2019-03-16_0', 12 | 'meyer-green-2019-03-16_0', 13 | 'packard-poster-session-2019-03-20_0', 14 | 'packard-poster-session-2019-03-20_1', 15 | 'stlc-111-2019-04-19_0', 16 | 'svl-meeting-gates-2-2019-04-08_0', 17 | 'tressider-2019-03-16_0', 18 | 'tressider-2019-03-16_1', 19 | 'cubberly-auditorium-2019-04-22_1_test', 20 | 'discovery-walk-2019-02-28_0_test', 21 | 'food-trucks-2019-02-12_0_test', 22 | 'gates-ai-lab-2019-04-17_0_test', 23 | 'gates-foyer-2019-01-17_0_test', 24 | 'gates-to-clark-2019-02-28_0_test', 25 | 'hewlett-class-2019-01-23_1_test', 26 | 'huang-2-2019-01-25_1_test', 27 | 'indoor-coupa-cafe-2019-02-06_0_test', 28 | 'lomita-serra-intersection-2019-01-30_0_test', 29 | 'nvidia-aud-2019-01-25_0_test', 30 | 'nvidia-aud-2019-04-18_1_test', 31 | 'outdoor-coupa-cafe-2019-02-06_0_test', 32 | 'quarry-road-2019-02-28_0_test', 33 | 'stlc-111-2019-04-19_1_test', 34 | 'stlc-111-2019-04-19_2_test', 35 | 'tressider-2019-04-26_0_test', 36 | 'tressider-2019-04-26_1_test'] 37 | 38 | TEST_SCENES = ['clark-center-2019-02-28_1', 39 | 'forbes-cafe-2019-01-22_0', 40 | 'gates-basement-elevators-2019-01-17_1', 41 | 'huang-2-2019-01-25_0', 42 | 
'jordan-hall-2019-04-22_0', 43 | 'nvidia-aud-2019-04-18_0', 44 | 'packard-poster-session-2019-03-20_2', 45 | 'svl-meeting-gates-2-2019-04-08_1', 46 | 'tressider-2019-04-26_2', 47 | 'discovery-walk-2019-02-28_1_test', 48 | 'gates-basement-elevators-2019-01-17_0_test', 49 | 'hewlett-class-2019-01-23_0_test', 50 | 'huang-intersection-2019-01-22_0_test', 51 | 'meyer-green-2019-03-16_1_test', 52 | 'nvidia-aud-2019-04-18_2_test', 53 | 'serra-street-2019-01-30_0_test', 54 | 'tressider-2019-03-16_2_test', 55 | 'tressider-2019-04-26_3_test'] 56 | 57 | 58 | JRDBDatasetParams.path = '' 59 | 60 | JRDBDatasetParams.train_scenes = %TRAIN_SCENES 61 | JRDBDatasetParams.eval_scenes = %TEST_SCENES 62 | JRDBDatasetParams.features = [ 63 | 'agents/position', 64 | 'agents/keypoints', 65 | 'robot/position', 66 | #'robot/orientation', 67 | #'scene/pc' 68 | ] 69 | 70 | JRDBDatasetParams.train_split = (0., 1.0) 71 | JRDBDatasetParams.eval_split = (0.0, 1.0) 72 | 73 | 74 | JRDBDatasetParams.num_history_steps = 6 75 | JRDBDatasetParams.num_steps = 19 76 | JRDBDatasetParams.num_agents = 8 77 | JRDBDatasetParams.timestep = 0.33333333333334 78 | 79 | JRDBDatasetParams.subsample = 5 80 | JRDBDatasetParams.num_pointcloud_points = 512 81 | 82 | JRDBDatasetParams.min_distance_to_robot = 7.0 -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb/metrics.gin: -------------------------------------------------------------------------------- 1 | # All available metrics. 2 | min_ade/metrics.ade.MinADE.cutoff_seconds = None 3 | min_ade1s/metrics.ade.MinADE.cutoff_seconds = 1.0 4 | min_ade2s/metrics.ade.MinADE.cutoff_seconds = 2.0 5 | min_ade3s/metrics.ade.MinADE.cutoff_seconds = 3.0 6 | min_ade4s/metrics.ade.MinADE.cutoff_seconds = 4.0 7 | 8 | ml_ade/metrics.ade.MLADE.cutoff_seconds = None 9 | ml_ade1s/metrics.ade.MLADE.cutoff_seconds = 1.0 10 | ml_ade2s/metrics.ade.MLADE.cutoff_seconds = 2.0 11 | ml_ade3s/metrics.ade.MLADE.cutoff_seconds = 3.0 12 | ml_ade4s/metrics.ade.MLADE.cutoff_seconds = 4.0 13 | 14 | pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = None 15 | pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 1.0 16 | pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 2.0 17 | pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 3.0 18 | pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 4.0 19 | 20 | # Training metrics. 21 | get_metrics.train_metrics = { 22 | 'loss': @metrics.Mean, 23 | 'loss_position': @metrics.Mean, 24 | 'loss_orientation': @metrics.Mean, 25 | 26 | 'min_ade': @min_ade/metrics.ade.MinADE, 27 | 'min_ade1s': @min_ade1s/metrics.ade.MinADE, 28 | 'min_ade2s': @min_ade2s/metrics.ade.MinADE, 29 | 'min_ade3s': @min_ade3s/metrics.ade.MinADE, 30 | 'min_ade4s': @min_ade4s/metrics.ade.MinADE, 31 | 32 | 'ml_ade': @ml_ade/metrics.ade.MLADE, 33 | 'ml_ade1s': @ml_ade1s/metrics.ade.MLADE, 34 | 'ml_ade2s': @ml_ade2s/metrics.ade.MLADE, 35 | 'ml_ade3s': @ml_ade3s/metrics.ade.MLADE, 36 | 'ml_ade4s': @ml_ade4s/metrics.ade.MLADE, 37 | 38 | 'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood, 39 | 'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood, 40 | 'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood, 41 | 'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood, 42 | 'pos_nll4s': @pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood, 43 | } 44 | 45 | # Eval metrics. 
46 | get_metrics.eval_metrics = { 47 | 'loss': @metrics.Mean, 48 | 'loss_position': @metrics.Mean, 49 | 'loss_orientation': @metrics.Mean, 50 | 51 | 'min_ade': @min_ade/metrics.ade.MinADE, 52 | 'min_ade1s': @min_ade1s/metrics.ade.MinADE, 53 | 'min_ade2s': @min_ade2s/metrics.ade.MinADE, 54 | 'min_ade3s': @min_ade3s/metrics.ade.MinADE, 55 | 'min_ade4s': @min_ade4s/metrics.ade.MinADE, 56 | 57 | 'ml_ade': @ml_ade/metrics.ade.MLADE, 58 | 'ml_ade1s': @ml_ade1s/metrics.ade.MLADE, 59 | 'ml_ade2s': @ml_ade2s/metrics.ade.MLADE, 60 | 'ml_ade3s': @ml_ade3s/metrics.ade.MLADE, 61 | 'ml_ade4s': @ml_ade4s/metrics.ade.MLADE, 62 | 63 | 'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood, 64 | 'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood, 65 | 'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood, 66 | 'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood, 67 | 'pos_nll4s': @pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood, 68 | } -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb_challenge/metrics.gin: -------------------------------------------------------------------------------- 1 | # All available metrics. 2 | min_ade/metrics.ade.MinADE.cutoff_seconds = None 3 | min_ade1s/metrics.ade.MinADE.cutoff_seconds = 1.0 4 | min_ade2s/metrics.ade.MinADE.cutoff_seconds = 2.0 5 | min_ade3s/metrics.ade.MinADE.cutoff_seconds = 3.0 6 | min_ade4s/metrics.ade.MinADE.cutoff_seconds = 4.0 7 | 8 | ml_ade/metrics.ade.MLADE.cutoff_seconds = None 9 | ml_ade1s/metrics.ade.MLADE.cutoff_seconds = 1.0 10 | ml_ade2s/metrics.ade.MLADE.cutoff_seconds = 2.0 11 | ml_ade3s/metrics.ade.MLADE.cutoff_seconds = 3.0 12 | ml_ade4s/metrics.ade.MLADE.cutoff_seconds = 4.8 13 | 14 | pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = None 15 | pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 1.0 16 | pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 2.0 17 | pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 3.0 18 | pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 4.8 19 | 20 | # Training metrics. 21 | get_metrics.train_metrics = { 22 | 'loss': @metrics.Mean, 23 | 'loss_position': @metrics.Mean, 24 | 'loss_orientation': @metrics.Mean, 25 | 26 | 'min_ade': @min_ade/metrics.ade.MinADE, 27 | 'min_ade1s': @min_ade1s/metrics.ade.MinADE, 28 | 'min_ade2s': @min_ade2s/metrics.ade.MinADE, 29 | 'min_ade3s': @min_ade3s/metrics.ade.MinADE, 30 | 'min_ade4s': @min_ade4s/metrics.ade.MinADE, 31 | 32 | 'ml_ade': @ml_ade/metrics.ade.MLADE, 33 | 'ml_ade1s': @ml_ade1s/metrics.ade.MLADE, 34 | 'ml_ade2s': @ml_ade2s/metrics.ade.MLADE, 35 | 'ml_ade3s': @ml_ade3s/metrics.ade.MLADE, 36 | 'ml_ade4s': @ml_ade4s/metrics.ade.MLADE, 37 | 38 | 'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood, 39 | 'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood, 40 | 'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood, 41 | 'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood, 42 | 'pos_nll4s': @pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood, 43 | } 44 | 45 | # Eval metrics. 
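# Note: ml_ade4s and pos_nll4s above use a 4.8s cutoff rather than 4.0s; 4.8s
# is the full challenge prediction horizon, (24 - 11 - 1) * 0.4s = 4.8s
# (see config/jrdb_challenge/model_params.gin).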
46 | get_metrics.eval_metrics = { 47 | 'loss': @metrics.Mean, 48 | 'loss_position': @metrics.Mean, 49 | 'loss_orientation': @metrics.Mean, 50 | 51 | 'min_ade': @min_ade/metrics.ade.MinADE, 52 | 'min_ade1s': @min_ade1s/metrics.ade.MinADE, 53 | 'min_ade2s': @min_ade2s/metrics.ade.MinADE, 54 | 'min_ade3s': @min_ade3s/metrics.ade.MinADE, 55 | 'min_ade4s': @min_ade4s/metrics.ade.MinADE, 56 | 57 | 'ml_ade': @ml_ade/metrics.ade.MLADE, 58 | 'ml_ade1s': @ml_ade1s/metrics.ade.MLADE, 59 | 'ml_ade2s': @ml_ade2s/metrics.ade.MLADE, 60 | 'ml_ade3s': @ml_ade3s/metrics.ade.MLADE, 61 | 'ml_ade4s': @ml_ade4s/metrics.ade.MLADE, 62 | 63 | 'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood, 64 | 'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood, 65 | 'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood, 66 | 'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood, 67 | 'pos_nll4s': @pos_nll4s/metrics.pos_nll.PositionNegativeLogLikelihood, 68 | } -------------------------------------------------------------------------------- /human_scene_transformer/config/pedestrians/metrics.gin: -------------------------------------------------------------------------------- 1 | # All available metrics. 2 | min_ade/metrics.ade.MinADE.cutoff_seconds = None 3 | min_ade1s/metrics.ade.MinADE.cutoff_seconds = 1.0 4 | min_ade2s/metrics.ade.MinADE.cutoff_seconds = 2.0 5 | min_ade3s/metrics.ade.MinADE.cutoff_seconds = 3.0 6 | min_ade5s/metrics.ade.MinADE.cutoff_seconds = 5.0 7 | 8 | ml_ade/metrics.ade.MLADE.cutoff_seconds = None 9 | ml_ade1s/metrics.ade.MLADE.cutoff_seconds = 1.0 10 | ml_ade2s/metrics.ade.MLADE.cutoff_seconds = 2.0 11 | ml_ade3s/metrics.ade.MLADE.cutoff_seconds = 3.0 12 | ml_ade5s/metrics.ade.MLADE.cutoff_seconds = 5.0 13 | 14 | pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = None 15 | pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 1.0 16 | pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 2.0 17 | pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 3.0 18 | pos_nll5s/metrics.pos_nll.PositionNegativeLogLikelihood.cutoff_seconds = 5.0 19 | 20 | # Training metrics. 21 | get_metrics.train_metrics = { 22 | 'loss': @metrics.Mean, 23 | 'loss_position': @metrics.Mean, 24 | 'loss_orientation': @metrics.Mean, 25 | 26 | 'min_ade': @min_ade/metrics.ade.MinADE, 27 | 'min_ade1s': @min_ade1s/metrics.ade.MinADE, 28 | 'min_ade2s': @min_ade2s/metrics.ade.MinADE, 29 | 'min_ade3s': @min_ade3s/metrics.ade.MinADE, 30 | #'min_ade5s': @min_ade5s/metrics.ade.MinADE, 31 | 32 | 'ml_ade': @ml_ade/metrics.ade.MLADE, 33 | 'ml_ade1s': @ml_ade1s/metrics.ade.MLADE, 34 | 'ml_ade2s': @ml_ade2s/metrics.ade.MLADE, 35 | 'ml_ade3s': @ml_ade3s/metrics.ade.MLADE, 36 | #'ml_ade5s': @ml_ade5s/metrics.ade.MLADE, 37 | 38 | 'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood, 39 | 'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood, 40 | 'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood, 41 | 'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood, 42 | #'pos_nll5s': @pos_nll5s/metrics.pos_nll.PositionNegativeLogLikelihood, 43 | } 44 | 45 | # Eval metrics. 
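# Note: the 5s variants defined above stay commented out in the metric dicts
# because the pedestrians prediction horizon is only (20 - 7 - 1) * 0.4s = 4.8s
# (see config/pedestrians/model_params.gin).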
46 | get_metrics.eval_metrics = { 47 | 'loss': @metrics.Mean, 48 | 'loss_position': @metrics.Mean, 49 | 'loss_orientation': @metrics.Mean, 50 | 51 | 'min_ade': @min_ade/metrics.ade.MinADE, 52 | 'min_ade1s': @min_ade1s/metrics.ade.MinADE, 53 | 'min_ade2s': @min_ade2s/metrics.ade.MinADE, 54 | 'min_ade3s': @min_ade3s/metrics.ade.MinADE, 55 | #'min_ade5s': @min_ade5s/metrics.ade.MinADE, 56 | 57 | 'ml_ade': @ml_ade/metrics.ade.MLADE, 58 | 'ml_ade1s': @ml_ade1s/metrics.ade.MLADE, 59 | 'ml_ade2s': @ml_ade2s/metrics.ade.MLADE, 60 | 'ml_ade3s': @ml_ade3s/metrics.ade.MLADE, 61 | #'ml_ade5s': @ml_ade5s/metrics.ade.MLADE, 62 | 63 | 'pos_nll': @pos_nll/metrics.pos_nll.PositionNegativeLogLikelihood, 64 | 'pos_nll1s': @pos_nll1s/metrics.pos_nll.PositionNegativeLogLikelihood, 65 | 'pos_nll2s': @pos_nll2s/metrics.pos_nll.PositionNegativeLogLikelihood, 66 | 'pos_nll3s': @pos_nll3s/metrics.pos_nll.PositionNegativeLogLikelihood, 67 | #'pos_nll5s': @pos_nll5s/metrics.pos_nll.PositionNegativeLogLikelihood, 68 | } -------------------------------------------------------------------------------- /human_scene_transformer/training_params.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Training parameter class. 16 | 17 | The module contains a dataclasses used to configure the training procedure. 18 | """ 19 | 20 | import gin 21 | 22 | from human_scene_transformer import losses 23 | 24 | 25 | @gin.configurable 26 | class TrainingParams(object): 27 | """This object configures the model and training/eval.""" 28 | 29 | def __init__( 30 | self, 31 | batch_size=32, 32 | shuffle_buffer_size=10000, 33 | total_train_steps=5e6, 34 | warmup_steps=5e4, 35 | peak_learning_rate=1e-4, 36 | global_clipnorm=None, 37 | batches_per_train_step=2000, 38 | batches_per_eval_step=2000, 39 | eval_every_n_step=1e4, 40 | loss=losses.MultimodalPositionOrientationNLLLoss, 41 | ): 42 | """Initialize the TrainParam object. 43 | 44 | This is where gin injection occurs. 45 | 46 | Args: 47 | batch_size: The batch size of training and eval. On TPUs, each core will 48 | receive a batch size that equals batch_size/num_cores. 49 | shuffle_buffer_size: The size of the tf.dataset.shuffle() operation. 50 | total_train_steps: The total number of training steps (will automatically 51 | be converted to int). 52 | warmup_steps: The number of warmup steps during training. The learning 53 | rate will increase linearly to the peak at this step (from 1/10 of 54 | peak). Will automatically be converted to int. 55 | peak_learning_rate: The maximum learning rate during training. 56 | global_clipnorm: Gradient norm clipping value. 57 | batches_per_train_step: Number of batches per training step. This is 58 | required to improve the efficiency for Keras custom training loops. Will 59 | automatically be converted to int. 60 | batches_per_eval_step: Number of batches per eval step. 
The total number 61 | of eval examples will be batches_per_eval_step * batch_size. Will 62 | automatically be converted to int. 63 | eval_every_n_step: Number of training steps before evaluating using the 64 | eval set. Will automatically be converted to int. 65 | loss: A class used as loss. 66 | """ 67 | self.batch_size = batch_size 68 | self.shuffle_buffer_size = shuffle_buffer_size 69 | self.total_train_steps = int(total_train_steps) 70 | self.warmup_steps = int(warmup_steps) 71 | self.peak_learning_rate = peak_learning_rate 72 | self.global_clipnorm = global_clipnorm 73 | self.batches_per_train_step = int(batches_per_train_step) 74 | self.batches_per_eval_step = int(batches_per_eval_step) 75 | self.eval_every_n_step = int(eval_every_n_step) 76 | self.loss = loss 77 | -------------------------------------------------------------------------------- /human_scene_transformer/model/head.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains prediction head layers.""" 16 | 17 | import gin 18 | 19 | import tensorflow as tf 20 | 21 | 22 | @gin.register 23 | class PredictionHeadLayer(tf.keras.layers.Layer): 24 | """Converts transformer hidden vectors to model predictions.""" 25 | 26 | def __init__(self, hidden_units=None): 27 | super().__init__() 28 | 29 | self.dense_layers = [] 30 | # Add hidden layers. 31 | if hidden_units is not None: 32 | for units in hidden_units: 33 | self.dense_layers.append( 34 | tf.keras.layers.Dense(units, activation='relu')) 35 | self.dense_layers.append( 36 | tf.keras.layers.EinsumDense( 37 | '...h,hf->...f', 38 | output_shape=11, 39 | bias_axes='f', 40 | activation=None)) 41 | 42 | def call(self, input_batch): 43 | # [b, a, t, h] 44 | hidden_vecs = input_batch['hidden_vecs'] 45 | x = hidden_vecs 46 | # [b, a, t, 7] 47 | for layer in self.dense_layers: 48 | x = layer(x) 49 | pred = x 50 | predictions = { 51 | 'agents/position': pred[..., 0:3], 52 | 'agents/orientation': pred[..., 3:4], 53 | 'agents/position/raw_scale_tril': pred[..., 4:10], 54 | 'agents/orientation/raw_concentration': pred[..., 10:11], 55 | } 56 | if 'mixture_logits' in input_batch: 57 | predictions['mixture_logits'] = input_batch['mixture_logits'] 58 | return predictions 59 | 60 | 61 | @gin.register 62 | class Prediction2DPositionHeadLayer(tf.keras.layers.Layer): 63 | """Converts transformer hidden vectors to model predictions.""" 64 | 65 | def __init__(self, hidden_units=None, num_stages=5): 66 | super().__init__() 67 | 68 | self.dense_layers = [] 69 | # Add hidden layers. 70 | if hidden_units is not None: 71 | for units in hidden_units: 72 | self.dense_layers.append( 73 | tf.keras.layers.Dense(units, activation='relu')) 74 | # Add the final prediction head. 
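    # The 5 output features per agent/timestep/mode are the 2D position mean
    # (indices 0:2) and the 3 raw entries of the lower-triangular Cholesky
    # factor of the 2x2 position covariance (indices 2:5), which
    # output_distributions.to_positive_definite_scale_tril turns into a valid
    # scale_tril.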
75 | self.dense_layers.append( 76 | tf.keras.layers.EinsumDense( 77 | '...h,hf->...f', 78 | output_shape=5, 79 | bias_axes='f', 80 | activation=None)) 81 | 82 | def call(self, input_batch): 83 | # [b, a, t, h] 84 | hidden_vecs = input_batch['hidden_vecs'] 85 | x = hidden_vecs 86 | # [b, a, t, 5] 87 | for layer in self.dense_layers: 88 | x = layer(x) 89 | pred = x 90 | predictions = { 91 | 'agents/position': pred[..., 0:2], 92 | 'agents/position/raw_scale_tril': pred[..., 2:5], 93 | } 94 | if 'mixture_logits' in input_batch: 95 | predictions['mixture_logits'] = input_batch['mixture_logits'] 96 | return predictions 97 | -------------------------------------------------------------------------------- /human_scene_transformer/model/output_distributions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Output distribution functions to further process the model's raw output.""" 16 | 17 | import tensorflow as tf 18 | import tensorflow_probability as tfp 19 | 20 | 21 | def force_positive(x, eps=1e-6): 22 | return tf.keras.activations.elu(x) + 1. + eps 23 | 24 | 25 | @tf.function 26 | def to_positive_definite_scale_tril(logit_sigma): 27 | tril = tfp.math.fill_triangular(logit_sigma) 28 | scale_tril = tf.linalg.set_diag( 29 | tril, 30 | force_positive(tf.linalg.diag_part(tril))) 31 | return scale_tril 32 | 33 | 34 | @tf.function 35 | def to_concentration(raw_concentration): 36 | return tf.math.reciprocal(force_positive(raw_concentration)) 37 | 38 | 39 | def get_position_distribution(model_output): 40 | """Multivariate Normal distribution over position.""" 41 | p_pos = tfp.distributions.MultivariateNormalTriL( 42 | loc=model_output['agents/position'], 43 | scale_tril=to_positive_definite_scale_tril( 44 | model_output['agents/position/raw_scale_tril'])) 45 | 46 | return p_pos 47 | 48 | 49 | def get_multimodal_position_distribution(model_output): 50 | """Multivariate Normal Mixture distribution over position.""" 51 | p_pos = get_position_distribution(model_output) 52 | 53 | p_pos_mm = tfp.distributions.MixtureSameFamily( 54 | mixture_distribution=tfp.distributions.Categorical( 55 | logits=model_output['mixture_logits']), 56 | components_distribution=p_pos) 57 | 58 | return p_pos_mm 59 | 60 | 61 | def get_orientation_distribution(model_output): 62 | """VonMises distribution over the yaw orientation. 63 | 64 | VonMises has a concentration input -> high values lead to unbounded log 65 | likelihoods. Thus we use the reciprocal of the elu to force vanishing 66 | gradients for high concentrations. 67 | Args: 68 | model_output: Raw model output dictionary. 69 | Required keys: agents/orientation, agents/orientation/raw_concentration 70 | Returns: 71 | VanMises distribution. 
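    Note: to_concentration above computes
    concentration = 1 / (elu(raw_concentration) + 1 + eps), so large raw
    values map to small concentrations.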
72 | """ 73 | # [b, a, t, n, 1] 74 | p_orientation = tfp.distributions.VonMises( 75 | loc=model_output['agents/orientation'][..., 0], 76 | concentration=to_concentration( 77 | model_output['agents/orientation/raw_concentration'][..., 0])) 78 | 79 | return p_orientation 80 | 81 | 82 | def get_multimodal_orientation_distribution(model_output): 83 | """VonMises distribution over position.""" 84 | p_orientation = get_orientation_distribution(model_output) 85 | 86 | p_orientation_mm = tfp.distributions.MixtureSameFamily( 87 | mixture_distribution=tfp.distributions.Categorical( 88 | logits=model_output['mixture_logits']), 89 | components_distribution=p_orientation) 90 | 91 | return p_orientation_mm 92 | -------------------------------------------------------------------------------- /human_scene_transformer/metrics/pos_nll.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Position Negative Log Likelihood Keras metric.""" 16 | 17 | import gin 18 | 19 | from human_scene_transformer.model import output_distributions 20 | 21 | import tensorflow as tf 22 | 23 | 24 | @gin.configurable 25 | class PositionNegativeLogLikelihood(tf.keras.metrics.Metric): 26 | """Position Negative Log Likelihood.""" 27 | 28 | def __init__(self, params, cutoff_seconds=None, at_cutoff=False, 29 | name='PosNLL'): 30 | """Initializes the PositionNegativeLogLikelihood metric. 31 | 32 | Args: 33 | params: ModelParams 34 | cutoff_seconds: Cutoff up to which time the metric should be calculated 35 | in seconds. 36 | at_cutoff: If True metric will be calculated at cutoff timestep. 37 | Otherwise metric is calculated as average up to cutoff_seconds. 38 | name: Metric name. 39 | """ 40 | super().__init__(name=name) 41 | self.agents_position_key = params.agents_position_key 42 | self.cutoff_seconds = cutoff_seconds 43 | self.at_cutoff = at_cutoff 44 | if cutoff_seconds is None: 45 | self.cutoff_idx = None 46 | else: 47 | # +1 due to current time step. 48 | self.cutoff_idx = int( 49 | params.num_history_steps + 50 | cutoff_seconds / params.timestep) + 1 51 | 52 | self.num_predictions = self.add_weight( 53 | name='num_predictions', initializer='zeros') 54 | self.total_deviation = self.add_weight( 55 | name='total_deviation', initializer='zeros') 56 | 57 | def update_state(self, input_batch, predictions): 58 | should_predict = tf.cast(input_batch['should_predict'], tf.float32) 59 | 60 | p_pos = output_distributions.get_multimodal_position_distribution( 61 | predictions) 62 | 63 | target = input_batch[f'{self.agents_position_key}/target'] 64 | target = target[..., :p_pos.event_shape_tensor()[0]] 65 | 66 | # [b, a, t, n, 1] 67 | per_position_nll = -p_pos.log_prob(target)[..., tf.newaxis] 68 | 69 | # Non-observed or past should not contribute to metric. 
70 | nll = tf.math.multiply_no_nan(per_position_nll, should_predict) 71 | # Chop off the un-wanted time part. 72 | # [b, a, cutoff_idx, 1] 73 | if self.at_cutoff and self.cutoff_seconds is not None: 74 | nll = nll[:, :, self.cutoff_idx-1:self.cutoff_idx, :] 75 | num_predictions = tf.reduce_sum( 76 | should_predict[:, :, self.cutoff_idx-1::self.cutoff_idx, :]) 77 | else: 78 | nll = nll[:, :, :self.cutoff_idx, :] 79 | num_predictions = tf.reduce_sum(should_predict[:, :, :self.cutoff_idx, :]) 80 | 81 | # [1] 82 | nll = tf.reduce_sum(nll) 83 | 84 | self.num_predictions.assign_add(num_predictions) 85 | self.total_deviation.assign_add(nll) 86 | 87 | def result(self): 88 | return self.total_deviation / self.num_predictions 89 | 90 | def reset_states(self): 91 | self.num_predictions.assign(0) 92 | self.total_deviation.assign(0.0) 93 | -------------------------------------------------------------------------------- /human_scene_transformer/data/extract_robot_odometry_from_rosbag.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Extracts robot odometry from JRDB raw rosbags.""" 16 | 17 | 18 | import collections 19 | import glob 20 | import json 21 | import os 22 | 23 | from typing import Sequence 24 | from absl import app 25 | from absl import flags 26 | from absl import logging 27 | 28 | import pandas as pd 29 | import rosbag 30 | 31 | 32 | _INPUT_PATH = flags.DEFINE_string( 33 | 'input_path', 34 | None, 35 | 'Input path to folder with rosbags.', 36 | ) 37 | 38 | _TIMESTAMPS_PATH = flags.DEFINE_string( 39 | 'timestamps_path', 40 | None, 41 | 'Input path to folder with timestamps.', 42 | ) 43 | 44 | _OUTPUT_PATH = flags.DEFINE_string( 45 | 'output_path', 46 | None, 47 | 'Output path', 48 | ) 49 | 50 | 51 | def extract_odometry(bag_file): 52 | """Extracts robot odometry from JRDB raw rosbags.""" 53 | bag = rosbag.Bag(bag_file) 54 | 55 | data_list = list() 56 | for _, msg, t in bag.read_messages(topics=['segway/feedback/wheel_odometry']): 57 | data_point = dict() 58 | data_point['timestamp'] = t.to_sec() 59 | data_point['pose'] = dict() 60 | data_point['pose']['position'] = collections.OrderedDict() 61 | data_point['pose']['orientation'] = collections.OrderedDict() 62 | 63 | data_point['pose']['position']['x'] = msg.pose.pose.position.x 64 | data_point['pose']['position']['y'] = msg.pose.pose.position.y 65 | data_point['pose']['position']['z'] = msg.pose.pose.position.z 66 | 67 | data_point['pose']['orientation']['x'] = msg.pose.pose.orientation.x 68 | data_point['pose']['orientation']['y'] = msg.pose.pose.orientation.y 69 | data_point['pose']['orientation']['z'] = msg.pose.pose.orientation.z 70 | data_point['pose']['orientation']['w'] = msg.pose.pose.orientation.w 71 | data_list.append(data_point) 72 | 73 | return data_list 74 | 75 | 76 | def fix_timestamps(data_list, timestamp_file): 77 | """Fix timestamps with 
expected format by JRDB dataset.""" 78 | with open(timestamp_file) as f: 79 | timestamps = json.load(f)['data'] 80 | 81 | df = pd.DataFrame(data_list).set_index('timestamp') 82 | 83 | data_dict = collections.OrderedDict() 84 | for timestamp in timestamps: 85 | ts_id = timestamp['pointclouds'][0]['url'].split('/')[-1] 86 | ts = timestamp['pointclouds'][0]['timestamp'] 87 | data_dict[ts_id] = df.iloc[df.index.get_indexer([ts], method='nearest')[0]][ 88 | 'pose' 89 | ] 90 | 91 | return data_dict 92 | 93 | 94 | def main(argv: Sequence[str]) -> None: 95 | if len(argv) > 1: 96 | raise app.UsageError('Too many command-line arguments.') 97 | 98 | bag_files = glob.glob('*.bag', root_dir=_INPUT_PATH.value) 99 | for bag_file in bag_files[1:2]: 100 | data_list = extract_odometry(os.path.join(_INPUT_PATH.value, bag_file)) 101 | fixed_data_list = fix_timestamps( 102 | data_list, 103 | os.path.join(_TIMESTAMPS_PATH.value, bag_file[:-4], 'frames_pc.json'), 104 | ) 105 | data_dict = {'odometry': fixed_data_list} 106 | with open( 107 | os.path.join(_OUTPUT_PATH.value, bag_file.replace('bag', 'json')), 'w' 108 | ) as f: 109 | json.dump(data_dict, f, indent=2) 110 | 111 | if __name__ == '__main__': 112 | logging.set_verbosity(logging.ERROR) 113 | app.run(main) 114 | -------------------------------------------------------------------------------- /human_scene_transformer/is_hidden_generators.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """is_hidden generator classes for deciding HST operation mode. 16 | 17 | Currently includes BP (predicting all agents), Conditional-BP (predicting 18 | agents conditioned on the 0th agent, usually the ego-robot). 19 | """ 20 | 21 | import gin 22 | import numpy as np 23 | 24 | 25 | @gin.configurable 26 | class IsHiddenGenerator(object): 27 | """Base class generator for the is_hidden tensor.""" 28 | 29 | def __init__(self, num_steps, num_history_steps): 30 | self._num_steps = num_steps 31 | self._num_history_steps = num_history_steps 32 | 33 | def __call__(self, num_agents: int, train_progress: float = 0.0): 34 | """Returns the is_hidden tensor. 35 | 36 | Args: 37 | num_agents: Number of agents in the scene. 38 | train_progress: A float between 0 to 1 representing the overall progress 39 | of training. This float can be current_step / total_training_steps. This 40 | float can be used for training w/ an annealing schedule. 41 | """ 42 | raise NotImplementedError('Calling the base class is prohibited.') 43 | 44 | 45 | @gin.configurable 46 | class BPIsHiddenGenerator(IsHiddenGenerator): 47 | """is_hidden generator for BP. All agents futures are hidden.""" 48 | 49 | def __call__(self, num_agents: int, train_progress: float = 0.0): 50 | """Returns the is_hidden tensor for behavior prediction. 51 | 52 | Always returns 0 (not hidden) for history/current steps and 1 (hidden) 53 | for future steps. 
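
    For example, with the JRDB challenge configuration above (num_steps=24,
    num_history_steps=11) the returned array has shape [1, num_agents, 24, 1]:
    timesteps 0..11 are False (observed history plus the current step) and
    timesteps 12..23 are True (future steps to be predicted).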
54 | 55 | Args: 56 | num_agents: Number of agents in the scene. 57 | train_progress: A float between 0 to 1 representing the overall progress 58 | of training. This float can be current_step / total_training_steps. This 59 | float can be used for training w/ an annealing schedule. 60 | 61 | Returns: 62 | is_hidden: The is_hidden tensor for behavior prediction. 63 | """ 64 | 65 | # [1, a, t, 1]. 66 | is_hidden = np.ones([1, num_agents, self._num_steps, 1], 67 | dtype=bool) 68 | is_hidden[:, :, :self._num_history_steps + 1, :] = False 69 | return is_hidden 70 | 71 | 72 | @gin.configurable 73 | class CBPIsHiddenGenerator(IsHiddenGenerator): 74 | """is_hidden generator for Conditional-BP.""" 75 | 76 | def __call__(self, num_agents: int, train_progress: float = 0.0): 77 | """Returns the is_hidden tensor for conditional behavior prediction. 78 | 79 | Always returns 0 (not hidden) for history/current steps and 1 (hidden) for 80 | future steps except for the 0th agent, which is all 0. This is often used 81 | along with ModelParams.robot_as_0th_agent=True so that the model can predict 82 | how other agents move conditioned on a given robot trajectory. 83 | 84 | Args: 85 | num_agents: Number of agents in the scene. 86 | train_progress: A float between 0 to 1 representing the overall progress 87 | of training. This float can be current_step / total_training_steps. This 88 | float can be used for training w/ an annealing schedule. 89 | 90 | Returns: 91 | is_hidden: The is_hidden tensor for conditional behavior prediction. 92 | """ 93 | 94 | # [1, a, t, 1]. 95 | is_hidden = np.ones([1, num_agents, self._num_steps, 1], 96 | dtype=bool) 97 | is_hidden[:, :, :self._num_history_steps + 1, :] = False 98 | is_hidden[:, 0, :, :] = False 99 | return is_hidden 100 | -------------------------------------------------------------------------------- /human_scene_transformer/pedestrians/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Evaluates Model on Pedestrian dataset.""" 16 | 17 | import os 18 | from typing import Sequence 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | import gin 23 | from human_scene_transformer.model import model as hst_model 24 | from human_scene_transformer.model import model_params 25 | from human_scene_transformer.pedestrians import dataset_params 26 | from human_scene_transformer.pedestrians import input_fn 27 | import numpy as np 28 | import tensorflow as tf 29 | import tqdm 30 | 31 | 32 | _MODEL_PATH = flags.DEFINE_string( 33 | 'model_path', 34 | None, 35 | 'Path to model directory.', 36 | ) 37 | 38 | _CKPT_PATH = flags.DEFINE_string( 39 | 'checkpoint_path', 40 | None, 41 | 'Path to model checkpoint.', 42 | ) 43 | 44 | 45 | def min_ade(pred, gt): 46 | dist = np.linalg.norm(pred - gt[..., tf.newaxis, :], axis=-1) 47 | ade = np.mean(dist, axis=0) 48 | minade = np.min(ade, axis=-1) 49 | return minade 50 | 51 | 52 | def min_fde(pred, gt): 53 | fde = np.linalg.norm(pred[-1] - gt[-1, ..., tf.newaxis, :], axis=-1) 54 | minfde = np.min(fde, axis=-1) 55 | return minfde 56 | 57 | 58 | def evaluation(checkpoint_path): 59 | """Evaluates Model on Pedestrian dataset.""" 60 | d_params = dataset_params.PedestriansDatasetParams( 61 | num_agents=None, 62 | eval_config='test' 63 | ) 64 | 65 | dataset = input_fn.load_dataset( 66 | d_params, 67 | split='test', 68 | augment=False, 69 | shuffle=False, 70 | repeat=False, 71 | deterministic=True, 72 | ) 73 | 74 | model_p = model_params.ModelParams() 75 | 76 | model = hst_model.HumanTrajectorySceneTransformer(model_p) 77 | 78 | _, _ = model(next(iter(dataset.batch(1))), training=False) 79 | 80 | checkpoint_mngr = tf.train.Checkpoint(model=model) 81 | checkpoint_mngr.restore(checkpoint_path).assert_existing_objects_matched() 82 | logging.info('Restored checkpoint: %s', checkpoint_path) 83 | 84 | min_ades = [] 85 | min_fdes = [] 86 | for input_batch in tqdm.tqdm(dataset.batch(1)): 87 | if input_batch['agents/position'].shape[1] == 0: 88 | continue 89 | target = tf.identity(input_batch['agents/position']) 90 | target_np = input_batch['agents/position'].numpy() 91 | target_np[:, :, 8:] = np.nan 92 | input_batch['agents/position'] = tf.convert_to_tensor(target_np) 93 | full_pred, _ = model(input_batch, training=False) 94 | samples = full_pred['agents/position'] 95 | for a in range(input_batch['agents/position'].shape[1]): 96 | if not tf.reduce_any(tf.math.is_nan(target[0, a])): 97 | min_ades.append(min_ade(samples[0, a, 8:], target[0, a, 8:])) 98 | min_fdes.append(min_fde(samples[0, a, 8:], target[0, a, 8:])) 99 | 100 | print(f'MinADE: {np.nanmean(min_ades):.2f}') 101 | print(f'MinFDE: {np.nanmean(min_fdes):.2f}') 102 | 103 | 104 | def main(argv: Sequence[str]) -> None: 105 | if len(argv) > 1: 106 | raise app.UsageError('Too many command-line arguments.') 107 | 108 | gin.parse_config_files_and_bindings( 109 | [os.path.join(_MODEL_PATH.value, 'params', 'operative_config.gin')], 110 | None, 111 | skip_unknown=True) 112 | print('Actual gin config used:') 113 | print(gin.config_str()) 114 | 115 | evaluation(_CKPT_PATH.value) 116 | 117 | 118 | if __name__ == '__main__': 119 | logging.set_verbosity(logging.ERROR) 120 | app.run(main) 121 | -------------------------------------------------------------------------------- /human_scene_transformer/config/jrdb_challenge/dataset_params.gin: -------------------------------------------------------------------------------- 1 | TRAIN_SCENES = ['bytes-cafe-2019-02-07_0', 2 | 
'clark-center-2019-02-28_0', 3 | 'clark-center-intersection-2019-02-28_0', 4 | 'cubberly-auditorium-2019-04-22_0', 5 | 'gates-159-group-meeting-2019-04-03_0', 6 | 'gates-ai-lab-2019-02-08_0', 7 | 'gates-to-clark-2019-02-28_1', 8 | 'hewlett-packard-intersection-2019-01-24_0', 9 | 'huang-basement-2019-01-25_0', 10 | 'huang-lane-2019-02-12_0', 11 | 'memorial-court-2019-03-16_0', 12 | 'meyer-green-2019-03-16_0', 13 | 'packard-poster-session-2019-03-20_0', 14 | 'packard-poster-session-2019-03-20_1', 15 | 'stlc-111-2019-04-19_0', 16 | 'svl-meeting-gates-2-2019-04-08_0', 17 | 'tressider-2019-03-16_0', 18 | 'tressider-2019-03-16_1', 19 | 'cubberly-auditorium-2019-04-22_1_test', 20 | 'discovery-walk-2019-02-28_0_test', 21 | 'food-trucks-2019-02-12_0_test', 22 | 'gates-ai-lab-2019-04-17_0_test', 23 | 'gates-foyer-2019-01-17_0_test', 24 | 'gates-to-clark-2019-02-28_0_test', 25 | 'hewlett-class-2019-01-23_1_test', 26 | 'huang-2-2019-01-25_1_test', 27 | 'indoor-coupa-cafe-2019-02-06_0_test', 28 | 'lomita-serra-intersection-2019-01-30_0_test', 29 | 'nvidia-aud-2019-01-25_0_test', 30 | 'nvidia-aud-2019-04-18_1_test', 31 | 'outdoor-coupa-cafe-2019-02-06_0_test', 32 | 'quarry-road-2019-02-28_0_test', 33 | 'stlc-111-2019-04-19_1_test', 34 | 'stlc-111-2019-04-19_2_test', 35 | 'tressider-2019-04-26_0_test', 36 | 'tressider-2019-04-26_1_test', 37 | 'clark-center-2019-02-28_1', 38 | 'forbes-cafe-2019-01-22_0', 39 | 'gates-basement-elevators-2019-01-17_1', 40 | 'huang-2-2019-01-25_0', 41 | 'jordan-hall-2019-04-22_0', 42 | 'nvidia-aud-2019-04-18_0', 43 | 'packard-poster-session-2019-03-20_2', 44 | 'svl-meeting-gates-2-2019-04-08_1', 45 | 'tressider-2019-04-26_2', 46 | 'discovery-walk-2019-02-28_1_test', 47 | 'gates-basement-elevators-2019-01-17_0_test', 48 | 'hewlett-class-2019-01-23_0_test', 49 | 'huang-intersection-2019-01-22_0_test', 50 | 'meyer-green-2019-03-16_1_test', 51 | 'nvidia-aud-2019-04-18_2_test', 52 | 'serra-street-2019-01-30_0_test', 53 | 'tressider-2019-03-16_2_test', 54 | 'tressider-2019-04-26_3_test'] 55 | 56 | TEST_SCENES = [ 57 | 'cubberly-auditorium-2019-04-22_1_test', 58 | 'discovery-walk-2019-02-28_0_test', 59 | 'discovery-walk-2019-02-28_1_test', 60 | 'food-trucks-2019-02-12_0_test', 61 | 'gates-ai-lab-2019-04-17_0_test', 62 | 'gates-basement-elevators-2019-01-17_0_test', 63 | 'gates-foyer-2019-01-17_0_test', 64 | 'gates-to-clark-2019-02-28_0_test', 65 | 'hewlett-class-2019-01-23_0_test', 66 | 'hewlett-class-2019-01-23_1_test', 67 | 'huang-2-2019-01-25_1_test', 68 | 'huang-intersection-2019-01-22_0_test', 69 | 'indoor-coupa-cafe-2019-02-06_0_test', 70 | 'lomita-serra-intersection-2019-01-30_0_test', 71 | 'meyer-green-2019-03-16_1_test', 72 | 'nvidia-aud-2019-01-25_0_test', 73 | 'nvidia-aud-2019-04-18_1_test', 74 | 'nvidia-aud-2019-04-18_2_test', 75 | 'outdoor-coupa-cafe-2019-02-06_0_test', 76 | 'quarry-road-2019-02-28_0_test', 77 | 'serra-street-2019-01-30_0_test', 78 | 'stlc-111-2019-04-19_1_test', 79 | 'stlc-111-2019-04-19_2_test', 80 | 'tressider-2019-03-16_2_test', 81 | 'tressider-2019-04-26_0_test', 82 | 'tressider-2019-04-26_1_test', 83 | 'tressider-2019-04-26_3_test', 84 | ] 85 | 86 | 87 | JRDBDatasetParams.path = 88 | 89 | JRDBDatasetParams.train_scenes = %TRAIN_SCENES 90 | JRDBDatasetParams.eval_scenes = %TEST_SCENES 91 | JRDBDatasetParams.features = [ 92 | 'agents/position', 93 | 'agents/keypoints', 94 | 'robot/position', 95 | 'robot/orientation', 96 | 'scene/pc' 97 | ] 98 | 99 | JRDBDatasetParams.train_split = (0., 1.0) 100 | JRDBDatasetParams.eval_split = (0., 1.0) 101 | 102 | 
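# With the values below, each example spans 24 steps of 0.4 s: 11 history
# steps, 1 current step, and 12 future steps, i.e. a 4.8 s prediction horizon.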
103 | JRDBDatasetParams.num_history_steps = 11 104 | JRDBDatasetParams.num_steps = 24 105 | JRDBDatasetParams.num_agents = 16 106 | JRDBDatasetParams.timestep = 0.4 107 | 108 | JRDBDatasetParams.subsample = 6 109 | JRDBDatasetParams.num_pointcloud_points = 512 110 | 111 | JRDBDatasetParams.min_distance_to_robot = 50.0 -------------------------------------------------------------------------------- /human_scene_transformer/data/README.md: -------------------------------------------------------------------------------- 1 | *This is not an officially supported Google product.* 2 | 3 | # JRDB for Prediction Dataset Setup 4 | 5 | ## Get the JRDB Dataset 6 | 7 | 1. Go to https://jrdb.erc.monash.edu/#downloads 8 | 2. Create a User or login. 9 | 3. Download and extract [JRDB 2022 Full Train Dataset](https://jrdb.erc.monash.edu/static/downloads/JRDB2022/train_dataset_with_activity/train_dataset_with_activity.zip) to `/train_dataset`. 10 | 4. Download and extract [JRDB 2022 Full Test Dataset](https://jrdb.erc.monash.edu/static/downloads/JRDB2022/test_dataset_without_labels/jrdb22_test.zip) to `/test_dataset`. 11 | 5. Download and extract [Train Detections](https://jrdb.erc.monash.edu/static/downloads/train_detections.zip) from the JRDB 2019 section to `/detections`. 12 | 13 | ## Get the Leaderboard Test Set Tracks 14 | 15 | ### For the JRDB Challenge Dataset 16 | Download and extract this leaderboard [3D tracking result](https://jrdb.erc.monash.edu/leaderboards/download/1762) to `/test_dataset/labels/PiFeNet/`. Such that you have `/test_dataset/labels/PiFeNet/00XX.txt`. 17 | 18 | ### For the Orginal Dataset used in the Paper 19 | Download and extract this leaderboard [3D tracking result](https://jrdb.erc.monash.edu/leaderboards/download/1605) to `/test_dataset/labels/ss3d_mot/`. Such that you have `/test_dataset/labels/ss3d_mot/00XX.txt`. This was the best available leaderboard tracker at the time the method was developed. 20 | 21 | ## Get the Robot Odometry 22 | 23 | Download the compressed Odometry data file [here](https://storage.googleapis.com/gresearch/human_scene_transformer/odometry.zip). 24 | 25 | Extract the files and move them to `/processed/` such that you have `/processed/odoemtry/train`, `/processed/odoemtry/test`. 26 | 27 | Alternatively you can extract the robot odometry from the raw rosbags yourself via `extract_robot_odometry_from_rosbag.py`. 28 | 29 | ## Get the Preprocessed Keypoints 30 | 31 | Download the compressed Keypoints data file [here](https://storage.googleapis.com/gresearch/human_scene_transformer/keypoints.zip). 32 | 33 | Extract the files and move them to `/processed/` such that you have `/processed/labels/labels_3d_keypoints/train/`, `/processed/labels/labels_3d_keypoints/test/`. 
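If you take the alternative route above and extract the robot odometry from the raw rosbags yourself, the invocation below is a sketch based on the flags defined in `extract_robot_odometry_from_rosbag.py`; it expects a ROS Python environment with the `rosbag` package installed, and it looks for `<timestamps_folder>/<sequence_name>/frames_pc.json` for each bag. The placeholder folder names are assumptions, so adapt them to your local layout:

```python extract_robot_odometry_from_rosbag.py --input_path=<raw_rosbag_folder> --timestamps_path=<timestamps_folder> --output_path=<odometry_output_folder>```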
34 | 35 | ## Create Real-World Tracks for Train Data 36 | 37 | Run 38 | 39 | ```python jrdb_train_detections_to_tracks.py --input_path=``` 40 | 41 | ## Dataset Folder 42 | 43 | You should end up with a dataset folder of the following structure 44 | 45 | ``` 46 | - 47 | - train_dataset 48 | - calibration 49 | - detections 50 | - images 51 | - labels 52 | - pointclouds 53 | - test_dataset 54 | - calibration 55 | - images 56 | - labels 57 | - pointclouds 58 | - processed 59 | - labels 60 | - labels_3d_keypoints 61 | - train 62 | - test 63 | - labels_detections_3d 64 | - odoemtry 65 | - train 66 | - test 67 | ``` 68 | 69 | ## Generate the Tensorflow Dataset 70 | ### For the JRDB Challenge Dataset 71 | ```python jrdb_preprocess_train.py --input_path= --output_path= --max_distance_to_robot=50.0``` 72 | 73 | ```python jrdb_preprocess_test.py --input_path= --output_path= --max_distance_to_robot=50.0 --tracking_method=PiFeNet --tracking_confidence_threshold=0.01``` 74 | 75 | Please note that this can take multiple hours due to the processing of the scene's 76 | pointclouds. If you do not need the pointclouds you can speed up the processing 77 | by passing `--process_pointclouds=False` for both. 78 | 79 | ### For the Orginal Dataset used in the Paper 80 | ```python jrdb_preprocess_train.py --input_path= --output_path= --max_distance_to_robot=15.0``` 81 | 82 | ```python jrdb_preprocess_test.py --input_path= --output_path= --max_distance_to_robot=15.0 --tracking_method=ss3d_mot``` 83 | 84 | Please note that this can take multiple hours due to the processing of the scene's 85 | pointclouds. If you do not need the pointclouds you can speed up the processing 86 | by passing `--process_pointclouds=False` for both. -------------------------------------------------------------------------------- /human_scene_transformer/data/math/math_types.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Pytype definitions and conversion utilities. 16 | 17 | (python3). 18 | 19 | Utility functions: 20 | is_scalar - Distinguishes between scalar and vector values. 21 | get_matching_arrays - Gets two arrays with the same shape from scalar or 22 | matrix arguments. 23 | """ 24 | 25 | from collections import abc 26 | from typing import Iterable, Text, Tuple, Union 27 | 28 | import numpy as np 29 | 30 | # --------------------------------------------------------------------------- 31 | # PyType Definitions 32 | # 33 | # A list or vector of floating point values. 
34 | ScalarType = Union[float, int] 35 | Float2Type = Tuple[float, float] 36 | Float3Type = Tuple[float, float, float] 37 | Float4Type = Tuple[float, float, float, float] 38 | Vector2Type = Union[np.ndarray, Iterable[float], Float2Type] 39 | Vector3Type = Union[np.ndarray, Iterable[float], Float3Type] 40 | Vector4Type = Union[np.ndarray, Iterable[float], Float4Type] 41 | VectorType = Union[np.ndarray, Iterable[float], Float2Type, Float3Type, 42 | Float4Type] 43 | # A scalar or a vector of floating point values. 44 | VectorOrValueType = Union[VectorType, float] 45 | Vector3OrValueType = Union[Vector3Type, float] 46 | Vector4OrValueType = Union[Vector4Type, float] 47 | 48 | # Error messages for exceptions. 49 | SHAPE_MISMATCH_MESSAGE = 'lhs and rhs should have the same dimension' 50 | DIMENSION_INVALID_MESSAGE = 'Dimension is undefined or invalid' 51 | DIMENSION_MISMATCH_MESSAGE = 'Dimension is defined, but does not match inputs' 52 | 53 | # --------------------------------------------------------------------------- 54 | # Global Constants 55 | # 56 | # Default values for rtol and atol arguments to np.isclose are taken from 57 | # third_party/py/numpy/core/numeric.py 58 | DEFAULT_RTOL_VALUE_FOR_NP_IS_CLOSE = 1e-5 # Relative tolerance 59 | DEFAULT_ATOL_VALUE_FOR_NP_IS_CLOSE = 1e-8 # Absolute tolerance 60 | 61 | 62 | def is_scalar(scalar_or_array: VectorOrValueType) -> bool: 63 | """Returns true if the argument has a scalar type. 64 | 65 | Args: 66 | scalar_or_array: A scalar floating point value or an array of values. 67 | 68 | Returns: 69 | True if the value is a scalar (single value). 70 | """ 71 | return not isinstance(scalar_or_array, abc.Sized) 72 | 73 | 74 | def get_matching_arrays(lhs: VectorOrValueType, 75 | rhs: VectorOrValueType, 76 | err_msg: Text = '') -> Tuple[np.ndarray, np.ndarray]: 77 | """Converts both inputs to numpy arrays with the same shape. 78 | 79 | If either input is a scalar, it will be converted to a constant array with 80 | the same shape as the other input. If both inputs are scalars, they will 81 | both be converted to arrays containing a single value. 82 | 83 | If they are not scalars, the inputs must have the same shape. 84 | 85 | If they are both scalars, the inputs will be converted to arrays with a 86 | single element. 87 | 88 | Args: 89 | lhs: Left hand side scalar or array argument. 90 | rhs: Right hand side scalar or array argument. 91 | err_msg: String error message added to exception in case of failure. 92 | 93 | Returns: 94 | lhs, rhs: The arguments converted to numpy arrays. 95 | 96 | Raises: 97 | ValueError: If the lhs and rhs arrays not have the same shape. 98 | """ 99 | if is_scalar(rhs): 100 | lhs = np.asarray(lhs) 101 | rhs = np.full_like(lhs, rhs) 102 | else: 103 | rhs = np.asarray(rhs) 104 | if is_scalar(lhs): 105 | lhs = np.full_like(rhs, lhs) 106 | else: 107 | lhs = np.asarray(lhs) 108 | if lhs.shape != rhs.shape: 109 | raise ValueError( 110 | 'lhs and rhs should have the same dimension: %s != %s; %s' % 111 | (lhs.shape, rhs.shape, err_msg)) 112 | return lhs, rhs 113 | -------------------------------------------------------------------------------- /human_scene_transformer/metrics/mean_angle_error.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """The Mean Angle Error Keras Metric class.""" 16 | 17 | import gin 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | 22 | @gin.configurable 23 | class MeanAngleError(tf.keras.metrics.Metric): 24 | """Mean Angle Error over yaw angle. 25 | 26 | Calculates the mean angular distance over all predicted timesteps. 27 | """ 28 | 29 | def __init__(self, params, cutoff_seconds=None, at_cutoff=False, 30 | name='AngleError'): 31 | """Initializes the MeanAngleError metric. 32 | 33 | Args: 34 | params: ModelParams 35 | cutoff_seconds: Cutoff up to which time the metric should be calculated 36 | in seconds. 37 | at_cutoff: If True metric will be calculated at cutoff timestep. 38 | Otherwise metric is calculated as average up to cutoff_seconds. 39 | name: Metric name. 40 | """ 41 | super().__init__(name=name) 42 | self.agents_orientation_key = params.agents_orientation_key 43 | self.cutoff_seconds = cutoff_seconds 44 | self.at_cutoff = at_cutoff 45 | if cutoff_seconds is None: 46 | self.cutoff_idx = None 47 | else: 48 | # +1 due to current time step. 49 | self.cutoff_idx = int( 50 | params.num_history_steps + 51 | cutoff_seconds / params.timestep) + 1 52 | 53 | self.num_predictions = self.add_weight( 54 | name='num_predictions', initializer='zeros') 55 | self.total_deviation = self.add_weight( 56 | name='total_deviation', initializer='zeros') 57 | 58 | def _reduce(self, mae, input_batch, predictions): 59 | return mae[..., 0, :] 60 | 61 | def update_state(self, input_batch, predictions): 62 | should_predict = tf.cast(input_batch['should_predict'], tf.float32) 63 | # [b, a, t, n, 1] 64 | diff = ( 65 | input_batch[f'{self.agents_orientation_key}/target'][..., tf.newaxis, :] 66 | - predictions['agents/orientation']) 67 | deviation = tf.abs(tf.math.mod((diff + np.pi), (2 * np.pi)) - np.pi) 68 | 69 | deviation = self._reduce(deviation, input_batch, predictions) 70 | 71 | # Non-observed or past should not contribute to ade. 72 | deviation = tf.math.multiply_no_nan(deviation, should_predict) 73 | # Chop off the un-wanted time part. 
74 | # [b, a, cutoff_idx, 1] 75 | if self.at_cutoff and self.cutoff_seconds is not None: 76 | deviation = deviation[:, :, self.cutoff_idx-1:self.cutoff_idx, :] 77 | num_predictions = tf.reduce_sum( 78 | should_predict[:, :, self.cutoff_idx-1::self.cutoff_idx, :]) 79 | else: 80 | deviation = deviation[:, :, :self.cutoff_idx, :] 81 | num_predictions = tf.reduce_sum(should_predict[:, :, :self.cutoff_idx, :]) 82 | # [1] 83 | deviation = tf.reduce_sum(deviation) 84 | 85 | self.num_predictions.assign_add(num_predictions) 86 | self.total_deviation.assign_add(deviation) 87 | 88 | def result(self): 89 | return self.total_deviation / self.num_predictions 90 | 91 | def reset_states(self): 92 | self.num_predictions.assign(0) 93 | self.total_deviation.assign(0.0) 94 | 95 | 96 | @gin.configurable 97 | class MinMeanAngleError(MeanAngleError): 98 | """Takes the minimum over all modes.""" 99 | 100 | def _reduce(self, mae, input_batch, predictions): 101 | return tf.reduce_min(mae, axis=-2) 102 | 103 | 104 | @gin.configurable 105 | class MLMeanAngleError(MeanAngleError): 106 | """Takes the maximum likelihood mode.""" 107 | 108 | def _reduce(self, mae, input_batch, predictions): 109 | # Get index of mixture component with highest probability 110 | ml_indices = tf.math.argmax(predictions['mixture_logits'], axis=-1) 111 | a = mae.shape[1] 112 | t = mae.shape[2] 113 | ml_indices = tf.tile(ml_indices, [1, a, t])[..., tf.newaxis] 114 | 115 | return tf.gather(mae, indices=ml_indices, batch_dims=3, axis=-2)[..., 0, :] 116 | 117 | -------------------------------------------------------------------------------- /human_scene_transformer/model/multimodality_induction.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains muldimodality induction layers.""" 16 | 17 | import tensorflow as tf 18 | 19 | 20 | class MultimodalityInduction(tf.keras.layers.Layer): 21 | """Enables the model to forward and predict multi-mode predictions. 22 | 23 | 1) Features are broadcasted to number of modes and summed with learned mode 24 | tensors. 25 | 2) Mixture Weights are generated by cross-attention over all dimensions 26 | between learned mode tensors and hidden tensors. 
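  For example (shapes follow the call() implementation below, using the
  default hidden_size=256 and num_modes=5): an incoming 'hidden_vecs' of
  shape [b, a, t, 256] is expanded to [b, a, t, 1, 256] and summed with the
  learned mode tensor of shape [1, 1, 1, 5, 256], giving per-mode features
  of shape [b, a, t, 5, 256], while the mixture-weight branch emits
  'mixture_logits' of shape [b, 1, 1, 5].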
27 | """ 28 | 29 | def __init__(self, 30 | num_modes=5, 31 | num_heads=8, 32 | hidden_size=256, 33 | drop_prob=0.1, 34 | ln_eps=1e-6, 35 | ff_dim=128): 36 | super().__init__() 37 | self.num_modes = num_modes 38 | self.hidden_size = hidden_size 39 | if hidden_size % num_heads != 0: 40 | raise ValueError(f'hidden_size ({hidden_size}) must be an integer ' 41 | f'times bigger than num_heads ({num_heads}).') 42 | self.mm_attn_layer = tf.keras.layers.MultiHeadAttention( 43 | num_heads=num_heads, 44 | key_dim=hidden_size // num_heads, 45 | attention_axes=3) 46 | self.mm_attn_ln = tf.keras.layers.LayerNormalization(epsilon=ln_eps) 47 | self.mm_ff_layer1 = tf.keras.layers.EinsumDense( 48 | '...h,hf->...f', output_shape=ff_dim, bias_axes='f', activation='relu') 49 | self.mm_ff_layer2 = tf.keras.layers.EinsumDense( 50 | '...f,fh->...h', 51 | output_shape=hidden_size, 52 | bias_axes='h', 53 | activation=None) 54 | self.mm_ff_ln = tf.keras.layers.LayerNormalization(epsilon=ln_eps) 55 | 56 | self.mw_attn_layer = tf.keras.layers.MultiHeadAttention( 57 | num_heads=num_heads, 58 | key_dim=hidden_size // num_heads, 59 | attention_axes=None) 60 | self.mw_attn_ln = tf.keras.layers.LayerNormalization(epsilon=ln_eps) 61 | self.mw_ff_layer1 = tf.keras.layers.EinsumDense( 62 | '...h,hf->...f', output_shape=ff_dim, bias_axes='f', activation='relu') 63 | self.mw_ff_layer2 = tf.keras.layers.EinsumDense( 64 | '...f,fh->...h', 65 | output_shape=1, # Single logit per mode 66 | bias_axes='h', 67 | activation=None) 68 | self.mw_ff_ln = tf.keras.layers.LayerNormalization(epsilon=ln_eps) 69 | 70 | self.attn_dropout = tf.keras.layers.Dropout(drop_prob) 71 | self.ff_dropout = tf.keras.layers.Dropout(drop_prob) 72 | 73 | # [1, 1, 1, h] 74 | self.learned_add_mm = tf.Variable( 75 | tf.random_uniform_initializer( 76 | minval=-1., 77 | maxval=1.)(shape=[1, 1, 1, self.num_modes, hidden_size]), 78 | trainable=True, 79 | dtype=tf.float32) 80 | 81 | def call(self, input_batch, training=None): 82 | input_batch = input_batch.copy() 83 | 84 | # [b, a, t, 1, h] 85 | hidden_vecs = input_batch['hidden_vecs'][..., tf.newaxis, :] 86 | 87 | # Multi Modes 88 | mm_add = self.mm_attn_ln(self.learned_add_mm + hidden_vecs) 89 | 90 | # Feed-forward layers. 91 | out = self.mm_ff_layer1(mm_add) 92 | out = self.mm_ff_layer2(out) 93 | out = self.ff_dropout(out) 94 | out = self.mm_ff_ln(out + hidden_vecs) 95 | 96 | input_batch['hidden_vecs'] = out 97 | 98 | # Mixture Weights 99 | # [b, 1, 1, n, h] 100 | b = tf.shape(input_batch['hidden_vecs'])[0] 101 | attn_out_mw = self.mw_attn_layer( 102 | query=tf.tile(self.learned_add_mm, [b, 1, 1, 1, 1]), 103 | key=mm_add, 104 | value=mm_add, 105 | return_attention_scores=False) 106 | 107 | attn_out_mw = self.attn_dropout(attn_out_mw, training=training) 108 | 109 | # [b, 1, 1, n, h] 110 | attn_out_mw = self.mw_attn_ln(attn_out_mw) 111 | 112 | # Feed-forward layers. 113 | out_mw = self.mw_ff_layer1(attn_out_mw) 114 | out_mw = self.mw_ff_layer2(out_mw) 115 | out_mw = self.ff_dropout(out_mw, training=training) 116 | 117 | # [b, 1, 1, n] 118 | input_batch['mixture_logits'] = out_mw[..., 0] 119 | return input_batch 120 | -------------------------------------------------------------------------------- /human_scene_transformer/model/agent_self_alignment.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains agent self alignment layers.""" 16 | 17 | import tensorflow as tf 18 | 19 | 20 | class AgentSelfAlignmentLayer(tf.keras.layers.Layer): 21 | """Enables agent to become aware of its temporal identity. 22 | 23 | Agent features are cross-attended with a learned query in temporal dimension. 24 | """ 25 | 26 | def __init__(self, 27 | num_heads=8, 28 | hidden_size=256, 29 | ln_eps=1e-6, 30 | ff_dim=128, 31 | mask_style=None): 32 | super().__init__() 33 | self.hidden_size = hidden_size 34 | self.mask_style = mask_style 35 | if hidden_size % num_heads != 0: 36 | raise ValueError(f'hidden_size ({hidden_size}) must be an integer ' 37 | f'times bigger than num_heads ({num_heads}).') 38 | self.attn_layer = tf.keras.layers.MultiHeadAttention( 39 | num_heads=num_heads, 40 | key_dim=hidden_size // num_heads, 41 | attention_axes=2) 42 | self.attn_ln = tf.keras.layers.LayerNormalization(epsilon=ln_eps) 43 | self.ff_layer1 = tf.keras.layers.EinsumDense( 44 | '...h,hf->...f', output_shape=ff_dim, bias_axes='f', activation='relu') 45 | self.ff_layer2 = tf.keras.layers.EinsumDense( 46 | '...f,fh->...h', 47 | output_shape=hidden_size, 48 | bias_axes='h', 49 | activation=None) 50 | self.ff_ln = tf.keras.layers.LayerNormalization(epsilon=ln_eps) 51 | 52 | # [1, 1, 1, h] 53 | self.learned_query_vec = tf.Variable( 54 | tf.random_uniform_initializer( 55 | minval=-1., maxval=1.)(shape=[1, 1, 1, hidden_size]), 56 | trainable=True, 57 | dtype=tf.float32) 58 | 59 | def build_learned_query(self, input_batch): 60 | """Converts self.learned_query_vec into a learned query vector.""" 61 | # This weird thing is for exporting and loading keras model... 62 | b = tf.shape(input_batch['hidden_vecs'])[0] 63 | a = tf.shape(input_batch['hidden_vecs'])[1] 64 | t = tf.shape(input_batch['hidden_vecs'])[2] 65 | 66 | # [b, a, t, 1, h] 67 | return tf.tile(self.learned_query_vec, [b, a, t, 1]) 68 | 69 | def call(self, input_batch): 70 | input_batch = input_batch.copy() 71 | 72 | # [b, a, t, h] 73 | hidden_vecs = input_batch['hidden_vecs'] 74 | 75 | # Expand the attention mask with new dims so that Keras can broadcast to 76 | # the same shape as the attn_score: [b, num_heads, a, t, a, t]. 77 | # attn_mask shape: [b, 1, 1, 1, a, t,] 78 | # True means the position participate in the attention while all 79 | # False positions are ignored. 
80 | if self.mask_style is None: 81 | attn_mask = None 82 | elif self.mask_style == 'has_historic_data': 83 | has_historic_data = tf.logical_and( 84 | input_batch['has_historic_data'][..., 0], 85 | tf.logical_not(input_batch['is_hidden'][..., 0])) 86 | attn_mask = has_historic_data[:, :, tf.newaxis, tf.newaxis, :] 87 | elif self.mask_style == 'has_data': 88 | has_data_historic = tf.logical_and( 89 | input_batch['has_data'][..., 0], 90 | tf.logical_not(input_batch['is_hidden'][..., 0])) 91 | attn_mask = has_data_historic[:, :, tf.newaxis, tf.newaxis, :,] 92 | else: 93 | raise ValueError(f'Unrecognized mask style: {self.mask_style}. ' 94 | "Must be either None, 'fully_padded' or 'any_padded'.") 95 | 96 | # [b, a, t, 1, h] 97 | learned_query = self.build_learned_query(input_batch) 98 | attn_out, attn_score = self.attn_layer( 99 | query=learned_query, 100 | key=hidden_vecs, 101 | value=hidden_vecs, 102 | attention_mask=attn_mask, 103 | return_attention_scores=True) 104 | 105 | attn_out = self.attn_ln(attn_out + hidden_vecs) 106 | 107 | # Feed-forward layers. 108 | out = self.ff_layer1(attn_out) 109 | out = self.ff_layer2(out) 110 | out = self.ff_ln(out + attn_out) 111 | 112 | input_batch['hidden_vecs'] = out 113 | input_batch[f'attn_scores/{self.name}'] = attn_score 114 | 115 | return input_batch 116 | -------------------------------------------------------------------------------- /human_scene_transformer/metrics/ade.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """The ADE Keras Metric class.""" 16 | 17 | import gin 18 | import tensorflow as tf 19 | 20 | 21 | def distance_error(target: tf.Tensor, pred: tf.Tensor) -> tf.Tensor: 22 | return tf.sqrt( 23 | tf.reduce_sum(tf.square(pred - target), axis=-1, keepdims=True)) 24 | 25 | 26 | @gin.configurable 27 | class ADE(tf.keras.metrics.Metric): 28 | """Average Displacement Error over a n dimensional track. 29 | 30 | Calculates the mean L2 distance over all predicted timesteps. 31 | """ 32 | 33 | def __init__(self, params, cutoff_seconds=None, at_cutoff=False, name='ADE'): 34 | """Initializes the ADE metric. 35 | 36 | Args: 37 | params: ModelParams 38 | cutoff_seconds: Cutoff up to which time the metric should be calculated 39 | in seconds. 40 | at_cutoff: If True metric will be calculated at cutoff timestep. 41 | Otherwise metric is calculated as average up to cutoff_seconds. 42 | name: Metric name. 43 | """ 44 | super().__init__(name=name) 45 | self.agents_position_key = params.agents_position_key 46 | self.cutoff_seconds = cutoff_seconds 47 | self.at_cutoff = at_cutoff 48 | if cutoff_seconds is None: 49 | self.cutoff_idx = None 50 | else: 51 | # +1 due to current time step. 
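      # For example, assuming the model params mirror the JRDB challenge
      # dataset config above (num_history_steps=11, timestep=0.4 s), a
      # cutoff of 4.0 s gives cutoff_idx = int(11 + 4.0 / 0.4) + 1 = 22, so
      # the slices below keep timesteps 0..21, i.e. everything up to 4 s
      # after the current step.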
52 | self.cutoff_idx = int( 53 | params.num_history_steps + 54 | cutoff_seconds / params.timestep) + 1 55 | 56 | self.num_predictions = self.add_weight( 57 | name='num_predictions', initializer='zeros') 58 | self.total_deviation = self.add_weight( 59 | name='total_deviation', initializer='zeros') 60 | 61 | def _reduce(self, ade_with_modes, input_batch, predictions): 62 | """Reduces mode dimension. The base class squeezes a single mode.""" 63 | return tf.squeeze(ade_with_modes, axis=-1) 64 | 65 | def update_state(self, input_batch, predictions): 66 | should_predict = tf.cast(input_batch['should_predict'], tf.float32) 67 | 68 | target = input_batch[f'{self.agents_position_key}/target'] 69 | target = target[..., :predictions['agents/position'].shape[-1]] 70 | # [b, a, t, n, 3] -> [b, a, t, n, 1] 71 | per_position_ade = distance_error( 72 | target[..., tf.newaxis, :], 73 | predictions['agents/position']) 74 | 75 | # Non-observed or past should not contribute to ade. 76 | deviation = tf.math.multiply_no_nan(per_position_ade, 77 | should_predict[..., tf.newaxis, :]) 78 | # Chop off the un-wanted time part. 79 | # [b, a, cutoff_idx, 1] 80 | if self.at_cutoff and self.cutoff_seconds is not None: 81 | deviation = deviation[:, :, self.cutoff_idx-1:self.cutoff_idx, :] 82 | num_predictions = tf.reduce_sum( 83 | should_predict[:, :, self.cutoff_idx-1:self.cutoff_idx, :]) 84 | else: 85 | deviation = deviation[:, :, :self.cutoff_idx, :] 86 | num_predictions = tf.reduce_sum(should_predict[:, :, :self.cutoff_idx, :]) 87 | 88 | # Reduce along time 89 | deviation = tf.reduce_sum(deviation, axis=2) 90 | # Reduce along modes 91 | deviation = self._reduce(deviation, input_batch, predictions) 92 | # [1] 93 | deviation = tf.reduce_sum(deviation) 94 | 95 | self.num_predictions.assign_add(num_predictions) 96 | self.total_deviation.assign_add(deviation) 97 | 98 | def result(self): 99 | return self.total_deviation / self.num_predictions 100 | 101 | def reset_states(self): 102 | self.num_predictions.assign(0) 103 | self.total_deviation.assign(0.0) 104 | 105 | 106 | @gin.configurable 107 | class MinADE(ADE): 108 | """Takes the minimum over all modes.""" 109 | 110 | def _reduce(self, ade_with_modes, input_batch, predictions): 111 | return tf.reduce_min(ade_with_modes, axis=-2) 112 | 113 | 114 | @gin.configurable 115 | class MLADE(ADE): 116 | """Takes the maximum likelihood mode.""" 117 | 118 | def _reduce(self, ade_with_modes, input_batch, predictions): 119 | # Get index of mixture component with highest probability 120 | # [b, a=1, t=1, n] 121 | ml_indices = tf.math.argmax(predictions['mixture_logits'], axis=-1) 122 | a = ade_with_modes.shape[1] 123 | ml_indices = tf.tile( 124 | tf.squeeze(ml_indices, axis=2), [1, a])[..., tf.newaxis] 125 | 126 | return tf.gather( 127 | ade_with_modes, indices=ml_indices, batch_dims=2, axis=-2)[..., 0, :] 128 | -------------------------------------------------------------------------------- /human_scene_transformer/model/scene_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains scene encoder layers.""" 16 | 17 | import gin 18 | 19 | from human_scene_transformer.model import embedding 20 | 21 | import tensorflow as tf 22 | 23 | 24 | @gin.register 25 | class ConvOccupancyGridEncoderLayer(tf.keras.layers.Layer): 26 | """Uses only the frame on the current step for now.""" 27 | 28 | def __init__(self, params): 29 | super().__init__() 30 | self.xyz_embedding_layer = embedding.SinusoidalEmbeddingLayer( 31 | hidden_size=params.hidden_size) 32 | self.current_step_idx = params.num_history_steps 33 | self.num_filters = params.num_conv_filters 34 | self.hidden_size = params.hidden_size 35 | drop_prob = params.drop_prob 36 | self.mlp = tf.keras.layers.EinsumDense( 37 | '...f,fh->...h', 38 | output_shape=self.xyz_embedding_layer.hidden_size, 39 | bias_axes='h', 40 | activation=None) 41 | 42 | # Construct conv and max pooling layers. 43 | self.layers = [] 44 | for i, num_filter in enumerate(self.num_filters): 45 | if i == 0 or i == 1: 46 | strides = 2 47 | else: 48 | strides = 1 49 | conv_layer = tf.keras.layers.Conv2D( 50 | filters=num_filter, 51 | kernel_size=3, 52 | strides=strides, 53 | padding='same', 54 | activation='relu', 55 | data_format='channels_last') 56 | self.layers.append(conv_layer) 57 | dropout = tf.keras.layers.Dropout(drop_prob) 58 | self.layers.append(dropout) 59 | pooling_layer = tf.keras.layers.MaxPool2D( 60 | pool_size=(2, 2), data_format='channels_last') 61 | self.layers.append(pooling_layer) 62 | # Flatten and use MLP to map to [b, h]. 63 | self.layers.append(tf.keras.layers.Flatten()) 64 | self.layers.append( 65 | tf.keras.layers.Dense(units=self.hidden_size, activation='relu')) 66 | self.layers.append(tf.keras.layers.Dropout(drop_prob)) 67 | 68 | self.seq_layers = tf.keras.Sequential(self.layers) 69 | 70 | def _get_origin_embedding(self, input_batch, training=None): 71 | # [b] 72 | x = input_batch['scene/origin'][:, self.current_step_idx, 0] 73 | y = input_batch['scene/origin'][:, self.current_step_idx, 1] 74 | # [b, hidden_size] 75 | origin_embedding_x = self.xyz_embedding_layer(x, training=training) 76 | origin_embedding_y = self.xyz_embedding_layer(y, training=training) 77 | origin_embedding = tf.concat([origin_embedding_x, origin_embedding_y], 78 | axis=-1) 79 | origin_embedding = self.mlp(origin_embedding) 80 | 81 | return origin_embedding 82 | 83 | def _apply_img_layers(self, input_batch, training=None): 84 | # [b, width, height]. 85 | occ_grid = input_batch['scene/grid'][:, self.current_step_idx, :, :, 86 | tf.newaxis] 87 | 88 | occ_grid = self.seq_layers(occ_grid, training=training) 89 | return occ_grid 90 | 91 | def call(self, input_batch, training=None): 92 | input_batch = input_batch.copy() 93 | 94 | # Embed occupation grid origin w/ sinusoidal embedding. 95 | origin_embedding = self._get_origin_embedding(input_batch) 96 | 97 | # Apply layers. 98 | occ_grid = self._apply_img_layers(input_batch, training=training) 99 | 100 | # [b, hidden_size]. Combine origin and grid. 
101 | occ_grid += origin_embedding 102 | input_batch['scene_hidden_vec'] = occ_grid[:, tf.newaxis, tf.newaxis :] 103 | return input_batch 104 | 105 | 106 | @gin.register 107 | class PointCloudEncoderLayer(tf.keras.layers.Layer): 108 | """Retrieves the point cloud at the current timestep.""" 109 | 110 | def __init__(self, params): 111 | super().__init__() 112 | self.current_step_idx = params.num_history_steps 113 | self.embedding_layer = embedding.SinusoidalEmbeddingLayer( 114 | hidden_size=params.feature_embedding_size) 115 | 116 | def call(self, input_batch, training=None): 117 | input_batch = input_batch.copy() 118 | 119 | pc = input_batch['scene/pc'][:, self.current_step_idx, ..., :2] 120 | 121 | pc = tf.where(tf.math.is_nan(pc), 0., pc) 122 | 123 | pc_emb = self.embedding_layer(pc, training=training) 124 | 125 | pc_emb = tf.concat([pc_emb[..., 0, :], pc_emb[..., 1, :]], axis=-1) 126 | 127 | input_batch['scene_hidden_vec'] = pc_emb 128 | 129 | return input_batch 130 | -------------------------------------------------------------------------------- /human_scene_transformer/train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Train human scene transformer.""" 16 | 17 | from typing import Sequence 18 | 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | import gin 23 | 24 | from human_scene_transformer import train_model 25 | from human_scene_transformer import training_params as tp 26 | from human_scene_transformer.jrdb import dataset_params as jrdb_dp 27 | from human_scene_transformer.jrdb import input_fn as jrdb_input_fn 28 | from human_scene_transformer.model import model_params as mp 29 | from human_scene_transformer.pedestrians import dataset_params as pedestrians_dp 30 | from human_scene_transformer.pedestrians import input_fn as pedestrians_input_fn 31 | 32 | import tensorflow as tf 33 | 34 | 35 | _DEVICE = flags.DEFINE_string( 36 | 'device', 'cpu', 'The device identification.' 37 | ) 38 | _REPLACE_DATE_TIME_STRING_BY = flags.DEFINE_string( 39 | 'replace_date_time_string_by', 40 | None, 41 | ( 42 | 'A string to replace the automatic date-time string for the model' 43 | ' foldername.' 44 | ), 45 | ) 46 | _MODEL_BASE_DIR = flags.DEFINE_string( 47 | 'model_base_dir', 48 | '/tmp/model_dir', 49 | ( 50 | 'The base folder for the trained model, contains ckpts, etc. ' 51 | 'Usually this comes from xmanager.' 52 | ), 53 | ) 54 | _TENSORBOARD_DIR = flags.DEFINE_string( 55 | 'tensorboard_dir', 56 | '/tmp/tensorboard', 57 | 'The base folder to save tensorboard summaries.', 58 | ) 59 | _LEARNING_RATE = flags.DEFINE_float( 60 | 'learning_rate', 1e-4, 'The maximum learning rate' 61 | ) 62 | _DATASET = flags.DEFINE_string( 63 | 'dataset', 'EDR', 'Dataset to train and evaluate on.' 
64 | ) 65 | 66 | _GIN_FILE = flags.DEFINE_multi_string( 67 | 'gin_files', 68 | None, 69 | 'A newline separated list of paths to Gin configuration files used to ' 70 | 'configure training, model, and dataset generation') 71 | 72 | _GIN_OVERRIDE = flags.DEFINE_multi_string( 73 | 'gin_overrides', 74 | [], 75 | 'A newline separated list of parameters used to override ones specified in' 76 | ' the gin file.') 77 | 78 | _LOGGING_INTERVAL = 1 79 | 80 | 81 | def main(argv: Sequence[str]) -> None: 82 | if len(argv) > 1: 83 | raise app.UsageError('Too many command-line arguments.') 84 | 85 | gin.parse_config_files_and_bindings(_GIN_FILE.value, _GIN_OVERRIDE.value) 86 | logging.info('Actual gin config used:') 87 | logging.info(gin.config_str()) 88 | 89 | if 'cpu' in _DEVICE.value or 'gpu' in _DEVICE.value: 90 | strategy = tf.distribute.OneDeviceStrategy(_DEVICE.value) 91 | else: # TPU 92 | resolver = tf.distribute.cluster_resolver.TPUClusterResolver( 93 | tpu=_DEVICE.value) 94 | tf.config.experimental_connect_to_cluster(resolver) 95 | tf.tpu.experimental.initialize_tpu_system(resolver) 96 | strategy = tf.distribute.TPUStrategy(resolver) 97 | 98 | train_params = tp.TrainingParams() 99 | 100 | if _DATASET.value == 'JRDB': 101 | dataset_params = jrdb_dp.JRDBDatasetParams() 102 | ds_train = jrdb_input_fn.get_train_dataset(dataset_params) 103 | ds_eval = jrdb_input_fn.get_eval_dataset(dataset_params) 104 | ds_train = ds_train.batch(train_params.batch_size, drop_remainder=True) 105 | ds_eval = ds_eval.batch(train_params.batch_size, drop_remainder=True) 106 | dist_train_dataset = strategy.experimental_distribute_dataset(ds_train) 107 | dist_eval_dataset = strategy.experimental_distribute_dataset(ds_eval) 108 | elif _DATASET.value == 'PEDESTRIANS': 109 | dataset_params = pedestrians_dp.PedestriansDatasetParams() 110 | ds_train = pedestrians_input_fn.get_train_dataset(dataset_params) 111 | ds_eval = pedestrians_input_fn.get_eval_dataset(dataset_params) 112 | ds_train = ds_train.batch(train_params.batch_size, drop_remainder=True) 113 | ds_eval = ds_eval.batch(train_params.batch_size, drop_remainder=True) 114 | dist_train_dataset = strategy.experimental_distribute_dataset(ds_train) 115 | dist_eval_dataset = strategy.experimental_distribute_dataset(ds_eval) 116 | else: 117 | raise ValueError 118 | 119 | model_params = mp.ModelParams() 120 | 121 | with strategy.scope(): 122 | metrics_tuple = train_model.get_metrics(model_params) 123 | 124 | train_model.train_model( 125 | train_params=train_params, 126 | model_params=model_params, 127 | train_ds=dist_train_dataset, 128 | eval_ds=dist_eval_dataset, 129 | metrics=metrics_tuple, 130 | strategy=strategy, 131 | replace_date_time_string_by=_REPLACE_DATE_TIME_STRING_BY.value, 132 | model_base_dir=_MODEL_BASE_DIR.value, 133 | tensorboard_dir=_TENSORBOARD_DIR.value 134 | ) 135 | 136 | 137 | if __name__ == '__main__': 138 | flags.mark_flags_as_required([ 139 | 'model_base_dir', 'gin_files' 140 | ]) 141 | app.run(main) 142 | -------------------------------------------------------------------------------- /human_scene_transformer/model/model_params.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Model parameter classes. 16 | 17 | The module contains a dataclasses used to configure the model. 18 | """ 19 | 20 | import gin 21 | 22 | from human_scene_transformer import is_hidden_generators 23 | from human_scene_transformer.model import agent_feature_encoder 24 | from human_scene_transformer.model import head 25 | from human_scene_transformer.model import scene_encoder as _ 26 | 27 | 28 | @gin.configurable 29 | class ModelParams(object): 30 | """This object configures the model.""" 31 | 32 | def __init__( 33 | self, 34 | agents_feature_config=None, 35 | agents_position_key='agents/position', 36 | agents_orientation_key='agents/orientation', 37 | hidden_size=128, 38 | feature_embedding_size=128, 39 | transformer_ff_dim=64, 40 | ln_eps=1e-6, 41 | num_heads=4, 42 | num_conv_filters=(32, 32, 64, 64, 128), 43 | num_modes=4, 44 | attn_architecture=( 45 | 'self-attention', 46 | 'cross-attention', 47 | 'multimodality_induction', 48 | 'self-attention', 49 | 'self-attention-mode', 50 | 'self-attention', 51 | 'self-attention-mode', 52 | ), 53 | mask_style=None, 54 | scene_encoder=None, 55 | prediction_head=head.PredictionHeadLayer, 56 | prediction_head_hidden_units=None, 57 | drop_prob=0.1, 58 | num_history_steps=12, 59 | num_steps=49, 60 | timestep=1 / 6, 61 | is_hidden_generator=is_hidden_generators.BPIsHiddenGenerator, 62 | ): 63 | """Initialize the TrainParam object. 64 | 65 | This is where gin injection occurs. 66 | 67 | Args: 68 | agents_feature_config: Dict mapping input features to encoders. 69 | agents_position_key: Name of the position key. 70 | agents_orientation_key: Name of the orientation key. 71 | hidden_size: The hidden vector size of the transformer. 72 | feature_embedding_size: The embedding vector size of agent features. 73 | transformer_ff_dim: The hidden layer size of transformer layers. 74 | ln_eps: Epsilon value for layer norm operations. 75 | num_heads: The number of heads in multi-headed attention. 76 | num_conv_filters: The number of filters in the occupancy grid convolution 77 | layers. 78 | num_modes: Number of predicted possible future outcomes. Each agent has 79 | exactly one predicted trajectory per possible outcome. 80 | attn_architecture: A tuple of strings describing the transformer 81 | architecture. Options are: 'self-attention', 'cross-attention' and 82 | 'multimodality_induction'. 83 | mask_style: A string or None describing the mask in self-attention. 84 | 'fully_padded' means timesteps of padded agents will not participate in 85 | the attention computation. 'any_padded' means any padded timesteps will 86 | not participate in the attention computation. 87 | scene_encoder: The scene encoder to use. 88 | prediction_head: Type of prediction head to use: 'full' with all outputs 89 | or '2d_pos' where only the 2D position is predicted. 90 | prediction_head_hidden_units: # A tuple describing the number of hidden 91 | units in the Prediction head. Set to None to disable the hidden layer. 92 | drop_prob: Dropout probability during training. 93 | num_history_steps: Number of history timesteps per data point. 
94 | num_steps: Number of timesteps per data point. 95 | timestep: Delta time of a timestep in seconds. 96 | is_hidden_generator: The class to generating the is_hidden tensor. When 97 | called, the generator class returns a np array representing which 98 | agents and timesteps should be masked from the model. 99 | """ 100 | if agents_feature_config is None: 101 | self.agents_feature_config = { 102 | 'agents/position': agent_feature_encoder.AgentPositionEncoder, 103 | 'agents/orientation': agent_feature_encoder.Agent2DOrientationEncoder, 104 | 'agents/detection_score': agent_feature_encoder.AgentScalarEncoder, 105 | 'agents/detection_stage': agent_feature_encoder.AgentOneHotEncoder 106 | } 107 | else: 108 | self.agents_feature_config = agents_feature_config 109 | self.agents_position_key = agents_position_key 110 | self.agents_orientation_key = agents_orientation_key 111 | self.hidden_size = hidden_size 112 | self.feature_embedding_size = feature_embedding_size 113 | self.transformer_ff_dim = transformer_ff_dim 114 | self.ln_eps = ln_eps 115 | self.num_heads = num_heads 116 | self.num_conv_filters = num_conv_filters 117 | self.num_modes = num_modes 118 | self.attn_architecture = attn_architecture 119 | self.mask_style = mask_style 120 | self.scene_encoder = scene_encoder 121 | self.prediction_head = prediction_head 122 | self.prediction_head_hidden_units = prediction_head_hidden_units 123 | self.drop_prob = drop_prob 124 | self.is_hidden_generator = is_hidden_generator 125 | 126 | self.num_history_steps = num_history_steps 127 | self.num_steps = num_steps 128 | self.timestep = timestep 129 | -------------------------------------------------------------------------------- /human_scene_transformer/jrdb/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Evaluates Model on JRDB dataset.""" 16 | 17 | import os 18 | from typing import Sequence 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | import gin 23 | from human_scene_transformer.jrdb import dataset_params 24 | from human_scene_transformer.jrdb import input_fn 25 | from human_scene_transformer.metrics import metrics 26 | from human_scene_transformer.model import model as hst_model 27 | from human_scene_transformer.model import model_params 28 | import tensorflow as tf 29 | import tqdm 30 | 31 | 32 | _MODEL_PATH = flags.DEFINE_string( 33 | 'model_path', 34 | None, 35 | 'Path to model directory.', 36 | ) 37 | 38 | _CKPT_PATH = flags.DEFINE_string( 39 | 'checkpoint_path', 40 | None, 41 | 'Path to model checkpoint.', 42 | ) 43 | 44 | 45 | def evaluation(checkpoint_path): 46 | """Evaluates Model on Pedestrian dataset.""" 47 | d_params = dataset_params.JRDBDatasetParams(num_agents=None) 48 | 49 | dataset = input_fn.load_dataset( 50 | d_params, 51 | d_params.eval_scenes, 52 | augment=False, 53 | shuffle=False, 54 | allow_parallel=False, 55 | evaluation=False, 56 | repeat=False, 57 | keep_subsamples=False, 58 | ) 59 | 60 | model_p = model_params.ModelParams() 61 | 62 | model = hst_model.HumanTrajectorySceneTransformer(model_p) 63 | 64 | _, _ = model(next(iter(dataset.batch(1))), training=False) 65 | 66 | checkpoint_mngr = tf.train.Checkpoint(model=model) 67 | checkpoint_mngr.restore(checkpoint_path).assert_existing_objects_matched() 68 | logging.info('Restored checkpoint: %s', checkpoint_path) 69 | 70 | ade_metric = metrics.ade.MinADE(model_p) 71 | ade_metric_1s = metrics.ade.MinADE( 72 | model_p, cutoff_seconds=1.0, at_cutoff=True 73 | ) 74 | ade_metric_2s = metrics.ade.MinADE( 75 | model_p, cutoff_seconds=2.0, at_cutoff=True 76 | ) 77 | ade_metric_3s = metrics.ade.MinADE( 78 | model_p, cutoff_seconds=3.0, at_cutoff=True 79 | ) 80 | ade_metric_4s = metrics.ade.MinADE( 81 | model_p, cutoff_seconds=4.0, at_cutoff=True 82 | ) 83 | 84 | mlade_metric = metrics.ade.MLADE(model_p) 85 | mlade_metric_1s = metrics.ade.MLADE( 86 | model_p, cutoff_seconds=1.0, at_cutoff=True 87 | ) 88 | mlade_metric_2s = metrics.ade.MLADE( 89 | model_p, cutoff_seconds=2.0, at_cutoff=True 90 | ) 91 | mlade_metric_3s = metrics.ade.MLADE( 92 | model_p, cutoff_seconds=3.0, at_cutoff=True 93 | ) 94 | mlade_metric_4s = metrics.ade.MLADE( 95 | model_p, cutoff_seconds=4.0, at_cutoff=True 96 | ) 97 | 98 | nll_metric = metrics.pos_nll.PositionNegativeLogLikelihood(model_p) 99 | nll_metric_1s = metrics.pos_nll.PositionNegativeLogLikelihood( 100 | model_p, cutoff_seconds=1.0, at_cutoff=True 101 | ) 102 | nll_metric_2s = metrics.pos_nll.PositionNegativeLogLikelihood( 103 | model_p, cutoff_seconds=2.0, at_cutoff=True 104 | ) 105 | nll_metric_3s = metrics.pos_nll.PositionNegativeLogLikelihood( 106 | model_p, cutoff_seconds=3.0, at_cutoff=True 107 | ) 108 | nll_metric_4s = metrics.pos_nll.PositionNegativeLogLikelihood( 109 | model_p, cutoff_seconds=4.0, at_cutoff=True 110 | ) 111 | 112 | for input_batch in tqdm.tqdm(dataset.batch(1)): 113 | full_pred, output_batch = model(input_batch, training=False) 114 | ade_metric.update_state(output_batch, full_pred) 115 | ade_metric_1s.update_state(output_batch, full_pred) 116 | ade_metric_2s.update_state(output_batch, full_pred) 117 | ade_metric_3s.update_state(output_batch, full_pred) 118 | ade_metric_4s.update_state(output_batch, full_pred) 119 | 120 | mlade_metric.update_state(output_batch, full_pred) 121 | 
mlade_metric_1s.update_state(output_batch, full_pred) 122 | mlade_metric_2s.update_state(output_batch, full_pred) 123 | mlade_metric_3s.update_state(output_batch, full_pred) 124 | mlade_metric_4s.update_state(output_batch, full_pred) 125 | 126 | nll_metric.update_state(output_batch, full_pred) 127 | nll_metric_1s.update_state(output_batch, full_pred) 128 | nll_metric_2s.update_state(output_batch, full_pred) 129 | nll_metric_3s.update_state(output_batch, full_pred) 130 | nll_metric_4s.update_state(output_batch, full_pred) 131 | 132 | print(f'MinADE: {ade_metric.result().numpy():.2f}') 133 | print(f'MinADE @ 1s: {ade_metric_1s.result().numpy():.2f}') 134 | print(f'MinADE @ 2s: {ade_metric_2s.result().numpy():.2f}') 135 | print(f'MinADE @ 3s: {ade_metric_3s.result().numpy():.2f}') 136 | print(f'MinADE @ 4s: {ade_metric_4s.result().numpy():.2f}') 137 | 138 | print(f'MLADE: {mlade_metric.result().numpy():.2f}') 139 | print(f'MLADE @ 1s: {mlade_metric_1s.result().numpy():.2f}') 140 | print(f'MLADE @ 2s: {mlade_metric_2s.result().numpy():.2f}') 141 | print(f'MLADE @ 3s: {mlade_metric_3s.result().numpy():.2f}') 142 | print(f'MLADE @ 4s: {mlade_metric_4s.result().numpy():.2f}') 143 | 144 | print(f'NLL: {nll_metric.result().numpy():.2f}') 145 | print(f'NLL @ 1s: {nll_metric_1s.result().numpy():.2f}') 146 | print(f'NLL @ 2s: {nll_metric_2s.result().numpy():.2f}') 147 | print(f'NLL @ 3s: {nll_metric_3s.result().numpy():.2f}') 148 | print(f'NLL @ 4s: {nll_metric_4s.result().numpy():.2f}') 149 | 150 | 151 | def main(argv: Sequence[str]) -> None: 152 | if len(argv) > 1: 153 | raise app.UsageError('Too many command-line arguments.') 154 | 155 | gin.parse_config_files_and_bindings( 156 | [os.path.join(_MODEL_PATH.value, 'params', 'operative_config.gin')], 157 | None, 158 | skip_unknown=True) 159 | print('Actual gin config used:') 160 | print(gin.config_str()) 161 | 162 | evaluation(_CKPT_PATH.value) 163 | 164 | if __name__ == '__main__': 165 | logging.set_verbosity(logging.ERROR) 166 | app.run(main) 167 | -------------------------------------------------------------------------------- /human_scene_transformer/model/preprocess.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains preprocess layers.""" 16 | 17 | from typing import Dict, Optional 18 | 19 | from human_scene_transformer.model.model_params import ModelParams 20 | 21 | import tensorflow as tf 22 | 23 | 24 | class PreprocessLayer(tf.keras.layers.Layer): 25 | """Preprocessing layer of the model. 26 | 27 | The preprocess layer applies the following preprocessing steps to the input. 28 | NOTE: we refer 'positions' as the transformer positions so each position 29 | corresponds to a unique agent at a specific timestep. 30 | 31 | 1) Copy the raw input and save in `/original` and `/target` keys. 32 | 2) Compute the 'has_data' mask for all features. 
True at fields where the 33 | feature has data. Further, compute the global 'has_data' mask. True where 34 | xyz data are available. Further, compute the 'has_historic_data' mask. 35 | True for agents which have at least one valid xyz data point in the xyz 36 | feature. 37 | 3) Compute the `is_padded` bool tensor of shape [batch (b), num_agents (a) 38 | ,num_timesteps (t), 1]. True if the position is padded, ie, no valid 39 | observation. 40 | 4) Compute which positions need to be predicted and save it to the 41 | `should_predict` bool tensor of shape [b, a, t, 1]. A position should be 42 | predicted if it is hidden, not padded and the agent has historic data. 43 | 5) Mask agent features based on their 'has_data' mask. 44 | """ 45 | 46 | def __init__(self, params: ModelParams): 47 | super().__init__(name='PreprocessLayer') 48 | self.params = params 49 | 50 | self.is_hidden_generator = params.is_hidden_generator( 51 | self.params.num_steps, 52 | self.params.num_history_steps) 53 | 54 | self.agents_feature_config = params.agents_feature_config 55 | self.agents_position_key = params.agents_position_key 56 | 57 | def call(self, 58 | raw_input_batch: Dict[str, tf.Tensor], 59 | is_hidden: Optional[tf.Tensor] = None) -> Dict[str, tf.Tensor]: 60 | input_batch = raw_input_batch.copy() 61 | 62 | if is_hidden is None: 63 | num_agents = input_batch[self.params.agents_position_key].shape[1] 64 | input_batch['is_hidden'] = self.is_hidden_generator( 65 | num_agents, train_progress=0.0) 66 | else: 67 | input_batch['is_hidden'] = is_hidden 68 | 69 | input_batch = self._add_original_and_target(input_batch) 70 | input_batch = self._add_has_data(input_batch) 71 | input_batch = self._add_should_predict(input_batch) 72 | input_batch = self._mask_agent_features(input_batch) 73 | 74 | return input_batch 75 | 76 | def _add_original_and_target(self, input_batch): 77 | """Adds original, directly from input_batch values so we can use it later. 78 | """ 79 | input_batch_new = input_batch.copy() 80 | for feature in self.agents_feature_config.keys(): 81 | input_batch_new[f'{feature}/original'] = input_batch[feature] 82 | input_batch_new[f'{feature}/target'] = input_batch[feature] 83 | data_type = input_batch_new[f'{feature}/target'].dtype 84 | if data_type.is_floating: 85 | # Set target to 0. 
where NaN 86 | input_batch_new[f'{feature}/target'] = tf.where( 87 | tf.math.is_nan(input_batch_new[f'{feature}/target']), 88 | tf.zeros_like(input_batch_new[f'{feature}/target']), 89 | input_batch_new[f'{feature}/target']) 90 | else: 91 | input_batch_new[f'{feature}/target'] = tf.where( 92 | input_batch_new[f'{feature}/target'] == data_type.min, 93 | tf.zeros_like(input_batch_new[f'{feature}/target']), 94 | input_batch_new[f'{feature}/target']) 95 | return input_batch_new 96 | 97 | def _add_has_data(self, input_batch): 98 | num_hist_steps = self.params.num_history_steps 99 | input_batch = input_batch.copy() 100 | 101 | def has_data(t): 102 | if t.dtype.is_floating: 103 | return tf.math.logical_not( 104 | tf.reduce_any(tf.math.is_nan(t), axis=-1, keepdims=True)) 105 | else: 106 | return tf.math.logical_not( 107 | tf.reduce_any(t == t.dtype.min, axis=-1, keepdims=True)) 108 | 109 | # Each Feature 110 | for feature in self.agents_feature_config.keys(): 111 | f = input_batch[feature] 112 | input_batch[f'has_data/{feature}'] = has_data(f) 113 | 114 | # Global 115 | # [b, a, t, 1] 116 | has_data = input_batch[f'has_data/{self.agents_position_key}'] 117 | 118 | # [b, a, 1, 1] 119 | has_historic_data = tf.reduce_any( 120 | has_data[..., :num_hist_steps + 1, :], axis=-2, keepdims=True) 121 | 122 | input_batch['has_data'] = has_data 123 | input_batch['has_historic_data'] = has_historic_data 124 | return input_batch 125 | 126 | def _add_should_predict(self, input_batch): 127 | # Only include in loss computation if it is: 128 | # 1) hidden, 2) not padded, 3) has historic data 129 | input_batch['should_predict'] = tf.logical_and( 130 | input_batch['is_hidden'], 131 | tf.logical_and(input_batch['has_data'], 132 | input_batch['has_historic_data'])) 133 | 134 | return input_batch 135 | 136 | def _set_elems_to_value(self, target, should_set, new_val): 137 | """Sets elements in the target marked by should_set to value_to_set. 138 | 139 | Args: 140 | target: Target array to be operated on. 141 | should_set: This must be a binary array with value 1 or 0 with the same 142 | shape as the target. Elements with 1 will cause the element of the 143 | target at the same indices to be changed to value_to_set. 144 | new_val: The new value to set elements to. 145 | 146 | Returns: 147 | target: The target array after the operation. 148 | """ 149 | 150 | target = tf.where(should_set, tf.cast(new_val, target.dtype), target) 151 | 152 | return target 153 | 154 | def _mask_agent_features(self, input_batch): 155 | input_batch = input_batch.copy() 156 | 157 | for feature in self.agents_feature_config.keys(): 158 | feature_is_padded = tf.logical_not(input_batch[f'has_data/{feature}']) 159 | input_batch[feature] = self._set_elems_to_value( 160 | input_batch[feature], 161 | tf.logical_or(feature_is_padded, input_batch['is_hidden']), 0.) 162 | 163 | return input_batch 164 | -------------------------------------------------------------------------------- /human_scene_transformer/model/agent_feature_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
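A minimal sketch of what the `PreprocessLayer` above produces, assuming a single position feature and an explicitly supplied `is_hidden` mask; the shapes and values are illustrative, not a test case from the repo.

```
# Sketch: one batch, two agents, only the position feature; NaNs mark padding.
import numpy as np
import tensorflow as tf
from human_scene_transformer.model import agent_feature_encoder, preprocess
from human_scene_transformer.model.model_params import ModelParams

params = ModelParams(
    agents_feature_config={
        'agents/position': agent_feature_encoder.AgentPositionEncoder})
layer = preprocess.PreprocessLayer(params)

positions = np.random.randn(1, 2, params.num_steps, 3).astype(np.float32)
positions[0, 1, :3] = np.nan  # agent 1 is unobserved for the first timesteps

# Hide everything after the current timestep, as in the default setting.
is_hidden = tf.concat(
    [tf.zeros([1, 2, params.num_history_steps + 1, 1], tf.bool),
     tf.ones([1, 2, params.num_steps - params.num_history_steps - 1, 1],
             tf.bool)],
    axis=2)

out = layer({'agents/position': tf.constant(positions)}, is_hidden=is_hidden)
# Added keys include 'has_data', 'has_historic_data', 'should_predict' and
# 'agents/position/original' / 'agents/position/target'.
```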
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Agent feature encoder.""" 16 | 17 | import gin 18 | 19 | from human_scene_transformer.model.embedding import SinusoidalEmbeddingLayer 20 | 21 | import tensorflow as tf 22 | 23 | 24 | @gin.register 25 | class AgentTemporalEncoder(tf.keras.layers.Layer): 26 | """Encodes agents temporal positions.""" 27 | 28 | def __init__(self, key, output_shape, params): 29 | super().__init__() 30 | self.key = key 31 | self.embedding_layer = SinusoidalEmbeddingLayer( 32 | max_freq=params.num_steps, 33 | hidden_size=params.feature_embedding_size) 34 | 35 | self.mlp = tf.keras.layers.EinsumDense( 36 | '...f,fh->...h', 37 | output_shape=output_shape, 38 | bias_axes='h', 39 | activation=None) 40 | 41 | def _get_temporal_embedding(self, input_batch): 42 | # This weird thing is for exporting and loading keras model... 43 | b = tf.shape(input_batch[self.key])[0] 44 | num_agents = tf.shape(input_batch[self.key])[1] 45 | num_steps = tf.shape(input_batch[self.key])[2] 46 | 47 | t = tf.range(0, num_steps, dtype=tf.float32) 48 | t = t[tf.newaxis, tf.newaxis, :] 49 | t = tf.tile(t, [b, num_agents, 1]) 50 | return self.embedding_layer(t[..., tf.newaxis]) 51 | 52 | def call(self, input_batch): 53 | return (self.mlp(self._get_temporal_embedding(input_batch)), 54 | tf.ones_like(input_batch['has_data'])) 55 | 56 | 57 | @gin.register 58 | class AgentPositionEncoder(tf.keras.layers.Layer): 59 | """Encodes agents spatial positions.""" 60 | 61 | def __init__(self, key, output_shape, params): 62 | super().__init__() 63 | self.key = key 64 | self.embedding_layer = SinusoidalEmbeddingLayer( 65 | hidden_size=params.feature_embedding_size) 66 | 67 | self.mlp = tf.keras.layers.EinsumDense( 68 | '...f,fh->...h', 69 | output_shape=output_shape, 70 | bias_axes='h', 71 | activation=None) 72 | 73 | def call(self, input_batch): 74 | not_is_hidden = tf.logical_not(input_batch['is_hidden']) 75 | mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden) 76 | mask = tf.repeat(mask, tf.shape(input_batch[self.key])[-1], axis=-1) 77 | return self.mlp(self.embedding_layer(input_batch[self.key])), mask 78 | 79 | 80 | @gin.register 81 | class Agent2DOrientationEncoder(tf.keras.layers.Layer): 82 | """Encodes agents 2d orientation.""" 83 | 84 | def __init__(self, key, output_shape, params): 85 | super().__init__() 86 | self.key = key 87 | self.embedding_layer = SinusoidalEmbeddingLayer( 88 | max_freq=2, 89 | hidden_size=params.feature_embedding_size//2) 90 | 91 | self.mlp = tf.keras.layers.EinsumDense( 92 | '...f,fh->...h', 93 | output_shape=output_shape, 94 | bias_axes='h', 95 | activation=None) 96 | 97 | def call(self, input_batch): 98 | orientation = input_batch[self.key] 99 | orientation_embedding = tf.concat([ 100 | self.embedding_layer(tf.math.sin(orientation)), 101 | self.embedding_layer(tf.math.cos(orientation)) 102 | ], axis=-1) 103 | 104 | not_is_hidden = tf.logical_not(input_batch['is_hidden']) 105 | mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden) 106 | 107 | return self.mlp(orientation_embedding), mask 108 | 109 | 110 | @gin.register 111 | class 
AgentScalarEncoder(tf.keras.layers.Layer): 112 | """Encodes a agent's scalar.""" 113 | 114 | def __init__(self, key, output_shape, params): 115 | super().__init__() 116 | self.key = key 117 | 118 | self.mlp = tf.keras.layers.EinsumDense( 119 | '...f,fh->...h', 120 | output_shape=output_shape, 121 | bias_axes='h', 122 | activation=None) 123 | 124 | def call(self, input_batch): 125 | not_is_hidden = tf.logical_not(input_batch['is_hidden']) 126 | mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden) 127 | return self.mlp(input_batch[self.key])[..., tf.newaxis, :], mask 128 | 129 | 130 | @gin.configurable 131 | class AgentOneHotEncoder(tf.keras.layers.Layer): 132 | """Encodes the detection stage.""" 133 | 134 | def __init__(self, key, output_shape, params, depth=1): 135 | super().__init__() 136 | self.key = key 137 | self.depth = depth 138 | 139 | self.mlp = tf.keras.layers.EinsumDense( 140 | '...f,fh->...h', 141 | output_shape=output_shape, 142 | bias_axes='h', 143 | activation=None) 144 | 145 | def call(self, input_batch): 146 | stage_one_hot = tf.one_hot( 147 | tf.squeeze(input_batch[self.key], axis=-1), 148 | self.depth) 149 | 150 | not_is_hidden = tf.logical_not(input_batch['is_hidden']) 151 | mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden) 152 | return self.mlp(stage_one_hot)[..., tf.newaxis, :], mask 153 | 154 | 155 | @gin.configurable 156 | class AgentKeypointsEncoder(tf.keras.layers.Layer): 157 | """Encodes the agent's keypoints.""" 158 | 159 | def __init__(self, key, output_shape, params): 160 | super().__init__() 161 | self.key = key 162 | 163 | self.mlp1 = tf.keras.layers.EinsumDense( 164 | '...f,fh->...h', 165 | output_shape=output_shape, 166 | bias_axes='h', 167 | activation=tf.nn.relu) 168 | 169 | def call(self, input_batch, training=None): 170 | not_is_hidden = tf.logical_not(input_batch['is_hidden']) 171 | mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden) 172 | 173 | keypoints = input_batch[self.key] 174 | 175 | out = self.mlp1(keypoints)[..., tf.newaxis, :] 176 | 177 | return out, mask 178 | 179 | 180 | @gin.configurable 181 | class AgentHeadOrientationEncoder(tf.keras.layers.Layer): 182 | """Encodes the detection stage.""" 183 | 184 | def __init__(self, key, output_shape, params): 185 | super().__init__() 186 | self.key = key 187 | 188 | self.mlp = tf.keras.layers.EinsumDense( 189 | '...f,fh->...h', 190 | output_shape=output_shape, 191 | bias_axes='h', 192 | activation=None) 193 | 194 | def call(self, input_batch): 195 | not_is_hidden = tf.logical_not(input_batch['is_hidden']) 196 | mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden) 197 | return self.mlp(input_batch[self.key])[..., tf.newaxis, :], mask 198 | -------------------------------------------------------------------------------- /human_scene_transformer/data/jrdb_train_detections_to_tracks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
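The encoders above share a small interface: each is constructed with `(key, output_shape, params)` and, when called with the input batch, returns an `(embedding, mask)` pair. A sketch of a custom encoder for a hypothetical extra feature (the feature name and class below are assumptions, not part of the repo) follows the same pattern:

```
# Hypothetical additional feature encoder; 'agents/velocity' is an assumed
# feature name, not one produced by the released data pipeline.
import gin
import tensorflow as tf


@gin.register
class AgentVelocityEncoder(tf.keras.layers.Layer):
  """Encodes a per-timestep velocity feature."""

  def __init__(self, key, output_shape, params):
    super().__init__()
    self.key = key
    self.mlp = tf.keras.layers.EinsumDense(
        '...f,fh->...h',
        output_shape=output_shape,
        bias_axes='h',
        activation=None)

  def call(self, input_batch):
    # Mask hidden or padded timesteps, mirroring the encoders above.
    not_is_hidden = tf.logical_not(input_batch['is_hidden'])
    mask = tf.logical_and(input_batch[f'has_data/{self.key}'], not_is_hidden)
    return self.mlp(input_batch[self.key])[..., tf.newaxis, :], mask
```

Registering such a class in `ModelParams.agents_feature_config`, keyed by the feature name, is how additional inputs plug into the agent encoder.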
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Fuses detections to tracks. 16 | """ 17 | 18 | 19 | import json 20 | import os 21 | 22 | from absl import app 23 | from absl import flags 24 | 25 | from human_scene_transformer.data import box_utils 26 | from human_scene_transformer.data import utils 27 | import numpy as np 28 | import pandas as pd 29 | import scipy 30 | import tensorflow as tf 31 | import tqdm 32 | 33 | 34 | _INPUT_PATH = flags.DEFINE_string( 35 | 'input_path', 36 | default=None, 37 | help='Path to jrdb2022 dataset.' 38 | ) 39 | 40 | _OUTPUT_PATH = flags.DEFINE_string( 41 | 'output_path', 42 | default=None, 43 | help='Path to output folder.' 44 | ) 45 | 46 | 47 | def get_agents_3d_bounding_box_dict(input_path, scene): 48 | """Returns a dict of agent labels and their bounding boxes.""" 49 | scene_data_file = utils.get_file_handle( 50 | os.path.join(input_path, 'labels', 'labels_3d', scene + '.json') 51 | ) 52 | scene_data = json.load(scene_data_file) 53 | 54 | agents = {} 55 | 56 | for frame in scene_data['labels']: 57 | ts = int(frame.split('.')[0]) 58 | for det in scene_data['labels'][frame]: 59 | agents[(ts, det['label_id'])] = { 60 | 'box': np.array([ 61 | det['box']['cx'], 62 | det['box']['cy'], 63 | det['box']['l'], 64 | det['box']['w'], 65 | det['box']['rot_z']]), 66 | 'box3d': np.array([ 67 | det['box']['cx'], 68 | det['box']['cy'], 69 | det['box']['cz'], 70 | det['box']['l'], 71 | det['box']['w'], 72 | det['box']['h'], 73 | det['box']['rot_z']]) 74 | } 75 | return agents 76 | 77 | 78 | def get_agents_3d_bounding_box_detections_dict(input_path, scene): 79 | """Returns a dict of agent detections and their bounding boxes.""" 80 | scene_data_file = utils.get_file_handle( 81 | os.path.join(input_path, 'detections', 'detections_3d', scene + '.json') 82 | ) 83 | 84 | scene_data = json.load(scene_data_file) 85 | 86 | agents = [] 87 | 88 | for frame in scene_data['detections']: 89 | ts = int(frame.split('.')[0]) 90 | for det in scene_data['detections'][frame]: 91 | agents.append(( 92 | ts, 93 | np.array( 94 | [det['box']['cx'], 95 | det['box']['cy'], 96 | det['box']['l'], 97 | det['box']['w'], 98 | det['box']['rot_z']]), 99 | np.array([ 100 | det['box']['cx'], 101 | det['box']['cy'], 102 | # Detections should be in sensor coordinate 103 | # frame (0.6m from ground). 
104 | det['box']['cz'] - 0.606982, 105 | det['box']['l'], 106 | det['box']['w'], 107 | det['box']['h'], 108 | det['box']['rot_z']]), 109 | det['score'] 110 | )) 111 | return agents 112 | 113 | 114 | def detections_to_dict(df): 115 | """Puts a detections dataframe into expected dict format.""" 116 | labels_dict = {} 117 | for ts, group in df.groupby('timestep'): 118 | agent_list = [] 119 | for (_, agent_id), row in group.iterrows(): 120 | agent_list.append({'label_id': agent_id, 121 | 'box': { 122 | 'cx': row['detection'][0], 123 | 'cy': row['detection'][1], 124 | 'cz': row['detection'][2], 125 | 'l': row['detection'][3], 126 | 'w': row['detection'][4], 127 | 'h': row['detection'][5], 128 | 'rot_z': row['detection'][6], 129 | }, 130 | 'attributes': { 131 | 'distance': np.linalg.norm(row['detection'][:3]) 132 | }}) 133 | labels_dict[f'{ts:06}.pcb'] = agent_list 134 | return {'labels': labels_dict} 135 | 136 | 137 | def jrdb_train_detections_to_tracks(input_path, output_path): 138 | """Fuses detections to tracks.""" 139 | utils.maybe_makedir(output_path) 140 | scenes = utils.list_scenes( 141 | os.path.join(input_path, 'train_dataset')) 142 | for scene in tqdm.tqdm(scenes): 143 | bb_dict = get_agents_3d_bounding_box_dict( 144 | os.path.join(input_path, 'train_dataset'), scene) 145 | bb_3d_df = pd.DataFrame.from_dict( 146 | bb_dict, orient='index').rename_axis(['timestep', 'id']) # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 147 | 148 | bb_detections_list = get_agents_3d_bounding_box_detections_dict( 149 | os.path.join(input_path, 'train_dataset'), scene) 150 | bb_3d_detections_df = pd.DataFrame( 151 | bb_detections_list, columns=['timestep', 'box', 'box3d', 'score']) 152 | 153 | dfs = [] 154 | for ts, gt_group in bb_3d_df.groupby('timestep'): 155 | detections_group = bb_3d_detections_df.loc[ 156 | (bb_3d_detections_df['timestep'] == ts)] 157 | detection_boxes = np.vstack(detections_group['box']) 158 | gt_boxes = np.vstack(gt_group['box']) 159 | 160 | detection_boxes_rep = np.repeat( 161 | detection_boxes, gt_boxes.shape[0], axis=0) 162 | gt_boxes_til = np.tile(gt_boxes, (detection_boxes.shape[0], 1)) 163 | 164 | iou = box_utils.compute_paired_bev_iou( 165 | tf.convert_to_tensor(detection_boxes_rep), 166 | tf.convert_to_tensor(gt_boxes_til) 167 | ) 168 | assert isinstance(iou, tf.Tensor) 169 | iou_np = iou.numpy().reshape( 170 | (detection_boxes.shape[0], gt_boxes.shape[0])) 171 | cost = 1 - iou_np 172 | r, c = scipy.optimize.linear_sum_assignment(cost.T) 173 | 174 | unmatched_gt = np.argwhere(cost.T[r, c] == 1.)[..., 0] 175 | 176 | df_tmp = gt_group.iloc[r].copy() 177 | df_tmp['detection'] = detections_group['box3d'].iloc[c].to_list() 178 | 179 | df_tmp = df_tmp.drop(index=df_tmp.index[unmatched_gt]) 180 | 181 | dfs.append(df_tmp) 182 | 183 | matched_df = pd.concat(dfs) 184 | 185 | labels_dict = detections_to_dict(matched_df) 186 | 187 | with open(f"{output_path}/{scene}.json", 'w') as write_file: 188 | json.dump(labels_dict, write_file, indent=2, ensure_ascii=True) 189 | 190 | 191 | def main(argv): 192 | if len(argv) > 1: 193 | raise app.UsageError('Too many command-line arguments.') 194 | if _OUTPUT_PATH.value is None: 195 | output_path = os.path.join(_INPUT_PATH.value, 196 | 'processed/labels/labels_detections_3d') 197 | else: 198 | output_path = _OUTPUT_PATH.value 199 | jrdb_train_detections_to_tracks(_INPUT_PATH.value, output_path) 200 | 201 | if __name__ == '__main__': 202 | flags.mark_flags_as_required(['input_path']) 203 | app.run(main) 204 | 
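The core of the track-fusion script above is a bipartite assignment between ground-truth boxes and detections, using `1 - IoU` as the cost and discarding pairs with no overlap. A toy illustration with made-up IoU values:

```
# Toy numbers only; in the script the IoU matrix comes from
# box_utils.compute_paired_bev_iou over real boxes.
import numpy as np
import scipy.optimize

iou = np.array([[0.82, 0.00, 0.05],   # detection 0 vs. ground truth 0..2
                [0.01, 0.74, 0.00],   # detection 1
                [0.00, 0.00, 0.00]])  # detection 2 (false positive)
cost = 1.0 - iou

# Rows of cost.T are ground-truth boxes, columns are detections.
gt_idx, det_idx = scipy.optimize.linear_sum_assignment(cost.T)

matched = [(int(g), int(d))
           for g, d in zip(gt_idx, det_idx) if cost.T[g, d] < 1.0]
print(matched)  # [(0, 0), (1, 1)]; ground truth 2 stays unmatched
```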
-------------------------------------------------------------------------------- /human_scene_transformer/jrdb/eval_keypoints.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Evaluates Model on JRDB dataset.""" 16 | 17 | import os 18 | from typing import Sequence 19 | from absl import app 20 | from absl import flags 21 | from absl import logging 22 | import gin 23 | from human_scene_transformer.jrdb import dataset_params 24 | from human_scene_transformer.jrdb import input_fn 25 | from human_scene_transformer.metrics import metrics 26 | from human_scene_transformer.model import model as hst_model 27 | from human_scene_transformer.model import model_params 28 | import tensorflow as tf 29 | import tqdm 30 | 31 | 32 | _MODEL_PATH = flags.DEFINE_string( 33 | 'model_path', 34 | None, 35 | 'Path to model directory.', 36 | ) 37 | 38 | _CKPT_PATH = flags.DEFINE_string( 39 | 'checkpoint_path', 40 | None, 41 | 'Path to model checkpoint.', 42 | ) 43 | 44 | 45 | def filter_non_moving_agents(input_dict, output_dict): 46 | """Filters non-moving agents.""" 47 | pos = input_dict['agents/position'][..., :2] 48 | dist = tf.linalg.norm( 49 | tf.reduce_min(tf.where(tf.math.is_nan(pos), tf.float32.max, pos), axis=-2) 50 | - tf.reduce_max( 51 | tf.where(tf.math.is_nan(pos), tf.float32.min, pos), axis=-2 52 | ), 53 | axis=-1, 54 | keepdims=True, 55 | )[..., tf.newaxis] 56 | output_dict['should_predict'] = tf.math.logical_and( 57 | output_dict['should_predict'], dist > 0.5 58 | ) 59 | 60 | return output_dict 61 | 62 | 63 | def filter_agents_without_keypoints(input_dict, output_dict): 64 | """Filters agents without keypoints.""" 65 | num_key = tf.reduce_sum( 66 | tf.cast( 67 | tf.logical_not( 68 | tf.reduce_any( 69 | tf.math.is_nan( 70 | input_dict['agents/keypoints'][:, :, :7] 71 | ), axis=-1, keepdims=True) 72 | ), tf.float32), axis=-2, keepdims=True) 73 | 74 | output_dict['should_predict'] = tf.math.logical_and( 75 | output_dict['should_predict'], num_key > 5) 76 | 77 | return output_dict 78 | 79 | 80 | def evaluation(checkpoint_path): 81 | """Evaluates Model on Pedestrian dataset.""" 82 | d_params = dataset_params.JRDBDatasetParams(num_agents=None) 83 | 84 | dataset = input_fn.load_dataset( 85 | d_params, 86 | d_params.eval_scenes, 87 | augment=False, 88 | shuffle=False, 89 | allow_parallel=False, 90 | evaluation=False, 91 | repeat=False, 92 | keep_subsamples=False, 93 | ) 94 | 95 | model_p = model_params.ModelParams() 96 | 97 | model = hst_model.HumanTrajectorySceneTransformer(model_p) 98 | 99 | _, _ = model(next(iter(dataset.batch(1))), training=False) 100 | 101 | checkpoint_mngr = tf.train.Checkpoint(model=model) 102 | checkpoint_mngr.restore(checkpoint_path).assert_existing_objects_matched() 103 | logging.info('Restored checkpoint: %s', checkpoint_path) 104 | 105 | ade_metric = metrics.ade.MinADE(model_p) 106 | 
ade_metric_1s = metrics.ade.MinADE( 107 | model_p, cutoff_seconds=1.0, at_cutoff=True 108 | ) 109 | ade_metric_2s = metrics.ade.MinADE( 110 | model_p, cutoff_seconds=2.0, at_cutoff=True 111 | ) 112 | ade_metric_3s = metrics.ade.MinADE( 113 | model_p, cutoff_seconds=3.0, at_cutoff=True 114 | ) 115 | ade_metric_4s = metrics.ade.MinADE( 116 | model_p, cutoff_seconds=4.0, at_cutoff=True 117 | ) 118 | 119 | mlade_metric = metrics.ade.MLADE(model_p) 120 | mlade_metric_1s = metrics.ade.MLADE( 121 | model_p, cutoff_seconds=1.0, at_cutoff=True 122 | ) 123 | mlade_metric_2s = metrics.ade.MLADE( 124 | model_p, cutoff_seconds=2.0, at_cutoff=True 125 | ) 126 | mlade_metric_3s = metrics.ade.MLADE( 127 | model_p, cutoff_seconds=3.0, at_cutoff=True 128 | ) 129 | mlade_metric_4s = metrics.ade.MLADE( 130 | model_p, cutoff_seconds=4.0, at_cutoff=True 131 | ) 132 | 133 | nll_metric = metrics.pos_nll.PositionNegativeLogLikelihood(model_p) 134 | nll_metric_1s = metrics.pos_nll.PositionNegativeLogLikelihood( 135 | model_p, cutoff_seconds=1.0, at_cutoff=True 136 | ) 137 | nll_metric_2s = metrics.pos_nll.PositionNegativeLogLikelihood( 138 | model_p, cutoff_seconds=2.0, at_cutoff=True 139 | ) 140 | nll_metric_3s = metrics.pos_nll.PositionNegativeLogLikelihood( 141 | model_p, cutoff_seconds=3.0, at_cutoff=True 142 | ) 143 | nll_metric_4s = metrics.pos_nll.PositionNegativeLogLikelihood( 144 | model_p, cutoff_seconds=4.0, at_cutoff=True 145 | ) 146 | 147 | for input_batch in tqdm.tqdm(dataset.batch(1)): 148 | full_pred, output_batch = model(input_batch, training=False) 149 | output_batch = filter_agents_without_keypoints(input_batch, output_batch) 150 | output_batch = filter_non_moving_agents(input_batch, output_batch) 151 | ade_metric.update_state(output_batch, full_pred) 152 | ade_metric_1s.update_state(output_batch, full_pred) 153 | ade_metric_2s.update_state(output_batch, full_pred) 154 | ade_metric_3s.update_state(output_batch, full_pred) 155 | ade_metric_4s.update_state(output_batch, full_pred) 156 | 157 | mlade_metric.update_state(output_batch, full_pred) 158 | mlade_metric_1s.update_state(output_batch, full_pred) 159 | mlade_metric_2s.update_state(output_batch, full_pred) 160 | mlade_metric_3s.update_state(output_batch, full_pred) 161 | mlade_metric_4s.update_state(output_batch, full_pred) 162 | 163 | nll_metric.update_state(output_batch, full_pred) 164 | nll_metric_1s.update_state(output_batch, full_pred) 165 | nll_metric_2s.update_state(output_batch, full_pred) 166 | nll_metric_3s.update_state(output_batch, full_pred) 167 | nll_metric_4s.update_state(output_batch, full_pred) 168 | 169 | print(f'MinADE: {ade_metric.result().numpy():.2f}') 170 | print(f'MinADE @ 1s: {ade_metric_1s.result().numpy():.2f}') 171 | print(f'MinADE @ 2s: {ade_metric_2s.result().numpy():.2f}') 172 | print(f'MinADE @ 3s: {ade_metric_3s.result().numpy():.2f}') 173 | print(f'MinADE @ 4s: {ade_metric_4s.result().numpy():.2f}') 174 | 175 | print(f'MLADE: {mlade_metric.result().numpy():.2f}') 176 | print(f'MLADE @ 1s: {mlade_metric_1s.result().numpy():.2f}') 177 | print(f'MLADE @ 2s: {mlade_metric_2s.result().numpy():.2f}') 178 | print(f'MLADE @ 3s: {mlade_metric_3s.result().numpy():.2f}') 179 | print(f'MLADE @ 4s: {mlade_metric_4s.result().numpy():.2f}') 180 | 181 | print(f'NLL: {nll_metric.result().numpy():.2f}') 182 | print(f'NLL @ 1s: {nll_metric_1s.result().numpy():.2f}') 183 | print(f'NLL @ 2s: {nll_metric_2s.result().numpy():.2f}') 184 | print(f'NLL @ 3s: {nll_metric_3s.result().numpy():.2f}') 185 | print(f'NLL @ 4s: 
{nll_metric_4s.result().numpy():.2f}') 186 | 187 | 188 | def main(argv: Sequence[str]) -> None: 189 | if len(argv) > 1: 190 | raise app.UsageError('Too many command-line arguments.') 191 | 192 | gin.parse_config_files_and_bindings( 193 | [os.path.join(_MODEL_PATH.value, 'params', 'operative_config.gin')], 194 | None, 195 | skip_unknown=True) 196 | print('Actual gin config used:') 197 | print(gin.config_str()) 198 | 199 | evaluation(_CKPT_PATH.value) 200 | 201 | if __name__ == '__main__': 202 | logging.set_verbosity(logging.ERROR) 203 | app.run(main) 204 | -------------------------------------------------------------------------------- /human_scene_transformer/model/agent_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains agent encoder layers.""" 16 | 17 | from typing import Dict, Optional 18 | 19 | from human_scene_transformer.model.agent_feature_encoder import AgentTemporalEncoder 20 | from human_scene_transformer.model.model_params import ModelParams 21 | 22 | import tensorflow as tf 23 | 24 | 25 | class FeatureConcatAgentEncoderLayer(tf.keras.layers.Layer): 26 | """Independently encodes features and attends to them. 27 | 28 | Agent features are cross-attended with a learned query or hidden_vecs instead 29 | of MLP. 30 | """ 31 | 32 | def __init__(self, 33 | params: ModelParams): 34 | super().__init__() 35 | 36 | # Cross Attention and learned query. 
37 | self.ff_layer2 = tf.keras.layers.EinsumDense( 38 | '...f,fh->...h', 39 | output_shape=params.hidden_size, 40 | bias_axes='h', 41 | activation=None, 42 | ) 43 | self.ff_dropout = tf.keras.layers.Dropout(params.drop_prob) 44 | 45 | self.agent_feature_embedding_layers = [] 46 | # Position Feature 47 | self.agent_feature_embedding_layers.append( 48 | params.agents_feature_config[params.agents_position_key]( 49 | params.agents_position_key, params.hidden_size - 8, params 50 | ) 51 | ) 52 | # Feature Embedding 53 | for key, layer in params.agents_feature_config.items(): 54 | if key == params.agents_position_key: 55 | continue 56 | self.agent_feature_embedding_layers.append( 57 | layer(key, params.hidden_size - 8, params) 58 | ) 59 | 60 | # Temporal Embedding 61 | self.agent_feature_embedding_layers.append( 62 | AgentTemporalEncoder( 63 | list(params.agents_feature_config.keys())[0], 64 | params.hidden_size - 8, 65 | params, 66 | ) 67 | ) 68 | 69 | def call( 70 | self, input_batch: Dict[str, tf.Tensor], training: Optional[bool] = None 71 | ): 72 | input_batch = input_batch.copy() 73 | 74 | layer_embeddings = [] 75 | for layer in self.agent_feature_embedding_layers: 76 | layer_embedding, _ = layer(input_batch, training=training) 77 | layer_embedding = tf.reshape( 78 | layer_embedding, 79 | layer_embedding.shape[:-2] 80 | + [layer_embedding.shape[-2] * layer_embedding.shape[-1]], 81 | ) 82 | layer_embeddings.append(layer_embedding) 83 | embedding = tf.concat(layer_embeddings, axis=-1) 84 | 85 | out = self.ff_layer2(embedding) 86 | 87 | input_batch['hidden_vecs'] = out 88 | input_batch['hidden_vecs_fe'] = out 89 | return input_batch 90 | 91 | 92 | class FeatureAttnAgentEncoderLearnedLayer(tf.keras.layers.Layer): 93 | """Independently encodes features and attends to them. 94 | 95 | Agent features are cross-attended with a learned query or hidden_vecs instead 96 | of MLP. 97 | """ 98 | 99 | def __init__(self, 100 | params: ModelParams): 101 | super(FeatureAttnAgentEncoderLearnedLayer, self).__init__() 102 | 103 | # Cross Attention and learned query. 
104 | self.attn_layer = tf.keras.layers.MultiHeadAttention( 105 | num_heads=params.num_heads, 106 | key_dim=params.hidden_size, # "large" to prevent a bottleneck 107 | attention_axes=3) 108 | self.attn_ln = tf.keras.layers.LayerNormalization(epsilon=params.ln_eps) 109 | self.ff_layer1 = tf.keras.layers.EinsumDense( 110 | '...h,hf->...f', 111 | output_shape=params.transformer_ff_dim, 112 | bias_axes='f', 113 | activation='relu') 114 | self.ff_layer2 = tf.keras.layers.EinsumDense( 115 | '...f,fh->...h', 116 | output_shape=params.hidden_size, 117 | bias_axes='h', 118 | activation=None) 119 | self.ff_dropout = tf.keras.layers.Dropout(params.drop_prob) 120 | self.ff_ln = tf.keras.layers.LayerNormalization(epsilon=params.ln_eps) 121 | 122 | self.agent_feature_embedding_layers = [] 123 | # Position Feature 124 | self.agent_feature_embedding_layers.append( 125 | params.agents_feature_config[params.agents_position_key]( 126 | params.agents_position_key, params.hidden_size-8, params)) 127 | # Feature Embedding 128 | for key, layer in params.agents_feature_config.items(): 129 | if key == params.agents_position_key: 130 | continue 131 | self.agent_feature_embedding_layers.append( 132 | layer(key, params.hidden_size-8, params)) 133 | 134 | # Temporal Embedding 135 | self.agent_feature_embedding_layers.append( 136 | AgentTemporalEncoder(list(params.agents_feature_config.keys())[0], 137 | params.hidden_size-8, params)) 138 | 139 | # [1, 1, 1, 1, h] 140 | self.learned_query_vec = tf.Variable( 141 | tf.random_uniform_initializer( 142 | minval=-1., maxval=1.)(shape=[1, 1, 1, 1, params.hidden_size]), 143 | trainable=True, 144 | dtype=tf.float32) 145 | 146 | def _build_learned_query(self, input_batch): 147 | """Converts self.learned_query_vec into a learned query vector.""" 148 | # This weird thing is for exporting and loading keras model... 
149 | b = tf.shape(input_batch['agents/position'])[0] 150 | num_agents = tf.shape(input_batch['agents/position'])[1] 151 | num_steps = tf.shape(input_batch['agents/position'])[2] 152 | 153 | # [b, num_agents, num_steps, 1, h] 154 | return tf.tile(self.learned_query_vec, [b, num_agents, num_steps, 1, 1]) 155 | 156 | def call(self, input_batch: Dict[str, tf.Tensor], 157 | training: Optional[bool] = None): 158 | input_batch = input_batch.copy() 159 | 160 | layer_embeddings = [] 161 | layer_masks = [] 162 | for layer in self.agent_feature_embedding_layers: 163 | layer_embedding, layer_mask = layer(input_batch, training=training) 164 | layer_embeddings.append(layer_embedding) 165 | layer_masks.append(layer_mask) 166 | embedding = tf.concat(layer_embeddings, axis=3) 167 | 168 | b = tf.shape(embedding)[0] 169 | a = tf.shape(embedding)[1] 170 | t = tf.shape(embedding)[2] 171 | n = tf.shape(embedding)[3] 172 | 173 | # [1, 1, 1, N, 8] 174 | one_hot = tf.one_hot(tf.range(0, n), 8)[None, None, None] 175 | # [b, a, t, N, 8] 176 | one_hot_id = tf.tile(one_hot, (b, a, t, 1, 1)) 177 | 178 | embedding = tf.concat([embedding, one_hot_id], axis=-1) 179 | 180 | attention_mask = tf.concat(layer_masks, axis=-1) 181 | 182 | # [b, a, t, num_heads, 1, num_features] <- broadcasted 183 | # Newaxis for num_heads, num_features 184 | attention_mask = attention_mask[..., tf.newaxis, tf.newaxis, :] 185 | 186 | learned_query = self._build_learned_query(input_batch) 187 | 188 | # Attention along axis 3 189 | attn_out, attn_score = self.attn_layer( 190 | query=learned_query, 191 | key=embedding, 192 | value=embedding, 193 | attention_mask=attention_mask, 194 | return_attention_scores=True) 195 | # [b, a, t, h] 196 | attn_out = attn_out[..., 0, :] 197 | out = self.ff_layer1(attn_out) 198 | out = self.ff_layer2(out) 199 | out = self.ff_dropout(out, training=training) 200 | out = self.ff_ln(out + attn_out) 201 | 202 | input_batch['hidden_vecs'] = out 203 | input_batch[f'attn_scores/{self.name}'] = attn_score 204 | return input_batch 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | :trophy: Winner of the [2023 JRDB Trajectory Prediction Challenge](https://jrdb.erc.monash.edu/leaderboards/trajectory) - [Reproduce](#jrdb-trajectory-prediction-challenge-results) our Result! 2 | 3 | # Human Scene Transformer 4 | 5 | The (Human) Scene Transformer architecture (as described [here](https://arxiv.org/pdf/2309.17209.pdf) and [here)](https://arxiv.org/pdf/2106.08417.pdf) is a general and extendable trajectory prediction framework which threats trajectory prediction as a sequence to sequence problem and models it in a Transformer architecture. 6 | 7 | It is straightforward to extend with 8 | 9 | - additional input features 10 | - custom environment encoder 11 | - different loss functions 12 | - ... 13 | 14 | *This is not an officially supported Google product.* 15 | 16 | --- 17 | 18 | ![Human Scene Transformer](./human_scene_transformer/images/hero.png) 19 | 20 | Anticipating the motion of all humans in dynamic environments such as homes and offices is critical to enable safe and effective robot navigation. Such spaces remain challenging as humans do not follow strict rules of motion and there are often multiple occluded entry points such as corners and doors that create opportunities for sudden encounters. 
In this work, we present a Transformer based architecture to predict human future trajectories in human-centric environments from input features including human positions, head orientations, and 3D skeletal keypoints from onboard in-the-wild sensory information. The resulting model captures the inherent uncertainty for future human trajectory prediction and achieves state-of-the-art performance on common prediction benchmarks and a human tracking dataset captured from a mobile robot adapted for the prediction task. Furthermore, we identify new agents with limited historical data as a major contributor to error and demonstrate the complementary nature of 3D skeletal poses in reducing prediction error in such challenging scenarios. 21 | 22 | If you use this work, please cite our paper: 23 | 24 | ``` 25 | @article{salzmann2023hst, 26 | title={Robots That Can See: Leveraging Human Pose for Trajectory Prediction}, 27 | author={Salzmann, Tim and Chiang, Lewis and Ryll, Markus and Sadigh, Dorsa and Parada, Carolina and Bewley, Alex}, 28 | journal={IEEE Robotics and Automation Letters}, 30 | year={2023}, volume={8}, number={11}, pages={7090-7097}, 31 | doi={10.1109/LRA.2023.3312035} 32 | } 33 | ``` 34 | 35 | --- 36 | 37 | ## Prerequisites 38 | 39 | Install requirements via `pip install -r requirements.txt`. 40 | 41 | Please note that this codebase is not compatible with the Intel MKL backend for 42 | tensorflow. The MKL backend supports tensors with up to 5 dimensions, which is 43 | not sufficient for parts of this codebase. Should you have an MKL-backed 44 | tensorflow installation or run into MKL related 45 | [errors](https://github.com/google-research/human-scene-transformer/issues/11), 46 | please disable the tensorflow MKL backend by setting the environment variables 47 | `TF_ENABLE_ONEDNN_OPTS=0` and `TF_DISABLE_MKL=1`. 48 | 49 | ## Data 50 | 51 | ### JRDB 52 | 53 | We provide an extensive pre-processing pipeline to convert the JRDB dataset. 54 | JRDB was created as a detection and tracking dataset rather than a prediction 55 | dataset. To make the data suitable for a prediction task, we first extract the 56 | robot motion from the raw sensor data so that the robot's own motion can be accounted for. 57 | Further, on the JRDB training split we combine algorithmic detections with the 58 | ground truth labels from the tracking dataset to create authentic tracks as 59 | input and labels for HST. 60 | Note that we do not purely use the ground truth hand-labeled tracks in the JRDB 61 | train dataset, as we find them to be overly smoothed, giving away the future human 62 | movement. 63 | To adapt the JRDB dataset for prediction, please follow [this](/human_scene_transformer/data) README. 64 | 65 | Make sure to adapt `<data_path>` in `config/<dataset>/dataset_params.gin` accordingly. 66 | 67 | If you want to use the JRDB dataset for trajectory prediction in PyTorch, we 68 | provide a [PyTorch Dataset wrapper](/human_scene_transformer/jrdb/torch_dataset.py) for the processed dataset. 69 | 70 | ### Pedestrians ETH/UCY 71 | Please download the raw data [here](https://github.com/StanfordASL/Trajectron-plus-plus/tree/master/experiments/pedestrians/raw).
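As a concrete example for the MKL note in the Prerequisites above, the two environment variables can be set before TensorFlow is imported, for instance at the very top of a launcher script; exporting them in the shell before calling `train.py` works equally well.

```
# Must run before the first TensorFlow import anywhere in the process.
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_DISABLE_MKL'] = '1'

import tensorflow as tf  # imported after the variables are set on purpose
print(tf.__version__)
```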
72 | 73 | ## Training 74 | 75 | ### JRDB 76 | ``` 77 | python train.py --model_base_dir=./model/jrdb --gin_files=./config/jrdb/training_params.gin --gin_files=./config/jrdb/model_params.gin --gin_files=./config/jrdb/dataset_params.gin --gin_files=./config/jrdb/metrics.gin --dataset=JRDB 78 | ``` 79 | 80 | ### Pedestrians ETH/UCY 81 | ``` 82 | python train.py --model_base_dir=./models/pedestrians_eth --gin_files=./config/pedestrians/training_params.gin --gin_files=./config/pedestrians/model_params.gin --gin_files=./config/pedestrians/dataset_params.gin --gin_files=./config/pedestrians/metrics.gin --dataset=PEDESTRIANS 83 | ``` 84 | 85 | --- 86 | 87 | ## JRDB Trajectory Prediction Challenge Results 88 | To reproduce our winning results in the [2023 JRDB Trajectory Prediction Challenge](https://jrdb.erc.monash.edu/leaderboards/trajectory): 89 | 90 | - Make sure that you follow the [data pre-processing instructions](/human_scene_transformer/data) and pay special attention to where the instructions differentiate between the JRDB Challenge dataset and the original paper dataset. 91 | 92 | - Download the trained challenge model [here](https://storage.googleapis.com/gresearch/human_scene_transformer/jrdb_challenge_checkpoint.zip) 93 | 94 | - Run 95 | 96 | ``` 97 | python jrdb/eval_challenge.py --model_path=<model_path> --checkpoint_path=<model_path>/ckpts/ckpt-20 --dataset_path=<dataset_path> --output_path=<output_path> 98 | ``` 99 | 100 | --- 101 | 102 | ## Evaluation 103 | 104 | ### JRDB 105 | ``` 106 | python jrdb/eval.py --model_path=./models/jrdb/ --checkpoint_path=./models/jrdb/ckpts/ckpt-30 107 | ``` 108 | 109 | #### Keypoints Impact Evaluation 110 | ``` 111 | python jrdb/eval_keypoints.py --model_path=./models/jrdb/ --checkpoint_path=./models/jrdb/ckpts/ckpt-30 112 | ``` 113 | 114 | vs 115 | 116 | ``` 117 | python jrdb/eval_keypoints.py --model_path=./models/jrdb_no_keypoints/ --checkpoint_path=./models/jrdb_no_keypoints/ckpts/ckpt-30 118 | ``` 119 | 120 | ### Pedestrians ETH/UCY 121 | ``` 122 | python pedestrians/eval.py --model_path=./models/pedestrians_eth/ --checkpoint_path=./models/pedestrians_eth/ckpts/ckpt-20 123 | ``` 124 | 125 | --- 126 | 127 | ## Results 128 | 129 | Compared to the published paper, we improved our data processing and fixed small 130 | bugs in this code release. If you compare against our method, please use the 131 | following updated results. 132 | 133 | On the JRDB dataset with dataset options as set [here](/human_scene_transformer/config/jrdb/dataset_params.py): 134 | 135 | | | AVG | @ 1s | @ 2s | @ 3s | @ 4s | 136 | |--------|-------|-------|-------|-------|-------| 137 | | MinADE | 0.26 | 0.12 | 0.20 | 0.28 | 0.37 | 138 | | MinFDE | 0.45 | 0.21 | 0.39 | 0.56 | 0.71 | 139 | | NLL | -0.59 | -0.90 | -0.65 | -0.08 | 0.32 | 140 | 141 | On the ETH/UCY Pedestrians Dataset: 142 | 143 | | | ETH | Hotel | Univ | Zara1 | Zara2 | Avg | 144 | |--------|------|-------|------|-------|-------|-------| 145 | | MinADE | 0.41 | 0.10 | 0.24 | 0.17 | 0.14 | 0.21 | 146 | | MinFDE | 0.73 | 0.14 | 0.44 | 0.30 | 0.24 | 0.37 | 147 | 148 | ### JRDB Train / Test Split 149 | The train / test split is implemented [here](/human_scene_transformer/config/jrdb/dataset_params.py). 150 | 151 | ### Checkpoints 152 | You can download trained model checkpoints for both the `JRDB` and `Pedestrians (ETH/UCY)` datasets [here](https://storage.googleapis.com/gresearch/human_scene_transformer/checkpoints.zip). 153 | 154 | To evaluate the pre-trained checkpoints, you will have to adjust the path to the dataset in the respective `params/operative_config.gin` file.
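For programmatic use of a downloaded checkpoint outside the evaluation scripts, a condensed sketch of the restore sequence, with the calls copied from `jrdb/eval.py` and the paths as placeholders:

```
# Condensed from jrdb/eval.py; model_dir is a placeholder for an unpacked
# checkpoint directory such as ./models/jrdb.
import os
import gin
import tensorflow as tf
from human_scene_transformer.jrdb import dataset_params, input_fn
from human_scene_transformer.model import model as hst_model
from human_scene_transformer.model import model_params

model_dir = './models/jrdb'
gin.parse_config_files_and_bindings(
    [os.path.join(model_dir, 'params', 'operative_config.gin')],
    None, skip_unknown=True)

d_params = dataset_params.JRDBDatasetParams(num_agents=None)
dataset = input_fn.load_dataset(
    d_params, d_params.eval_scenes, augment=False, shuffle=False,
    allow_parallel=False, evaluation=False, repeat=False,
    keep_subsamples=False)

model = hst_model.HumanTrajectorySceneTransformer(model_params.ModelParams())
_ = model(next(iter(dataset.batch(1))), training=False)  # build the variables

ckpt = tf.train.Checkpoint(model=model)
ckpt.restore(
    os.path.join(model_dir, 'ckpts', 'ckpt-30')
).assert_existing_objects_matched()

full_pred, output_batch = model(next(iter(dataset.batch(1))), training=False)
```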
155 | 156 | ## Runtime 157 | Evaluation of forward inference runtime with single output mode: 158 | 159 | | #Humans | M1 - CPU | A100 - GPU | 160 | |---------|----------|------------| 161 | | 1 | 40Hz | 12Hz | 162 | | 10 | 30Hz | 11Hz | 163 | | 20 | 23Hz | 11Hz | 164 | | 50 | 12Hz | 11Hz | 165 | | 100 | 5Hz | 11Hz | 166 | | 150 | | 11Hz | -------------------------------------------------------------------------------- /human_scene_transformer/model/model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains Human Scene Transformer keras model.""" 16 | 17 | from typing import Any, Dict, Optional, Tuple 18 | 19 | from human_scene_transformer.model import agent_encoder 20 | from human_scene_transformer.model import agent_self_alignment 21 | from human_scene_transformer.model import attention 22 | from human_scene_transformer.model import multimodality_induction 23 | from human_scene_transformer.model import preprocess 24 | from human_scene_transformer.model.model_params import ModelParams 25 | 26 | import tensorflow as tf 27 | 28 | 29 | class HumanTrajectorySceneTransformer(tf.keras.Model): 30 | """A variant of Scene Transformer adapted for human trajectory prediction. 31 | 32 | This class is a Keras model that is designed to predict the trajectory of 33 | bounding boxes of nearby humans (pedestrians). It is designed to be used on 34 | the proxy robots. 35 | The input of the model is an input_batch dict that maps input features to 36 | tensors. The exact features depends on the dataset (currently the EDR and 37 | Jackrobot dataset), but the input and output layers of the model will convert 38 | them to a dict with common feature names before passing to the transformer 39 | network. 40 | 41 | The model is a transformer that encodes each agent-timestep into a fixed size 42 | vector before doing self-attention and cross-attention w/ scene features such 43 | as the occupancy grid. 44 | The model outputs each agents future trajectories in the form of Gaussian 45 | distributions over the agents positions and von-Mises distributions over their 46 | corresponding orientations at each timestep. Furthermore, these distributions 47 | are multimodal similar to a Mixture Distributions but are shared for each 48 | agent with scene level mixture probabilities. 49 | 50 | """ 51 | 52 | def __init__(self, 53 | params: ModelParams, 54 | input_layer: Optional[tf.keras.layers.Layer] = None, 55 | output_layer: Optional[tf.keras.layers.Layer] = None): 56 | """Constructs the model. 57 | 58 | Args: 59 | params: A model parameters object containing the model configuration. 60 | input_layer: A Keras Layer object that maps various features names in the 61 | dataset to a set of common names. 
E.g., tracks_tensors/xyz -> 62 | agent/positions 63 | output_layer: A Keras Layer object that maps various features predicted by 64 | the model to a set of names used in the dataset E.g., xyz -> 65 | agent/positions 66 | """ 67 | 68 | super().__init__() 69 | self.input_layer = input_layer 70 | self.output_layer = output_layer 71 | 72 | # Preprocess layer. 73 | self.preprocess_layer = preprocess.PreprocessLayer(params) 74 | 75 | self.agent_encoding_layer = ( 76 | agent_encoder.FeatureAttnAgentEncoderLearnedLayer(params) 77 | ) 78 | 79 | if params.scene_encoder is not None: 80 | self.scene_encoder = params.scene_encoder(params) 81 | else: 82 | self.scene_encoder = None 83 | 84 | # Transformer layers. 85 | self.transformer_layers = [] 86 | # Agent self alignment layer 87 | layer = agent_self_alignment.AgentSelfAlignmentLayer( 88 | hidden_size=params.hidden_size, 89 | ff_dim=params.transformer_ff_dim, 90 | num_heads=params.num_heads, 91 | mask_style='has_data') 92 | self.transformer_layers.append(layer) 93 | 94 | multimodality_induced = False 95 | for arch in params.attn_architecture: 96 | if arch == 'self-attention': 97 | layer = attention.SelfAttnTransformerLayer( 98 | hidden_size=params.hidden_size, 99 | ff_dim=params.transformer_ff_dim, 100 | num_heads=params.num_heads, 101 | drop_prob=params.drop_prob, 102 | mask_style=params.mask_style, 103 | flatten=True, 104 | multimodality_induced=multimodality_induced) 105 | elif arch == 'self-attention-mode': 106 | layer = attention.SelfAttnModeTransformerLayer( 107 | hidden_size=params.hidden_size, 108 | ff_dim=params.transformer_ff_dim, 109 | num_heads=params.num_heads, 110 | drop_prob=params.drop_prob) 111 | elif arch == 'cross-attention': 112 | if not multimodality_induced: 113 | layer = attention.SceneNonMultimodalCrossAttnTransformerLayer( 114 | hidden_size=params.hidden_size, 115 | ff_dim=params.transformer_ff_dim, 116 | num_heads=params.num_heads, 117 | drop_prob=params.drop_prob) 118 | else: 119 | layer = attention.SceneMultimodalCrossAttnTransformerLayer( 120 | hidden_size=params.hidden_size, 121 | ff_dim=params.transformer_ff_dim, 122 | num_heads=params.num_heads, 123 | drop_prob=params.drop_prob) 124 | elif arch == 'multimodality_induction': 125 | multimodality_induced = True 126 | layer = multimodality_induction.MultimodalityInduction( 127 | num_modes=params.num_modes, 128 | hidden_size=params.hidden_size, 129 | ff_dim=params.transformer_ff_dim, 130 | num_heads=params.num_heads, 131 | drop_prob=params.drop_prob) 132 | else: 133 | raise ValueError(f'Unknown attn architecture: {arch}. ' + 134 | 'Must be either self-attention or cross-attention.') 135 | self.transformer_layers.append(layer) 136 | # Prediction head. 137 | self.prediction_layer = params.prediction_head( 138 | hidden_units=params.prediction_head_hidden_units) 139 | 140 | def call( 141 | self, 142 | input_batch: Dict[str, tf.Tensor], 143 | is_hidden: Optional[Any] = None, 144 | training: bool = False) -> Tuple[Dict[str, tf.Tensor], Dict[str, Any]]: 145 | """Override the standard call() function to provide more flexibility. 146 | 147 | Args: 148 | input_batch: A dictionary that maps a str to a tensor. The tensor's first 149 | dimensions is always the batch dimension. These tensors include all 150 | timesteps (history, current and future) and all agents (observed and 151 | padded). 152 | is_hidden: An optional bool np array or tf.tensor of the shape: [batch, 153 | max_num_agents, num_time_steps, 1]. 
This tensor instructs the model on 154 | which agent-timestep needs to be predicted (if set to True). Note that 155 | you do not have to worry about padded agent or timesteps being 156 | predicted. The preprocessing layer will handle that for you. If you do 157 | not put in anything, a default behavior predction tensor will be used, 158 | which looks like: False for all agents in the history or current 159 | timesteps and True for all future timesteps. 160 | training: An optional bool that instructs the model if the call is used 161 | during training. 162 | 163 | Returns: 164 | output: A dict containing the model prediction. Note that the predicted 165 | tensors has the same shape as the input_batch so the history and 166 | current steps are included. 167 | input_batch: The input batch modified to include features generated by 168 | the preprocess layer, e.g., has_data. 169 | """ 170 | 171 | if self.input_layer is not None: 172 | input_batch = self.input_layer(input_batch, training=training) 173 | 174 | # Preprocess input_batch. 175 | input_batch = self.preprocess_layer(input_batch, is_hidden=is_hidden) 176 | 177 | # Feed the input_batch through the network. 178 | input_batch = self.agent_encoding_layer(input_batch, training=training) 179 | if self.scene_encoder is not None: 180 | input_batch = self.scene_encoder(input_batch, training=training) 181 | for layer in self.transformer_layers: 182 | input_batch = layer(input_batch, training=training) 183 | predictions = self.prediction_layer(input_batch, training=training) 184 | if self.output_layer is not None: 185 | output = self.output_layer(predictions, training=training) 186 | else: 187 | output = predictions 188 | return output, input_batch 189 | -------------------------------------------------------------------------------- /human_scene_transformer/data/math/data_types.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """A hidden layer to define all of the type definitions.""" 16 | 17 | from typing import Any, Optional, Sequence 18 | 19 | from human_scene_transformer.data.math import math_types 20 | from human_scene_transformer.data.math import pose3 21 | from human_scene_transformer.data.math import quaternion 22 | from human_scene_transformer.data.math import rotation3 23 | from human_scene_transformer.data.math import vector_util 24 | 25 | import numpy as np 26 | 27 | 28 | VECTOR_TYPE = math_types.VectorType 29 | Rotation3 = rotation3.Rotation3 30 | Pose3 = pose3.Pose3 31 | Quaternion = quaternion.Quaternion 32 | 33 | 34 | class Twist: 35 | """A class which represents a twist with linear and angular components.""" 36 | 37 | def __init__(self, 38 | linear: Optional[VECTOR_TYPE] = None, 39 | angular: Optional[VECTOR_TYPE] = None): 40 | """Constructs a new twist. 41 | 42 | Initializes a new twist object. 
43 | 44 | If no parameters are specified the default will be a zero twist. 45 | 46 | Args: 47 | linear: Linear component of twist, zero by default. 48 | angular: Angular component of twist, zero by default. 49 | """ 50 | if linear is None: 51 | self.linear = np.zeros(3) 52 | else: 53 | self.linear = vector_util.as_vector3( 54 | linear, dtype=np.float64, err_msg='Linear vector in Twist').copy() 55 | if angular is None: 56 | self.angular = np.zeros(3) 57 | else: 58 | self.angular = vector_util.as_vector3( 59 | angular, dtype=np.float64, err_msg='Angular vector in Twist').copy() 60 | 61 | @property 62 | def vec(self) -> np.ndarray: 63 | """Returns six-value representation: [linear, angular].""" 64 | return np.hstack((self.linear, self.angular)) 65 | 66 | def __repr__(self) -> str: 67 | """Generates a string that describes the twist. 68 | 69 | Returns: 70 | A string describing the twist. 71 | """ 72 | return 'Twist({}, {})'.format(self.linear, self.angular) 73 | 74 | 75 | class Wrench: 76 | """A class which represents a wrench with force and torque components.""" 77 | 78 | def __init__(self, 79 | force: Optional[VECTOR_TYPE] = None, 80 | torque: Optional[VECTOR_TYPE] = None): 81 | """Constructs a new wrench. 82 | 83 | Initializes a new wrench object. 84 | 85 | If no parameters are specified the default will be a zero wrench. 86 | 87 | Args: 88 | force: Force, zero by default. 89 | torque: Torque, zero by default. 90 | """ 91 | if force is None: 92 | self.force = np.zeros(3) 93 | else: 94 | self.force = vector_util.as_vector3( 95 | force, dtype=np.float64, err_msg='Force vector in Wrench').copy() 96 | if torque is None: 97 | self.torque = np.zeros(3) 98 | else: 99 | self.torque = vector_util.as_vector3( 100 | torque, dtype=np.float64, err_msg='Torque vector in Wrench').copy() 101 | 102 | @property 103 | def vec(self) -> np.ndarray: 104 | """Returns six-value representation: [force, torque].""" 105 | return np.hstack((self.force, self.torque)) 106 | 107 | def __repr__(self) -> str: 108 | """Generates a string that describes the wrench. 109 | 110 | Returns: 111 | A string describing the wrench. 112 | """ 113 | return 'Wrench({}, {})'.format(self.force, self.torque) 114 | 115 | 116 | class Stiffness: 117 | """A class which represents stiffness.""" 118 | 119 | def __init__(self, 120 | linear: Optional[VECTOR_TYPE] = None, 121 | torsional: Optional[VECTOR_TYPE] = None, 122 | matrix6x6: Optional[np.ndarray] = None): 123 | """Constructs a new stiffness. 124 | 125 | Initializes a new stiffness object. 126 | 127 | If no parameters are specified the default will be a zero stiffness. 128 | 129 | Args: 130 | linear: Linear component of stiffness, zero by default. 131 | torsional: Torsional component of stiffness, zero by default. 132 | matrix6x6: A full 6x6 stiffness matrix. When 'matrix6x6' is provided, 133 | 'linear' and 'torsional' are ignored and the linear and torsional 134 | components of Stiffness will be set as the diagonal elements of 135 | 'matrix6x6'. 136 | 137 | Raises: 138 | ValueError if the shape of 'matrix6x6' is not 6x6. 139 | """ 140 | if matrix6x6 is not None: 141 | if not np.array_equal(matrix6x6.shape, [6, 6]): 142 | raise ValueError('Invalid matrix6x6 shape: {0}'.format(matrix6x6.shape)) 143 | self.matrix6x6 = np.copy(matrix6x6).astype(np.float64) 144 | diag = self.matrix6x6.diagonal() 145 | self.linear = diag[:3] 146 | self.torsional = diag[3:] 147 | else: 148 | # Sets linear stiffness. 
149 | if linear is None: 150 | self.linear = np.zeros(3) 151 | else: 152 | self.linear = vector_util.as_vector3( 153 | linear, dtype=np.float64, 154 | err_msg='Linear vector in Stiffness').copy() 155 | # Sets torsional stiffness. 156 | if torsional is None: 157 | self.torsional = np.zeros(3) 158 | else: 159 | self.torsional = vector_util.as_vector3( 160 | torsional, dtype=np.float64, 161 | err_msg='Angular vector in Stiffness').copy() 162 | # Sets the 6x6 stiffness matrix. 163 | self.matrix6x6 = np.diag(self.vec) 164 | 165 | @classmethod 166 | def from_scalar(cls, scalar: float) -> 'Stiffness': 167 | return cls(linear=[scalar] * 3, torsional=[scalar] * 3) 168 | 169 | @property 170 | def vec(self) -> np.ndarray: 171 | """Returns the diagonal elements: [linear, torsional].""" 172 | return np.hstack((self.linear, self.torsional)) 173 | 174 | def __repr__(self) -> str: 175 | """Generates a string that describes the stiffness. 176 | 177 | Returns: 178 | A string describing the stiffness. 179 | """ 180 | return 'Stiffness({})'.format(self.matrix6x6) 181 | 182 | 183 | # Convenience functions. 184 | def vec6_to_pose3(vec6: VECTOR_TYPE) -> Pose3: 185 | """Creates a Pose3 object from a given 6-D vector. 186 | 187 | Args: 188 | vec6: A 6-D vector, [x, y, z, rx, ry, rz], that denotes translation [x, y, 189 | z] and orientation [rx, ry, rz] as Euler angles roll, pitch, yaw, in 190 | radians. 191 | 192 | Returns: 193 | A Pose3 object. 194 | """ 195 | assert len(vec6) == 6 196 | return Pose3( 197 | translation=vec6[:3], 198 | rotation=Rotation3.from_euler_angles(rpy_radians=vec6[3:])) 199 | 200 | 201 | def pose3_to_vec6(pose: Pose3) -> np.ndarray: 202 | """Returns a 6-D vector representation of the pose. 203 | 204 | Args: 205 | pose: A Pose3 object. 206 | 207 | Returns: 208 | A 6-D vector, [x, y, z, rx, ry, rz], that denotes translation 209 | [x, y, z] and orientation [rx, ry, rz]. 210 | """ 211 | return np.hstack((pose.translation, pose.rotation.euler_angles())) 212 | 213 | 214 | def pose3_to_vec6_in_axis_angles(pose: Pose3) -> np.ndarray: 215 | """Returns a 6-D vector representation of the pose. 216 | 217 | Args: 218 | pose: A Pose3 object. 219 | 220 | Returns: 221 | A 6-D vector, [x, y, z, rx, ry, rz], that denotes translation 222 | [x, y, z] and orientation [rx, ry, rz] as axis angles. 223 | """ 224 | return np.hstack( 225 | (pose.translation, pose.rotation.axis() * pose.rotation.angle())) 226 | 227 | 228 | def vec6_to_twist(vec6: VECTOR_TYPE) -> Twist: 229 | """Creates a Twist object from a given 6-D vector. 230 | 231 | Args: 232 | vec6: A 6-D vector, [vx, vy, vz, wx, wy, wz], that denotes linear velocity 233 | along X, Y, Z and angular velocity around X, Y, Z. 234 | 235 | Returns: 236 | A Twist object. 237 | """ 238 | assert len(vec6) == 6 239 | return Twist(linear=vec6[:3], angular=vec6[3:]) 240 | 241 | 242 | def vec6_to_wrench(vec6: VECTOR_TYPE) -> Wrench: 243 | """Creates a Wrench object from a given 6-D vector. 244 | 245 | Args: 246 | vec6: A 6-D vector, [fx, fy, fz, tx, ty, tz], that denotes force and torque. 247 | 248 | Returns: 249 | A Wrench object. 250 | """ 251 | assert len(vec6) == 6 252 | return Wrench(force=vec6[:3], torque=vec6[3:]) 253 | 254 | 255 | def vec6_to_stiffness(vec6: VECTOR_TYPE) -> Stiffness: 256 | """Creates a Stiffness object from a given 6-D vector. 257 | 258 | Args: 259 | vec6: A 6-D vector, [x, y, z, rx, ry, rz], that denotes translational and 260 | rotational stiffness for X, Y, Z. 261 | 262 | Returns: 263 | A Stiffness object. 
264 | """ 265 | assert len(vec6) == 6 266 | return Stiffness(linear=vec6[:3], torsional=vec6[3:]) 267 | 268 | 269 | def to_list(data: Any) -> Sequence[float]: 270 | """Flattens a common data type to a list of float. 271 | 272 | Args: 273 | data: The input data to flatten. 274 | 275 | Returns: 276 | The flattend list, i.e., the vectorized parametrization of the input data. 277 | """ 278 | if isinstance(data, float): 279 | return [data] 280 | elif isinstance(data, np.ndarray): 281 | return data.tolist() 282 | elif isinstance(data, Pose3): 283 | return data.vec7.tolist() 284 | elif isinstance(data, Twist): 285 | return data.vec.tolist() 286 | elif isinstance(data, Wrench): 287 | return data.vec.tolist() 288 | else: 289 | raise ValueError('{} is not supported in to_list()'.format(type(data))) 290 | -------------------------------------------------------------------------------- /human_scene_transformer/data/jrdb_preprocess_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Preprocesses the raw test split of JRDB. 16 | """ 17 | 18 | import os 19 | 20 | from absl import app 21 | from absl import flags 22 | 23 | from human_scene_transformer.data import utils 24 | import numpy as np 25 | import pandas as pd 26 | import tensorflow as tf 27 | import tqdm 28 | 29 | 30 | _INPUT_PATH = flags.DEFINE_string( 31 | 'input_path', 32 | default=None, 33 | help='Path to jrdb2022 dataset.' 34 | ) 35 | 36 | _OUTPUT_PATH = flags.DEFINE_string( 37 | 'output_path', 38 | default=None, 39 | help='Path to output folder.' 40 | ) 41 | 42 | _PROCESS_POINTCLOUDS = flags.DEFINE_bool( 43 | 'process_pointclouds', 44 | default=True, 45 | help='Whether to process pointclouds.' 46 | ) 47 | 48 | _MAX_DISTANCE_TO_ROBOT = flags.DEFINE_float( 49 | 'max_distance_to_robot', 50 | default=15., 51 | help=('Maximum distance of agent to the robot to be included' 52 | ' in the processed dataset.') 53 | ) 54 | 55 | _MAX_PC_DISTANCE_TO_ROBOT = flags.DEFINE_float( 56 | 'max_pc_distance_to_robot', 57 | default=10., 58 | help=('Maximum distance of pointcloud point to the robot to be included' 59 | ' in the processed dataset.') 60 | ) 61 | 62 | _TRACKING_METHOD = flags.DEFINE_string( 63 | 'tracking_method', 64 | default='ss3d_mot', 65 | help='Name of tracking method to use.' 
66 | ) 67 | 68 | _TRACKING_CONFIDENCE_THRESHOLD = flags.DEFINE_float( 69 | 'tracking_confidence_threshold', 70 | default=.0, 71 | help=('Confidence threshold for tracked agent instance to be included' 72 | ' in the processed dataset.') 73 | ) 74 | 75 | AGENT_KEYPOINTS = True 76 | FROM_DETECTIONS = True 77 | 78 | 79 | def list_test_scenes(input_path): 80 | scenes = os.listdir(os.path.join(input_path, 'images', 'image_0')) 81 | scenes.sort() 82 | return scenes 83 | 84 | 85 | def get_agents_features_df_with_box( 86 | input_path, scene_id, max_distance_to_robot=10.0 87 | ): 88 | """Returns agents features with bounding box from raw leaderboard data.""" 89 | jrdb_header = [ 90 | 'frame', 91 | 'track id', 92 | 'type', 93 | 'truncated', 94 | 'occluded', 95 | 'alpha', 96 | 'bb_left', 97 | 'bb_top', 98 | 'bb_width', 99 | 'bb_height', 100 | 'x', 101 | 'y', 102 | 'z', 103 | 'height', 104 | 'width', 105 | 'length', 106 | 'rotation_y', 107 | 'score', 108 | ] 109 | scene_data_file = utils.get_file_handle( 110 | os.path.join( 111 | input_path, 'labels', _TRACKING_METHOD.value, 112 | f'{scene_id:04}' + '.txt' 113 | ) 114 | ) 115 | df = pd.read_csv(scene_data_file, sep=' ', names=jrdb_header) 116 | 117 | def camera_to_lower_velodyne(p): 118 | return np.stack( 119 | [p[..., 2], -p[..., 0], -p[..., 1] + (0.742092 - 0.606982)], axis=-1 120 | ) 121 | 122 | df = df[df['score'] >= _TRACKING_CONFIDENCE_THRESHOLD.value] 123 | 124 | df['p'] = df[['x', 'y', 'z']].apply( 125 | lambda s: camera_to_lower_velodyne(s.to_numpy()), axis=1 126 | ) 127 | df['distance'] = df['p'].apply(lambda s: np.linalg.norm(s, axis=-1)) 128 | df['l'] = df['height'] 129 | df['h'] = df['width'] 130 | df['w'] = df['length'] 131 | df['yaw'] = df['rotation_y'] 132 | 133 | df['id'] = df['track id'].apply(lambda s: f'pedestrian:{s}') 134 | df['timestep'] = df['frame'] 135 | 136 | df = df.set_index(['timestep', 'id']) 137 | 138 | df = df[df['distance'] <= max_distance_to_robot] 139 | 140 | return df[['p', 'yaw', 'l', 'h', 'w']] 141 | 142 | 143 | def jrdb_preprocess_test(input_path, output_path): 144 | """Preprocesses the raw test split of JRDB.""" 145 | 146 | tf.keras.utils.set_random_seed(123) 147 | 148 | scenes = list_test_scenes(os.path.join(input_path, 'test_dataset')) 149 | subsample = 1 150 | for scene in tqdm.tqdm(scenes): 151 | scene_save_name = scene + '_test' 152 | agents_df = get_agents_features_df_with_box( 153 | os.path.join(input_path, 'test_dataset'), 154 | scenes.index(scene), 155 | max_distance_to_robot=_MAX_DISTANCE_TO_ROBOT.value, 156 | ) 157 | 158 | robot_odom = utils.get_robot( 159 | os.path.join(input_path, 'processed', 'odometry', 'test'), scene 160 | ) 161 | 162 | if AGENT_KEYPOINTS: 163 | keypoints = utils.get_agents_keypoints( 164 | os.path.join( 165 | input_path, 'processed', 'labels', 166 | 'labels_3d_keypoints', 'test', _TRACKING_METHOD.value 167 | ), 168 | scene, 169 | ) 170 | keypoints_df = pd.DataFrame.from_dict( 171 | keypoints, orient='index' 172 | ).rename_axis(['timestep', 'id']) # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 173 | 174 | agents_df = agents_df.join(keypoints_df) 175 | agents_df.keypoints.fillna( 176 | dict( 177 | zip( 178 | agents_df.index[agents_df['keypoints'].isnull()], 179 | [np.ones((33, 3)) * np.nan] 180 | * len( 181 | agents_df.loc[ 182 | agents_df['keypoints'].isnull(), 'keypoints' 183 | ] 184 | ), 185 | ) 186 | ), 187 | inplace=True, 188 | ) 189 | 190 | robot_df = pd.DataFrame.from_dict(robot_odom, orient='index').rename_axis( # pytype: disable=missing-parameter # 
pandas-drop-duplicates-overloads 191 | ['timestep'] 192 | ) 193 | # Remove extra data odometry datapoints 194 | robot_df = robot_df.iloc[agents_df.index.levels[0]] 195 | 196 | assert (agents_df.index.levels[0] == robot_df.index).all() 197 | 198 | # Subsample 199 | assert len(agents_df.index.levels[0]) == agents_df.index.levels[0].max() + 1 200 | agents_df_subsampled_index = agents_df.unstack('id').iloc[::subsample].index 201 | agents_df = ( 202 | agents_df.unstack('id') 203 | .iloc[::subsample] 204 | .reset_index(drop=True) 205 | .stack('id', dropna=True) 206 | ) 207 | 208 | agents_in_odometry_df = utils.agents_to_odometry_frame( 209 | agents_df, robot_df.iloc[::subsample].reset_index(drop=True) 210 | ) 211 | 212 | agents_pos_ragged_tensor = utils.agents_pos_to_ragged_tensor( 213 | agents_in_odometry_df 214 | ) 215 | agents_yaw_ragged_tensor = utils.agents_yaw_to_ragged_tensor( 216 | agents_in_odometry_df 217 | ) 218 | assert ( 219 | agents_pos_ragged_tensor.shape[0] == agents_yaw_ragged_tensor.shape[0] 220 | ) 221 | 222 | tf.data.Dataset.from_tensors(agents_pos_ragged_tensor).save( 223 | os.path.join(output_path, scene_save_name, 'agents', 'position') 224 | ) 225 | tf.data.Dataset.from_tensors(agents_yaw_ragged_tensor).save( 226 | os.path.join(output_path, scene_save_name, 'agents', 'orientation') 227 | ) 228 | 229 | if AGENT_KEYPOINTS: 230 | agents_keypoints_ragged_tensor = utils.agents_keypoints_to_ragged_tensor( 231 | agents_in_odometry_df 232 | ) 233 | tf.data.Dataset.from_tensors(agents_keypoints_ragged_tensor).save( 234 | os.path.join(output_path, scene_save_name, 'agents', 'keypoints') 235 | ) 236 | 237 | robot_in_odometry_df = utils.robot_to_odometry_frame(robot_df) 238 | robot_pos = tf.convert_to_tensor( 239 | np.stack(robot_in_odometry_df.iloc[::subsample]['p'].values).astype( 240 | np.float32 241 | ) 242 | ) 243 | robot_orientation = tf.convert_to_tensor( 244 | np.stack(robot_in_odometry_df.iloc[::subsample]['yaw'].values).astype( 245 | np.float32 246 | ) 247 | )[..., tf.newaxis] 248 | 249 | tf.data.Dataset.from_tensors(robot_pos).save( 250 | os.path.join(output_path, scene_save_name, 'robot', 'position') 251 | ) 252 | tf.data.Dataset.from_tensors(robot_orientation).save( 253 | os.path.join(output_path, scene_save_name, 'robot', 'orientation') 254 | ) 255 | 256 | if _PROCESS_POINTCLOUDS.value: 257 | scene_pointcloud_dict = utils.get_scene_poinclouds( 258 | os.path.join(input_path, 'test_dataset'), scene, subsample=subsample 259 | ) 260 | # Remove extra timesteps 261 | scene_pointcloud_dict = { 262 | ts: scene_pointcloud_dict[ts] for ts in agents_df_subsampled_index 263 | } 264 | 265 | scene_pc_odometry = utils.pc_to_odometry_frame( 266 | scene_pointcloud_dict, robot_df 267 | ) 268 | 269 | filtered_pc = utils.filter_agents_and_ground_from_point_cloud( 270 | agents_in_odometry_df, scene_pc_odometry, robot_in_odometry_df, 271 | max_dist=_MAX_PC_DISTANCE_TO_ROBOT.value, 272 | ) 273 | 274 | scene_pc_ragged_tensor = tf.ragged.stack(filtered_pc) 275 | 276 | assert ( 277 | agents_pos_ragged_tensor.bounding_shape()[1] 278 | == scene_pc_ragged_tensor.shape[0] 279 | ) 280 | 281 | tf.data.Dataset.from_tensors(scene_pc_ragged_tensor).save( 282 | os.path.join(output_path, scene_save_name, 'scene', 'pc'), 283 | compression='GZIP', 284 | ) 285 | 286 | 287 | def main(argv): 288 | if len(argv) > 1: 289 | raise app.UsageError('Too many command-line arguments.') 290 | jrdb_preprocess_test(_INPUT_PATH.value, _OUTPUT_PATH.value) 291 | 292 | if __name__ == '__main__': 293 | 
flags.mark_flags_as_required([ 294 | 'input_path', 'output_path' 295 | ]) 296 | app.run(main) 297 | -------------------------------------------------------------------------------- /human_scene_transformer/data/jrdb_preprocess_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Preprocesses the raw train split of JRDB. 16 | """ 17 | 18 | import collections 19 | import json 20 | import os 21 | 22 | from absl import app 23 | from absl import flags 24 | 25 | from human_scene_transformer.data import utils 26 | import numpy as np 27 | import pandas as pd 28 | import tensorflow as tf 29 | import tqdm 30 | 31 | _INPUT_PATH = flags.DEFINE_string( 32 | 'input_path', 33 | default=None, 34 | help='Path to jrdb2022 dataset.' 35 | ) 36 | 37 | _OUTPUT_PATH = flags.DEFINE_string( 38 | 'output_path', 39 | default=None, 40 | help='Path to output folder.' 41 | ) 42 | 43 | _PROCESS_POINTCLOUDS = flags.DEFINE_bool( 44 | 'process_pointclouds', 45 | default=True, 46 | help='Whether to process pointclouds.' 47 | ) 48 | 49 | _MAX_DISTANCE_TO_ROBOT = flags.DEFINE_float( 50 | 'max_distance_to_robot', 51 | default=15., 52 | help=('Maximum distance of agent to the robot to be included' 53 | ' in the processed dataset.') 54 | ) 55 | 56 | _MAX_PC_DISTANCE_TO_ROBOT = flags.DEFINE_float( 57 | 'max_pc_distance_to_robot', 58 | default=10., 59 | help=('Maximum distance of pointcloud point to the robot to be included' 60 | ' in the processed dataset.') 61 | ) 62 | 63 | 64 | AGENT_KEYPOINTS = True 65 | FROM_DETECTIONS = True 66 | 67 | 68 | def get_agents_dict(input_path, scene): 69 | """Returns agents GT data from raw data.""" 70 | scene_data_file = utils.get_file_handle( 71 | os.path.join(input_path, 'labels', 'labels_3d', scene + '.json') 72 | ) 73 | scene_data = json.load(scene_data_file) 74 | 75 | agents = collections.defaultdict(list) 76 | 77 | for frame in scene_data['labels']: 78 | ts = int(frame.split('.')[0]) 79 | for det in scene_data['labels'][frame]: 80 | agents[det['label_id']].append((ts, det)) 81 | return agents 82 | 83 | 84 | def get_agents_dict_from_detections(input_path, scene): 85 | """Returns agents data from fused detections raw data.""" 86 | scene_data_file = utils.get_file_handle( 87 | os.path.join( 88 | input_path, 'labels', 'labels_detections_3d', scene + '.json' 89 | ) 90 | ) 91 | scene_data = json.load(scene_data_file) 92 | 93 | agents = collections.defaultdict(list) 94 | 95 | for frame in scene_data['labels']: 96 | ts = int(frame.split('.')[0]) 97 | for det in scene_data['labels'][frame]: 98 | agents[det['label_id']].append((ts, det)) 99 | return agents 100 | 101 | 102 | def get_agents_features(agents_dict, max_distance_to_robot=10): 103 | """Returns agents features from raw data dict.""" 104 | agents_pos_dict = collections.defaultdict(dict) 105 | for agent_id, agent_data in agents_dict.items(): 106 | for 
ts, agent_instance in agent_data: 107 | if agent_instance['attributes']['distance'] <= max_distance_to_robot: 108 | agents_pos_dict[(ts, agent_id)] = { 109 | 'p': np.array([ 110 | agent_instance['box']['cx'], 111 | agent_instance['box']['cy'], 112 | agent_instance['box']['cz'], 113 | ]), 114 | # rotation angle is relative to negative x axis of robot 115 | 'yaw': np.pi - agent_instance['box']['rot_z'], 116 | } 117 | return agents_pos_dict 118 | 119 | 120 | def jrdb_preprocess_train(input_path, output_path): 121 | """Preprocesses the raw train split of JRDB.""" 122 | 123 | tf.keras.utils.set_random_seed(123) 124 | 125 | subsample = 1 126 | 127 | scenes = utils.list_scenes( 128 | os.path.join(input_path, 'train_dataset') 129 | ) 130 | for scene in tqdm.tqdm(scenes): 131 | if not FROM_DETECTIONS: 132 | agents_dict = get_agents_dict( 133 | os.path.join(input_path, 'train_dataset'), scene 134 | ) 135 | else: 136 | agents_dict = get_agents_dict_from_detections( 137 | os.path.join(input_path, 'processed'), scene 138 | ) 139 | 140 | agents_features = utils.get_agents_features_with_box( 141 | agents_dict, max_distance_to_robot=_MAX_DISTANCE_TO_ROBOT.value 142 | ) 143 | 144 | robot_odom = utils.get_robot( 145 | os.path.join(input_path, 'processed', 'odometry', 'train'), scene 146 | ) 147 | 148 | agents_df = pd.DataFrame.from_dict( 149 | agents_features, orient='index' 150 | ).rename_axis(['timestep', 'id']) # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 151 | 152 | if AGENT_KEYPOINTS: 153 | keypoints = utils.get_agents_keypoints( 154 | os.path.join( 155 | input_path, 'processed', 'labels', 156 | 'labels_3d_keypoints', 'train'), 157 | scene, 158 | ) 159 | keypoints_df = pd.DataFrame.from_dict( 160 | keypoints, orient='index' 161 | ).rename_axis(['timestep', 'id']) # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 162 | 163 | agents_df = agents_df.join(keypoints_df) 164 | agents_df.keypoints.fillna( 165 | dict( 166 | zip( 167 | agents_df.index[agents_df['keypoints'].isnull()], 168 | [np.ones((33, 3)) * np.nan] 169 | * len( 170 | agents_df.loc[ 171 | agents_df['keypoints'].isnull(), 'keypoints' 172 | ] 173 | ), 174 | ) 175 | ), 176 | inplace=True, 177 | ) 178 | 179 | robot_df = pd.DataFrame.from_dict(robot_odom, orient='index').rename_axis( # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 180 | ['timestep'] 181 | ) 182 | # Remove extra data odometry datapoints 183 | robot_df = robot_df.iloc[agents_df.index.levels[0]] 184 | 185 | assert (agents_df.index.levels[0] == robot_df.index).all() 186 | 187 | # Subsample 188 | assert len(agents_df.index.levels[0]) == agents_df.index.levels[0].max() + 1 189 | agents_df_subsampled_index = agents_df.unstack('id').iloc[::subsample].index 190 | agents_df = ( 191 | agents_df.unstack('id') 192 | .iloc[::subsample] 193 | .reset_index(drop=True) 194 | .stack('id', dropna=True) 195 | ) 196 | 197 | agents_in_odometry_df = utils.agents_to_odometry_frame( 198 | agents_df, robot_df.iloc[::subsample].reset_index(drop=True) 199 | ) 200 | 201 | agents_pos_ragged_tensor = utils.agents_pos_to_ragged_tensor( 202 | agents_in_odometry_df 203 | ) 204 | agents_yaw_ragged_tensor = utils.agents_yaw_to_ragged_tensor( 205 | agents_in_odometry_df 206 | ) 207 | assert ( 208 | agents_pos_ragged_tensor.shape[0] == agents_yaw_ragged_tensor.shape[0] 209 | ) 210 | 211 | tf.data.Dataset.from_tensors(agents_pos_ragged_tensor).save( 212 | os.path.join(output_path, scene, 'agents', 'position') 213 | ) 214 | 
tf.data.Dataset.from_tensors(agents_yaw_ragged_tensor).save( 215 | os.path.join(output_path, scene, 'agents', 'orientation') 216 | ) 217 | 218 | if AGENT_KEYPOINTS: 219 | agents_keypoints_ragged_tensor = utils.agents_keypoints_to_ragged_tensor( 220 | agents_in_odometry_df 221 | ) 222 | tf.data.Dataset.from_tensors(agents_keypoints_ragged_tensor).save( 223 | os.path.join(output_path, scene, 'agents', 'keypoints') 224 | ) 225 | 226 | robot_in_odometry_df = utils.robot_to_odometry_frame(robot_df) 227 | robot_pos = tf.convert_to_tensor( 228 | np.stack(robot_in_odometry_df.iloc[::subsample]['p'].values).astype( 229 | np.float32 230 | ) 231 | ) 232 | robot_orientation = tf.convert_to_tensor( 233 | np.stack(robot_in_odometry_df.iloc[::subsample]['yaw'].values).astype( 234 | np.float32 235 | ) 236 | )[..., tf.newaxis] 237 | 238 | tf.data.Dataset.from_tensors(robot_pos).save( 239 | os.path.join(output_path, scene, 'robot', 'position') 240 | ) 241 | tf.data.Dataset.from_tensors(robot_orientation).save( 242 | os.path.join(output_path, scene, 'robot', 'orientation') 243 | ) 244 | 245 | if _PROCESS_POINTCLOUDS.value: 246 | scene_pointcloud_dict = utils.get_scene_poinclouds( 247 | os.path.join(input_path, 'train_dataset'), 248 | scene, 249 | subsample=subsample, 250 | ) 251 | 252 | # Remove extra timesteps 253 | scene_pointcloud_dict = { 254 | ts: scene_pointcloud_dict[ts] for ts in agents_df_subsampled_index 255 | } 256 | 257 | scene_pc_odometry = utils.pc_to_odometry_frame( 258 | scene_pointcloud_dict, robot_df 259 | ) 260 | 261 | filtered_pc = utils.filter_agents_and_ground_from_point_cloud( 262 | agents_in_odometry_df, scene_pc_odometry, robot_in_odometry_df, 263 | max_dist=_MAX_PC_DISTANCE_TO_ROBOT.value, 264 | ) 265 | 266 | scene_pc_ragged_tensor = tf.ragged.stack(filtered_pc) 267 | 268 | assert ( 269 | agents_pos_ragged_tensor.bounding_shape()[1] 270 | == scene_pc_ragged_tensor.shape[0] 271 | ) 272 | 273 | tf.data.Dataset.from_tensors(scene_pc_ragged_tensor).save( 274 | os.path.join(output_path, scene, 'scene', 'pc'), compression='GZIP' 275 | ) 276 | 277 | 278 | def main(argv): 279 | if len(argv) > 1: 280 | raise app.UsageError('Too many command-line arguments.') 281 | jrdb_preprocess_train(_INPUT_PATH.value, _OUTPUT_PATH.value) 282 | 283 | if __name__ == '__main__': 284 | flags.mark_flags_as_required([ 285 | 'input_path', 'output_path' 286 | ]) 287 | app.run(main) 288 | -------------------------------------------------------------------------------- /human_scene_transformer/data/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The human_scene_transformer Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
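# Usage sketch: the preprocessing scripts above (jrdb_preprocess_train.py and
# jrdb_preprocess_test.py) typically chain the helpers defined below roughly as
# follows; the variable names here are illustrative only:
#
#   robot_in_odometry_df = robot_to_odometry_frame(robot_df)
#   agents_in_odometry_df = agents_to_odometry_frame(agents_df, robot_df)
#   agents_pos_ragged_tensor = agents_pos_to_ragged_tensor(agents_in_odometry_df)
#   agents_yaw_ragged_tensor = agents_yaw_to_ragged_tensor(agents_in_odometry_df)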
14 | 15 | """Utility Functions.""" 16 | 17 | import collections 18 | import json 19 | import os 20 | import tempfile 21 | 22 | from human_scene_transformer.data.math import pose3 23 | from human_scene_transformer.data.math import quaternion 24 | from human_scene_transformer.data.math import rotation3 25 | 26 | import numpy as np 27 | import pandas as pd 28 | 29 | import tensorflow as tf 30 | 31 | import open3d 32 | 33 | 34 | def maybe_makedir(path): 35 | if not os.path.exists(path): 36 | os.makedirs(path) 37 | 38 | 39 | def get_file_handle(path, mode='rt'): 40 | file_handle = open(path, mode) 41 | return file_handle 42 | 43 | 44 | def list_scenes(input_path): 45 | scenes = os.listdir(os.path.join(input_path, 'labels', 'labels_3d')) 46 | scenes.sort() 47 | return [scene[:-5] for scene in scenes] 48 | 49 | 50 | def get_robot(input_path, scene): 51 | """Returns robot features from raw data.""" 52 | odom_data_file = get_file_handle( 53 | os.path.join(input_path, scene + '.json')) 54 | odom_data = json.load(odom_data_file) 55 | 56 | robot = collections.defaultdict(list) 57 | 58 | for pc_ts, pose in odom_data['odometry'].items(): 59 | ts = int(pc_ts.split('.')[0]) 60 | robot[ts] = { 61 | 'p': np.array([pose['position']['x'], 62 | pose['position']['y'], 63 | pose['position']['z']]), 64 | 'q': np.array([pose['orientation']['x'], 65 | pose['orientation']['y'], 66 | pose['orientation']['z'], 67 | pose['orientation']['w']]), 68 | } 69 | 70 | return robot 71 | 72 | 73 | def get_agents_keypoints(input_path, scene): 74 | """Returns agents keypoints from raw data.""" 75 | scene_data_file = get_file_handle( 76 | os.path.join(input_path, scene + '.json')) 77 | scene_data = json.load(scene_data_file) 78 | 79 | agents_keypoints = collections.defaultdict(dict) 80 | 81 | for frame in scene_data['labels']: 82 | ts = int(frame.split('.')[0]) 83 | for det in scene_data['labels'][frame]: 84 | agents_keypoints[(ts, det['label_id'])] = { 85 | 'keypoints': np.array(det['keypoints']).reshape(33, 3)} 86 | return agents_keypoints 87 | 88 | 89 | def get_scene_poinclouds(input_path, scene, subsample=1): 90 | """Returns scene point clouds from raw data.""" 91 | pc_files = os.listdir( 92 | os.path.join(input_path, 'pointclouds', 'lower_velodyne', scene)) 93 | 94 | pc_files = sorted(pc_files) 95 | pc_dict = collections.OrderedDict() 96 | for _, pc_file in enumerate(pc_files[::subsample]): 97 | pc_file_path = os.path.join( 98 | input_path, 'pointclouds', 'lower_velodyne', scene, pc_file) 99 | with get_file_handle(pc_file_path, 'rb') as f: 100 | with tempfile.NamedTemporaryFile() as tmp: 101 | tmp.write(f.read()) 102 | pcd = open3d.io.read_point_cloud(tmp.name, format='pcd') 103 | pc_dict[int(pc_file.split('.')[0])] = pcd 104 | return pc_dict 105 | 106 | 107 | def pc_to_odometry_frame(pc_dict, robot_df): 108 | """Transforms point clouds into odometry frame.""" 109 | world_pose_odometry = pose3.Pose3( 110 | rotation3.Rotation3( 111 | quaternion.Quaternion(robot_df.loc[0]['q'])), robot_df.loc[0]['p']) 112 | odometry_pose_world = world_pose_odometry.inverse() 113 | 114 | pc_list = [] 115 | for ts, pc in pc_dict.items(): 116 | robot_odometry_dp = robot_df.loc[ts] 117 | 118 | world_pose_robot = pose3.Pose3( 119 | rotation3.Rotation3( 120 | quaternion.Quaternion( 121 | robot_odometry_dp['q'])), robot_odometry_dp['p']) 122 | 123 | odometry_pose_robot = odometry_pose_world * world_pose_robot 124 | 125 | odometry_pc = pc.transform(odometry_pose_robot.matrix4x4()) 126 | 127 | pc_list.append(np.array(odometry_pc.points, 
dtype=np.float32)) 128 | 129 | return pc_list 130 | 131 | 132 | def robot_to_odometry_frame(robot_df): 133 | """Transforms robot features into odometry frame.""" 134 | world_pose_odometry = pose3.Pose3( 135 | rotation3.Rotation3( 136 | quaternion.Quaternion(robot_df.loc[0]['q'])), robot_df.loc[0]['p']) 137 | odometry_pose_world = world_pose_odometry.inverse() 138 | 139 | robot_dict = {} 140 | for ts, row in robot_df.iterrows(): 141 | world_pose_robot = pose3.Pose3( 142 | rotation3.Rotation3(quaternion.Quaternion(row['q'])), row['p']) 143 | odometry_pose_robot = odometry_pose_world * world_pose_robot 144 | 145 | robot_dict[ts] = { 146 | 'p': odometry_pose_robot.translation, 147 | 'yaw': odometry_pose_robot.rotation.euler_angles(radians=True)[-1] 148 | } 149 | return pd.DataFrame.from_dict( 150 | robot_dict, orient='index').rename_axis(['timestep']) # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 151 | 152 | 153 | def agents_to_odometry_frame(agents_df, robot_df): 154 | """Transforms agents features into odometry frame.""" 155 | world_pose_odometry = pose3.Pose3( 156 | rotation3.Rotation3( 157 | quaternion.Quaternion(robot_df.loc[0]['q'])), robot_df.loc[0]['p']) 158 | odometry_pose_world = world_pose_odometry.inverse() 159 | 160 | agents_dict = {} 161 | for index, row in agents_df.iterrows(): 162 | ts = index[0] 163 | robot_odometry_dp = robot_df.loc[ts] 164 | 165 | world_pose_robot = pose3.Pose3( 166 | rotation3.Rotation3( 167 | quaternion.Quaternion(robot_odometry_dp['q'])), 168 | robot_odometry_dp['p']) 169 | 170 | robot_pose_agent = pose3.Pose3( 171 | rotation3.Rotation3.from_euler_angles( 172 | rpy_radians=[0., 0., row['yaw']]), row['p']) 173 | 174 | odometry_pose_agent = (odometry_pose_world * world_pose_robot 175 | * robot_pose_agent) 176 | 177 | agents_dict[index] = { 178 | 'p': odometry_pose_agent.translation, 179 | 'yaw': odometry_pose_agent.rotation.euler_angles(radians=True)[-1]} 180 | 181 | if 'l' in row: 182 | agents_dict[index]['l'] = row['l'] 183 | agents_dict[index]['w'] = row['w'] 184 | agents_dict[index]['h'] = row['h'] 185 | 186 | if 'keypoints' in row: 187 | world_rot_robot = rotation3.Rotation3( 188 | quaternion.Quaternion(robot_odometry_dp['q'])) 189 | odometry_rot_robot = odometry_pose_world.rotation * world_rot_robot 190 | rot_keypoints = [] 191 | for keypoint in row['keypoints']: 192 | if np.isnan(keypoint).any(): 193 | rot_keypoints.append(keypoint) 194 | else: 195 | rot_keypoints.append(odometry_rot_robot.rotate_point(keypoint)) 196 | rot_keypoints = np.array(rot_keypoints) 197 | agents_dict[index]['keypoints'] = rot_keypoints 198 | 199 | return pd.DataFrame.from_dict( 200 | agents_dict, orient='index').rename_axis(['timestep', 'id']) # pytype: disable=missing-parameter # pandas-drop-duplicates-overloads 201 | 202 | 203 | def get_agents_features_with_box(agents_dict, max_distance_to_robot=10): 204 | """Returns agents features with bounding box from raw data dict.""" 205 | agents_pos_dict = collections.defaultdict(dict) 206 | for agent_id, agent_data in agents_dict.items(): 207 | for (ts, agent_instance) in agent_data: 208 | if agent_instance['attributes']['distance'] <= max_distance_to_robot: 209 | agents_pos_dict[(ts, agent_id)] = { 210 | 'p': np.array([agent_instance['box']['cx'], 211 | agent_instance['box']['cy'], 212 | agent_instance['box']['cz']]), 213 | # rotation angle is relative to negative x axis of robot 214 | 'yaw': np.pi - agent_instance['box']['rot_z'], 215 | 'l': agent_instance['box']['l'], 216 | 'w':
agent_instance['box']['w'], 217 | 'h': agent_instance['box']['h'] 218 | } 219 | return agents_pos_dict 220 | 221 | 222 | def agents_pos_to_ragged_tensor(agents_df): 223 | tensor_list = [] 224 | for _, df in agents_df.groupby('id'): 225 | dropped_df = df.droplevel(1, axis=0) 226 | r_tensor = tf.RaggedTensor.from_value_rowids( 227 | values=np.vstack(df['p'].values).flatten().astype(np.float32), 228 | value_rowids=np.tile( 229 | np.array(dropped_df.index), (3, 1)).transpose().flatten()) 230 | tensor_list.append(r_tensor) 231 | return tf.stack(tensor_list) 232 | 233 | 234 | def agents_yaw_to_ragged_tensor(agents_df): 235 | tensor_list = [] 236 | for _, df in agents_df.groupby('id'): 237 | dropped_df = df.droplevel(1, axis=0) 238 | r_tensor = tf.RaggedTensor.from_value_rowids( 239 | values=np.vstack(df['yaw'].values).flatten().astype(np.float32), 240 | value_rowids=np.array(dropped_df.index)) 241 | tensor_list.append(r_tensor) 242 | return tf.stack(tensor_list) 243 | 244 | 245 | def agents_keypoints_to_ragged_tensor(agents_df): 246 | tensor_list = [] 247 | for _, df in agents_df.groupby('id'): 248 | dropped_df = df.droplevel(1, axis=0) 249 | r_tensor = tf.RaggedTensor.from_value_rowids( 250 | values=np.vstack(df['keypoints'].values).astype(np.float32), 251 | value_rowids=np.tile( 252 | np.array(dropped_df.index), (33, 1)).transpose().flatten()) 253 | tensor_list.append(r_tensor) 254 | return tf.stack(tensor_list) 255 | 256 | 257 | def box_to_hyperplanes(pos, yaw, l, w, h): 258 | """Transforms a bounding box to a set of hyperplanes.""" 259 | s = np.sin(yaw) 260 | c = np.cos(yaw) 261 | normal = np.array([ 262 | np.array([0, 0, h/2]), 263 | np.array([0, 0, -h/2]), 264 | np.array([-s * w/2, c * w/2, 0]), 265 | np.array([-s * -w/2, c * -w/2, 0]), 266 | np.array([c * l/2, s * l/2, 0]), 267 | np.array([c * -l/2, s * -l/2, 0])]) 268 | points = pos + normal 269 | normal = normal / np.linalg.norm(normal, axis=-1, keepdims=True) 270 | w = -normal 271 | 272 | d = -w[:, 0] * points[:, 0] - w[:, 1] * points[:, 1] - w[:, 2] * points[:, 2] 273 | 274 | return w, d 275 | 276 | 277 | def filter_agents_and_ground_from_point_cloud( 278 | agents_df, pointcloud_dict, robot_in_odometry_df, max_dist=10.): 279 | """Filter points which are in human bb or belong to ground.""" 280 | for t, agent_df in agents_df.groupby('timestep'): 281 | pc_points = pointcloud_dict[t] 282 | robot_p = robot_in_odometry_df.loc[t]['p'][:2] 283 | dist_mask = np.linalg.norm(robot_p - pc_points[..., :2], axis=-1) < max_dist 284 | pc_points = pc_points[ 285 | (pc_points[:, -1] > -0.2) & (pc_points[:, -1] < 0.5) & dist_mask] 286 | for _, row in agent_df.iterrows(): 287 | w, d = box_to_hyperplanes( 288 | row['p'], row['yaw'], 1.5*row['l'], 1.5*row['w'], row['h']) 289 | agent_pc_mask = np.all((pc_points @ w.T + d) > 0., axis=-1) 290 | pc_points = pc_points[~agent_pc_mask] 291 | np.random.shuffle(pc_points) 292 | pointcloud_dict[t] = pc_points 293 | return pointcloud_dict 294 | 295 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. --------------------------------------------------------------------------------