├── .dockerignore ├── .gitattributes ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── .style.yapf ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── compass ├── __init__.py ├── distillation │ ├── distillation.py │ ├── distillation_trainer.py │ └── rl_specialists_dataset.py ├── residual_rl │ ├── __init__.py │ ├── actor_critic.py │ ├── constants.py │ ├── critic_state_assembler.py │ ├── mlp.py │ ├── ppo.py │ ├── residual_ppo_trainer.py │ ├── rollout_storage.py │ ├── utils.py │ └── x_mobility_rl.py ├── rl_env │ ├── LICENCE │ ├── README.md │ ├── exts │ │ └── mobility_es │ │ │ ├── config │ │ │ └── extension.toml │ │ │ ├── docs │ │ │ └── CHANGELOG.rst │ │ │ ├── mobility_es │ │ │ ├── __init__.py │ │ │ ├── ckpt │ │ │ │ ├── digit_locomotion_policy.pt │ │ │ │ ├── g1_locomotion_policy.pt │ │ │ │ ├── h1_locomotion_policy.pt │ │ │ │ └── spot_locomotion_policy.pt │ │ │ ├── config │ │ │ │ ├── __init__.py │ │ │ │ ├── carter_env_cfg.py │ │ │ │ ├── digit_env_cfg.py │ │ │ │ ├── env_cfg.py │ │ │ │ ├── environments.py │ │ │ │ ├── g1_env_cfg.py │ │ │ │ ├── h1_env_cfg.py │ │ │ │ ├── robots.py │ │ │ │ ├── scene_assets.py │ │ │ │ └── spot_env_cfg.py │ │ │ ├── mdp │ │ │ │ ├── __init__.py │ │ │ │ ├── action │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── action_visualization.py │ │ │ │ │ ├── differential_drive_action.py │ │ │ │ │ ├── locomotion_policy_action.py │ │ │ │ │ └── non_holonomic_perfect_control_action.py │ │ │ │ ├── command │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── commands_cfg.py │ │ │ │ │ └── uniform_collision_free_pose_command.py │ │ │ │ ├── curriculum.py │ │ │ │ ├── events.py │ │ │ │ ├── observations.py │ │ │ │ ├── rewards.py │ │ │ │ ├── termination.py │ │ │ │ └── utils.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ └── occupancy_map.py │ │ │ └── wrapper │ │ │ │ └── env_wrapper.py │ │ │ └── setup.py │ ├── pyproject.toml │ └── scripts │ │ ├── __init__.py │ │ └── play.py └── utils │ └── logger.py ├── configs ├── distillation_config.gin ├── distillation_dataset_config_template.yaml ├── eval_config.gin ├── record_config.gin ├── shared.gin ├── train_config.gin └── x_mobility_config.gin ├── distillation_train.py ├── docker ├── Dockerfile.distillation └── Dockerfile.rl ├── images ├── compass.png └── omap_generation.png ├── onnx_conversion.py ├── record.py ├── requirements.txt ├── run.py └── x_mobility └── x_mobility-0.1.0-py3-none-any.whl /.dockerignore: -------------------------------------------------------------------------------- 1 | ./compass/rl_env/exts/mobility_es/mobility_es/usd 2 | ./venv 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.png filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bazel* 2 | cscope.* 3 | *.swp 4 | *.sublime-project 5 | *.sublime-workspace 6 | *.pyc 7 | imgui.ini 8 | .dazel_run 9 | venv/ 10 | .clangd/ 11 | 12 | # Local setups 13 | **/_isaac_sim 14 | 15 | # Build files 16 | *build 17 | *install 18 | *log 19 | 20 | # Protofiles 21 | *.pb* 22 | 23 | # Data files 24 | *.pkl 25 | 26 | #Jupter notebook 27 | *.ipynb_checkpoints/ 28 | 29 | # USD files 30 | compass/rl_env/exts/mobility_es/mobility_es/usd/ 31 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | repos: 9 | - repo: https://github.com/kynan/nbstripout 10 | rev: 0.5.0 11 | hooks: 12 | - id: nbstripout 13 | - repo: https://github.com/pre-commit/pre-commit-hooks.git 14 | rev: v4.0.1 15 | hooks: 16 | - id: check-added-large-files 17 | args: ['--maxkb=1024'] 18 | - id: end-of-file-fixer 19 | - id: requirements-txt-fixer 20 | - id: trailing-whitespace 21 | - repo: https://github.com/google/yapf 22 | rev: v0.31.0 23 | hooks: 24 | - id: yapf 25 | name: "yapf" 26 | additional_dependencies: [toml] 27 | - repo: https://github.com/pre-commit/mirrors-clang-format 28 | rev: v13.0.0 29 | hooks: 30 | - id: clang-format 31 | - repo: https://github.com/pylint-dev/pylint 32 | rev: v3.0.3 33 | hooks: 34 | - id: pylint 35 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | [style] 10 | based_on_style = google 11 | spaces_before_comment = 4 12 | split_before_logical_operator = true 13 | column_limit = 100 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions. 4 | 5 | ## Code Reviews 6 | 7 | All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult 8 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. 9 | 10 | ## Signing Your Work 11 | 12 | * We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license. 13 | 14 | * Any contribution which contains commits that are not Signed-Off will not be accepted. 15 | 16 | * To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes: 17 | ```bash 18 | $ git commit -s -m "Add cool feature." 19 | ``` 20 | This will append the following to your commit message: 21 | ``` 22 | Signed-off-by: Your Name 23 | ``` 24 | 25 | * Full text of the DCO: 26 | 27 | ``` 28 | Developer Certificate of Origin 29 | Version 1.1 30 | 31 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 32 | 1 Letterman Drive 33 | Suite D4700 34 | San Francisco, CA, 94129 35 | 36 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 37 | ``` 38 | 39 | ``` 40 | Developer's Certificate of Origin 1.1 41 | 42 | By making a contribution to this project, I certify that: 43 | 44 | (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or 45 | 46 | (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or 47 | 48 | (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 49 | 50 | (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. 51 | ``` 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

COMPASS: Cross-Embodiment Mobility Policy via Residual RL and Skill Synthesis

2 | 3 |
4 | 5 | [![Isaac Lab](https://img.shields.io/badge/IsaacLab-1.3.0-b.svg)](https://isaac-sim.github.io/IsaacLab/v1.3.0/index.html) 6 | [![IsaacSim](https://img.shields.io/badge/IsaacSim-4.2.0-b.svg)](https://docs.isaacsim.omniverse.nvidia.com/4.2.0/index.html) 7 | [![Linux platform](https://img.shields.io/badge/Platform-linux--64-orange.svg)](https://ubuntu.com/blog/tag/22-04-lts) 8 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 9 | 10 | 11 | [[Website]](https://nvlabs.github.io/COMPASS/) 12 | [[arXiv]](https://arxiv.org/abs/2502.16372) 13 |
14 | 15 | ## Overview 16 | 17 | This repository provides the official PyTorch implementation of [COMPASS](https://nvlabs.github.io/COMPASS/). 18 | 19 |

20 | COMPASS 21 |

22 | 23 | 24 | COMPASS is a novel framework for cross-embodiment mobility that combines: 25 | - Imitation Learning (IL) for strong baseline performance 26 | - Residual Reinforcement Learning (RL) for embodiment-specific adaptation 27 | - Policy distillation to create a unified, generalist policy 28 | 29 | ## Table of Contents 30 | - [Installation](#installation) 31 | - [1. Residual RL Environment USDs](#1-residual-rl-environment-usds) 32 | - [2. Isaac Lab Installation](#2-isaac-lab-installation) 33 | - [3. Environment Setup](#3-environment-setup) 34 | - [4. Dependencies](#4-dependencies) 35 | - [5. X-Mobility Installation](#5-x-mobility-installation) 36 | - [Usage](#usage) 37 | - [Residual RL Specialists](#residual-rl-specialists) 38 | - [Policy Distillation](#policy-distillation) 39 | - [Model Export](#model-export) 40 | - [Add New Embodiment or Scene](#add-new-embodiment-or-scene) 41 | - [Logging](#logging) 42 | - [Pre-trained Generalist Policy Example](#pre-trained-generalist-policy-example) 43 | - [License](#license) 44 | - [Core Contributors](#core-contributors) 45 | - [Acknowledgments](#acknowledgments) 46 | - [Citation](#citation) 47 | 48 | ## Installation 49 | 50 | ### 1. Residual RL environment USDs 51 | * Download the residual RL environment USDs from: https://huggingface.co/nvidia/COMPASS/blob/main/compass_usds.zip 52 | * Unzip and place the downloaded USDs in the `compass/rl_env/exts/mobility_es/mobility_es/usd` directory 53 | 54 | ### 2. Isaac Lab Installation 55 | * Install Isaac Lab and the residual RL mobility extension by following this [instruction](compass/rl_env/README.md). 56 | 57 | ### 3. Environment Setup 58 | * Create and activate a virtual environment: 59 | ```bash 60 | python3 -m venv venv 61 | source venv/bin/activate 62 | ``` 63 | 64 | ### 4. Dependencies 65 | * Install the required packages: 66 | ```bash 67 | ${ISAACLAB_PATH}/isaaclab.sh -p -m pip install -r requirements.txt 68 | ``` 69 | 70 | ### 5. X-Mobility Installation 71 | * Install the [X-Mobility](https://github.com/NVlabs/X-MOBILITY) package: 72 | ```bash 73 | ${ISAACLAB_PATH}/isaaclab.sh -p -m pip install x_mobility/x_mobility-0.1.0-py3-none-any.whl 74 | ``` 75 | * Download the pre-trained X-Mobility checkpoint from: https://huggingface.co/nvidia/X-Mobility/blob/main/x_mobility-nav2-semantic_action_path.ckpt 76 | 77 | 78 | ## Usage 79 | 80 | ### Residual RL Specialists 81 | 82 | * Train with the default configurations in `configs/train_config.gin`: 83 | ```bash 84 | ${ISAACLAB_PATH}/isaaclab.sh -p run.py \ 85 | -c configs/train_config.gin \ 86 | -o \ 87 | -b \ 88 | --enable_camera 89 | ``` 90 | 91 | * Evaluate trained model: 92 | ```bash 93 | ${ISAACLAB_PATH}/isaaclab.sh -p run.py \ 94 | -c configs/eval_config.gin \ 95 | -o \ 96 | -b \ 97 | -p \ 98 | --enable_camera \ 99 | --video \ 100 | --video_interval 101 | ``` 102 | 103 | Add additional argument `--headless` to run RL training/evaluation in headless mode. 104 | 105 | > **NOTE**: The GPU memory usage is proportional to the number of environments in residual RL training. For example, 32 environments will use around 30GB memory, so reduce the number of environments if you have limited GPU memory. 106 | 107 | ### Policy Distillation 108 | 109 | * Collect specialist data: 110 | * Update specialists policy checkpoint paths in [dataset_config_template](configs/distillation_dataset_config_template.yaml) 111 | * Run data collection: 112 | ```bash 113 | ${ISAACLAB_PATH}/isaaclab.sh -p record.py \ 114 | -c configs/distillation_dataset_config_template.yaml \ 115 | -o \ 116 | -b \ 117 | --dataset-name 118 | ``` 119 | 120 | * Train generalist policy: 121 | ```bash 122 | python3 distillation_train.py \ 123 | --config-files configs/distillation_config.gin \ 124 | --dataset-path \ 125 | --output-dir 126 | ``` 127 | 128 | * Evaluate generalist policy: 129 | ```bash 130 | ${ISAACLAB_PATH}/isaaclab.sh -p run.py \ 131 | -c configs/eval_config.gin \ 132 | -o \ 133 | -b \ 134 | -d \ 135 | --enable_camera \ 136 | --video \ 137 | --video_interval 138 | ``` 139 | 140 | ### Model Export 141 | 142 | * Export RL specialist policy to ONNX or JIT formats: 143 | ```bash 144 | python3 onnx_conversion.py \ 145 | -b \ 146 | -r \ 147 | -o \ 148 | -j 149 | ``` 150 | 151 | * Export generalist policy to ONNX or JIT formats: 152 | ```bash 153 | python3 onnx_conversion.py \ 154 | -b \ 155 | -g \ 156 | -e \ 157 | -o \ 158 | -j 159 | ``` 160 | 161 | ### Add New Embodiment or Scene 162 | 163 | * Follow this [instruction](compass/rl_env/README.md) to add a new embodiment or scene to the Isaac Lab RL environment. 164 | * Register the new embodiment or scene to the `EmbodimentEnvCfgMap` and `EnvSceneAssetCfgMap` in [run.py](run.py), then update the configs or use command line arguments to select the new embodiment or scene. 165 | 166 | 167 | ### Logging: 168 | 169 | The training and evaluation scripts use TensorBoard for logging by default. Weights & Biases (W&B) logging is also supported for more advanced experiment tracking features. 170 | 171 | **To use TensorBoard (default):** 172 | - Logs will be saved to `/tensorboard/` 173 | - View logs with: `tensorboard --logdir=/tensorboard/` 174 | 175 | **To use Weights & Biases:** 176 | 1. Install and set up W&B: `pip install wandb` and follow the [setup instructions](https://docs.wandb.ai/quickstart) 177 | 2. Log in to your W&B account: `wandb login` 178 | 3. Add the `--logger wandb` flag to your command: 179 | ```bash 180 | ${ISAACLAB_PATH}/isaaclab.sh -p run.py \ 181 | -c configs/train_config.gin \ 182 | -o \ 184 | --enable_camera \ 185 | --logger wandb \ 186 | --wandb-run-name "experiment_name" \ 187 | --wandb-project-name "project_name" \ 188 | --wandb-entity-name "your_username_or_team" 189 | ``` 190 | 191 | ## Pre-trained Generalist Policy Example 192 | 193 | We provide a pre-trained generalist policy that works across four robot embodiments: 194 | * **Carter** (wheeled robot) 195 | * **H1** (humanoid) 196 | * **G1** (humanoid) 197 | * **Spot** (quadruped) 198 | 199 | To try out the pre-trained generalist policy: 200 | 1. Download the checkpoint from: https://huggingface.co/nvidia/COMPASS/blob/main/compass_generalist.ckpt 201 | 2. Use the evaluation command shown above with your downloaded checkpoint: 202 | ```bash 203 | ${ISAACLAB_PATH}/isaaclab.sh -p run.py \ 204 | -c configs/eval_config.gin \ 205 | -o \ 206 | -b \ 207 | -d \ 208 | --enable_camera \ 209 | --embodiment \ 210 | --environment 211 | ``` 212 | 213 | > **NOTE**: The generalist policy uses one-hot embodiment encoding and may not generalize perfectly to unseen embodiment types. For best results with new embodiment types, we recommend fine-tuning with residual RL first. 214 | 215 | 216 | ## License 217 | COMPASS is released under the Apache License 2.0. See [LICENSE](LICENSE) for additional details. 218 | 219 | ## Core Contributors 220 | Wei Liu, Huihua Zhao, Chenran Li, Joydeep Biswas, Soha Pouya, Yan Chang 221 | 222 | 223 | ## Acknowledgments 224 | We would like to acknowledge the following projects where parts of the codes in this repo is derived from: 225 | - [RSL_RL](https://github.com/leggedrobotics/rsl_rl/tree/main) 226 | - [Isaac Lab](https://github.com/isaac-sim/IsaacLab) 227 | 228 | ## Citation 229 | If you find this work useful in your research, please consider citing: 230 | ```bibtex 231 | @article{liu2025compass, 232 | title={COMPASS: Cross-embodiment Mobility Policy via Residual RL and Skill Synthesis}, 233 | author={Liu, Wei and Zhao, Huihua and Li, Chenran and Biswas, Joydeep and Pouya, Soha and Chang, Yan}, 234 | journal={arXiv preprint arXiv:2502.16372}, 235 | year={2025} 236 | } 237 | -------------------------------------------------------------------------------- /compass/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/distillation/distillation.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gin 17 | import torch 18 | import torch.nn.functional as F 19 | from torch import nn 20 | 21 | from model.x_mobility.utils import pack_sequence_dim 22 | 23 | from compass.residual_rl.mlp import build_mlp_network, get_activation 24 | 25 | ACTION_MASK = (0, 1, 5) 26 | 27 | 28 | @gin.configurable 29 | class EmbodimentOneHotEncoder(nn.Module): 30 | '''Embodiment one-hot encoding.''' 31 | 32 | def __init__(self, 33 | num_dims: int = 8, 34 | supported_types: list[str] = ['carter', 'h1', 'spot', 'g1']): 35 | super().__init__() 36 | self.num_dims = num_dims 37 | self.supported_types = supported_types 38 | assert self.num_dims >= len(self.supported_types), \ 39 | "Number of dimensions less than the number of supported types" 40 | 41 | def forward(self, embodiment_type: str): 42 | if embodiment_type not in self.supported_types: 43 | raise ValueError(f"Embodiment type '{embodiment_type}' not supported. " 44 | f"Must be one of {self.supported_types}") 45 | 46 | # Create one-hot encoding 47 | index = self.supported_types.index(embodiment_type) 48 | encoding = torch.zeros(self.num_dims) 49 | encoding[index] = 1.0 50 | return encoding 51 | 52 | 53 | @gin.configurable 54 | class MLPActionPolicy(nn.Module): 55 | '''MLP action policy.''' 56 | 57 | def __init__(self, in_channels: int, command_n_channels: int): 58 | super().__init__() 59 | self.command_fc = build_mlp_network(input_dim=in_channels, 60 | hidden_dims=[1024, 512, 256, 128], 61 | output_dim=command_n_channels, 62 | activation_fn=get_activation('relu', False)) 63 | 64 | def forward(self, x): 65 | return {'mean': self.command_fc(x)} 66 | 67 | 68 | @gin.configurable 69 | class MLPActionPolicyDistribution(MLPActionPolicy): 70 | 71 | def __init__(self, in_channels, command_n_channels, init_noise_std: float = 0.1): 72 | super().__init__(in_channels, command_n_channels) 73 | 74 | # Global std variance for each action dim (trainable) 75 | self.std = nn.Parameter(init_noise_std * torch.ones(command_n_channels)) 76 | 77 | def forward(self, x): 78 | return {'mean': self.command_fc(x), 'std': self.std} 79 | 80 | 81 | @gin.configurable 82 | class ESDistillationKLLoss(nn.Module): 83 | """ 84 | Compute KL(teacher || student) for each sample. 85 | Returns average KL across the batch. 86 | Ref: https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians # pylint: disable=line-too-long 87 | """ 88 | 89 | def forward(self, output, batch): 90 | losses = {} 91 | mu_teacher = pack_sequence_dim(batch['action'])[:, ACTION_MASK] 92 | std_teacher = pack_sequence_dim(batch['action_sigma'])[:, ACTION_MASK] 93 | mu_student = output['mean'][:, ACTION_MASK] 94 | std_student = output['std'].repeat(std_teacher.shape[0], 1)[:, ACTION_MASK] 95 | 96 | # Convert std to log_std 97 | log_std_teacher = torch.log(std_teacher) 98 | log_std_student = torch.log(std_student) 99 | 100 | # Term1: log(\sigma_S / \sigma_T) 101 | term1 = log_std_student - log_std_teacher 102 | 103 | # Term2: (sigma_T^2 + (mu_T - mu_S)^2) / (2 sigma_S^2) 104 | numerator = std_teacher.pow(2) + (mu_teacher - mu_student).pow(2) 105 | denominator = 2.0 * std_student.pow(2) 106 | term2 = numerator / denominator 107 | 108 | # KL = term1 + term2 - 0.5 109 | kl = term1 + term2 - 0.5 110 | # Average across actions, then across batch 111 | losses['action_kl'] = kl.mean() 112 | return losses 113 | 114 | 115 | @gin.configurable 116 | class ESDistillationMSELoss(nn.Module): 117 | '''Loss function for action policy distillation.''' 118 | 119 | def __init__(self): 120 | super().__init__() 121 | self.loss_fn = F.mse_loss 122 | 123 | def forward(self, output, batch): 124 | losses = {} 125 | mse = F.mse_loss(output['mean'][:, ACTION_MASK], 126 | pack_sequence_dim(batch['action'])[:, ACTION_MASK], 127 | reduction='none') 128 | losses['action'] = torch.sum(mse, dim=-1, keepdims=True).mean() 129 | return losses 130 | 131 | 132 | @gin.configurable 133 | class ESDistillationPolicy(nn.Module): 134 | '''Embodiment specialist distillation policy.''' 135 | 136 | def __init__(self, policy_model: nn.Module, embodiment_encoder: nn.Module, 137 | policy_state_dim: int, command_n_channels: int): 138 | super().__init__() 139 | self.embodiment_encoder = embodiment_encoder() 140 | self.policy_model = policy_model(in_channels=policy_state_dim + 141 | self.embodiment_encoder.num_dims, 142 | command_n_channels=command_n_channels) 143 | 144 | def forward(self, batch): 145 | policy_state = batch['policy_state'] 146 | embodiment_encodings = [ 147 | self.embodiment_encoder(embodiment).repeat(policy_state.shape[1], 1) 148 | for embodiment in batch['embodiment'] 149 | ] 150 | embodiment_encodings = torch.stack(embodiment_encodings, dim=0).to(policy_state.device) 151 | x = pack_sequence_dim(torch.cat([policy_state, embodiment_encodings], dim=-1)) 152 | return self.policy_model(x) 153 | 154 | 155 | class ESDistillationPolicyWrapper(nn.Module): 156 | '''Wrapper of the distilled policy to enable inference.''' 157 | 158 | def __init__(self, distillation_policy_ckpt_path: str, embodiment_type: str): 159 | super().__init__() 160 | # Load the checkpoint and remove the prefix if any. 161 | state_dict = torch.load(distillation_policy_ckpt_path, weights_only=False)['state_dict'] 162 | state_dict = {k.removeprefix('model.'): v for k, v in state_dict.items()} 163 | # Load the state dict. 164 | self.model = ESDistillationPolicy() 165 | self.model.load_state_dict(state_dict, strict=True) 166 | 167 | # Embodiment type. 168 | self.embodiment_type = embodiment_type 169 | 170 | def forward(self, policy_state): 171 | batch = { 172 | 'policy_state': policy_state.unsqueeze(1), 173 | 'embodiment': [self.embodiment_type] * policy_state.shape[0] 174 | } 175 | return self.model(batch)['mean'] 176 | -------------------------------------------------------------------------------- /compass/distillation/distillation_trainer.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gin 17 | import pytorch_lightning as pl 18 | import torch 19 | 20 | # pylint: disable=unused-import 21 | from compass.distillation.distillation import (MLPActionPolicy, MLPActionPolicyDistribution, 22 | ESDistillationKLLoss, ESDistillationMSELoss) 23 | 24 | 25 | @gin.configurable 26 | class ESDistillationTrainer(pl.LightningModule): 27 | '''Pytorch Lightnining module of ES distillation for training. 28 | ''' 29 | 30 | def __init__(self, model, loss, lr): 31 | super().__init__() 32 | self.lr = lr 33 | # Save hyperparameters. 34 | self.save_hyperparameters() 35 | 36 | # Model 37 | self.model = model() 38 | 39 | # Losses 40 | self.loss = loss() 41 | 42 | def forward(self, batch): 43 | return self.model(batch) 44 | 45 | def shared_step(self, batch): 46 | output = self.forward(batch) 47 | losses = self.loss(output, batch) 48 | return losses, output 49 | 50 | def training_step(self, batch, batch_idx): 51 | losses, output = self.shared_step(batch) 52 | self.log_and_visualize(batch, output, losses, batch_idx, prefix='train') 53 | return self.loss_reducing(losses) 54 | 55 | def validation_step(self, batch, batch_idx): 56 | losses, output = self.shared_step(batch) 57 | self.log_and_visualize(batch, output, losses, batch_idx, prefix='validation') 58 | val_loss = self.loss_reducing(losses) 59 | return {'val_loss': val_loss} 60 | 61 | def test_step(self, batch, batch_idx): 62 | losses, output = self.shared_step(batch) 63 | self.log_and_visualize(batch, output, losses, batch_idx, prefix='test') 64 | 65 | def log_and_visualize(self, batch, output, losses, batch_idx, prefix='train'): #pylint: disable=unused-argument 66 | # Log losses 67 | for key, value in losses.items(): 68 | self.log(f'{prefix}/losses/{key}', value, sync_dist=True) 69 | 70 | # Log total loss and videos for validation. 71 | if prefix == 'validation': 72 | # Log total validation loss. 73 | self.log('val_loss', self.loss_reducing(losses), sync_dist=True, on_epoch=True) 74 | 75 | def loss_reducing(self, loss: torch.Tensor): 76 | total_loss = sum([x for x in loss.values()]) 77 | return total_loss 78 | 79 | def configure_optimizers(self): 80 | # Optimizer 81 | optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr) 82 | # scheduler 83 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.5) 84 | return [optimizer], [{'scheduler': lr_scheduler, 'interval': 'epoch'}] 85 | -------------------------------------------------------------------------------- /compass/distillation/rl_specialists_dataset.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import bisect 17 | import os 18 | 19 | import h5py 20 | import gin 21 | import pytorch_lightning as pl 22 | import torch 23 | from tqdm import tqdm 24 | from torch.utils.data import DataLoader, Dataset 25 | from torch.utils.data.distributed import DistributedSampler 26 | 27 | 28 | @gin.configurable 29 | class RLSpecialistDataModule(pl.LightningDataModule): 30 | '''Datamodule with dataset collected from Isaac Sim. 31 | ''' 32 | 33 | def __init__(self, dataset_path: str, batch_size: int, sequence_length: int, num_workers: int): 34 | super().__init__() 35 | self.batch_size = batch_size 36 | self.sequence_length = sequence_length 37 | self.num_workers = num_workers 38 | self.dataset_path = dataset_path 39 | self.train_dataset = None 40 | self.val_dataset = None 41 | 42 | def setup(self, stage=None): 43 | if stage == 'fit': 44 | self.train_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'train'), 45 | self.sequence_length) 46 | self.val_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'val'), 47 | self.sequence_length) 48 | if stage == 'test' or stage is None: 49 | self.test_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'test'), 50 | self.sequence_length) 51 | 52 | def train_dataloader(self): 53 | train_sampler = DistributedSampler(self.train_dataset, shuffle=True) 54 | return DataLoader(self.train_dataset, 55 | batch_size=self.batch_size, 56 | num_workers=self.num_workers, 57 | pin_memory=True, 58 | drop_last=True, 59 | sampler=train_sampler) 60 | 61 | def val_dataloader(self): 62 | val_sampler = DistributedSampler(self.val_dataset, shuffle=False) 63 | return DataLoader(self.val_dataset, 64 | batch_size=self.batch_size, 65 | num_workers=self.num_workers, 66 | pin_memory=True, 67 | drop_last=True, 68 | sampler=val_sampler) 69 | 70 | def test_dataloader(self): 71 | test_sampler = DistributedSampler(self.test_dataset, shuffle=False) 72 | return DataLoader(self.test_dataset, 73 | batch_size=self.batch_size, 74 | num_workers=self.num_workers, 75 | pin_memory=True, 76 | drop_last=True, 77 | sampler=test_sampler) 78 | 79 | def load_test_data(self): 80 | """ Load test data. This function is used in local Jupyter testing environment only. 81 | """ 82 | test_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'test'), 83 | self.sequence_length) 84 | return DataLoader( 85 | test_dataset, 86 | batch_size=16, 87 | num_workers=self.num_workers, 88 | shuffle=False, 89 | pin_memory=True, 90 | drop_last=True, 91 | ) 92 | 93 | 94 | class RLSpecialistDataset(Dataset): 95 | '''Dataset from Isaac sim. 96 | ''' 97 | 98 | def __init__(self, dataset_path: str, sequence_length: int): 99 | super().__init__() 100 | self.sequence_length = sequence_length 101 | self.rl_batch_size = 0 102 | self.rl_num_steps = 0 103 | self.hdfs = [] 104 | self.accumulated_sample_sizes = [] 105 | self.num_samples = 0 106 | 107 | # Iterate each embodiment in the dataset. 108 | for embodiment in os.listdir(dataset_path): 109 | embodiment_path = os.path.join(dataset_path, embodiment) 110 | # Iterate the sorted runs for the given scenario. 111 | dataset_files = [ 112 | run_file for run_file in os.listdir(embodiment_path) if run_file.endswith('h5') 113 | ] 114 | dataset_files = sorted(dataset_files) 115 | with tqdm(total=len(dataset_files), 116 | desc=f"Loading data from {embodiment_path}", 117 | unit="file") as pbar: 118 | for dataset_file in dataset_files: 119 | hdf = h5py.File(os.path.join(embodiment_path, dataset_file), 'r') 120 | self.hdfs.append(hdf) 121 | self.rl_batch_size = hdf['image'].shape[0] 122 | self.rl_num_steps = hdf['image'].shape[1] 123 | self.accumulated_sample_sizes.append(self.num_samples) 124 | self.num_samples += (self.rl_num_steps // 125 | self.sequence_length) * self.rl_batch_size 126 | pbar.update(1) 127 | 128 | def __len__(self): 129 | return self.num_samples 130 | 131 | def __getitem__(self, index): 132 | # Get the cooresponding hdf. 133 | hdf_idx = bisect.bisect_left(self.accumulated_sample_sizes, index + 1) - 1 134 | hdf_sample_idx = index - self.accumulated_sample_sizes[hdf_idx] 135 | return self._get_elements_batch(self.hdfs[hdf_idx], hdf_sample_idx) 136 | 137 | def _get_elements_batch(self, hdf, index): 138 | samples_per_batch = self.rl_num_steps // self.sequence_length 139 | batch_idx = index // samples_per_batch 140 | step_idx = (index % samples_per_batch) * self.sequence_length 141 | elements_batch = {} 142 | elements_batch['image'] = torch.tensor( 143 | hdf['image'][batch_idx, step_idx:step_idx + self.sequence_length, :]).permute( 144 | 0, 3, 1, 2) / 255.0 145 | elements_batch['route'] = torch.tensor(hdf['route'][batch_idx, step_idx:step_idx + 146 | self.sequence_length, :]) 147 | elements_batch['speed'] = torch.tensor(hdf['speed'][batch_idx, step_idx:step_idx + 148 | self.sequence_length, :]) 149 | elements_batch['action'] = torch.tensor(hdf['action'][batch_idx, step_idx:step_idx + 150 | self.sequence_length, :]) 151 | elements_batch['policy_state'] = torch.tensor( 152 | hdf['policy_state'][batch_idx, step_idx:step_idx + self.sequence_length, :]) 153 | 154 | # Get embodiment information. 155 | elements_batch['embodiment'] = hdf.attrs['embodiment'] 156 | elements_batch['action_sigma'] = torch.tensor(hdf['action_sigma']).repeat( 157 | self.sequence_length, 1) 158 | return elements_batch 159 | -------------------------------------------------------------------------------- /compass/residual_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/residual_rl/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import copy 17 | 18 | import gin 19 | import torch 20 | from torch import nn 21 | from torch.distributions import Normal 22 | 23 | from compass.residual_rl.mlp import build_mlp_network, get_activation 24 | 25 | 26 | @gin.configurable 27 | class ActorCriticBase(nn.Module): 28 | """Base class of the ActorCritic. 29 | 30 | Note: The inherited class needs to provide the definitions of the actor and critic network. 31 | 32 | """ 33 | 34 | def __init__(self, action_dim: int, init_noise_std: float = 1.0): 35 | 36 | super().__init__() 37 | 38 | self.actor = None 39 | self.critic = None 40 | 41 | # Action noise 42 | self.std = nn.Parameter(init_noise_std * torch.ones(action_dim)) 43 | self.distribution = None 44 | # disable args validation for speedup 45 | Normal.set_default_validate_args = False 46 | 47 | def reset(self, dones=None): 48 | pass 49 | 50 | @property 51 | def action_mean(self): 52 | return self.distribution.mean 53 | 54 | @property 55 | def action_std(self): 56 | return self.distribution.stddev 57 | 58 | @property 59 | def entropy(self): 60 | return self.distribution.entropy().sum(dim=-1) 61 | 62 | def update_distribution(self, policy_state): 63 | mean = self.act_inference(policy_state) 64 | self.distribution = Normal(mean, mean * 0. + self.std) 65 | 66 | def act(self, policy_state): 67 | self.update_distribution(policy_state) 68 | action = self.distribution.sample() 69 | return action 70 | 71 | def act_inference(self, policy_state): 72 | if self.actor is None: 73 | raise ValueError("User needs to provide the definition of actor network") 74 | # pylint: disable=not-callable 75 | return self.actor(policy_state) 76 | 77 | def get_actions_log_prob(self, actions): 78 | return self.distribution.log_prob(actions).sum(dim=-1) 79 | 80 | def evaluate(self, critic_state): 81 | if self.critic is None: 82 | raise ValueError("User needs to provide the definition of critic network") 83 | # Compute the value. 84 | # pylint: disable=not-callable 85 | value = self.critic(critic_state) 86 | return value 87 | 88 | 89 | @gin.configurable 90 | class ActorCriticXMobility(ActorCriticBase): 91 | """ActorCritic for residual policy to enhance X-Mobility. 92 | 93 | The actor network is initialized with the X-Mobility's policy head with the last linear layer 94 | reset to zero. 95 | """ 96 | 97 | def __init__(self, 98 | base_policy, 99 | action_dim, 100 | critic_state_dim, 101 | critic_hidden_dims=256, 102 | init_noise_std=1.0): 103 | super().__init__(action_dim=action_dim, init_noise_std=init_noise_std) 104 | # Overwrite the actor with the base policy 105 | # Load the base_policy and reset the last linear layer to 0.0 106 | self.actor = copy.deepcopy(base_policy.policy_mlp.command_fc) 107 | nn.init.zeros_(self.actor[-2].weight) # Set weights to zero 108 | if self.actor[-2].bias is not None: 109 | nn.init.zeros_(self.actor[-2].bias) 110 | # Enable the gradients. 111 | for param in self.actor.parameters(): 112 | param.requires_grad = True 113 | 114 | # Define critic 115 | activation = get_activation('relu', inplace=True) 116 | self.critic = build_mlp_network( 117 | input_dim=critic_state_dim, 118 | hidden_dims=[critic_hidden_dims, critic_hidden_dims, critic_hidden_dims // 2], 119 | output_dim=1, 120 | activation_fn=activation) 121 | 122 | 123 | @gin.configurable 124 | class ActorCriticMLP(ActorCriticBase): 125 | """ ActorCritic with both actor and critic built as MLP modules. 126 | """ 127 | 128 | def __init__(self, 129 | actor_state_dim, 130 | critic_state_dim, 131 | actor_hidden_dims, 132 | critic_hidden_dims, 133 | action_dim, 134 | init_noise_std=1.0): 135 | super().__init__(action_dim, init_noise_std) 136 | 137 | activation = get_activation('relu', inplace=True) 138 | self.actor = build_mlp_network(input_dim=actor_state_dim, 139 | hidden_dims=actor_hidden_dims, 140 | output_dim=action_dim, 141 | activation_fn=activation) 142 | 143 | self.critic = build_mlp_network(input_dim=critic_state_dim, 144 | hidden_dims=critic_hidden_dims, 145 | output_dim=1, 146 | activation_fn=activation) 147 | -------------------------------------------------------------------------------- /compass/residual_rl/constants.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | INPUT_IMAGE_SIZE = (320, 512) 17 | -------------------------------------------------------------------------------- /compass/residual_rl/critic_state_assembler.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from typing import Dict 17 | 18 | import gin 19 | import torch 20 | from torch import nn 21 | from torchvision import models 22 | 23 | from compass.residual_rl.constants import INPUT_IMAGE_SIZE 24 | from compass.residual_rl.utils import preprocess_depth_images 25 | 26 | 27 | @gin.configurable 28 | class DepthImageFeatureExtractor(nn.Module): 29 | """ 30 | Feature extractor for depth images using pretrained ResNet. 31 | Assumes input images are three channels (via replication for example) 32 | and preprocessed to 224x224. 33 | 34 | Note: while the input_size can be different, we currently restrict it to 35 | this fixed size only as the ResNet is trained with this size. It is a 36 | common practice and widely used. 37 | """ 38 | 39 | def __init__(self): 40 | super().__init__() 41 | # Initialize a pre-trained ResNet 42 | resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT) 43 | 44 | self.input_size = [3, 224, 224] 45 | 46 | # Use the model's layers up to a certain point for feature extraction 47 | # Remove the fully connected layers, retain convolutional layers 48 | modules = list(resnet.children())[:-2] 49 | self.feature_extractor = nn.Sequential(*modules) 50 | 51 | # The output shape of the feature extractor is 512 x 7 x 7, we choose 52 | # output_size of the average pool as 1 to reduce the feature dimension. 53 | output_size = (1, 1) 54 | self.global_avg_pool = nn.AdaptiveAvgPool2d((output_size)) 55 | 56 | # Freeze all parameters 57 | for param in self.feature_extractor.parameters(): 58 | param.requires_grad = False 59 | 60 | self.out_channels = modules[-1][-1].conv2.out_channels * output_size[0] * output_size[1] 61 | 62 | def forward(self, x): 63 | # Forward pass through feature extractor 64 | features = self.feature_extractor(x) 65 | 66 | # Apply global average pooling 67 | pooled_features = self.global_avg_pool(features) 68 | 69 | pooled_features = torch.flatten(pooled_features, start_dim=1) 70 | if pooled_features.numel() == 0: 71 | raise ValueError("Feature tensor is empty. Check input size and network layers.") 72 | return pooled_features 73 | 74 | 75 | @gin.configurable 76 | class CriticObservationEncoder(nn.Module): 77 | '''Encoder of the input observations 78 | Args: 79 | depth_encoder: encoder of the depth image 80 | route_feat_dim: feature dimension of the route encoder 81 | speed_feat_dim: feature dimension of the speed encoder 82 | 83 | Inputs: 84 | batch (Dict): dict of the input tensors: 85 | depth_image: (b, 1, h, w) 86 | speed_feat: (b, out_channels), encoded speed feature 87 | route_feat: (b, out_channels), encoded route feature 88 | 89 | Returns: 90 | embedding: A dict of 1D embedding of the observations 91 | ''' 92 | 93 | def __init__(self, depth_encoder: nn.Module, route_feat_dim: int, speed_feat_dim: int): 94 | super().__init__() 95 | 96 | # Depth Image 97 | self.depth_encoder = depth_encoder() 98 | features_channels = self.depth_encoder.out_channels 99 | 100 | # Add dims from pretrained route encoder and speed encoder 101 | features_channels += route_feat_dim 102 | features_channels += speed_feat_dim 103 | 104 | self.embedding_dim = features_channels 105 | 106 | def forward(self, batch: Dict) -> torch.Tensor: 107 | # Only support one step for now. 108 | depth_image = batch['depth_image'] 109 | 110 | # Image encoding 111 | depth_feat = self.depth_encoder(depth_image) 112 | 113 | speed_feat = batch["speed_feat"] 114 | route_feat = batch["route_feat"] 115 | 116 | # Final observation embedding. 117 | embedding = torch.cat([depth_feat, speed_feat, route_feat], dim=1) 118 | 119 | # Check dimension matches 120 | assert embedding.shape[1] == self.embedding_dim 121 | 122 | return embedding 123 | 124 | 125 | @gin.configurable 126 | class CriticStateAssemblerBase: 127 | """Abstract base class to assemble the critic state. 128 | 129 | The goal is to provide a universal interface type for different critic setups. 130 | """ 131 | 132 | # pylint: disable=unused-argument 133 | def __init__(self, env=None, encoder=None, policy_state_dim=None): 134 | self._state_dim = -1 135 | 136 | @property 137 | def state_dim(self) -> int: 138 | return self._state_dim 139 | 140 | def __str__(self) -> str: 141 | raise NotImplementedError 142 | 143 | def compute_critic_state(self, policy_state=None, obs_dict=None, extras=None) -> torch.Tensor: 144 | raise NotImplementedError 145 | 146 | 147 | @gin.configurable 148 | class CriticStateDepthEncoderAssembler(CriticStateAssemblerBase): 149 | """ Use the encoded depth, speed and route feature for critic state. 150 | 151 | """ 152 | 153 | def __init__(self, env=None, encoder=None, policy_state_dim=None): 154 | super().__init__(env, encoder, policy_state_dim) 155 | self.encoder = encoder 156 | self._state_dim = encoder.embedding_dim 157 | 158 | def __str__(self) -> str: 159 | return "CriticStateDepthEncoderAssembler" 160 | 161 | def compute_critic_state(self, policy_state=None, obs_dict=None, extras=None): 162 | # Preprocess depth image 163 | input_size = self.encoder.depth_encoder.input_size 164 | raw_depth_image = obs_dict["privileged"]["camera_depth_img"] 165 | b = raw_depth_image.shape[0] 166 | depth_image_processed = raw_depth_image.reshape(b, 1, INPUT_IMAGE_SIZE[0], 167 | INPUT_IMAGE_SIZE[1]).float() 168 | 169 | depth_image_processed = preprocess_depth_images(depth_image_processed, 170 | target_height=input_size[1], 171 | target_width=input_size[2]) 172 | 173 | critic_batch = {} 174 | critic_batch.update(extras) 175 | critic_batch['depth_image'] = depth_image_processed 176 | return self.encoder(critic_batch) 177 | 178 | 179 | @gin.configurable 180 | class CriticStateSymmetricAssembler(CriticStateAssemblerBase): 181 | """Use the policy state as the critic state for symmetric A2C. 182 | 183 | """ 184 | 185 | def __init__(self, env=None, encoder=None, policy_state_dim=None): 186 | super().__init__(env, encoder, policy_state_dim) 187 | self._state_dim = policy_state_dim 188 | 189 | def __str__(self) -> str: 190 | return "CriticStateSymmetricAssembler" 191 | 192 | def compute_critic_state(self, policy_state=None, obs_dict=None, extras=None): 193 | return policy_state 194 | -------------------------------------------------------------------------------- /compass/residual_rl/mlp.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from torch import nn 17 | 18 | 19 | def build_mlp_network(input_dim, hidden_dims, output_dim, activation_fn): 20 | """ 21 | Builds a MLP feed-forward neural network. 22 | 23 | Args: 24 | input_dim (int): Dimension of the input layer. 25 | hidden_dims (list of int): Dimensions of the hidden layers. 26 | output_dim (int): Dimension of the output layer. 27 | activation_fn (nn.Module): Activation function to use between layers. 28 | 29 | Returns: 30 | nn.Sequential: A sequential container of layers representing the network. 31 | """ 32 | layers = [] 33 | layers.append(nn.Linear(input_dim, hidden_dims[0])) 34 | layers.append(activation_fn) 35 | 36 | for i in range(len(hidden_dims) - 1): 37 | layers.append(nn.Linear(hidden_dims[i], hidden_dims[i + 1])) 38 | layers.append(activation_fn) 39 | 40 | layers.append(nn.Linear(hidden_dims[-1], output_dim)) 41 | 42 | return nn.Sequential(*layers) 43 | 44 | 45 | def get_activation(act_name, inplace: bool = True): 46 | if act_name == "elu": 47 | return nn.ELU(inplace) 48 | elif act_name == "selu": 49 | return nn.SELU(inplace) 50 | elif act_name == "relu": 51 | return nn.ReLU(inplace) 52 | elif act_name == "crelu": 53 | return nn.CReLU(inplace) 54 | elif act_name == "lrelu": 55 | return nn.LeakyReLU(inplace) 56 | elif act_name == "tanh": 57 | return nn.Tanh(inplace) 58 | elif act_name == "sigmoid": 59 | return nn.Sigmoid(inplace) 60 | else: 61 | print("invalid activation function!") 62 | return None 63 | -------------------------------------------------------------------------------- /compass/residual_rl/ppo.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gin 17 | import torch 18 | from torch import nn, optim 19 | 20 | from compass.residual_rl.rollout_storage import RolloutStorage 21 | 22 | 23 | @gin.configurable 24 | class PPO: 25 | """PPO class to train a A2C policy""" 26 | 27 | def __init__(self, 28 | actor_critic, 29 | device='cpu', 30 | value_loss_coef=1.0, 31 | use_clipped_value_loss=True, 32 | clip_param=0.2, 33 | entropy_coef=0.01, 34 | num_learning_epochs=5, 35 | num_mini_batches=4, 36 | learning_rate=1.e-4, 37 | schedule='adaptive', 38 | gamma=0.99, 39 | lam=0.95, 40 | desired_kl=0.01, 41 | max_grad_norm=1.0): 42 | 43 | self.device = device 44 | 45 | # PPO components 46 | self.actor_critic = actor_critic 47 | self.actor_critic.to(self.device) 48 | self.storage = None 49 | 50 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate) 51 | self.transition = RolloutStorage.Transition() 52 | 53 | # Parameters 54 | self.learning_rate = learning_rate 55 | self.value_loss_coef = value_loss_coef 56 | self.use_clipped_value_loss = use_clipped_value_loss 57 | self.clip_param = clip_param 58 | self.entropy_coef = entropy_coef 59 | self.num_learning_epochs = num_learning_epochs 60 | self.num_mini_batches = num_mini_batches 61 | self.schedule = schedule 62 | self.gamma = gamma 63 | self.lam = lam 64 | self.desired_kl = desired_kl 65 | self.max_grad_norm = max_grad_norm 66 | 67 | def init_storage(self, num_envs, num_transitions_per_env, policy_states_shape, 68 | critic_states_shape, actions_shape): 69 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, policy_states_shape, 70 | critic_states_shape, actions_shape, self.device) 71 | 72 | def test_mode(self): 73 | self.actor_critic.test() 74 | 75 | def train_mode(self): 76 | self.actor_critic.train() 77 | 78 | def act(self, states): 79 | policy_states = self._get_policy_states(states) 80 | critic_states = self._get_critic_states(states) 81 | 82 | # Compute the actions and values 83 | self.transition.actions = (self.actor_critic.act(policy_states)).detach() 84 | self.transition.values = self.actor_critic.evaluate(critic_states).detach() 85 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob( 86 | self.transition.actions).detach() 87 | self.transition.action_mean = (self.actor_critic.action_mean).detach() 88 | self.transition.action_sigma = (self.actor_critic.action_std).detach() 89 | 90 | # Need to record policy state before env.step() 91 | self.transition.policy_states = policy_states 92 | self.transition.critic_states = critic_states 93 | return self.transition.actions 94 | 95 | def act_inference(self, states): 96 | policy_states = self._get_policy_states(states) 97 | return self.actor_critic.act_inference(policy_states) 98 | 99 | def process_env_step(self, rewards, dones): 100 | self.transition.rewards = rewards.clone() 101 | self.transition.dones = dones 102 | 103 | # Record the transition 104 | self.storage.add_transitions(self.transition) 105 | self.transition.clear() 106 | self.actor_critic.reset(dones) 107 | 108 | def compute_returns(self, last_states): 109 | critic_states = self._get_critic_states(last_states) 110 | last_values = self.actor_critic.evaluate(critic_states).detach() 111 | self.storage.compute_returns(last_values, self.gamma, self.lam) 112 | 113 | def update(self): 114 | mean_value_loss = 0 115 | mean_surrogate_loss = 0 116 | generator = self.storage.mini_batch_generator(self.num_mini_batches, 117 | self.num_learning_epochs) 118 | for states_batch, actions_batch, target_values_batch, advantages_batch, \ 119 | returns_batch, old_actions_log_prob_batch, old_mu_batch, \ 120 | old_sigma_batch, _ in generator: 121 | policy_states_batch = self._get_policy_states(states_batch) 122 | critic_states_batch = self._get_critic_states(states_batch) 123 | self.actor_critic.act(policy_states_batch) 124 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 125 | value_batch = self.actor_critic.evaluate(critic_states_batch) 126 | mu_batch = self.actor_critic.action_mean 127 | sigma_batch = self.actor_critic.action_std 128 | entropy_batch = self.actor_critic.entropy 129 | 130 | # KL 131 | if self.desired_kl and self.schedule == 'adaptive': 132 | with torch.inference_mode(): 133 | kl = torch.sum( 134 | torch.log(sigma_batch / old_sigma_batch + 1.e-5) + 135 | (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / 136 | (2.0 * torch.square(sigma_batch)) - 0.5, 137 | axis=-1) 138 | kl_mean = torch.mean(kl) 139 | 140 | if kl_mean > self.desired_kl * 2.0: 141 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 142 | elif (self.desired_kl / 2.0) > kl_mean > 0.0: 143 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 144 | 145 | for param_group in self.optimizer.param_groups: 146 | param_group['lr'] = self.learning_rate 147 | 148 | # Surrogate loss 149 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 150 | surrogate = -torch.squeeze(advantages_batch) * ratio 151 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp( 152 | ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) 153 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 154 | 155 | # Value function loss 156 | if self.use_clipped_value_loss: 157 | value_clipped = target_values_batch + \ 158 | (value_batch - target_values_batch).clamp(-self.clip_param, 159 | self.clip_param) 160 | value_losses = (value_batch - returns_batch).pow(2) 161 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 162 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 163 | else: 164 | value_loss = (returns_batch - value_batch).pow(2).mean() 165 | 166 | loss = surrogate_loss + self.value_loss_coef * value_loss - \ 167 | self.entropy_coef * entropy_batch.mean() 168 | 169 | # Gradient step 170 | self.optimizer.zero_grad() 171 | loss.backward() 172 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 173 | self.optimizer.step() 174 | 175 | mean_value_loss += value_loss.item() 176 | mean_surrogate_loss += surrogate_loss.item() 177 | 178 | num_updates = self.num_learning_epochs * self.num_mini_batches 179 | mean_value_loss /= num_updates 180 | mean_surrogate_loss /= num_updates 181 | self.storage.clear() 182 | 183 | return mean_value_loss, mean_surrogate_loss 184 | 185 | def _get_policy_states(self, states): 186 | return states[0] 187 | 188 | def _get_critic_states(self, states): 189 | return states[1] 190 | -------------------------------------------------------------------------------- /compass/residual_rl/rollout_storage.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | 18 | 19 | class RolloutStorage: 20 | """Storage of rollout info.""" 21 | 22 | class Transition: 23 | """Transition info.""" 24 | 25 | def __init__(self): 26 | self.policy_states = None 27 | self.critic_states = None 28 | self.actions = None 29 | self.rewards = None 30 | self.dones = None 31 | self.values = None 32 | self.actions_log_prob = None 33 | self.action_mean = None 34 | self.action_sigma = None 35 | 36 | def clear(self): 37 | self.__init__() # pylint: disable=unnecessary-dunder-call 38 | 39 | def __init__(self, 40 | num_envs, 41 | num_transitions_per_env, 42 | policy_states_shape, 43 | critic_states_shape, 44 | actions_shape, 45 | device='cpu'): 46 | 47 | self.device = device 48 | 49 | self.policy_states_shape = policy_states_shape 50 | self.actions_shape = actions_shape 51 | 52 | # Core 53 | self.policy_states = torch.zeros(num_transitions_per_env, 54 | num_envs, 55 | *policy_states_shape, 56 | device=self.device) 57 | self.critic_states = torch.zeros(num_transitions_per_env, 58 | num_envs, 59 | *critic_states_shape, 60 | device=self.device) 61 | 62 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 63 | self.actions = torch.zeros(num_transitions_per_env, 64 | num_envs, 65 | *actions_shape, 66 | device=self.device) 67 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 68 | 69 | # For PPO 70 | self.actions_log_prob = torch.zeros(num_transitions_per_env, 71 | num_envs, 72 | 1, 73 | device=self.device) 74 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 75 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 76 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 77 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 78 | self.sigma = torch.zeros(num_transitions_per_env, 79 | num_envs, 80 | *actions_shape, 81 | device=self.device) 82 | 83 | self.num_transitions_per_env = num_transitions_per_env 84 | self.num_envs = num_envs 85 | 86 | self.step = 0 87 | 88 | def add_transitions(self, transition: Transition): 89 | if self.step >= self.num_transitions_per_env: 90 | raise AssertionError("Rollout buffer overflow") 91 | self.policy_states[self.step].copy_(transition.policy_states) 92 | self.critic_states[self.step].copy_(transition.critic_states) 93 | self.actions[self.step].copy_(transition.actions) 94 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 95 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 96 | self.values[self.step].copy_(transition.values) 97 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 98 | self.mu[self.step].copy_(transition.action_mean) 99 | self.sigma[self.step].copy_(transition.action_sigma) 100 | self.step += 1 101 | 102 | def clear(self): 103 | self.step = 0 104 | 105 | def compute_returns(self, last_values, gamma, lam): 106 | advantage = 0 107 | for step in reversed(range(self.num_transitions_per_env)): 108 | if step == self.num_transitions_per_env - 1: 109 | next_values = last_values 110 | else: 111 | next_values = self.values[step + 1] 112 | next_is_not_terminal = 1.0 - self.dones[step].float() 113 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[ 114 | step] 115 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 116 | self.returns[step] = advantage + self.values[step] 117 | 118 | # Compute and normalize the advantages 119 | self.advantages = self.returns - self.values 120 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 121 | 1e-8) 122 | 123 | def get_statistics(self): 124 | done = self.dones 125 | done[-1] = 1 126 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 127 | done_indices = torch.cat( 128 | (flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 129 | 0])) 130 | trajectory_lengths = done_indices[1:] - done_indices[:-1] 131 | return trajectory_lengths.float().mean(), self.rewards.mean() 132 | 133 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 134 | batch_size = self.num_envs * self.num_transitions_per_env 135 | mini_batch_size = batch_size // num_mini_batches 136 | indices = torch.randperm(num_mini_batches * mini_batch_size, 137 | requires_grad=False, 138 | device=self.device) 139 | 140 | policy_states = self.policy_states.flatten(0, 1) 141 | critic_states = self.critic_states.flatten(0, 1) 142 | actions = self.actions.flatten(0, 1) 143 | values = self.values.flatten(0, 1) 144 | returns = self.returns.flatten(0, 1) 145 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 146 | advantages = self.advantages.flatten(0, 1) 147 | old_mu = self.mu.flatten(0, 1) 148 | old_sigma = self.sigma.flatten(0, 1) 149 | 150 | for _ in range(num_epochs): 151 | for i in range(num_mini_batches): 152 | start = i * mini_batch_size 153 | end = (i + 1) * mini_batch_size 154 | batch_idx = indices[start:end] 155 | 156 | states_batch = [policy_states[batch_idx], critic_states[batch_idx]] 157 | actions_batch = actions[batch_idx] 158 | target_values_batch = values[batch_idx] 159 | returns_batch = returns[batch_idx] 160 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 161 | advantages_batch = advantages[batch_idx] 162 | old_mu_batch = old_mu[batch_idx] 163 | old_sigma_batch = old_sigma[batch_idx] 164 | yield states_batch, actions_batch, target_values_batch, advantages_batch, \ 165 | returns_batch, old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, None 166 | -------------------------------------------------------------------------------- /compass/residual_rl/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | 19 | 20 | @torch.jit.script 21 | def preprocess_depth_images(batched_raw_depth_image: torch.Tensor, 22 | target_height: int = 224, 23 | target_width: int = 224): 24 | """ Preporcess the raw depth images, including 25 | 1) replace the inf elements with the maximum depth value. 26 | 2) resize the image to the target size. 27 | 3) replicate the single channel to create a 3-channel tensor for resnet. 28 | 4) normalize the tensor using ImageNet statistics. 29 | 30 | It will raise ValueError if the raw image has NaN. 31 | """ 32 | 33 | if torch.isnan(batched_raw_depth_image).any(): 34 | raise ValueError("Input tensor contains NaN values.") 35 | 36 | # Preprocess for infinity values. 37 | if torch.isinf(batched_raw_depth_image).any(): 38 | batched_raw_depth_image[torch.isinf(batched_raw_depth_image)] = -1 39 | depth_max = batched_raw_depth_image.max() 40 | # Sanity check to make sure we don't get negative depth value. 41 | depth_max = torch.where(depth_max < 0, torch.tensor(1.0), depth_max) 42 | batched_raw_depth_image[batched_raw_depth_image == -1] = depth_max 43 | 44 | # Resize using interpolate 45 | resized_tensors = F.interpolate(batched_raw_depth_image, 46 | size=(target_height, target_width), 47 | mode='bilinear', 48 | align_corners=False) 49 | 50 | # Normalize 51 | # Compute minimum and maximum values 52 | depth_min = resized_tensors.min() 53 | depth_max = resized_tensors.max() 54 | 55 | # Check for the case where all elements are the same 56 | if depth_max == depth_min: 57 | # Return a zero tensor if no variation is present 58 | return torch.zeros_like(resized_tensors).expand(-1, 3, -1, -1) 59 | 60 | # Replicate the single channel to create a 3-channel tensor for resnet encoding. 61 | resized_tensors = resized_tensors.expand(-1, 3, -1, -1) 62 | # Normalize each image in the batch using ImageNet statistics 63 | mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(batched_raw_depth_image.device) 64 | std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(batched_raw_depth_image.device) 65 | 66 | epsilon = 1e-6 67 | resized_tensors = (resized_tensors - mean) / (std + epsilon) 68 | 69 | return resized_tensors 70 | -------------------------------------------------------------------------------- /compass/residual_rl/x_mobility_rl.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | from torch import nn 18 | 19 | from model.x_mobility.x_mobility import XMobility 20 | from model.x_mobility.utils import pack_sequence_dim 21 | 22 | from compass.residual_rl.constants import INPUT_IMAGE_SIZE 23 | 24 | 25 | class XMobilityBasePolicy(nn.Module): 26 | '''Wrapper of the X-Mobility to enable parallel inference during RL training.''' 27 | 28 | def __init__(self, base_policy_ckpt_path): 29 | super().__init__() 30 | # Load the checkpoint and remove the prefix if any. 31 | state_dict = torch.load(base_policy_ckpt_path, weights_only=False)['state_dict'] 32 | state_dict = {k.removeprefix('model.'): v for k, v in state_dict.items()} 33 | # Load the state dict. 34 | self.model = XMobility() 35 | self.model.load_state_dict(state_dict) 36 | 37 | def forward(self, obs_dict, history=None, sample=None, action=None): 38 | b = obs_dict["policy"]["camera_rgb_img"].shape[0] 39 | batch = {} 40 | batch['speed'] = obs_dict["policy"]["base_speed"].reshape(b, 1, -1).float() 41 | batch['image'] = obs_dict["policy"]["camera_rgb_img"].reshape(b, 1, INPUT_IMAGE_SIZE[0], 42 | INPUT_IMAGE_SIZE[1], 43 | 3).float() 44 | batch['image'] = batch['image'].permute(0, 1, 4, 2, 3) / 255.0 45 | batch['history'] = batch['speed'].new_zeros( 46 | (b, self.model.rssm.hidden_state_dim)).float() if history is None else history 47 | batch['sample'] = batch['speed'].new_zeros( 48 | (b, self.model.rssm.state_dim)).float() if sample is None else sample 49 | batch['action'] = batch['speed'].new_zeros( 50 | (b, 6)).float() if action is None else (action.reshape(b, -1).float()) 51 | batch['route'] = obs_dict["policy"]['route'].unsqueeze(1).float() 52 | 53 | action, _, history, sample, _, _, _ = self.model.inference(batch, False, False, False) 54 | action = pack_sequence_dim(action) 55 | latent_state = torch.cat([history, sample], dim=-1).reshape(b, -1) 56 | route_feat = self.model.action_policy.route_encoder(pack_sequence_dim(batch['route'])) 57 | speed_feat = self.model.observation_encoder.speed_encoder(pack_sequence_dim(batch['speed'])) 58 | policy_state = self.model.action_policy.policy_state_fusion(latent_state, route_feat) 59 | 60 | extras = {} 61 | extras["route_feat"] = route_feat 62 | extras["speed_feat"] = speed_feat 63 | return policy_state, action, history, sample, extras 64 | -------------------------------------------------------------------------------- /compass/rl_env/LICENCE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024, The Isaac Lab Project Developers. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /compass/rl_env/README.md: -------------------------------------------------------------------------------- 1 | # Isaac Lab Extension for Mobility Embodiment Specialist using Residual RL 2 | 3 | ## Overview 4 | 5 | This directory contains Isaac Lab extension for mobility embodiment specialist with residual RL. 6 | 7 | ## Installation 8 | 9 | 1. First, install Isaac Lab v1.3.0 and Isaac Sim 4.2 by following the [Isaac Lab installation guide](https://isaac-sim.github.io/IsaacLab/v1.3.0/source/setup/installation/index.html). 10 | 11 | The repo has been tested in Isaac Lab v1.3.0, so follow the steps below to set the correct version: 12 | ```bash 13 | # Clone the Isaac Lab repository 14 | git clone git@github.com:isaac-sim/IsaacLab.git 15 | 16 | # Switch to the correct version 17 | cd IsaacLab 18 | git fetch origin 19 | git checkout v1.3.0 20 | 21 | # Install Isaac Lab 22 | ./isaaclab.sh --install 23 | ``` 24 | > **NOTE**: There might be error messages about RSL_RL not found during installation, we can safely ignore it as a custimized RSL_RL libary is used for residual RL training. 25 | 26 | 27 | 2. Install the mobility_es extension in this directory after Isaac Lab and Sim are installed: 28 | 29 | ```bash 30 | # Set Isaac Lab path 31 | export ISAACLAB_PATH= 32 | 33 | # Install the extension 34 | ${ISAACLAB_PATH}/isaaclab.sh -p -m pip install -e exts/mobility_es 35 | ``` 36 | 37 | ## Usage 38 | 39 | ### Running the Environment 40 | 41 | ```bash 42 | ${ISAACLAB_PATH}/isaaclab.sh -p scripts/play.py --enable_cameras 43 | ``` 44 | 45 | ### Adding New Embodiments 46 | 47 | To add a new robot embodiment, follow these steps: 48 | 49 | 1. **Create Robot Configuration**: 50 | - Follow Isaac Lab [instructions](https://isaac-sim.github.io/IsaacLab/main/source/how-to/write_articulation_cfg.html) to add a new robot articulation configuration in [exts/mobility_es/mobility_es/config/robots.py](exts/mobility_es/mobility_es/config/robots.py) 51 | - Ensure proper joint configurations, collision properties, and physics parameters are set 52 | 53 | 2. **Create Environment Configuration**: 54 | - Create a new RL environment class that inherits from `GoalReachingEnvCfg` in [exts/mobility_es/mobility_es/config/env_cfg.py](exts/mobility_es/mobility_es/config/env_cfg.py) 55 | - Override the following components as needed: 56 | 57 | | Component | Description | 58 | |-----------|-------------| 59 | | `scene.robot` | Set robot articulation configuration | 60 | | `scene.camera` | Configure camera settings | 61 | | `actions` | Define action item for the embodiment | 62 | | `observations` | Configure RL observations | 63 | | `events` | Configure RL events | 64 | | `rewards` | Configure RL rewards | 65 | | `terminations` | Configure RL terminations | 66 | 67 | > **NOTE**: Each embodiment should define its own action term as a low-level controller that maps velocity commands to joint positions. We have pre-defined action terms for different robot embodiments in [exts/mobility_es/mobility_es/mdp/action](exts/mobility_es/mobility_es/mdp/action) that can be re-configured for new embodiments. 68 | 69 | 3. **Register and Verify Environment**: 70 | - Import your new environment in the play.py script [scripts/play.py](scripts/play.py) 71 | - Verify the environment is functional by running the play.py script with the new environment 72 | 73 | 74 | 3. **Examples of Existing Embodiments**: 75 | - [H1 Humanoid Robot](exts/mobility_es/mobility_es/config/h1_env_cfg.py) 76 | - [Carter Mobile Robot](exts/mobility_es/mobility_es/config/carter_env_cfg.py) 77 | - [Spot Quadruped Robot](exts/mobility_es/mobility_es/config/spot_env_cfg.py) 78 | - [G1 Humanoid Robot](exts/mobility_es/mobility_es/config/g1_env_cfg.py) 79 | - [Digit Humanoid Robot](exts/mobility_es/mobility_es/config/digit_env_cfg.py) 80 | 81 | 82 | ### Adding New Scenes 83 | 84 | To add a new scene, follow these steps: 85 | 86 | 1. **Create Scene Configuration**: 87 | - Follow Isaac Lab [instructions](https://isaac-sim.github.io/IsaacLab/main/source/tutorials/02_scene/create_scene.html) to add a new scene configuration in [exts/mobility_es/mobility_es/config/environments.py](exts/mobility_es/mobility_es/config/environments.py), make sure to inherit from `EnvSceneAssetCfg` with proper parameters like pose_sample_range, env_spacing, replicate_physics, etc. 88 | 89 | 90 | 2. **Add Occupancy Map [Optional]**: 91 | - We use occupancy map for collision-free pose sampling, while optional, we recommend adding the occupancy map for the new scene to improve sampling efficiency. 92 | - For occupancy map generation, follow this [insturction](https://docs.omniverse.nvidia.com/isaacsim/latest/features/ext_omni_isaac_occupancy_map.html) to create the occupancy map with the new scene USD file in Isaac Sim. Make sure to rotate the occupancy map properly to match the scene orientation and set the origin as the top-left of the image in the occupancy_map.yaml file. 93 | 94 | ![Occupancy Map Example](../../images/omap_generation.png) 95 | 96 | - Add the occupancy_map.yaml path in `OMAP_PATHS` in [exts/mobility_es/mobility_es/config/environments.py](exts/mobility_es/mobility_es/config/environments.py) with key as the scene name and value as the occupancy map yaml path. 97 | 98 | 3. **Register and Verify Scene**: 99 | - Import your new scene in the play.py script [scripts/play.py](scripts/play.py) 100 | - Verify the scene is functional by running the play.py script with the new scene by setting `env_cfg.scene.environment = MyNewSceneCfg()` 101 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/config/extension.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | 3 | # Semantic Versioning is used: https://semver.org/ 4 | version = "0.1.0" 5 | 6 | # Description 7 | category = "isaaclab" 8 | readme = "README.md" 9 | 10 | title = "Extension Template" 11 | author = "Isaac Lab Project Developers" 12 | maintainer = "Isaac Lab Project Developers" 13 | description="Extension Template for Isaac Lab" 14 | repository = "https://github.com/isaac-sim/IsaacLabExtensionTemplate.git" 15 | keywords = ["extension", "template", "isaaclab"] 16 | 17 | [dependencies] 18 | "omni.isaac.lab" = {} 19 | "omni.isaac.lab_assets" = {} 20 | "omni.isaac.lab_tasks" = {} 21 | # NOTE: Add additional dependencies here 22 | 23 | [[python.module]] 24 | name = "mobility_es" 25 | 26 | [isaaclab_settings] 27 | # TODO: Uncomment and list any apt dependencies here. 28 | # If none, leave it commented out. 29 | # apt_deps = ["example_package"] 30 | # TODO: Uncomment and provide path to a ros_ws 31 | # with rosdeps to be installed. If none, 32 | # leave it commented out. 33 | # ros_ws = "path/from/extension_root/to/ros_ws" 34 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/docs/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | 0.1.0 (2024-01-29) 5 | ~~~~~~~~~~~~~~~~~~ 6 | 7 | Added 8 | ^^^^^ 9 | 10 | * Created an initial template for building an extension or project based on Isaac Lab 11 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Python module serving as a project/extension template. 18 | """ 19 | 20 | # Register Gym environments. 21 | from .config import * 22 | from .mdp import * 23 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/ckpt/digit_locomotion_policy.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/digit_locomotion_policy.pt -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/ckpt/g1_locomotion_policy.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/g1_locomotion_policy.pt -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/ckpt/h1_locomotion_policy.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/h1_locomotion_policy.pt -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/ckpt/spot_locomotion_policy.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/spot_locomotion_policy.pt -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/carter_env_cfg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | from omni.isaac.lab.envs import mdp 19 | from omni.isaac.lab.utils import configclass 20 | from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm 21 | from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm 22 | from omni.isaac.lab.managers import EventTermCfg as EventTerm 23 | from omni.isaac.lab.managers import SceneEntityCfg 24 | 25 | from mobility_es.config import scene_assets 26 | from mobility_es.config import robots 27 | from mobility_es.config.env_cfg import GoalReachingEnvCfg 28 | from mobility_es.mdp.action.non_holonomic_perfect_control_action import NonHolonomicPerfectControlAction 29 | 30 | 31 | @configclass 32 | class CarterActionsCfg: 33 | """Action specifications for the MDP.""" 34 | 35 | drive_joints = mdp.ActionTermCfg(class_type=NonHolonomicPerfectControlAction, 36 | asset_name="robot") 37 | 38 | 39 | @configclass 40 | class CarterGoalReachingEnvCfg(GoalReachingEnvCfg): 41 | """Configuration for the carter to reach a 2D target pose environment.""" 42 | 43 | def __post_init__(self): 44 | super().__post_init__() 45 | self.scene.robot = robots.carter.replace(prim_path="{ENV_REGEX_NS}/Robot") 46 | self.scene.camera = scene_assets.camera.replace( 47 | prim_path="{ENV_REGEX_NS}/Robot/chassis_link/front_cam") 48 | self.actions = CarterActionsCfg() 49 | self.observations.locomotion = None 50 | self.events.reset_robot_joints = None 51 | self.observations.eval.fall_down = ObsTerm( 52 | func=mdp.illegal_contact, 53 | params={ 54 | "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["chassis_link"]), 55 | "threshold": 1.0 56 | }, 57 | ) 58 | self.terminations.base_contact = DoneTerm( 59 | func=mdp.illegal_contact, 60 | params={ 61 | "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["chassis_link"]), 62 | "threshold": 1.0 63 | }, 64 | ) 65 | self.events.base_external_force_torque = EventTerm( 66 | func=mdp.apply_external_force_torque, 67 | mode="reset", 68 | params={ 69 | "asset_cfg": SceneEntityCfg("robot", body_names="chassis_link"), 70 | "force_range": (0.0, 0.0), 71 | "torque_range": (-0.0, 0.0), 72 | }, 73 | ) 74 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/digit_env_cfg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import pathlib 19 | 20 | from omni.isaac.lab.envs import mdp 21 | from omni.isaac.lab.sensors import CameraCfg 22 | from omni.isaac.lab.managers import EventTermCfg as EventTerm 23 | from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm 24 | from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm 25 | from omni.isaac.lab.managers import SceneEntityCfg 26 | from omni.isaac.lab.utils import configclass 27 | from omni.isaac.lab.utils.noise import AdditiveUniformNoiseCfg as Unoise 28 | 29 | from mobility_es.config import scene_assets 30 | from mobility_es.config.robots import digit, DIGIT_ACTUATED_JOINT_NAMES 31 | from mobility_es.config.env_cfg import GoalReachingEnvCfg 32 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg 33 | 34 | FILE_DIR = pathlib.Path(__file__).parent 35 | 36 | 37 | @configclass 38 | class DigitActionsCfg: 39 | """Action specifications for the MDP.""" 40 | 41 | drive_joints = LocomotionPolicyActionCfg( 42 | class_type=LocomotionPolicyAction, 43 | asset_name="robot", 44 | policy_ckpt_path=FILE_DIR / "../ckpt/digit_locomotion_policy.pt", 45 | joint_names=DIGIT_ACTUATED_JOINT_NAMES, 46 | scale=0.5, 47 | use_default_offset=True, 48 | ) 49 | 50 | 51 | @configclass 52 | class DigitGoalReachingEnvCfg(GoalReachingEnvCfg): 53 | """Configuration for the Digit to reach a 2D target pose environment.""" 54 | 55 | def __post_init__(self): 56 | super().__post_init__() 57 | self.scene.robot = digit.replace(prim_path="{ENV_REGEX_NS}/Robot") 58 | self.scene.camera = scene_assets.camera.replace( 59 | prim_path="{ENV_REGEX_NS}/Robot/torso_base/front_cam") 60 | self.scene.camera.offset = CameraCfg.OffsetCfg(pos=(0.06, 0.0, -0.065), 61 | rot=(-0.2705, 0.6532, -0.6532, 0.2705981), 62 | convention="ros") 63 | 64 | self.actions = DigitActionsCfg() 65 | self.observations.locomotion.joint_pos = ObsTerm( 66 | func=mdp.joint_pos_rel, 67 | params={"asset_cfg": SceneEntityCfg("robot", joint_names=DIGIT_ACTUATED_JOINT_NAMES)}, 68 | noise=Unoise(n_min=-0.01, n_max=0.01), 69 | ) 70 | self.observations.locomotion.joint_vel = ObsTerm( 71 | func=mdp.joint_vel_rel, 72 | params={"asset_cfg": SceneEntityCfg("robot", joint_names=DIGIT_ACTUATED_JOINT_NAMES)}, 73 | noise=Unoise(n_min=-1.5, n_max=1.5), 74 | ) 75 | self.observations.eval.fall_down = ObsTerm( 76 | func=mdp.illegal_contact, 77 | params={ 78 | "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["torso_base"]), 79 | "threshold": 1.0 80 | }, 81 | ) 82 | self.events.base_external_force_torque = EventTerm( 83 | func=mdp.apply_external_force_torque, 84 | mode="reset", 85 | params={ 86 | "asset_cfg": SceneEntityCfg("robot", body_names=".*torso_base"), 87 | "force_range": (0.0, 0.0), 88 | "torque_range": (-0.0, 0.0), 89 | }, 90 | ) 91 | self.terminations.base_contact = DoneTerm( 92 | func=mdp.illegal_contact, 93 | params={ 94 | "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["torso_base"]), 95 | "threshold": 1.0 96 | }, 97 | ) 98 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/environments.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import os 17 | 18 | import omni.isaac.lab.sim as sim_utils 19 | from omni.isaac.lab.assets import AssetBaseCfg 20 | from omni.isaac.lab.utils import configclass 21 | from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR 22 | 23 | USD_PATHS = { 24 | 'CombinedSingleRack': 25 | os.path.join(os.path.dirname(__file__), "../usd/combined_simple_warehouse/combined.usd"), 26 | 'CombinedMultiRack': 27 | os.path.join(os.path.dirname(__file__), "../usd/combined_multi_rack/combined.usd"), 28 | 'GalileoLab': 29 | os.path.join(os.path.dirname(__file__), 30 | "../usd/galileo_lab_no_robot_no_wall/galileo_lab_no_robot_no_wall.usd"), 31 | 'WarehouseSingleRack': 32 | os.path.join( 33 | os.path.dirname(__file__), 34 | "../usd/sample_small_footprint_one_rack_obst_sdg/sample_small_footprint_one_rack_obst_sdg.usd" #pylint: disable=line-too-long 35 | ), 36 | 'WarehouseMultiRack': 37 | f'{ISAAC_NUCLEUS_DIR}/Environments/Simple_Warehouse/warehouse_multiple_shelves.usd', 38 | 'SimpleOffice': 39 | os.path.join(os.path.dirname(__file__), "../usd/office/office.usd"), 40 | 'SimpleWarehouse': 41 | os.path.join(os.path.dirname(__file__), 42 | "../usd/simple_warehouse_no_roof/simple_warehouse_no_roof.usd"), 43 | 'Hospital': 44 | f'{ISAAC_NUCLEUS_DIR}/Environments/Hospital/hospital.usd', 45 | } 46 | 47 | OMAP_PATHS = { 48 | 'CombinedSingleRack': 49 | os.path.join(os.path.dirname(__file__), 50 | "../usd/combined_simple_warehouse/omap/occupancy_map.yaml"), 51 | 'CombinedMultiRack': 52 | os.path.join(os.path.dirname(__file__), 53 | "../usd/combined_multi_rack/omap/occupancy_map.yaml"), 54 | 'WarehouseSingleRack': 55 | os.path.join(os.path.dirname(__file__), 56 | "../usd/sample_small_footprint_one_rack_obst_sdg/omap/occupancy_map.yaml"), 57 | 'WarehouseMultiRack': 58 | os.path.join(os.path.dirname(__file__), 59 | "../usd/warehouse_multi_rack/omap/occupancy_map.yaml"), 60 | 'SimpleOffice': 61 | os.path.join(os.path.dirname(__file__), "../usd/office/omap/occupancy_map.yaml"), 62 | 'Hospital': 63 | os.path.join(os.path.dirname(__file__), "../usd/hospital/omap/occupancy_map.yaml"), 64 | } 65 | 66 | 67 | @configclass 68 | class EnvSceneAssetCfg(AssetBaseCfg): 69 | """EnvSceneAssetCfg to add additional scene parameters to AssetBaseCfg. 70 | """ 71 | 72 | # Range for robot pose sampling. 73 | pose_sample_range = {"x": (-5, 5), "y": (-5, 5), "yaw": (-3.14, 3.14)} 74 | 75 | # Env spacing 76 | env_spacing = 50 77 | 78 | # Replicate physics in the scene. 79 | replicate_physics = True 80 | 81 | 82 | # Adding a USD scene with combined office, galileo lab and warehouse single rack. 83 | combined_single_rack = EnvSceneAssetCfg( 84 | prim_path="{ENV_REGEX_NS}/CombinedSingleRack", 85 | init_state=AssetBaseCfg.InitialStateCfg( 86 | pos=(0, 0, 0.01), 87 | rot=(1.0, 0.0, 0.0, 0.0), 88 | ), 89 | spawn=sim_utils.UsdFileCfg( 90 | usd_path=USD_PATHS['CombinedSingleRack'], 91 | scale=(1.0, 1.0, 1.0), 92 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 93 | disable_gravity=None, 94 | solver_position_iteration_count=4, 95 | solver_velocity_iteration_count=1, 96 | ), 97 | ), 98 | pose_sample_range={ 99 | "x": (-10, 10), 100 | "y": (-12, 17.5), 101 | "yaw": (-3.14, 3.14) 102 | }, 103 | ) 104 | 105 | # Adding a USD scene with combined office, galileo lab and warehouse multi rack. 106 | combined_multi_rack = EnvSceneAssetCfg( 107 | prim_path="{ENV_REGEX_NS}/CombinedMultiRack", 108 | init_state=AssetBaseCfg.InitialStateCfg( 109 | pos=(0, 0, 0.01), 110 | rot=(1.0, 0.0, 0.0, 0.0), 111 | ), 112 | spawn=sim_utils.UsdFileCfg( 113 | usd_path=USD_PATHS['CombinedMultiRack'], 114 | scale=(1.0, 1.0, 1.0), 115 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 116 | disable_gravity=None, 117 | solver_position_iteration_count=4, 118 | solver_velocity_iteration_count=1, 119 | ), 120 | ), 121 | pose_sample_range={ 122 | "x": (-31.5, 8), 123 | "y": (-12, 19), 124 | "yaw": (-3.14, 3.14) 125 | }, 126 | ) 127 | 128 | # Adding a USD scene for galileo lab 129 | galileo_lab = EnvSceneAssetCfg( 130 | prim_path="{ENV_REGEX_NS}/GalileoLab", 131 | init_state=AssetBaseCfg.InitialStateCfg( 132 | pos=(0, 0, 0.01), 133 | rot=(1.0, 0.0, 0.0, 0.0), 134 | ), 135 | spawn=sim_utils.UsdFileCfg( 136 | usd_path=USD_PATHS['GalileoLab'], 137 | scale=(1.0, 1.0, 1.0), 138 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 139 | disable_gravity=None, 140 | solver_position_iteration_count=4, 141 | solver_velocity_iteration_count=1, 142 | ), 143 | ), 144 | env_spacing=20, 145 | ) 146 | 147 | # Adding a USD scene for warehouse with single rack 148 | warehouse_single_rack = EnvSceneAssetCfg( 149 | prim_path="{ENV_REGEX_NS}/WarehouseSingleRack", 150 | init_state=AssetBaseCfg.InitialStateCfg( 151 | pos=(0, 0, 0.01), 152 | rot=(1.0, 0.0, 0.0, 0.0), 153 | ), 154 | spawn=sim_utils.UsdFileCfg( 155 | usd_path=USD_PATHS['WarehouseSingleRack'], 156 | scale=(1.0, 1.0, 1.0), 157 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 158 | disable_gravity=None, 159 | solver_position_iteration_count=4, 160 | solver_velocity_iteration_count=1, 161 | ), 162 | ), 163 | ) 164 | 165 | # Adding a USD scene for warehouse with multi rack 166 | warehouse_multi_rack = EnvSceneAssetCfg( 167 | prim_path="{ENV_REGEX_NS}/WarehouseMultiRack", 168 | init_state=AssetBaseCfg.InitialStateCfg( 169 | pos=(0, 0, 0.01), 170 | rot=(1.0, 0.0, 0.0, 0.0), 171 | ), 172 | spawn=sim_utils.UsdFileCfg( 173 | usd_path=USD_PATHS['WarehouseMultiRack'], 174 | scale=(1.0, 1.0, 1.0), 175 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 176 | disable_gravity=None, 177 | solver_position_iteration_count=4, 178 | solver_velocity_iteration_count=1, 179 | ), 180 | ), 181 | pose_sample_range={ 182 | "x": (-9, 9), 183 | "y": (-8, 12), 184 | "yaw": (-3.14, 3.14) 185 | }, 186 | ) 187 | 188 | # Adding a USD scene for simple office. 189 | simple_office = EnvSceneAssetCfg( 190 | prim_path="{ENV_REGEX_NS}/SimpleOffice", 191 | init_state=AssetBaseCfg.InitialStateCfg( 192 | pos=(0, 0, 0.01), 193 | rot=(1.0, 0.0, 0.0, 0.0), 194 | ), 195 | spawn=sim_utils.UsdFileCfg( 196 | usd_path=USD_PATHS['SimpleOffice'], 197 | scale=(1.0, 1.0, 1.0), 198 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 199 | disable_gravity=None, 200 | solver_position_iteration_count=4, 201 | solver_velocity_iteration_count=1, 202 | ), 203 | ), 204 | env_spacing=15, 205 | ) 206 | 207 | # Adding a USD scene for hospital. 208 | hospital = EnvSceneAssetCfg( 209 | prim_path="{ENV_REGEX_NS}/Hospital", 210 | init_state=AssetBaseCfg.InitialStateCfg( 211 | pos=(0, 0, 0.01), 212 | rot=(1.0, 0.0, 0.0, 0.0), 213 | ), 214 | spawn=sim_utils.UsdFileCfg( 215 | usd_path=USD_PATHS['Hospital'], 216 | scale=(1.0, 1.0, 1.0), 217 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 218 | disable_gravity=None, 219 | solver_position_iteration_count=4, 220 | solver_velocity_iteration_count=1, 221 | ), 222 | ), 223 | pose_sample_range={ 224 | "x": (-49, 27), 225 | "y": (-4, 17), 226 | "yaw": (-3.14, 3.14) 227 | }, 228 | env_spacing=80, 229 | ) 230 | 231 | # Random sample a USD scene from the given list. 232 | random_envs = EnvSceneAssetCfg( 233 | prim_path="{ENV_REGEX_NS}/RandomEnvs", 234 | init_state=AssetBaseCfg.InitialStateCfg( 235 | pos=(0, 0, 0.01), 236 | rot=(1.0, 0.0, 0.0, 0.0), 237 | ), 238 | spawn=sim_utils.MultiUsdFileCfg( 239 | usd_path=[USD_PATHS['SimpleOffice'], USD_PATHS['GalileoLab'], USD_PATHS['SimpleWarehouse']], 240 | random_choice=True, 241 | scale=(1.0, 1.0, 1.0), 242 | rigid_props=sim_utils.RigidBodyPropertiesCfg( 243 | disable_gravity=None, 244 | solver_position_iteration_count=4, 245 | solver_velocity_iteration_count=1, 246 | ), 247 | ), 248 | replicate_physics=False, 249 | ) 250 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/g1_env_cfg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import os 19 | 20 | from omni.isaac.lab.utils import configclass 21 | from omni.isaac.lab.sensors import CameraCfg 22 | 23 | from mobility_es.config import scene_assets 24 | from mobility_es.config import robots 25 | from mobility_es.config.env_cfg import GoalReachingEnvCfg 26 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg 27 | 28 | 29 | @configclass 30 | class G1ActionsCfg: 31 | """Action specifications for the MDP.""" 32 | 33 | drive_joints = LocomotionPolicyActionCfg(class_type=LocomotionPolicyAction, 34 | asset_name="robot", 35 | policy_ckpt_path=os.path.join( 36 | os.path.dirname(__file__), 37 | "../ckpt/g1_locomotion_policy.pt"), 38 | joint_names=[".*"], 39 | scale=0.5, 40 | use_default_offset=True) 41 | 42 | 43 | @configclass 44 | class G1GoalReachingEnvCfg(GoalReachingEnvCfg): 45 | """Configuration for the G1 to reach a 2D target pose environment.""" 46 | 47 | def __post_init__(self): 48 | super().__post_init__() 49 | self.scene.robot = robots.g1.replace(prim_path="{ENV_REGEX_NS}/Robot") 50 | self.scene.camera = scene_assets.camera.replace( 51 | prim_path="{ENV_REGEX_NS}/Robot/torso_link/front_cam") 52 | self.scene.camera.offset = CameraCfg.OffsetCfg(pos=(0.1, 0.0, 0.45), 53 | rot=(-0.2705, 0.6532, -0.6532, 0.2705981), 54 | convention="ros") 55 | 56 | self.actions = G1ActionsCfg() 57 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/h1_env_cfg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import os 19 | 20 | from omni.isaac.lab.utils import configclass 21 | from omni.isaac.lab.sensors import CameraCfg 22 | 23 | from mobility_es.config import scene_assets 24 | from mobility_es.config import robots 25 | from mobility_es.config.env_cfg import GoalReachingEnvCfg 26 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg 27 | 28 | 29 | @configclass 30 | class H1ActionsCfg: 31 | """Action specifications for the MDP.""" 32 | 33 | drive_joints = LocomotionPolicyActionCfg(class_type=LocomotionPolicyAction, 34 | asset_name="robot", 35 | policy_ckpt_path=os.path.join( 36 | os.path.dirname(__file__), 37 | "../ckpt/h1_locomotion_policy.pt"), 38 | joint_names=[".*"], 39 | scale=0.5, 40 | use_default_offset=True) 41 | 42 | 43 | @configclass 44 | class H1GoalReachingEnvCfg(GoalReachingEnvCfg): 45 | """Configuration for the H1 to reach a 2D target pose environment.""" 46 | 47 | def __post_init__(self): 48 | super().__post_init__() 49 | self.scene.robot = robots.h1.replace(prim_path="{ENV_REGEX_NS}/Robot") 50 | self.scene.camera = scene_assets.camera.replace( 51 | prim_path="{ENV_REGEX_NS}/Robot/torso_link/front_cam") 52 | self.scene.camera.offset = CameraCfg.OffsetCfg(pos=(0.15, 0.0, 0.65), 53 | rot=(-0.2705, 0.6532, -0.6532, 0.2705981), 54 | convention="ros") 55 | 56 | self.actions = H1ActionsCfg() 57 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/scene_assets.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import omni.isaac.lab.sim as sim_utils 17 | from omni.isaac.lab.assets import AssetBaseCfg 18 | from omni.isaac.lab.sensors import CameraCfg, TiledCameraCfg 19 | from omni.isaac.lab.terrains import TerrainImporterCfg 20 | 21 | # Terrain 22 | terrain = TerrainImporterCfg( 23 | prim_path="/World/ground", 24 | terrain_type="plane", 25 | debug_vis=False, 26 | ) 27 | 28 | # Lights 29 | light = AssetBaseCfg( 30 | prim_path="/World/light", 31 | spawn=sim_utils.DistantLightCfg(color=(1.0, 1.0, 1.0), intensity=1000.0), 32 | ) 33 | 34 | # Camera 35 | camera = TiledCameraCfg( 36 | prim_path="{ENV_REGEX_NS}/Robot/chassis_link/front_cam", 37 | update_period=0.0, 38 | height=320, 39 | width=512, 40 | data_types=["rgb", "depth"], 41 | spawn=sim_utils.PinholeCameraCfg(focal_length=10.0, 42 | focus_distance=400.0, 43 | horizontal_aperture=20.955, 44 | clipping_range=(0.1, 20.0)), 45 | offset=CameraCfg.OffsetCfg(pos=(0.10434, 0.0, 0.37439), 46 | rot=(0.5, -0.5, 0.5, -0.5), 47 | convention="ros"), 48 | ) 49 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/config/spot_env_cfg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import os 19 | 20 | from omni.isaac.lab.envs import mdp 21 | from omni.isaac.lab.managers import EventTermCfg as EventTerm 22 | from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm 23 | from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm 24 | from omni.isaac.lab.managers import SceneEntityCfg 25 | from omni.isaac.lab.utils import configclass 26 | 27 | from mobility_es.config import scene_assets 28 | from mobility_es.config import robots 29 | from mobility_es.config.env_cfg import GoalReachingEnvCfg 30 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg 31 | 32 | 33 | @configclass 34 | class SpotActionsCfg: 35 | """Action specifications for the MDP.""" 36 | 37 | drive_joints = LocomotionPolicyActionCfg(class_type=LocomotionPolicyAction, 38 | asset_name="robot", 39 | policy_ckpt_path=os.path.join( 40 | os.path.dirname(__file__), 41 | "../ckpt/spot_locomotion_policy.pt"), 42 | joint_names=[".*"], 43 | scale=0.2, 44 | use_default_offset=True) 45 | 46 | 47 | @configclass 48 | class SpotGoalReachingEnvCfg(GoalReachingEnvCfg): 49 | """Configuration for the Spot to reach a 2D target pose environment.""" 50 | 51 | def __post_init__(self): 52 | super().__post_init__() 53 | self.scene.robot = robots.spot.replace(prim_path="{ENV_REGEX_NS}/Robot") 54 | self.scene.camera = scene_assets.camera.replace( 55 | prim_path="{ENV_REGEX_NS}/Robot/body/front_cam") 56 | self.actions = SpotActionsCfg() 57 | self.observations.eval.fall_down = ObsTerm( 58 | func=mdp.illegal_contact, 59 | params={ 60 | "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]), 61 | "threshold": 1.0 62 | }, 63 | ) 64 | self.events.base_external_force_torque = EventTerm( 65 | func=mdp.apply_external_force_torque, 66 | mode="reset", 67 | params={ 68 | "asset_cfg": SceneEntityCfg("robot", body_names="body"), 69 | "force_range": (0.0, 0.0), 70 | "torque_range": (-0.0, 0.0), 71 | }, 72 | ) 73 | self.terminations.base_contact = DoneTerm( 74 | func=mdp.illegal_contact, 75 | params={ 76 | "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]), 77 | "threshold": 1.0 78 | }, 79 | ) 80 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/action/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/action/action_visualization.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | 20 | import omni.isaac.lab.utils.math as math_utils 21 | from omni.isaac.lab.markers import VisualizationMarkers 22 | from omni.isaac.lab.markers.config import BLUE_ARROW_X_MARKER_CFG, GREEN_ARROW_X_MARKER_CFG 23 | from omni.isaac.lab.assets.articulation import Articulation 24 | 25 | ACTION_HEIGHT = 0.8 26 | ACTION_GAP_HEIGHT = 0.1 27 | 28 | 29 | class ActionVisualizer: 30 | """Visualizer of the actions.""" 31 | 32 | def __init__(self, cfg, env): 33 | self.robot: Articulation = env.scene[cfg.asset_name] 34 | self._base_markers = None 35 | self._residual_markers = None 36 | 37 | def visualize(self, base_action, residual_action): 38 | if not self._base_markers or not self._residual_markers: 39 | self._initialize_action_markers() 40 | # Visualize base action. 41 | robot_pos_w = self.robot.data.root_pos_w.clone() 42 | robot_pos_w[:, 2] += ACTION_HEIGHT 43 | base_action_arrow_scale, base_action_arrow_quat = self._resolve_xy_velocity_to_arrow( 44 | base_action[:, :2]) 45 | self._base_markers.visualize(robot_pos_w, base_action_arrow_quat, base_action_arrow_scale) 46 | 47 | # Visualize residual action. 48 | robot_pos_w[:, 2] += ACTION_GAP_HEIGHT 49 | residual_action_arrow_scale, residual_action_arrow_quat = self._resolve_xy_velocity_to_arrow( # pylint: disable=line-too-long 50 | residual_action[:, :2]) 51 | self._residual_markers.visualize(robot_pos_w, residual_action_arrow_quat, 52 | residual_action_arrow_scale) 53 | 54 | def _initialize_action_markers(self): 55 | print("Initialize markers for actions.") 56 | base_marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy() 57 | base_marker_cfg.prim_path = "/Visuals/Action/base" 58 | self._base_markers = VisualizationMarkers(base_marker_cfg) 59 | self._base_markers.set_visibility(True) 60 | 61 | residual_marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy() 62 | residual_marker_cfg.prim_path = "/Visuals/Action/residual" 63 | self._residual_markers = VisualizationMarkers(residual_marker_cfg) 64 | self._residual_markers.set_visibility(True) 65 | 66 | def _resolve_xy_velocity_to_arrow( 67 | self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 68 | """Converts the XY base velocity command to arrow direction rotation.""" 69 | # obtain default scale of the marker 70 | default_scale = self._base_markers.cfg.markers["arrow"].scale 71 | # arrow-scale 72 | arrow_scale = torch.tensor(default_scale, 73 | device=xy_velocity.device).repeat(xy_velocity.shape[0], 1) 74 | # pylint: disable=not-callable 75 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) * 5.0 76 | # arrow-direction 77 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0]) 78 | zeros = torch.zeros_like(heading_angle) 79 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle) 80 | # convert everything back from base to world frame 81 | base_quat_w = self.robot.data.root_quat_w 82 | arrow_quat = math_utils.quat_mul(base_quat_w, arrow_quat) 83 | 84 | return arrow_scale, arrow_quat 85 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/action/differential_drive_action.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | from collections.abc import Sequence 20 | from dataclasses import MISSING 21 | 22 | from omni.isaac.lab.assets.articulation import Articulation 23 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg 24 | from omni.isaac.lab.utils import configclass 25 | 26 | from mobility_es.mdp.action.action_visualization import ActionVisualizer 27 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 28 | 29 | 30 | @configclass 31 | class DifferentialDriveActionCfg(ActionTermCfg): 32 | """Configuration for the differential drive action term with left and right wheel joints. 33 | 34 | See :class:`DifferentialDriveAction` for more details. 35 | """ 36 | 37 | class_type: type[ActionTerm] = MISSING 38 | # Name of the left wheel joint. 39 | left_joint_name: str = MISSING 40 | # Name of the right wheel joint. 41 | right_joint_name: str = MISSING 42 | # Differential drive wheel parameters. 43 | wheel_base: float = MISSING 44 | wheel_radius: float = MISSING 45 | 46 | 47 | class DifferentialDriveAction(ActionTerm): 48 | """Action item for differential drive.""" 49 | 50 | cfg: DifferentialDriveActionCfg 51 | """The articulation asset on which the action term is applied.""" 52 | _asset: Articulation 53 | 54 | def __init__(self, cfg: DifferentialDriveActionCfg, env: RLESEnvWrapper): 55 | # Initialize the action term 56 | super().__init__(cfg, env) 57 | 58 | # Parse the joint information 59 | # -- Left joint 60 | left_joint_id, _ = self._asset.find_joints(self.cfg.left_joint_name) 61 | if len(left_joint_id) != 1: 62 | raise ValueError( 63 | f"Found more than one joint for the left joint name: {self.cfg.left_joint_name}") 64 | # -- Right joint 65 | right_joint_id, _ = self._asset.find_joints(self.cfg.right_joint_name) 66 | if len(right_joint_id) != 1: 67 | raise ValueError( 68 | f"Found more than one joint for the right joint name: {self.cfg.right_joint_name}") 69 | 70 | # Process into a list of joint ids 71 | self._joint_ids = [left_joint_id[0], right_joint_id[0]] 72 | 73 | # Create tensors for raw and processed actions 74 | self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device) 75 | self._processed_actions = torch.zeros_like(self.raw_actions) 76 | self._joint_vel_command = torch.zeros(self.num_envs, 2, device=self.device) 77 | 78 | # Visualization 79 | self._visualizer = ActionVisualizer(cfg, env) 80 | 81 | @property 82 | def action_dim(self) -> int: 83 | return 6 84 | 85 | @property 86 | def raw_actions(self) -> torch.Tensor: 87 | return self._raw_actions 88 | 89 | @property 90 | def processed_actions(self) -> torch.Tensor: 91 | return self._processed_actions 92 | 93 | def process_actions(self, actions): 94 | # store the raw actions 95 | self._raw_actions[:] = actions 96 | self._processed_actions = self.raw_actions 97 | 98 | def apply_actions(self): 99 | self._joint_vel_command[:, 0] = ( 100 | self._processed_actions[:, 0] - 101 | self._processed_actions[:, 5] * self.cfg.wheel_base * 0.5) / self.cfg.wheel_radius 102 | self._joint_vel_command[:, 1] = ( 103 | self._processed_actions[:, 0] + 104 | self._processed_actions[:, 5] * self.cfg.wheel_base * 0.5) / self.cfg.wheel_radius 105 | # Set the joint velocity targets 106 | self._asset.set_joint_velocity_target(self._joint_vel_command, joint_ids=self._joint_ids) 107 | 108 | def reset(self, env_ids: Sequence[int] | None = None) -> None: 109 | self._raw_actions[env_ids] = 0.0 110 | 111 | def visualize(self, base_action, residual_action): 112 | self._visualizer.visualize(base_action, residual_action) 113 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/action/locomotion_policy_action.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | from collections.abc import Sequence 20 | from dataclasses import MISSING 21 | 22 | from omni.isaac.lab.assets.articulation import Articulation 23 | from omni.isaac.lab.envs import mdp 24 | from omni.isaac.lab.managers.action_manager import ActionTerm 25 | from omni.isaac.lab.utils import configclass 26 | 27 | from mobility_es.mdp.action.action_visualization import ActionVisualizer 28 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 29 | 30 | 31 | @configclass 32 | class LocomotionPolicyActionCfg(mdp.JointPositionActionCfg): 33 | """Configuration for the action term with locomotion policies. 34 | 35 | See :class:`LocomotionPolicyActionCfg` for more details. 36 | """ 37 | 38 | class_type: type[ActionTerm] = MISSING 39 | policy_ckpt_path: str = MISSING 40 | 41 | 42 | class LocomotionPolicyAction(mdp.JointPositionAction): 43 | """Action item for locomotion policies.""" 44 | 45 | cfg: LocomotionPolicyActionCfg 46 | """The articulation asset on which the action term is applied.""" 47 | _asset: Articulation 48 | 49 | def __init__(self, cfg: LocomotionPolicyActionCfg, env: RLESEnvWrapper): 50 | super().__init__(cfg, env) 51 | 52 | # Load the env and locomotion policy. 53 | self.env = env 54 | self._locomotion_policy = torch.jit.load(self.cfg.policy_ckpt_path).to(self.device) 55 | 56 | # Create tensors for raw and processed actions 57 | self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device) 58 | self._processed_actions = torch.zeros_like(self.raw_actions) 59 | self._last_joint_actions = torch.zeros(self.num_envs, self._num_joints, device=self.device) 60 | 61 | # Visualization 62 | self._visualizer = ActionVisualizer(cfg, env) 63 | 64 | @property 65 | def action_dim(self) -> int: 66 | return 6 67 | 68 | @property 69 | def raw_actions(self) -> torch.Tensor: 70 | return self._raw_actions 71 | 72 | @property 73 | def processed_actions(self) -> torch.Tensor: 74 | return self._processed_actions 75 | 76 | def process_actions(self, actions): 77 | # store the raw actions 78 | self._raw_actions[:] = actions 79 | self._processed_actions = self.raw_actions 80 | 81 | def apply_actions(self): 82 | # Get locomotion observation. 83 | obs_dict = self.env.observation_manager.compute_group('locomotion') 84 | 85 | base_lin_vel = obs_dict['base_lin_vel'] 86 | base_ang_vel = obs_dict['base_ang_vel'] 87 | projected_gravity = obs_dict['projected_gravity'] 88 | velocity_commands = self._processed_actions[:, [0, 1, 5]] 89 | joint_pos = obs_dict['joint_pos'] 90 | joint_vel = obs_dict['joint_vel'] 91 | last_joint_actions = self._last_joint_actions 92 | # Concatenate to compose the input obs tensor. (Order preserved) 93 | obs = torch.cat([ 94 | base_lin_vel, base_ang_vel, projected_gravity, velocity_commands, joint_pos, joint_vel, 95 | last_joint_actions 96 | ], 97 | dim=1) 98 | joint_actions = self._locomotion_policy(obs) 99 | self._last_joint_actions = joint_actions 100 | 101 | # Process and set the joint pose targets 102 | joint_actions = joint_actions * self._scale + self._offset 103 | self._asset.set_joint_position_target(joint_actions, joint_ids=self._joint_ids) 104 | 105 | def reset(self, env_ids: Sequence[int] | None = None) -> None: 106 | self._raw_actions[env_ids] = 0.0 107 | self._last_joint_actions[env_ids] = 0.0 108 | 109 | def visualize(self, base_action, residual_action): 110 | self._visualizer.visualize(base_action, residual_action) 111 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/action/non_holonomic_perfect_control_action.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | from collections.abc import Sequence 20 | 21 | from omni.isaac.lab.assets.articulation import Articulation 22 | from omni.isaac.lab.envs.mdp.actions import ActionTerm, ActionTermCfg 23 | 24 | from mobility_es.mdp.action.action_visualization import ActionVisualizer 25 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 26 | 27 | 28 | class NonHolonomicPerfectControlAction(ActionTerm): 29 | """Action item for non-holonomic robot with perfect controller.""" 30 | 31 | cfg: ActionTermCfg 32 | """The articulation asset on which the action term is applied.""" 33 | _asset: Articulation 34 | 35 | def __init__(self, cfg: ActionTermCfg, env: RLESEnvWrapper): 36 | # Initialize the action term 37 | super().__init__(cfg, env) 38 | 39 | # Create tensors for raw and processed actions 40 | self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device) 41 | self._processed_actions = torch.zeros_like(self.raw_actions) 42 | 43 | # Visualization 44 | self._visualizer = ActionVisualizer(cfg, env) 45 | 46 | @property 47 | def action_dim(self) -> int: 48 | return 6 49 | 50 | @property 51 | def raw_actions(self) -> torch.Tensor: 52 | return self._raw_actions 53 | 54 | @property 55 | def processed_actions(self) -> torch.Tensor: 56 | return self._processed_actions 57 | 58 | def process_actions(self, actions): 59 | # store the raw actions 60 | self._raw_actions[:] = actions 61 | self._processed_actions = self.raw_actions[:, (0, 5)] 62 | 63 | def apply_actions(self): 64 | # Get current asset state 65 | root_state = self._asset.data.root_state_w 66 | 67 | # Extract linear_x and angular_z from actions 68 | linear_x = self.processed_actions[:, 0] 69 | angular_z = self.processed_actions[:, 1] 70 | 71 | # Get current position and orientation quaternion 72 | pos = root_state[:, :3] 73 | quat = root_state[:, 3:7] 74 | 75 | # Convert quaternion to yaw angle 76 | yaw = 2 * torch.atan2(quat[:, 3], quat[:, 0]) 77 | 78 | # Update yaw with angular velocity 79 | dt = self._env.physics_dt 80 | new_yaw = yaw + angular_z * dt 81 | 82 | # Compute new position based on forward velocity and orientation 83 | pos[:, 0] += linear_x * torch.cos(new_yaw) * dt 84 | pos[:, 1] += linear_x * torch.sin(new_yaw) * dt 85 | 86 | # Convert new yaw to quaternion (rotation around z-axis only) 87 | new_quat = torch.zeros_like(quat) 88 | new_quat[:, 0] = torch.cos(new_yaw / 2) 89 | new_quat[:, 3] = torch.sin(new_yaw / 2) 90 | 91 | # Update root state 92 | root_state[:, :3] = pos 93 | root_state[:, 3:7] = new_quat 94 | root_state[:, 7:10] = torch.stack([ 95 | linear_x * torch.cos(new_yaw), linear_x * torch.sin(new_yaw), 96 | torch.zeros_like(linear_x) 97 | ], 98 | dim=1) 99 | root_state[:, 10:13] = torch.stack( 100 | [torch.zeros_like(angular_z), 101 | torch.zeros_like(angular_z), angular_z], dim=1) 102 | self._asset.write_root_state_to_sim(root_state) 103 | 104 | def reset(self, env_ids: Sequence[int] | None = None) -> None: 105 | self._raw_actions[env_ids] = 0.0 106 | 107 | def visualize(self, base_action, residual_action): 108 | self._visualizer.visualize(base_action, residual_action) 109 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/command/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/command/commands_cfg.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from omni.isaac.lab.envs.mdp import commands 17 | from omni.isaac.lab.utils import configclass 18 | 19 | from mobility_es.mdp.command.uniform_collision_free_pose_command import UniformCollisionFreePoseCommand 20 | 21 | 22 | @configclass 23 | class UniformCollisionFreePose2dCommandCfg(commands.UniformPose2dCommandCfg): 24 | '''Config for UniformCollisionFreePose2dCommand''' 25 | class_type: type = UniformCollisionFreePoseCommand 26 | 27 | # Maximum number of trials for the reject sampling. 28 | max_resample_trial: int = 100 29 | 30 | # Minimum distance to asset for command sampling. 31 | minimum_distance: float = None 32 | 33 | # Probability of applying minimum distance sampling. 34 | minimum_distance_prob: float = None 35 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/command/uniform_collision_free_pose_command.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import random 19 | 20 | import torch 21 | from typing import TYPE_CHECKING 22 | from collections.abc import Sequence 23 | 24 | from omni.isaac.lab.envs.mdp import commands 25 | 26 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 27 | 28 | if TYPE_CHECKING: 29 | from mobility_es.mdp.command.commands_cfg import UniformCollisionFreePose2dCommandCfg 30 | 31 | 32 | class UniformCollisionFreePoseCommand(commands.pose_2d_command.UniformPose2dCommand): 33 | """Command generator that generates pose commands containing a 3-D position and heading. 34 | 35 | The command generator samples uniform 2D positions around the environment origin. It sets 36 | the height of the position command to the default root height of the robot. The heading 37 | command is either set to point towards the target or is sampled uniformly. 38 | This can be configured through the :attr:`Pose2dCommandCfg.simple_heading` parameter in 39 | the configuration. 40 | 41 | It will then run collision checker to make sure the sampled pose is not in collision. If it 42 | is in collision, the sampled pose will be discarded and a new pose will be sampled until 43 | either a collision pose is sampled or reaches maximum number of sampling. 44 | 45 | Note: the occupancy map based collision checker only works for static environments and 46 | doesn't support randomization. 47 | """ 48 | 49 | cfg: UniformCollisionFreePose2dCommandCfg 50 | """Configuration for the command term.""" 51 | 52 | # pylint: disable=useless-parent-delegation 53 | def __init__(self, cfg: UniformCollisionFreePose2dCommandCfg, env: RLESEnvWrapper): 54 | """Initialize the command term class. 55 | 56 | Args: 57 | cfg: The configuration parameters for the command term. 58 | env: The environment object. 59 | """ 60 | # initialize the base class 61 | super().__init__(cfg, env) 62 | 63 | # Create the occupancy map for collision check. 64 | self.collision_checker = env.collision_checker 65 | 66 | def __str__(self) -> str: 67 | msg = "UniformCollisionFreePoseCommand:\n" 68 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 69 | return msg 70 | 71 | def _resample_command(self, env_ids: Sequence[int]): 72 | resample_env_ids = env_ids 73 | num_resample_trials = 0 74 | while num_resample_trials < self.cfg.max_resample_trial: 75 | # Obtain origins for the robots 76 | self.pos_command_w[resample_env_ids] = self.robot.data.root_pos_w[resample_env_ids] 77 | # Offset the position command by the current root position 78 | r = torch.empty(len(resample_env_ids), device=self.device) 79 | # Randomize the position 80 | self.pos_command_w[resample_env_ids, 81 | 0] += r.uniform_(*self._get_sampling_range(self.cfg.ranges.pos_x)) 82 | self.pos_command_w[resample_env_ids, 83 | 1] += r.uniform_(*self._get_sampling_range(self.cfg.ranges.pos_y)) 84 | self.pos_command_w[resample_env_ids, 2] = 0.0 85 | # Randomize the heading 86 | self.heading_command_w[resample_env_ids] = r.uniform_(*self.cfg.ranges.heading) 87 | 88 | resample_env_ids = self._check_collision(resample_env_ids) 89 | num_resample_trials += 1 90 | if len(resample_env_ids) < 1: 91 | # Finished resampling for all the envs 92 | break 93 | 94 | # Print a warning message. 95 | if num_resample_trials >= self.cfg.max_resample_trial: 96 | print(f"Envs with id: {resample_env_ids} failed to sample collision free" 97 | f"poses within the maxum iteration: {self.cfg.max_resample_trial}") 98 | 99 | # Apply minimum distance constraint to the input range, and randomly choose a feasible one. 100 | def _get_sampling_range(self, input_range: tuple[float, float]) -> tuple[float, float]: 101 | if not self.cfg.minimum_distance or random.random() > self.cfg.minimum_distance_prob: 102 | return input_range 103 | low, high = input_range 104 | if low > -self.cfg.minimum_distance and high < self.cfg.minimum_distance: 105 | raise ValueError('Sampling range is too small for minimum distance.') 106 | 107 | sampling_ranges = [] 108 | if low < -self.cfg.minimum_distance: 109 | sampling_ranges.append((low, -self.cfg.minimum_distance)) 110 | if high > self.cfg.minimum_distance: 111 | sampling_ranges.append((self.cfg.minimum_distance, high)) 112 | return random.choice(sampling_ranges) 113 | 114 | # pylint: disable=unused-argument 115 | def _check_collision(self, env_ids: Sequence[int]): 116 | """ Checks whether the resampled command is collision free for the environments in env_ids. 117 | Returns a list of env_ids that are in collision. 118 | """ 119 | if not self.collision_checker.is_initialized(): 120 | return [] 121 | 122 | sampled_points_r = self.pos_command_w[env_ids, :2] 123 | # Get the points relative to the environment origin if the env prime is not global. 124 | if self._env.scene.cfg.environment.prim_path.startswith('/World/envs/'): 125 | sampled_points_r = sampled_points_r - self._env.scene.env_origins[env_ids, :2] 126 | in_collisions = self.collision_checker.is_in_collision(sampled_points_r, distance=0.5) 127 | return env_ids[torch.where(in_collisions == 1)[0]] 128 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/curriculum.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | from collections.abc import Sequence 19 | 20 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 21 | 22 | 23 | # Increase the prob of applying min command distance constraint based the training iteration. 24 | def increase_command_minimum_distance_prob( 25 | env: RLESEnvWrapper, 26 | env_ids: Sequence[int], # pylint: disable=unused-argument 27 | command_name: str = 'goal_pose', 28 | minimum_distance_prob_range=(0.1, 1.0), 29 | total_iterations=1000, 30 | num_steps_per_iteration=256): 31 | cur_iteration = env.common_step_counter // num_steps_per_iteration 32 | start_prob, end_prob = minimum_distance_prob_range 33 | prob_range = end_prob - start_prob 34 | minimum_distance_prob = start_prob + prob_range * (cur_iteration / total_iterations) 35 | env.command_manager.get_term(command_name).cfg.minimum_distance_prob = minimum_distance_prob 36 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/events.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | 20 | import omni.isaac.lab.utils.math as math_utils 21 | from omni.isaac.lab.managers import SceneEntityCfg 22 | 23 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 24 | 25 | 26 | def sample_root_state_uniform(env: RLESEnvWrapper, 27 | env_ids: torch.Tensor, 28 | pose_range: dict[str, tuple[float, float]], 29 | velocity_range: dict[str, tuple[float, float]], 30 | asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")): 31 | # extract the used quantities (to enable type-hinting) 32 | asset = env.scene[asset_cfg.name] 33 | # get default root state 34 | root_states = asset.data.default_root_state[env_ids].clone() 35 | 36 | # Sample poses 37 | pose_range_list = [ 38 | pose_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"] 39 | ] 40 | pose_ranges = torch.tensor(pose_range_list, device=asset.device) 41 | 42 | # poses 43 | rand_samples = math_utils.sample_uniform(pose_ranges[:, 0], 44 | pose_ranges[:, 1], (len(env_ids), 6), 45 | device=asset.device) 46 | positions = root_states[:, 0:3] + env.scene.env_origins[env_ids] + rand_samples[:, 0:3] 47 | orientations_delta = math_utils.quat_from_euler_xyz(rand_samples[:, 3], rand_samples[:, 4], 48 | rand_samples[:, 5]) 49 | orientations = math_utils.quat_mul(root_states[:, 3:7], orientations_delta) 50 | 51 | # Sample velocities 52 | velocity_range_list = [ 53 | velocity_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"] 54 | ] 55 | velocity_ranges = torch.tensor(velocity_range_list, device=asset.device) 56 | rand_samples = math_utils.sample_uniform(velocity_ranges[:, 0], 57 | velocity_ranges[:, 1], (len(env_ids), 6), 58 | device=asset.device) 59 | velocities = root_states[:, 7:13] + rand_samples 60 | 61 | return positions, orientations, velocities 62 | 63 | 64 | def reset_root_state_uniform_collision_free(env: RLESEnvWrapper, 65 | env_ids: torch.Tensor, 66 | pose_range: dict[str, tuple[float, float]], 67 | velocity_range: dict[str, tuple[float, float]], 68 | asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), 69 | max_resample_trial=10): 70 | # extract the used quantities (to enable type-hinting) 71 | asset = env.scene[asset_cfg.name] 72 | 73 | # Sample states 74 | positions, orientations, velocities = sample_root_state_uniform(env, env_ids, pose_range, 75 | velocity_range, asset_cfg) 76 | # Check collisions if collision checker is initialized. 77 | if env.collision_checker.is_initialized(): 78 | # Index map with key as env_id and value as the index. 79 | env_ids_index_map = {int(val.item()): idx for idx, val in enumerate(env_ids)} 80 | resample_env_ids = env_ids 81 | resample_indices = torch.tensor( 82 | [env_ids_index_map[int(env_id.item())] for env_id in resample_env_ids]) 83 | num_resample_trials = 0 84 | while num_resample_trials < max_resample_trial: 85 | positions_2d = positions[resample_indices, :2] 86 | # Get the points relative to the environment origin if the env prime is not global. 87 | if env.scene.cfg.environment.prim_path.startswith('/World/envs/'): 88 | positions_2d = positions_2d - env.scene.env_origins[resample_env_ids, :2] 89 | in_collisions = env.collision_checker.is_in_collision(positions_2d) 90 | resample_env_ids = resample_env_ids[torch.where(in_collisions == 1)[0]] 91 | if len(resample_env_ids) < 1: 92 | break 93 | # Resample new states for the collided root. 94 | resample_indices = torch.tensor( 95 | [env_ids_index_map[int(env_id.item())] for env_id in resample_env_ids]) 96 | positions[resample_indices], orientations[resample_indices], velocities[ 97 | resample_indices] = sample_root_state_uniform(env, resample_env_ids, pose_range, 98 | velocity_range, asset_cfg) 99 | num_resample_trials += 1 100 | 101 | # set into the physics simulation 102 | asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids) 103 | asset.write_root_velocity_to_sim(velocities, env_ids=env_ids) 104 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/observations.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | 20 | from omni.isaac.lab.assets import RigidObject 21 | from omni.isaac.lab.managers import SceneEntityCfg 22 | 23 | from mobility_es.mdp.utils import goal_pose_in_robot_frame 24 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 25 | 26 | 27 | def root_speed( 28 | env: RLESEnvWrapper, 29 | asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), 30 | ) -> torch.Tensor: 31 | """The speed of the root, which is the normalization of the linear velocity""" 32 | asset: RigidObject = env.scene[asset_cfg.name] 33 | return torch.norm(asset.data.root_lin_vel_w, dim=1).unsqueeze(1) 34 | 35 | 36 | def root_yaw_rate( 37 | env: RLESEnvWrapper, 38 | asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), 39 | ) -> torch.Tensor: 40 | """The yaw rate of the root""" 41 | asset: RigidObject = env.scene[asset_cfg.name] 42 | return asset.data.root_ang_vel_w[:, 2].unsqueeze(1) 43 | 44 | 45 | def camera_img(env: RLESEnvWrapper, data_type: str = "rgb"): 46 | supported_data_types = ["rgb", "depth"] 47 | if data_type not in supported_data_types: 48 | raise ValueError(f"data type {data_type} is not in the support list {supported_data_types}") 49 | camera = env.scene["camera"] 50 | return camera.data.output[data_type].reshape((camera.data.output[data_type].shape[0], -1)) 51 | 52 | 53 | def upsample_segments(start_pose, end_pose, d_max=1.0, n_segments=10): 54 | num_env = start_pose.size(0) 55 | all_segments = torch.zeros((num_env, n_segments, 4)).to(start_pose.device) 56 | 57 | for i in range(num_env): 58 | s_x, s_y = start_pose[i] 59 | e_x, e_y = end_pose[i] 60 | distance = torch.sqrt((e_x - s_x)**2 + (e_y - s_y)**2) 61 | num_segments = torch.ceil(distance / d_max).int().item() 62 | 63 | # Generate interpolation steps for segments 64 | t = torch.linspace(0, 1, num_segments + 1).unsqueeze(1) 65 | points = (1 - t) * torch.tensor([s_x, s_y]) + t * torch.tensor([e_x, e_y]) 66 | segments = torch.cat([points[:-1], points[1:]], dim=1) 67 | 68 | if num_segments < n_segments: 69 | # Pad with segments where the start and end points are the last point 70 | last_point = torch.tensor([e_x, e_y]) 71 | last_segment = torch.cat([last_point, last_point], dim=0) 72 | padding_segments = last_segment.repeat(n_segments - num_segments, 1) 73 | segments = torch.cat([segments, padding_segments], dim=0) 74 | else: 75 | # Truncate to n_segments if necessary 76 | segments = segments[:n_segments] 77 | all_segments[i] = segments 78 | 79 | return all_segments 80 | 81 | 82 | def routing_to_goal_simplified( 83 | env: RLESEnvWrapper, 84 | command_name: str = "goal_pose", 85 | offset: tuple[float, float] = (-0.0, 0), 86 | robot_cfg: SceneEntityCfg = SceneEntityCfg("robot") 87 | ) -> torch.Tensor: 88 | goal_pose = goal_pose_in_robot_frame(env, command_name, offset, robot_cfg) 89 | start_pose = goal_pose.new_zeros(goal_pose.shape[0], 2) 90 | return upsample_segments(start_pose, goal_pose) 91 | 92 | 93 | def foot_print( 94 | env: RLESEnvWrapper, 95 | asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), 96 | asset_length: float = 1.0, 97 | asset_width: float = 1.0, 98 | ) -> torch.Tensor: 99 | """Represent the foot print of the asset as a rectangle that is offset to the asset root.""" 100 | robot: RigidObject = env.scene[asset_cfg.name] 101 | root_pos = robot.data.root_pos_w - env.scene.env_origins 102 | root_pos_2d = root_pos[:, :2] 103 | 104 | def create_footprint_tensor(length, width, device): 105 | tensor = torch.tensor([[length / 2, width / 2], [-length / 2, width / 2], 106 | [-length / 2, -width / 2], [length / 2, -width / 2]], 107 | device=device) 108 | return tensor 109 | 110 | foot_print_poly = create_footprint_tensor(asset_length, asset_width, root_pos.device) 111 | foot_print_poly = foot_print_poly.unsqueeze(0).repeat(root_pos.shape[0], 1, 1) 112 | 113 | foot_print_poly_offset = root_pos_2d.unsqueeze(1) + foot_print_poly 114 | return foot_print_poly_offset 115 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/rewards.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | from typing import Optional, List 20 | 21 | from omni.isaac.lab.managers import SceneEntityCfg 22 | 23 | from mobility_es.mdp.utils import goal_pose_in_robot_frame 24 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 25 | 26 | 27 | def goal_reaching( 28 | env: RLESEnvWrapper, 29 | command_name: str = "goal_pose", 30 | offset: tuple[float, float] = (-0, 0), 31 | robot_cfg: SceneEntityCfg = SceneEntityCfg("robot") 32 | ) -> torch.Tensor: 33 | """Reward the agent for moving towards the goal pose. """ 34 | goal_pose = goal_pose_in_robot_frame(env, command_name, offset, robot_cfg) 35 | goal_pose_abs = -torch.norm(goal_pose, p=2, dim=1).reshape((goal_pose.shape[0], 1)) 36 | 37 | return torch.flatten(goal_pose_abs) 38 | 39 | 40 | def masked_action_rate_l2(env: RLESEnvWrapper, 41 | idx_masks: Optional[List[int]] = None) -> torch.Tensor: 42 | """Penalize the rate of change of the actions using an L2 squared kernel.""" 43 | 44 | # Extract current and previous actions 45 | current_action = env.action_manager.action 46 | prev_action = env.action_manager.prev_action 47 | 48 | # Calculate the difference in actions 49 | if idx_masks is not None: 50 | current_action = current_action[:, idx_masks] 51 | prev_action = prev_action[:, idx_masks] 52 | 53 | action_rate = current_action - prev_action 54 | 55 | # Return the sum of squared differences for the L2 penalty 56 | return torch.sum(action_rate.pow(2), dim=1) 57 | 58 | 59 | def masked_action_l2(env: RLESEnvWrapper, idx_masks: Optional[List[int]] = None) -> torch.Tensor: 60 | """Penalize the actions using L2 squared kernel.""" 61 | current_action = env.action_manager.action 62 | 63 | # Calculate the difference in actions 64 | if idx_masks is not None: 65 | current_action = current_action[:, idx_masks] 66 | 67 | return torch.sum(current_action.pow(2), dim=1) 68 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/termination.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | 20 | from omni.isaac.lab.assets import RigidObject 21 | from omni.isaac.lab.managers import SceneEntityCfg 22 | 23 | from mobility_es.mdp.utils import goal_pose_in_robot_frame 24 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 25 | 26 | 27 | def goal_reached( 28 | env: RLESEnvWrapper, 29 | dist_threshold: float = 0.5, 30 | vel_threshold: float = 0.1, 31 | command_name: str = 'goal_pose', 32 | offset: tuple[float, float] = (-0.0, 0), 33 | robot_cfg: SceneEntityCfg = SceneEntityCfg("robot") 34 | ) -> torch.Tensor: 35 | robot: RigidObject = env.scene[robot_cfg.name] 36 | 37 | # Goal pose 38 | goal_pose = goal_pose_in_robot_frame(env, command_name, offset, robot_cfg) 39 | 40 | # Distance to goal 41 | goal_pose_abs = torch.norm(goal_pose, p=2, dim=1) 42 | # Robot velocity 43 | root_vel = torch.norm(robot.data.root_lin_vel_w, dim=1) 44 | 45 | return torch.logical_and(goal_pose_abs < dist_threshold, root_vel < vel_threshold) 46 | 47 | 48 | def nan_pose(env: RLESEnvWrapper, robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")): 49 | # Check if the robot pose is nan, which can happen when the robot explodes in simulation. 50 | nan_pos = torch.isnan(env.scene[robot_cfg.name].data.root_pos_w) 51 | nan_quat = torch.isnan(env.scene[robot_cfg.name].data.root_quat_w) 52 | return torch.logical_or(torch.any(nan_pos, dim=1), torch.any(nan_quat, dim=1)) 53 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/mdp/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | import torch 19 | from typing import TYPE_CHECKING 20 | 21 | import omni.isaac.lab.utils.math as math_utils 22 | from omni.isaac.lab.managers import SceneEntityCfg 23 | 24 | if TYPE_CHECKING: 25 | from omni.isaac.lab.envs import ManagerBasedRLEnv 26 | 27 | 28 | def goal_pose_in_robot_frame( 29 | env: ManagerBasedRLEnv, 30 | command_name: str = "goal_pose", 31 | offset: tuple[float, float] = (-0, 0), 32 | robot_cfg: SceneEntityCfg = SceneEntityCfg("robot") 33 | ) -> torch.Tensor: 34 | """Get the goal pose in the robot frame with offset. """ 35 | 36 | # Get the goal pose relative to the root(i.e., robot) frame. 37 | goal_pose = env.command_manager.get_command(command_name)[:, :2] 38 | 39 | # Compute offset. 40 | robot = env.scene[robot_cfg.name] 41 | robot_yaw = math_utils.euler_xyz_from_quat(robot.data.root_quat_w)[2].reshape( 42 | (goal_pose.shape[0], 1)) 43 | offset_vec = torch.cat([ 44 | offset[0] * torch.cos(robot_yaw) - offset[1] * torch.sin(robot_yaw), 45 | offset[0] * torch.sin(robot_yaw) + offset[1] * torch.cos(robot_yaw) 46 | ], 47 | dim=1) 48 | # Goal pose in robot frame with offset. 49 | goal_pose_r = goal_pose - offset_vec 50 | return goal_pose_r 51 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/utils/occupancy_map.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import os 17 | 18 | import math 19 | import numpy as np 20 | import yaml 21 | 22 | import torch 23 | from PIL import Image 24 | 25 | from mobility_es.config.environments import OMAP_PATHS 26 | 27 | 28 | class OccupancyMapCollisionChecker: 29 | """Convert a occupancy map to grid map for collision checking. 30 | Note: This collision checker only works when the following conditions are met: 31 | 1. The USD world frame's origin has x facing right, y facing up. 32 | 2. The occupancy map must be generated in Isaac Sim with origin as (0,0,0), 33 | so the occupancy map's origin is measured in the USD's world frame. 34 | 3. The occupancy map's origin is placed at the top-left of the PNC file with 35 | x facing right, and y facing down. 36 | """ 37 | 38 | def __init__(self, cfg): 39 | self.__grid_map = None 40 | 41 | # Initialize the grid map using the occupancy map yaml file path. 42 | env_prim_path = cfg.environment.prim_path 43 | yaml_file_path = None 44 | for key, val in OMAP_PATHS.items(): 45 | if env_prim_path.endswith(key): 46 | yaml_file_path = val 47 | break 48 | 49 | if yaml_file_path and os.path.exists(yaml_file_path): 50 | print(f'Loaded omap for env {env_prim_path}') 51 | self._load_grid_map(yaml_file_path) 52 | 53 | def _load_grid_map(self, yaml_file_path): 54 | """ 55 | Reads the map image and generates the grid map.\n 56 | Grid map is a 2D boolean matrix where True=>occupied space & False=>Free space. 57 | """ 58 | with open(yaml_file_path, encoding="utf-8") as f: 59 | file_content = f.read() 60 | self.__map_meta = yaml.safe_load(file_content) 61 | img_file_path = os.path.join(os.path.dirname(yaml_file_path), self.__map_meta["image"]) 62 | 63 | img = Image.open(img_file_path) 64 | img = np.array(img) 65 | 66 | # Anything greater than free_thresh is considered as occupied 67 | if self.__map_meta["negate"]: 68 | res = np.where((img / 255)[:, :, 0] > self.__map_meta["free_thresh"]) 69 | else: 70 | res = np.where(((255 - img) / 255)[:, :, 0] > self.__map_meta["free_thresh"]) 71 | 72 | self.__grid_map = np.zeros(shape=(img.shape[:2]), dtype=bool) 73 | for i in range(res[0].shape[0]): 74 | self.__grid_map[res[0][i], res[1][i]] = True 75 | 76 | def __transform_to_image_coordinates(self, point): 77 | p_x = point[:, 0] 78 | p_y = point[:, 1] 79 | # NOTE: this assumes the occupancy map's origin with x facing right, and y facing down, and 80 | # the world frame's origin with x facing right, y facing up. 81 | i_x = torch.floor( 82 | (p_x - self.__map_meta["origin"][0]) / self.__map_meta["resolution"]).reshape( 83 | (point.shape[0], 1)) 84 | i_y = torch.floor( 85 | (self.__map_meta["origin"][1] - p_y) / self.__map_meta["resolution"]).reshape( 86 | (point.shape[0], 1)) 87 | return torch.cat((i_x, i_y), dim=1).int() 88 | 89 | def __is_obstacle_in_distance(self, img_point, distance_in_pixel): 90 | row_start_idx = img_point[:, 1] - distance_in_pixel 91 | col_start_idx = img_point[:, 0] - distance_in_pixel 92 | row_end_idx = img_point[:, 1] + distance_in_pixel 93 | col_end_idx = img_point[:, 0] + distance_in_pixel 94 | 95 | # image point acts as the center of the square, where each side of square is of size 96 | # 2xdistance 97 | in_collision = torch.zeros_like(row_start_idx).int() 98 | for i in range(row_end_idx.shape[0]): 99 | # Consider out of bounds points as in collision. 100 | if row_start_idx[i] < 0 or col_start_idx[i] < 0 or row_end_idx[ 101 | i] >= self.__grid_map.shape[0] or col_end_idx[i] >= self.__grid_map.shape[1]: 102 | in_collision[i] = 1 103 | continue 104 | patch = self.__grid_map[row_start_idx[i]:row_end_idx[i], 105 | col_start_idx[i]:col_end_idx[i]] 106 | if np.any(patch): 107 | in_collision[i] = 1 108 | 109 | return in_collision 110 | 111 | def is_in_collision(self, points, distance=0.75): 112 | assert points.shape[1] == 2 113 | img_points = self.__transform_to_image_coordinates(points) 114 | distance_in_pixel = math.ceil(distance / self.__map_meta["resolution"]) 115 | in_collision = self.__is_obstacle_in_distance(img_points, distance_in_pixel) 116 | return in_collision 117 | 118 | def is_initialized(self): 119 | return self.__grid_map is not None 120 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/mobility_es/wrapper/env_wrapper.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | from omni.isaac.lab.envs import ManagerBasedRLEnv, ManagerBasedRLEnvCfg 19 | 20 | from mobility_es.utils.occupancy_map import OccupancyMapCollisionChecker 21 | 22 | 23 | class RLESEnvWrapper(ManagerBasedRLEnv): 24 | """Env wrapper of ManagerBasedRLEnv for RL embodiment specialist. 25 | """ 26 | 27 | def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode=None): 28 | """ 29 | Initializes the RLESEnvWrapper. 30 | 31 | Args: 32 | cfg (ManagerBasedRLEnvCfg): The configuration object for the environment. 33 | render_mode (str, optional): The render mode for the environment. Defaults to None. 34 | 35 | Returns: 36 | None 37 | """ 38 | # Initialize collision checker before ManagerBasedRLEnv to avoid loading error. 39 | self._collision_checker = OccupancyMapCollisionChecker(cfg.scene) 40 | super().__init__(cfg, render_mode) 41 | 42 | @property 43 | def collision_checker(self): 44 | return self._collision_checker 45 | -------------------------------------------------------------------------------- /compass/rl_env/exts/mobility_es/setup.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Installation script for the 'mobility_es' python package.""" 17 | 18 | import os 19 | import toml 20 | 21 | from setuptools import setup 22 | 23 | # Obtain the extension data from the extension.toml file 24 | EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__)) 25 | # Read the extension.toml file 26 | EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml")) 27 | 28 | # Minimum dependencies required prior to installation 29 | INSTALL_REQUIRES = [ 30 | # NOTE: Add dependencies 31 | "psutil", 32 | ] 33 | 34 | # Installation operation 35 | setup( 36 | name="mobility_es", 37 | packages=["mobility_es"], 38 | author=EXTENSION_TOML_DATA["package"]["author"], 39 | maintainer=EXTENSION_TOML_DATA["package"]["maintainer"], 40 | url=EXTENSION_TOML_DATA["package"]["repository"], 41 | version=EXTENSION_TOML_DATA["package"]["version"], 42 | description=EXTENSION_TOML_DATA["package"]["description"], 43 | keywords=EXTENSION_TOML_DATA["package"]["keywords"], 44 | install_requires=INSTALL_REQUIRES, 45 | license="MIT", 46 | include_package_data=True, 47 | python_requires=">=3.10", 48 | classifiers=[ 49 | "Natural Language :: English", 50 | "Programming Language :: Python :: 3.10", 51 | "Isaac Sim :: 2023.1.1", 52 | "Isaac Sim :: 4.0.0", 53 | ], 54 | zip_safe=False, 55 | ) 56 | -------------------------------------------------------------------------------- /compass/rl_env/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "toml"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.isort] 6 | 7 | atomic = true 8 | profile = "black" 9 | line_length = 120 10 | py_version = 310 11 | skip_glob = ["docs/*", "logs/*"] 12 | group_by_package = true 13 | 14 | sections = [ 15 | "FUTURE", 16 | "STDLIB", 17 | "THIRDPARTY", 18 | "ISAACLABPARTY", 19 | "FIRSTPARTY", 20 | "LOCALFOLDER", 21 | ] 22 | extra_standard_library = [ 23 | "numpy", 24 | "h5py", 25 | "open3d", 26 | "torch", 27 | "tensordict", 28 | "bpy", 29 | "matplotlib", 30 | "gymnasium", 31 | "gym", 32 | "scipy", 33 | "hid", 34 | "yaml", 35 | "prettytable", 36 | "toml", 37 | "trimesh", 38 | "tqdm", 39 | "psutil", 40 | ] 41 | known_thirdparty = [ 42 | "omni.isaac.core", 43 | "omni.replicator.isaac", 44 | "omni.replicator.core", 45 | "pxr", 46 | "omni.kit.*", 47 | "warp", 48 | "carb", 49 | "Semantics", 50 | ] 51 | known_isaaclabparty = [ 52 | "omni.isaac.lab", 53 | "omni.isaac.lab_tasks", 54 | "omni.isaac.lab_assets" 55 | ] 56 | 57 | # Modify the following to include the package names of your first-party code 58 | known_firstparty = "mobility_es" 59 | known_local_folder = "config" 60 | 61 | [tool.pyright] 62 | 63 | exclude = [ 64 | "**/__pycache__", 65 | "**/docs", 66 | "**/logs", 67 | ".git", 68 | ".vscode", 69 | ] 70 | 71 | typeCheckingMode = "basic" 72 | pythonVersion = "3.10" 73 | pythonPlatform = "Linux" 74 | enableTypeIgnoreComments = true 75 | 76 | # This is required as the CI pre-commit does not download the module (i.e. numpy, torch, prettytable) 77 | # Therefore, we have to ignore missing imports 78 | reportMissingImports = "none" 79 | # This is required to ignore for type checks of modules with stubs missing. 80 | reportMissingModuleSource = "none" # -> most common: prettytable in mdp managers 81 | 82 | reportGeneralTypeIssues = "none" # -> raises 218 errors (usage of literal MISSING in dataclasses) 83 | reportOptionalMemberAccess = "warning" # -> raises 8 errors 84 | reportPrivateUsage = "warning" 85 | -------------------------------------------------------------------------------- /compass/rl_env/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /compass/rl_env/scripts/play.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # pylint: skip-file 17 | 18 | import argparse 19 | 20 | from omni.isaac.lab.app import AppLauncher 21 | 22 | # add argparse arguments 23 | parser = argparse.ArgumentParser(description="Embodiment specialist to enhance navigation policy.") 24 | parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to spawn.") 25 | 26 | # append AppLauncher cli args 27 | AppLauncher.add_app_launcher_args(parser) 28 | # parse the arguments 29 | args_cli = parser.parse_args() 30 | 31 | # launch omniverse app 32 | app_launcher = AppLauncher(args_cli) 33 | simulation_app = app_launcher.app 34 | 35 | import torch 36 | 37 | from mobility_es.config.carter_env_cfg import CarterGoalReachingEnvCfg 38 | from mobility_es.config.h1_env_cfg import H1GoalReachingEnvCfg 39 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper 40 | 41 | 42 | def main(): 43 | """Main function.""" 44 | # Parse the arguments 45 | env_cfg = H1GoalReachingEnvCfg() 46 | env_cfg.scene.num_envs = args_cli.num_envs 47 | # Setup base environment 48 | env = RLESEnvWrapper(cfg=env_cfg) 49 | 50 | count = 0 51 | while simulation_app.is_running(): 52 | with torch.inference_mode(): 53 | # Reset 54 | if count % 300 == 0: 55 | count = 0 56 | env.reset() 57 | print("-" * 80) 58 | print("[INFO]: Resetting environment...") 59 | # Sample random actions 60 | action_commands = torch.randn_like(env.action_manager.action) 61 | # Step the environment 62 | obs_dict, rewards, dones, truncateds, infos = env.step(action_commands) 63 | print(obs_dict) 64 | # Update counter 65 | count += 1 66 | 67 | # close the environment 68 | env.close() 69 | 70 | 71 | if __name__ == "__main__": 72 | # Run the main function 73 | main() 74 | # Close sim app 75 | simulation_app.close() 76 | -------------------------------------------------------------------------------- /compass/utils/logger.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Flexible logging system that supports both Weights & Biases and TensorBoard. 18 | """ 19 | 20 | import os 21 | from typing import Dict, Any, Optional 22 | 23 | import wandb 24 | from tensorboardX import SummaryWriter 25 | 26 | 27 | class Logger: 28 | """ 29 | A unified logger that can use either Weights & Biases or TensorBoard. 30 | """ 31 | 32 | def __init__(self, 33 | log_dir: str, 34 | experiment_name: str, 35 | backend: str = "tensorboard", 36 | project_name: str = "compass", 37 | entity: Optional[str] = None): 38 | """ 39 | Initialize the logger. 40 | 41 | Args: 42 | log_dir: Directory to save logs 43 | experiment_name: Name of the experiment 44 | backend: Either "wandb" or "tensorboard" 45 | project_name: Project name (for wandb) 46 | entity: Team/entity name (for wandb) 47 | config: Configuration dictionary to log 48 | """ 49 | self.backend = backend.lower() 50 | self.experiment_name = experiment_name 51 | self.log_dir = log_dir 52 | 53 | # Create log directory if it doesn't exist 54 | os.makedirs(self.log_dir, exist_ok=True) 55 | 56 | # Initialize the backend. 57 | if self.backend == "wandb": 58 | # Initialize wandb 59 | self.writer = wandb.init( 60 | project=project_name, 61 | entity=entity, 62 | name=experiment_name, 63 | dir=log_dir, 64 | ) 65 | elif self.backend == "tensorboard": 66 | # Initialize TensorBoard writer 67 | self.writer = SummaryWriter(os.path.join(self.log_dir, "tensorboard")) 68 | else: 69 | raise ValueError(f"Unsupported backend: {backend}. " 70 | "Choose either 'wandb' or 'tensorboard'.") 71 | 72 | print(f"Logger initialized with backend: {self.backend}") 73 | 74 | def log_video(self, name: str, video_path: str, fps: int = 4, step: Optional[int] = None): 75 | """ 76 | Log a video. 77 | 78 | Args: 79 | name: Name of the video 80 | video_path: Path to the video file 81 | fps: Frames per second 82 | step: Step number (if None, uses 0) 83 | """ 84 | if step is None: 85 | step = 0 86 | 87 | if self.backend == "wandb": 88 | wandb.log( 89 | {name: wandb.Video(video_path, fps=fps, format="mp4", caption=f"Iteration {step}")}) 90 | else: 91 | print(f"Video upload via path not supported in TensorBoard. Video '{name}' not logged.") 92 | 93 | def log_dict(self, dict: Dict[str, Any], step: Optional[int] = None): 94 | """ 95 | Log multiple metrics at once. 96 | 97 | Args: 98 | dict: Dictionary of scalars to log 99 | step: Step number (if None, uses 0) 100 | """ 101 | if self.backend == "wandb": 102 | wandb.log(dict, step=step) 103 | else: 104 | for name, value in dict.items(): 105 | self.writer.add_scalar(name, value, global_step=step) 106 | 107 | def log_artifact(self, 108 | artifact_path: str, 109 | name: str, 110 | type: str, 111 | description: Optional[str] = None): 112 | """ 113 | Log an artifact (file or directory). 114 | 115 | Args: 116 | artifact_path: Path to the artifact 117 | name: Name of the artifact 118 | type: Type of the artifact 119 | description: Description of the artifact 120 | """ 121 | if self.backend == "wandb": 122 | artifact = wandb.Artifact(name=name, type=type, description=description) 123 | artifact.add_file(artifact_path) 124 | wandb.log_artifact(artifact) 125 | else: 126 | # TensorBoard doesn't support artifacts directly 127 | print(f"Artifacts not supported in TensorBoard. Artifact '{name}' not logged.") 128 | 129 | def log_config(self, config: Dict[str, Any]): 130 | """ 131 | Update the run configuration. 132 | 133 | Args: 134 | config: Configuration dictionary 135 | """ 136 | if self.backend == "wandb": 137 | self.writer.config.update(config) 138 | else: 139 | # TensorBoard doesn't support updating config 140 | print("Updating config not supported in TensorBoard.") 141 | 142 | def close(self): 143 | """Close the logger.""" 144 | if self.backend == "wandb": 145 | wandb.finish() 146 | elif self.backend == "tensorboard": 147 | self.writer.close() 148 | -------------------------------------------------------------------------------- /configs/distillation_config.gin: -------------------------------------------------------------------------------- 1 | # Train 2 | train.epochs=100 3 | train.precision="16-mixed" 4 | train.data_module=@RLSpecialistDataModule 5 | train.model_trainer=@ESDistillationTrainer 6 | 7 | #Trainer 8 | ESDistillationTrainer.lr=1e-3 9 | ESDistillationTrainer.model=@ESDistillationPolicy 10 | ESDistillationTrainer.loss=@ESDistillationKLLoss 11 | 12 | # Dataloader 13 | RLSpecialistDataModule.batch_size=16 14 | RLSpecialistDataModule.sequence_length=5 15 | RLSpecialistDataModule.num_workers=2 16 | 17 | # Embodiment specialist distillation policy 18 | ESDistillationPolicy.policy_model=@MLPActionPolicyDistribution 19 | ESDistillationPolicy.embodiment_encoder=@EmbodimentOneHotEncoder 20 | ESDistillationPolicy.policy_state_dim=2048 21 | ESDistillationPolicy.command_n_channels=6 22 | -------------------------------------------------------------------------------- /configs/distillation_dataset_config_template.yaml: -------------------------------------------------------------------------------- 1 | policy_ckpt_type: wandb_artifact # [wandb_artifact, local] 2 | 3 | embodiment_policies: 4 | carter: "path_to_carter_ckpt" 5 | h1: "path_to_h1_ckpt" 6 | spot: "path_to_spot_ckpt" 7 | g1: "path_to_g1_ckpt" 8 | 9 | data_split_ratios: 10 | train: 0.8 11 | val: 0.1 12 | test: 0.1 13 | -------------------------------------------------------------------------------- /configs/eval_config.gin: -------------------------------------------------------------------------------- 1 | include 'configs/shared.gin' 2 | include 'configs/distillation_config.gin' 3 | 4 | run.num_iterations=10 5 | run.num_envs=64 6 | run.run_mode='eval' 7 | run.seed = 10 8 | 9 | # Residual PPO Trainer 10 | ResidualPPOTrainer.debug_viz=True 11 | -------------------------------------------------------------------------------- /configs/record_config.gin: -------------------------------------------------------------------------------- 1 | include 'configs/shared.gin' 2 | 3 | run.num_iterations=20 4 | run.num_envs=16 5 | run.run_mode='record' 6 | run.seed = 30 7 | run.num_steps_per_iteration=128 8 | 9 | ResidualPPOTrainer.num_steps_per_env=128 10 | -------------------------------------------------------------------------------- /configs/shared.gin: -------------------------------------------------------------------------------- 1 | include 'configs/x_mobility_config.gin' 2 | 3 | NUM_STEPS_PER_ITER = 256 4 | 5 | # RL environment setup 6 | run.embodiment='h1' 7 | run.environment='combined_single_rack' 8 | run.num_steps_per_iteration=%NUM_STEPS_PER_ITER 9 | 10 | # Residual PPO Trainer 11 | ResidualPPOTrainer.num_steps_per_env=%NUM_STEPS_PER_ITER 12 | # Options include [CriticStateDepthEncoderAssembler, CriticStateSymmetricAssembler] 13 | ResidualPPOTrainer.critic_state_assembler=@CriticStateSymmetricAssembler 14 | 15 | # CriticObservationEncoder 16 | CriticObservationEncoder.depth_encoder=@DepthImageFeatureExtractor 17 | 18 | # Actor critic 19 | ActorCriticXMobility.critic_hidden_dims = 128 20 | ActorCriticXMobility.init_noise_std = 0.1 21 | -------------------------------------------------------------------------------- /configs/train_config.gin: -------------------------------------------------------------------------------- 1 | include 'configs/shared.gin' 2 | 3 | run.num_iterations=1000 4 | run.num_envs=64 5 | run.run_mode='train' 6 | run.seed = 20 7 | -------------------------------------------------------------------------------- /configs/x_mobility_config.gin: -------------------------------------------------------------------------------- 1 | # Shared macros 2 | NUM_SEMANTIC_CLASSES=7 3 | ACTION_SIZE=6 4 | # Path dim is 2 and path length is 5. 5 | FLAT_PATH_SIZE=10 6 | NUM_EPOCHS=100 7 | RECEPTIVE_FIELD=1 8 | SEQUENCE_LENGTH=4 9 | ENABLE_SEMANTIC=True 10 | ENABLE_RGB_STYLEGAN=False 11 | ENABLE_RGB_DIFFUSION=False 12 | ENABLE_POLICY_DIFFUSION=False 13 | IS_GWM_PRETRAIN=False 14 | 15 | # Train 16 | train.epochs=%NUM_EPOCHS 17 | train.precision="16-mixed" 18 | train.data_module=@XMobilityIsaacSimDataModule 19 | train.model_trainer=@XMobilityTrainer 20 | 21 | # Evaluate 22 | evaluate_observation.num_gpus=1 23 | evaluate_observation.precision="16-mixed" 24 | evaluate_prediction.max_history_length=5 25 | evaluate_prediction.max_future_length=[1, 2, 3, 4, 5, 10] 26 | evaluate_prediction.use_trained_policy=True 27 | 28 | #Trainer 29 | XMobilityTrainer.weight_decay=0.01 30 | XMobilityTrainer.lr=1e-5 31 | XMobilityTrainer.scheduler_pct_start=0.2 32 | 33 | # Dataloader 34 | XMobilityIsaacSimDataModule.batch_size=4 35 | XMobilityIsaacSimDataModule.sequence_length=%SEQUENCE_LENGTH 36 | XMobilityIsaacSimDataModule.num_workers=4 37 | XMobilityIsaacSimDataModule.is_gwm_pretrain=%IS_GWM_PRETRAIN 38 | XMobilityIsaacSimDataModule.enable_semantic=%ENABLE_SEMANTIC 39 | XMobilityIsaacSimDataModule.enable_rgb_stylegan=%ENABLE_RGB_STYLEGAN 40 | XMobilityIsaacSimDataModule.precomputed_semantic_label=True 41 | 42 | #Encoders 43 | SpeedEncoder.out_channels=32 44 | SpeedEncoder.speed_normalisation=1.5 45 | VectorNetSubGraph.vec_size=4 46 | VectorNetSubGraph.num_layers=4 47 | ImageDepthAnythingEncoder.enable_fine_tune=True 48 | ImageDINOEncoder.enable_fine_tune=True 49 | ObservationEncoder.image_encoder=@ImageDINOEncoder 50 | 51 | # Decoders 52 | SegmentationHead.n_classes=%NUM_SEMANTIC_CLASSES 53 | StyleGanDecoder.constant_size=(5, 8) 54 | 55 | # MLPPolicy 56 | MLPPolicy.command_n_channels=%ACTION_SIZE 57 | MLPPolicy.path_n_channels=%FLAT_PATH_SIZE 58 | 59 | # DiffusionPolicy 60 | DiffusionPolicy.action_n_channels=%ACTION_SIZE 61 | DiffusionPolicy.path_n_channels=%FLAT_PATH_SIZE 62 | DiffusionPolicy.default_denoising_steps=50 63 | 64 | # ConditionalUnet1D 65 | ConditionalUnet1D.diffusion_step_embed_dim = 256 66 | ConditionalUnet1D.unet_down_dims = [256, 512, 1024] 67 | ConditionalUnet1D.kernel_size = 3 68 | 69 | # ActionPolicy 70 | ActionPolicy.policy_state_dim = 2048 71 | ActionPolicy.route_encoder=@VectorNetSubGraph 72 | ActionPolicy.policy_state_fusion_mode="self_attn" 73 | ActionPolicy.enable_policy_diffusion=%ENABLE_POLICY_DIFFUSION 74 | 75 | # Losses 76 | KLLoss.alpha=0.75 77 | ActionLoss.norm=1 78 | SegmentationLoss.use_top_k=False 79 | SegmentationLoss.top_k_ratio=0.25 80 | SegmentationLoss.use_poly_one=False 81 | SegmentationLoss.poly_one_coefficient=0.0 82 | SegmentationLoss.use_weights=True 83 | SegmentationLoss.semantic_weights=[1.0, 2.0, 5.0, 5.0, 5.0, 5.0, 5.0] 84 | XMobilityLoss.action_weight=10.0 85 | XMobilityLoss.path_weight = 5.0 86 | XMobilityLoss.kl_weight=0.001 87 | XMobilityLoss.semantic_weight=2.0 88 | XMobilityLoss.rgb_weight=5.0 89 | XMobilityLoss.enable_semantic=%ENABLE_SEMANTIC 90 | XMobilityLoss.enable_rgb_stylegan=%ENABLE_RGB_STYLEGAN 91 | XMobilityLoss.enable_rgb_diffusion=%ENABLE_RGB_DIFFUSION 92 | XMobilityLoss.enable_policy_diffusion=%ENABLE_POLICY_DIFFUSION 93 | XMobilityLoss.is_gwm_pretrain=%IS_GWM_PRETRAIN 94 | XMobilityLoss.diffusion_weight=10.0 95 | XMobilityLoss.depth_weight=10.0 96 | 97 | # RSSM 98 | RSSM.action_dim=%ACTION_SIZE 99 | RSSM.hidden_state_dim=1024 100 | RSSM.state_dim=512 101 | RSSM.action_latent_dim=64 102 | RSSM.receptive_field=%RECEPTIVE_FIELD 103 | RSSM.use_dropout=False 104 | RSSM.dropout_probability=0.0 105 | 106 | # XMobility 107 | XMobility.enable_semantic=%ENABLE_SEMANTIC 108 | XMobility.enable_rgb_stylegan=%ENABLE_RGB_STYLEGAN 109 | XMobility.enable_rgb_diffusion=%ENABLE_RGB_DIFFUSION 110 | XMobility.is_gwm_pretrain=%IS_GWM_PRETRAIN 111 | -------------------------------------------------------------------------------- /distillation_train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import os 17 | import argparse 18 | from enum import Enum 19 | 20 | import gin 21 | import pytorch_lightning as pl 22 | import wandb 23 | from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint 24 | from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger 25 | 26 | from compass.distillation.rl_specialists_dataset import RLSpecialistDataModule # pylint: disable=unused-import 27 | from compass.distillation.distillation_trainer import ESDistillationTrainer # pylint: disable=unused-import 28 | 29 | 30 | class TaskMode(Enum): 31 | TRAIN = "train" 32 | EVAL = "eval" 33 | 34 | 35 | def parse_arguments(task_mode): 36 | ''' Arguments parser for model training and evaluation. 37 | ''' 38 | description = 'Train ML Nav' if task_mode == TaskMode.TRAIN else 'Eval ML Nav' 39 | parser = argparse.ArgumentParser(description=description) 40 | 41 | parser.add_argument('--config-files', 42 | '-c', 43 | nargs='+', 44 | required=True, 45 | help='The list of the config files.') 46 | parser.add_argument('--dataset-path', 47 | '-d', 48 | type=str, 49 | required=True, 50 | help='The path to the dataset.') 51 | parser.add_argument('--wandb-project-name', 52 | '-n', 53 | type=str, 54 | default='afm_rl_enhance_distillation', 55 | help='The project name of W&B.') 56 | parser.add_argument('--wandb-run-name', 57 | '-r', 58 | type=str, 59 | default='train_run', 60 | help='The run name of W&B.') 61 | parser.add_argument('--wandb-entity-name', 62 | '-e', 63 | type=str, 64 | default='nvidia-isaac', 65 | help='The entity name of W&B.') 66 | parser.add_argument('--checkpoint-path', 67 | '-p', 68 | type=str, 69 | default=None, 70 | help='The path to the checkpoint.') 71 | parser.add_argument('--logger', 72 | type=str, 73 | choices=['wandb', 'tensorboard'], 74 | default='tensorboard', 75 | help='Logger to use: wandb or tensorboard') 76 | if task_mode == TaskMode.TRAIN: 77 | parser.add_argument('--output-dir', 78 | '-o', 79 | type=str, 80 | required=True, 81 | help='The path to the output dir.') 82 | 83 | if task_mode == TaskMode.EVAL: 84 | parser.add_argument('--eval_target', 85 | type=str, 86 | default='observation', 87 | help='Target to evaluate: [observation, imagination]') 88 | 89 | args = parser.parse_args() 90 | return args 91 | 92 | 93 | @gin.configurable 94 | def train(dataset_path, 95 | output_dir, 96 | ckpt_path, 97 | wandb_project_name, 98 | wandb_run_name, 99 | wandb_entity_name, 100 | precision, 101 | epochs, 102 | data_module, 103 | model_trainer, 104 | logger_type='wandb'): 105 | # Create a output directory if not exit. 106 | if not os.path.exists(output_dir): 107 | os.makedirs(output_dir) 108 | 109 | data = data_module(dataset_path=dataset_path) 110 | if ckpt_path: 111 | model = model_trainer.load_from_checkpoint(checkpoint_path=ckpt_path, strict=False) 112 | else: 113 | model = model_trainer() 114 | 115 | # Set up the appropriate logger 116 | if logger_type == 'wandb': 117 | logger = WandbLogger(entity=wandb_entity_name, 118 | project=wandb_project_name, 119 | name=wandb_run_name, 120 | save_dir=output_dir, 121 | group="DDP", 122 | log_model=True) 123 | else: 124 | logger = TensorBoardLogger(save_dir=output_dir) 125 | 126 | callbacks = [ 127 | pl.callbacks.ModelSummary(-1), 128 | pl.callbacks.LearningRateMonitor(), 129 | ModelCheckpoint(dirpath=os.path.join(output_dir, 'checkpoints'), 130 | save_top_k=3, 131 | monitor='val_loss', 132 | mode='min', 133 | save_last=True), 134 | ] 135 | trainer = pl.Trainer(max_epochs=epochs, 136 | precision=precision, 137 | sync_batchnorm=True, 138 | callbacks=callbacks, 139 | strategy='ddp_find_unused_parameters_true', 140 | logger=logger) 141 | trainer.fit(model, datamodule=data) 142 | 143 | trainer.test(ckpt_path="last", datamodule=data) 144 | 145 | return logger 146 | 147 | 148 | def log_gin_config(logger: WandbLogger): 149 | # This function should be called after all the gin configurable functions. 150 | # Otherwise, the config string will be empty. 151 | gin_config_str = gin.operative_config_str() 152 | 153 | # Create a temporary file to store the gin config 154 | with open("/tmp/gin_config.txt", "w", encoding='UTF-8') as f: 155 | f.write(gin_config_str) 156 | 157 | # Log the artifact using the WandbLogger 158 | artifact = wandb.Artifact("gin_config", type="text") 159 | artifact.add_file("/tmp/gin_config.txt") 160 | logger.experiment.log_artifact(artifact) 161 | 162 | 163 | def main(): 164 | args = parse_arguments(TaskMode.TRAIN) 165 | 166 | for config_file in args.config_files: 167 | gin.parse_config_file(config_file, skip_unknown=True) 168 | 169 | # Run the training loop. 170 | logger = train(args.dataset_path, 171 | args.output_dir, 172 | args.checkpoint_path, 173 | args.wandb_project_name, 174 | args.wandb_run_name, 175 | args.wandb_entity_name, 176 | logger_type=args.logger) 177 | 178 | # Log gin config if using wandb 179 | if args.logger == 'wandb': 180 | log_gin_config(logger) 181 | # Finish wandb 182 | wandb.finish() 183 | 184 | 185 | if __name__ == '__main__': 186 | main() 187 | -------------------------------------------------------------------------------- /docker/Dockerfile.distillation: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:24.01-py3 2 | 3 | COPY . /workspace 4 | 5 | RUN pip install --upgrade pip 6 | 7 | RUN pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /docker/Dockerfile.rl: -------------------------------------------------------------------------------- 1 | # b120 is a build of IsaacLab v1.3.0. 2 | FROM nvcr.io/nvidian/isaac-lab:IsaacLab-main-b120 3 | 4 | WORKDIR /workspace 5 | 6 | COPY . /workspace 7 | -------------------------------------------------------------------------------- /images/compass.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:baee5767b189bce11678d4141b20fadd58c23fb0249e3daeaa05f83639091bc7 3 | size 12078387 4 | -------------------------------------------------------------------------------- /images/omap_generation.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:03b191320340548fa3d3a632d1a0cf14f297e15d5c3e27ede72fb45ba068ae73 3 | size 1832418 4 | -------------------------------------------------------------------------------- /onnx_conversion.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import argparse 17 | 18 | import gin 19 | import wandb 20 | import torch 21 | import torch.nn.functional as F 22 | from torch import nn 23 | 24 | from model.x_mobility.utils import pack_sequence_dim 25 | from model.x_mobility.x_mobility import XMobility 26 | 27 | from compass.residual_rl.constants import INPUT_IMAGE_SIZE 28 | from compass.residual_rl.actor_critic import ActorCriticXMobility 29 | from compass.distillation.distillation import ESDistillationPolicyWrapper 30 | 31 | 32 | class PolicyInference(nn.Module): 33 | ''' Wrapper of policy for Onnx conversion. 34 | ''' 35 | 36 | def __init__(self, base_checkpoint_path: str): 37 | super().__init__() 38 | # Load the state dict for X-Mobility. 39 | state_dict = torch.load(base_checkpoint_path, weights_only=False)['state_dict'] 40 | state_dict = {k.removeprefix('model.'): v for k, v in state_dict.items()} 41 | self.x_mobility = XMobility() 42 | self.x_mobility.load_state_dict(state_dict) 43 | 44 | def base_policy_forward(self, image, route, speed, action_input, history_input, sample_input): 45 | inputs = {} 46 | # Resize the input image to desired size. 47 | image = image.squeeze(0) 48 | image = F.interpolate(image, size=INPUT_IMAGE_SIZE, mode='bilinear', align_corners=False) 49 | inputs['image'] = image.unsqueeze(0) 50 | inputs['route'] = route 51 | inputs['speed'] = speed 52 | inputs['action'] = action_input 53 | inputs['history'] = history_input 54 | inputs['sample'] = sample_input 55 | 56 | # Run base policy. 57 | base_action, _, history, sample, _, _, _ = self.x_mobility.inference( 58 | inputs, False, False, False) 59 | latent_state = torch.cat([history, sample], dim=-1) 60 | route_feat = self.x_mobility.action_policy.route_encoder(pack_sequence_dim(route)) 61 | policy_state = self.x_mobility.action_policy.policy_state_fusion(latent_state, route_feat) 62 | return base_action, policy_state, history, sample 63 | 64 | 65 | class SpecialistPolicyInference(PolicyInference): 66 | ''' Wrapper of embodiment specialist policy for Onnx conversion. 67 | ''' 68 | 69 | def __init__(self, base_checkpoint_path: str, residual_checkpoint_path: str): 70 | super().__init__(base_checkpoint_path) 71 | 72 | self.residual_policy = ActorCriticXMobility( 73 | self.x_mobility.action_policy, 74 | action_dim=6, 75 | critic_state_dim=self.x_mobility.action_policy.poly_state_dim) 76 | self.residual_policy.load_state_dict( 77 | torch.load(residual_checkpoint_path, weights_only=False)['model_state_dict']) 78 | 79 | def forward(self, image, route, speed, action_input, history_input, sample_input): 80 | # Run base policy. 81 | base_action, policy_state, history, sample = self.base_policy_forward( 82 | image, route, speed, action_input, history_input, sample_input) 83 | 84 | # Run residual policy. 85 | residual_action = self.residual_policy.act_inference(policy_state) 86 | final_action = base_action + residual_action 87 | 88 | # Outputs: [action_output, history_output, sample_output] 89 | return final_action, history, sample 90 | 91 | 92 | class GeneralistPolicyInference(PolicyInference): 93 | ''' Wrapper of generalist policy for Onnx conversion. 94 | ''' 95 | 96 | def __init__(self, base_checkpoint_path: str, generalist_policy_ckpt_path: str, 97 | embodiment_type: str): 98 | super().__init__(base_checkpoint_path) 99 | 100 | # Load the state dict for the generalist policy. 101 | self.generalist_policy = ESDistillationPolicyWrapper(generalist_policy_ckpt_path, 102 | embodiment_type) 103 | 104 | def forward(self, image, route, speed, action_input, history_input, sample_input): 105 | # Run base policy. 106 | _, policy_state, history, sample = self.base_policy_forward(image, route, speed, 107 | action_input, history_input, 108 | sample_input) 109 | 110 | # Run generalist policy. 111 | generalist_action = self.generalist_policy(policy_state) 112 | 113 | # Outputs: [action_output, history_output, sample_output] 114 | return generalist_action, history, sample 115 | 116 | 117 | def convert(base_checkpoint_path: str, residual_checkpoint_path: str, 118 | generalist_checkpoint_path: str, embodiment_type: str, onnx_path: str, jit_path: str): 119 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 120 | if residual_checkpoint_path: 121 | model = SpecialistPolicyInference(base_checkpoint_path, residual_checkpoint_path) 122 | elif generalist_checkpoint_path: 123 | model = GeneralistPolicyInference(base_checkpoint_path, generalist_checkpoint_path, 124 | embodiment_type) 125 | else: 126 | raise ValueError('Either residual or generalist checkpoint path must be provided.') 127 | 128 | model.to(device) 129 | model.eval() 130 | 131 | # Input tensors. 132 | image = torch.randn((1, 1, 3, 640, 960), dtype=torch.float32).to(device) 133 | speed = torch.randn((1, 1, 1), dtype=torch.float32).to(device) 134 | action = torch.randn((1, 6), dtype=torch.float32).to(device) 135 | history = torch.zeros((1, 1024), dtype=torch.float32).to(device) 136 | sample = torch.zeros((1, 512), dtype=torch.float32).to(device) 137 | route = torch.randn((1, 1, 10, 4), dtype=torch.float32).to(device) 138 | 139 | # Output jit file. 140 | if jit_path: 141 | traced_model = torch.jit.trace(model, (image, route, speed, action, history, sample)) 142 | traced_model.save(jit_path) 143 | 144 | # Output names. 145 | output_names = ['action_output', 'history_output', 'sample_output'] 146 | 147 | # ONNX conversion. 148 | torch.onnx.export( 149 | model, (image, route, speed, action, history, sample), 150 | onnx_path, 151 | verbose=True, 152 | input_names=['image', 'route', 'speed', 'action_input', 'history_input', 'sample_input'], 153 | output_names=output_names) 154 | 155 | 156 | def main(): 157 | # Parse the arguments. 158 | parser = argparse.ArgumentParser(description='Convert the E2E Nav model to onnx.') 159 | parser.add_argument('--base-ckpt-path', 160 | '-b', 161 | type=str, 162 | required=True, 163 | help='The path to the base policy checkpoint.') 164 | parser.add_argument('--residual-ckpt-path', 165 | '-r', 166 | type=str, 167 | required=False, 168 | help='The path to the residual policy checkpoint.') 169 | parser.add_argument('--generalist-ckpt-path', 170 | '-g', 171 | type=str, 172 | required=False, 173 | help='The path to the generalist policy checkpoint.') 174 | parser.add_argument('--embodiment-type', 175 | '-e', 176 | type=str, 177 | required=False, 178 | help='The embodiment type to use for the generalist policy.') 179 | parser.add_argument('--onnx-file', 180 | '-o', 181 | type=str, 182 | required=True, 183 | help='The path to the output onnx file.') 184 | parser.add_argument('--jit-file', 185 | '-j', 186 | type=str, 187 | required=False, 188 | help='The path to the output JIT file.') 189 | parser.add_argument('--onnx-artifact', 190 | '-a', 191 | type=str, 192 | required=False, 193 | help='The wandb onnx artifact to upload.') 194 | args = parser.parse_args() 195 | 196 | # Load hyperparameters. 197 | gin.parse_config_file('configs/eval_config.gin', skip_unknown=True) 198 | 199 | # Run the convert. 200 | print("Converting ONNX.") 201 | convert(args.base_ckpt_path, args.residual_ckpt_path, args.generalist_ckpt_path, 202 | args.embodiment_type, args.onnx_file, args.jit_file) 203 | 204 | # Upload onnx to wandb if onnx_artifact is provided. 205 | if args.onnx_artifact: 206 | print(f'Uploading to WANDB: {args.onnx_artifact.split("/")}.') 207 | wandb_project = args.onnx_artifact.split('/')[1] 208 | wandb_run_id = args.onnx_artifact.split('/')[2].split(':')[0] 209 | wandb.init(dir='/tmp', project=wandb_project, id=wandb_run_id) 210 | version = args.onnx_artifact.split('/')[2].split(':')[1] 211 | onnx_artifact = wandb.Artifact(f'onnx-{wandb_run_id}-{version}', type='onnx') 212 | onnx_artifact.add_file(args.onnx_file) 213 | wandb.log_artifact(onnx_artifact) 214 | wandb.finish() 215 | 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /record.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import argparse 17 | import os 18 | import subprocess 19 | import random 20 | import shutil 21 | import yaml 22 | 23 | import wandb 24 | 25 | 26 | def run(command: str): 27 | print(f'Running: {" ".join(command)}') 28 | subprocess.run(command, check=True) 29 | 30 | 31 | def collect_data(dataset_config: dict, environments: list, base_policy_path: str, output_dir: str, 32 | wandb_project_name: str, wandb_entity_name: str): 33 | """ 34 | Collects data for each environment and embodiment combination. 35 | 36 | Args: 37 | environments (List[str]): A list of environments to collect data for. 38 | base_policy_path (str): The path to the base policy. 39 | output_dir (str): The directory to store the collected data. 40 | """ 41 | # Initialize wandb if using artifact. 42 | if dataset_config['policy_ckpt_type'] == 'wandb_artifact': 43 | wandb.init(project=wandb_project_name, entity=wandb_entity_name) 44 | 45 | # Record data for each embodiment and environment. 46 | isaaclab_path = os.getenv("ISAACLAB_PATH") 47 | for embodiment, residual_policy in dataset_config['embodiment_policies'].items(): 48 | if dataset_config['policy_ckpt_type'] == 'wandb_artifact': 49 | print(f'Collecting data for {embodiment} with {residual_policy}...') 50 | artifact = wandb.use_artifact(residual_policy, type='model') 51 | artifact_dir = artifact.download(root=f"/tmp/{embodiment}") 52 | residual_policy_path = os.path.join(artifact_dir, os.listdir(artifact_dir)[0]) 53 | print(f'Downloaded embodiment residual policy: {residual_policy_path}') 54 | elif dataset_config['policy_ckpt_type'] == 'local': 55 | residual_policy_path = residual_policy 56 | else: 57 | raise ValueError( 58 | f'Invalid policy checkpoint type: {dataset_config["policy_ckpt_type"]}') 59 | for environment in environments: 60 | print(f'Collecting environment {environment}') 61 | dataset_dir = os.path.join(output_dir, embodiment, environment) 62 | if not os.path.exists(dataset_dir): 63 | os.makedirs(dataset_dir) 64 | command = [ 65 | f"{isaaclab_path}/isaaclab.sh", "-p", 'run.py', '-c', 'configs/record_config.gin', 66 | '--enable_cameras', '-b', base_policy_path, '-p', residual_policy_path, '-o', 67 | dataset_dir, '--embodiment', embodiment, '--environment', environment, '--headless' 68 | ] 69 | run(command) 70 | 71 | # Finish wandb if using artifact. 72 | if dataset_config['policy_ckpt_type'] == 'wandb_artifact': 73 | wandb.finish() 74 | 75 | 76 | def compose_dataset(dataset_config: dict, output_dir: str, dataset_name: str): 77 | """ 78 | Compose a dataset by splitting and organizing files in the output directory. 79 | 80 | Args: 81 | output_dir (str): The directory containing the files to be split. 82 | dataset_name (str): The name of the dataset. 83 | """ 84 | # Process each embodiment separately 85 | for embodiment in os.listdir(output_dir): 86 | embodiment_path = os.path.join(output_dir, embodiment) 87 | if not os.path.isdir(embodiment_path): 88 | continue 89 | embodiment_split_files = {"train": [], "val": [], "test": []} 90 | # Process each environment 91 | for env in os.listdir(embodiment_path): 92 | env_path = os.path.join(embodiment_path, env, 'data') 93 | if not os.path.isdir(env_path): 94 | continue 95 | # Split per environment 96 | files = [os.path.join(env_path, f) for f in os.listdir(env_path)] 97 | random.shuffle(files) 98 | num_files = len(files) 99 | train_end = int(num_files * dataset_config['data_split_ratios']['train']) 100 | val_end = train_end + int(num_files * dataset_config['data_split_ratios']['val']) 101 | env_splits = { 102 | "train": files[:train_end], 103 | "val": files[train_end:val_end], 104 | "test": files[val_end:] 105 | } 106 | # Collect split files per robot (flattening env layer) 107 | for split, split_files in env_splits.items(): 108 | embodiment_split_files[split].extend(split_files) 109 | 110 | # Move files to corresponding split directories (grouped by robot) 111 | for split, split_files in embodiment_split_files.items(): 112 | split_dir = os.path.join(output_dir, dataset_name, 'data', split, embodiment) 113 | os.makedirs(split_dir, exist_ok=True) 114 | for file_path in split_files: 115 | target_file_path = os.path.join(split_dir, file_path.replace("/", "_")) 116 | shutil.move(file_path, target_file_path) 117 | 118 | 119 | def main(): 120 | # Add argparse arguments 121 | parser = argparse.ArgumentParser(description="Script to record datasets for RL distillation.") 122 | parser.add_argument('--dataset-config-file', 123 | '-c', 124 | type=str, 125 | required=True, 126 | help='The path to the dataset config yaml file.') 127 | parser.add_argument('--base_policy_path', 128 | '-b', 129 | type=str, 130 | required=True, 131 | help='The path to the base policy checkpoint artifact.') 132 | parser.add_argument('--wandb-project-name', 133 | '-n', 134 | type=str, 135 | default='afm_rl_enhance_record', 136 | help='The project name of W&B.') 137 | parser.add_argument('--wandb-entity-name', 138 | '-e', 139 | type=str, 140 | default='nvidia-isaac', 141 | help='The entity name of W&B.') 142 | parser.add_argument('--environments', 143 | nargs='+', 144 | type=str, 145 | default=['combined_single_rack'], 146 | help='A list of environments to benchmark.') 147 | parser.add_argument('--dataset-name', type=str, help='Name of the dataset in OSMO.') 148 | parser.add_argument('--output-dir', '-o', type=str, help='Output directory for the dataset.') 149 | args = parser.parse_args() 150 | with open(args.dataset_config_file, 'r', encoding='utf-8') as f: 151 | dataset_config = yaml.safe_load(f) 152 | 153 | # Record dataset. 154 | collect_data(dataset_config=dataset_config, 155 | environments=args.environments, 156 | base_policy_path=args.base_policy_path, 157 | output_dir=args.output_dir, 158 | wandb_project_name=args.wandb_project_name, 159 | wandb_entity_name=args.wandb_entity_name) 160 | compose_dataset(dataset_config=dataset_config, 161 | output_dir=args.output_dir, 162 | dataset_name=args.dataset_name) 163 | 164 | 165 | if __name__ == '__main__': 166 | # Run the main function. 167 | main() 168 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | gin-config 3 | h5py 4 | matplotlib 5 | moviepy 6 | numpy 7 | onnx 8 | pandas 9 | pytorch-lightning 10 | timm 11 | torcheval 12 | transformers 13 | wandb 14 | wheel 15 | -------------------------------------------------------------------------------- /x_mobility/x_mobility-0.1.0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/x_mobility/x_mobility-0.1.0-py3-none-any.whl --------------------------------------------------------------------------------