├── .dockerignore
├── .gitattributes
├── .gitignore
├── .pre-commit-config.yaml
├── .pylintrc
├── .style.yapf
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── compass
    ├── __init__.py
    ├── distillation
    │   ├── distillation.py
    │   ├── distillation_trainer.py
    │   └── rl_specialists_dataset.py
    ├── residual_rl
    │   ├── __init__.py
    │   ├── actor_critic.py
    │   ├── constants.py
    │   ├── critic_state_assembler.py
    │   ├── mlp.py
    │   ├── ppo.py
    │   ├── residual_ppo_trainer.py
    │   ├── rollout_storage.py
    │   ├── utils.py
    │   └── x_mobility_rl.py
    ├── rl_env
    │   ├── LICENCE
    │   ├── README.md
    │   ├── exts
    │   │   └── mobility_es
    │   │   │   ├── config
    │   │   │       └── extension.toml
    │   │   │   ├── docs
    │   │   │       └── CHANGELOG.rst
    │   │   │   ├── mobility_es
    │   │   │       ├── __init__.py
    │   │   │       ├── ckpt
    │   │   │       │   ├── digit_locomotion_policy.pt
    │   │   │       │   ├── g1_locomotion_policy.pt
    │   │   │       │   ├── h1_locomotion_policy.pt
    │   │   │       │   └── spot_locomotion_policy.pt
    │   │   │       ├── config
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── carter_env_cfg.py
    │   │   │       │   ├── digit_env_cfg.py
    │   │   │       │   ├── env_cfg.py
    │   │   │       │   ├── environments.py
    │   │   │       │   ├── g1_env_cfg.py
    │   │   │       │   ├── h1_env_cfg.py
    │   │   │       │   ├── robots.py
    │   │   │       │   ├── scene_assets.py
    │   │   │       │   └── spot_env_cfg.py
    │   │   │       ├── mdp
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── action
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── action_visualization.py
    │   │   │       │   │   ├── differential_drive_action.py
    │   │   │       │   │   ├── locomotion_policy_action.py
    │   │   │       │   │   └── non_holonomic_perfect_control_action.py
    │   │   │       │   ├── command
    │   │   │       │   │   ├── __init__.py
    │   │   │       │   │   ├── commands_cfg.py
    │   │   │       │   │   └── uniform_collision_free_pose_command.py
    │   │   │       │   ├── curriculum.py
    │   │   │       │   ├── events.py
    │   │   │       │   ├── observations.py
    │   │   │       │   ├── rewards.py
    │   │   │       │   ├── termination.py
    │   │   │       │   └── utils.py
    │   │   │       ├── utils
    │   │   │       │   ├── __init__.py
    │   │   │       │   └── occupancy_map.py
    │   │   │       └── wrapper
    │   │   │       │   └── env_wrapper.py
    │   │   │   └── setup.py
    │   ├── pyproject.toml
    │   └── scripts
    │   │   ├── __init__.py
    │   │   └── play.py
    └── utils
    │   └── logger.py
├── configs
    ├── distillation_config.gin
    ├── distillation_dataset_config_template.yaml
    ├── eval_config.gin
    ├── record_config.gin
    ├── shared.gin
    ├── train_config.gin
    └── x_mobility_config.gin
├── distillation_train.py
├── docker
    ├── Dockerfile.distillation
    └── Dockerfile.rl
├── images
    ├── compass.png
    └── omap_generation.png
├── onnx_conversion.py
├── record.py
├── requirements.txt
├── run.py
└── x_mobility
    └── x_mobility-0.1.0-py3-none-any.whl


/.dockerignore:
--------------------------------------------------------------------------------
1 | ./compass/rl_env/exts/mobility_es/mobility_es/usd
2 | ./venv
3 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.png filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | bazel*
 2 | cscope.*
 3 | *.swp
 4 | *.sublime-project
 5 | *.sublime-workspace
 6 | *.pyc
 7 | imgui.ini
 8 | .dazel_run
 9 | venv/
10 | .clangd/
11 | 
12 | # Local setups
13 | **/_isaac_sim
14 | 
15 | # Build files
16 | *build
17 | *install
18 | *log
19 | 
20 | # Protofiles
21 | *.pb*
22 | 
23 | # Data files
24 | *.pkl
25 | 
26 | #Jupter notebook
27 | *.ipynb_checkpoints/
28 | 
29 | # USD files
30 | compass/rl_env/exts/mobility_es/mobility_es/usd/
31 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | # and proprietary rights in and to this software, related documentation
 5 | # and any modifications thereto. Any use, reproduction, disclosure or
 6 | # distribution of this software and related documentation without an express
 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | repos:
 9 |   - repo: https://github.com/kynan/nbstripout
10 |     rev: 0.5.0
11 |     hooks:
12 |       - id: nbstripout
13 |   - repo: https://github.com/pre-commit/pre-commit-hooks.git
14 |     rev: v4.0.1
15 |     hooks:
16 |       - id: check-added-large-files
17 |         args: ['--maxkb=1024']
18 |       - id: end-of-file-fixer
19 |       - id: requirements-txt-fixer
20 |       - id: trailing-whitespace
21 |   - repo: https://github.com/google/yapf
22 |     rev: v0.31.0
23 |     hooks:
24 |       - id: yapf
25 |         name: "yapf"
26 |         additional_dependencies: [toml]
27 |   - repo: https://github.com/pre-commit/mirrors-clang-format
28 |     rev: v13.0.0
29 |     hooks:
30 |       - id: clang-format
31 |   - repo: https://github.com/pylint-dev/pylint
32 |     rev: v3.0.3
33 |     hooks:
34 |       - id: pylint
35 | 


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | # and proprietary rights in and to this software, related documentation
 5 | # and any modifications thereto. Any use, reproduction, disclosure or
 6 | # distribution of this software and related documentation without an express
 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | [style]
10 | based_on_style = google
11 | spaces_before_comment = 4
12 | split_before_logical_operator = true
13 | column_limit = 100
14 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions.
 4 | 
 5 | ## Code Reviews
 6 | 
 7 | All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult
 8 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.
 9 | 
10 | ## Signing Your Work
11 | 
12 | * We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license.
13 | 
14 |   * Any contribution which contains commits that are not Signed-Off will not be accepted.
15 | 
16 | * To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes:
17 |   ```bash
18 |   $ git commit -s -m "Add cool feature."
19 |   ```
20 |   This will append the following to your commit message:
21 |   ```
22 |   Signed-off-by: Your Name <your@email.com>
23 |   ```
24 | 
25 | * Full text of the DCO:
26 | 
27 |   ```
28 |     Developer Certificate of Origin
29 |     Version 1.1
30 | 
31 |     Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
32 |     1 Letterman Drive
33 |     Suite D4700
34 |     San Francisco, CA, 94129
35 | 
36 |     Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
37 |   ```
38 | 
39 |   ```
40 |     Developer's Certificate of Origin 1.1
41 | 
42 |     By making a contribution to this project, I certify that:
43 | 
44 |     (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or
45 | 
46 |     (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or
47 | 
48 |     (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
49 | 
50 |     (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
51 |   ```
52 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <h1 align="center"> COMPASS: Cross-Embodiment Mobility Policy via Residual RL and Skill Synthesis </h1>
  2 | 
  3 | <div align="center">
  4 | 
  5 | [![Isaac Lab](https://img.shields.io/badge/IsaacLab-1.3.0-b.svg)](https://isaac-sim.github.io/IsaacLab/v1.3.0/index.html)
  6 | [![IsaacSim](https://img.shields.io/badge/IsaacSim-4.2.0-b.svg)](https://docs.isaacsim.omniverse.nvidia.com/4.2.0/index.html)
  7 | [![Linux platform](https://img.shields.io/badge/Platform-linux--64-orange.svg)](https://ubuntu.com/blog/tag/22-04-lts)
  8 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
  9 | 
 10 | 
 11 | [[Website]](https://nvlabs.github.io/COMPASS/)
 12 | [[arXiv]](https://arxiv.org/abs/2502.16372)
 13 | </div>
 14 | 
 15 | ## Overview
 16 | 
 17 | This repository provides the official PyTorch implementation of [COMPASS](https://nvlabs.github.io/COMPASS/).
 18 | 
 19 | <p align="center">
 20 |     <img src="images/compass.png" alt="COMPASS" width="900" >
 21 | </p>
 22 | 
 23 | 
 24 | COMPASS is a novel framework for cross-embodiment mobility that combines:
 25 | - Imitation Learning (IL) for strong baseline performance
 26 | - Residual Reinforcement Learning (RL) for embodiment-specific adaptation
 27 | - Policy distillation to create a unified, generalist policy
 28 | 
 29 | ## Table of Contents
 30 | - [Installation](#installation)
 31 |   - [1. Residual RL Environment USDs](#1-residual-rl-environment-usds)
 32 |   - [2. Isaac Lab Installation](#2-isaac-lab-installation)
 33 |   - [3. Environment Setup](#3-environment-setup)
 34 |   - [4. Dependencies](#4-dependencies)
 35 |   - [5. X-Mobility Installation](#5-x-mobility-installation)
 36 | - [Usage](#usage)
 37 |   - [Residual RL Specialists](#residual-rl-specialists)
 38 |   - [Policy Distillation](#policy-distillation)
 39 |   - [Model Export](#model-export)
 40 |   - [Add New Embodiment or Scene](#add-new-embodiment-or-scene)
 41 |   - [Logging](#logging)
 42 | - [Pre-trained Generalist Policy Example](#pre-trained-generalist-policy-example)
 43 | - [License](#license)
 44 | - [Core Contributors](#core-contributors)
 45 | - [Acknowledgments](#acknowledgments)
 46 | - [Citation](#citation)
 47 | 
 48 | ## Installation
 49 | 
 50 | ### 1. Residual RL environment USDs
 51 | * Download the residual RL environment USDs from: https://huggingface.co/nvidia/COMPASS/blob/main/compass_usds.zip
 52 | * Unzip and place the downloaded USDs in the `compass/rl_env/exts/mobility_es/mobility_es/usd` directory
 53 | 
 54 | ### 2. Isaac Lab Installation
 55 | * Install Isaac Lab and the residual RL mobility extension by following this [instruction](compass/rl_env/README.md).
 56 | 
 57 | ### 3. Environment Setup
 58 | * Create and activate a virtual environment:
 59 |   ```bash
 60 |   python3 -m venv venv
 61 |   source venv/bin/activate
 62 |   ```
 63 | 
 64 | ### 4. Dependencies
 65 | * Install the required packages:
 66 |   ```bash
 67 |   ${ISAACLAB_PATH}/isaaclab.sh -p -m pip install -r requirements.txt
 68 |   ```
 69 | 
 70 | ### 5. X-Mobility Installation
 71 | * Install the [X-Mobility](https://github.com/NVlabs/X-MOBILITY) package:
 72 |   ```bash
 73 |   ${ISAACLAB_PATH}/isaaclab.sh -p -m pip install x_mobility/x_mobility-0.1.0-py3-none-any.whl
 74 |   ```
 75 | * Download the pre-trained X-Mobility checkpoint from: https://huggingface.co/nvidia/X-Mobility/blob/main/x_mobility-nav2-semantic_action_path.ckpt
 76 | 
 77 | 
 78 | ## Usage
 79 | 
 80 | ### Residual RL Specialists
 81 | 
 82 | * Train with the default configurations in `configs/train_config.gin`:
 83 |   ```bash
 84 |   ${ISAACLAB_PATH}/isaaclab.sh -p run.py \
 85 |       -c configs/train_config.gin \
 86 |       -o <output_dir> \
 87 |       -b <path/to/x_mobility_ckpt> \
 88 |       --enable_camera
 89 |   ```
 90 | 
 91 | * Evaluate trained model:
 92 |   ```bash
 93 |   ${ISAACLAB_PATH}/isaaclab.sh -p run.py \
 94 |       -c configs/eval_config.gin \
 95 |       -o <output_dir> \
 96 |       -b <path/to/x_mobility_ckpt> \
 97 |       -p <path/to/residual_policy_ckpt> \
 98 |       --enable_camera \
 99 |       --video \
100 |       --video_interval <video_interval>
101 |   ```
102 | 
103 | Add additional argument `--headless` to run RL training/evaluation in headless mode.
104 | 
105 | > **NOTE**: The GPU memory usage is proportional to the number of environments in residual RL training. For example, 32 environments will use around 30GB memory, so reduce the number of environments if you have limited GPU memory.
106 | 
107 | ### Policy Distillation
108 | 
109 | * Collect specialist data:
110 |   * Update specialists policy checkpoint paths in [dataset_config_template](configs/distillation_dataset_config_template.yaml)
111 |   * Run data collection:
112 |     ```bash
113 |     ${ISAACLAB_PATH}/isaaclab.sh -p record.py \
114 |         -c configs/distillation_dataset_config_template.yaml \
115 |         -o <output_dir> \
116 |         -b <path/to/x_mobility_ckpt> \
117 |         --dataset-name <dataset_name>
118 |     ```
119 | 
120 | * Train generalist policy:
121 |   ```bash
122 |   python3 distillation_train.py \
123 |       --config-files configs/distillation_config.gin \
124 |       --dataset-path <path/to/specialists_dataset> \
125 |       --output-dir <output_dir>
126 |   ```
127 | 
128 | * Evaluate generalist policy:
129 |   ```bash
130 |   ${ISAACLAB_PATH}/isaaclab.sh -p run.py \
131 |       -c configs/eval_config.gin \
132 |       -o <output_dir> \
133 |       -b <path/to/x_mobility_ckpt> \
134 |       -d <path/to/generalist_policy_ckpt> \
135 |       --enable_camera \
136 |       --video \
137 |       --video_interval <video_interval>
138 |   ```
139 | 
140 | ### Model Export
141 | 
142 | * Export RL specialist policy to ONNX or JIT formats:
143 |   ```bash
144 |   python3 onnx_conversion.py \
145 |       -b <path/to/x_mobility_ckpt> \
146 |       -r <path/to/residual_policy_ckpt> \
147 |       -o <path/to/output_onnx_file> \
148 |       -j <path/to/output_jit_file>
149 |   ```
150 | 
151 | * Export generalist policy to ONNX or JIT formats:
152 |   ```bash
153 |   python3 onnx_conversion.py \
154 |       -b <path/to/x_mobility_ckpt> \
155 |       -g <path/to/generalist_policy_ckpt> \
156 |       -e <embodiment_type> \
157 |       -o <path/to/output_onnx_file> \
158 |       -j <path/to/output_jit_file>
159 |   ```
160 | 
161 | ### Add New Embodiment or Scene
162 | 
163 | * Follow this [instruction](compass/rl_env/README.md) to add a new embodiment or scene to the Isaac Lab RL environment.
164 | * Register the new embodiment or scene to the `EmbodimentEnvCfgMap` and `EnvSceneAssetCfgMap` in [run.py](run.py), then update the configs or use command line arguments to select the new embodiment or scene.
165 | 
166 | 
167 | ### Logging:
168 | 
169 | The training and evaluation scripts use TensorBoard for logging by default. Weights & Biases (W&B) logging is also supported for more advanced experiment tracking features.
170 | 
171 | **To use TensorBoard (default):**
172 | - Logs will be saved to `<output_dir>/tensorboard/`
173 | - View logs with: `tensorboard --logdir=<output_dir>/tensorboard/`
174 | 
175 | **To use Weights & Biases:**
176 | 1. Install and set up W&B: `pip install wandb` and follow the [setup instructions](https://docs.wandb.ai/quickstart)
177 | 2. Log in to your W&B account: `wandb login`
178 | 3. Add the `--logger wandb` flag to your command:
179 |    ```bash
180 |    ${ISAACLAB_PATH}/isaaclab.sh -p run.py \
181 |        -c configs/train_config.gin \
182 |        -o <output_dir\
183 |        -b <path/to/x_mobility_ckpt> \
184 |        --enable_camera \
185 |        --logger wandb \
186 |        --wandb-run-name "experiment_name" \
187 |        --wandb-project-name "project_name" \
188 |        --wandb-entity-name "your_username_or_team"
189 |    ```
190 | 
191 | ## Pre-trained Generalist Policy Example
192 | 
193 | We provide a pre-trained generalist policy that works across four robot embodiments:
194 | * **Carter** (wheeled robot)
195 | * **H1** (humanoid)
196 | * **G1** (humanoid)
197 | * **Spot** (quadruped)
198 | 
199 | To try out the pre-trained generalist policy:
200 | 1. Download the checkpoint from: https://huggingface.co/nvidia/COMPASS/blob/main/compass_generalist.ckpt
201 | 2. Use the evaluation command shown above with your downloaded checkpoint:
202 |    ```bash
203 |    ${ISAACLAB_PATH}/isaaclab.sh -p run.py \
204 |        -c configs/eval_config.gin \
205 |        -o <output_dir> \
206 |        -b <path/to/x_mobility_ckpt> \
207 |        -d <path/to/downloaded_generalist_policy_ckpt> \
208 |        --enable_camera \
209 |        --embodiment <embodiment_name> \
210 |        --environment <environment_name>
211 |    ```
212 | 
213 | > **NOTE**: The generalist policy uses one-hot embodiment encoding and may not generalize perfectly to unseen embodiment types. For best results with new embodiment types, we recommend fine-tuning with residual RL first.
214 | 
215 | 
216 | ## License
217 | COMPASS is released under the Apache License 2.0. See [LICENSE](LICENSE) for additional details.
218 | 
219 | ## Core Contributors
220 | Wei Liu, Huihua Zhao, Chenran Li, Joydeep Biswas, Soha Pouya, Yan Chang
221 | 
222 | 
223 | ## Acknowledgments
224 | We would like to acknowledge the following projects where parts of the codes in this repo is derived from:
225 | - [RSL_RL](https://github.com/leggedrobotics/rsl_rl/tree/main)
226 | - [Isaac Lab](https://github.com/isaac-sim/IsaacLab)
227 | 
228 | ## Citation
229 | If you find this work useful in your research, please consider citing:
230 | ```bibtex
231 | @article{liu2025compass,
232 |   title={COMPASS: Cross-embodiment Mobility Policy via Residual RL and Skill Synthesis},
233 |   author={Liu, Wei and Zhao, Huihua and Li, Chenran and Biswas, Joydeep and Pouya, Soha and Chang, Yan},
234 |   journal={arXiv preprint arXiv:2502.16372},
235 |   year={2025}
236 | }
237 | 


--------------------------------------------------------------------------------
/compass/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/distillation/distillation.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import gin
 17 | import torch
 18 | import torch.nn.functional as F
 19 | from torch import nn
 20 | 
 21 | from model.x_mobility.utils import pack_sequence_dim
 22 | 
 23 | from compass.residual_rl.mlp import build_mlp_network, get_activation
 24 | 
 25 | ACTION_MASK = (0, 1, 5)
 26 | 
 27 | 
 28 | @gin.configurable
 29 | class EmbodimentOneHotEncoder(nn.Module):
 30 |     '''Embodiment one-hot encoding.'''
 31 | 
 32 |     def __init__(self,
 33 |                  num_dims: int = 8,
 34 |                  supported_types: list[str] = ['carter', 'h1', 'spot', 'g1']):
 35 |         super().__init__()
 36 |         self.num_dims = num_dims
 37 |         self.supported_types = supported_types
 38 |         assert self.num_dims >= len(self.supported_types), \
 39 |             "Number of dimensions less than the number of supported types"
 40 | 
 41 |     def forward(self, embodiment_type: str):
 42 |         if embodiment_type not in self.supported_types:
 43 |             raise ValueError(f"Embodiment type '{embodiment_type}' not supported. "
 44 |                              f"Must be one of {self.supported_types}")
 45 | 
 46 |         # Create one-hot encoding
 47 |         index = self.supported_types.index(embodiment_type)
 48 |         encoding = torch.zeros(self.num_dims)
 49 |         encoding[index] = 1.0
 50 |         return encoding
 51 | 
 52 | 
 53 | @gin.configurable
 54 | class MLPActionPolicy(nn.Module):
 55 |     '''MLP action policy.'''
 56 | 
 57 |     def __init__(self, in_channels: int, command_n_channels: int):
 58 |         super().__init__()
 59 |         self.command_fc = build_mlp_network(input_dim=in_channels,
 60 |                                             hidden_dims=[1024, 512, 256, 128],
 61 |                                             output_dim=command_n_channels,
 62 |                                             activation_fn=get_activation('relu', False))
 63 | 
 64 |     def forward(self, x):
 65 |         return {'mean': self.command_fc(x)}
 66 | 
 67 | 
 68 | @gin.configurable
 69 | class MLPActionPolicyDistribution(MLPActionPolicy):
 70 | 
 71 |     def __init__(self, in_channels, command_n_channels, init_noise_std: float = 0.1):
 72 |         super().__init__(in_channels, command_n_channels)
 73 | 
 74 |         # Global std variance for each action dim (trainable)
 75 |         self.std = nn.Parameter(init_noise_std * torch.ones(command_n_channels))
 76 | 
 77 |     def forward(self, x):
 78 |         return {'mean': self.command_fc(x), 'std': self.std}
 79 | 
 80 | 
 81 | @gin.configurable
 82 | class ESDistillationKLLoss(nn.Module):
 83 |     """
 84 |     Compute KL(teacher || student) for each sample.
 85 |     Returns average KL across the batch.
 86 |     Ref: https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians # pylint: disable=line-too-long
 87 |     """
 88 | 
 89 |     def forward(self, output, batch):
 90 |         losses = {}
 91 |         mu_teacher = pack_sequence_dim(batch['action'])[:, ACTION_MASK]
 92 |         std_teacher = pack_sequence_dim(batch['action_sigma'])[:, ACTION_MASK]
 93 |         mu_student = output['mean'][:, ACTION_MASK]
 94 |         std_student = output['std'].repeat(std_teacher.shape[0], 1)[:, ACTION_MASK]
 95 | 
 96 |         # Convert std to log_std
 97 |         log_std_teacher = torch.log(std_teacher)
 98 |         log_std_student = torch.log(std_student)
 99 | 
100 |         # Term1: log(\sigma_S / \sigma_T)
101 |         term1 = log_std_student - log_std_teacher
102 | 
103 |         # Term2: (sigma_T^2 + (mu_T - mu_S)^2) / (2 sigma_S^2)
104 |         numerator = std_teacher.pow(2) + (mu_teacher - mu_student).pow(2)
105 |         denominator = 2.0 * std_student.pow(2)
106 |         term2 = numerator / denominator
107 | 
108 |         # KL = term1 + term2 - 0.5
109 |         kl = term1 + term2 - 0.5
110 |         # Average across actions, then across batch
111 |         losses['action_kl'] = kl.mean()
112 |         return losses
113 | 
114 | 
115 | @gin.configurable
116 | class ESDistillationMSELoss(nn.Module):
117 |     '''Loss function for action policy distillation.'''
118 | 
119 |     def __init__(self):
120 |         super().__init__()
121 |         self.loss_fn = F.mse_loss
122 | 
123 |     def forward(self, output, batch):
124 |         losses = {}
125 |         mse = F.mse_loss(output['mean'][:, ACTION_MASK],
126 |                          pack_sequence_dim(batch['action'])[:, ACTION_MASK],
127 |                          reduction='none')
128 |         losses['action'] = torch.sum(mse, dim=-1, keepdims=True).mean()
129 |         return losses
130 | 
131 | 
132 | @gin.configurable
133 | class ESDistillationPolicy(nn.Module):
134 |     '''Embodiment specialist distillation policy.'''
135 | 
136 |     def __init__(self, policy_model: nn.Module, embodiment_encoder: nn.Module,
137 |                  policy_state_dim: int, command_n_channels: int):
138 |         super().__init__()
139 |         self.embodiment_encoder = embodiment_encoder()
140 |         self.policy_model = policy_model(in_channels=policy_state_dim +
141 |                                          self.embodiment_encoder.num_dims,
142 |                                          command_n_channels=command_n_channels)
143 | 
144 |     def forward(self, batch):
145 |         policy_state = batch['policy_state']
146 |         embodiment_encodings = [
147 |             self.embodiment_encoder(embodiment).repeat(policy_state.shape[1], 1)
148 |             for embodiment in batch['embodiment']
149 |         ]
150 |         embodiment_encodings = torch.stack(embodiment_encodings, dim=0).to(policy_state.device)
151 |         x = pack_sequence_dim(torch.cat([policy_state, embodiment_encodings], dim=-1))
152 |         return self.policy_model(x)
153 | 
154 | 
155 | class ESDistillationPolicyWrapper(nn.Module):
156 |     '''Wrapper of the distilled policy to enable inference.'''
157 | 
158 |     def __init__(self, distillation_policy_ckpt_path: str, embodiment_type: str):
159 |         super().__init__()
160 |         # Load the checkpoint and remove the prefix if any.
161 |         state_dict = torch.load(distillation_policy_ckpt_path, weights_only=False)['state_dict']
162 |         state_dict = {k.removeprefix('model.'): v for k, v in state_dict.items()}
163 |         # Load the state dict.
164 |         self.model = ESDistillationPolicy()
165 |         self.model.load_state_dict(state_dict, strict=True)
166 | 
167 |         # Embodiment type.
168 |         self.embodiment_type = embodiment_type
169 | 
170 |     def forward(self, policy_state):
171 |         batch = {
172 |             'policy_state': policy_state.unsqueeze(1),
173 |             'embodiment': [self.embodiment_type] * policy_state.shape[0]
174 |         }
175 |         return self.model(batch)['mean']
176 | 


--------------------------------------------------------------------------------
/compass/distillation/distillation_trainer.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import gin
17 | import pytorch_lightning as pl
18 | import torch
19 | 
20 | # pylint: disable=unused-import
21 | from compass.distillation.distillation import (MLPActionPolicy, MLPActionPolicyDistribution,
22 |                                                ESDistillationKLLoss, ESDistillationMSELoss)
23 | 
24 | 
25 | @gin.configurable
26 | class ESDistillationTrainer(pl.LightningModule):
27 |     '''Pytorch Lightnining module of ES distillation for training.
28 |     '''
29 | 
30 |     def __init__(self, model, loss, lr):
31 |         super().__init__()
32 |         self.lr = lr
33 |         # Save hyperparameters.
34 |         self.save_hyperparameters()
35 | 
36 |         # Model
37 |         self.model = model()
38 | 
39 |         # Losses
40 |         self.loss = loss()
41 | 
42 |     def forward(self, batch):
43 |         return self.model(batch)
44 | 
45 |     def shared_step(self, batch):
46 |         output = self.forward(batch)
47 |         losses = self.loss(output, batch)
48 |         return losses, output
49 | 
50 |     def training_step(self, batch, batch_idx):
51 |         losses, output = self.shared_step(batch)
52 |         self.log_and_visualize(batch, output, losses, batch_idx, prefix='train')
53 |         return self.loss_reducing(losses)
54 | 
55 |     def validation_step(self, batch, batch_idx):
56 |         losses, output = self.shared_step(batch)
57 |         self.log_and_visualize(batch, output, losses, batch_idx, prefix='validation')
58 |         val_loss = self.loss_reducing(losses)
59 |         return {'val_loss': val_loss}
60 | 
61 |     def test_step(self, batch, batch_idx):
62 |         losses, output = self.shared_step(batch)
63 |         self.log_and_visualize(batch, output, losses, batch_idx, prefix='test')
64 | 
65 |     def log_and_visualize(self, batch, output, losses, batch_idx, prefix='train'):    #pylint: disable=unused-argument
66 |         # Log losses
67 |         for key, value in losses.items():
68 |             self.log(f'{prefix}/losses/{key}', value, sync_dist=True)
69 | 
70 |         # Log total loss and videos for validation.
71 |         if prefix == 'validation':
72 |             # Log total validation loss.
73 |             self.log('val_loss', self.loss_reducing(losses), sync_dist=True, on_epoch=True)
74 | 
75 |     def loss_reducing(self, loss: torch.Tensor):
76 |         total_loss = sum([x for x in loss.values()])
77 |         return total_loss
78 | 
79 |     def configure_optimizers(self):
80 |         # Optimizer
81 |         optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
82 |         # scheduler
83 |         lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.5)
84 |         return [optimizer], [{'scheduler': lr_scheduler, 'interval': 'epoch'}]
85 | 


--------------------------------------------------------------------------------
/compass/distillation/rl_specialists_dataset.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import bisect
 17 | import os
 18 | 
 19 | import h5py
 20 | import gin
 21 | import pytorch_lightning as pl
 22 | import torch
 23 | from tqdm import tqdm
 24 | from torch.utils.data import DataLoader, Dataset
 25 | from torch.utils.data.distributed import DistributedSampler
 26 | 
 27 | 
 28 | @gin.configurable
 29 | class RLSpecialistDataModule(pl.LightningDataModule):
 30 |     '''Datamodule with dataset collected from Isaac Sim.
 31 |     '''
 32 | 
 33 |     def __init__(self, dataset_path: str, batch_size: int, sequence_length: int, num_workers: int):
 34 |         super().__init__()
 35 |         self.batch_size = batch_size
 36 |         self.sequence_length = sequence_length
 37 |         self.num_workers = num_workers
 38 |         self.dataset_path = dataset_path
 39 |         self.train_dataset = None
 40 |         self.val_dataset = None
 41 | 
 42 |     def setup(self, stage=None):
 43 |         if stage == 'fit':
 44 |             self.train_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'train'),
 45 |                                                      self.sequence_length)
 46 |             self.val_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'val'),
 47 |                                                    self.sequence_length)
 48 |         if stage == 'test' or stage is None:
 49 |             self.test_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'test'),
 50 |                                                     self.sequence_length)
 51 | 
 52 |     def train_dataloader(self):
 53 |         train_sampler = DistributedSampler(self.train_dataset, shuffle=True)
 54 |         return DataLoader(self.train_dataset,
 55 |                           batch_size=self.batch_size,
 56 |                           num_workers=self.num_workers,
 57 |                           pin_memory=True,
 58 |                           drop_last=True,
 59 |                           sampler=train_sampler)
 60 | 
 61 |     def val_dataloader(self):
 62 |         val_sampler = DistributedSampler(self.val_dataset, shuffle=False)
 63 |         return DataLoader(self.val_dataset,
 64 |                           batch_size=self.batch_size,
 65 |                           num_workers=self.num_workers,
 66 |                           pin_memory=True,
 67 |                           drop_last=True,
 68 |                           sampler=val_sampler)
 69 | 
 70 |     def test_dataloader(self):
 71 |         test_sampler = DistributedSampler(self.test_dataset, shuffle=False)
 72 |         return DataLoader(self.test_dataset,
 73 |                           batch_size=self.batch_size,
 74 |                           num_workers=self.num_workers,
 75 |                           pin_memory=True,
 76 |                           drop_last=True,
 77 |                           sampler=test_sampler)
 78 | 
 79 |     def load_test_data(self):
 80 |         """ Load test data. This function is used in local Jupyter testing environment only.
 81 |         """
 82 |         test_dataset = RLSpecialistDataset(os.path.join(self.dataset_path, 'test'),
 83 |                                            self.sequence_length)
 84 |         return DataLoader(
 85 |             test_dataset,
 86 |             batch_size=16,
 87 |             num_workers=self.num_workers,
 88 |             shuffle=False,
 89 |             pin_memory=True,
 90 |             drop_last=True,
 91 |         )
 92 | 
 93 | 
 94 | class RLSpecialistDataset(Dataset):
 95 |     '''Dataset from Isaac sim.
 96 |     '''
 97 | 
 98 |     def __init__(self, dataset_path: str, sequence_length: int):
 99 |         super().__init__()
100 |         self.sequence_length = sequence_length
101 |         self.rl_batch_size = 0
102 |         self.rl_num_steps = 0
103 |         self.hdfs = []
104 |         self.accumulated_sample_sizes = []
105 |         self.num_samples = 0
106 | 
107 |         # Iterate each embodiment in the dataset.
108 |         for embodiment in os.listdir(dataset_path):
109 |             embodiment_path = os.path.join(dataset_path, embodiment)
110 |             # Iterate the sorted runs for the given scenario.
111 |             dataset_files = [
112 |                 run_file for run_file in os.listdir(embodiment_path) if run_file.endswith('h5')
113 |             ]
114 |             dataset_files = sorted(dataset_files)
115 |             with tqdm(total=len(dataset_files),
116 |                       desc=f"Loading data from {embodiment_path}",
117 |                       unit="file") as pbar:
118 |                 for dataset_file in dataset_files:
119 |                     hdf = h5py.File(os.path.join(embodiment_path, dataset_file), 'r')
120 |                     self.hdfs.append(hdf)
121 |                     self.rl_batch_size = hdf['image'].shape[0]
122 |                     self.rl_num_steps = hdf['image'].shape[1]
123 |                     self.accumulated_sample_sizes.append(self.num_samples)
124 |                     self.num_samples += (self.rl_num_steps //
125 |                                          self.sequence_length) * self.rl_batch_size
126 |                     pbar.update(1)
127 | 
128 |     def __len__(self):
129 |         return self.num_samples
130 | 
131 |     def __getitem__(self, index):
132 |         # Get the cooresponding hdf.
133 |         hdf_idx = bisect.bisect_left(self.accumulated_sample_sizes, index + 1) - 1
134 |         hdf_sample_idx = index - self.accumulated_sample_sizes[hdf_idx]
135 |         return self._get_elements_batch(self.hdfs[hdf_idx], hdf_sample_idx)
136 | 
137 |     def _get_elements_batch(self, hdf, index):
138 |         samples_per_batch = self.rl_num_steps // self.sequence_length
139 |         batch_idx = index // samples_per_batch
140 |         step_idx = (index % samples_per_batch) * self.sequence_length
141 |         elements_batch = {}
142 |         elements_batch['image'] = torch.tensor(
143 |             hdf['image'][batch_idx, step_idx:step_idx + self.sequence_length, :]).permute(
144 |                 0, 3, 1, 2) / 255.0
145 |         elements_batch['route'] = torch.tensor(hdf['route'][batch_idx, step_idx:step_idx +
146 |                                                             self.sequence_length, :])
147 |         elements_batch['speed'] = torch.tensor(hdf['speed'][batch_idx, step_idx:step_idx +
148 |                                                             self.sequence_length, :])
149 |         elements_batch['action'] = torch.tensor(hdf['action'][batch_idx, step_idx:step_idx +
150 |                                                               self.sequence_length, :])
151 |         elements_batch['policy_state'] = torch.tensor(
152 |             hdf['policy_state'][batch_idx, step_idx:step_idx + self.sequence_length, :])
153 | 
154 |         # Get embodiment information.
155 |         elements_batch['embodiment'] = hdf.attrs['embodiment']
156 |         elements_batch['action_sigma'] = torch.tensor(hdf['action_sigma']).repeat(
157 |             self.sequence_length, 1)
158 |         return elements_batch
159 | 


--------------------------------------------------------------------------------
/compass/residual_rl/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/residual_rl/actor_critic.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import copy
 17 | 
 18 | import gin
 19 | import torch
 20 | from torch import nn
 21 | from torch.distributions import Normal
 22 | 
 23 | from compass.residual_rl.mlp import build_mlp_network, get_activation
 24 | 
 25 | 
 26 | @gin.configurable
 27 | class ActorCriticBase(nn.Module):
 28 |     """Base class of the ActorCritic.
 29 | 
 30 |     Note: The inherited class needs to provide the definitions of the actor and critic network.
 31 | 
 32 |     """
 33 | 
 34 |     def __init__(self, action_dim: int, init_noise_std: float = 1.0):
 35 | 
 36 |         super().__init__()
 37 | 
 38 |         self.actor = None
 39 |         self.critic = None
 40 | 
 41 |         # Action noise
 42 |         self.std = nn.Parameter(init_noise_std * torch.ones(action_dim))
 43 |         self.distribution = None
 44 |         # disable args validation for speedup
 45 |         Normal.set_default_validate_args = False
 46 | 
 47 |     def reset(self, dones=None):
 48 |         pass
 49 | 
 50 |     @property
 51 |     def action_mean(self):
 52 |         return self.distribution.mean
 53 | 
 54 |     @property
 55 |     def action_std(self):
 56 |         return self.distribution.stddev
 57 | 
 58 |     @property
 59 |     def entropy(self):
 60 |         return self.distribution.entropy().sum(dim=-1)
 61 | 
 62 |     def update_distribution(self, policy_state):
 63 |         mean = self.act_inference(policy_state)
 64 |         self.distribution = Normal(mean, mean * 0. + self.std)
 65 | 
 66 |     def act(self, policy_state):
 67 |         self.update_distribution(policy_state)
 68 |         action = self.distribution.sample()
 69 |         return action
 70 | 
 71 |     def act_inference(self, policy_state):
 72 |         if self.actor is None:
 73 |             raise ValueError("User needs to provide the definition of actor network")
 74 |         # pylint: disable=not-callable
 75 |         return self.actor(policy_state)
 76 | 
 77 |     def get_actions_log_prob(self, actions):
 78 |         return self.distribution.log_prob(actions).sum(dim=-1)
 79 | 
 80 |     def evaluate(self, critic_state):
 81 |         if self.critic is None:
 82 |             raise ValueError("User needs to provide the definition of critic network")
 83 |         # Compute the value.
 84 |         # pylint: disable=not-callable
 85 |         value = self.critic(critic_state)
 86 |         return value
 87 | 
 88 | 
 89 | @gin.configurable
 90 | class ActorCriticXMobility(ActorCriticBase):
 91 |     """ActorCritic for residual policy to enhance X-Mobility.
 92 | 
 93 |     The actor network is initialized with the X-Mobility's policy head with the last linear layer
 94 |     reset to zero.
 95 |     """
 96 | 
 97 |     def __init__(self,
 98 |                  base_policy,
 99 |                  action_dim,
100 |                  critic_state_dim,
101 |                  critic_hidden_dims=256,
102 |                  init_noise_std=1.0):
103 |         super().__init__(action_dim=action_dim, init_noise_std=init_noise_std)
104 |         # Overwrite the actor with the base policy
105 |         # Load the base_policy and reset the last linear layer to 0.0
106 |         self.actor = copy.deepcopy(base_policy.policy_mlp.command_fc)
107 |         nn.init.zeros_(self.actor[-2].weight)    # Set weights to zero
108 |         if self.actor[-2].bias is not None:
109 |             nn.init.zeros_(self.actor[-2].bias)
110 |         # Enable the gradients.
111 |         for param in self.actor.parameters():
112 |             param.requires_grad = True
113 | 
114 |         # Define critic
115 |         activation = get_activation('relu', inplace=True)
116 |         self.critic = build_mlp_network(
117 |             input_dim=critic_state_dim,
118 |             hidden_dims=[critic_hidden_dims, critic_hidden_dims, critic_hidden_dims // 2],
119 |             output_dim=1,
120 |             activation_fn=activation)
121 | 
122 | 
123 | @gin.configurable
124 | class ActorCriticMLP(ActorCriticBase):
125 |     """ ActorCritic with both actor and critic built as MLP modules.
126 |     """
127 | 
128 |     def __init__(self,
129 |                  actor_state_dim,
130 |                  critic_state_dim,
131 |                  actor_hidden_dims,
132 |                  critic_hidden_dims,
133 |                  action_dim,
134 |                  init_noise_std=1.0):
135 |         super().__init__(action_dim, init_noise_std)
136 | 
137 |         activation = get_activation('relu', inplace=True)
138 |         self.actor = build_mlp_network(input_dim=actor_state_dim,
139 |                                        hidden_dims=actor_hidden_dims,
140 |                                        output_dim=action_dim,
141 |                                        activation_fn=activation)
142 | 
143 |         self.critic = build_mlp_network(input_dim=critic_state_dim,
144 |                                         hidden_dims=critic_hidden_dims,
145 |                                         output_dim=1,
146 |                                         activation_fn=activation)
147 | 


--------------------------------------------------------------------------------
/compass/residual_rl/constants.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | INPUT_IMAGE_SIZE = (320, 512)
17 | 


--------------------------------------------------------------------------------
/compass/residual_rl/critic_state_assembler.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from typing import Dict
 17 | 
 18 | import gin
 19 | import torch
 20 | from torch import nn
 21 | from torchvision import models
 22 | 
 23 | from compass.residual_rl.constants import INPUT_IMAGE_SIZE
 24 | from compass.residual_rl.utils import preprocess_depth_images
 25 | 
 26 | 
 27 | @gin.configurable
 28 | class DepthImageFeatureExtractor(nn.Module):
 29 |     """
 30 |     Feature extractor for depth images using pretrained ResNet.
 31 |     Assumes input images are three channels (via replication for example)
 32 |     and preprocessed to 224x224.
 33 | 
 34 |     Note: while the input_size can be different, we currently restrict it to
 35 |      this fixed size only as the ResNet is trained with this size. It is a
 36 |      common practice and widely used.
 37 |     """
 38 | 
 39 |     def __init__(self):
 40 |         super().__init__()
 41 |         # Initialize a pre-trained ResNet
 42 |         resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
 43 | 
 44 |         self.input_size = [3, 224, 224]
 45 | 
 46 |         # Use the model's layers up to a certain point for feature extraction
 47 |         # Remove the fully connected layers, retain convolutional layers
 48 |         modules = list(resnet.children())[:-2]
 49 |         self.feature_extractor = nn.Sequential(*modules)
 50 | 
 51 |         # The output shape of the feature extractor is 512 x 7 x 7, we choose
 52 |         # output_size of the average pool as 1 to reduce the feature dimension.
 53 |         output_size = (1, 1)
 54 |         self.global_avg_pool = nn.AdaptiveAvgPool2d((output_size))
 55 | 
 56 |         # Freeze all parameters
 57 |         for param in self.feature_extractor.parameters():
 58 |             param.requires_grad = False
 59 | 
 60 |         self.out_channels = modules[-1][-1].conv2.out_channels * output_size[0] * output_size[1]
 61 | 
 62 |     def forward(self, x):
 63 |         # Forward pass through feature extractor
 64 |         features = self.feature_extractor(x)
 65 | 
 66 |         # Apply global average pooling
 67 |         pooled_features = self.global_avg_pool(features)
 68 | 
 69 |         pooled_features = torch.flatten(pooled_features, start_dim=1)
 70 |         if pooled_features.numel() == 0:
 71 |             raise ValueError("Feature tensor is empty. Check input size and network layers.")
 72 |         return pooled_features
 73 | 
 74 | 
 75 | @gin.configurable
 76 | class CriticObservationEncoder(nn.Module):
 77 |     '''Encoder of the input observations
 78 |         Args:
 79 |             depth_encoder: encoder of the depth image
 80 |             route_feat_dim: feature dimension of the route encoder
 81 |             speed_feat_dim: feature dimension of the speed encoder
 82 | 
 83 |         Inputs:
 84 |             batch (Dict): dict of the input tensors:
 85 |                 depth_image: (b, 1, h, w)
 86 |                 speed_feat: (b, out_channels), encoded speed feature
 87 |                 route_feat: (b, out_channels), encoded route feature
 88 | 
 89 |         Returns:
 90 |             embedding: A dict of 1D embedding of the observations
 91 |     '''
 92 | 
 93 |     def __init__(self, depth_encoder: nn.Module, route_feat_dim: int, speed_feat_dim: int):
 94 |         super().__init__()
 95 | 
 96 |         # Depth Image
 97 |         self.depth_encoder = depth_encoder()
 98 |         features_channels = self.depth_encoder.out_channels
 99 | 
100 |         # Add dims from pretrained route encoder and speed encoder
101 |         features_channels += route_feat_dim
102 |         features_channels += speed_feat_dim
103 | 
104 |         self.embedding_dim = features_channels
105 | 
106 |     def forward(self, batch: Dict) -> torch.Tensor:
107 |         # Only support one step for now.
108 |         depth_image = batch['depth_image']
109 | 
110 |         # Image encoding
111 |         depth_feat = self.depth_encoder(depth_image)
112 | 
113 |         speed_feat = batch["speed_feat"]
114 |         route_feat = batch["route_feat"]
115 | 
116 |         # Final observation embedding.
117 |         embedding = torch.cat([depth_feat, speed_feat, route_feat], dim=1)
118 | 
119 |         # Check dimension matches
120 |         assert embedding.shape[1] == self.embedding_dim
121 | 
122 |         return embedding
123 | 
124 | 
125 | @gin.configurable
126 | class CriticStateAssemblerBase:
127 |     """Abstract base class to assemble the critic state.
128 | 
129 |     The goal is to provide a universal interface type for different critic setups.
130 |     """
131 | 
132 |     # pylint: disable=unused-argument
133 |     def __init__(self, env=None, encoder=None, policy_state_dim=None):
134 |         self._state_dim = -1
135 | 
136 |     @property
137 |     def state_dim(self) -> int:
138 |         return self._state_dim
139 | 
140 |     def __str__(self) -> str:
141 |         raise NotImplementedError
142 | 
143 |     def compute_critic_state(self, policy_state=None, obs_dict=None, extras=None) -> torch.Tensor:
144 |         raise NotImplementedError
145 | 
146 | 
147 | @gin.configurable
148 | class CriticStateDepthEncoderAssembler(CriticStateAssemblerBase):
149 |     """ Use the encoded depth, speed and route feature for critic state.
150 | 
151 |     """
152 | 
153 |     def __init__(self, env=None, encoder=None, policy_state_dim=None):
154 |         super().__init__(env, encoder, policy_state_dim)
155 |         self.encoder = encoder
156 |         self._state_dim = encoder.embedding_dim
157 | 
158 |     def __str__(self) -> str:
159 |         return "CriticStateDepthEncoderAssembler"
160 | 
161 |     def compute_critic_state(self, policy_state=None, obs_dict=None, extras=None):
162 |         # Preprocess depth image
163 |         input_size = self.encoder.depth_encoder.input_size
164 |         raw_depth_image = obs_dict["privileged"]["camera_depth_img"]
165 |         b = raw_depth_image.shape[0]
166 |         depth_image_processed = raw_depth_image.reshape(b, 1, INPUT_IMAGE_SIZE[0],
167 |                                                         INPUT_IMAGE_SIZE[1]).float()
168 | 
169 |         depth_image_processed = preprocess_depth_images(depth_image_processed,
170 |                                                         target_height=input_size[1],
171 |                                                         target_width=input_size[2])
172 | 
173 |         critic_batch = {}
174 |         critic_batch.update(extras)
175 |         critic_batch['depth_image'] = depth_image_processed
176 |         return self.encoder(critic_batch)
177 | 
178 | 
179 | @gin.configurable
180 | class CriticStateSymmetricAssembler(CriticStateAssemblerBase):
181 |     """Use the policy state as the critic state for symmetric A2C.
182 | 
183 |     """
184 | 
185 |     def __init__(self, env=None, encoder=None, policy_state_dim=None):
186 |         super().__init__(env, encoder, policy_state_dim)
187 |         self._state_dim = policy_state_dim
188 | 
189 |     def __str__(self) -> str:
190 |         return "CriticStateSymmetricAssembler"
191 | 
192 |     def compute_critic_state(self, policy_state=None, obs_dict=None, extras=None):
193 |         return policy_state
194 | 


--------------------------------------------------------------------------------
/compass/residual_rl/mlp.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from torch import nn
17 | 
18 | 
19 | def build_mlp_network(input_dim, hidden_dims, output_dim, activation_fn):
20 |     """
21 |     Builds a MLP feed-forward neural network.
22 | 
23 |     Args:
24 |         input_dim (int): Dimension of the input layer.
25 |         hidden_dims (list of int): Dimensions of the hidden layers.
26 |         output_dim (int): Dimension of the output layer.
27 |         activation_fn (nn.Module): Activation function to use between layers.
28 | 
29 |     Returns:
30 |         nn.Sequential: A sequential container of layers representing the network.
31 |     """
32 |     layers = []
33 |     layers.append(nn.Linear(input_dim, hidden_dims[0]))
34 |     layers.append(activation_fn)
35 | 
36 |     for i in range(len(hidden_dims) - 1):
37 |         layers.append(nn.Linear(hidden_dims[i], hidden_dims[i + 1]))
38 |         layers.append(activation_fn)
39 | 
40 |     layers.append(nn.Linear(hidden_dims[-1], output_dim))
41 | 
42 |     return nn.Sequential(*layers)
43 | 
44 | 
45 | def get_activation(act_name, inplace: bool = True):
46 |     if act_name == "elu":
47 |         return nn.ELU(inplace)
48 |     elif act_name == "selu":
49 |         return nn.SELU(inplace)
50 |     elif act_name == "relu":
51 |         return nn.ReLU(inplace)
52 |     elif act_name == "crelu":
53 |         return nn.CReLU(inplace)
54 |     elif act_name == "lrelu":
55 |         return nn.LeakyReLU(inplace)
56 |     elif act_name == "tanh":
57 |         return nn.Tanh(inplace)
58 |     elif act_name == "sigmoid":
59 |         return nn.Sigmoid(inplace)
60 |     else:
61 |         print("invalid activation function!")
62 |         return None
63 | 


--------------------------------------------------------------------------------
/compass/residual_rl/ppo.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import gin
 17 | import torch
 18 | from torch import nn, optim
 19 | 
 20 | from compass.residual_rl.rollout_storage import RolloutStorage
 21 | 
 22 | 
 23 | @gin.configurable
 24 | class PPO:
 25 |     """PPO class to train a A2C policy"""
 26 | 
 27 |     def __init__(self,
 28 |                  actor_critic,
 29 |                  device='cpu',
 30 |                  value_loss_coef=1.0,
 31 |                  use_clipped_value_loss=True,
 32 |                  clip_param=0.2,
 33 |                  entropy_coef=0.01,
 34 |                  num_learning_epochs=5,
 35 |                  num_mini_batches=4,
 36 |                  learning_rate=1.e-4,
 37 |                  schedule='adaptive',
 38 |                  gamma=0.99,
 39 |                  lam=0.95,
 40 |                  desired_kl=0.01,
 41 |                  max_grad_norm=1.0):
 42 | 
 43 |         self.device = device
 44 | 
 45 |         # PPO components
 46 |         self.actor_critic = actor_critic
 47 |         self.actor_critic.to(self.device)
 48 |         self.storage = None
 49 | 
 50 |         self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate)
 51 |         self.transition = RolloutStorage.Transition()
 52 | 
 53 |         # Parameters
 54 |         self.learning_rate = learning_rate
 55 |         self.value_loss_coef = value_loss_coef
 56 |         self.use_clipped_value_loss = use_clipped_value_loss
 57 |         self.clip_param = clip_param
 58 |         self.entropy_coef = entropy_coef
 59 |         self.num_learning_epochs = num_learning_epochs
 60 |         self.num_mini_batches = num_mini_batches
 61 |         self.schedule = schedule
 62 |         self.gamma = gamma
 63 |         self.lam = lam
 64 |         self.desired_kl = desired_kl
 65 |         self.max_grad_norm = max_grad_norm
 66 | 
 67 |     def init_storage(self, num_envs, num_transitions_per_env, policy_states_shape,
 68 |                      critic_states_shape, actions_shape):
 69 |         self.storage = RolloutStorage(num_envs, num_transitions_per_env, policy_states_shape,
 70 |                                       critic_states_shape, actions_shape, self.device)
 71 | 
 72 |     def test_mode(self):
 73 |         self.actor_critic.test()
 74 | 
 75 |     def train_mode(self):
 76 |         self.actor_critic.train()
 77 | 
 78 |     def act(self, states):
 79 |         policy_states = self._get_policy_states(states)
 80 |         critic_states = self._get_critic_states(states)
 81 | 
 82 |         # Compute the actions and values
 83 |         self.transition.actions = (self.actor_critic.act(policy_states)).detach()
 84 |         self.transition.values = self.actor_critic.evaluate(critic_states).detach()
 85 |         self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(
 86 |             self.transition.actions).detach()
 87 |         self.transition.action_mean = (self.actor_critic.action_mean).detach()
 88 |         self.transition.action_sigma = (self.actor_critic.action_std).detach()
 89 | 
 90 |         # Need to record policy state before env.step()
 91 |         self.transition.policy_states = policy_states
 92 |         self.transition.critic_states = critic_states
 93 |         return self.transition.actions
 94 | 
 95 |     def act_inference(self, states):
 96 |         policy_states = self._get_policy_states(states)
 97 |         return self.actor_critic.act_inference(policy_states)
 98 | 
 99 |     def process_env_step(self, rewards, dones):
100 |         self.transition.rewards = rewards.clone()
101 |         self.transition.dones = dones
102 | 
103 |         # Record the transition
104 |         self.storage.add_transitions(self.transition)
105 |         self.transition.clear()
106 |         self.actor_critic.reset(dones)
107 | 
108 |     def compute_returns(self, last_states):
109 |         critic_states = self._get_critic_states(last_states)
110 |         last_values = self.actor_critic.evaluate(critic_states).detach()
111 |         self.storage.compute_returns(last_values, self.gamma, self.lam)
112 | 
113 |     def update(self):
114 |         mean_value_loss = 0
115 |         mean_surrogate_loss = 0
116 |         generator = self.storage.mini_batch_generator(self.num_mini_batches,
117 |                                                       self.num_learning_epochs)
118 |         for states_batch, actions_batch, target_values_batch, advantages_batch, \
119 |                 returns_batch, old_actions_log_prob_batch, old_mu_batch, \
120 |                 old_sigma_batch, _ in generator:
121 |             policy_states_batch = self._get_policy_states(states_batch)
122 |             critic_states_batch = self._get_critic_states(states_batch)
123 |             self.actor_critic.act(policy_states_batch)
124 |             actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
125 |             value_batch = self.actor_critic.evaluate(critic_states_batch)
126 |             mu_batch = self.actor_critic.action_mean
127 |             sigma_batch = self.actor_critic.action_std
128 |             entropy_batch = self.actor_critic.entropy
129 | 
130 |             # KL
131 |             if self.desired_kl and self.schedule == 'adaptive':
132 |                 with torch.inference_mode():
133 |                     kl = torch.sum(
134 |                         torch.log(sigma_batch / old_sigma_batch + 1.e-5) +
135 |                         (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) /
136 |                         (2.0 * torch.square(sigma_batch)) - 0.5,
137 |                         axis=-1)
138 |                     kl_mean = torch.mean(kl)
139 | 
140 |                     if kl_mean > self.desired_kl * 2.0:
141 |                         self.learning_rate = max(1e-5, self.learning_rate / 1.5)
142 |                     elif (self.desired_kl / 2.0) > kl_mean > 0.0:
143 |                         self.learning_rate = min(1e-2, self.learning_rate * 1.5)
144 | 
145 |                     for param_group in self.optimizer.param_groups:
146 |                         param_group['lr'] = self.learning_rate
147 | 
148 |             # Surrogate loss
149 |             ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
150 |             surrogate = -torch.squeeze(advantages_batch) * ratio
151 |             surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(
152 |                 ratio, 1.0 - self.clip_param, 1.0 + self.clip_param)
153 |             surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
154 | 
155 |             # Value function loss
156 |             if self.use_clipped_value_loss:
157 |                 value_clipped = target_values_batch + \
158 |                                 (value_batch - target_values_batch).clamp(-self.clip_param,
159 |                                                                           self.clip_param)
160 |                 value_losses = (value_batch - returns_batch).pow(2)
161 |                 value_losses_clipped = (value_clipped - returns_batch).pow(2)
162 |                 value_loss = torch.max(value_losses, value_losses_clipped).mean()
163 |             else:
164 |                 value_loss = (returns_batch - value_batch).pow(2).mean()
165 | 
166 |             loss = surrogate_loss + self.value_loss_coef * value_loss - \
167 |                 self.entropy_coef * entropy_batch.mean()
168 | 
169 |             # Gradient step
170 |             self.optimizer.zero_grad()
171 |             loss.backward()
172 |             nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
173 |             self.optimizer.step()
174 | 
175 |             mean_value_loss += value_loss.item()
176 |             mean_surrogate_loss += surrogate_loss.item()
177 | 
178 |         num_updates = self.num_learning_epochs * self.num_mini_batches
179 |         mean_value_loss /= num_updates
180 |         mean_surrogate_loss /= num_updates
181 |         self.storage.clear()
182 | 
183 |         return mean_value_loss, mean_surrogate_loss
184 | 
185 |     def _get_policy_states(self, states):
186 |         return states[0]
187 | 
188 |     def _get_critic_states(self, states):
189 |         return states[1]
190 | 


--------------------------------------------------------------------------------
/compass/residual_rl/rollout_storage.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import torch
 17 | 
 18 | 
 19 | class RolloutStorage:
 20 |     """Storage of rollout info."""
 21 | 
 22 |     class Transition:
 23 |         """Transition info."""
 24 | 
 25 |         def __init__(self):
 26 |             self.policy_states = None
 27 |             self.critic_states = None
 28 |             self.actions = None
 29 |             self.rewards = None
 30 |             self.dones = None
 31 |             self.values = None
 32 |             self.actions_log_prob = None
 33 |             self.action_mean = None
 34 |             self.action_sigma = None
 35 | 
 36 |         def clear(self):
 37 |             self.__init__()    # pylint: disable=unnecessary-dunder-call
 38 | 
 39 |     def __init__(self,
 40 |                  num_envs,
 41 |                  num_transitions_per_env,
 42 |                  policy_states_shape,
 43 |                  critic_states_shape,
 44 |                  actions_shape,
 45 |                  device='cpu'):
 46 | 
 47 |         self.device = device
 48 | 
 49 |         self.policy_states_shape = policy_states_shape
 50 |         self.actions_shape = actions_shape
 51 | 
 52 |         # Core
 53 |         self.policy_states = torch.zeros(num_transitions_per_env,
 54 |                                          num_envs,
 55 |                                          *policy_states_shape,
 56 |                                          device=self.device)
 57 |         self.critic_states = torch.zeros(num_transitions_per_env,
 58 |                                          num_envs,
 59 |                                          *critic_states_shape,
 60 |                                          device=self.device)
 61 | 
 62 |         self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
 63 |         self.actions = torch.zeros(num_transitions_per_env,
 64 |                                    num_envs,
 65 |                                    *actions_shape,
 66 |                                    device=self.device)
 67 |         self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
 68 | 
 69 |         # For PPO
 70 |         self.actions_log_prob = torch.zeros(num_transitions_per_env,
 71 |                                             num_envs,
 72 |                                             1,
 73 |                                             device=self.device)
 74 |         self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
 75 |         self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
 76 |         self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
 77 |         self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
 78 |         self.sigma = torch.zeros(num_transitions_per_env,
 79 |                                  num_envs,
 80 |                                  *actions_shape,
 81 |                                  device=self.device)
 82 | 
 83 |         self.num_transitions_per_env = num_transitions_per_env
 84 |         self.num_envs = num_envs
 85 | 
 86 |         self.step = 0
 87 | 
 88 |     def add_transitions(self, transition: Transition):
 89 |         if self.step >= self.num_transitions_per_env:
 90 |             raise AssertionError("Rollout buffer overflow")
 91 |         self.policy_states[self.step].copy_(transition.policy_states)
 92 |         self.critic_states[self.step].copy_(transition.critic_states)
 93 |         self.actions[self.step].copy_(transition.actions)
 94 |         self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
 95 |         self.dones[self.step].copy_(transition.dones.view(-1, 1))
 96 |         self.values[self.step].copy_(transition.values)
 97 |         self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1))
 98 |         self.mu[self.step].copy_(transition.action_mean)
 99 |         self.sigma[self.step].copy_(transition.action_sigma)
100 |         self.step += 1
101 | 
102 |     def clear(self):
103 |         self.step = 0
104 | 
105 |     def compute_returns(self, last_values, gamma, lam):
106 |         advantage = 0
107 |         for step in reversed(range(self.num_transitions_per_env)):
108 |             if step == self.num_transitions_per_env - 1:
109 |                 next_values = last_values
110 |             else:
111 |                 next_values = self.values[step + 1]
112 |             next_is_not_terminal = 1.0 - self.dones[step].float()
113 |             delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[
114 |                 step]
115 |             advantage = delta + next_is_not_terminal * gamma * lam * advantage
116 |             self.returns[step] = advantage + self.values[step]
117 | 
118 |         # Compute and normalize the advantages
119 |         self.advantages = self.returns - self.values
120 |         self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() +
121 |                                                                         1e-8)
122 | 
123 |     def get_statistics(self):
124 |         done = self.dones
125 |         done[-1] = 1
126 |         flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
127 |         done_indices = torch.cat(
128 |             (flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:,
129 |                                                                                                 0]))
130 |         trajectory_lengths = done_indices[1:] - done_indices[:-1]
131 |         return trajectory_lengths.float().mean(), self.rewards.mean()
132 | 
133 |     def mini_batch_generator(self, num_mini_batches, num_epochs=8):
134 |         batch_size = self.num_envs * self.num_transitions_per_env
135 |         mini_batch_size = batch_size // num_mini_batches
136 |         indices = torch.randperm(num_mini_batches * mini_batch_size,
137 |                                  requires_grad=False,
138 |                                  device=self.device)
139 | 
140 |         policy_states = self.policy_states.flatten(0, 1)
141 |         critic_states = self.critic_states.flatten(0, 1)
142 |         actions = self.actions.flatten(0, 1)
143 |         values = self.values.flatten(0, 1)
144 |         returns = self.returns.flatten(0, 1)
145 |         old_actions_log_prob = self.actions_log_prob.flatten(0, 1)
146 |         advantages = self.advantages.flatten(0, 1)
147 |         old_mu = self.mu.flatten(0, 1)
148 |         old_sigma = self.sigma.flatten(0, 1)
149 | 
150 |         for _ in range(num_epochs):
151 |             for i in range(num_mini_batches):
152 |                 start = i * mini_batch_size
153 |                 end = (i + 1) * mini_batch_size
154 |                 batch_idx = indices[start:end]
155 | 
156 |                 states_batch = [policy_states[batch_idx], critic_states[batch_idx]]
157 |                 actions_batch = actions[batch_idx]
158 |                 target_values_batch = values[batch_idx]
159 |                 returns_batch = returns[batch_idx]
160 |                 old_actions_log_prob_batch = old_actions_log_prob[batch_idx]
161 |                 advantages_batch = advantages[batch_idx]
162 |                 old_mu_batch = old_mu[batch_idx]
163 |                 old_sigma_batch = old_sigma[batch_idx]
164 |                 yield states_batch, actions_batch, target_values_batch, advantages_batch, \
165 |                     returns_batch, old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, None
166 | 


--------------------------------------------------------------------------------
/compass/residual_rl/utils.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | import torch.nn.functional as F
18 | 
19 | 
20 | @torch.jit.script
21 | def preprocess_depth_images(batched_raw_depth_image: torch.Tensor,
22 |                             target_height: int = 224,
23 |                             target_width: int = 224):
24 |     """ Preporcess the raw depth images, including
25 |       1) replace the inf elements with the maximum depth value.
26 |       2) resize the image to the target size.
27 |       3) replicate the single channel to create a 3-channel tensor for resnet.
28 |       4) normalize the tensor using ImageNet statistics.
29 | 
30 |       It will raise ValueError if the raw image has NaN.
31 |   """
32 | 
33 |     if torch.isnan(batched_raw_depth_image).any():
34 |         raise ValueError("Input tensor contains NaN values.")
35 | 
36 |     # Preprocess for infinity values.
37 |     if torch.isinf(batched_raw_depth_image).any():
38 |         batched_raw_depth_image[torch.isinf(batched_raw_depth_image)] = -1
39 |         depth_max = batched_raw_depth_image.max()
40 |         # Sanity check to make sure we don't get negative depth value.
41 |         depth_max = torch.where(depth_max < 0, torch.tensor(1.0), depth_max)
42 |         batched_raw_depth_image[batched_raw_depth_image == -1] = depth_max
43 | 
44 |     # Resize using interpolate
45 |     resized_tensors = F.interpolate(batched_raw_depth_image,
46 |                                     size=(target_height, target_width),
47 |                                     mode='bilinear',
48 |                                     align_corners=False)
49 | 
50 |     # Normalize
51 |     # Compute minimum and maximum values
52 |     depth_min = resized_tensors.min()
53 |     depth_max = resized_tensors.max()
54 | 
55 |     # Check for the case where all elements are the same
56 |     if depth_max == depth_min:
57 |         # Return a zero tensor if no variation is present
58 |         return torch.zeros_like(resized_tensors).expand(-1, 3, -1, -1)
59 | 
60 |     # Replicate the single channel to create a 3-channel tensor for resnet encoding.
61 |     resized_tensors = resized_tensors.expand(-1, 3, -1, -1)
62 |     # Normalize each image in the batch using ImageNet statistics
63 |     mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(batched_raw_depth_image.device)
64 |     std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(batched_raw_depth_image.device)
65 | 
66 |     epsilon = 1e-6
67 |     resized_tensors = (resized_tensors - mean) / (std + epsilon)
68 | 
69 |     return resized_tensors
70 | 


--------------------------------------------------------------------------------
/compass/residual_rl/x_mobility_rl.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | from torch import nn
18 | 
19 | from model.x_mobility.x_mobility import XMobility
20 | from model.x_mobility.utils import pack_sequence_dim
21 | 
22 | from compass.residual_rl.constants import INPUT_IMAGE_SIZE
23 | 
24 | 
25 | class XMobilityBasePolicy(nn.Module):
26 |     '''Wrapper of the X-Mobility to enable parallel inference during RL training.'''
27 | 
28 |     def __init__(self, base_policy_ckpt_path):
29 |         super().__init__()
30 |         # Load the checkpoint and remove the prefix if any.
31 |         state_dict = torch.load(base_policy_ckpt_path, weights_only=False)['state_dict']
32 |         state_dict = {k.removeprefix('model.'): v for k, v in state_dict.items()}
33 |         # Load the state dict.
34 |         self.model = XMobility()
35 |         self.model.load_state_dict(state_dict)
36 | 
37 |     def forward(self, obs_dict, history=None, sample=None, action=None):
38 |         b = obs_dict["policy"]["camera_rgb_img"].shape[0]
39 |         batch = {}
40 |         batch['speed'] = obs_dict["policy"]["base_speed"].reshape(b, 1, -1).float()
41 |         batch['image'] = obs_dict["policy"]["camera_rgb_img"].reshape(b, 1, INPUT_IMAGE_SIZE[0],
42 |                                                                       INPUT_IMAGE_SIZE[1],
43 |                                                                       3).float()
44 |         batch['image'] = batch['image'].permute(0, 1, 4, 2, 3) / 255.0
45 |         batch['history'] = batch['speed'].new_zeros(
46 |             (b, self.model.rssm.hidden_state_dim)).float() if history is None else history
47 |         batch['sample'] = batch['speed'].new_zeros(
48 |             (b, self.model.rssm.state_dim)).float() if sample is None else sample
49 |         batch['action'] = batch['speed'].new_zeros(
50 |             (b, 6)).float() if action is None else (action.reshape(b, -1).float())
51 |         batch['route'] = obs_dict["policy"]['route'].unsqueeze(1).float()
52 | 
53 |         action, _, history, sample, _, _, _ = self.model.inference(batch, False, False, False)
54 |         action = pack_sequence_dim(action)
55 |         latent_state = torch.cat([history, sample], dim=-1).reshape(b, -1)
56 |         route_feat = self.model.action_policy.route_encoder(pack_sequence_dim(batch['route']))
57 |         speed_feat = self.model.observation_encoder.speed_encoder(pack_sequence_dim(batch['speed']))
58 |         policy_state = self.model.action_policy.policy_state_fusion(latent_state, route_feat)
59 | 
60 |         extras = {}
61 |         extras["route_feat"] = route_feat
62 |         extras["speed_feat"] = speed_feat
63 |         return policy_state, action, history, sample, extras
64 | 


--------------------------------------------------------------------------------
/compass/rl_env/LICENCE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2024, The Isaac Lab Project Developers.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/compass/rl_env/README.md:
--------------------------------------------------------------------------------
  1 | # Isaac Lab Extension for Mobility Embodiment Specialist using Residual RL
  2 | 
  3 | ## Overview
  4 | 
  5 | This directory contains Isaac Lab extension for mobility embodiment specialist with residual RL.
  6 | 
  7 | ## Installation
  8 | 
  9 | 1. First, install Isaac Lab v1.3.0 and Isaac Sim 4.2 by following the [Isaac Lab installation guide](https://isaac-sim.github.io/IsaacLab/v1.3.0/source/setup/installation/index.html).
 10 | 
 11 |    The repo has been tested in Isaac Lab v1.3.0, so follow the steps below to set the correct version:
 12 |    ```bash
 13 |    # Clone the Isaac Lab repository
 14 |    git clone git@github.com:isaac-sim/IsaacLab.git
 15 | 
 16 |    # Switch to the correct version
 17 |    cd IsaacLab
 18 |    git fetch origin
 19 |    git checkout v1.3.0
 20 | 
 21 |    # Install Isaac Lab
 22 |    ./isaaclab.sh --install
 23 |    ```
 24 |    > **NOTE**: There might be error messages about RSL_RL not found during installation, we can safely ignore it as a custimized RSL_RL libary is used for residual RL training.
 25 | 
 26 | 
 27 | 2. Install the mobility_es extension in this directory after Isaac Lab and Sim are installed:
 28 | 
 29 |    ```bash
 30 |    # Set Isaac Lab path
 31 |    export ISAACLAB_PATH=</path/to/IsaacLab>
 32 | 
 33 |    # Install the extension
 34 |    ${ISAACLAB_PATH}/isaaclab.sh -p -m pip install -e exts/mobility_es
 35 |    ```
 36 | 
 37 | ## Usage
 38 | 
 39 | ### Running the Environment
 40 | 
 41 | ```bash
 42 | ${ISAACLAB_PATH}/isaaclab.sh -p scripts/play.py --enable_cameras
 43 | ```
 44 | 
 45 | ### Adding New Embodiments
 46 | 
 47 | To add a new robot embodiment, follow these steps:
 48 | 
 49 | 1. **Create Robot Configuration**:
 50 |    - Follow Isaac Lab [instructions](https://isaac-sim.github.io/IsaacLab/main/source/how-to/write_articulation_cfg.html) to add a new robot articulation configuration in [exts/mobility_es/mobility_es/config/robots.py](exts/mobility_es/mobility_es/config/robots.py)
 51 |    - Ensure proper joint configurations, collision properties, and physics parameters are set
 52 | 
 53 | 2. **Create Environment Configuration**:
 54 |    - Create a new RL environment class that inherits from `GoalReachingEnvCfg` in [exts/mobility_es/mobility_es/config/env_cfg.py](exts/mobility_es/mobility_es/config/env_cfg.py)
 55 |    - Override the following components as needed:
 56 | 
 57 |    | Component | Description |
 58 |    |-----------|-------------|
 59 |    | `scene.robot` | Set robot articulation configuration |
 60 |    | `scene.camera` | Configure camera settings |
 61 |    | `actions` | Define action item for the embodiment |
 62 |    | `observations` | Configure RL observations |
 63 |    | `events` | Configure RL events |
 64 |    | `rewards` | Configure RL rewards |
 65 |    | `terminations` | Configure RL terminations |
 66 | 
 67 |    > **NOTE**: Each embodiment should define its own action term as a low-level controller that maps velocity commands to joint positions. We have pre-defined action terms for different robot embodiments in [exts/mobility_es/mobility_es/mdp/action](exts/mobility_es/mobility_es/mdp/action) that can be re-configured for new embodiments.
 68 | 
 69 | 3. **Register and Verify Environment**:
 70 |    - Import your new environment in the play.py script [scripts/play.py](scripts/play.py)
 71 |    - Verify the environment is functional by running the play.py script with the new environment
 72 | 
 73 | 
 74 | 3. **Examples of Existing Embodiments**:
 75 | - [H1 Humanoid Robot](exts/mobility_es/mobility_es/config/h1_env_cfg.py)
 76 | - [Carter Mobile Robot](exts/mobility_es/mobility_es/config/carter_env_cfg.py)
 77 | - [Spot Quadruped Robot](exts/mobility_es/mobility_es/config/spot_env_cfg.py)
 78 | - [G1 Humanoid Robot](exts/mobility_es/mobility_es/config/g1_env_cfg.py)
 79 | - [Digit Humanoid Robot](exts/mobility_es/mobility_es/config/digit_env_cfg.py)
 80 | 
 81 | 
 82 | ### Adding New Scenes
 83 | 
 84 | To add a new scene, follow these steps:
 85 | 
 86 | 1. **Create Scene Configuration**:
 87 |    - Follow Isaac Lab [instructions](https://isaac-sim.github.io/IsaacLab/main/source/tutorials/02_scene/create_scene.html) to add a new scene configuration in [exts/mobility_es/mobility_es/config/environments.py](exts/mobility_es/mobility_es/config/environments.py), make sure to inherit from `EnvSceneAssetCfg` with proper parameters like pose_sample_range, env_spacing, replicate_physics, etc.
 88 | 
 89 | 
 90 | 2. **Add Occupancy Map [Optional]**:
 91 |    - We use occupancy map for collision-free pose sampling, while optional, we recommend adding the occupancy map for the new scene to improve sampling efficiency.
 92 |    - For occupancy map generation, follow this [insturction](https://docs.omniverse.nvidia.com/isaacsim/latest/features/ext_omni_isaac_occupancy_map.html) to create the occupancy map with the new scene USD file in Isaac Sim. Make sure to rotate the occupancy map properly to match the scene orientation and set the origin as the top-left of the image in the occupancy_map.yaml file.
 93 | 
 94 |     ![Occupancy Map Example](../../images/omap_generation.png)
 95 | 
 96 |    - Add the occupancy_map.yaml path in `OMAP_PATHS` in [exts/mobility_es/mobility_es/config/environments.py](exts/mobility_es/mobility_es/config/environments.py) with key as the scene name and value as the occupancy map yaml path.
 97 | 
 98 | 3. **Register and Verify Scene**:
 99 |    - Import your new scene in the play.py script [scripts/play.py](scripts/play.py)
100 |    - Verify the scene is functional by running the play.py script with the new scene by setting `env_cfg.scene.environment = MyNewSceneCfg()`
101 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/config/extension.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | 
 3 | # Semantic Versioning is used: https://semver.org/
 4 | version = "0.1.0"
 5 | 
 6 | # Description
 7 | category = "isaaclab"
 8 | readme  = "README.md"
 9 | 
10 | title = "Extension Template"
11 | author = "Isaac Lab Project Developers"
12 | maintainer = "Isaac Lab Project Developers"
13 | description="Extension Template for Isaac Lab"
14 | repository = "https://github.com/isaac-sim/IsaacLabExtensionTemplate.git"
15 | keywords = ["extension", "template", "isaaclab"]
16 | 
17 | [dependencies]
18 | "omni.isaac.lab" = {}
19 | "omni.isaac.lab_assets" = {}
20 | "omni.isaac.lab_tasks" = {}
21 | # NOTE: Add additional dependencies here
22 | 
23 | [[python.module]]
24 | name = "mobility_es"
25 | 
26 | [isaaclab_settings]
27 | # TODO: Uncomment and list any apt dependencies here.
28 | #       If none, leave it commented out.
29 | # apt_deps = ["example_package"]
30 | # TODO: Uncomment and provide path to a ros_ws
31 | #       with rosdeps to be installed. If none,
32 | #       leave it commented out.
33 | # ros_ws = "path/from/extension_root/to/ros_ws"
34 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/docs/CHANGELOG.rst:
--------------------------------------------------------------------------------
 1 | Changelog
 2 | ---------
 3 | 
 4 | 0.1.0 (2024-01-29)
 5 | ~~~~~~~~~~~~~~~~~~
 6 | 
 7 | Added
 8 | ^^^^^
 9 | 
10 | * Created an initial template for building an extension or project based on Isaac Lab
11 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """
17 | Python module serving as a project/extension template.
18 | """
19 | 
20 | # Register Gym environments.
21 | from .config import *
22 | from .mdp import *
23 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/ckpt/digit_locomotion_policy.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/digit_locomotion_policy.pt


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/ckpt/g1_locomotion_policy.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/g1_locomotion_policy.pt


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/ckpt/h1_locomotion_policy.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/h1_locomotion_policy.pt


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/ckpt/spot_locomotion_policy.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/compass/rl_env/exts/mobility_es/mobility_es/ckpt/spot_locomotion_policy.pt


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/carter_env_cfg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | from omni.isaac.lab.envs import mdp
19 | from omni.isaac.lab.utils import configclass
20 | from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm
21 | from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
22 | from omni.isaac.lab.managers import EventTermCfg as EventTerm
23 | from omni.isaac.lab.managers import SceneEntityCfg
24 | 
25 | from mobility_es.config import scene_assets
26 | from mobility_es.config import robots
27 | from mobility_es.config.env_cfg import GoalReachingEnvCfg
28 | from mobility_es.mdp.action.non_holonomic_perfect_control_action import NonHolonomicPerfectControlAction
29 | 
30 | 
31 | @configclass
32 | class CarterActionsCfg:
33 |     """Action specifications for the MDP."""
34 | 
35 |     drive_joints = mdp.ActionTermCfg(class_type=NonHolonomicPerfectControlAction,
36 |                                      asset_name="robot")
37 | 
38 | 
39 | @configclass
40 | class CarterGoalReachingEnvCfg(GoalReachingEnvCfg):
41 |     """Configuration for the carter to reach a 2D target pose environment."""
42 | 
43 |     def __post_init__(self):
44 |         super().__post_init__()
45 |         self.scene.robot = robots.carter.replace(prim_path="{ENV_REGEX_NS}/Robot")
46 |         self.scene.camera = scene_assets.camera.replace(
47 |             prim_path="{ENV_REGEX_NS}/Robot/chassis_link/front_cam")
48 |         self.actions = CarterActionsCfg()
49 |         self.observations.locomotion = None
50 |         self.events.reset_robot_joints = None
51 |         self.observations.eval.fall_down = ObsTerm(
52 |             func=mdp.illegal_contact,
53 |             params={
54 |                 "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["chassis_link"]),
55 |                 "threshold": 1.0
56 |             },
57 |         )
58 |         self.terminations.base_contact = DoneTerm(
59 |             func=mdp.illegal_contact,
60 |             params={
61 |                 "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["chassis_link"]),
62 |                 "threshold": 1.0
63 |             },
64 |         )
65 |         self.events.base_external_force_torque = EventTerm(
66 |             func=mdp.apply_external_force_torque,
67 |             mode="reset",
68 |             params={
69 |                 "asset_cfg": SceneEntityCfg("robot", body_names="chassis_link"),
70 |                 "force_range": (0.0, 0.0),
71 |                 "torque_range": (-0.0, 0.0),
72 |             },
73 |         )
74 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/digit_env_cfg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import pathlib
19 | 
20 | from omni.isaac.lab.envs import mdp
21 | from omni.isaac.lab.sensors import CameraCfg
22 | from omni.isaac.lab.managers import EventTermCfg as EventTerm
23 | from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm
24 | from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
25 | from omni.isaac.lab.managers import SceneEntityCfg
26 | from omni.isaac.lab.utils import configclass
27 | from omni.isaac.lab.utils.noise import AdditiveUniformNoiseCfg as Unoise
28 | 
29 | from mobility_es.config import scene_assets
30 | from mobility_es.config.robots import digit, DIGIT_ACTUATED_JOINT_NAMES
31 | from mobility_es.config.env_cfg import GoalReachingEnvCfg
32 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg
33 | 
34 | FILE_DIR = pathlib.Path(__file__).parent
35 | 
36 | 
37 | @configclass
38 | class DigitActionsCfg:
39 |     """Action specifications for the MDP."""
40 | 
41 |     drive_joints = LocomotionPolicyActionCfg(
42 |         class_type=LocomotionPolicyAction,
43 |         asset_name="robot",
44 |         policy_ckpt_path=FILE_DIR / "../ckpt/digit_locomotion_policy.pt",
45 |         joint_names=DIGIT_ACTUATED_JOINT_NAMES,
46 |         scale=0.5,
47 |         use_default_offset=True,
48 |     )
49 | 
50 | 
51 | @configclass
52 | class DigitGoalReachingEnvCfg(GoalReachingEnvCfg):
53 |     """Configuration for the Digit to reach a 2D target pose environment."""
54 | 
55 |     def __post_init__(self):
56 |         super().__post_init__()
57 |         self.scene.robot = digit.replace(prim_path="{ENV_REGEX_NS}/Robot")
58 |         self.scene.camera = scene_assets.camera.replace(
59 |             prim_path="{ENV_REGEX_NS}/Robot/torso_base/front_cam")
60 |         self.scene.camera.offset = CameraCfg.OffsetCfg(pos=(0.06, 0.0, -0.065),
61 |                                                        rot=(-0.2705, 0.6532, -0.6532, 0.2705981),
62 |                                                        convention="ros")
63 | 
64 |         self.actions = DigitActionsCfg()
65 |         self.observations.locomotion.joint_pos = ObsTerm(
66 |             func=mdp.joint_pos_rel,
67 |             params={"asset_cfg": SceneEntityCfg("robot", joint_names=DIGIT_ACTUATED_JOINT_NAMES)},
68 |             noise=Unoise(n_min=-0.01, n_max=0.01),
69 |         )
70 |         self.observations.locomotion.joint_vel = ObsTerm(
71 |             func=mdp.joint_vel_rel,
72 |             params={"asset_cfg": SceneEntityCfg("robot", joint_names=DIGIT_ACTUATED_JOINT_NAMES)},
73 |             noise=Unoise(n_min=-1.5, n_max=1.5),
74 |         )
75 |         self.observations.eval.fall_down = ObsTerm(
76 |             func=mdp.illegal_contact,
77 |             params={
78 |                 "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["torso_base"]),
79 |                 "threshold": 1.0
80 |             },
81 |         )
82 |         self.events.base_external_force_torque = EventTerm(
83 |             func=mdp.apply_external_force_torque,
84 |             mode="reset",
85 |             params={
86 |                 "asset_cfg": SceneEntityCfg("robot", body_names=".*torso_base"),
87 |                 "force_range": (0.0, 0.0),
88 |                 "torque_range": (-0.0, 0.0),
89 |             },
90 |         )
91 |         self.terminations.base_contact = DoneTerm(
92 |             func=mdp.illegal_contact,
93 |             params={
94 |                 "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["torso_base"]),
95 |                 "threshold": 1.0
96 |             },
97 |         )
98 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/environments.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import os
 17 | 
 18 | import omni.isaac.lab.sim as sim_utils
 19 | from omni.isaac.lab.assets import AssetBaseCfg
 20 | from omni.isaac.lab.utils import configclass
 21 | from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR
 22 | 
 23 | USD_PATHS = {
 24 |     'CombinedSingleRack':
 25 |         os.path.join(os.path.dirname(__file__), "../usd/combined_simple_warehouse/combined.usd"),
 26 |     'CombinedMultiRack':
 27 |         os.path.join(os.path.dirname(__file__), "../usd/combined_multi_rack/combined.usd"),
 28 |     'GalileoLab':
 29 |         os.path.join(os.path.dirname(__file__),
 30 |                      "../usd/galileo_lab_no_robot_no_wall/galileo_lab_no_robot_no_wall.usd"),
 31 |     'WarehouseSingleRack':
 32 |         os.path.join(
 33 |             os.path.dirname(__file__),
 34 |             "../usd/sample_small_footprint_one_rack_obst_sdg/sample_small_footprint_one_rack_obst_sdg.usd"    #pylint: disable=line-too-long
 35 |         ),
 36 |     'WarehouseMultiRack':
 37 |         f'{ISAAC_NUCLEUS_DIR}/Environments/Simple_Warehouse/warehouse_multiple_shelves.usd',
 38 |     'SimpleOffice':
 39 |         os.path.join(os.path.dirname(__file__), "../usd/office/office.usd"),
 40 |     'SimpleWarehouse':
 41 |         os.path.join(os.path.dirname(__file__),
 42 |                      "../usd/simple_warehouse_no_roof/simple_warehouse_no_roof.usd"),
 43 |     'Hospital':
 44 |         f'{ISAAC_NUCLEUS_DIR}/Environments/Hospital/hospital.usd',
 45 | }
 46 | 
 47 | OMAP_PATHS = {
 48 |     'CombinedSingleRack':
 49 |         os.path.join(os.path.dirname(__file__),
 50 |                      "../usd/combined_simple_warehouse/omap/occupancy_map.yaml"),
 51 |     'CombinedMultiRack':
 52 |         os.path.join(os.path.dirname(__file__),
 53 |                      "../usd/combined_multi_rack/omap/occupancy_map.yaml"),
 54 |     'WarehouseSingleRack':
 55 |         os.path.join(os.path.dirname(__file__),
 56 |                      "../usd/sample_small_footprint_one_rack_obst_sdg/omap/occupancy_map.yaml"),
 57 |     'WarehouseMultiRack':
 58 |         os.path.join(os.path.dirname(__file__),
 59 |                      "../usd/warehouse_multi_rack/omap/occupancy_map.yaml"),
 60 |     'SimpleOffice':
 61 |         os.path.join(os.path.dirname(__file__), "../usd/office/omap/occupancy_map.yaml"),
 62 |     'Hospital':
 63 |         os.path.join(os.path.dirname(__file__), "../usd/hospital/omap/occupancy_map.yaml"),
 64 | }
 65 | 
 66 | 
 67 | @configclass
 68 | class EnvSceneAssetCfg(AssetBaseCfg):
 69 |     """EnvSceneAssetCfg to add additional scene parameters to AssetBaseCfg.
 70 |     """
 71 | 
 72 |     # Range for robot pose sampling.
 73 |     pose_sample_range = {"x": (-5, 5), "y": (-5, 5), "yaw": (-3.14, 3.14)}
 74 | 
 75 |     # Env spacing
 76 |     env_spacing = 50
 77 | 
 78 |     # Replicate physics in the scene.
 79 |     replicate_physics = True
 80 | 
 81 | 
 82 | # Adding a USD scene with combined office, galileo lab and warehouse single rack.
 83 | combined_single_rack = EnvSceneAssetCfg(
 84 |     prim_path="{ENV_REGEX_NS}/CombinedSingleRack",
 85 |     init_state=AssetBaseCfg.InitialStateCfg(
 86 |         pos=(0, 0, 0.01),
 87 |         rot=(1.0, 0.0, 0.0, 0.0),
 88 |     ),
 89 |     spawn=sim_utils.UsdFileCfg(
 90 |         usd_path=USD_PATHS['CombinedSingleRack'],
 91 |         scale=(1.0, 1.0, 1.0),
 92 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
 93 |             disable_gravity=None,
 94 |             solver_position_iteration_count=4,
 95 |             solver_velocity_iteration_count=1,
 96 |         ),
 97 |     ),
 98 |     pose_sample_range={
 99 |         "x": (-10, 10),
100 |         "y": (-12, 17.5),
101 |         "yaw": (-3.14, 3.14)
102 |     },
103 | )
104 | 
105 | # Adding a USD scene with combined office, galileo lab and warehouse multi rack.
106 | combined_multi_rack = EnvSceneAssetCfg(
107 |     prim_path="{ENV_REGEX_NS}/CombinedMultiRack",
108 |     init_state=AssetBaseCfg.InitialStateCfg(
109 |         pos=(0, 0, 0.01),
110 |         rot=(1.0, 0.0, 0.0, 0.0),
111 |     ),
112 |     spawn=sim_utils.UsdFileCfg(
113 |         usd_path=USD_PATHS['CombinedMultiRack'],
114 |         scale=(1.0, 1.0, 1.0),
115 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
116 |             disable_gravity=None,
117 |             solver_position_iteration_count=4,
118 |             solver_velocity_iteration_count=1,
119 |         ),
120 |     ),
121 |     pose_sample_range={
122 |         "x": (-31.5, 8),
123 |         "y": (-12, 19),
124 |         "yaw": (-3.14, 3.14)
125 |     },
126 | )
127 | 
128 | # Adding a USD scene for galileo lab
129 | galileo_lab = EnvSceneAssetCfg(
130 |     prim_path="{ENV_REGEX_NS}/GalileoLab",
131 |     init_state=AssetBaseCfg.InitialStateCfg(
132 |         pos=(0, 0, 0.01),
133 |         rot=(1.0, 0.0, 0.0, 0.0),
134 |     ),
135 |     spawn=sim_utils.UsdFileCfg(
136 |         usd_path=USD_PATHS['GalileoLab'],
137 |         scale=(1.0, 1.0, 1.0),
138 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
139 |             disable_gravity=None,
140 |             solver_position_iteration_count=4,
141 |             solver_velocity_iteration_count=1,
142 |         ),
143 |     ),
144 |     env_spacing=20,
145 | )
146 | 
147 | # Adding a USD scene for warehouse with single rack
148 | warehouse_single_rack = EnvSceneAssetCfg(
149 |     prim_path="{ENV_REGEX_NS}/WarehouseSingleRack",
150 |     init_state=AssetBaseCfg.InitialStateCfg(
151 |         pos=(0, 0, 0.01),
152 |         rot=(1.0, 0.0, 0.0, 0.0),
153 |     ),
154 |     spawn=sim_utils.UsdFileCfg(
155 |         usd_path=USD_PATHS['WarehouseSingleRack'],
156 |         scale=(1.0, 1.0, 1.0),
157 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
158 |             disable_gravity=None,
159 |             solver_position_iteration_count=4,
160 |             solver_velocity_iteration_count=1,
161 |         ),
162 |     ),
163 | )
164 | 
165 | # Adding a USD scene for warehouse with multi rack
166 | warehouse_multi_rack = EnvSceneAssetCfg(
167 |     prim_path="{ENV_REGEX_NS}/WarehouseMultiRack",
168 |     init_state=AssetBaseCfg.InitialStateCfg(
169 |         pos=(0, 0, 0.01),
170 |         rot=(1.0, 0.0, 0.0, 0.0),
171 |     ),
172 |     spawn=sim_utils.UsdFileCfg(
173 |         usd_path=USD_PATHS['WarehouseMultiRack'],
174 |         scale=(1.0, 1.0, 1.0),
175 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
176 |             disable_gravity=None,
177 |             solver_position_iteration_count=4,
178 |             solver_velocity_iteration_count=1,
179 |         ),
180 |     ),
181 |     pose_sample_range={
182 |         "x": (-9, 9),
183 |         "y": (-8, 12),
184 |         "yaw": (-3.14, 3.14)
185 |     },
186 | )
187 | 
188 | # Adding a USD scene for simple office.
189 | simple_office = EnvSceneAssetCfg(
190 |     prim_path="{ENV_REGEX_NS}/SimpleOffice",
191 |     init_state=AssetBaseCfg.InitialStateCfg(
192 |         pos=(0, 0, 0.01),
193 |         rot=(1.0, 0.0, 0.0, 0.0),
194 |     ),
195 |     spawn=sim_utils.UsdFileCfg(
196 |         usd_path=USD_PATHS['SimpleOffice'],
197 |         scale=(1.0, 1.0, 1.0),
198 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
199 |             disable_gravity=None,
200 |             solver_position_iteration_count=4,
201 |             solver_velocity_iteration_count=1,
202 |         ),
203 |     ),
204 |     env_spacing=15,
205 | )
206 | 
207 | # Adding a USD scene for hospital.
208 | hospital = EnvSceneAssetCfg(
209 |     prim_path="{ENV_REGEX_NS}/Hospital",
210 |     init_state=AssetBaseCfg.InitialStateCfg(
211 |         pos=(0, 0, 0.01),
212 |         rot=(1.0, 0.0, 0.0, 0.0),
213 |     ),
214 |     spawn=sim_utils.UsdFileCfg(
215 |         usd_path=USD_PATHS['Hospital'],
216 |         scale=(1.0, 1.0, 1.0),
217 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
218 |             disable_gravity=None,
219 |             solver_position_iteration_count=4,
220 |             solver_velocity_iteration_count=1,
221 |         ),
222 |     ),
223 |     pose_sample_range={
224 |         "x": (-49, 27),
225 |         "y": (-4, 17),
226 |         "yaw": (-3.14, 3.14)
227 |     },
228 |     env_spacing=80,
229 | )
230 | 
231 | # Random sample a USD scene from the given list.
232 | random_envs = EnvSceneAssetCfg(
233 |     prim_path="{ENV_REGEX_NS}/RandomEnvs",
234 |     init_state=AssetBaseCfg.InitialStateCfg(
235 |         pos=(0, 0, 0.01),
236 |         rot=(1.0, 0.0, 0.0, 0.0),
237 |     ),
238 |     spawn=sim_utils.MultiUsdFileCfg(
239 |         usd_path=[USD_PATHS['SimpleOffice'], USD_PATHS['GalileoLab'], USD_PATHS['SimpleWarehouse']],
240 |         random_choice=True,
241 |         scale=(1.0, 1.0, 1.0),
242 |         rigid_props=sim_utils.RigidBodyPropertiesCfg(
243 |             disable_gravity=None,
244 |             solver_position_iteration_count=4,
245 |             solver_velocity_iteration_count=1,
246 |         ),
247 |     ),
248 |     replicate_physics=False,
249 | )
250 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/g1_env_cfg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import os
19 | 
20 | from omni.isaac.lab.utils import configclass
21 | from omni.isaac.lab.sensors import CameraCfg
22 | 
23 | from mobility_es.config import scene_assets
24 | from mobility_es.config import robots
25 | from mobility_es.config.env_cfg import GoalReachingEnvCfg
26 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg
27 | 
28 | 
29 | @configclass
30 | class G1ActionsCfg:
31 |     """Action specifications for the MDP."""
32 | 
33 |     drive_joints = LocomotionPolicyActionCfg(class_type=LocomotionPolicyAction,
34 |                                              asset_name="robot",
35 |                                              policy_ckpt_path=os.path.join(
36 |                                                  os.path.dirname(__file__),
37 |                                                  "../ckpt/g1_locomotion_policy.pt"),
38 |                                              joint_names=[".*"],
39 |                                              scale=0.5,
40 |                                              use_default_offset=True)
41 | 
42 | 
43 | @configclass
44 | class G1GoalReachingEnvCfg(GoalReachingEnvCfg):
45 |     """Configuration for the G1 to reach a 2D target pose environment."""
46 | 
47 |     def __post_init__(self):
48 |         super().__post_init__()
49 |         self.scene.robot = robots.g1.replace(prim_path="{ENV_REGEX_NS}/Robot")
50 |         self.scene.camera = scene_assets.camera.replace(
51 |             prim_path="{ENV_REGEX_NS}/Robot/torso_link/front_cam")
52 |         self.scene.camera.offset = CameraCfg.OffsetCfg(pos=(0.1, 0.0, 0.45),
53 |                                                        rot=(-0.2705, 0.6532, -0.6532, 0.2705981),
54 |                                                        convention="ros")
55 | 
56 |         self.actions = G1ActionsCfg()
57 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/h1_env_cfg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import os
19 | 
20 | from omni.isaac.lab.utils import configclass
21 | from omni.isaac.lab.sensors import CameraCfg
22 | 
23 | from mobility_es.config import scene_assets
24 | from mobility_es.config import robots
25 | from mobility_es.config.env_cfg import GoalReachingEnvCfg
26 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg
27 | 
28 | 
29 | @configclass
30 | class H1ActionsCfg:
31 |     """Action specifications for the MDP."""
32 | 
33 |     drive_joints = LocomotionPolicyActionCfg(class_type=LocomotionPolicyAction,
34 |                                              asset_name="robot",
35 |                                              policy_ckpt_path=os.path.join(
36 |                                                  os.path.dirname(__file__),
37 |                                                  "../ckpt/h1_locomotion_policy.pt"),
38 |                                              joint_names=[".*"],
39 |                                              scale=0.5,
40 |                                              use_default_offset=True)
41 | 
42 | 
43 | @configclass
44 | class H1GoalReachingEnvCfg(GoalReachingEnvCfg):
45 |     """Configuration for the H1 to reach a 2D target pose environment."""
46 | 
47 |     def __post_init__(self):
48 |         super().__post_init__()
49 |         self.scene.robot = robots.h1.replace(prim_path="{ENV_REGEX_NS}/Robot")
50 |         self.scene.camera = scene_assets.camera.replace(
51 |             prim_path="{ENV_REGEX_NS}/Robot/torso_link/front_cam")
52 |         self.scene.camera.offset = CameraCfg.OffsetCfg(pos=(0.15, 0.0, 0.65),
53 |                                                        rot=(-0.2705, 0.6532, -0.6532, 0.2705981),
54 |                                                        convention="ros")
55 | 
56 |         self.actions = H1ActionsCfg()
57 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/scene_assets.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import omni.isaac.lab.sim as sim_utils
17 | from omni.isaac.lab.assets import AssetBaseCfg
18 | from omni.isaac.lab.sensors import CameraCfg, TiledCameraCfg
19 | from omni.isaac.lab.terrains import TerrainImporterCfg
20 | 
21 | # Terrain
22 | terrain = TerrainImporterCfg(
23 |     prim_path="/World/ground",
24 |     terrain_type="plane",
25 |     debug_vis=False,
26 | )
27 | 
28 | # Lights
29 | light = AssetBaseCfg(
30 |     prim_path="/World/light",
31 |     spawn=sim_utils.DistantLightCfg(color=(1.0, 1.0, 1.0), intensity=1000.0),
32 | )
33 | 
34 | # Camera
35 | camera = TiledCameraCfg(
36 |     prim_path="{ENV_REGEX_NS}/Robot/chassis_link/front_cam",
37 |     update_period=0.0,
38 |     height=320,
39 |     width=512,
40 |     data_types=["rgb", "depth"],
41 |     spawn=sim_utils.PinholeCameraCfg(focal_length=10.0,
42 |                                      focus_distance=400.0,
43 |                                      horizontal_aperture=20.955,
44 |                                      clipping_range=(0.1, 20.0)),
45 |     offset=CameraCfg.OffsetCfg(pos=(0.10434, 0.0, 0.37439),
46 |                                rot=(0.5, -0.5, 0.5, -0.5),
47 |                                convention="ros"),
48 | )
49 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/config/spot_env_cfg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import os
19 | 
20 | from omni.isaac.lab.envs import mdp
21 | from omni.isaac.lab.managers import EventTermCfg as EventTerm
22 | from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm
23 | from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
24 | from omni.isaac.lab.managers import SceneEntityCfg
25 | from omni.isaac.lab.utils import configclass
26 | 
27 | from mobility_es.config import scene_assets
28 | from mobility_es.config import robots
29 | from mobility_es.config.env_cfg import GoalReachingEnvCfg
30 | from mobility_es.mdp.action.locomotion_policy_action import LocomotionPolicyAction, LocomotionPolicyActionCfg
31 | 
32 | 
33 | @configclass
34 | class SpotActionsCfg:
35 |     """Action specifications for the MDP."""
36 | 
37 |     drive_joints = LocomotionPolicyActionCfg(class_type=LocomotionPolicyAction,
38 |                                              asset_name="robot",
39 |                                              policy_ckpt_path=os.path.join(
40 |                                                  os.path.dirname(__file__),
41 |                                                  "../ckpt/spot_locomotion_policy.pt"),
42 |                                              joint_names=[".*"],
43 |                                              scale=0.2,
44 |                                              use_default_offset=True)
45 | 
46 | 
47 | @configclass
48 | class SpotGoalReachingEnvCfg(GoalReachingEnvCfg):
49 |     """Configuration for the Spot to reach a 2D target pose environment."""
50 | 
51 |     def __post_init__(self):
52 |         super().__post_init__()
53 |         self.scene.robot = robots.spot.replace(prim_path="{ENV_REGEX_NS}/Robot")
54 |         self.scene.camera = scene_assets.camera.replace(
55 |             prim_path="{ENV_REGEX_NS}/Robot/body/front_cam")
56 |         self.actions = SpotActionsCfg()
57 |         self.observations.eval.fall_down = ObsTerm(
58 |             func=mdp.illegal_contact,
59 |             params={
60 |                 "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]),
61 |                 "threshold": 1.0
62 |             },
63 |         )
64 |         self.events.base_external_force_torque = EventTerm(
65 |             func=mdp.apply_external_force_torque,
66 |             mode="reset",
67 |             params={
68 |                 "asset_cfg": SceneEntityCfg("robot", body_names="body"),
69 |                 "force_range": (0.0, 0.0),
70 |                 "torque_range": (-0.0, 0.0),
71 |             },
72 |         )
73 |         self.terminations.base_contact = DoneTerm(
74 |             func=mdp.illegal_contact,
75 |             params={
76 |                 "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]),
77 |                 "threshold": 1.0
78 |             },
79 |         )
80 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/action/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/action/action_visualization.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import torch
19 | 
20 | import omni.isaac.lab.utils.math as math_utils
21 | from omni.isaac.lab.markers import VisualizationMarkers
22 | from omni.isaac.lab.markers.config import BLUE_ARROW_X_MARKER_CFG, GREEN_ARROW_X_MARKER_CFG
23 | from omni.isaac.lab.assets.articulation import Articulation
24 | 
25 | ACTION_HEIGHT = 0.8
26 | ACTION_GAP_HEIGHT = 0.1
27 | 
28 | 
29 | class ActionVisualizer:
30 |     """Visualizer of the actions."""
31 | 
32 |     def __init__(self, cfg, env):
33 |         self.robot: Articulation = env.scene[cfg.asset_name]
34 |         self._base_markers = None
35 |         self._residual_markers = None
36 | 
37 |     def visualize(self, base_action, residual_action):
38 |         if not self._base_markers or not self._residual_markers:
39 |             self._initialize_action_markers()
40 |         # Visualize base action.
41 |         robot_pos_w = self.robot.data.root_pos_w.clone()
42 |         robot_pos_w[:, 2] += ACTION_HEIGHT
43 |         base_action_arrow_scale, base_action_arrow_quat = self._resolve_xy_velocity_to_arrow(
44 |             base_action[:, :2])
45 |         self._base_markers.visualize(robot_pos_w, base_action_arrow_quat, base_action_arrow_scale)
46 | 
47 |         # Visualize residual action.
48 |         robot_pos_w[:, 2] += ACTION_GAP_HEIGHT
49 |         residual_action_arrow_scale, residual_action_arrow_quat = self._resolve_xy_velocity_to_arrow(    # pylint: disable=line-too-long
50 |             residual_action[:, :2])
51 |         self._residual_markers.visualize(robot_pos_w, residual_action_arrow_quat,
52 |                                          residual_action_arrow_scale)
53 | 
54 |     def _initialize_action_markers(self):
55 |         print("Initialize markers for actions.")
56 |         base_marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy()
57 |         base_marker_cfg.prim_path = "/Visuals/Action/base"
58 |         self._base_markers = VisualizationMarkers(base_marker_cfg)
59 |         self._base_markers.set_visibility(True)
60 | 
61 |         residual_marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy()
62 |         residual_marker_cfg.prim_path = "/Visuals/Action/residual"
63 |         self._residual_markers = VisualizationMarkers(residual_marker_cfg)
64 |         self._residual_markers.set_visibility(True)
65 | 
66 |     def _resolve_xy_velocity_to_arrow(
67 |             self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
68 |         """Converts the XY base velocity command to arrow direction rotation."""
69 |         # obtain default scale of the marker
70 |         default_scale = self._base_markers.cfg.markers["arrow"].scale
71 |         # arrow-scale
72 |         arrow_scale = torch.tensor(default_scale,
73 |                                    device=xy_velocity.device).repeat(xy_velocity.shape[0], 1)
74 |         # pylint: disable=not-callable
75 |         arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) * 5.0
76 |         # arrow-direction
77 |         heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0])
78 |         zeros = torch.zeros_like(heading_angle)
79 |         arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle)
80 |         # convert everything back from base to world frame
81 |         base_quat_w = self.robot.data.root_quat_w
82 |         arrow_quat = math_utils.quat_mul(base_quat_w, arrow_quat)
83 | 
84 |         return arrow_scale, arrow_quat
85 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/action/differential_drive_action.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import torch
 19 | from collections.abc import Sequence
 20 | from dataclasses import MISSING
 21 | 
 22 | from omni.isaac.lab.assets.articulation import Articulation
 23 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg
 24 | from omni.isaac.lab.utils import configclass
 25 | 
 26 | from mobility_es.mdp.action.action_visualization import ActionVisualizer
 27 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
 28 | 
 29 | 
 30 | @configclass
 31 | class DifferentialDriveActionCfg(ActionTermCfg):
 32 |     """Configuration for the differential drive action term with left and right wheel joints.
 33 | 
 34 |     See :class:`DifferentialDriveAction` for more details.
 35 |     """
 36 | 
 37 |     class_type: type[ActionTerm] = MISSING
 38 |     # Name of the left wheel joint.
 39 |     left_joint_name: str = MISSING
 40 |     # Name of the right wheel joint.
 41 |     right_joint_name: str = MISSING
 42 |     # Differential drive wheel parameters.
 43 |     wheel_base: float = MISSING
 44 |     wheel_radius: float = MISSING
 45 | 
 46 | 
 47 | class DifferentialDriveAction(ActionTerm):
 48 |     """Action item for differential drive."""
 49 | 
 50 |     cfg: DifferentialDriveActionCfg
 51 |     """The articulation asset on which the action term is applied."""
 52 |     _asset: Articulation
 53 | 
 54 |     def __init__(self, cfg: DifferentialDriveActionCfg, env: RLESEnvWrapper):
 55 |         # Initialize the action term
 56 |         super().__init__(cfg, env)
 57 | 
 58 |         # Parse the joint information
 59 |         # -- Left joint
 60 |         left_joint_id, _ = self._asset.find_joints(self.cfg.left_joint_name)
 61 |         if len(left_joint_id) != 1:
 62 |             raise ValueError(
 63 |                 f"Found more than one joint for the left joint name: {self.cfg.left_joint_name}")
 64 |         # -- Right joint
 65 |         right_joint_id, _ = self._asset.find_joints(self.cfg.right_joint_name)
 66 |         if len(right_joint_id) != 1:
 67 |             raise ValueError(
 68 |                 f"Found more than one joint for the right joint name: {self.cfg.right_joint_name}")
 69 | 
 70 |         # Process into a list of joint ids
 71 |         self._joint_ids = [left_joint_id[0], right_joint_id[0]]
 72 | 
 73 |         # Create tensors for raw and processed actions
 74 |         self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device)
 75 |         self._processed_actions = torch.zeros_like(self.raw_actions)
 76 |         self._joint_vel_command = torch.zeros(self.num_envs, 2, device=self.device)
 77 | 
 78 |         # Visualization
 79 |         self._visualizer = ActionVisualizer(cfg, env)
 80 | 
 81 |     @property
 82 |     def action_dim(self) -> int:
 83 |         return 6
 84 | 
 85 |     @property
 86 |     def raw_actions(self) -> torch.Tensor:
 87 |         return self._raw_actions
 88 | 
 89 |     @property
 90 |     def processed_actions(self) -> torch.Tensor:
 91 |         return self._processed_actions
 92 | 
 93 |     def process_actions(self, actions):
 94 |         # store the raw actions
 95 |         self._raw_actions[:] = actions
 96 |         self._processed_actions = self.raw_actions
 97 | 
 98 |     def apply_actions(self):
 99 |         self._joint_vel_command[:, 0] = (
100 |             self._processed_actions[:, 0] -
101 |             self._processed_actions[:, 5] * self.cfg.wheel_base * 0.5) / self.cfg.wheel_radius
102 |         self._joint_vel_command[:, 1] = (
103 |             self._processed_actions[:, 0] +
104 |             self._processed_actions[:, 5] * self.cfg.wheel_base * 0.5) / self.cfg.wheel_radius
105 |         # Set the joint velocity targets
106 |         self._asset.set_joint_velocity_target(self._joint_vel_command, joint_ids=self._joint_ids)
107 | 
108 |     def reset(self, env_ids: Sequence[int] | None = None) -> None:
109 |         self._raw_actions[env_ids] = 0.0
110 | 
111 |     def visualize(self, base_action, residual_action):
112 |         self._visualizer.visualize(base_action, residual_action)
113 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/action/locomotion_policy_action.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import torch
 19 | from collections.abc import Sequence
 20 | from dataclasses import MISSING
 21 | 
 22 | from omni.isaac.lab.assets.articulation import Articulation
 23 | from omni.isaac.lab.envs import mdp
 24 | from omni.isaac.lab.managers.action_manager import ActionTerm
 25 | from omni.isaac.lab.utils import configclass
 26 | 
 27 | from mobility_es.mdp.action.action_visualization import ActionVisualizer
 28 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
 29 | 
 30 | 
 31 | @configclass
 32 | class LocomotionPolicyActionCfg(mdp.JointPositionActionCfg):
 33 |     """Configuration for the action term with locomotion policies.
 34 | 
 35 |     See :class:`LocomotionPolicyActionCfg` for more details.
 36 |     """
 37 | 
 38 |     class_type: type[ActionTerm] = MISSING
 39 |     policy_ckpt_path: str = MISSING
 40 | 
 41 | 
 42 | class LocomotionPolicyAction(mdp.JointPositionAction):
 43 |     """Action item for locomotion policies."""
 44 | 
 45 |     cfg: LocomotionPolicyActionCfg
 46 |     """The articulation asset on which the action term is applied."""
 47 |     _asset: Articulation
 48 | 
 49 |     def __init__(self, cfg: LocomotionPolicyActionCfg, env: RLESEnvWrapper):
 50 |         super().__init__(cfg, env)
 51 | 
 52 |         # Load the env and locomotion policy.
 53 |         self.env = env
 54 |         self._locomotion_policy = torch.jit.load(self.cfg.policy_ckpt_path).to(self.device)
 55 | 
 56 |         # Create tensors for raw and processed actions
 57 |         self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device)
 58 |         self._processed_actions = torch.zeros_like(self.raw_actions)
 59 |         self._last_joint_actions = torch.zeros(self.num_envs, self._num_joints, device=self.device)
 60 | 
 61 |         # Visualization
 62 |         self._visualizer = ActionVisualizer(cfg, env)
 63 | 
 64 |     @property
 65 |     def action_dim(self) -> int:
 66 |         return 6
 67 | 
 68 |     @property
 69 |     def raw_actions(self) -> torch.Tensor:
 70 |         return self._raw_actions
 71 | 
 72 |     @property
 73 |     def processed_actions(self) -> torch.Tensor:
 74 |         return self._processed_actions
 75 | 
 76 |     def process_actions(self, actions):
 77 |         # store the raw actions
 78 |         self._raw_actions[:] = actions
 79 |         self._processed_actions = self.raw_actions
 80 | 
 81 |     def apply_actions(self):
 82 |         # Get locomotion observation.
 83 |         obs_dict = self.env.observation_manager.compute_group('locomotion')
 84 | 
 85 |         base_lin_vel = obs_dict['base_lin_vel']
 86 |         base_ang_vel = obs_dict['base_ang_vel']
 87 |         projected_gravity = obs_dict['projected_gravity']
 88 |         velocity_commands = self._processed_actions[:, [0, 1, 5]]
 89 |         joint_pos = obs_dict['joint_pos']
 90 |         joint_vel = obs_dict['joint_vel']
 91 |         last_joint_actions = self._last_joint_actions
 92 |         # Concatenate to compose the input obs tensor. (Order preserved)
 93 |         obs = torch.cat([
 94 |             base_lin_vel, base_ang_vel, projected_gravity, velocity_commands, joint_pos, joint_vel,
 95 |             last_joint_actions
 96 |         ],
 97 |                         dim=1)
 98 |         joint_actions = self._locomotion_policy(obs)
 99 |         self._last_joint_actions = joint_actions
100 | 
101 |         # Process and set the joint pose targets
102 |         joint_actions = joint_actions * self._scale + self._offset
103 |         self._asset.set_joint_position_target(joint_actions, joint_ids=self._joint_ids)
104 | 
105 |     def reset(self, env_ids: Sequence[int] | None = None) -> None:
106 |         self._raw_actions[env_ids] = 0.0
107 |         self._last_joint_actions[env_ids] = 0.0
108 | 
109 |     def visualize(self, base_action, residual_action):
110 |         self._visualizer.visualize(base_action, residual_action)
111 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/action/non_holonomic_perfect_control_action.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import torch
 19 | from collections.abc import Sequence
 20 | 
 21 | from omni.isaac.lab.assets.articulation import Articulation
 22 | from omni.isaac.lab.envs.mdp.actions import ActionTerm, ActionTermCfg
 23 | 
 24 | from mobility_es.mdp.action.action_visualization import ActionVisualizer
 25 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
 26 | 
 27 | 
 28 | class NonHolonomicPerfectControlAction(ActionTerm):
 29 |     """Action item for non-holonomic robot with perfect controller."""
 30 | 
 31 |     cfg: ActionTermCfg
 32 |     """The articulation asset on which the action term is applied."""
 33 |     _asset: Articulation
 34 | 
 35 |     def __init__(self, cfg: ActionTermCfg, env: RLESEnvWrapper):
 36 |         # Initialize the action term
 37 |         super().__init__(cfg, env)
 38 | 
 39 |         # Create tensors for raw and processed actions
 40 |         self._raw_actions = torch.zeros(self.num_envs, self.action_dim, device=self.device)
 41 |         self._processed_actions = torch.zeros_like(self.raw_actions)
 42 | 
 43 |         # Visualization
 44 |         self._visualizer = ActionVisualizer(cfg, env)
 45 | 
 46 |     @property
 47 |     def action_dim(self) -> int:
 48 |         return 6
 49 | 
 50 |     @property
 51 |     def raw_actions(self) -> torch.Tensor:
 52 |         return self._raw_actions
 53 | 
 54 |     @property
 55 |     def processed_actions(self) -> torch.Tensor:
 56 |         return self._processed_actions
 57 | 
 58 |     def process_actions(self, actions):
 59 |         # store the raw actions
 60 |         self._raw_actions[:] = actions
 61 |         self._processed_actions = self.raw_actions[:, (0, 5)]
 62 | 
 63 |     def apply_actions(self):
 64 |         # Get current asset state
 65 |         root_state = self._asset.data.root_state_w
 66 | 
 67 |         # Extract linear_x and angular_z from actions
 68 |         linear_x = self.processed_actions[:, 0]
 69 |         angular_z = self.processed_actions[:, 1]
 70 | 
 71 |         # Get current position and orientation quaternion
 72 |         pos = root_state[:, :3]
 73 |         quat = root_state[:, 3:7]
 74 | 
 75 |         # Convert quaternion to yaw angle
 76 |         yaw = 2 * torch.atan2(quat[:, 3], quat[:, 0])
 77 | 
 78 |         # Update yaw with angular velocity
 79 |         dt = self._env.physics_dt
 80 |         new_yaw = yaw + angular_z * dt
 81 | 
 82 |         # Compute new position based on forward velocity and orientation
 83 |         pos[:, 0] += linear_x * torch.cos(new_yaw) * dt
 84 |         pos[:, 1] += linear_x * torch.sin(new_yaw) * dt
 85 | 
 86 |         # Convert new yaw to quaternion (rotation around z-axis only)
 87 |         new_quat = torch.zeros_like(quat)
 88 |         new_quat[:, 0] = torch.cos(new_yaw / 2)
 89 |         new_quat[:, 3] = torch.sin(new_yaw / 2)
 90 | 
 91 |         # Update root state
 92 |         root_state[:, :3] = pos
 93 |         root_state[:, 3:7] = new_quat
 94 |         root_state[:, 7:10] = torch.stack([
 95 |             linear_x * torch.cos(new_yaw), linear_x * torch.sin(new_yaw),
 96 |             torch.zeros_like(linear_x)
 97 |         ],
 98 |                                           dim=1)
 99 |         root_state[:, 10:13] = torch.stack(
100 |             [torch.zeros_like(angular_z),
101 |              torch.zeros_like(angular_z), angular_z], dim=1)
102 |         self._asset.write_root_state_to_sim(root_state)
103 | 
104 |     def reset(self, env_ids: Sequence[int] | None = None) -> None:
105 |         self._raw_actions[env_ids] = 0.0
106 | 
107 |     def visualize(self, base_action, residual_action):
108 |         self._visualizer.visualize(base_action, residual_action)
109 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/command/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/command/commands_cfg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from omni.isaac.lab.envs.mdp import commands
17 | from omni.isaac.lab.utils import configclass
18 | 
19 | from mobility_es.mdp.command.uniform_collision_free_pose_command import UniformCollisionFreePoseCommand
20 | 
21 | 
22 | @configclass
23 | class UniformCollisionFreePose2dCommandCfg(commands.UniformPose2dCommandCfg):
24 |     '''Config for UniformCollisionFreePose2dCommand'''
25 |     class_type: type = UniformCollisionFreePoseCommand
26 | 
27 |     # Maximum number of trials for the reject sampling.
28 |     max_resample_trial: int = 100
29 | 
30 |     # Minimum distance to asset for command sampling.
31 |     minimum_distance: float = None
32 | 
33 |     # Probability of applying minimum distance sampling.
34 |     minimum_distance_prob: float = None
35 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/command/uniform_collision_free_pose_command.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import random
 19 | 
 20 | import torch
 21 | from typing import TYPE_CHECKING
 22 | from collections.abc import Sequence
 23 | 
 24 | from omni.isaac.lab.envs.mdp import commands
 25 | 
 26 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
 27 | 
 28 | if TYPE_CHECKING:
 29 |     from mobility_es.mdp.command.commands_cfg import UniformCollisionFreePose2dCommandCfg
 30 | 
 31 | 
 32 | class UniformCollisionFreePoseCommand(commands.pose_2d_command.UniformPose2dCommand):
 33 |     """Command generator that generates pose commands containing a 3-D position and heading.
 34 | 
 35 |     The command generator samples uniform 2D positions around the environment origin. It sets
 36 |     the height of the position command to the default root height of the robot. The heading
 37 |     command is either set to point towards the target or is sampled uniformly.
 38 |     This can be configured through the :attr:`Pose2dCommandCfg.simple_heading` parameter in
 39 |     the configuration.
 40 | 
 41 |     It will then run collision checker to make sure the sampled pose is not in collision. If it
 42 |     is in collision, the sampled pose will be discarded and a new pose will be sampled until
 43 |     either a collision pose is sampled or reaches maximum number of sampling.
 44 | 
 45 |     Note: the occupancy map based collision checker only works for static environments and
 46 |     doesn't support randomization.
 47 |     """
 48 | 
 49 |     cfg: UniformCollisionFreePose2dCommandCfg
 50 |     """Configuration for the command term."""
 51 | 
 52 |     # pylint: disable=useless-parent-delegation
 53 |     def __init__(self, cfg: UniformCollisionFreePose2dCommandCfg, env: RLESEnvWrapper):
 54 |         """Initialize the command term class.
 55 | 
 56 |         Args:
 57 |             cfg: The configuration parameters for the command term.
 58 |             env: The environment object.
 59 |         """
 60 |         # initialize the base class
 61 |         super().__init__(cfg, env)
 62 | 
 63 |         # Create the occupancy map for collision check.
 64 |         self.collision_checker = env.collision_checker
 65 | 
 66 |     def __str__(self) -> str:
 67 |         msg = "UniformCollisionFreePoseCommand:\n"
 68 |         msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
 69 |         return msg
 70 | 
 71 |     def _resample_command(self, env_ids: Sequence[int]):
 72 |         resample_env_ids = env_ids
 73 |         num_resample_trials = 0
 74 |         while num_resample_trials < self.cfg.max_resample_trial:
 75 |             # Obtain origins for the robots
 76 |             self.pos_command_w[resample_env_ids] = self.robot.data.root_pos_w[resample_env_ids]
 77 |             # Offset the position command by the current root position
 78 |             r = torch.empty(len(resample_env_ids), device=self.device)
 79 |             # Randomize the position
 80 |             self.pos_command_w[resample_env_ids,
 81 |                                0] += r.uniform_(*self._get_sampling_range(self.cfg.ranges.pos_x))
 82 |             self.pos_command_w[resample_env_ids,
 83 |                                1] += r.uniform_(*self._get_sampling_range(self.cfg.ranges.pos_y))
 84 |             self.pos_command_w[resample_env_ids, 2] = 0.0
 85 |             # Randomize the heading
 86 |             self.heading_command_w[resample_env_ids] = r.uniform_(*self.cfg.ranges.heading)
 87 | 
 88 |             resample_env_ids = self._check_collision(resample_env_ids)
 89 |             num_resample_trials += 1
 90 |             if len(resample_env_ids) < 1:
 91 |                 # Finished resampling for all the envs
 92 |                 break
 93 | 
 94 |         # Print a warning message.
 95 |         if num_resample_trials >= self.cfg.max_resample_trial:
 96 |             print(f"Envs with id: {resample_env_ids} failed to sample collision free"
 97 |                   f"poses within the maxum iteration:  {self.cfg.max_resample_trial}")
 98 | 
 99 |     # Apply minimum distance constraint to the input range, and randomly choose a feasible one.
100 |     def _get_sampling_range(self, input_range: tuple[float, float]) -> tuple[float, float]:
101 |         if not self.cfg.minimum_distance or random.random() > self.cfg.minimum_distance_prob:
102 |             return input_range
103 |         low, high = input_range
104 |         if low > -self.cfg.minimum_distance and high < self.cfg.minimum_distance:
105 |             raise ValueError('Sampling range is too small for minimum distance.')
106 | 
107 |         sampling_ranges = []
108 |         if low < -self.cfg.minimum_distance:
109 |             sampling_ranges.append((low, -self.cfg.minimum_distance))
110 |         if high > self.cfg.minimum_distance:
111 |             sampling_ranges.append((self.cfg.minimum_distance, high))
112 |         return random.choice(sampling_ranges)
113 | 
114 |     # pylint: disable=unused-argument
115 |     def _check_collision(self, env_ids: Sequence[int]):
116 |         """ Checks whether the resampled command is collision free for the environments in env_ids.
117 |         Returns a list of env_ids that are in collision.
118 |         """
119 |         if not self.collision_checker.is_initialized():
120 |             return []
121 | 
122 |         sampled_points_r = self.pos_command_w[env_ids, :2]
123 |         # Get the points relative to the environment origin if the env prime is not global.
124 |         if self._env.scene.cfg.environment.prim_path.startswith('/World/envs/'):
125 |             sampled_points_r = sampled_points_r - self._env.scene.env_origins[env_ids, :2]
126 |         in_collisions = self.collision_checker.is_in_collision(sampled_points_r, distance=0.5)
127 |         return env_ids[torch.where(in_collisions == 1)[0]]
128 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/curriculum.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | from collections.abc import Sequence
19 | 
20 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
21 | 
22 | 
23 | # Increase the prob of applying min command distance constraint based the training iteration.
24 | def increase_command_minimum_distance_prob(
25 |         env: RLESEnvWrapper,
26 |         env_ids: Sequence[int],    # pylint: disable=unused-argument
27 |         command_name: str = 'goal_pose',
28 |         minimum_distance_prob_range=(0.1, 1.0),
29 |         total_iterations=1000,
30 |         num_steps_per_iteration=256):
31 |     cur_iteration = env.common_step_counter // num_steps_per_iteration
32 |     start_prob, end_prob = minimum_distance_prob_range
33 |     prob_range = end_prob - start_prob
34 |     minimum_distance_prob = start_prob + prob_range * (cur_iteration / total_iterations)
35 |     env.command_manager.get_term(command_name).cfg.minimum_distance_prob = minimum_distance_prob
36 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/events.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import torch
 19 | 
 20 | import omni.isaac.lab.utils.math as math_utils
 21 | from omni.isaac.lab.managers import SceneEntityCfg
 22 | 
 23 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
 24 | 
 25 | 
 26 | def sample_root_state_uniform(env: RLESEnvWrapper,
 27 |                               env_ids: torch.Tensor,
 28 |                               pose_range: dict[str, tuple[float, float]],
 29 |                               velocity_range: dict[str, tuple[float, float]],
 30 |                               asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")):
 31 |     # extract the used quantities (to enable type-hinting)
 32 |     asset = env.scene[asset_cfg.name]
 33 |     # get default root state
 34 |     root_states = asset.data.default_root_state[env_ids].clone()
 35 | 
 36 |     # Sample poses
 37 |     pose_range_list = [
 38 |         pose_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]
 39 |     ]
 40 |     pose_ranges = torch.tensor(pose_range_list, device=asset.device)
 41 | 
 42 |     # poses
 43 |     rand_samples = math_utils.sample_uniform(pose_ranges[:, 0],
 44 |                                              pose_ranges[:, 1], (len(env_ids), 6),
 45 |                                              device=asset.device)
 46 |     positions = root_states[:, 0:3] + env.scene.env_origins[env_ids] + rand_samples[:, 0:3]
 47 |     orientations_delta = math_utils.quat_from_euler_xyz(rand_samples[:, 3], rand_samples[:, 4],
 48 |                                                         rand_samples[:, 5])
 49 |     orientations = math_utils.quat_mul(root_states[:, 3:7], orientations_delta)
 50 | 
 51 |     # Sample velocities
 52 |     velocity_range_list = [
 53 |         velocity_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]
 54 |     ]
 55 |     velocity_ranges = torch.tensor(velocity_range_list, device=asset.device)
 56 |     rand_samples = math_utils.sample_uniform(velocity_ranges[:, 0],
 57 |                                              velocity_ranges[:, 1], (len(env_ids), 6),
 58 |                                              device=asset.device)
 59 |     velocities = root_states[:, 7:13] + rand_samples
 60 | 
 61 |     return positions, orientations, velocities
 62 | 
 63 | 
 64 | def reset_root_state_uniform_collision_free(env: RLESEnvWrapper,
 65 |                                             env_ids: torch.Tensor,
 66 |                                             pose_range: dict[str, tuple[float, float]],
 67 |                                             velocity_range: dict[str, tuple[float, float]],
 68 |                                             asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 69 |                                             max_resample_trial=10):
 70 |     # extract the used quantities (to enable type-hinting)
 71 |     asset = env.scene[asset_cfg.name]
 72 | 
 73 |     # Sample states
 74 |     positions, orientations, velocities = sample_root_state_uniform(env, env_ids, pose_range,
 75 |                                                                     velocity_range, asset_cfg)
 76 |     # Check collisions if collision checker is initialized.
 77 |     if env.collision_checker.is_initialized():
 78 |         # Index map with key as env_id and value as the index.
 79 |         env_ids_index_map = {int(val.item()): idx for idx, val in enumerate(env_ids)}
 80 |         resample_env_ids = env_ids
 81 |         resample_indices = torch.tensor(
 82 |             [env_ids_index_map[int(env_id.item())] for env_id in resample_env_ids])
 83 |         num_resample_trials = 0
 84 |         while num_resample_trials < max_resample_trial:
 85 |             positions_2d = positions[resample_indices, :2]
 86 |             # Get the points relative to the environment origin if the env prime is not global.
 87 |             if env.scene.cfg.environment.prim_path.startswith('/World/envs/'):
 88 |                 positions_2d = positions_2d - env.scene.env_origins[resample_env_ids, :2]
 89 |             in_collisions = env.collision_checker.is_in_collision(positions_2d)
 90 |             resample_env_ids = resample_env_ids[torch.where(in_collisions == 1)[0]]
 91 |             if len(resample_env_ids) < 1:
 92 |                 break
 93 |             # Resample new states for the collided root.
 94 |             resample_indices = torch.tensor(
 95 |                 [env_ids_index_map[int(env_id.item())] for env_id in resample_env_ids])
 96 |             positions[resample_indices], orientations[resample_indices], velocities[
 97 |                 resample_indices] = sample_root_state_uniform(env, resample_env_ids, pose_range,
 98 |                                                               velocity_range, asset_cfg)
 99 |             num_resample_trials += 1
100 | 
101 |     # set into the physics simulation
102 |     asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids)
103 |     asset.write_root_velocity_to_sim(velocities, env_ids=env_ids)
104 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/observations.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import torch
 19 | 
 20 | from omni.isaac.lab.assets import RigidObject
 21 | from omni.isaac.lab.managers import SceneEntityCfg
 22 | 
 23 | from mobility_es.mdp.utils import goal_pose_in_robot_frame
 24 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
 25 | 
 26 | 
 27 | def root_speed(
 28 |         env: RLESEnvWrapper,
 29 |         asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 30 | ) -> torch.Tensor:
 31 |     """The speed of the root, which is the normalization of the linear velocity"""
 32 |     asset: RigidObject = env.scene[asset_cfg.name]
 33 |     return torch.norm(asset.data.root_lin_vel_w, dim=1).unsqueeze(1)
 34 | 
 35 | 
 36 | def root_yaw_rate(
 37 |         env: RLESEnvWrapper,
 38 |         asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 39 | ) -> torch.Tensor:
 40 |     """The yaw rate of the root"""
 41 |     asset: RigidObject = env.scene[asset_cfg.name]
 42 |     return asset.data.root_ang_vel_w[:, 2].unsqueeze(1)
 43 | 
 44 | 
 45 | def camera_img(env: RLESEnvWrapper, data_type: str = "rgb"):
 46 |     supported_data_types = ["rgb", "depth"]
 47 |     if data_type not in supported_data_types:
 48 |         raise ValueError(f"data type {data_type} is not in the support list {supported_data_types}")
 49 |     camera = env.scene["camera"]
 50 |     return camera.data.output[data_type].reshape((camera.data.output[data_type].shape[0], -1))
 51 | 
 52 | 
 53 | def upsample_segments(start_pose, end_pose, d_max=1.0, n_segments=10):
 54 |     num_env = start_pose.size(0)
 55 |     all_segments = torch.zeros((num_env, n_segments, 4)).to(start_pose.device)
 56 | 
 57 |     for i in range(num_env):
 58 |         s_x, s_y = start_pose[i]
 59 |         e_x, e_y = end_pose[i]
 60 |         distance = torch.sqrt((e_x - s_x)**2 + (e_y - s_y)**2)
 61 |         num_segments = torch.ceil(distance / d_max).int().item()
 62 | 
 63 |         # Generate interpolation steps for segments
 64 |         t = torch.linspace(0, 1, num_segments + 1).unsqueeze(1)
 65 |         points = (1 - t) * torch.tensor([s_x, s_y]) + t * torch.tensor([e_x, e_y])
 66 |         segments = torch.cat([points[:-1], points[1:]], dim=1)
 67 | 
 68 |         if num_segments < n_segments:
 69 |             # Pad with segments where the start and end points are the last point
 70 |             last_point = torch.tensor([e_x, e_y])
 71 |             last_segment = torch.cat([last_point, last_point], dim=0)
 72 |             padding_segments = last_segment.repeat(n_segments - num_segments, 1)
 73 |             segments = torch.cat([segments, padding_segments], dim=0)
 74 |         else:
 75 |             # Truncate to n_segments if necessary
 76 |             segments = segments[:n_segments]
 77 |         all_segments[i] = segments
 78 | 
 79 |     return all_segments
 80 | 
 81 | 
 82 | def routing_to_goal_simplified(
 83 |     env: RLESEnvWrapper,
 84 |     command_name: str = "goal_pose",
 85 |     offset: tuple[float, float] = (-0.0, 0),
 86 |     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")
 87 | ) -> torch.Tensor:
 88 |     goal_pose = goal_pose_in_robot_frame(env, command_name, offset, robot_cfg)
 89 |     start_pose = goal_pose.new_zeros(goal_pose.shape[0], 2)
 90 |     return upsample_segments(start_pose, goal_pose)
 91 | 
 92 | 
 93 | def foot_print(
 94 |     env: RLESEnvWrapper,
 95 |     asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 96 |     asset_length: float = 1.0,
 97 |     asset_width: float = 1.0,
 98 | ) -> torch.Tensor:
 99 |     """Represent the foot print of the asset as a rectangle that is offset to the asset root."""
100 |     robot: RigidObject = env.scene[asset_cfg.name]
101 |     root_pos = robot.data.root_pos_w - env.scene.env_origins
102 |     root_pos_2d = root_pos[:, :2]
103 | 
104 |     def create_footprint_tensor(length, width, device):
105 |         tensor = torch.tensor([[length / 2, width / 2], [-length / 2, width / 2],
106 |                                [-length / 2, -width / 2], [length / 2, -width / 2]],
107 |                               device=device)
108 |         return tensor
109 | 
110 |     foot_print_poly = create_footprint_tensor(asset_length, asset_width, root_pos.device)
111 |     foot_print_poly = foot_print_poly.unsqueeze(0).repeat(root_pos.shape[0], 1, 1)
112 | 
113 |     foot_print_poly_offset = root_pos_2d.unsqueeze(1) + foot_print_poly
114 |     return foot_print_poly_offset
115 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/rewards.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import torch
19 | from typing import Optional, List
20 | 
21 | from omni.isaac.lab.managers import SceneEntityCfg
22 | 
23 | from mobility_es.mdp.utils import goal_pose_in_robot_frame
24 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
25 | 
26 | 
27 | def goal_reaching(
28 |     env: RLESEnvWrapper,
29 |     command_name: str = "goal_pose",
30 |     offset: tuple[float, float] = (-0, 0),
31 |     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")
32 | ) -> torch.Tensor:
33 |     """Reward the agent for moving towards the goal pose. """
34 |     goal_pose = goal_pose_in_robot_frame(env, command_name, offset, robot_cfg)
35 |     goal_pose_abs = -torch.norm(goal_pose, p=2, dim=1).reshape((goal_pose.shape[0], 1))
36 | 
37 |     return torch.flatten(goal_pose_abs)
38 | 
39 | 
40 | def masked_action_rate_l2(env: RLESEnvWrapper,
41 |                           idx_masks: Optional[List[int]] = None) -> torch.Tensor:
42 |     """Penalize the rate of change of the actions using an L2 squared kernel."""
43 | 
44 |     # Extract current and previous actions
45 |     current_action = env.action_manager.action
46 |     prev_action = env.action_manager.prev_action
47 | 
48 |     # Calculate the difference in actions
49 |     if idx_masks is not None:
50 |         current_action = current_action[:, idx_masks]
51 |         prev_action = prev_action[:, idx_masks]
52 | 
53 |     action_rate = current_action - prev_action
54 | 
55 |     # Return the sum of squared differences for the L2 penalty
56 |     return torch.sum(action_rate.pow(2), dim=1)
57 | 
58 | 
59 | def masked_action_l2(env: RLESEnvWrapper, idx_masks: Optional[List[int]] = None) -> torch.Tensor:
60 |     """Penalize the actions using L2 squared kernel."""
61 |     current_action = env.action_manager.action
62 | 
63 |     # Calculate the difference in actions
64 |     if idx_masks is not None:
65 |         current_action = current_action[:, idx_masks]
66 | 
67 |     return torch.sum(current_action.pow(2), dim=1)
68 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/termination.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import torch
19 | 
20 | from omni.isaac.lab.assets import RigidObject
21 | from omni.isaac.lab.managers import SceneEntityCfg
22 | 
23 | from mobility_es.mdp.utils import goal_pose_in_robot_frame
24 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
25 | 
26 | 
27 | def goal_reached(
28 |     env: RLESEnvWrapper,
29 |     dist_threshold: float = 0.5,
30 |     vel_threshold: float = 0.1,
31 |     command_name: str = 'goal_pose',
32 |     offset: tuple[float, float] = (-0.0, 0),
33 |     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")
34 | ) -> torch.Tensor:
35 |     robot: RigidObject = env.scene[robot_cfg.name]
36 | 
37 |     # Goal pose
38 |     goal_pose = goal_pose_in_robot_frame(env, command_name, offset, robot_cfg)
39 | 
40 |     # Distance to goal
41 |     goal_pose_abs = torch.norm(goal_pose, p=2, dim=1)
42 |     # Robot velocity
43 |     root_vel = torch.norm(robot.data.root_lin_vel_w, dim=1)
44 | 
45 |     return torch.logical_and(goal_pose_abs < dist_threshold, root_vel < vel_threshold)
46 | 
47 | 
48 | def nan_pose(env: RLESEnvWrapper, robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")):
49 |     # Check if the robot pose is nan, which can happen when the robot explodes in simulation.
50 |     nan_pos = torch.isnan(env.scene[robot_cfg.name].data.root_pos_w)
51 |     nan_quat = torch.isnan(env.scene[robot_cfg.name].data.root_quat_w)
52 |     return torch.logical_or(torch.any(nan_pos, dim=1), torch.any(nan_quat, dim=1))
53 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/mdp/utils.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | import torch
19 | from typing import TYPE_CHECKING
20 | 
21 | import omni.isaac.lab.utils.math as math_utils
22 | from omni.isaac.lab.managers import SceneEntityCfg
23 | 
24 | if TYPE_CHECKING:
25 |     from omni.isaac.lab.envs import ManagerBasedRLEnv
26 | 
27 | 
28 | def goal_pose_in_robot_frame(
29 |     env: ManagerBasedRLEnv,
30 |     command_name: str = "goal_pose",
31 |     offset: tuple[float, float] = (-0, 0),
32 |     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot")
33 | ) -> torch.Tensor:
34 |     """Get the goal pose in the robot frame with offset. """
35 | 
36 |     # Get the goal pose relative to the root(i.e., robot) frame.
37 |     goal_pose = env.command_manager.get_command(command_name)[:, :2]
38 | 
39 |     # Compute offset.
40 |     robot = env.scene[robot_cfg.name]
41 |     robot_yaw = math_utils.euler_xyz_from_quat(robot.data.root_quat_w)[2].reshape(
42 |         (goal_pose.shape[0], 1))
43 |     offset_vec = torch.cat([
44 |         offset[0] * torch.cos(robot_yaw) - offset[1] * torch.sin(robot_yaw),
45 |         offset[0] * torch.sin(robot_yaw) + offset[1] * torch.cos(robot_yaw)
46 |     ],
47 |                            dim=1)
48 |     # Goal pose in robot frame with offset.
49 |     goal_pose_r = goal_pose - offset_vec
50 |     return goal_pose_r
51 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/utils/occupancy_map.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import os
 17 | 
 18 | import math
 19 | import numpy as np
 20 | import yaml
 21 | 
 22 | import torch
 23 | from PIL import Image
 24 | 
 25 | from mobility_es.config.environments import OMAP_PATHS
 26 | 
 27 | 
 28 | class OccupancyMapCollisionChecker:
 29 |     """Convert a occupancy map to grid map for collision checking.
 30 |         Note: This collision checker only works when the following conditions are met:
 31 |             1. The USD world frame's origin has x facing right, y facing up.
 32 |             2. The occupancy map must be generated in Isaac Sim with origin as (0,0,0),
 33 |                so the occupancy map's origin is measured in the USD's world frame.
 34 |             3. The occupancy map's origin is placed at the top-left of the PNC file with
 35 |                x facing right, and y facing down.
 36 |     """
 37 | 
 38 |     def __init__(self, cfg):
 39 |         self.__grid_map = None
 40 | 
 41 |         # Initialize the grid map using the occupancy map yaml file path.
 42 |         env_prim_path = cfg.environment.prim_path
 43 |         yaml_file_path = None
 44 |         for key, val in OMAP_PATHS.items():
 45 |             if env_prim_path.endswith(key):
 46 |                 yaml_file_path = val
 47 |                 break
 48 | 
 49 |         if yaml_file_path and os.path.exists(yaml_file_path):
 50 |             print(f'Loaded omap for env {env_prim_path}')
 51 |             self._load_grid_map(yaml_file_path)
 52 | 
 53 |     def _load_grid_map(self, yaml_file_path):
 54 |         """
 55 |         Reads the map image and generates the grid map.\n
 56 |         Grid map is a 2D boolean matrix where True=>occupied space & False=>Free space.
 57 |         """
 58 |         with open(yaml_file_path, encoding="utf-8") as f:
 59 |             file_content = f.read()
 60 |         self.__map_meta = yaml.safe_load(file_content)
 61 |         img_file_path = os.path.join(os.path.dirname(yaml_file_path), self.__map_meta["image"])
 62 | 
 63 |         img = Image.open(img_file_path)
 64 |         img = np.array(img)
 65 | 
 66 |         # Anything greater than free_thresh is considered as occupied
 67 |         if self.__map_meta["negate"]:
 68 |             res = np.where((img / 255)[:, :, 0] > self.__map_meta["free_thresh"])
 69 |         else:
 70 |             res = np.where(((255 - img) / 255)[:, :, 0] > self.__map_meta["free_thresh"])
 71 | 
 72 |         self.__grid_map = np.zeros(shape=(img.shape[:2]), dtype=bool)
 73 |         for i in range(res[0].shape[0]):
 74 |             self.__grid_map[res[0][i], res[1][i]] = True
 75 | 
 76 |     def __transform_to_image_coordinates(self, point):
 77 |         p_x = point[:, 0]
 78 |         p_y = point[:, 1]
 79 |         # NOTE: this assumes the occupancy map's origin with x facing right, and y facing down, and
 80 |         # the world frame's origin with x facing right, y facing up.
 81 |         i_x = torch.floor(
 82 |             (p_x - self.__map_meta["origin"][0]) / self.__map_meta["resolution"]).reshape(
 83 |                 (point.shape[0], 1))
 84 |         i_y = torch.floor(
 85 |             (self.__map_meta["origin"][1] - p_y) / self.__map_meta["resolution"]).reshape(
 86 |                 (point.shape[0], 1))
 87 |         return torch.cat((i_x, i_y), dim=1).int()
 88 | 
 89 |     def __is_obstacle_in_distance(self, img_point, distance_in_pixel):
 90 |         row_start_idx = img_point[:, 1] - distance_in_pixel
 91 |         col_start_idx = img_point[:, 0] - distance_in_pixel
 92 |         row_end_idx = img_point[:, 1] + distance_in_pixel
 93 |         col_end_idx = img_point[:, 0] + distance_in_pixel
 94 | 
 95 |         # image point acts as the center of the square, where each side of square is of size
 96 |         # 2xdistance
 97 |         in_collision = torch.zeros_like(row_start_idx).int()
 98 |         for i in range(row_end_idx.shape[0]):
 99 |             # Consider out of bounds points as in collision.
100 |             if row_start_idx[i] < 0 or col_start_idx[i] < 0 or row_end_idx[
101 |                     i] >= self.__grid_map.shape[0] or col_end_idx[i] >= self.__grid_map.shape[1]:
102 |                 in_collision[i] = 1
103 |                 continue
104 |             patch = self.__grid_map[row_start_idx[i]:row_end_idx[i],
105 |                                     col_start_idx[i]:col_end_idx[i]]
106 |             if np.any(patch):
107 |                 in_collision[i] = 1
108 | 
109 |         return in_collision
110 | 
111 |     def is_in_collision(self, points, distance=0.75):
112 |         assert points.shape[1] == 2
113 |         img_points = self.__transform_to_image_coordinates(points)
114 |         distance_in_pixel = math.ceil(distance / self.__map_meta["resolution"])
115 |         in_collision = self.__is_obstacle_in_distance(img_points, distance_in_pixel)
116 |         return in_collision
117 | 
118 |     def is_initialized(self):
119 |         return self.__grid_map is not None
120 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/mobility_es/wrapper/env_wrapper.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from __future__ import annotations
17 | 
18 | from omni.isaac.lab.envs import ManagerBasedRLEnv, ManagerBasedRLEnvCfg
19 | 
20 | from mobility_es.utils.occupancy_map import OccupancyMapCollisionChecker
21 | 
22 | 
23 | class RLESEnvWrapper(ManagerBasedRLEnv):
24 |     """Env wrapper of ManagerBasedRLEnv for RL embodiment specialist.
25 |     """
26 | 
27 |     def __init__(self, cfg: ManagerBasedRLEnvCfg, render_mode=None):
28 |         """
29 |         Initializes the RLESEnvWrapper.
30 | 
31 |         Args:
32 |             cfg (ManagerBasedRLEnvCfg): The configuration object for the environment.
33 |             render_mode (str, optional): The render mode for the environment. Defaults to None.
34 | 
35 |         Returns:
36 |             None
37 |         """
38 |         # Initialize collision checker before ManagerBasedRLEnv to avoid loading error.
39 |         self._collision_checker = OccupancyMapCollisionChecker(cfg.scene)
40 |         super().__init__(cfg, render_mode)
41 | 
42 |     @property
43 |     def collision_checker(self):
44 |         return self._collision_checker
45 | 


--------------------------------------------------------------------------------
/compass/rl_env/exts/mobility_es/setup.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Installation script for the 'mobility_es' python package."""
17 | 
18 | import os
19 | import toml
20 | 
21 | from setuptools import setup
22 | 
23 | # Obtain the extension data from the extension.toml file
24 | EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
25 | # Read the extension.toml file
26 | EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
27 | 
28 | # Minimum dependencies required prior to installation
29 | INSTALL_REQUIRES = [
30 |     # NOTE: Add dependencies
31 |     "psutil",
32 | ]
33 | 
34 | # Installation operation
35 | setup(
36 |     name="mobility_es",
37 |     packages=["mobility_es"],
38 |     author=EXTENSION_TOML_DATA["package"]["author"],
39 |     maintainer=EXTENSION_TOML_DATA["package"]["maintainer"],
40 |     url=EXTENSION_TOML_DATA["package"]["repository"],
41 |     version=EXTENSION_TOML_DATA["package"]["version"],
42 |     description=EXTENSION_TOML_DATA["package"]["description"],
43 |     keywords=EXTENSION_TOML_DATA["package"]["keywords"],
44 |     install_requires=INSTALL_REQUIRES,
45 |     license="MIT",
46 |     include_package_data=True,
47 |     python_requires=">=3.10",
48 |     classifiers=[
49 |         "Natural Language :: English",
50 |         "Programming Language :: Python :: 3.10",
51 |         "Isaac Sim :: 2023.1.1",
52 |         "Isaac Sim :: 4.0.0",
53 |     ],
54 |     zip_safe=False,
55 | )
56 | 


--------------------------------------------------------------------------------
/compass/rl_env/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools", "toml"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.isort]
 6 | 
 7 | atomic = true
 8 | profile = "black"
 9 | line_length = 120
10 | py_version = 310
11 | skip_glob = ["docs/*", "logs/*"]
12 | group_by_package = true
13 | 
14 | sections = [
15 |     "FUTURE",
16 |     "STDLIB",
17 |     "THIRDPARTY",
18 |     "ISAACLABPARTY",
19 |     "FIRSTPARTY",
20 |     "LOCALFOLDER",
21 | ]
22 | extra_standard_library = [
23 |     "numpy",
24 |     "h5py",
25 |     "open3d",
26 |     "torch",
27 |     "tensordict",
28 |     "bpy",
29 |     "matplotlib",
30 |     "gymnasium",
31 |     "gym",
32 |     "scipy",
33 |     "hid",
34 |     "yaml",
35 |     "prettytable",
36 |     "toml",
37 |     "trimesh",
38 |     "tqdm",
39 |     "psutil",
40 | ]
41 | known_thirdparty = [
42 |     "omni.isaac.core",
43 |     "omni.replicator.isaac",
44 |     "omni.replicator.core",
45 |     "pxr",
46 |     "omni.kit.*",
47 |     "warp",
48 |     "carb",
49 |     "Semantics",
50 | ]
51 | known_isaaclabparty = [
52 |     "omni.isaac.lab",
53 |     "omni.isaac.lab_tasks",
54 |     "omni.isaac.lab_assets"
55 | ]
56 | 
57 | # Modify the following to include the package names of your first-party code
58 | known_firstparty = "mobility_es"
59 | known_local_folder = "config"
60 | 
61 | [tool.pyright]
62 | 
63 | exclude = [
64 |     "**/__pycache__",
65 |     "**/docs",
66 |     "**/logs",
67 |     ".git",
68 |     ".vscode",
69 | ]
70 | 
71 | typeCheckingMode = "basic"
72 | pythonVersion = "3.10"
73 | pythonPlatform = "Linux"
74 | enableTypeIgnoreComments = true
75 | 
76 | # This is required as the CI pre-commit does not download the module (i.e. numpy, torch, prettytable)
77 | # Therefore, we have to ignore missing imports
78 | reportMissingImports = "none"
79 | # This is required to ignore for type checks of modules with stubs missing.
80 | reportMissingModuleSource = "none" # -> most common: prettytable in mdp managers
81 | 
82 | reportGeneralTypeIssues = "none"       # -> raises 218 errors (usage of literal MISSING in dataclasses)
83 | reportOptionalMemberAccess = "warning" # -> raises 8 errors
84 | reportPrivateUsage = "warning"
85 | 


--------------------------------------------------------------------------------
/compass/rl_env/scripts/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/compass/rl_env/scripts/play.py:
--------------------------------------------------------------------------------
 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | # SPDX-License-Identifier: Apache-2.0
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # pylint: skip-file
17 | 
18 | import argparse
19 | 
20 | from omni.isaac.lab.app import AppLauncher
21 | 
22 | # add argparse arguments
23 | parser = argparse.ArgumentParser(description="Embodiment specialist to enhance navigation policy.")
24 | parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to spawn.")
25 | 
26 | # append AppLauncher cli args
27 | AppLauncher.add_app_launcher_args(parser)
28 | # parse the arguments
29 | args_cli = parser.parse_args()
30 | 
31 | # launch omniverse app
32 | app_launcher = AppLauncher(args_cli)
33 | simulation_app = app_launcher.app
34 | 
35 | import torch
36 | 
37 | from mobility_es.config.carter_env_cfg import CarterGoalReachingEnvCfg
38 | from mobility_es.config.h1_env_cfg import H1GoalReachingEnvCfg
39 | from mobility_es.wrapper.env_wrapper import RLESEnvWrapper
40 | 
41 | 
42 | def main():
43 |     """Main function."""
44 |     # Parse the arguments
45 |     env_cfg = H1GoalReachingEnvCfg()
46 |     env_cfg.scene.num_envs = args_cli.num_envs
47 |     # Setup base environment
48 |     env = RLESEnvWrapper(cfg=env_cfg)
49 | 
50 |     count = 0
51 |     while simulation_app.is_running():
52 |         with torch.inference_mode():
53 |             # Reset
54 |             if count % 300 == 0:
55 |                 count = 0
56 |                 env.reset()
57 |                 print("-" * 80)
58 |                 print("[INFO]: Resetting environment...")
59 |             # Sample random actions
60 |             action_commands = torch.randn_like(env.action_manager.action)
61 |             # Step the environment
62 |             obs_dict, rewards, dones, truncateds, infos = env.step(action_commands)
63 |             print(obs_dict)
64 |             # Update counter
65 |             count += 1
66 | 
67 |     # close the environment
68 |     env.close()
69 | 
70 | 
71 | if __name__ == "__main__":
72 |     # Run the main function
73 |     main()
74 |     # Close sim app
75 |     simulation_app.close()
76 | 


--------------------------------------------------------------------------------
/compass/utils/logger.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | """
 17 | Flexible logging system that supports both Weights & Biases and TensorBoard.
 18 | """
 19 | 
 20 | import os
 21 | from typing import Dict, Any, Optional
 22 | 
 23 | import wandb
 24 | from tensorboardX import SummaryWriter
 25 | 
 26 | 
 27 | class Logger:
 28 |     """
 29 |     A unified logger that can use either Weights & Biases or TensorBoard.
 30 |     """
 31 | 
 32 |     def __init__(self,
 33 |                  log_dir: str,
 34 |                  experiment_name: str,
 35 |                  backend: str = "tensorboard",
 36 |                  project_name: str = "compass",
 37 |                  entity: Optional[str] = None):
 38 |         """
 39 |         Initialize the logger.
 40 | 
 41 |         Args:
 42 |             log_dir: Directory to save logs
 43 |             experiment_name: Name of the experiment
 44 |             backend: Either "wandb" or "tensorboard"
 45 |             project_name: Project name (for wandb)
 46 |             entity: Team/entity name (for wandb)
 47 |             config: Configuration dictionary to log
 48 |         """
 49 |         self.backend = backend.lower()
 50 |         self.experiment_name = experiment_name
 51 |         self.log_dir = log_dir
 52 | 
 53 |         # Create log directory if it doesn't exist
 54 |         os.makedirs(self.log_dir, exist_ok=True)
 55 | 
 56 |         # Initialize the backend.
 57 |         if self.backend == "wandb":
 58 |             # Initialize wandb
 59 |             self.writer = wandb.init(
 60 |                 project=project_name,
 61 |                 entity=entity,
 62 |                 name=experiment_name,
 63 |                 dir=log_dir,
 64 |             )
 65 |         elif self.backend == "tensorboard":
 66 |             # Initialize TensorBoard writer
 67 |             self.writer = SummaryWriter(os.path.join(self.log_dir, "tensorboard"))
 68 |         else:
 69 |             raise ValueError(f"Unsupported backend: {backend}. "
 70 |                              "Choose either 'wandb' or 'tensorboard'.")
 71 | 
 72 |         print(f"Logger initialized with backend: {self.backend}")
 73 | 
 74 |     def log_video(self, name: str, video_path: str, fps: int = 4, step: Optional[int] = None):
 75 |         """
 76 |         Log a video.
 77 | 
 78 |         Args:
 79 |             name: Name of the video
 80 |             video_path: Path to the video file
 81 |             fps: Frames per second
 82 |             step: Step number (if None, uses 0)
 83 |         """
 84 |         if step is None:
 85 |             step = 0
 86 | 
 87 |         if self.backend == "wandb":
 88 |             wandb.log(
 89 |                 {name: wandb.Video(video_path, fps=fps, format="mp4", caption=f"Iteration {step}")})
 90 |         else:
 91 |             print(f"Video upload via path not supported in TensorBoard. Video '{name}' not logged.")
 92 | 
 93 |     def log_dict(self, dict: Dict[str, Any], step: Optional[int] = None):
 94 |         """
 95 |         Log multiple metrics at once.
 96 | 
 97 |         Args:
 98 |             dict: Dictionary of scalars to log
 99 |             step: Step number (if None, uses 0)
100 |         """
101 |         if self.backend == "wandb":
102 |             wandb.log(dict, step=step)
103 |         else:
104 |             for name, value in dict.items():
105 |                 self.writer.add_scalar(name, value, global_step=step)
106 | 
107 |     def log_artifact(self,
108 |                      artifact_path: str,
109 |                      name: str,
110 |                      type: str,
111 |                      description: Optional[str] = None):
112 |         """
113 |         Log an artifact (file or directory).
114 | 
115 |         Args:
116 |             artifact_path: Path to the artifact
117 |             name: Name of the artifact
118 |             type: Type of the artifact
119 |             description: Description of the artifact
120 |         """
121 |         if self.backend == "wandb":
122 |             artifact = wandb.Artifact(name=name, type=type, description=description)
123 |             artifact.add_file(artifact_path)
124 |             wandb.log_artifact(artifact)
125 |         else:
126 |             # TensorBoard doesn't support artifacts directly
127 |             print(f"Artifacts not supported in TensorBoard. Artifact '{name}' not logged.")
128 | 
129 |     def log_config(self, config: Dict[str, Any]):
130 |         """
131 |         Update the run configuration.
132 | 
133 |         Args:
134 |             config: Configuration dictionary
135 |         """
136 |         if self.backend == "wandb":
137 |             self.writer.config.update(config)
138 |         else:
139 |             # TensorBoard doesn't support updating config
140 |             print("Updating config not supported in TensorBoard.")
141 | 
142 |     def close(self):
143 |         """Close the logger."""
144 |         if self.backend == "wandb":
145 |             wandb.finish()
146 |         elif self.backend == "tensorboard":
147 |             self.writer.close()
148 | 


--------------------------------------------------------------------------------
/configs/distillation_config.gin:
--------------------------------------------------------------------------------
 1 | # Train
 2 | train.epochs=100
 3 | train.precision="16-mixed"
 4 | train.data_module=@RLSpecialistDataModule
 5 | train.model_trainer=@ESDistillationTrainer
 6 | 
 7 | #Trainer
 8 | ESDistillationTrainer.lr=1e-3
 9 | ESDistillationTrainer.model=@ESDistillationPolicy
10 | ESDistillationTrainer.loss=@ESDistillationKLLoss
11 | 
12 | # Dataloader
13 | RLSpecialistDataModule.batch_size=16
14 | RLSpecialistDataModule.sequence_length=5
15 | RLSpecialistDataModule.num_workers=2
16 | 
17 | # Embodiment specialist distillation policy
18 | ESDistillationPolicy.policy_model=@MLPActionPolicyDistribution
19 | ESDistillationPolicy.embodiment_encoder=@EmbodimentOneHotEncoder
20 | ESDistillationPolicy.policy_state_dim=2048
21 | ESDistillationPolicy.command_n_channels=6
22 | 


--------------------------------------------------------------------------------
/configs/distillation_dataset_config_template.yaml:
--------------------------------------------------------------------------------
 1 | policy_ckpt_type: wandb_artifact # [wandb_artifact, local]
 2 | 
 3 | embodiment_policies:
 4 |   carter: "path_to_carter_ckpt"
 5 |   h1: "path_to_h1_ckpt"
 6 |   spot: "path_to_spot_ckpt"
 7 |   g1: "path_to_g1_ckpt"
 8 | 
 9 | data_split_ratios:
10 |   train: 0.8
11 |   val: 0.1
12 |   test: 0.1
13 | 


--------------------------------------------------------------------------------
/configs/eval_config.gin:
--------------------------------------------------------------------------------
 1 | include 'configs/shared.gin'
 2 | include 'configs/distillation_config.gin'
 3 | 
 4 | run.num_iterations=10
 5 | run.num_envs=64
 6 | run.run_mode='eval'
 7 | run.seed = 10
 8 | 
 9 | # Residual PPO Trainer
10 | ResidualPPOTrainer.debug_viz=True
11 | 


--------------------------------------------------------------------------------
/configs/record_config.gin:
--------------------------------------------------------------------------------
 1 | include 'configs/shared.gin'
 2 | 
 3 | run.num_iterations=20
 4 | run.num_envs=16
 5 | run.run_mode='record'
 6 | run.seed = 30
 7 | run.num_steps_per_iteration=128
 8 | 
 9 | ResidualPPOTrainer.num_steps_per_env=128
10 | 


--------------------------------------------------------------------------------
/configs/shared.gin:
--------------------------------------------------------------------------------
 1 | include 'configs/x_mobility_config.gin'
 2 | 
 3 | NUM_STEPS_PER_ITER = 256
 4 | 
 5 | # RL environment setup
 6 | run.embodiment='h1'
 7 | run.environment='combined_single_rack'
 8 | run.num_steps_per_iteration=%NUM_STEPS_PER_ITER
 9 | 
10 | # Residual PPO Trainer
11 | ResidualPPOTrainer.num_steps_per_env=%NUM_STEPS_PER_ITER
12 | # Options include [CriticStateDepthEncoderAssembler, CriticStateSymmetricAssembler]
13 | ResidualPPOTrainer.critic_state_assembler=@CriticStateSymmetricAssembler
14 | 
15 | # CriticObservationEncoder
16 | CriticObservationEncoder.depth_encoder=@DepthImageFeatureExtractor
17 | 
18 | # Actor critic
19 | ActorCriticXMobility.critic_hidden_dims = 128
20 | ActorCriticXMobility.init_noise_std = 0.1
21 | 


--------------------------------------------------------------------------------
/configs/train_config.gin:
--------------------------------------------------------------------------------
1 | include 'configs/shared.gin'
2 | 
3 | run.num_iterations=1000
4 | run.num_envs=64
5 | run.run_mode='train'
6 | run.seed = 20
7 | 


--------------------------------------------------------------------------------
/configs/x_mobility_config.gin:
--------------------------------------------------------------------------------
  1 | # Shared macros
  2 | NUM_SEMANTIC_CLASSES=7
  3 | ACTION_SIZE=6
  4 | # Path dim is 2 and path length is 5.
  5 | FLAT_PATH_SIZE=10
  6 | NUM_EPOCHS=100
  7 | RECEPTIVE_FIELD=1
  8 | SEQUENCE_LENGTH=4
  9 | ENABLE_SEMANTIC=True
 10 | ENABLE_RGB_STYLEGAN=False
 11 | ENABLE_RGB_DIFFUSION=False
 12 | ENABLE_POLICY_DIFFUSION=False
 13 | IS_GWM_PRETRAIN=False
 14 | 
 15 | # Train
 16 | train.epochs=%NUM_EPOCHS
 17 | train.precision="16-mixed"
 18 | train.data_module=@XMobilityIsaacSimDataModule
 19 | train.model_trainer=@XMobilityTrainer
 20 | 
 21 | # Evaluate
 22 | evaluate_observation.num_gpus=1
 23 | evaluate_observation.precision="16-mixed"
 24 | evaluate_prediction.max_history_length=5
 25 | evaluate_prediction.max_future_length=[1, 2, 3, 4, 5, 10]
 26 | evaluate_prediction.use_trained_policy=True
 27 | 
 28 | #Trainer
 29 | XMobilityTrainer.weight_decay=0.01
 30 | XMobilityTrainer.lr=1e-5
 31 | XMobilityTrainer.scheduler_pct_start=0.2
 32 | 
 33 | # Dataloader
 34 | XMobilityIsaacSimDataModule.batch_size=4
 35 | XMobilityIsaacSimDataModule.sequence_length=%SEQUENCE_LENGTH
 36 | XMobilityIsaacSimDataModule.num_workers=4
 37 | XMobilityIsaacSimDataModule.is_gwm_pretrain=%IS_GWM_PRETRAIN
 38 | XMobilityIsaacSimDataModule.enable_semantic=%ENABLE_SEMANTIC
 39 | XMobilityIsaacSimDataModule.enable_rgb_stylegan=%ENABLE_RGB_STYLEGAN
 40 | XMobilityIsaacSimDataModule.precomputed_semantic_label=True
 41 | 
 42 | #Encoders
 43 | SpeedEncoder.out_channels=32
 44 | SpeedEncoder.speed_normalisation=1.5
 45 | VectorNetSubGraph.vec_size=4
 46 | VectorNetSubGraph.num_layers=4
 47 | ImageDepthAnythingEncoder.enable_fine_tune=True
 48 | ImageDINOEncoder.enable_fine_tune=True
 49 | ObservationEncoder.image_encoder=@ImageDINOEncoder
 50 | 
 51 | # Decoders
 52 | SegmentationHead.n_classes=%NUM_SEMANTIC_CLASSES
 53 | StyleGanDecoder.constant_size=(5, 8)
 54 | 
 55 | # MLPPolicy
 56 | MLPPolicy.command_n_channels=%ACTION_SIZE
 57 | MLPPolicy.path_n_channels=%FLAT_PATH_SIZE
 58 | 
 59 | # DiffusionPolicy
 60 | DiffusionPolicy.action_n_channels=%ACTION_SIZE
 61 | DiffusionPolicy.path_n_channels=%FLAT_PATH_SIZE
 62 | DiffusionPolicy.default_denoising_steps=50
 63 | 
 64 | # ConditionalUnet1D
 65 | ConditionalUnet1D.diffusion_step_embed_dim = 256
 66 | ConditionalUnet1D.unet_down_dims = [256, 512, 1024]
 67 | ConditionalUnet1D.kernel_size = 3
 68 | 
 69 | # ActionPolicy
 70 | ActionPolicy.policy_state_dim = 2048
 71 | ActionPolicy.route_encoder=@VectorNetSubGraph
 72 | ActionPolicy.policy_state_fusion_mode="self_attn"
 73 | ActionPolicy.enable_policy_diffusion=%ENABLE_POLICY_DIFFUSION
 74 | 
 75 | # Losses
 76 | KLLoss.alpha=0.75
 77 | ActionLoss.norm=1
 78 | SegmentationLoss.use_top_k=False
 79 | SegmentationLoss.top_k_ratio=0.25
 80 | SegmentationLoss.use_poly_one=False
 81 | SegmentationLoss.poly_one_coefficient=0.0
 82 | SegmentationLoss.use_weights=True
 83 | SegmentationLoss.semantic_weights=[1.0, 2.0, 5.0, 5.0, 5.0, 5.0, 5.0]
 84 | XMobilityLoss.action_weight=10.0
 85 | XMobilityLoss.path_weight = 5.0
 86 | XMobilityLoss.kl_weight=0.001
 87 | XMobilityLoss.semantic_weight=2.0
 88 | XMobilityLoss.rgb_weight=5.0
 89 | XMobilityLoss.enable_semantic=%ENABLE_SEMANTIC
 90 | XMobilityLoss.enable_rgb_stylegan=%ENABLE_RGB_STYLEGAN
 91 | XMobilityLoss.enable_rgb_diffusion=%ENABLE_RGB_DIFFUSION
 92 | XMobilityLoss.enable_policy_diffusion=%ENABLE_POLICY_DIFFUSION
 93 | XMobilityLoss.is_gwm_pretrain=%IS_GWM_PRETRAIN
 94 | XMobilityLoss.diffusion_weight=10.0
 95 | XMobilityLoss.depth_weight=10.0
 96 | 
 97 | # RSSM
 98 | RSSM.action_dim=%ACTION_SIZE
 99 | RSSM.hidden_state_dim=1024
100 | RSSM.state_dim=512
101 | RSSM.action_latent_dim=64
102 | RSSM.receptive_field=%RECEPTIVE_FIELD
103 | RSSM.use_dropout=False
104 | RSSM.dropout_probability=0.0
105 | 
106 | # XMobility
107 | XMobility.enable_semantic=%ENABLE_SEMANTIC
108 | XMobility.enable_rgb_stylegan=%ENABLE_RGB_STYLEGAN
109 | XMobility.enable_rgb_diffusion=%ENABLE_RGB_DIFFUSION
110 | XMobility.is_gwm_pretrain=%IS_GWM_PRETRAIN
111 | 


--------------------------------------------------------------------------------
/distillation_train.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import os
 17 | import argparse
 18 | from enum import Enum
 19 | 
 20 | import gin
 21 | import pytorch_lightning as pl
 22 | import wandb
 23 | from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
 24 | from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger
 25 | 
 26 | from compass.distillation.rl_specialists_dataset import RLSpecialistDataModule    # pylint: disable=unused-import
 27 | from compass.distillation.distillation_trainer import ESDistillationTrainer    # pylint: disable=unused-import
 28 | 
 29 | 
 30 | class TaskMode(Enum):
 31 |     TRAIN = "train"
 32 |     EVAL = "eval"
 33 | 
 34 | 
 35 | def parse_arguments(task_mode):
 36 |     ''' Arguments parser for model training and evaluation.
 37 |     '''
 38 |     description = 'Train ML Nav' if task_mode == TaskMode.TRAIN else 'Eval ML Nav'
 39 |     parser = argparse.ArgumentParser(description=description)
 40 | 
 41 |     parser.add_argument('--config-files',
 42 |                         '-c',
 43 |                         nargs='+',
 44 |                         required=True,
 45 |                         help='The list of the config files.')
 46 |     parser.add_argument('--dataset-path',
 47 |                         '-d',
 48 |                         type=str,
 49 |                         required=True,
 50 |                         help='The path to the dataset.')
 51 |     parser.add_argument('--wandb-project-name',
 52 |                         '-n',
 53 |                         type=str,
 54 |                         default='afm_rl_enhance_distillation',
 55 |                         help='The project name of W&B.')
 56 |     parser.add_argument('--wandb-run-name',
 57 |                         '-r',
 58 |                         type=str,
 59 |                         default='train_run',
 60 |                         help='The run name of W&B.')
 61 |     parser.add_argument('--wandb-entity-name',
 62 |                         '-e',
 63 |                         type=str,
 64 |                         default='nvidia-isaac',
 65 |                         help='The entity name of W&B.')
 66 |     parser.add_argument('--checkpoint-path',
 67 |                         '-p',
 68 |                         type=str,
 69 |                         default=None,
 70 |                         help='The path to the checkpoint.')
 71 |     parser.add_argument('--logger',
 72 |                         type=str,
 73 |                         choices=['wandb', 'tensorboard'],
 74 |                         default='tensorboard',
 75 |                         help='Logger to use: wandb or tensorboard')
 76 |     if task_mode == TaskMode.TRAIN:
 77 |         parser.add_argument('--output-dir',
 78 |                             '-o',
 79 |                             type=str,
 80 |                             required=True,
 81 |                             help='The path to the output dir.')
 82 | 
 83 |     if task_mode == TaskMode.EVAL:
 84 |         parser.add_argument('--eval_target',
 85 |                             type=str,
 86 |                             default='observation',
 87 |                             help='Target to evaluate: [observation, imagination]')
 88 | 
 89 |     args = parser.parse_args()
 90 |     return args
 91 | 
 92 | 
 93 | @gin.configurable
 94 | def train(dataset_path,
 95 |           output_dir,
 96 |           ckpt_path,
 97 |           wandb_project_name,
 98 |           wandb_run_name,
 99 |           wandb_entity_name,
100 |           precision,
101 |           epochs,
102 |           data_module,
103 |           model_trainer,
104 |           logger_type='wandb'):
105 |     # Create a output directory if not exit.
106 |     if not os.path.exists(output_dir):
107 |         os.makedirs(output_dir)
108 | 
109 |     data = data_module(dataset_path=dataset_path)
110 |     if ckpt_path:
111 |         model = model_trainer.load_from_checkpoint(checkpoint_path=ckpt_path, strict=False)
112 |     else:
113 |         model = model_trainer()
114 | 
115 |     # Set up the appropriate logger
116 |     if logger_type == 'wandb':
117 |         logger = WandbLogger(entity=wandb_entity_name,
118 |                              project=wandb_project_name,
119 |                              name=wandb_run_name,
120 |                              save_dir=output_dir,
121 |                              group="DDP",
122 |                              log_model=True)
123 |     else:
124 |         logger = TensorBoardLogger(save_dir=output_dir)
125 | 
126 |     callbacks = [
127 |         pl.callbacks.ModelSummary(-1),
128 |         pl.callbacks.LearningRateMonitor(),
129 |         ModelCheckpoint(dirpath=os.path.join(output_dir, 'checkpoints'),
130 |                         save_top_k=3,
131 |                         monitor='val_loss',
132 |                         mode='min',
133 |                         save_last=True),
134 |     ]
135 |     trainer = pl.Trainer(max_epochs=epochs,
136 |                          precision=precision,
137 |                          sync_batchnorm=True,
138 |                          callbacks=callbacks,
139 |                          strategy='ddp_find_unused_parameters_true',
140 |                          logger=logger)
141 |     trainer.fit(model, datamodule=data)
142 | 
143 |     trainer.test(ckpt_path="last", datamodule=data)
144 | 
145 |     return logger
146 | 
147 | 
148 | def log_gin_config(logger: WandbLogger):
149 |     # This function should be called after all the gin configurable functions.
150 |     # Otherwise, the config string will be empty.
151 |     gin_config_str = gin.operative_config_str()
152 | 
153 |     # Create a temporary file to store the gin config
154 |     with open("/tmp/gin_config.txt", "w", encoding='UTF-8') as f:
155 |         f.write(gin_config_str)
156 | 
157 |     # Log the artifact using the WandbLogger
158 |     artifact = wandb.Artifact("gin_config", type="text")
159 |     artifact.add_file("/tmp/gin_config.txt")
160 |     logger.experiment.log_artifact(artifact)
161 | 
162 | 
163 | def main():
164 |     args = parse_arguments(TaskMode.TRAIN)
165 | 
166 |     for config_file in args.config_files:
167 |         gin.parse_config_file(config_file, skip_unknown=True)
168 | 
169 |     # Run the training loop.
170 |     logger = train(args.dataset_path,
171 |                    args.output_dir,
172 |                    args.checkpoint_path,
173 |                    args.wandb_project_name,
174 |                    args.wandb_run_name,
175 |                    args.wandb_entity_name,
176 |                    logger_type=args.logger)
177 | 
178 |     # Log gin config if using wandb
179 |     if args.logger == 'wandb':
180 |         log_gin_config(logger)
181 |         # Finish wandb
182 |         wandb.finish()
183 | 
184 | 
185 | if __name__ == '__main__':
186 |     main()
187 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.distillation:
--------------------------------------------------------------------------------
1 | FROM nvcr.io/nvidia/pytorch:24.01-py3
2 | 
3 | COPY . /workspace
4 | 
5 | RUN pip install --upgrade pip
6 | 
7 | RUN pip install -r requirements.txt
8 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.rl:
--------------------------------------------------------------------------------
1 | # b120 is a build of IsaacLab v1.3.0.
2 | FROM nvcr.io/nvidian/isaac-lab:IsaacLab-main-b120
3 | 
4 | WORKDIR /workspace
5 | 
6 | COPY . /workspace
7 | 


--------------------------------------------------------------------------------
/images/compass.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:baee5767b189bce11678d4141b20fadd58c23fb0249e3daeaa05f83639091bc7
3 | size 12078387
4 | 


--------------------------------------------------------------------------------
/images/omap_generation.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:03b191320340548fa3d3a632d1a0cf14f297e15d5c3e27ede72fb45ba068ae73
3 | size 1832418
4 | 


--------------------------------------------------------------------------------
/onnx_conversion.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import argparse
 17 | 
 18 | import gin
 19 | import wandb
 20 | import torch
 21 | import torch.nn.functional as F
 22 | from torch import nn
 23 | 
 24 | from model.x_mobility.utils import pack_sequence_dim
 25 | from model.x_mobility.x_mobility import XMobility
 26 | 
 27 | from compass.residual_rl.constants import INPUT_IMAGE_SIZE
 28 | from compass.residual_rl.actor_critic import ActorCriticXMobility
 29 | from compass.distillation.distillation import ESDistillationPolicyWrapper
 30 | 
 31 | 
 32 | class PolicyInference(nn.Module):
 33 |     ''' Wrapper of policy for Onnx conversion.
 34 |     '''
 35 | 
 36 |     def __init__(self, base_checkpoint_path: str):
 37 |         super().__init__()
 38 |         # Load the state dict for X-Mobility.
 39 |         state_dict = torch.load(base_checkpoint_path, weights_only=False)['state_dict']
 40 |         state_dict = {k.removeprefix('model.'): v for k, v in state_dict.items()}
 41 |         self.x_mobility = XMobility()
 42 |         self.x_mobility.load_state_dict(state_dict)
 43 | 
 44 |     def base_policy_forward(self, image, route, speed, action_input, history_input, sample_input):
 45 |         inputs = {}
 46 |         # Resize the input image to desired size.
 47 |         image = image.squeeze(0)
 48 |         image = F.interpolate(image, size=INPUT_IMAGE_SIZE, mode='bilinear', align_corners=False)
 49 |         inputs['image'] = image.unsqueeze(0)
 50 |         inputs['route'] = route
 51 |         inputs['speed'] = speed
 52 |         inputs['action'] = action_input
 53 |         inputs['history'] = history_input
 54 |         inputs['sample'] = sample_input
 55 | 
 56 |         # Run base policy.
 57 |         base_action, _, history, sample, _, _, _ = self.x_mobility.inference(
 58 |             inputs, False, False, False)
 59 |         latent_state = torch.cat([history, sample], dim=-1)
 60 |         route_feat = self.x_mobility.action_policy.route_encoder(pack_sequence_dim(route))
 61 |         policy_state = self.x_mobility.action_policy.policy_state_fusion(latent_state, route_feat)
 62 |         return base_action, policy_state, history, sample
 63 | 
 64 | 
 65 | class SpecialistPolicyInference(PolicyInference):
 66 |     ''' Wrapper of embodiment specialist policy for Onnx conversion.
 67 |     '''
 68 | 
 69 |     def __init__(self, base_checkpoint_path: str, residual_checkpoint_path: str):
 70 |         super().__init__(base_checkpoint_path)
 71 | 
 72 |         self.residual_policy = ActorCriticXMobility(
 73 |             self.x_mobility.action_policy,
 74 |             action_dim=6,
 75 |             critic_state_dim=self.x_mobility.action_policy.poly_state_dim)
 76 |         self.residual_policy.load_state_dict(
 77 |             torch.load(residual_checkpoint_path, weights_only=False)['model_state_dict'])
 78 | 
 79 |     def forward(self, image, route, speed, action_input, history_input, sample_input):
 80 |         # Run base policy.
 81 |         base_action, policy_state, history, sample = self.base_policy_forward(
 82 |             image, route, speed, action_input, history_input, sample_input)
 83 | 
 84 |         # Run residual policy.
 85 |         residual_action = self.residual_policy.act_inference(policy_state)
 86 |         final_action = base_action + residual_action
 87 | 
 88 |         # Outputs: [action_output, history_output, sample_output]
 89 |         return final_action, history, sample
 90 | 
 91 | 
 92 | class GeneralistPolicyInference(PolicyInference):
 93 |     ''' Wrapper of generalist policy for Onnx conversion.
 94 |     '''
 95 | 
 96 |     def __init__(self, base_checkpoint_path: str, generalist_policy_ckpt_path: str,
 97 |                  embodiment_type: str):
 98 |         super().__init__(base_checkpoint_path)
 99 | 
100 |         # Load the state dict for the generalist policy.
101 |         self.generalist_policy = ESDistillationPolicyWrapper(generalist_policy_ckpt_path,
102 |                                                              embodiment_type)
103 | 
104 |     def forward(self, image, route, speed, action_input, history_input, sample_input):
105 |         # Run base policy.
106 |         _, policy_state, history, sample = self.base_policy_forward(image, route, speed,
107 |                                                                     action_input, history_input,
108 |                                                                     sample_input)
109 | 
110 |         # Run generalist policy.
111 |         generalist_action = self.generalist_policy(policy_state)
112 | 
113 |         # Outputs: [action_output, history_output, sample_output]
114 |         return generalist_action, history, sample
115 | 
116 | 
117 | def convert(base_checkpoint_path: str, residual_checkpoint_path: str,
118 |             generalist_checkpoint_path: str, embodiment_type: str, onnx_path: str, jit_path: str):
119 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
120 |     if residual_checkpoint_path:
121 |         model = SpecialistPolicyInference(base_checkpoint_path, residual_checkpoint_path)
122 |     elif generalist_checkpoint_path:
123 |         model = GeneralistPolicyInference(base_checkpoint_path, generalist_checkpoint_path,
124 |                                           embodiment_type)
125 |     else:
126 |         raise ValueError('Either residual or generalist checkpoint path must be provided.')
127 | 
128 |     model.to(device)
129 |     model.eval()
130 | 
131 |     # Input tensors.
132 |     image = torch.randn((1, 1, 3, 640, 960), dtype=torch.float32).to(device)
133 |     speed = torch.randn((1, 1, 1), dtype=torch.float32).to(device)
134 |     action = torch.randn((1, 6), dtype=torch.float32).to(device)
135 |     history = torch.zeros((1, 1024), dtype=torch.float32).to(device)
136 |     sample = torch.zeros((1, 512), dtype=torch.float32).to(device)
137 |     route = torch.randn((1, 1, 10, 4), dtype=torch.float32).to(device)
138 | 
139 |     # Output jit file.
140 |     if jit_path:
141 |         traced_model = torch.jit.trace(model, (image, route, speed, action, history, sample))
142 |         traced_model.save(jit_path)
143 | 
144 |     # Output names.
145 |     output_names = ['action_output', 'history_output', 'sample_output']
146 | 
147 |     # ONNX conversion.
148 |     torch.onnx.export(
149 |         model, (image, route, speed, action, history, sample),
150 |         onnx_path,
151 |         verbose=True,
152 |         input_names=['image', 'route', 'speed', 'action_input', 'history_input', 'sample_input'],
153 |         output_names=output_names)
154 | 
155 | 
156 | def main():
157 |     # Parse the arguments.
158 |     parser = argparse.ArgumentParser(description='Convert the E2E Nav model to onnx.')
159 |     parser.add_argument('--base-ckpt-path',
160 |                         '-b',
161 |                         type=str,
162 |                         required=True,
163 |                         help='The path to the base policy checkpoint.')
164 |     parser.add_argument('--residual-ckpt-path',
165 |                         '-r',
166 |                         type=str,
167 |                         required=False,
168 |                         help='The path to the residual policy checkpoint.')
169 |     parser.add_argument('--generalist-ckpt-path',
170 |                         '-g',
171 |                         type=str,
172 |                         required=False,
173 |                         help='The path to the generalist policy checkpoint.')
174 |     parser.add_argument('--embodiment-type',
175 |                         '-e',
176 |                         type=str,
177 |                         required=False,
178 |                         help='The embodiment type to use for the generalist policy.')
179 |     parser.add_argument('--onnx-file',
180 |                         '-o',
181 |                         type=str,
182 |                         required=True,
183 |                         help='The path to the output onnx file.')
184 |     parser.add_argument('--jit-file',
185 |                         '-j',
186 |                         type=str,
187 |                         required=False,
188 |                         help='The path to the output JIT file.')
189 |     parser.add_argument('--onnx-artifact',
190 |                         '-a',
191 |                         type=str,
192 |                         required=False,
193 |                         help='The wandb onnx artifact to upload.')
194 |     args = parser.parse_args()
195 | 
196 |     # Load hyperparameters.
197 |     gin.parse_config_file('configs/eval_config.gin', skip_unknown=True)
198 | 
199 |     # Run the convert.
200 |     print("Converting ONNX.")
201 |     convert(args.base_ckpt_path, args.residual_ckpt_path, args.generalist_ckpt_path,
202 |             args.embodiment_type, args.onnx_file, args.jit_file)
203 | 
204 |     # Upload onnx to wandb if onnx_artifact is provided.
205 |     if args.onnx_artifact:
206 |         print(f'Uploading to WANDB: {args.onnx_artifact.split("/")}.')
207 |         wandb_project = args.onnx_artifact.split('/')[1]
208 |         wandb_run_id = args.onnx_artifact.split('/')[2].split(':')[0]
209 |         wandb.init(dir='/tmp', project=wandb_project, id=wandb_run_id)
210 |         version = args.onnx_artifact.split('/')[2].split(':')[1]
211 |         onnx_artifact = wandb.Artifact(f'onnx-{wandb_run_id}-{version}', type='onnx')
212 |         onnx_artifact.add_file(args.onnx_file)
213 |         wandb.log_artifact(onnx_artifact)
214 |         wandb.finish()
215 | 
216 | 
217 | if __name__ == '__main__':
218 |     main()
219 | 


--------------------------------------------------------------------------------
/record.py:
--------------------------------------------------------------------------------
  1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2 | # SPDX-License-Identifier: Apache-2.0
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | # http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | import argparse
 17 | import os
 18 | import subprocess
 19 | import random
 20 | import shutil
 21 | import yaml
 22 | 
 23 | import wandb
 24 | 
 25 | 
 26 | def run(command: str):
 27 |     print(f'Running: {" ".join(command)}')
 28 |     subprocess.run(command, check=True)
 29 | 
 30 | 
 31 | def collect_data(dataset_config: dict, environments: list, base_policy_path: str, output_dir: str,
 32 |                  wandb_project_name: str, wandb_entity_name: str):
 33 |     """
 34 |     Collects data for each environment and embodiment combination.
 35 | 
 36 |     Args:
 37 |         environments (List[str]): A list of environments to collect data for.
 38 |         base_policy_path (str): The path to the base policy.
 39 |         output_dir (str): The directory to store the collected data.
 40 |     """
 41 |     # Initialize wandb if using artifact.
 42 |     if dataset_config['policy_ckpt_type'] == 'wandb_artifact':
 43 |         wandb.init(project=wandb_project_name, entity=wandb_entity_name)
 44 | 
 45 |     # Record data for each embodiment and environment.
 46 |     isaaclab_path = os.getenv("ISAACLAB_PATH")
 47 |     for embodiment, residual_policy in dataset_config['embodiment_policies'].items():
 48 |         if dataset_config['policy_ckpt_type'] == 'wandb_artifact':
 49 |             print(f'Collecting data for {embodiment} with {residual_policy}...')
 50 |             artifact = wandb.use_artifact(residual_policy, type='model')
 51 |             artifact_dir = artifact.download(root=f"/tmp/{embodiment}")
 52 |             residual_policy_path = os.path.join(artifact_dir, os.listdir(artifact_dir)[0])
 53 |             print(f'Downloaded embodiment residual policy: {residual_policy_path}')
 54 |         elif dataset_config['policy_ckpt_type'] == 'local':
 55 |             residual_policy_path = residual_policy
 56 |         else:
 57 |             raise ValueError(
 58 |                 f'Invalid policy checkpoint type: {dataset_config["policy_ckpt_type"]}')
 59 |         for environment in environments:
 60 |             print(f'Collecting environment {environment}')
 61 |             dataset_dir = os.path.join(output_dir, embodiment, environment)
 62 |             if not os.path.exists(dataset_dir):
 63 |                 os.makedirs(dataset_dir)
 64 |             command = [
 65 |                 f"{isaaclab_path}/isaaclab.sh", "-p", 'run.py', '-c', 'configs/record_config.gin',
 66 |                 '--enable_cameras', '-b', base_policy_path, '-p', residual_policy_path, '-o',
 67 |                 dataset_dir, '--embodiment', embodiment, '--environment', environment, '--headless'
 68 |             ]
 69 |             run(command)
 70 | 
 71 |     # Finish wandb if using artifact.
 72 |     if dataset_config['policy_ckpt_type'] == 'wandb_artifact':
 73 |         wandb.finish()
 74 | 
 75 | 
 76 | def compose_dataset(dataset_config: dict, output_dir: str, dataset_name: str):
 77 |     """
 78 |     Compose a dataset by splitting and organizing files in the output directory.
 79 | 
 80 |     Args:
 81 |         output_dir (str): The directory containing the files to be split.
 82 |         dataset_name (str): The name of the dataset.
 83 |     """
 84 |     # Process each embodiment separately
 85 |     for embodiment in os.listdir(output_dir):
 86 |         embodiment_path = os.path.join(output_dir, embodiment)
 87 |         if not os.path.isdir(embodiment_path):
 88 |             continue
 89 |         embodiment_split_files = {"train": [], "val": [], "test": []}
 90 |         # Process each environment
 91 |         for env in os.listdir(embodiment_path):
 92 |             env_path = os.path.join(embodiment_path, env, 'data')
 93 |             if not os.path.isdir(env_path):
 94 |                 continue
 95 |             # Split per environment
 96 |             files = [os.path.join(env_path, f) for f in os.listdir(env_path)]
 97 |             random.shuffle(files)
 98 |             num_files = len(files)
 99 |             train_end = int(num_files * dataset_config['data_split_ratios']['train'])
100 |             val_end = train_end + int(num_files * dataset_config['data_split_ratios']['val'])
101 |             env_splits = {
102 |                 "train": files[:train_end],
103 |                 "val": files[train_end:val_end],
104 |                 "test": files[val_end:]
105 |             }
106 |             # Collect split files per robot (flattening env layer)
107 |             for split, split_files in env_splits.items():
108 |                 embodiment_split_files[split].extend(split_files)
109 | 
110 |         # Move files to corresponding split directories (grouped by robot)
111 |         for split, split_files in embodiment_split_files.items():
112 |             split_dir = os.path.join(output_dir, dataset_name, 'data', split, embodiment)
113 |             os.makedirs(split_dir, exist_ok=True)
114 |             for file_path in split_files:
115 |                 target_file_path = os.path.join(split_dir, file_path.replace("/", "_"))
116 |                 shutil.move(file_path, target_file_path)
117 | 
118 | 
119 | def main():
120 |     # Add argparse arguments
121 |     parser = argparse.ArgumentParser(description="Script to record datasets for RL distillation.")
122 |     parser.add_argument('--dataset-config-file',
123 |                         '-c',
124 |                         type=str,
125 |                         required=True,
126 |                         help='The path to the dataset config yaml file.')
127 |     parser.add_argument('--base_policy_path',
128 |                         '-b',
129 |                         type=str,
130 |                         required=True,
131 |                         help='The path to the base policy checkpoint artifact.')
132 |     parser.add_argument('--wandb-project-name',
133 |                         '-n',
134 |                         type=str,
135 |                         default='afm_rl_enhance_record',
136 |                         help='The project name of W&B.')
137 |     parser.add_argument('--wandb-entity-name',
138 |                         '-e',
139 |                         type=str,
140 |                         default='nvidia-isaac',
141 |                         help='The entity name of W&B.')
142 |     parser.add_argument('--environments',
143 |                         nargs='+',
144 |                         type=str,
145 |                         default=['combined_single_rack'],
146 |                         help='A list of environments to benchmark.')
147 |     parser.add_argument('--dataset-name', type=str, help='Name of the dataset in OSMO.')
148 |     parser.add_argument('--output-dir', '-o', type=str, help='Output directory for the dataset.')
149 |     args = parser.parse_args()
150 |     with open(args.dataset_config_file, 'r', encoding='utf-8') as f:
151 |         dataset_config = yaml.safe_load(f)
152 | 
153 |     # Record dataset.
154 |     collect_data(dataset_config=dataset_config,
155 |                  environments=args.environments,
156 |                  base_policy_path=args.base_policy_path,
157 |                  output_dir=args.output_dir,
158 |                  wandb_project_name=args.wandb_project_name,
159 |                  wandb_entity_name=args.wandb_entity_name)
160 |     compose_dataset(dataset_config=dataset_config,
161 |                     output_dir=args.output_dir,
162 |                     dataset_name=args.dataset_name)
163 | 
164 | 
165 | if __name__ == '__main__':
166 |     # Run the main function.
167 |     main()
168 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | einops
 2 | gin-config
 3 | h5py
 4 | matplotlib
 5 | moviepy
 6 | numpy
 7 | onnx
 8 | pandas
 9 | pytorch-lightning
10 | timm
11 | torcheval
12 | transformers
13 | wandb
14 | wheel
15 | 


--------------------------------------------------------------------------------
/x_mobility/x_mobility-0.1.0-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/COMPASS/d9a1a2b4b530f848488de528eac6a17ef0d778d1/x_mobility/x_mobility-0.1.0-py3-none-any.whl


--------------------------------------------------------------------------------