├── .gitignore ├── LICENSE ├── README.md ├── isaaclab_exts └── omni.isaac.leggedloco │ ├── config │ └── extension.toml │ ├── omni │ └── isaac │ │ └── leggedloco │ │ ├── config │ │ ├── __init__.py │ │ ├── g1 │ │ │ ├── __init__.py │ │ │ ├── g1_low_base_cfg.py │ │ │ └── g1_low_vision_cfg.py │ │ ├── go1 │ │ │ ├── __init__.py │ │ │ ├── go1_low_base_cfg.py │ │ │ └── go1_low_vision_cfg.py │ │ ├── go2 │ │ │ ├── __init__.py │ │ │ ├── go2_low_base_cfg.py │ │ │ └── go2_low_vision_cfg.py │ │ └── h1 │ │ │ ├── __init__.py │ │ │ ├── h1_low_base_cfg.py │ │ │ └── h1_low_vision_cfg.py │ │ ├── leggedloco │ │ ├── __init__.py │ │ └── mdp │ │ │ ├── __init__.py │ │ │ ├── actions │ │ │ ├── __init__.py │ │ │ ├── navigation_actions.py │ │ │ ├── vlm_navigation_actions.py │ │ │ └── vlm_navigation_actions_gpt.py │ │ │ ├── commands │ │ │ ├── __init__.py │ │ │ ├── goal_command_generator.py │ │ │ ├── goal_command_generator_cfg.py │ │ │ ├── lowlevel_command_generator.py │ │ │ ├── lowlevel_command_generator_cfg.py │ │ │ ├── midlevel_command_generator.py │ │ │ ├── midlevel_command_generator_cfg.py │ │ │ ├── path_follower_command_generator.py │ │ │ ├── path_follower_command_generator_cfg.py │ │ │ ├── path_follower_command_generator_gpt.py │ │ │ ├── path_follower_command_generator_gpt_cfg.py │ │ │ ├── rl_command_generator.py │ │ │ ├── rl_command_generator_cfg.py │ │ │ ├── robot_vel_command_generator.py │ │ │ └── robot_vel_command_generator_cfg.py │ │ │ ├── curriculums.py │ │ │ ├── events.py │ │ │ ├── observations.py │ │ │ └── rewards │ │ │ ├── __init__.py │ │ │ └── objnav_rewards.py │ │ └── utils │ │ ├── __init__.py │ │ └── wrappers.py │ └── setup.py ├── rsl_rl ├── .flake8 ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTORS.md ├── LICENSE ├── README.md ├── config │ └── dummy_config.yaml ├── licenses │ └── dependencies │ │ ├── black-license.txt │ │ ├── codespell-license.txt │ │ ├── flake8-license.txt │ │ ├── isort-license.txt │ │ ├── numpy_license.txt │ │ ├── onnx-license.txt │ │ ├── pre-commit-hooks-license.txt │ │ ├── pre-commit-license.txt │ │ ├── pyright-license.txt │ │ ├── pyupgrade-license.txt │ │ └── torch_license.txt ├── pyproject.toml ├── rsl_rl │ ├── __init__.py │ ├── algorithms │ │ ├── __init__.py │ │ └── ppo.py │ ├── env │ │ ├── __init__.py │ │ └── vec_env.py │ ├── modules │ │ ├── __init__.py │ │ ├── actor_critic.py │ │ ├── actor_critic_depth_cnn.py │ │ ├── actor_critic_history.py │ │ ├── actor_critic_recurrent.py │ │ ├── depth_backbone.py │ │ └── normalizer.py │ ├── runners │ │ ├── __init__.py │ │ ├── on_policy_runner.py │ │ └── on_policy_runner_history.py │ ├── storage │ │ ├── __init__.py │ │ └── rollout_storage.py │ └── utils │ │ ├── __init__.py │ │ ├── neptune_utils.py │ │ ├── utils.py │ │ └── wandb_utils.py └── setup.py ├── scripts ├── cli_args.py ├── demo_matterport.py ├── play.py ├── play_low_matterport_keyboard.py ├── run_data_collection.py ├── train.py └── utils.py └── src ├── go2_teaser.gif └── h1_teaser.gif /.gitignore: -------------------------------------------------------------------------------- 1 | assets/ 2 | logs/ 3 | *.pyc 4 | *.egg-info 5 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 An-Chieh Cheng, Yandong Ji, Zhaojing Yang, Xueyan Zou, Jan Kautz, Erdem Bıyık, Hongxu Yin, Sifei Liu, Xiaolong Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Legged Loco 2 | This repo is used to train low-level locomotion policies for the Unitree Go2 and H1 in Isaac Lab. 3 | 4 |

5 | First Demo 6 |   7 | Second Demo 8 |

9 | 10 | 11 | ## Installation 12 | 1. Create a new conda environment with python 3.10. 13 | ```shell 14 | conda create -n isaaclab python=3.10 15 | conda activate isaaclab 16 | ``` 17 | 18 | 2. Make sure that Isaac Sim is installed on your machine. Otherwise follow [this guideline](https://docs.omniverse.nvidia.com/isaacsim/latest/installation/install_workstation.html) to install it. If installing via the Omniverse Launcher, please ensure that Isaac Sim 4.1.0 is selected and installed. On Ubuntu 22.04 or higher, you could install it via pip: 19 | ```shell 20 | pip install isaacsim-rl==4.1.0 isaacsim-replicator==4.1.0 isaacsim-extscache-physics==4.1.0 isaacsim-extscache-kit-sdk==4.1.0 isaacsim-extscache-kit==4.1.0 isaacsim-app==4.1.0 --extra-index-url https://pypi.nvidia.com 21 | ``` 22 | 23 | 3. Install PyTorch. 24 | ```shell 25 | pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cu121 26 | ``` 27 | 28 | 4. Clone the Isaac Lab repository, and link extensions. 29 | 30 | **Note**: This codebase was tested with Isaac Lab 1.1.0 and may not be compatible with newer versions. Please make sure to use the modified version of Isaac Lab provided below, which includes important bug fixes and updates. As Isaac Lab is under active development, we will consider supporting newer versions in the future. 31 | ```shell 32 | git clone git@github.com:yang-zj1026/IsaacLab.git 33 | cd IsaacLab 34 | cd source/extensions 35 | ln -s {THIS_REPO_DIR}/isaaclab_exts/omni.isaac.leggedloco . 36 | cd ../.. 37 | ``` 38 | 39 | 5. Run the Isaac Lab installer script and additionally install rsl rl in this repo. 40 | ```shell 41 | ./isaaclab.sh -i none 42 | ./isaaclab.sh -p -m pip install -e {THIS_REPO_DIR}/rsl_rl 43 | cd .. 44 | ``` 45 | 46 | 47 | ## Usage 48 | * train 49 | 50 | ```shell 51 | python scripts/train.py --task=go2_base --history_len=9 --run_name=XXX --max_iterations=2000 --save_interval=200 --headless 52 | 53 | python scripts/train.py --task=h1_base --run_name=XXX --max_iterations=2000 --save_interval=200 --headless 54 | ``` 55 | 56 | * test 57 | 58 | ```shell 59 | python scripts/play.py --task=go2_base_play --history_len=9 --load_run=RUN_NAME --num_envs=10 60 | python scripts/play.py --task=h1_base_play --load_run=RUN_NAME --num_envs=10 61 | ``` 62 | 63 | Use `--headless` to enable headless mode. Add `--enable_cameras --video` for headless rendering and video saving. 64 | 65 | ## Add New Environments 66 | You can add additional environments by placing them under `isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config`. -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/config/extension.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | version = "0.0.1" 3 | title = "Legged Robot extension" 4 | description="Isaac Lab Extension for Legged Robot Locomotion" 5 | authors =["Zhaojing Yang", "Yandong Ji"] 6 | repository = "https://github.com/leggedrobotics/viplanner" 7 | category = "robotics" 8 | keywords = ["kit", "robotics"] 9 | readme = "docs/README.md" 10 | 11 | [dependencies] 12 | "omni.kit.uiapp" = {} 13 | "omni.isaac.ui" = {} 14 | "omni.isaac.core" = {} 15 | "omni.isaac.lab" = {} 16 | 17 | # Main python module this extension provides. 
18 | [[python.module]] 19 | name = "omni.isaac.leggedloco" 20 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .h1 import * 2 | from .g1 import * 3 | from .go1 import * 4 | from .go2 import * 5 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/g1/__init__.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | 3 | from .g1_low_base_cfg import G1BaseRoughEnvCfg, G1BaseRoughEnvCfg_PLAY, G1RoughPPORunnerCfg 4 | from .g1_low_vision_cfg import G1VisionRoughEnvCfg, G1VisionRoughEnvCfg_PLAY, G1VisionRoughPPORunnerCfg 5 | 6 | ## 7 | # Register Gym environments. 8 | ## 9 | 10 | 11 | gym.register( 12 | id="g1_base", 13 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 14 | disable_env_checker=True, 15 | kwargs={ 16 | "env_cfg_entry_point": G1BaseRoughEnvCfg, 17 | "rsl_rl_cfg_entry_point": G1RoughPPORunnerCfg, 18 | }, 19 | ) 20 | 21 | 22 | gym.register( 23 | id="g1_base_play", 24 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 25 | disable_env_checker=True, 26 | kwargs={ 27 | "env_cfg_entry_point": G1BaseRoughEnvCfg_PLAY, 28 | "rsl_rl_cfg_entry_point": G1RoughPPORunnerCfg, 29 | }, 30 | ) 31 | 32 | 33 | gym.register( 34 | id="g1_vision", 35 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 36 | disable_env_checker=True, 37 | kwargs={ 38 | "env_cfg_entry_point": G1VisionRoughEnvCfg, 39 | "rsl_rl_cfg_entry_point": G1VisionRoughPPORunnerCfg, 40 | }, 41 | ) 42 | 43 | 44 | gym.register( 45 | id="g1_vision_play", 46 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 47 | disable_env_checker=True, 48 | kwargs={ 49 | "env_cfg_entry_point": G1VisionRoughEnvCfg_PLAY, 50 | "rsl_rl_cfg_entry_point": G1VisionRoughPPORunnerCfg, 51 | }, 52 | ) 53 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/go1/__init__.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | 3 | from .go1_low_base_cfg import Go1BaseRoughEnvCfg, Go1BaseRoughEnvCfg_PLAY, Go1RoughPPORunnerCfg 4 | from .go1_low_vision_cfg import Go1VisionRoughEnvCfg, Go1VisionRoughEnvCfg_PLAY, Go1VisionRoughPPORunnerCfg 5 | 6 | ## 7 | # Register Gym environments.
8 | ## 9 | 10 | 11 | gym.register( 12 | id="go1_base", 13 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 14 | disable_env_checker=True, 15 | kwargs={ 16 | "env_cfg_entry_point": Go1BaseRoughEnvCfg, 17 | "rsl_rl_cfg_entry_point": Go1RoughPPORunnerCfg, 18 | }, 19 | ) 20 | 21 | 22 | gym.register( 23 | id="go1_base_play", 24 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 25 | disable_env_checker=True, 26 | kwargs={ 27 | "env_cfg_entry_point": Go1BaseRoughEnvCfg_PLAY, 28 | "rsl_rl_cfg_entry_point": Go1RoughPPORunnerCfg, 29 | }, 30 | ) 31 | 32 | 33 | gym.register( 34 | id="go1_vision", 35 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 36 | disable_env_checker=True, 37 | kwargs={ 38 | "env_cfg_entry_point": Go1VisionRoughEnvCfg, 39 | "rsl_rl_cfg_entry_point": Go1VisionRoughPPORunnerCfg, 40 | }, 41 | ) 42 | 43 | 44 | gym.register( 45 | id="go1_vision_play", 46 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 47 | disable_env_checker=True, 48 | kwargs={ 49 | "env_cfg_entry_point": Go1VisionRoughEnvCfg_PLAY, 50 | "rsl_rl_cfg_entry_point": Go1VisionRoughPPORunnerCfg, 51 | }, 52 | ) 53 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/go2/__init__.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | 3 | from .go2_low_base_cfg import Go2BaseRoughEnvCfg, Go2BaseRoughEnvCfg_PLAY, Go2RoughPPORunnerCfg 4 | from .go2_low_vision_cfg import Go2VisionRoughEnvCfg, Go2VisionRoughEnvCfg_PLAY, Go2VisionRoughPPORunnerCfg 5 | 6 | ## 7 | # Register Gym environments. 8 | ## 9 | 10 | gym.register( 11 | id="go2_base", 12 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 13 | disable_env_checker=True, 14 | kwargs={ 15 | "env_cfg_entry_point": Go2BaseRoughEnvCfg, 16 | "rsl_rl_cfg_entry_point": Go2RoughPPORunnerCfg, 17 | }, 18 | ) 19 | 20 | 21 | gym.register( 22 | id="go2_base_play", 23 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 24 | disable_env_checker=True, 25 | kwargs={ 26 | "env_cfg_entry_point": Go2BaseRoughEnvCfg_PLAY, 27 | "rsl_rl_cfg_entry_point": Go2RoughPPORunnerCfg, 28 | }, 29 | ) 30 | 31 | gym.register( 32 | id="go2_vision", 33 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 34 | disable_env_checker=True, 35 | kwargs={ 36 | "env_cfg_entry_point": Go2VisionRoughEnvCfg, 37 | "rsl_rl_cfg_entry_point": Go2VisionRoughPPORunnerCfg, 38 | }, 39 | ) 40 | 41 | gym.register( 42 | id="go2_vision_play", 43 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 44 | disable_env_checker=True, 45 | kwargs={ 46 | "env_cfg_entry_point": Go2VisionRoughEnvCfg_PLAY, 47 | "rsl_rl_cfg_entry_point": Go2VisionRoughPPORunnerCfg, 48 | }, 49 | ) -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/h1/__init__.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | 3 | from .h1_low_base_cfg import H1BaseRoughEnvCfg, H1BaseRoughEnvCfg_PLAY, H1RoughPPORunnerCfg 4 | from .h1_low_vision_cfg import H1VisionRoughEnvCfg, H1VisionRoughEnvCfg_PLAY, H1VisionRoughPPORunnerCfg 5 | 6 | ## 7 | # Register Gym environments. 
8 | ## 9 | 10 | 11 | gym.register( 12 | id="h1_base", 13 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 14 | disable_env_checker=True, 15 | kwargs={ 16 | "env_cfg_entry_point": H1BaseRoughEnvCfg, 17 | "rsl_rl_cfg_entry_point": H1RoughPPORunnerCfg, 18 | }, 19 | ) 20 | 21 | 22 | gym.register( 23 | id="h1_base_play", 24 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 25 | disable_env_checker=True, 26 | kwargs={ 27 | "env_cfg_entry_point": H1BaseRoughEnvCfg_PLAY, 28 | "rsl_rl_cfg_entry_point": H1RoughPPORunnerCfg, 29 | }, 30 | ) 31 | 32 | 33 | gym.register( 34 | id="h1_vision", 35 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 36 | disable_env_checker=True, 37 | kwargs={ 38 | "env_cfg_entry_point": H1VisionRoughEnvCfg, 39 | "rsl_rl_cfg_entry_point": H1VisionRoughPPORunnerCfg, 40 | }, 41 | ) 42 | 43 | 44 | gym.register( 45 | id="h1_vision_play", 46 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv", 47 | disable_env_checker=True, 48 | kwargs={ 49 | "env_cfg_entry_point": H1VisionRoughEnvCfg_PLAY, 50 | "rsl_rl_cfg_entry_point": H1VisionRoughPPORunnerCfg, 51 | }, 52 | ) 53 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | """This sub-module contains the functions that are specific to the viplanner environments.""" 8 | 9 | from omni.isaac.lab.envs.mdp import * # noqa: F401, F403 10 | from omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp.rewards import * 11 | 12 | from .actions import * # noqa: F401, F403 13 | from .commands import * # noqa: F401, F403 14 | from .observations import * # noqa: F401, F403 15 | from .rewards import * # noqa: F401, F403 16 | from .curriculums import * # noqa: F401, F403 17 | from .events import * # noqa: F401, F403 -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | from .navigation_actions import * # noqa: F401, F403 8 | from .vlm_navigation_actions import * # noqa: F401, F403 9 | from .vlm_navigation_actions_gpt import * # noqa: F401, F403 -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/navigation_actions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | from __future__ import annotations 8 | 9 | from dataclasses import MISSING 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision 14 | from omni.isaac.lab.envs import ManagerBasedRLEnv 15 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg 16 | from omni.isaac.lab.utils import configclass 17 | from omni.isaac.lab.utils.assets import check_file_path, read_file 18 | 19 | # class DepthImageProcessor(nn.Module): 20 | # def __init__(self, image_height, image_width, num_output_units): 21 | # super(DepthImageProcessor, self).__init__() 22 | # self.cnn = nn.Sequential( 23 | # nn.Conv2d(1, 32, 3, 1), 24 | # nn.ReLU(), 25 | # nn.Conv2d(32, 64, 3, 1), 26 | # nn.ReLU(), 27 | # nn.MaxPool2d(2), 28 | # ) 29 | # self.fc = nn.Sequential( 30 | # nn.Linear(64 * (image_height//4) * (image_width//4), num_output_units) # Adjust based on your specific task 31 | # ) 32 | 33 | # def forward(self, x): 34 | # x = self.cnn(x) 35 | # x = x.view(x.size(0), -1) 36 | # x = self.fc(x) 37 | # return x 38 | 39 | # -- Navigation Action 40 | class NavigationAction(ActionTerm): 41 | """Actions to navigate a robot by following some path.""" 42 | 43 | cfg: NavigationActionCfg 44 | _env: ManagerBasedRLEnv 45 | 46 | def __init__(self, cfg: NavigationActionCfg, env: ManagerBasedRLEnv): 47 | super().__init__(cfg, env) 48 | # self.depth_cnn = DepthImageProcessor(image_height=self.cfg.image_size[0], 49 | # image_width=self.cfg.image_size[1],num_output_units=32).to(self.device) 50 | # self.resize_transform = torchvision.transforms.Resize((58, 87), 51 | # interpolation=torchvision.transforms.InterpolationMode.BICUBIC) 52 | 53 | self.image_count = 0 54 | # # check if policy file exists 55 | if not check_file_path(self.cfg.low_level_policy_file): 56 | raise FileNotFoundError(f"Policy file '{self.cfg.low_level_policy_file}' does not exist.") 57 | file_bytes = read_file(self.cfg.low_level_policy_file) 58 | 59 | # # # load policies 60 | self.low_level_policy = torch.jit.load(file_bytes, map_location=self.device) 61 | self.low_level_policy = torch.jit.freeze(self.low_level_policy.eval()) 62 | 63 | # prepare joint position actions 64 | self.low_level_action_term: ActionTerm = self.cfg.low_level_action.class_type(cfg.low_level_action, env) 65 | 66 | # prepare buffers 67 | self._action_dim = ( 68 | 3 69 | ) # [vx, vy, omega] --> vx: [-0.5,1.0], vy: [-0.5,0.5], omega: [-1.0,1.0] 70 | self._raw_navigation_velocity_actions = torch.zeros(self.num_envs, self._action_dim, device=self.device) 71 | self._processed_navigation_velocity_actions = torch.zeros( 72 | (self.num_envs, 3), device=self.device 73 | ) 74 | 75 | self._low_level_actions = torch.zeros(self.num_envs, self.low_level_action_term.action_dim, device=self.device) 76 | self._low_level_step_dt = self.cfg.low_level_decimation * self._env.physics_dt 77 | 78 | self._counter = 0 79 | 80 | """ 81 | Properties. 82 | """ 83 | 84 | @property 85 | def action_dim(self) -> int: 86 | return self._action_dim 87 | 88 | @property 89 | def raw_actions(self) -> torch.Tensor: 90 | return self._raw_navigation_velocity_actions 91 | 92 | @property 93 | def processed_actions(self) -> torch.Tensor: 94 | return self._processed_navigation_velocity_actions 95 | 96 | @property 97 | def low_level_actions(self) -> torch.Tensor: 98 | return self._low_level_actions 99 | 100 | """ 101 | Operations. 102 | """ 103 | 104 | def process_actions(self, actions): 105 | """Process high-level navigation actions. 
This function is called with a frequency of 10Hz""" 106 | 107 | # Store low level navigation actions 108 | self._raw_navigation_velocity_actions[:] = actions 109 | # reshape into 3D path 110 | self._processed_navigation_velocity_actions[:] = actions.clone().view(self.num_envs, 3) 111 | 112 | def apply_actions(self): 113 | """Apply low-level actions for the simulator to the physics engine. This functions is called with the 114 | simulation frequency of 200Hz. Since low-level locomotion runs at 50Hz, we need to decimate the actions.""" 115 | if self._counter % self.cfg.low_level_decimation == 0: 116 | self._counter = 0 117 | # # -- update command 118 | self._env.command_manager.compute(dt=self._low_level_step_dt) 119 | # Get low level actions from low level policy 120 | self._low_level_actions[:] = self.low_level_policy( 121 | self._env.observation_manager.compute_group(group_name="low_level_policy") 122 | ) 123 | # Process low level actions 124 | self.low_level_action_term.process_actions(self._low_level_actions) 125 | 126 | # Apply low level actions 127 | self.low_level_action_term.apply_actions() 128 | self._counter += 1 129 | 130 | 131 | @configclass 132 | class NavigationActionCfg(ActionTermCfg): 133 | class_type: type[ActionTerm] = NavigationAction 134 | """ Class of the action term.""" 135 | low_level_decimation: int = 4 136 | """Decimation factor for the low level action term.""" 137 | low_level_action: ActionTermCfg = MISSING 138 | """Configuration of the low level action term.""" 139 | low_level_policy_file: str = MISSING 140 | """Path to the low level policy file.""" 141 | path_length: int = 51 142 | """Length of the path to be followed.""" 143 | # low_level_agent_cfg: dict = {} 144 | image_size: tuple = () 145 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/vlm_navigation_actions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | from __future__ import annotations 8 | 9 | from dataclasses import MISSING 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision 14 | from omni.isaac.lab.envs import ManagerBasedRLEnv 15 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg 16 | from omni.isaac.lab.utils import configclass 17 | from omni.isaac.lab.utils.assets import check_file_path, read_file 18 | 19 | # -- Navigation Action 20 | class VLMActions(ActionTerm): 21 | """Actions to navigate a robot by following some path.""" 22 | 23 | cfg: VLMActionsCfg 24 | _env: ManagerBasedRLEnv 25 | 26 | def __init__(self, cfg: VLMActionsCfg, env: ManagerBasedRLEnv): 27 | super().__init__(cfg, env) 28 | # self.depth_cnn = DepthImageProcessor(image_height=self.cfg.image_size[0], 29 | # image_width=self.cfg.image_size[1],num_output_units=32).to(self.device) 30 | # self.resize_transform = torchvision.transforms.Resize((58, 87), 31 | # interpolation=torchvision.transforms.InterpolationMode.BICUBIC) 32 | 33 | self.image_count = 0 34 | # # check if policy file exists 35 | if not check_file_path(self.cfg.low_level_policy_file): 36 | raise FileNotFoundError(f"Policy file '{self.cfg.low_level_policy_file}' does not exist.") 37 | file_bytes = read_file(self.cfg.low_level_policy_file) 38 | 39 | # # # load policies 40 | self.low_level_policy = torch.jit.load(file_bytes, map_location=self.device) 41 | self.low_level_policy = torch.jit.freeze(self.low_level_policy.eval()) 42 | 43 | # prepare joint position actions 44 | self.low_level_action_term: ActionTerm = self.cfg.low_level_action.class_type(cfg.low_level_action, env) 45 | 46 | # prepare buffers 47 | self._action_dim = ( 48 | 3 49 | ) # [vx, vy, omega] --> vx: [-0.5,1.0], vy: [-0.5,0.5], omega: [-1.0,1.0] 50 | self._raw_navigation_velocity_actions = torch.zeros(self.num_envs, self._action_dim, device=self.device) 51 | self._processed_command_velocity_actions = torch.zeros( 52 | (self.num_envs, 3), device=self.device 53 | ) 54 | 55 | self._low_level_actions = torch.zeros(self.num_envs, self.low_level_action_term.action_dim, device=self.device) 56 | self._low_level_step_dt = self.cfg.low_level_decimation * self._env.physics_dt 57 | 58 | self._counter = 0 59 | 60 | """ 61 | Properties. 62 | """ 63 | 64 | @property 65 | def action_dim(self) -> int: 66 | return self._action_dim 67 | 68 | @property 69 | def raw_actions(self) -> torch.Tensor: 70 | return self._raw_navigation_velocity_actions 71 | 72 | @property 73 | def processed_actions(self) -> torch.Tensor: 74 | return self._processed_command_velocity_actions 75 | 76 | @property 77 | def low_level_actions(self) -> torch.Tensor: 78 | return self._low_level_actions 79 | 80 | """ 81 | Operations. 82 | """ 83 | 84 | def process_actions(self, actions): 85 | """Process high-level navigation actions. This function is called with a frequency of 10Hz""" 86 | 87 | # Store low level navigation actions 88 | self._raw_navigation_velocity_actions[:] = actions 89 | # reshape into 3D path 90 | self._processed_command_velocity_actions[:] = actions.clone().view(self.num_envs, 3) 91 | 92 | def apply_actions(self): 93 | """Apply low-level actions for the simulator to the physics engine. This functions is called with the 94 | simulation frequency of 200Hz. 
Since low-level locomotion runs at 50Hz, we need to decimate the actions.""" 95 | if self._counter % self.cfg.low_level_decimation == 0: 96 | self._counter = 0 97 | # # -- update command 98 | self._env.command_manager.compute(dt=self._low_level_step_dt) 99 | # Get low level actions from low level policy 100 | self._low_level_actions[:] = self.low_level_policy( 101 | self._env.observation_manager.compute_group(group_name="low_level_policy") 102 | ) 103 | # Process low level actions 104 | self.low_level_action_term.process_actions(self._low_level_actions) 105 | 106 | # Apply low level actions 107 | self.low_level_action_term.apply_actions() 108 | self._counter += 1 109 | 110 | 111 | @configclass 112 | class VLMActionsCfg(ActionTermCfg): 113 | class_type: type[ActionTerm] = VLMActions 114 | """ Class of the action term.""" 115 | low_level_decimation: int = 4 116 | """Decimation factor for the low level action term.""" 117 | low_level_action: ActionTermCfg = MISSING 118 | """Configuration of the low level action term.""" 119 | low_level_policy_file: str = MISSING 120 | """Path to the low level policy file.""" 121 | # path_length: int = 51 122 | # """Length of the path to be followed.""" 123 | # # low_level_agent_cfg: dict = {} 124 | # image_size: tuple = () 125 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/vlm_navigation_actions_gpt.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | from __future__ import annotations 8 | 9 | from dataclasses import MISSING 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision 14 | from omni.isaac.lab.envs import ManagerBasedRLEnv 15 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg 16 | from omni.isaac.lab.utils import configclass 17 | from omni.isaac.lab.utils.assets import check_file_path, read_file 18 | 19 | # -- Navigation Action 20 | class VLMActionsGPT(ActionTerm): 21 | """Actions to navigate a robot by following some path.""" 22 | 23 | cfg: VLMActionsGPTCfg 24 | _env: ManagerBasedRLEnv 25 | 26 | def __init__(self, cfg: VLMActionsGPTCfg, env: ManagerBasedRLEnv): 27 | super().__init__(cfg, env) 28 | # self.depth_cnn = DepthImageProcessor(image_height=self.cfg.image_size[0], 29 | # image_width=self.cfg.image_size[1],num_output_units=32).to(self.device) 30 | # self.resize_transform = torchvision.transforms.Resize((58, 87), 31 | # interpolation=torchvision.transforms.InterpolationMode.BICUBIC) 32 | 33 | self.image_count = 0 34 | # # check if policy file exists 35 | if not check_file_path(self.cfg.low_level_policy_file): 36 | raise FileNotFoundError(f"Policy file '{self.cfg.low_level_policy_file}' does not exist.") 37 | file_bytes = read_file(self.cfg.low_level_policy_file) 38 | 39 | # # # load policies 40 | self.low_level_policy = torch.jit.load(file_bytes, map_location=self.device) 41 | self.low_level_policy = torch.jit.freeze(self.low_level_policy.eval()) 42 | 43 | # prepare joint position actions 44 | self.low_level_action_term: ActionTerm = self.cfg.low_level_action.class_type(cfg.low_level_action, env) 45 | 46 | # prepare buffers 47 | self._action_dim = ( 48 | 3 49 | ) # [vx, vy, omega] --> vx: [-0.5,1.0], vy: [-0.5,0.5], omega: [-1.0,1.0] 50 | self._raw_navigation_velocity_actions 
= torch.zeros(self.num_envs, self._action_dim, device=self.device) 51 | self._processed_command_velocity_actions = torch.zeros( 52 | (self.num_envs, 3), device=self.device 53 | ) 54 | 55 | self._low_level_actions = torch.zeros(self.num_envs, self.low_level_action_term.action_dim, device=self.device) 56 | self._low_level_step_dt = self.cfg.low_level_decimation * self._env.physics_dt 57 | 58 | self._counter = 0 59 | 60 | """ 61 | Properties. 62 | """ 63 | 64 | @property 65 | def action_dim(self) -> int: 66 | return self._action_dim 67 | 68 | @property 69 | def raw_actions(self) -> torch.Tensor: 70 | return self._raw_navigation_velocity_actions 71 | 72 | @property 73 | def processed_actions(self) -> torch.Tensor: 74 | return self._processed_command_velocity_actions 75 | 76 | @property 77 | def low_level_actions(self) -> torch.Tensor: 78 | return self._low_level_actions 79 | 80 | """ 81 | Operations. 82 | """ 83 | 84 | def process_actions(self, actions): 85 | """Process high-level navigation actions. This function is called with a frequency of 10Hz""" 86 | 87 | # Store low level navigation actions 88 | self._raw_navigation_velocity_actions[:] = actions 89 | # reshape into 3D path 90 | self._processed_command_velocity_actions[:] = actions.clone().view(self.num_envs, 3) 91 | 92 | def apply_actions(self): 93 | """Apply low-level actions for the simulator to the physics engine. This functions is called with the 94 | simulation frequency of 200Hz. Since low-level locomotion runs at 50Hz, we need to decimate the actions.""" 95 | if self._counter % self.cfg.low_level_decimation == 0: 96 | self._counter = 0 97 | # # -- update command 98 | self._env.command_manager.compute(dt=self._low_level_step_dt) 99 | # Get low level actions from low level policy 100 | self._low_level_actions[:] = self.low_level_policy( 101 | self._env.observation_manager.compute_group(group_name="low_level_policy") 102 | ) 103 | # Process low level actions 104 | self.low_level_action_term.process_actions(self._low_level_actions) 105 | 106 | # Apply low level actions 107 | self.low_level_action_term.apply_actions() 108 | self._counter += 1 109 | 110 | 111 | @configclass 112 | class VLMActionsGPTCfg(ActionTermCfg): 113 | class_type: type[ActionTerm] = VLMActionsGPT 114 | """ Class of the action term.""" 115 | low_level_decimation: int = 4 116 | """Decimation factor for the low level action term.""" 117 | low_level_action: ActionTermCfg = MISSING 118 | """Configuration of the low level action term.""" 119 | low_level_policy_file: str = MISSING 120 | """Path to the low level policy file.""" 121 | # path_length: int = 51 122 | # """Length of the path to be followed.""" 123 | # # low_level_agent_cfg: dict = {} 124 | # image_size: tuple = () 125 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | from .path_follower_command_generator import PathFollowerCommandGenerator 8 | from .path_follower_command_generator_cfg import PathFollowerCommandGeneratorCfg 9 | 10 | from .path_follower_command_generator_gpt import PathFollowerCommandGeneratorGPT 11 | from .path_follower_command_generator_gpt_cfg import PathFollowerCommandGeneratorGPTCfg 12 | 13 | from .rl_command_generator import RLCommandGenerator 14 | from .rl_command_generator_cfg import RLCommandGeneratorCfg 15 | 16 | from .midlevel_command_generator import MidLevelCommandGenerator 17 | from .midlevel_command_generator_cfg import MidLevelCommandGeneratorCfg 18 | 19 | from .lowlevel_command_generator import LowLevelCommandGenerator 20 | from .lowlevel_command_generator_cfg import LowLevelCommandGeneratorCfg 21 | 22 | from .goal_command_generator import GoalCommandGenerator 23 | from .goal_command_generator_cfg import GoalCommandGeneratorCfg 24 | 25 | from .robot_vel_command_generator import RobotVelCommandGenerator 26 | from .robot_vel_command_generator_cfg import RobotVelCommandGeneratorCfg 27 | 28 | __all__ = ["PathFollowerCommandGeneratorCfg", "PathFollowerCommandGenerator", 29 | "PathFollowerCommandGeneratorGPTCfg", "PathFollowerCommandGeneratorGPT", 30 | "RLCommandGeneratorCfg", "RLCommandGenerator", 31 | "MidLevelCommandGeneratorCfg", "MidLevelCommandGenerator", 32 | "LowLevelCommandGeneratorCfg", "LowLevelCommandGenerator", 33 | "GoalCommandGenerator", "GoalCommandGeneratorCfg", 34 | "RobotVelCommandGenerator", "RobotVelCommandGeneratorCfg"] 35 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/goal_command_generator.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import math 4 | from typing import TYPE_CHECKING, Sequence 5 | 6 | import omni.isaac.lab.utils.math as math_utils 7 | from omni.isaac.lab.utils.math import wrap_to_pi, quat_rotate_inverse, yaw_quat 8 | import torch 9 | from omni.isaac.lab.assets.articulation import Articulation 10 | from omni.isaac.lab.envs import ManagerBasedRLEnv 11 | from omni.isaac.lab.managers import CommandTerm 12 | from omni.isaac.lab.markers import VisualizationMarkers 13 | from omni.isaac.lab.markers.config import ( 14 | BLUE_ARROW_X_MARKER_CFG, 15 | GREEN_ARROW_X_MARKER_CFG, 16 | ) 17 | from omni.isaac.lab.sim import SimulationContext 18 | 19 | if TYPE_CHECKING: 20 | from .goal_command_generator_cfg import GoalCommandGeneratorCfg 21 | 22 | 23 | 24 | 25 | 26 | class GoalCommandGenerator(CommandTerm): 27 | """Command generator that generates pose commands containing a 3-D position and heading. 28 | 29 | The command generator samples uniform 2D positions around the environment origin. It sets 30 | the height of the position command to the default root height of the robot. The heading 31 | command is either set to point towards the target or is sampled uniformly. 32 | This can be configured through the :attr:`Pose2dCommandCfg.simple_heading` parameter in 33 | the configuration. 34 | """ 35 | 36 | cfg: GoalCommandGeneratorCfg 37 | """Configuration for the command generator.""" 38 | 39 | def __init__(self, cfg: GoalCommandGeneratorCfg, env: ManagerBasedRLEnv): 40 | """Initialize the command generator class. 41 | 42 | Args: 43 | cfg: The configuration parameters for the command generator. 44 | env: The environment object. 
45 | """ 46 | # initialize the base class 47 | super().__init__(cfg, env) 48 | 49 | # obtain the robot and terrain assets 50 | # -- robot 51 | self.robot: Articulation = env.scene[cfg.asset_name] 52 | 53 | # crete buffers to store the command 54 | # -- commands: (x, y, z, heading) 55 | self.pos_command_w = torch.zeros(self.num_envs, 3, device=self.device) 56 | self.heading_command_w = torch.zeros(self.num_envs, device=self.device) 57 | self.pos_command_b = torch.zeros_like(self.pos_command_w) 58 | self.heading_command_b = torch.zeros_like(self.heading_command_w) 59 | # -- metrics 60 | self.metrics["error_pos_2d"] = torch.zeros(self.num_envs, device=self.device) 61 | self.metrics["error_heading"] = torch.zeros(self.num_envs, device=self.device) 62 | 63 | def __str__(self) -> str: 64 | msg = "PositionCommand:\n" 65 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 66 | msg += f"\tResampling time range: {self.cfg.resampling_time_range}" 67 | return msg 68 | 69 | """ 70 | Properties 71 | """ 72 | 73 | @property 74 | def command(self) -> torch.Tensor: 75 | """The desired 2D-pose in base frame. Shape is (num_envs, 4).""" 76 | return torch.cat([self.pos_command_b, self.heading_command_b.unsqueeze(1)], dim=1) 77 | 78 | """ 79 | Implementation specific functions. 80 | """ 81 | 82 | def _update_metrics(self): 83 | # logs data 84 | self.metrics["error_pos_2d"] = torch.norm(self.pos_command_w[:, :2] - self.robot.data.root_pos_w[:, :2], dim=1) 85 | self.metrics["error_heading"] = torch.abs(wrap_to_pi(self.heading_command_w - self.robot.data.heading_w)) 86 | 87 | def _resample_command(self, env_ids: Sequence[int]): 88 | # import ipdb; ipdb.set_trace() 89 | r = torch.empty(len(env_ids), device=self.device) 90 | self.pos_command_b[env_ids, 0] = r.uniform_(*self.cfg.ranges.pos_x)#6.4756#, r.uniform_(*self.cfg.ranges.pos_x) 91 | self.pos_command_b[env_ids, 1] = r.uniform_(*self.cfg.ranges.pos_y)#-0.2762#r.uniform_(*self.cfg.ranges.pos_y) 92 | self.pos_command_b[env_ids, 2] = 0.0 93 | # self.pos_command_b[env_ids, 0] = 6.4756#6.4756#, r.uniform_(*self.cfg.ranges.pos_x) 94 | # self.pos_command_b[env_ids, 1] = -0.2762#-0.2762#r.uniform_(*self.cfg.ranges.pos_y) 95 | # self.pos_command_b[env_ids, 2] = 0.0 96 | # print("resampled command: ", self.pos_command_b[env_ids,:3]) 97 | 98 | self.pos_command_w[env_ids,:3] = math_utils.quat_apply(math_utils.yaw_quat(self.robot.data.root_quat_w[env_ids,:]), self.pos_command_b[env_ids,:3]) 99 | self.pos_command_w[env_ids,:3] += self.robot.data.root_pos_w[env_ids,:3] 100 | self.pos_command_w[env_ids, 2] = self._env.scene.env_origins[env_ids, 2] + self.robot.data.default_root_state[env_ids, 2] 101 | # self.pos_command_w[env_ids, 1] = -100 102 | # self.pos_command_w[env_ids, 0] = torch.clamp(self.pos_command_w[env_ids, 0], torch.min(self._env.scene.env_origins[:, 0]), torch.max(self._env.scene.env_origins[:, 0])) 103 | # self.pos_command_w[env_ids, 1] = torch.clamp(self.pos_command_w[env_ids, 1], torch.min(self._env.scene.env_origins[:, 1]), torch.max(self._env.scene.env_origins[:, 1])) 104 | # print("world command: ", self.pos_command_w[env_ids,:3]) 105 | # import ipdb; ipdb.set_trace() 106 | # # obtain env origins for the environments 107 | # self.pos_command_w[env_ids] = self._env.scene.env_origins[env_ids] 108 | # # offset the position command by the current root position 109 | # r = torch.empty(len(env_ids), device=self.device) 110 | # self.pos_command_w[env_ids, 0] += r.uniform_(*self.cfg.ranges.pos_x) 111 | # self.pos_command_w[env_ids, 1] += 
r.uniform_(*self.cfg.ranges.pos_y) 112 | # self.pos_command_w[env_ids, 2] += self.robot.data.default_root_state[env_ids, 2] 113 | 114 | if self.cfg.simple_heading: 115 | # set heading command to point towards target 116 | target_vec = self.pos_command_w[env_ids] - self.robot.data.root_pos_w[env_ids] 117 | target_direction = torch.atan2(target_vec[:, 1], target_vec[:, 0]) 118 | flipped_target_direction = wrap_to_pi(target_direction + torch.pi) 119 | 120 | # compute errors to find the closest direction to the current heading 121 | # this is done to avoid the discontinuity at the -pi/pi boundary 122 | curr_to_target = wrap_to_pi(target_direction - self.robot.data.heading_w[env_ids]).abs() 123 | curr_to_flipped_target = wrap_to_pi(flipped_target_direction - self.robot.data.heading_w[env_ids]).abs() 124 | 125 | # set the heading command to the closest direction 126 | self.heading_command_w[env_ids] = torch.where( 127 | curr_to_target < curr_to_flipped_target, 128 | target_direction, 129 | flipped_target_direction, 130 | ) 131 | else: 132 | # random heading command 133 | self.heading_command_w[env_ids] = r.uniform_(*self.cfg.ranges.heading) 134 | 135 | def _update_command(self): 136 | """Re-target the position command to the current root state.""" 137 | target_vec = self.pos_command_w - self.robot.data.root_pos_w[:, :3] 138 | self.pos_command_b[:] = quat_rotate_inverse(yaw_quat(self.robot.data.root_quat_w), target_vec) 139 | self.pos_command_b[:, 2] = 0.0 140 | self.heading_command_b[:] = wrap_to_pi(self.heading_command_w - self.robot.data.heading_w)*0.0 141 | 142 | def _set_debug_vis_impl(self, debug_vis: bool): 143 | # create markers if necessary for the first time 144 | if debug_vis: 145 | if not hasattr(self, "arrow_goal_visualizer"): 146 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy() 147 | marker_cfg.markers["arrow"].scale = (0.2, 0.2, 0.8) 148 | marker_cfg.prim_path = "/Visuals/Command/pose_goal" 149 | self.arrow_goal_visualizer = VisualizationMarkers(marker_cfg) 150 | # set their visibility to true 151 | self.arrow_goal_visualizer.set_visibility(True) 152 | else: 153 | if hasattr(self, "arrow_goal_visualizer"): 154 | self.arrow_goal_visualizer.set_visibility(False) 155 | 156 | def _debug_vis_callback(self, event): 157 | # update the goal arrow marker 158 | self.arrow_goal_visualizer.visualize( 159 | translations=self.pos_command_w, 160 | orientations=math_utils.quat_from_euler_xyz( 161 | torch.zeros_like(self.heading_command_w), 162 | torch.zeros_like(self.heading_command_w), 163 | self.heading_command_w, 164 | ), 165 | ) 166 | 167 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/goal_command_generator_cfg.py: -------------------------------------------------------------------------------- 1 | import math 2 | from dataclasses import MISSING 3 | 4 | from omni.isaac.lab.managers import CommandTermCfg 5 | from omni.isaac.lab.utils.configclass import configclass 6 | from typing_extensions import Literal 7 | 8 | from .goal_command_generator import GoalCommandGenerator 9 | 10 | 11 | @configclass 12 | class GoalCommandGeneratorCfg(CommandTermCfg): 13 | """Configuration for the uniform 2D-pose command generator.""" 14 | 15 | class_type: type = GoalCommandGenerator 16 | 17 | asset_name: str = MISSING 18 | """Name of the asset in the environment for which the commands are generated.""" 19 | 20 | simple_heading: bool = MISSING 21 | """Whether to use simple heading or not.
22 | 23 | If True, the heading is in the direction of the target position. 24 | """ 25 | 26 | @configclass 27 | class Ranges: 28 | """Uniform distribution ranges for the position commands.""" 29 | 30 | pos_x: tuple[float, float] = MISSING 31 | """Range for the x position (in m).""" 32 | pos_y: tuple[float, float] = MISSING 33 | """Range for the y position (in m).""" 34 | heading: tuple[float, float] = MISSING 35 | """Heading range for the position commands (in rad). 36 | 37 | Used only if :attr:`simple_heading` is False. 38 | """ 39 | 40 | ranges: Ranges = MISSING 41 | """Distribution ranges for the position commands.""" 42 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/lowlevel_command_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | from __future__ import annotations 14 | 15 | import os 16 | import math 17 | from typing import TYPE_CHECKING, Sequence 18 | 19 | import omni.isaac.lab.utils.math as math_utils 20 | import torch 21 | from omni.isaac.lab.assets.articulation import Articulation 22 | from omni.isaac.lab.envs import ManagerBasedRLEnv 23 | from omni.isaac.lab.managers import CommandTerm 24 | from omni.isaac.lab.markers import VisualizationMarkers 25 | from omni.isaac.lab.markers.config import ( 26 | BLUE_ARROW_X_MARKER_CFG, 27 | GREEN_ARROW_X_MARKER_CFG, 28 | CUBOID_MARKER_CFG, 29 | ) 30 | from omni.isaac.lab.sim import SimulationContext 31 | from omni.isaac.lab.utils.assets import check_file_path, read_file 32 | # from omni.isaac.lab.utils.assets import LOCAL_ISAAC_DIR 33 | 34 | 35 | if TYPE_CHECKING: 36 | from .lowlevel_command_generator_cfg import LowLevelCommandGeneratorCfg 37 | 38 | 39 | class LowLevelCommandGenerator(CommandTerm): 40 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner. 41 | 42 | The command comprises of a linear velocity in x and y direction and an angular velocity around 43 | the z-axis. It is given in the robot's base frame. 44 | 45 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance 46 | and uses it to compute the steering angle and the linear velocity. 47 | """ 48 | 49 | cfg: LowLevelCommandGeneratorCfg 50 | """The configuration of the command generator.""" 51 | 52 | def __init__(self, cfg: LowLevelCommandGeneratorCfg, env: ManagerBasedRLEnv): 53 | """Initialize the command generator. 54 | 55 | Args: 56 | cfg (LowLevelCommandGeneratorCfg): The configuration of the command generator. 57 | env (object): The environment. 
58 | """ 59 | super().__init__(cfg, env) 60 | # -- robot 61 | self.robot: Articulation = env.scene[cfg.robot_attr] 62 | # -- Simulation Context 63 | self.sim: SimulationContext = SimulationContext.instance() 64 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device) 65 | # -- debug vis 66 | self._base_vel_goal_markers = None 67 | self._base_vel_markers = None 68 | 69 | # Rotation mark 70 | self.rotation_mark = False 71 | self.initialized = False 72 | self.goal_reached = False 73 | self.identity_quat = torch.tensor([1.0, 0.0, 0.0, 0.0], device=self._env.device).repeat(self._env.num_envs, 1) 74 | 75 | self.objnav_policy_path = os.path.join(LOCAL_ISAAC_DIR, "low_level_policy", "mid_policy_height_scan.jit") 76 | self.objnav_policy_path = os.path.join(LOCAL_ISAAC_DIR, "low_level_policy", "mid_policy_depth.jit") 77 | if not check_file_path(self.objnav_policy_path): 78 | raise FileNotFoundError(f"Policy file '{self.objnav_policy_path}' does not exist.") 79 | file_bytes = read_file(self.objnav_policy_path) 80 | self.mid_level_policy = torch.jit.load(file_bytes, map_location=self.device) 81 | self.mid_level_policy = torch.jit.freeze(self.mid_level_policy.eval()) 82 | 83 | 84 | def __str__(self) -> str: 85 | """Return a string representation of the command generator.""" 86 | msg = "rlCommandGenerator:\n" 87 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 88 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n" 89 | return msg 90 | 91 | """ 92 | Properties 93 | """ 94 | 95 | @property 96 | def command(self) -> torch.Tensor: 97 | """The desired base velocity command in the base frame. Shape is (num_envs, 3).""" 98 | # print("twist: ", self.twist) 99 | return self.twist 100 | 101 | """ 102 | Operations. 103 | """ 104 | 105 | def reset(self, env_ids: Sequence[int] | None = None) -> dict: 106 | """Reset the command generator. 107 | 108 | This function resets the command generator. It should be called whenever the environment is reset. 109 | 110 | Args: 111 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None. 112 | """ 113 | if env_ids is None: 114 | env_ids = ... 115 | 116 | self.twist = torch.zeros((self.num_envs, 3), device=self.device) 117 | self.goal_reached = False 118 | 119 | return {} 120 | 121 | def compute(self, dt: float): 122 | """Compute the command. 123 | 124 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths 125 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all 126 | environments. 
127 | """ 128 | if not self.rotation_mark and not self.goal_reached and self.initialized: 129 | # get paths 130 | self.twist = self.mid_level_policy(self._env.observation_manager.compute_group(group_name="mid_level_planner")).reshape(self._env.num_envs, 3) 131 | # self.twist = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone() 132 | # import ipdb; ipdb.set_trace() 133 | self.twist[:, 0] = torch.clip(self.twist[:, 0], 0.0, 0.5) 134 | self.twist[:,1] = 0.0 135 | self.twist[:,2] = torch.clip(self.twist[:,2], -math.pi, math.pi) 136 | elif self.goal_reached or (not self.initialized): 137 | # self.twist[:, 0] = torch.clip(self.twist[:, 0], 0.0, 0.5) 138 | # self.twist[:,1] = 0.0 139 | self.twist[:,:2] = torch.zeros((self.num_envs, 2), device=self.device) 140 | else: 141 | # self.twist = torch.zeros((self.num_envs, 3), device=self.device) 142 | self.twist[:,:2] = torch.zeros((self.num_envs, 2), device=self.device) 143 | self.twist[:, 2] += 0.1 144 | print("rotation") 145 | self.goal_reached = self._env.command_manager._terms['midlevel_command'].command[:, :2].norm(dim=1) < 2.5 146 | return self.twist 147 | 148 | """ 149 | Implementation specific functions. 150 | """ 151 | 152 | def _update_command(self): 153 | pass 154 | 155 | def _update_metrics(self): 156 | pass 157 | 158 | def _resample_command(self, env_ids: Sequence[int]): 159 | pass 160 | 161 | def _set_debug_vis_impl(self, debug_vis: bool): 162 | # set visibility of markers 163 | # note: parent only deals with callbacks. not their visibility 164 | if debug_vis: 165 | # create markers if necessary for the first tome 166 | if not hasattr(self, "base_vel_goal_visualizer"): 167 | # -- goal 168 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy() 169 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal" 170 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 171 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg) 172 | # -- current 173 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy() 174 | marker_cfg.prim_path = "/Visuals/Command/velocity_current" 175 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 176 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg) 177 | # -- goal command 178 | marker_cfg = CUBOID_MARKER_CFG.copy() 179 | marker_cfg.prim_path = "/Visuals/Command/pos_goal_command" 180 | marker_cfg.markers["cuboid"].scale = (0.5, 0.5, 0.5) 181 | # self.base_vel_goal_command_visualizer = VisualizationMarkers(marker_cfg) 182 | # set their visibility to true 183 | self.base_vel_goal_visualizer.set_visibility(True) 184 | self.base_vel_visualizer.set_visibility(True) 185 | # self.base_vel_goal_command_visualizer.set_visibility(True) 186 | else: 187 | if hasattr(self, "base_vel_goal_visualizer"): 188 | self.base_vel_goal_visualizer.set_visibility(False) 189 | self.base_vel_visualizer.set_visibility(False) 190 | # self.base_vel_goal_command_visualizer.set_visibility(False) 191 | 192 | def _debug_vis_callback(self, event): 193 | # get marker location 194 | # -- base state 195 | base_pos_w = self.robot.data.root_pos_w.clone() 196 | base_pos_w[:, 2] += 0.5 197 | base_quat_w = self.robot.data.root_quat_w.clone() 198 | 199 | # -- resolve the scales and quaternions 200 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2]) 201 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2]) 202 | # display markers 203 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, 
vel_des_arrow_scale) 204 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale) 205 | 206 | # pos_command_w = self._env.command_manager._terms['midlevel_command'].pos_command_w.clone() 207 | # default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale 208 | # larger_scale = 2.0*torch.tensor(default_scale, device=self.device).repeat(pos_command_w.shape[0], 1) 209 | # self.base_vel_goal_command_visualizer.visualize(pos_command_w, self.identity_quat,larger_scale) 210 | 211 | """ 212 | Internal helpers. 213 | """ 214 | 215 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 216 | """Converts the XY base velocity command to arrow direction rotation.""" 217 | # obtain default scale of the marker 218 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale 219 | # arrow-scale 220 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1) 221 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) 222 | # arrow-direction 223 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0]) 224 | zeros = torch.zeros_like(heading_angle) 225 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle) 226 | 227 | return arrow_scale, arrow_quat 228 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/lowlevel_command_generator_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | 14 | import math 15 | from dataclasses import MISSING 16 | 17 | from omni.isaac.lab.managers import CommandTermCfg 18 | from omni.isaac.lab.utils.configclass import configclass 19 | from typing_extensions import Literal 20 | 21 | from .lowlevel_command_generator import LowLevelCommandGenerator 22 | 23 | 24 | @configclass 25 | class LowLevelCommandGeneratorCfg(CommandTermCfg): 26 | class_type: LowLevelCommandGenerator = LowLevelCommandGenerator 27 | """Name of the command generator class.""" 28 | 29 | robot_attr: str = MISSING 30 | """Name of the robot attribute from the environment.""" 31 | 32 | path_frame: Literal["world", "robot"] = "world" 33 | """Frame in which the path is defined. 34 | - ``world``: the path is defined in the world frame. Also called ``odom``. 35 | - ``robot``: the path is defined in the robot frame. Also called ``base``. 36 | """ 37 | 38 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/midlevel_command_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | from __future__ import annotations 14 | 15 | import math 16 | from typing import TYPE_CHECKING, Sequence 17 | 18 | import omni.isaac.lab.utils.math as math_utils 19 | import torch 20 | from omni.isaac.lab.assets.articulation import Articulation 21 | from omni.isaac.lab.envs import ManagerBasedRLEnv 22 | from omni.isaac.lab.managers import CommandTerm 23 | from omni.isaac.lab.sim import SimulationContext 24 | 25 | if TYPE_CHECKING: 26 | from .midlevel_command_generator_cfg import MidLevelCommandGeneratorCfg 27 | 28 | 29 | class MidLevelCommandGenerator(CommandTerm): 30 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner. 31 | 32 | The command comprises of a linear velocity in x and y direction and an angular velocity around 33 | the z-axis. It is given in the robot's base frame. 34 | 35 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance 36 | and uses it to compute the steering angle and the linear velocity. 37 | """ 38 | 39 | cfg: MidLevelCommandGeneratorCfg 40 | """The configuration of the command generator.""" 41 | 42 | def __init__(self, cfg: MidLevelCommandGeneratorCfg, env: ManagerBasedRLEnv): 43 | """Initialize the command generator. 44 | 45 | Args: 46 | cfg (RLCommandGeneratorCfg): The configuration of the command generator. 47 | env (object): The environment. 48 | """ 49 | super().__init__(cfg, env) 50 | 51 | # -- robot 52 | self.robot: Articulation = env.scene[cfg.robot_attr] 53 | # -- Simulation Context 54 | self.sim: SimulationContext = SimulationContext.instance() 55 | self.command_b: torch.Tensor = torch.zeros((self.num_envs, 4), device=self.device) 56 | # -- debug vis 57 | self._base_vel_goal_markers = None 58 | self._base_vel_markers = None 59 | 60 | # Rotation mark 61 | self.rotation_mark = False 62 | self.initialized = False 63 | 64 | def __str__(self) -> str: 65 | """Return a string representation of the command generator.""" 66 | msg = "rlCommandGenerator:\n" 67 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 68 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n" 69 | return msg 70 | 71 | """ 72 | Properties 73 | """ 74 | 75 | @property 76 | def command(self) -> torch.Tensor: 77 | """The desired base velocity command in the base frame. Shape is (num_envs, 3).""" 78 | # print("twist: ", self.twist) 79 | return self.command_b 80 | 81 | """ 82 | Operations. 83 | """ 84 | 85 | def reset(self, env_ids: Sequence[int] | None = None) -> dict: 86 | """Reset the command generator. 87 | 88 | This function resets the command generator. It should be called whenever the environment is reset. 89 | 90 | Args: 91 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None. 92 | """ 93 | if env_ids is None: 94 | env_ids = ... 95 | 96 | self.command_b = torch.zeros((self.num_envs, 4), device=self.device) 97 | self.goal_reached = False 98 | 99 | return {} 100 | 101 | def compute(self, dt: float): 102 | """Compute the command. 103 | 104 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths 105 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all 106 | environments. 
107 | """ 108 | # get paths 109 | self.command_b[:,:2] = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone()[:,:2] 110 | 111 | return self.command_b 112 | 113 | """ 114 | Implementation specific functions. 115 | """ 116 | 117 | def _update_command(self): 118 | pass 119 | 120 | def _update_metrics(self): 121 | pass 122 | 123 | def _resample_command(self, env_ids: Sequence[int]): 124 | pass 125 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/midlevel_command_generator_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | 14 | import math 15 | from dataclasses import MISSING 16 | 17 | from omni.isaac.lab.managers import CommandTermCfg 18 | from omni.isaac.lab.utils.configclass import configclass 19 | from typing_extensions import Literal 20 | 21 | from .midlevel_command_generator import MidLevelCommandGenerator 22 | 23 | 24 | @configclass 25 | class MidLevelCommandGeneratorCfg(CommandTermCfg): 26 | class_type: MidLevelCommandGenerator = MidLevelCommandGenerator 27 | """Name of the command generator class.""" 28 | 29 | robot_attr: str = MISSING 30 | """Name of the robot attribute from the environment.""" 31 | 32 | path_frame: Literal["world", "robot"] = "world" 33 | """Frame in which the path is defined. 34 | - ``world``: the path is defined in the world frame. Also called ``odom``. 35 | - ``robot``: the path is defined in the robot frame. Also called ``base``. 36 | """ 37 | 38 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | from __future__ import annotations 14 | 15 | import math 16 | from typing import TYPE_CHECKING, Sequence 17 | 18 | import omni.isaac.lab.utils.math as math_utils 19 | import torch 20 | from omni.isaac.lab.assets.articulation import Articulation 21 | from omni.isaac.lab.envs import ManagerBasedRLEnv 22 | from omni.isaac.lab.managers import CommandTerm 23 | from omni.isaac.lab.markers import VisualizationMarkers 24 | from omni.isaac.lab.markers.config import ( 25 | BLUE_ARROW_X_MARKER_CFG, 26 | GREEN_ARROW_X_MARKER_CFG, 27 | ) 28 | from omni.isaac.lab.sim import SimulationContext 29 | 30 | if TYPE_CHECKING: 31 | from .path_follower_command_generator_cfg import PathFollowerCommandGeneratorCfg 32 | 33 | 34 | class PathFollowerCommandGenerator(CommandTerm): 35 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner. 36 | 37 | The command comprises of a linear velocity in x and y direction and an angular velocity around 38 | the z-axis. It is given in the robot's base frame. 39 | 40 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance 41 | and uses it to compute the steering angle and the linear velocity. 42 | """ 43 | 44 | cfg: PathFollowerCommandGeneratorCfg 45 | """The configuration of the command generator.""" 46 | 47 | def __init__(self, cfg: PathFollowerCommandGeneratorCfg, env: ManagerBasedRLEnv): 48 | """Initialize the command generator. 49 | 50 | Args: 51 | cfg (PathFollowerCommandGeneratorCfg): The configuration of the command generator. 52 | env (object): The environment. 53 | """ 54 | super().__init__(cfg, env) 55 | # -- robot 56 | self.robot: Articulation = env.scene[cfg.robot_attr] 57 | # -- Simulation Context 58 | self.sim: SimulationContext = SimulationContext.instance() 59 | # -- buffers 60 | self.vehicleSpeed: torch.Tensor = torch.zeros(self.num_envs, device=self.device) 61 | self.switch_time: torch.Tensor = torch.zeros(self.num_envs, device=self.device) 62 | self.vehicleYawRate: torch.Tensor = torch.zeros(self.num_envs, device=self.device) 63 | self.navigation_forward: torch.Tensor = torch.ones(self.num_envs, device=self.device, dtype=torch.bool) 64 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device) 65 | # -- debug vis 66 | self._base_vel_goal_markers = None 67 | self._base_vel_markers = None 68 | 69 | # Rotation mark 70 | self.rotation_mark = False 71 | self.initialized = False 72 | self.goal_reached = False 73 | 74 | def __str__(self) -> str: 75 | """Return a string representation of the command generator.""" 76 | msg = "PathFollowerCommandGenerator:\n" 77 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 78 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n" 79 | return msg 80 | 81 | """ 82 | Properties 83 | """ 84 | 85 | @property 86 | def command(self) -> torch.Tensor: 87 | """The desired base velocity command in the base frame. Shape is (num_envs, 3).""" 88 | return self.twist 89 | 90 | """ 91 | Operations. 92 | """ 93 | 94 | def reset(self, env_ids: Sequence[int] | None = None) -> dict: 95 | """Reset the command generator. 96 | 97 | This function resets the command generator. It should be called whenever the environment is reset. 98 | 99 | Args: 100 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None. 
101 | """ 102 | if env_ids is None: 103 | env_ids = ... 104 | 105 | self.vehicleSpeed = torch.zeros(self.num_envs, device=self.device) 106 | self.switch_time = torch.zeros(self.num_envs, device=self.device) 107 | self.vehicleYawRate = torch.zeros(self.num_envs, device=self.device) 108 | self.navigation_forward = torch.ones(self.num_envs, device=self.device, dtype=torch.bool) 109 | self.twist = torch.zeros((self.num_envs, 3), device=self.device) 110 | self.goal_reached = False 111 | 112 | return {} 113 | 114 | def compute(self, dt: float): 115 | """Compute the command. 116 | 117 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths 118 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all 119 | environments. 120 | """ 121 | # get paths 122 | paths = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone() 123 | # get number of pases of the paths 124 | num_envs, N = paths.shape 125 | assert N > 0, "PathFollowerCommandGenerator: paths must have at least one poses." 126 | # get the current simulation time 127 | curr_time = self.sim.current_time 128 | # define current maxSpeed for the velocities 129 | max_speed = torch.ones(num_envs, device=self.device) * self.cfg.maxSpeed 130 | 131 | # # transform path in base/ robot frame if given in world frame 132 | # if self.cfg.path_frame == "world": 133 | # paths = math_utils.quat_apply( 134 | # math_utils.quat_inv(self.robot.data.root_quat_w[:, None, :].repeat(1, N, 1)), 135 | # paths - self.robot.data.root_pos_w[:, None, :], 136 | # ) 137 | self.paths_diff_global = paths[:,:2] - self.robot.data.root_pos_w[:, :2] 138 | 139 | if self.initialized and not self.goal_reached: 140 | if not self.rotation_mark: 141 | # if abs(self.paths_diff_global[0,0,0]) < 0.5: 142 | # self.twist[:, 0] = 0.0 143 | # else: 144 | self.twist[:, 0] = min(max(self.paths_diff_global[0,0], -0.5), 0.5) 145 | # if abs(self.paths_diff_global[0,0,1]) < 2.0: 146 | # self.twist[:, 1] = 0.0 147 | # else: 148 | self.twist[:, 1] = min(max(self.paths_diff_global[0,1], -0.3), 0.3) 149 | self.twist[:, 2] = min(max(0.01*paths[0,2], -0.2), 0.2) 150 | # TODO: add yaw rotation mechanism 151 | else: 152 | self.twist[:,0] = 0.0 153 | self.twist[:,1] = 0.0 154 | self.twist[:,2] = 0.5 155 | else: 156 | self.twist[:,0] = 0.0 157 | self.twist[:,1] = 0.0 158 | self.twist[:,2] = 0.0 159 | if (torch.linalg.norm(self.paths_diff_global[0,:2], dim=-1))<3.0 and self.initialized: 160 | self.goal_reached = True 161 | # print("goal_reached: ", self.goal_reached, " rotation_mark: ", self.rotation_mark, " twist: ", self.twist[:,:3], " norm: ", torch.linalg.norm(self.paths_diff_global[0,0,:2], dim=-1), " initialized: ", self.initialized) 162 | 163 | return self.twist 164 | 165 | """ 166 | Implementation specific functions. 167 | """ 168 | 169 | def _update_command(self): 170 | pass 171 | 172 | def _update_metrics(self): 173 | pass 174 | 175 | def _resample_command(self, env_ids: Sequence[int]): 176 | pass 177 | 178 | def _set_debug_vis_impl(self, debug_vis: bool): 179 | # set visibility of markers 180 | # note: parent only deals with callbacks. 
not their visibility 181 | if debug_vis: 182 | # create markers if necessary for the first tome 183 | if not hasattr(self, "base_vel_goal_visualizer"): 184 | # -- goal 185 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy() 186 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal" 187 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 188 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg) 189 | # -- current 190 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy() 191 | marker_cfg.prim_path = "/Visuals/Command/velocity_current" 192 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 193 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg) 194 | # set their visibility to true 195 | self.base_vel_goal_visualizer.set_visibility(True) 196 | self.base_vel_visualizer.set_visibility(True) 197 | else: 198 | if hasattr(self, "base_vel_goal_visualizer"): 199 | self.base_vel_goal_visualizer.set_visibility(False) 200 | self.base_vel_visualizer.set_visibility(False) 201 | 202 | def _debug_vis_callback(self, event): 203 | # get marker location 204 | # -- base state 205 | base_pos_w = self.robot.data.root_pos_w.clone() 206 | base_pos_w[:, 2] += 0.5 207 | # -- resolve the scales and quaternions 208 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2]) 209 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2]) 210 | # display markers 211 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale) 212 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale) 213 | 214 | """ 215 | Internal helpers. 216 | """ 217 | 218 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 219 | """Converts the XY base velocity command to arrow direction rotation.""" 220 | # obtain default scale of the marker 221 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale 222 | # arrow-scale 223 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1) 224 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) 225 | # arrow-direction 226 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0]) 227 | zeros = torch.zeros_like(heading_angle) 228 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle) 229 | 230 | return arrow_scale, arrow_quat 231 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | 14 | import math 15 | from dataclasses import MISSING 16 | 17 | from omni.isaac.lab.managers import CommandTermCfg 18 | from omni.isaac.lab.utils.configclass import configclass 19 | from typing_extensions import Literal 20 | 21 | from .path_follower_command_generator import PathFollowerCommandGenerator 22 | 23 | 24 | @configclass 25 | class PathFollowerCommandGeneratorCfg(CommandTermCfg): 26 | class_type: PathFollowerCommandGenerator = PathFollowerCommandGenerator 27 | """Name of the command generator class.""" 28 | 29 | robot_attr: str = MISSING 30 | """Name of the robot attribute from the environment.""" 31 | 32 | path_frame: Literal["world", "robot"] = "world" 33 | """Frame in which the path is defined. 34 | - ``world``: the path is defined in the world frame. Also called ``odom``. 35 | - ``robot``: the path is defined in the robot frame. Also called ``base``. 36 | """ 37 | 38 | lookAheadDistance: float = MISSING 39 | """The lookahead distance for the path follower.""" 40 | two_way_drive: bool = False 41 | """Allow robot to use reverse gear.""" 42 | switch_time_threshold: float = 1.0 43 | """Time threshold to switch between the forward and backward drive.""" 44 | maxSpeed: float = 0.5 45 | """Maximum speed of the robot.""" 46 | maxAccel: float = 2.5 / 100.0 # 2.5 / 100 47 | """Maximum acceleration of the robot.""" 48 | joyYaw: float = 1.0 49 | """TODO: add description""" 50 | yawRateGain: float = 7.0 # 3.5 51 | """Gain for the yaw rate.""" 52 | stopYawRateGain: float = 7.0 # 3.5 53 | """""" 54 | maxYawRate: float = 90.0 * math.pi / 360 55 | dirDiffThre: float = 0.7 56 | stopDisThre: float = 0.2 57 | slowDwnDisThre: float = 0.3 58 | slowRate1: float = 0.25 59 | slowRate2: float = 0.5 60 | noRotAtGoal: bool = True 61 | autonomyMode: bool = False 62 | 63 | dynamic_lookahead: bool = False 64 | min_points_within_lookahead: int = 3 65 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator_gpt.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | from __future__ import annotations 14 | 15 | import math 16 | from typing import TYPE_CHECKING, Sequence 17 | 18 | import omni.isaac.lab.utils.math as math_utils 19 | import torch 20 | from omni.isaac.lab.assets.articulation import Articulation 21 | from omni.isaac.lab.envs import ManagerBasedRLEnv 22 | from omni.isaac.lab.managers import CommandTerm 23 | from omni.isaac.lab.markers import VisualizationMarkers 24 | from omni.isaac.lab.markers.config import ( 25 | BLUE_ARROW_X_MARKER_CFG, 26 | GREEN_ARROW_X_MARKER_CFG, 27 | ) 28 | from omni.isaac.lab.sim import SimulationContext 29 | 30 | if TYPE_CHECKING: 31 | from .path_follower_command_generator_gpt_cfg import PathFollowerCommandGeneratorGPTCfg 32 | 33 | 34 | class PathFollowerCommandGeneratorGPT(CommandTerm): 35 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner. 36 | 37 | The command comprises of a linear velocity in x and y direction and an angular velocity around 38 | the z-axis. It is given in the robot's base frame. 39 | 40 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance 41 | and uses it to compute the steering angle and the linear velocity. 42 | """ 43 | 44 | cfg: PathFollowerCommandGeneratorGPTCfg 45 | """The configuration of the command generator.""" 46 | 47 | def __init__(self, cfg: PathFollowerCommandGeneratorGPTCfg, env: ManagerBasedRLEnv): 48 | """Initialize the command generator. 49 | 50 | Args: 51 | cfg (PathFollowerCommandGeneratorCfg): The configuration of the command generator. 52 | env (object): The environment. 53 | """ 54 | super().__init__(cfg, env) 55 | # -- robot 56 | self.robot: Articulation = env.scene[cfg.robot_attr] 57 | # -- Simulation Context 58 | self.sim: SimulationContext = SimulationContext.instance() 59 | # -- buffers 60 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device) 61 | # -- debug vis 62 | self._base_vel_goal_markers = None 63 | self._base_vel_markers = None 64 | 65 | # Rotation mark 66 | self.rotation_mark = False 67 | self.initialized = False 68 | self.goal_reached = False 69 | 70 | def __str__(self) -> str: 71 | """Return a string representation of the command generator.""" 72 | msg = "PathFollowerCommandGenerator:\n" 73 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 74 | return msg 75 | 76 | """ 77 | Properties 78 | """ 79 | 80 | @property 81 | def command(self) -> torch.Tensor: 82 | """The desired base velocity command in the base frame. Shape is (num_envs, 3).""" 83 | return self.twist 84 | 85 | """ 86 | Operations. 87 | """ 88 | 89 | def reset(self, env_ids: Sequence[int] | None = None) -> dict: 90 | """Reset the command generator. 91 | 92 | This function resets the command generator. It should be called whenever the environment is reset. 93 | 94 | Args: 95 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None. 96 | """ 97 | if env_ids is None: 98 | env_ids = ... 99 | 100 | self.twist = torch.zeros((self.num_envs, 3), device=self.device) 101 | self.goal_reached = False 102 | 103 | return {} 104 | 105 | def compute(self, dt: float): 106 | """Compute the command. 107 | 108 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths 109 | should be given in the base frame of the robot. 
Num_envs is equal to the number of robots spawned in all 110 | environments. 111 | """ 112 | # get paths 113 | goals = self._env.action_manager._terms['vlm_actions_gpt']._processed_command_velocity_actions.clone() 114 | # get number of pases of the paths 115 | num_envs, N = goals.shape 116 | assert N > 0, "PathFollowerCommandGenerator: paths must have at least one poses." 117 | # get the current simulation time 118 | curr_time = self.sim.current_time 119 | # define current maxSpeed for the velocities 120 | max_speed = torch.ones(num_envs, device=self.device) * self.cfg.maxSpeed 121 | 122 | # # transform path in base/ robot frame if given in world frame 123 | # if self.cfg.path_frame == "world": 124 | # paths = math_utils.quat_apply( 125 | # math_utils.quat_inv(self.robot.data.root_quat_w[:, None, :].repeat(1, N, 1)), 126 | # paths - self.robot.data.root_pos_w[:, None, :], 127 | # ) 128 | # self.paths_diff_global = goals[:,:2] #- self.robot.data.root_pos_w[:, :2] 129 | # yaw_angle_ 130 | 131 | # if self.initialized and not self.goal_reached: 132 | # if not self.rotation_mark: 133 | # # if abs(self.paths_diff_global[0,0,0]) < 0.5: 134 | # # self.twist[:, 0] = 0.0 135 | # # else: 136 | # self.twist[:, 0] = min(max(self.paths_diff_global[0,0], -0.5), 0.5) 137 | # # if abs(self.paths_diff_global[0,0,1]) < 2.0: 138 | # # self.twist[:, 1] = 0.0 139 | # # else: 140 | # self.twist[:, 1] = 0.0#min(max(self.paths_diff_global[0,1], -0.3), 0.3) 141 | # self.twist[:, 2] = min(max(0.01*goals[0,2], -0.2), 0.2) 142 | # # TODO: add yaw rotation mechanism 143 | # else: 144 | # self.twist[:,0] = 0.0 145 | # self.twist[:,1] = 0.0 146 | # self.twist[:,2] = 0.5 147 | # else: 148 | # self.twist[:,0] = 0.0 149 | # self.twist[:,1] = 0.0 150 | # self.twist[:,2] = 0.0 151 | self.twist[0,:3] = goals[0,:3] 152 | # if (torch.linalg.norm(self.paths_diff_global[0,:2], dim=-1))<3.0 and self.initialized: 153 | # self.goal_reached = True 154 | # print("goal_reached: ", self.goal_reached, " rotation_mark: ", self.rotation_mark, " twist: ", self.twist[:,:3], " norm: ", torch.linalg.norm(self.paths_diff_global[0,0,:2], dim=-1), " initialized: ", self.initialized) 155 | 156 | return self.twist 157 | 158 | """ 159 | Implementation specific functions. 160 | """ 161 | 162 | def _update_command(self): 163 | pass 164 | 165 | def _update_metrics(self): 166 | pass 167 | 168 | def _resample_command(self, env_ids: Sequence[int]): 169 | pass 170 | 171 | def _set_debug_vis_impl(self, debug_vis: bool): 172 | # set visibility of markers 173 | # note: parent only deals with callbacks. 
not their visibility 174 | if debug_vis: 175 | # create markers if necessary for the first tome 176 | if not hasattr(self, "base_vel_goal_visualizer"): 177 | # -- goal 178 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy() 179 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal" 180 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 181 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg) 182 | # -- current 183 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy() 184 | marker_cfg.prim_path = "/Visuals/Command/velocity_current" 185 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 186 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg) 187 | # set their visibility to true 188 | self.base_vel_goal_visualizer.set_visibility(True) 189 | self.base_vel_visualizer.set_visibility(True) 190 | else: 191 | if hasattr(self, "base_vel_goal_visualizer"): 192 | self.base_vel_goal_visualizer.set_visibility(False) 193 | self.base_vel_visualizer.set_visibility(False) 194 | 195 | def _debug_vis_callback(self, event): 196 | # get marker location 197 | # -- base state 198 | base_pos_w = self.robot.data.root_pos_w.clone() 199 | base_pos_w[:, 2] += 0.5 200 | # -- resolve the scales and quaternions 201 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2]) 202 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2]) 203 | # display markers 204 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale) 205 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale) 206 | 207 | """ 208 | Internal helpers. 209 | """ 210 | 211 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 212 | """Converts the XY base velocity command to arrow direction rotation.""" 213 | # obtain default scale of the marker 214 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale 215 | # arrow-scale 216 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1) 217 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) 218 | # arrow-direction 219 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0]) 220 | zeros = torch.zeros_like(heading_angle) 221 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle) 222 | 223 | return arrow_scale, arrow_quat 224 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator_gpt_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | 14 | import math 15 | from dataclasses import MISSING 16 | 17 | from omni.isaac.lab.managers import CommandTermCfg 18 | from omni.isaac.lab.utils.configclass import configclass 19 | from typing_extensions import Literal 20 | 21 | from .path_follower_command_generator_gpt import PathFollowerCommandGeneratorGPT 22 | 23 | 24 | @configclass 25 | class PathFollowerCommandGeneratorGPTCfg(CommandTermCfg): 26 | class_type: PathFollowerCommandGeneratorGPT = PathFollowerCommandGeneratorGPT 27 | """Name of the command generator class.""" 28 | 29 | robot_attr: str = MISSING 30 | """Name of the robot attribute from the environment.""" 31 | 32 | path_frame: Literal["world", "robot"] = "world" 33 | """Frame in which the path is defined. 34 | - ``world``: the path is defined in the world frame. Also called ``odom``. 35 | - ``robot``: the path is defined in the robot frame. Also called ``base``. 36 | """ 37 | 38 | lookAheadDistance: float = MISSING 39 | """The lookahead distance for the path follower.""" 40 | two_way_drive: bool = False 41 | """Allow robot to use reverse gear.""" 42 | switch_time_threshold: float = 1.0 43 | """Time threshold to switch between the forward and backward drive.""" 44 | maxSpeed: float = 0.5 45 | """Maximum speed of the robot.""" 46 | maxAccel: float = 2.5 / 100.0 # 2.5 / 100 47 | """Maximum acceleration of the robot.""" 48 | joyYaw: float = 1.0 49 | """TODO: add description""" 50 | yawRateGain: float = 7.0 # 3.5 51 | """Gain for the yaw rate.""" 52 | stopYawRateGain: float = 7.0 # 3.5 53 | """""" 54 | maxYawRate: float = 90.0 * math.pi / 360 55 | dirDiffThre: float = 0.7 56 | stopDisThre: float = 0.2 57 | slowDwnDisThre: float = 0.3 58 | slowRate1: float = 0.25 59 | slowRate2: float = 0.5 60 | noRotAtGoal: bool = True 61 | autonomyMode: bool = False 62 | 63 | dynamic_lookahead: bool = False 64 | min_points_within_lookahead: int = 3 65 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/rl_command_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | from __future__ import annotations 14 | 15 | import math 16 | from typing import TYPE_CHECKING, Sequence 17 | 18 | import omni.isaac.lab.utils.math as math_utils 19 | import torch 20 | from omni.isaac.lab.assets.articulation import Articulation 21 | from omni.isaac.lab.envs import ManagerBasedRLEnv 22 | from omni.isaac.lab.managers import CommandTerm 23 | from omni.isaac.lab.markers import VisualizationMarkers 24 | from omni.isaac.lab.markers.config import ( 25 | BLUE_ARROW_X_MARKER_CFG, 26 | GREEN_ARROW_X_MARKER_CFG, 27 | CUBOID_MARKER_CFG, 28 | ) 29 | from omni.isaac.lab.sim import SimulationContext 30 | 31 | if TYPE_CHECKING: 32 | from .rl_command_generator_cfg import RLCommandGeneratorCfg 33 | 34 | 35 | class RLCommandGenerator(CommandTerm): 36 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner. 37 | 38 | The command comprises of a linear velocity in x and y direction and an angular velocity around 39 | the z-axis. It is given in the robot's base frame. 40 | 41 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance 42 | and uses it to compute the steering angle and the linear velocity. 43 | """ 44 | 45 | cfg: RLCommandGeneratorCfg 46 | """The configuration of the command generator.""" 47 | 48 | def __init__(self, cfg: RLCommandGeneratorCfg, env: ManagerBasedRLEnv): 49 | """Initialize the command generator. 50 | 51 | Args: 52 | cfg (RLCommandGeneratorCfg): The configuration of the command generator. 53 | env (object): The environment. 54 | """ 55 | super().__init__(cfg, env) 56 | # -- robot 57 | self.robot: Articulation = env.scene[cfg.robot_attr] 58 | # -- Simulation Context 59 | self.sim: SimulationContext = SimulationContext.instance() 60 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device) 61 | # -- debug vis 62 | self._base_vel_goal_markers = None 63 | self._base_vel_markers = None 64 | 65 | # Rotation mark 66 | self.rotation_mark = False 67 | self.initialized = False 68 | self.goal_reached = False 69 | self.identity_quat = torch.tensor([1.0, 0.0, 0.0, 0.0], device=self._env.device).repeat(self._env.num_envs, 1) 70 | 71 | def __str__(self) -> str: 72 | """Return a string representation of the command generator.""" 73 | msg = "rlCommandGenerator:\n" 74 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 75 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n" 76 | return msg 77 | 78 | """ 79 | Properties 80 | """ 81 | 82 | @property 83 | def command(self) -> torch.Tensor: 84 | """The desired base velocity command in the base frame. Shape is (num_envs, 3).""" 85 | # print("twist: ", self.twist) 86 | return self.twist 87 | 88 | """ 89 | Operations. 90 | """ 91 | 92 | def reset(self, env_ids: Sequence[int] | None = None) -> dict: 93 | """Reset the command generator. 94 | 95 | This function resets the command generator. It should be called whenever the environment is reset. 96 | 97 | Args: 98 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None. 99 | """ 100 | if env_ids is None: 101 | env_ids = ... 102 | 103 | self.twist = torch.zeros((self.num_envs, 3), device=self.device) 104 | self.goal_reached = False 105 | 106 | return {} 107 | 108 | def compute(self, dt: float): 109 | """Compute the command. 
110 | 111 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths 112 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all 113 | environments. 114 | """ 115 | # get paths 116 | self.twist = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone() 117 | self.twist[:, 0] = torch.clip(self.twist[:, 0], 0.0, 0.5) 118 | self.twist[:,1] = 0.0 119 | self.twist[:,2] = torch.clip(self.twist[:,2], -math.pi, math.pi) 120 | 121 | return self.twist 122 | 123 | """ 124 | Implementation specific functions. 125 | """ 126 | 127 | def _update_command(self): 128 | pass 129 | 130 | def _update_metrics(self): 131 | pass 132 | 133 | def _resample_command(self, env_ids: Sequence[int]): 134 | pass 135 | 136 | def _set_debug_vis_impl(self, debug_vis: bool): 137 | # set visibility of markers 138 | # note: parent only deals with callbacks. not their visibility 139 | if debug_vis: 140 | # create markers if necessary for the first tome 141 | if not hasattr(self, "base_vel_goal_visualizer"): 142 | # -- goal 143 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy() 144 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal" 145 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 146 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg) 147 | # -- current 148 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy() 149 | marker_cfg.prim_path = "/Visuals/Command/velocity_current" 150 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5) 151 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg) 152 | # -- goal command 153 | marker_cfg = CUBOID_MARKER_CFG.copy() 154 | marker_cfg.prim_path = "/Visuals/Command/pos_goal_command" 155 | marker_cfg.markers["cuboid"].scale = (0.5, 0.5, 0.5) 156 | self.base_vel_goal_command_visualizer = VisualizationMarkers(marker_cfg) 157 | # set their visibility to true 158 | self.base_vel_goal_visualizer.set_visibility(True) 159 | self.base_vel_visualizer.set_visibility(True) 160 | self.base_vel_goal_command_visualizer.set_visibility(True) 161 | else: 162 | if hasattr(self, "base_vel_goal_visualizer"): 163 | self.base_vel_goal_visualizer.set_visibility(False) 164 | self.base_vel_visualizer.set_visibility(False) 165 | self.base_vel_goal_command_visualizer.set_visibility(False) 166 | 167 | def _debug_vis_callback(self, event): 168 | # get marker location 169 | # -- base state 170 | base_pos_w = self.robot.data.root_pos_w.clone() 171 | base_pos_w[:, 2] += 0.5 172 | base_quat_w = self.robot.data.root_quat_w.clone() 173 | 174 | # -- resolve the scales and quaternions 175 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2]) 176 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2]) 177 | # display markers 178 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale) 179 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale) 180 | 181 | pos_command_w = self._env.command_manager._terms['goal_command'].pos_command_w.clone() 182 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale 183 | larger_scale = 2.0*torch.tensor(default_scale, device=self.device).repeat(pos_command_w.shape[0], 1) 184 | self.base_vel_goal_command_visualizer.visualize(pos_command_w, self.identity_quat,larger_scale) 185 | 186 | """ 187 | Internal helpers. 
188 | """ 189 | 190 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: 191 | """Converts the XY base velocity command to arrow direction rotation.""" 192 | # obtain default scale of the marker 193 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale 194 | # arrow-scale 195 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1) 196 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) 197 | # arrow-direction 198 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0]) 199 | zeros = torch.zeros_like(heading_angle) 200 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle) 201 | 202 | return arrow_scale, arrow_quat 203 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/rl_command_generator_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | 14 | import math 15 | from dataclasses import MISSING 16 | 17 | from omni.isaac.lab.managers import CommandTermCfg 18 | from omni.isaac.lab.utils.configclass import configclass 19 | from typing_extensions import Literal 20 | 21 | from .rl_command_generator import RLCommandGenerator 22 | 23 | 24 | @configclass 25 | class RLCommandGeneratorCfg(CommandTermCfg): 26 | class_type: RLCommandGenerator = RLCommandGenerator 27 | """Name of the command generator class.""" 28 | 29 | robot_attr: str = MISSING 30 | """Name of the robot attribute from the environment.""" 31 | 32 | path_frame: Literal["world", "robot"] = "world" 33 | """Frame in which the path is defined. 34 | - ``world``: the path is defined in the world frame. Also called ``odom``. 35 | - ``robot``: the path is defined in the robot frame. Also called ``base``. 36 | """ 37 | 38 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/robot_vel_command_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 
9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | from __future__ import annotations 14 | 15 | import math 16 | from typing import TYPE_CHECKING, Sequence 17 | 18 | import omni.isaac.lab.utils.math as math_utils 19 | import torch 20 | from omni.isaac.lab.assets.articulation import Articulation 21 | from omni.isaac.lab.envs import ManagerBasedRLEnv 22 | from omni.isaac.lab.managers import CommandTerm 23 | from omni.isaac.lab.sim import SimulationContext 24 | 25 | if TYPE_CHECKING: 26 | from .robot_vel_command_generator_cfg import RobotVelCommandGeneratorCfg 27 | 28 | 29 | class RobotVelCommandGenerator(CommandTerm): 30 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner. 31 | 32 | The command comprises of a linear velocity in x and y direction and an angular velocity around 33 | the z-axis. It is given in the robot's base frame. 34 | 35 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance 36 | and uses it to compute the steering angle and the linear velocity. 37 | """ 38 | 39 | cfg: RobotVelCommandGeneratorCfg 40 | """The configuration of the command generator.""" 41 | 42 | def __init__(self, cfg: RobotVelCommandGeneratorCfg, env: ManagerBasedRLEnv): 43 | """Initialize the command generator. 44 | 45 | Args: 46 | cfg (RLCommandGeneratorCfg): The configuration of the command generator. 47 | env (object): The environment. 48 | """ 49 | super().__init__(cfg, env) 50 | 51 | # -- robot 52 | self.robot: Articulation = env.scene[cfg.robot_attr] 53 | # -- Simulation Context 54 | self.sim: SimulationContext = SimulationContext.instance() 55 | self.command_b: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device) 56 | # -- debug vis 57 | self._base_vel_goal_markers = None 58 | self._base_vel_markers = None 59 | 60 | # Rotation mark 61 | self.rotation_mark = False 62 | self.initialized = False 63 | 64 | def __str__(self) -> str: 65 | """Return a string representation of the command generator.""" 66 | msg = "rlCommandGenerator:\n" 67 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n" 68 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n" 69 | return msg 70 | 71 | """ 72 | Properties 73 | """ 74 | 75 | @property 76 | def command(self) -> torch.Tensor: 77 | """The desired base velocity command in the base frame. Shape is (num_envs, 3).""" 78 | # print("twist: ", self.twist) 79 | return self.command_b 80 | 81 | """ 82 | Operations. 83 | """ 84 | 85 | def reset(self, env_ids: Sequence[int] | None = None) -> dict: 86 | """Reset the command generator. 87 | 88 | This function resets the command generator. It should be called whenever the environment is reset. 89 | 90 | Args: 91 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None. 92 | """ 93 | if env_ids is None: 94 | env_ids = ... 95 | 96 | self.command_b = torch.zeros((self.num_envs, 3), device=self.device) 97 | self.goal_reached = False 98 | 99 | return {} 100 | 101 | def compute(self, dt: float): 102 | """Compute the command. 103 | 104 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths 105 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all 106 | environments. 
107 | """ 108 | # get paths 109 | self.command_b[:,:3] = self._env.action_manager._terms['vlm_actions']._processed_command_velocity_actions.clone()[:,:3] 110 | 111 | return self.command_b 112 | 113 | """ 114 | Implementation specific functions. 115 | """ 116 | 117 | def _update_command(self): 118 | pass 119 | 120 | def _update_metrics(self): 121 | pass 122 | 123 | def _resample_command(self, env_ids: Sequence[int]): 124 | pass 125 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/robot_vel_command_generator_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab) 2 | # Author: Pascal Roth 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | # Copyright (c) 2022-2024, The lab Project Developers. 8 | # All rights reserved. 9 | # 10 | # SPDX-License-Identifier: BSD-3-Clause 11 | 12 | """Sub-module containing command generators for the velocity-based locomotion task.""" 13 | 14 | import math 15 | from dataclasses import MISSING 16 | 17 | from omni.isaac.lab.managers import CommandTermCfg 18 | from omni.isaac.lab.utils.configclass import configclass 19 | from typing_extensions import Literal 20 | 21 | from .robot_vel_command_generator import RobotVelCommandGenerator 22 | 23 | 24 | @configclass 25 | class RobotVelCommandGeneratorCfg(CommandTermCfg): 26 | class_type: RobotVelCommandGenerator = RobotVelCommandGenerator 27 | """Name of the command generator class.""" 28 | 29 | robot_attr: str = MISSING 30 | """Name of the robot attribute from the environment.""" 31 | 32 | # path_frame: Literal["world", "robot"] = "world" 33 | """Frame in which the path is defined. 34 | - ``world``: the path is defined in the world frame. Also called ``odom``. 35 | - ``robot``: the path is defined in the robot frame. Also called ``base``. 36 | """ 37 | 38 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/curriculums.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2024, The lab Project Developers. 2 | # All rights reserved. 3 | # 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | """Common functions that can be used to create curriculum for the learning environment. 7 | 8 | The functions can be passed to the :class:`omni.isaac.lab.managers.CurriculumTermCfg` object to enable 9 | the curriculum introduced by the function. 10 | """ 11 | 12 | from __future__ import annotations 13 | 14 | import torch 15 | from collections.abc import Sequence 16 | from typing import TYPE_CHECKING 17 | 18 | from omni.isaac.lab.assets import Articulation 19 | from omni.isaac.lab.managers import SceneEntityCfg 20 | from omni.isaac.lab.terrains import TerrainImporter 21 | 22 | if TYPE_CHECKING: 23 | from omni.isaac.lab.envs import ManagerBasedRLEnv 24 | 25 | 26 | def terrain_levels_vel( 27 | env: ManagerBasedRLEnv, env_ids: Sequence[int], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") 28 | ) -> torch.Tensor: 29 | """Curriculum based on the distance the robot walked when commanded to move at a desired velocity. 30 | 31 | This term is used to increase the difficulty of the terrain when the robot walks far enough and decrease the 32 | difficulty when the robot walks less than half of the distance required by the commanded velocity. 
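    A minimal registration sketch (the surrounding config class and the field name ``terrain_levels``
    are illustrative; the term also relies on a ``base_velocity`` command and a ``generator`` terrain,
    as noted below):

    .. code-block:: python

        from omni.isaac.lab.managers import CurriculumTermCfg
        from omni.isaac.lab.utils.configclass import configclass

        @configclass
        class CurriculumCfg:
            # move robots that walk far enough up to harder terrains and
            # robots that cover less than half the commanded distance down
            terrain_levels = CurriculumTermCfg(func=terrain_levels_vel)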
33 | 34 | .. note:: 35 | It is only possible to use this term with the terrain type ``generator``. For further information 36 | on different terrain types, check the :class:`omni.isaac.lab.terrains.TerrainImporter` class. 37 | 38 | Returns: 39 | The mean terrain level for the given environment ids. 40 | """ 41 | # extract the used quantities (to enable type-hinting) 42 | asset: Articulation = env.scene[asset_cfg.name] 43 | terrain: TerrainImporter = env.scene.terrain 44 | command = env.command_manager.get_command("base_velocity") 45 | # compute the distance the robot walked 46 | distance = torch.norm(asset.data.root_pos_w[env_ids, :2] - env.scene.env_origins[env_ids, :2], dim=1) 47 | # robots that walked far enough progress to harder terrains 48 | move_up = distance > terrain.cfg.terrain_generator.size[0] / 2 49 | # robots that walked less than half of their required distance go to simpler terrains 50 | move_down = distance < torch.norm(command[env_ids, :2], dim=1) * env.max_episode_length_s * 0.5 51 | move_down *= ~move_up 52 | # update terrain levels 53 | terrain.update_env_origins(env_ids, move_up, move_down) 54 | # return the mean terrain level 55 | return torch.mean(terrain.terrain_levels.float()) 56 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/events.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import omni.isaac.lab.sim as sim_utils 4 | import omni.isaac.lab.utils.math as math_utils 5 | from omni.isaac.lab.actuators import ImplicitActuator 6 | from omni.isaac.lab.assets import Articulation, RigidObject 7 | from omni.isaac.lab.managers import SceneEntityCfg 8 | from omni.isaac.lab.terrains import TerrainImporter 9 | 10 | 11 | from omni.isaac.lab.envs import ManagerBasedEnv 12 | 13 | 14 | def reset_camera_pos_uniform( 15 | env: ManagerBasedEnv, 16 | env_ids: torch.Tensor, 17 | asset_cfg: SceneEntityCfg, 18 | pose_delta_range: dict[str, tuple[float, float]], 19 | ): 20 | """Reset the camera to a random position uniformly within the given ranges. 21 | 22 | This function randomizes the position of the asset. 23 | 24 | * It samples the delta position from the given ranges and adds them to the default camera position, before setting 25 | them into the physics simulation. 26 | * It samples the root orientation from the given ranges and sets them into the physics simulation. 27 | * It samples the root velocity from the given ranges and sets them into the physics simulation. 28 | 29 | The function takes a dictionary of pose and velocity ranges for each axis and rotation. The keys of the 30 | dictionary are ``x``, ``y``, ``z``, ``roll``, ``pitch``, and ``yaw``. The values are tuples of the form 31 | ``(min, max)``. If the dictionary does not contain a key, the position or velocity is set to zero for that axis. 
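    A minimal configuration sketch (the asset name ``"camera"`` and the delta ranges below are
    assumptions for illustration, not values taken from this repository):

    .. code-block:: python

        from omni.isaac.lab.managers import EventTermCfg, SceneEntityCfg

        reset_camera = EventTermCfg(
            func=reset_camera_pos_uniform,
            mode="reset",
            params={
                "asset_cfg": SceneEntityCfg("camera"),
                "pose_delta_range": {"x": (-0.05, 0.05), "y": (-0.05, 0.05), "yaw": (-0.1, 0.1)},
            },
        )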
32 | """ 33 | # extract the used quantities (to enable type-hinting) 34 | camera: RigidObject | Articulation = env.scene[asset_cfg.name] 35 | # get default root state 36 | pos_w, quat_w = camera._compute_view_world_poses(env_ids) 37 | 38 | # poses 39 | range_list = [pose_delta_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]] 40 | ranges = torch.tensor(range_list, device=camera.device) 41 | rand_samples = math_utils.sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 6), device=camera.device) 42 | 43 | positions = pos_w + rand_samples[:, 0:3] 44 | orientations_delta = math_utils.quat_from_euler_xyz(rand_samples[:, 3], rand_samples[:, 4], rand_samples[:, 5]) 45 | orientations = math_utils.quat_mul(quat_w, orientations_delta) 46 | 47 | # set into the physics simulation 48 | camera.set_world_poses(positions, orientations, env_ids=env_ids, convention="world") -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/rewards/__init__.py: -------------------------------------------------------------------------------- 1 | from .objnav_rewards import * -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .wrappers import RslRlVecEnvHistoryWrapper 4 | 5 | ASSETS_DIR = os.path.abspath("assets") 6 | 7 | __all__ = [ 8 | "ASSETS_DIR", 9 | "RslRlVecEnvHistoryWrapper", 10 | ] 11 | -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/utils/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers. 2 | # All rights reserved. 3 | # 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | """Wrapper to configure an :class:`ManagerBasedRLEnv` instance to RSL-RL vectorized environment. 7 | 8 | The following example shows how to wrap an environment for RSL-RL: 9 | 10 | .. code-block:: python 11 | 12 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlVecEnvWrapper 13 | 14 | env = RslRlVecEnvWrapper(env) 15 | 16 | """ 17 | 18 | 19 | import gymnasium as gym 20 | import torch 21 | 22 | from rsl_rl.env import VecEnv 23 | 24 | from omni.isaac.lab.envs import DirectRLEnv, ManagerBasedRLEnv 25 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlVecEnvWrapper 26 | 27 | 28 | def get_proprio_obs_dim(env: ManagerBasedRLEnv) -> int: 29 | """Returns the dimension of the proprioceptive observations.""" 30 | return env.unwrapped.observation_manager.compute_group("proprio").shape[1] 31 | 32 | 33 | class RslRlVecEnvHistoryWrapper(RslRlVecEnvWrapper): 34 | """Wraps around Isaac Lab environment for RSL-RL to add history buffer to the proprioception observations. 35 | 36 | .. caution:: 37 | 38 | This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow 39 | the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this 40 | wrapper. 
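    A usage sketch in the style of the module-level example above (``history_length=5`` is an
    illustrative value; the wrapped environment is assumed to expose a ``proprio`` observation group):

    .. code-block:: python

        from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper

        # stack the last 5 proprioceptive observations onto the policy observation
        env = RslRlVecEnvHistoryWrapper(env, history_length=5)
        obs, extras = env.get_observations()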
41 | 42 | Reference: 43 | https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py 44 | """ 45 | 46 | def __init__(self, env: ManagerBasedRLEnv, history_length: int = 1): 47 | """Initializes the wrapper.""" 48 | super().__init__(env) 49 | 50 | self.history_length = history_length 51 | self.proprio_obs_dim = get_proprio_obs_dim(env) 52 | self.proprio_obs_buf = torch.zeros(self.num_envs, self.history_length, self.proprio_obs_dim, 53 | dtype=torch.float, device=self.unwrapped.device) 54 | 55 | self.clip_actions = 20.0 56 | 57 | """ 58 | Properties 59 | """ 60 | def get_observations(self) -> tuple[torch.Tensor, dict]: 61 | """Returns the current observations of the environment.""" 62 | if hasattr(self.unwrapped, "observation_manager"): 63 | obs_dict = self.unwrapped.observation_manager.compute() 64 | else: 65 | obs_dict = self.unwrapped._get_observations() 66 | proprio_obs, obs = obs_dict["proprio"], obs_dict["policy"] 67 | self.proprio_obs_buf = torch.cat([proprio_obs.unsqueeze(1)] * self.history_length, dim=1) 68 | proprio_obs_history = self.proprio_obs_buf.view(self.num_envs, -1) 69 | curr_obs = torch.cat([obs, proprio_obs_history], dim=1) 70 | obs_dict["policy"] = curr_obs 71 | 72 | return curr_obs, {"observations": obs_dict} 73 | 74 | def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: 75 | # clip the actions (for testing only) 76 | actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) 77 | 78 | # record step information 79 | obs_dict, rew, terminated, truncated, extras = self.env.step(actions) 80 | # compute dones for compatibility with RSL-RL 81 | dones = (terminated | truncated).to(dtype=torch.long) 82 | # move extra observations to the extras dict 83 | proprio_obs, obs = obs_dict["proprio"], obs_dict["policy"] 84 | # print("============== Height Map ==============") 85 | # print(obs_dict["test_height_map"]) 86 | extras["observations"] = obs_dict 87 | # move time out information to the extras dict 88 | # this is only needed for infinite horizon tasks 89 | if not self.unwrapped.cfg.is_finite_horizon: 90 | extras["time_outs"] = truncated 91 | 92 | # update obsservation history buffer & reset the history buffer for done environments 93 | self.proprio_obs_buf = torch.where( 94 | (self.episode_length_buf < 1)[:, None, None], 95 | torch.stack([torch.zeros_like(proprio_obs)] * self.history_length, dim=1), 96 | torch.cat([ 97 | self.proprio_obs_buf[:, 1:], 98 | proprio_obs.unsqueeze(1) 99 | ], dim=1) 100 | ) 101 | proprio_obs_history = self.proprio_obs_buf.view(self.num_envs, -1) 102 | curr_obs = torch.cat([obs, proprio_obs_history], dim=1) 103 | extras["observations"]["policy"] = curr_obs 104 | 105 | # return the step information 106 | return curr_obs, rew, dones, extras 107 | 108 | def update_command(self, command: torch.Tensor) -> None: 109 | """Updates the command for the environment.""" 110 | self.proprio_obs_buf[:, -1, 6:9] = command 111 | 112 | def close(self): # noqa: D102 113 | return self.env.close() -------------------------------------------------------------------------------- /isaaclab_exts/omni.isaac.leggedloco/setup.py: -------------------------------------------------------------------------------- 1 | """Installation script for the 'omni.isaac.leggedloco' python package.""" 2 | 3 | 4 | from setuptools import setup 5 | 6 | # Minimum dependencies required prior to installation 7 | INSTALL_REQUIRES = [ 8 | # generic 9 | "numpy", 10 | "scipy>=1.7.1", 11 | # RL 12 | "torch>=1.9.0", 13 | ] 14 | 15 | # 
Installation operation 16 | setup( 17 | name="omni-isaac-leggedloco", 18 | version="0.0.1", 19 | keywords=["robotics", "rl"], 20 | include_package_data=True, 21 | python_requires=">=3.10", 22 | install_requires=INSTALL_REQUIRES, 23 | packages=["omni.isaac.leggedloco"], 24 | classifiers=["Natural Language :: English", "Programming Language :: Python :: 3.7"], 25 | zip_safe=False, 26 | ) 27 | 28 | # EOF 29 | -------------------------------------------------------------------------------- /rsl_rl/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | show-source=True 3 | statistics=True 4 | per-file-ignores=*/__init__.py:F401 5 | # E402: Module level import not at top of file 6 | # E501: Line too long 7 | # W503: Line break before binary operator 8 | # E203: Whitespace before ':' -> conflicts with black 9 | # D401: First line should be in imperative mood 10 | # R504: Unnecessary variable assignment before return statement. 11 | # R505: Unnecessary elif after return statement 12 | # SIM102: Use a single if-statement instead of nested if-statements 13 | # SIM117: Merge with statements for context managers that have same scope. 14 | ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117 15 | max-line-length = 120 16 | max-complexity = 18 17 | exclude=_*,.vscode,.git,docs/** 18 | # docstrings 19 | docstring-convention=google 20 | # annotations 21 | suppress-none-returning=True 22 | allow-star-arg-any=True 23 | -------------------------------------------------------------------------------- /rsl_rl/.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | .idea 3 | 4 | # builds 5 | *.egg-info 6 | 7 | # cache 8 | __pycache__ 9 | .pytest_cache 10 | 11 | # vs code 12 | .vscode 13 | -------------------------------------------------------------------------------- /rsl_rl/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/python/black 3 | rev: 23.10.1 4 | hooks: 5 | - id: black 6 | args: ["--line-length", "120", "--preview"] 7 | - repo: https://github.com/pycqa/flake8 8 | rev: 6.1.0 9 | hooks: 10 | - id: flake8 11 | additional_dependencies: [flake8-simplify, flake8-return] 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v4.5.0 14 | hooks: 15 | - id: trailing-whitespace 16 | - id: check-symlinks 17 | - id: destroyed-symlinks 18 | - id: check-yaml 19 | - id: check-merge-conflict 20 | - id: check-case-conflict 21 | - id: check-executables-have-shebangs 22 | - id: check-toml 23 | - id: end-of-file-fixer 24 | - id: check-shebang-scripts-are-executable 25 | - id: detect-private-key 26 | - id: debug-statements 27 | - repo: https://github.com/pycqa/isort 28 | rev: 5.12.0 29 | hooks: 30 | - id: isort 31 | name: isort (python) 32 | args: ["--profile", "black", "--filter-files"] 33 | - repo: https://github.com/asottile/pyupgrade 34 | rev: v3.15.0 35 | hooks: 36 | - id: pyupgrade 37 | args: ["--py37-plus"] 38 | - repo: https://github.com/codespell-project/codespell 39 | rev: v2.2.6 40 | hooks: 41 | - id: codespell 42 | additional_dependencies: 43 | - tomli 44 | # FIXME: Figure out why this is getting stuck under VPN. 45 | # - repo: https://github.com/RobertCraigie/pyright-python 46 | # rev: v1.1.315 47 | # hooks: 48 | # - id: pyright 49 | # Note: We disable this by default since not all code is compatible with it. 
50 | # - repo: https://github.com/Lucas-C/pre-commit-hooks 51 | # rev: v1.5.1 52 | # hooks: 53 | # - id: insert-license 54 | # files: \.py$ 55 | # args: 56 | # # - --remove-header # Remove existing license headers. Useful when updating license. 57 | # - --license-filepath 58 | # - .github/LICENSE_HEADER.txt 59 | -------------------------------------------------------------------------------- /rsl_rl/CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # RSL-RL Maintainers and Contributors 2 | 3 | This is the official list of developers and contributors. 4 | 5 | To see the full list of contributors, see the revision history in the source control. 6 | 7 | Names should be added to this file as: individual names or organizations. 8 | 9 | Email addresses are tracked elsewhere to avoid spam. 10 | 11 | Please keep the lists sorted alphabetically. 12 | 13 | ## Maintainers 14 | 15 | * Robotic Syetms Lab, ETH Zurich 16 | * NVIDIA Corporation 17 | 18 | --- 19 | 20 | * David Hoeller 21 | * Nikita Rudin 22 | 23 | ## Contributors 24 | 25 | * Eric Vollenweider 26 | * Fabian Jenelten 27 | * Lorenzo Terenzi 28 | * Marko Bjelonic 29 | * Matthijs van der Boon 30 | * Mayank Mittal 31 | * Zhang Chong 32 | -------------------------------------------------------------------------------- /rsl_rl/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/dependencies for license information of dependencies of this package. 31 | -------------------------------------------------------------------------------- /rsl_rl/README.md: -------------------------------------------------------------------------------- 1 | # RSL RL 2 | 3 | Fast and simple implementation of RL algorithms, designed to run fully on GPU. 
4 | This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac Gym. 5 | 6 | | :zap: The `algorithms` branch supports additional algorithms (SAC, DDPG, DSAC, and more)! | 7 | | ------------------------------------------------------------------------------------------------ | 8 | 9 | Only PPO is implemented for now. More algorithms will be added later. 10 | Contributions are welcome. 11 | 12 | **Maintainers**: David Hoeller and Nikita Rudin
13 | **Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA
14 | **Contact**: rudinn@ethz.ch 15 | 16 | ## Setup 17 | 18 | Following are the instructions to set up the repository for your workspace: 19 | 20 | ```bash 21 | git clone https://github.com/leggedrobotics/rsl_rl 22 | cd rsl_rl 23 | pip install -e . 24 | ``` 25 | 26 | The framework supports the following logging frameworks, which can be configured through `logger`: 27 | 28 | * Tensorboard: https://www.tensorflow.org/tensorboard/ 29 | * Weights & Biases: https://wandb.ai/site 30 | * Neptune: https://docs.neptune.ai/ 31 | 32 | For a demo configuration of PPO, please check the [dummy_config.yaml](config/dummy_config.yaml) file. 33 | 34 | 35 | ## Contribution Guidelines 36 | 37 | For documentation, we adopt the [Google Style Guide](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for docstrings. We use [Sphinx](https://www.sphinx-doc.org/en/master/) for generating the documentation. Please make sure that your code is well-documented and follows the guidelines. 38 | 39 | We use the following tools for maintaining code quality: 40 | 41 | - [pre-commit](https://pre-commit.com/): Runs a list of formatters and linters over the codebase. 42 | - [black](https://black.readthedocs.io/en/stable/): The uncompromising code formatter. 43 | - [flake8](https://flake8.pycqa.org/en/latest/): A wrapper around PyFlakes, pycodestyle, and the McCabe complexity checker. 44 | 45 | Please check [here](https://pre-commit.com/#install) for instructions to set these up. To run the checks over the entire repository, please execute the following commands in the terminal: 46 | 47 | 48 | ```bash 49 | # for installation (only once) 50 | pre-commit install 51 | # for running 52 | pre-commit run --all-files 53 | ``` 54 | 55 | ### Useful Links 56 | 57 | Environment repositories using the framework: 58 | 59 | * `Legged-Gym` (built on top of NVIDIA Isaac Gym): https://leggedrobotics.github.io/legged_gym/ 60 | * `Isaac Lab` (built on top of NVIDIA Isaac Sim): https://isaac-lab.github.io/ 61 | -------------------------------------------------------------------------------- /rsl_rl/config/dummy_config.yaml: -------------------------------------------------------------------------------- 1 | algorithm: 2 | class_name: PPO 3 | # training parameters 4 | # -- value function 5 | value_loss_coef: 1.0 6 | clip_param: 0.2 7 | use_clipped_value_loss: true 8 | # -- surrogate loss 9 | desired_kl: 0.01 10 | entropy_coef: 0.01 11 | gamma: 0.99 12 | lam: 0.95 13 | max_grad_norm: 1.0 14 | # -- training 15 | learning_rate: 0.001 16 | num_learning_epochs: 5 17 | num_mini_batches: 4 # mini batch size = num_envs * num_steps / num_mini_batches 18 | schedule: adaptive # adaptive, fixed 19 | policy: 20 | class_name: ActorCritic 21 | # for MLP i.e.
`ActorCritic` 22 | activation: elu 23 | actor_hidden_dims: [128, 128, 128] 24 | critic_hidden_dims: [128, 128, 128] 25 | init_noise_std: 1.0 26 | # only needed for `ActorCriticRecurrent` 27 | # rnn_type: 'lstm' 28 | # rnn_hidden_size: 512 29 | # rnn_num_layers: 1 30 | runner: 31 | num_steps_per_env: 24 # number of steps per environment per iteration 32 | max_iterations: 1500 # number of policy updates 33 | empirical_normalization: false 34 | # -- logging parameters 35 | save_interval: 50 # check for potential saves every `save_interval` iterations 36 | experiment_name: walking_experiment 37 | run_name: "" 38 | # -- logging writer 39 | logger: tensorboard # tensorboard, neptune, wandb 40 | neptune_project: legged_gym 41 | wandb_project: legged_gym 42 | # -- load and resuming 43 | resume: false 44 | load_run: -1 # -1 means load latest run 45 | resume_path: null # updated from load_run and checkpoint 46 | checkpoint: -1 # -1 means load latest checkpoint 47 | runner_class_name: OnPolicyRunner 48 | seed: 1 49 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/black-license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Łukasz Langa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/flake8-license.txt: -------------------------------------------------------------------------------- 1 | == Flake8 License (MIT) == 2 | 3 | Copyright (C) 2011-2013 Tarek Ziade 4 | Copyright (C) 2012-2016 Ian Cordasco 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | of the Software, and to permit persons to whom the Software is furnished to do 11 | so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/isort-license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Timothy Edmund Crosley 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/numpy_license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2021, NumPy Developers. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/pre-commit-hooks-license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/pre-commit-license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/pyright-license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Robert Craigie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | 24 | =============================================================================== 25 | 26 | MIT License 27 | 28 | Pyright - A static type checker for the Python language 29 | Copyright (c) Microsoft Corporation. All rights reserved. 30 | 31 | Permission is hereby granted, free of charge, to any person obtaining a copy 32 | of this software and associated documentation files (the "Software"), to deal 33 | in the Software without restriction, including without limitation the rights 34 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 35 | copies of the Software, and to permit persons to whom the Software is 36 | furnished to do so, subject to the following conditions: 37 | 38 | The above copyright notice and this permission notice shall be included in all 39 | copies or substantial portions of the Software. 40 | 41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 47 | SOFTWARE 48 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/pyupgrade-license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Anthony Sottile 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/torch_license.txt: -------------------------------------------------------------------------------- 1 | From PyTorch: 2 | 3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 8 | Copyright (c) 2011-2013 NYU (Clement Farabet) 9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 12 | 13 | From Caffe2: 14 | 15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 16 | 17 | All contributions by Facebook: 18 | Copyright (c) 2016 Facebook Inc. 19 | 20 | All contributions by Google: 21 | Copyright (c) 2015 Google Inc. 22 | All rights reserved. 23 | 24 | All contributions by Yangqing Jia: 25 | Copyright (c) 2015 Yangqing Jia 26 | All rights reserved. 27 | 28 | All contributions by Kakao Brain: 29 | Copyright 2019-2020 Kakao Brain 30 | 31 | All contributions from Caffe: 32 | Copyright(c) 2013, 2014, 2015, the respective contributors 33 | All rights reserved. 34 | 35 | All other contributions: 36 | Copyright(c) 2015, 2016 the respective contributors 37 | All rights reserved. 38 | 39 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 40 | copyright over their contributions to Caffe2. The project versioning records 41 | all such contribution and copyright details. 
If a contributor wants to further 42 | mark their specific copyright on a particular contribution, they should 43 | indicate their copyright solely in the commit message of the change when it is 44 | committed. 45 | 46 | All rights reserved. 47 | 48 | Redistribution and use in source and binary forms, with or without 49 | modification, are permitted provided that the following conditions are met: 50 | 51 | 1. Redistributions of source code must retain the above copyright 52 | notice, this list of conditions and the following disclaimer. 53 | 54 | 2. Redistributions in binary form must reproduce the above copyright 55 | notice, this list of conditions and the following disclaimer in the 56 | documentation and/or other materials provided with the distribution. 57 | 58 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 59 | and IDIAP Research Institute nor the names of its contributors may be 60 | used to endorse or promote products derived from this software without 61 | specific prior written permission. 62 | 63 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 64 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 67 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 68 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 69 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 70 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 71 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 72 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 73 | POSSIBILITY OF SUCH DAMAGE. 74 | -------------------------------------------------------------------------------- /rsl_rl/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | 3 | py_version = 37 4 | line_length = 120 5 | group_by_package = true 6 | 7 | # Files to skip 8 | skip_glob = [".vscode/*"] 9 | 10 | # Order of imports 11 | sections = [ 12 | "FUTURE", 13 | "STDLIB", 14 | "THIRDPARTY", 15 | "FIRSTPARTY", 16 | "LOCALFOLDER", 17 | ] 18 | 19 | # Extra standard libraries considered as part of python (permissive licenses) 20 | extra_standard_library = [ 21 | "numpy", 22 | "torch", 23 | "tensordict", 24 | "warp", 25 | "typing_extensions", 26 | "git", 27 | ] 28 | # Imports from this repository 29 | known_first_party = "rsl_rl" 30 | 31 | [tool.pyright] 32 | 33 | include = ["rsl_rl"] 34 | 35 | typeCheckingMode = "basic" 36 | pythonVersion = "3.7" 37 | pythonPlatform = "Linux" 38 | enableTypeIgnoreComments = true 39 | 40 | # This is required as the CI pre-commit does not download the module (i.e. numpy, torch, prettytable) 41 | # Therefore, we have to ignore missing imports 42 | reportMissingImports = "none" 43 | # This is required to ignore for type checks of modules with stubs missing. 
44 | reportMissingModuleSource = "none" # -> most common: prettytable in mdp managers 45 | 46 | reportGeneralTypeIssues = "none" # -> raises 218 errors (usage of literal MISSING in dataclasses) 47 | reportOptionalMemberAccess = "warning" # -> raises 8 errors 48 | reportPrivateUsage = "warning" 49 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Main module for the rsl_rl package.""" 5 | 6 | __version__ = "2.0.1" 7 | __license__ = "BSD-3" 8 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Implementation of different RL agents.""" 5 | 6 | from .ppo import PPO 7 | 8 | __all__ = ["PPO"] 9 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/algorithms/ppo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from __future__ import annotations 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | 10 | from rsl_rl.modules import ActorCritic, ActorCriticDepthCNN 11 | from rsl_rl.storage import RolloutStorage 12 | 13 | 14 | class PPO: 15 | # actor_critic: ActorCritic | ActorCriticDepthCNN 16 | 17 | def __init__( 18 | self, 19 | actor_critic, 20 | num_learning_epochs=1, 21 | num_mini_batches=1, 22 | clip_param=0.2, 23 | gamma=0.998, 24 | lam=0.95, 25 | value_loss_coef=1.0, 26 | entropy_coef=0.0, 27 | learning_rate=1e-3, 28 | max_grad_norm=1.0, 29 | use_clipped_value_loss=True, 30 | schedule="fixed", 31 | desired_kl=0.01, 32 | device="cpu", 33 | ): 34 | self.device = device 35 | 36 | self.desired_kl = desired_kl 37 | self.schedule = schedule 38 | self.learning_rate = learning_rate 39 | 40 | # PPO components 41 | self.actor_critic = actor_critic 42 | self.actor_critic.to(self.device) 43 | self.storage = None # initialized later 44 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate) 45 | self.transition = RolloutStorage.Transition() 46 | 47 | # PPO parameters 48 | self.clip_param = clip_param 49 | self.num_learning_epochs = num_learning_epochs 50 | self.num_mini_batches = num_mini_batches 51 | self.value_loss_coef = value_loss_coef 52 | self.entropy_coef = entropy_coef 53 | self.gamma = gamma 54 | self.lam = lam 55 | self.max_grad_norm = max_grad_norm 56 | self.use_clipped_value_loss = use_clipped_value_loss 57 | 58 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape): 59 | self.storage = RolloutStorage( 60 | num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device 61 | ) 62 | 63 | def test_mode(self): 64 | self.actor_critic.test() 65 | 66 | def train_mode(self): 67 | self.actor_critic.train() 68 | 69 | def act(self, obs, critic_obs): 70 | if self.actor_critic.is_recurrent: 71 | self.transition.hidden_states = self.actor_critic.get_hidden_states() 72 | # Compute the actions and values 73 | self.transition.actions = self.actor_critic.act(obs).detach() 74 | 
self.transition.values = self.actor_critic.evaluate(critic_obs).detach() 75 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach() 76 | self.transition.action_mean = self.actor_critic.action_mean.detach() 77 | self.transition.action_sigma = self.actor_critic.action_std.detach() 78 | # need to record obs and critic_obs before env.step() 79 | self.transition.observations = obs 80 | self.transition.critic_observations = critic_obs 81 | return self.transition.actions 82 | 83 | def process_env_step(self, rewards, dones, infos): 84 | self.transition.rewards = rewards.clone() 85 | self.transition.dones = dones 86 | # Bootstrapping on time outs 87 | if "time_outs" in infos: 88 | self.transition.rewards += self.gamma * torch.squeeze( 89 | self.transition.values * infos["time_outs"].unsqueeze(1).to(self.device), 1 90 | ) 91 | 92 | # Record the transition 93 | self.storage.add_transitions(self.transition) 94 | self.transition.clear() 95 | self.actor_critic.reset(dones) 96 | 97 | def compute_returns(self, last_critic_obs): 98 | last_values = self.actor_critic.evaluate(last_critic_obs).detach() 99 | self.storage.compute_returns(last_values, self.gamma, self.lam) 100 | 101 | def update(self): 102 | mean_value_loss = 0 103 | mean_surrogate_loss = 0 104 | if self.actor_critic.is_recurrent: 105 | generator = self.storage.reccurent_mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 106 | else: 107 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 108 | for ( 109 | obs_batch, 110 | critic_obs_batch, 111 | actions_batch, 112 | target_values_batch, 113 | advantages_batch, 114 | returns_batch, 115 | old_actions_log_prob_batch, 116 | old_mu_batch, 117 | old_sigma_batch, 118 | hid_states_batch, 119 | masks_batch, 120 | ) in generator: 121 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0]) 122 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 123 | value_batch = self.actor_critic.evaluate( 124 | critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1] 125 | ) 126 | mu_batch = self.actor_critic.action_mean 127 | sigma_batch = self.actor_critic.action_std 128 | entropy_batch = self.actor_critic.entropy 129 | 130 | # KL 131 | if self.desired_kl is not None and self.schedule == "adaptive": 132 | with torch.inference_mode(): 133 | kl = torch.sum( 134 | torch.log(sigma_batch / old_sigma_batch + 1.0e-5) 135 | + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) 136 | / (2.0 * torch.square(sigma_batch)) 137 | - 0.5, 138 | axis=-1, 139 | ) 140 | kl_mean = torch.mean(kl) 141 | 142 | if kl_mean > self.desired_kl * 2.0: 143 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 144 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 145 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 146 | 147 | for param_group in self.optimizer.param_groups: 148 | param_group["lr"] = self.learning_rate 149 | 150 | # Surrogate loss 151 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 152 | surrogate = -torch.squeeze(advantages_batch) * ratio 153 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp( 154 | ratio, 1.0 - self.clip_param, 1.0 + self.clip_param 155 | ) 156 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 157 | 158 | # Value function loss 159 | if self.use_clipped_value_loss: 160 | value_clipped = target_values_batch + 
(value_batch - target_values_batch).clamp( 161 | -self.clip_param, self.clip_param 162 | ) 163 | value_losses = (value_batch - returns_batch).pow(2) 164 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 165 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 166 | else: 167 | value_loss = (returns_batch - value_batch).pow(2).mean() 168 | 169 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean() 170 | 171 | # Gradient step 172 | self.optimizer.zero_grad() 173 | loss.backward() 174 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 175 | self.optimizer.step() 176 | 177 | mean_value_loss += value_loss.item() 178 | mean_surrogate_loss += surrogate_loss.item() 179 | 180 | num_updates = self.num_learning_epochs * self.num_mini_batches 181 | mean_value_loss /= num_updates 182 | mean_surrogate_loss /= num_updates 183 | self.storage.clear() 184 | 185 | return mean_value_loss, mean_surrogate_loss 186 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Submodule defining the environment definitions.""" 5 | 6 | from .vec_env import VecEnv 7 | 8 | __all__ = ["VecEnv"] 9 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from __future__ import annotations 5 | 6 | import torch 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | class VecEnv(ABC): 11 | """Abstract class for vectorized environment. 12 | 13 | The vectorized environment is a collection of environments that are synchronized. This means that 14 | the same action is applied to all environments and the same observation is returned from all environments. 15 | 16 | All extra observations must be provided as a dictionary to "extras" in the step() method. Based on the 17 | configuration, the extra observations are used for different purposes. The following keys are reserved 18 | in the "observations" dictionary (if they are present): 19 | 20 | - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces. 21 | """ 22 | 23 | num_envs: int 24 | """Number of environments.""" 25 | num_obs: int 26 | """Number of observations.""" 27 | num_privileged_obs: int 28 | """Number of privileged observations.""" 29 | num_actions: int 30 | """Number of actions.""" 31 | max_episode_length: int 32 | """Maximum episode length.""" 33 | privileged_obs_buf: torch.Tensor 34 | """Buffer for privileged observations.""" 35 | obs_buf: torch.Tensor 36 | """Buffer for observations.""" 37 | rew_buf: torch.Tensor 38 | """Buffer for rewards.""" 39 | reset_buf: torch.Tensor 40 | """Buffer for resets.""" 41 | episode_length_buf: torch.Tensor # current episode duration 42 | """Buffer for current episode lengths.""" 43 | extras: dict 44 | """Extra information (metrics). 45 | 46 | Extra information is stored in a dictionary. This includes metrics such as the episode reward, episode length, 47 | etc. Additional information can be stored in the dictionary such as observations for the critic network, etc. 
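    For illustration only: apart from the "critic" entry under "observations" and the top-level
    "time_outs" flag, which this package checks for explicitly, the keys and values in the sketch
    below are hypothetical placeholders.

    Example:
        extras = {
            "observations": {"critic": torch.zeros(num_envs, num_privileged_obs)},  # asymmetric critic input
            "time_outs": torch.zeros(num_envs, dtype=torch.bool),  # lets PPO bootstrap values on truncation
            "episode": {"mean_reward": 0.0},  # hypothetical free-form metrics for logging
        }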
48 | """ 49 | device: torch.device 50 | """Device to use.""" 51 | 52 | """ 53 | Operations. 54 | """ 55 | 56 | @abstractmethod 57 | def get_observations(self) -> tuple[torch.Tensor, dict]: 58 | """Return the current observations. 59 | 60 | Returns: 61 | Tuple[torch.Tensor, dict]: Tuple containing the observations and extras. 62 | """ 63 | raise NotImplementedError 64 | 65 | @abstractmethod 66 | def reset(self) -> tuple[torch.Tensor, dict]: 67 | """Reset all environment instances. 68 | 69 | Returns: 70 | Tuple[torch.Tensor, dict]: Tuple containing the observations and extras. 71 | """ 72 | raise NotImplementedError 73 | 74 | @abstractmethod 75 | def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: 76 | """Apply input action on the environment. 77 | 78 | Args: 79 | actions (torch.Tensor): Input actions to apply. Shape: (num_envs, num_actions) 80 | 81 | Returns: 82 | Tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: 83 | A tuple containing the observations, rewards, dones and extra information (metrics). 84 | """ 85 | raise NotImplementedError 86 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Definitions for neural-network components for RL-agents.""" 5 | 6 | from .actor_critic import ActorCritic 7 | from .actor_critic_recurrent import ActorCriticRecurrent 8 | from .actor_critic_depth_cnn import ActorCriticDepthCNN, ActorCriticDepthCNNRecurrent 9 | from .actor_critic_history import ActorCriticHistory 10 | from .normalizer import EmpiricalNormalization 11 | 12 | __all__ = [ 13 | "ActorCritic", 14 | "ActorCriticRecurrent", 15 | "ActorCriticDepthCNN", 16 | "ActorCriticDepthCNNRecurrent", 17 | "EmpiricalNormalization", 18 | "ActorCriticHistory" 19 | ] 20 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | 5 | from __future__ import annotations 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.distributions import Normal 10 | 11 | 12 | class ActorCritic(nn.Module): 13 | is_recurrent = False 14 | 15 | def __init__( 16 | self, 17 | num_actor_obs, 18 | num_critic_obs, 19 | num_actions, 20 | actor_hidden_dims=[256, 256, 256], 21 | critic_hidden_dims=[256, 256, 256], 22 | activation="elu", 23 | init_noise_std=1.0, 24 | **kwargs, 25 | ): 26 | if kwargs: 27 | print( 28 | "ActorCritic.__init__ got unexpected arguments, which will be ignored: " 29 | + str([key for key in kwargs.keys()]) 30 | ) 31 | super().__init__() 32 | activation = get_activation(activation) 33 | 34 | mlp_input_dim_a = num_actor_obs 35 | mlp_input_dim_c = num_critic_obs 36 | # Policy 37 | actor_layers = [] 38 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 39 | actor_layers.append(activation) 40 | for layer_index in range(len(actor_hidden_dims)): 41 | if layer_index == len(actor_hidden_dims) - 1: 42 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], num_actions)) 43 | else: 44 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], actor_hidden_dims[layer_index + 1])) 45 | actor_layers.append(activation) 46 | self.actor = 
nn.Sequential(*actor_layers) 47 | 48 | # Value function 49 | critic_layers = [] 50 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 51 | critic_layers.append(activation) 52 | for layer_index in range(len(critic_hidden_dims)): 53 | if layer_index == len(critic_hidden_dims) - 1: 54 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1)) 55 | else: 56 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1])) 57 | critic_layers.append(activation) 58 | self.critic = nn.Sequential(*critic_layers) 59 | 60 | print(f"Actor MLP: {self.actor}") 61 | print(f"Critic MLP: {self.critic}") 62 | 63 | # Action noise 64 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 65 | self.distribution = None 66 | # disable args validation for speedup 67 | Normal.set_default_validate_args = False 68 | 69 | # seems that we get better performance without init 70 | # self.init_memory_weights(self.memory_a, 0.001, 0.) 71 | # self.init_memory_weights(self.memory_c, 0.001, 0.) 72 | 73 | @staticmethod 74 | # not used at the moment 75 | def init_weights(sequential, scales): 76 | [ 77 | torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) 78 | for idx, module in enumerate(mod for mod in sequential if isinstance(mod, nn.Linear)) 79 | ] 80 | 81 | def reset(self, dones=None): 82 | pass 83 | 84 | def forward(self): 85 | raise NotImplementedError 86 | 87 | @property 88 | def action_mean(self): 89 | return self.distribution.mean 90 | 91 | @property 92 | def action_std(self): 93 | return self.distribution.stddev 94 | 95 | @property 96 | def entropy(self): 97 | return self.distribution.entropy().sum(dim=-1) 98 | 99 | def update_distribution(self, observations): 100 | mean = self.actor(observations) 101 | self.distribution = Normal(mean, mean * 0.0 + self.std) 102 | 103 | def act(self, observations, **kwargs): 104 | self.update_distribution(observations) 105 | return self.distribution.sample() 106 | 107 | def get_actions_log_prob(self, actions): 108 | return self.distribution.log_prob(actions).sum(dim=-1) 109 | 110 | def act_inference(self, observations): 111 | actions_mean = self.actor(observations) 112 | return actions_mean 113 | 114 | def evaluate(self, critic_observations, **kwargs): 115 | value = self.critic(critic_observations) 116 | return value 117 | 118 | 119 | def get_activation(act_name): 120 | if act_name == "elu": 121 | return nn.ELU() 122 | elif act_name == "selu": 123 | return nn.SELU() 124 | elif act_name == "relu": 125 | return nn.ReLU() 126 | elif act_name == "crelu": 127 | return nn.CReLU() 128 | elif act_name == "lrelu": 129 | return nn.LeakyReLU() 130 | elif act_name == "tanh": 131 | return nn.Tanh() 132 | elif act_name == "sigmoid": 133 | return nn.Sigmoid() 134 | else: 135 | print("invalid activation function!") 136 | return None 137 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic_depth_cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.distributions import Normal 6 | 7 | from rsl_rl.modules.actor_critic import get_activation 8 | from rsl_rl.modules.depth_backbone import DepthOnlyFCBackbone, DepthBackbone 9 | from rsl_rl.modules.actor_critic_recurrent import Memory 10 | from rsl_rl.utils import unpad_trajectories 11 | 12 | 13 | class ActorDepthCNN(nn.Module): 14 | def __init__(self, 15 | 
num_obs_proprio, 16 | obs_depth_shape, 17 | num_actions, 18 | activation, 19 | hidden_dims=[256, 256, 128], 20 | ): 21 | super().__init__() 22 | 23 | self.prop_mlp = nn.Sequential( 24 | nn.Linear(num_obs_proprio, hidden_dims[0]), 25 | activation, 26 | nn.Linear(hidden_dims[0], hidden_dims[1]), 27 | activation, 28 | nn.Linear(hidden_dims[1], hidden_dims[2]), 29 | activation, 30 | ) 31 | self.depth_backbone = DepthOnlyFCBackbone( 32 | output_dim=hidden_dims[2], 33 | hidden_dim=hidden_dims[1], 34 | activation=activation, 35 | num_frames=1, 36 | ) 37 | # base_backbone = DepthOnlyFCBackbone( 38 | # output_dim=hidden_dims[2], 39 | # hidden_dim=hidden_dims[1], 40 | # activation=activation, 41 | # num_frames=1, 42 | # ) 43 | # self.depth_backbone = DepthBackbone(base_backbone, hidden_dims[2], hidden_dims[2]) 44 | 45 | self.action_head = nn.Linear(2 * hidden_dims[2], num_actions) 46 | 47 | self.num_obs_proprio = num_obs_proprio 48 | self.obs_depth_shape = obs_depth_shape 49 | 50 | def forward(self, x): 51 | prop_input = x[..., :self.num_obs_proprio] 52 | prop_latent = self.prop_mlp(prop_input) 53 | 54 | depth_input = x[..., self.num_obs_proprio:] 55 | ori_shape = depth_input.shape 56 | depth_input = depth_input.reshape(-1, *self.obs_depth_shape) 57 | depth_latent = self.depth_backbone(depth_input) 58 | 59 | actions = self.action_head(torch.cat((prop_latent, depth_latent), dim=-1)) 60 | return actions 61 | 62 | def encode(self, observations): 63 | original_shape = observations.shape 64 | 65 | if observations.dim() == 3: 66 | observations = observations.reshape(-1, original_shape[-1]) 67 | 68 | prop_input = observations[..., :self.num_obs_proprio] 69 | prop_latent = self.prop_mlp(prop_input) 70 | 71 | depth_input = observations[..., self.num_obs_proprio:] 72 | depth_input = depth_input.reshape(-1, *self.obs_depth_shape) 73 | depth_latent = self.depth_backbone(depth_input) 74 | 75 | if len(original_shape) == 3: 76 | return torch.cat((prop_latent, depth_latent), dim=-1).reshape(*original_shape[:-1], -1) 77 | 78 | return torch.cat((prop_latent, depth_latent), dim=-1) 79 | 80 | def reset(self, dones=None): 81 | self.depth_backbone.reset(dones) 82 | 83 | 84 | class ActorCriticDepthCNN(nn.Module): 85 | is_recurrent = False 86 | 87 | def __init__( 88 | self, 89 | num_actor_obs, 90 | num_critic_obs, 91 | num_actions, 92 | num_actor_obs_prop=48, 93 | obs_depth_shape=(15, 15), 94 | actor_hidden_dims=[256, 256, 128], 95 | critic_hidden_dims=[256, 256, 128], 96 | activation="elu", 97 | init_noise_std=1.0, 98 | **kwargs, 99 | ): 100 | if kwargs: 101 | print( 102 | "ActorCriticDepth.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()), 103 | ) 104 | super().__init__() 105 | activation = get_activation(activation) 106 | mlp_input_dim_c = num_critic_obs 107 | 108 | # Policy Function 109 | self.actor = ActorDepthCNN(num_actor_obs_prop, obs_depth_shape, num_actions, activation, actor_hidden_dims) 110 | 111 | # Value function 112 | critic_layers = [] 113 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 114 | critic_layers.append(activation) 115 | for layer_index in range(len(critic_hidden_dims)): 116 | if layer_index == len(critic_hidden_dims) - 1: 117 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1)) 118 | else: 119 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1])) 120 | critic_layers.append(activation) 121 | self.critic = nn.Sequential(*critic_layers) 122 | 123 | print(f"Actor MLP+CNN: 
{self.actor}") 124 | print(f"Critic MLP: {self.critic}") 125 | 126 | # Action noise 127 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 128 | self.distribution = None 129 | # disable args validation for speedup 130 | Normal.set_default_validate_args = False 131 | 132 | def reset(self, dones=None): 133 | pass 134 | 135 | def forward(self): 136 | raise NotImplementedError 137 | 138 | @property 139 | def action_mean(self): 140 | return self.distribution.mean 141 | 142 | @property 143 | def action_std(self): 144 | return self.distribution.stddev 145 | 146 | @property 147 | def entropy(self): 148 | return self.distribution.entropy().sum(dim=-1) 149 | 150 | def update_distribution(self, observations): 151 | mean = self.actor(observations) 152 | self.distribution = Normal(mean, mean * 0.0 + self.std) 153 | 154 | def act(self, observations, **kwargs): 155 | self.update_distribution(observations) 156 | return self.distribution.sample() 157 | 158 | def get_actions_log_prob(self, actions): 159 | return self.distribution.log_prob(actions).sum(dim=-1) 160 | 161 | def act_inference(self, observations): 162 | actions_mean = self.actor(observations) 163 | return actions_mean 164 | 165 | def evaluate(self, critic_observations, **kwargs): 166 | value = self.critic(critic_observations) 167 | return value 168 | 169 | def act_hidden(self, hidden_states): 170 | mean = self.actor.action_head(hidden_states) 171 | self.distribution = Normal(mean, mean * 0.0 + self.std) 172 | return self.distribution.sample() 173 | 174 | def act_hidden_inference(self, hidden_states): 175 | actions_mean = self.actor.action_head(hidden_states) 176 | return actions_mean 177 | 178 | def evaluate_hidden(self, hidden_states): 179 | return self.critic.value_head(hidden_states) 180 | 181 | def get_hidden_states(self): 182 | return self.actor.depth_backbone.hidden_states, self.actor.depth_backbone.hidden_states 183 | 184 | 185 | 186 | class ActorCriticDepthCNNRecurrent(ActorCriticDepthCNN): 187 | is_recurrent = True 188 | 189 | def __init__( 190 | self, 191 | num_actor_obs, 192 | num_critic_obs, 193 | num_actions, 194 | num_actor_obs_prop=48, 195 | num_critic_obs_prop=48, 196 | obs_depth_shape=(15, 15), 197 | actor_hidden_dims=[256, 256, 128], 198 | critic_hidden_dims=[256, 256, 128], 199 | activation="elu", 200 | rnn_type="lstm", 201 | rnn_input_size=256, 202 | rnn_hidden_size=256, 203 | rnn_num_layers=1, 204 | init_noise_std=1.0, 205 | **kwargs, 206 | ): 207 | if kwargs: 208 | print( 209 | "ActorCriticDepthCNNRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()), 210 | ) 211 | 212 | super().__init__( 213 | num_actor_obs=num_actor_obs, 214 | num_critic_obs=num_critic_obs, 215 | num_actions=num_actions, 216 | num_actor_obs_prop=num_actor_obs_prop, 217 | num_critic_obs_prop=num_critic_obs_prop, 218 | obs_depth_shape=obs_depth_shape, 219 | actor_hidden_dims=actor_hidden_dims, 220 | critic_hidden_dims=critic_hidden_dims, 221 | activation=activation, 222 | init_noise_std=init_noise_std, 223 | ) 224 | 225 | self.memory_a = Memory(rnn_input_size, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 226 | self.memory_c = Memory(rnn_input_size, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 227 | 228 | print(f"Actor RNN: {self.memory_a}") 229 | print(f"Critic RNN: {self.memory_c}") 230 | 231 | def reset(self, dones=None): 232 | self.memory_a.reset(dones) 233 | self.memory_c.reset(dones) 234 | 235 | def act(self, observations, masks=None, 
hidden_states=None): 236 | observations = self.actor.encode(observations) 237 | input_a = self.memory_a(observations, masks, hidden_states) 238 | return super().act_hidden(input_a.squeeze(0)) 239 | 240 | def act_inference(self, observations): 241 | observations = self.actor.encode(observations) 242 | input_a = self.memory_a(observations) 243 | return super().act_hidden_inference(input_a.squeeze(0)) 244 | 245 | def evaluate(self, critic_observations, masks=None, hidden_states=None): 246 | critic_observations = self.critic.encode(critic_observations) 247 | input_c = self.memory_c(critic_observations, masks, hidden_states) 248 | return super().evaluate_hidden(input_c.squeeze(0)) 249 | 250 | def get_hidden_states(self): 251 | return self.memory_a.hidden_states, self.memory_c.hidden_states 252 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic_history.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.distributions import Normal 6 | 7 | from rsl_rl.modules.actor_critic import get_activation 8 | 9 | 10 | class ActorHistory(nn.Module): 11 | def __init__( 12 | self, 13 | num_actor_obs, 14 | num_actions, 15 | activation, 16 | actor_hidden_dims=[256, 256, 256], 17 | ): 18 | super().__init__() 19 | 20 | mlp_input_dim_a = num_actor_obs 21 | # Policy (use mlp for now) 22 | actor_layers = [] 23 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 24 | actor_layers.append(activation) 25 | for layer_index in range(len(actor_hidden_dims)): 26 | if layer_index == len(actor_hidden_dims) - 1: 27 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], num_actions)) 28 | else: 29 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], actor_hidden_dims[layer_index + 1])) 30 | actor_layers.append(activation) 31 | self.model = nn.Sequential(*actor_layers) 32 | 33 | def forward(self, x): 34 | # x is of shape (batch_size, history_len, num_single_observations) 35 | x = x.view(*x.shape[:-2], -1) 36 | return self.model(x) 37 | 38 | 39 | class CriticHistory(nn.Module): 40 | def __init__( 41 | self, 42 | num_critic_obs, 43 | activation, 44 | critic_hidden_dims=[256, 256, 256], 45 | ): 46 | super().__init__() 47 | 48 | mlp_input_dim_c = num_critic_obs 49 | # Value function (use mlp for now) 50 | critic_layers = [] 51 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 52 | critic_layers.append(activation) 53 | for layer_index in range(len(critic_hidden_dims)): 54 | if layer_index == len(critic_hidden_dims) - 1: 55 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1)) 56 | else: 57 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1])) 58 | critic_layers.append(activation) 59 | self.model = nn.Sequential(*critic_layers) 60 | 61 | def forward(self, x): 62 | # x is of shape (batch_size, history_len, num_single_observations) 63 | x = x.view(*x.shape[:-2], -1) 64 | return self.model(x) 65 | 66 | 67 | class ActorCriticHistory(nn.Module): 68 | is_recurrent = False 69 | 70 | def __init__( 71 | self, 72 | num_actor_obs, 73 | num_critic_obs, 74 | num_actions, 75 | actor_hidden_dims=[256, 256, 256], 76 | critic_hidden_dims=[256, 256, 256], 77 | activation="elu", 78 | init_noise_std=1.0, 79 | **kwargs, 80 | ): 81 | if kwargs: 82 | print( 83 | "ActorCritic.__init__ got unexpected arguments, which will be ignored: " 
84 | + str([key for key in kwargs.keys()]) 85 | ) 86 | super().__init__() 87 | activation = get_activation(activation) 88 | 89 | self.actor = ActorHistory( 90 | num_actor_obs=num_actor_obs, 91 | num_actions=num_actions, 92 | actor_hidden_dims=actor_hidden_dims, 93 | activation=activation, 94 | ) 95 | self.critic = CriticHistory( 96 | num_critic_obs=num_critic_obs, 97 | critic_hidden_dims=critic_hidden_dims, 98 | activation=activation, 99 | ) 100 | 101 | print(f"Actor MLP: {self.actor}") 102 | print(f"Critic MLP: {self.critic}") 103 | 104 | # Action noise 105 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 106 | self.distribution = None 107 | # disable args validation for speedup 108 | Normal.set_default_validate_args = False 109 | 110 | def reset(self, dones=None): 111 | pass 112 | 113 | def forward(self): 114 | raise NotImplementedError 115 | 116 | @property 117 | def action_mean(self): 118 | return self.distribution.mean 119 | 120 | @property 121 | def action_std(self): 122 | return self.distribution.stddev 123 | 124 | @property 125 | def entropy(self): 126 | return self.distribution.entropy().sum(dim=-1) 127 | 128 | def update_distribution(self, observations): 129 | mean = self.actor(observations) 130 | self.distribution = Normal(mean, mean * 0.0 + self.std) 131 | 132 | def act(self, observations, **kwargs): 133 | self.update_distribution(observations) 134 | return self.distribution.sample() 135 | 136 | def get_actions_log_prob(self, actions): 137 | return self.distribution.log_prob(actions).sum(dim=-1) 138 | 139 | def act_inference(self, observations): 140 | actions_mean = self.actor(observations) 141 | return actions_mean 142 | 143 | def evaluate(self, critic_observations, **kwargs): 144 | value = self.critic(critic_observations) 145 | return value -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic_recurrent.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from __future__ import annotations 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from rsl_rl.modules.actor_critic import ActorCritic, get_activation 10 | from rsl_rl.utils import unpad_trajectories 11 | 12 | 13 | class ActorCriticRecurrent(ActorCritic): 14 | is_recurrent = True 15 | 16 | def __init__( 17 | self, 18 | num_actor_obs, 19 | num_critic_obs, 20 | num_actions, 21 | actor_hidden_dims=[256, 256, 256], 22 | critic_hidden_dims=[256, 256, 256], 23 | activation="elu", 24 | rnn_type="lstm", 25 | rnn_hidden_size=256, 26 | rnn_num_layers=1, 27 | init_noise_std=1.0, 28 | **kwargs, 29 | ): 30 | if kwargs: 31 | print( 32 | "ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()), 33 | ) 34 | 35 | super().__init__( 36 | num_actor_obs=rnn_hidden_size, 37 | num_critic_obs=rnn_hidden_size, 38 | num_actions=num_actions, 39 | actor_hidden_dims=actor_hidden_dims, 40 | critic_hidden_dims=critic_hidden_dims, 41 | activation=activation, 42 | init_noise_std=init_noise_std, 43 | ) 44 | 45 | activation = get_activation(activation) 46 | 47 | self.memory_a = Memory(num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 48 | self.memory_c = Memory(num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 49 | 50 | print(f"Actor RNN: {self.memory_a}") 51 | print(f"Critic RNN: {self.memory_c}") 52 | 53 | def 
reset(self, dones=None): 54 | self.memory_a.reset(dones) 55 | self.memory_c.reset(dones) 56 | 57 | def act(self, observations, masks=None, hidden_states=None): 58 | input_a = self.memory_a(observations, masks, hidden_states) 59 | return super().act(input_a.squeeze(0)) 60 | 61 | def act_inference(self, observations): 62 | input_a = self.memory_a(observations) 63 | return super().act_inference(input_a.squeeze(0)) 64 | 65 | def evaluate(self, critic_observations, masks=None, hidden_states=None): 66 | input_c = self.memory_c(critic_observations, masks, hidden_states) 67 | return super().evaluate(input_c.squeeze(0)) 68 | 69 | def get_hidden_states(self): 70 | return self.memory_a.hidden_states, self.memory_c.hidden_states 71 | 72 | 73 | class Memory(torch.nn.Module): 74 | def __init__(self, input_size, type="lstm", num_layers=1, hidden_size=256): 75 | super().__init__() 76 | # RNN 77 | rnn_cls = nn.GRU if type.lower() == "gru" else nn.LSTM 78 | self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers) 79 | self.hidden_states = None 80 | 81 | def forward(self, input, masks=None, hidden_states=None): 82 | batch_mode = masks is not None 83 | if batch_mode: 84 | # batch mode (policy update): need saved hidden states 85 | if hidden_states is None: 86 | raise ValueError("Hidden states not passed to memory module during policy update") 87 | out, _ = self.rnn(input, hidden_states) 88 | out = unpad_trajectories(out, masks) 89 | else: 90 | # inference mode (collection): use hidden states of last step 91 | out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states) 92 | return out 93 | 94 | def reset(self, dones=None): 95 | # When the RNN is an LSTM, self.hidden_states_a is a list with hidden_state and cell_state 96 | for hidden_state in self.hidden_states: 97 | hidden_state[..., dones, :] = 0.0 98 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/depth_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class DepthBackbone(nn.Module): 6 | def __init__(self, base_backbone, rnn_hidden_dim, output_dim) -> None: 7 | super().__init__() 8 | last_activation = nn.Tanh() 9 | self.base_backbone = base_backbone 10 | self.rnn = nn.GRU( 11 | input_size=base_backbone.output_dim, 12 | hidden_size=rnn_hidden_dim, 13 | num_layers=1, 14 | ) 15 | self.output_mlp = nn.Sequential( 16 | nn.Linear(rnn_hidden_dim, output_dim), 17 | last_activation 18 | ) 19 | self.hidden_states = None 20 | 21 | def forward(self, depth_input, ori_shape, masks=None, hidden_states=None): 22 | depth_latent = self.base_backbone(depth_input) 23 | # depth_latent = self.base_backbone(depth_image) 24 | batch_mode = masks is not None 25 | if batch_mode: 26 | # batch mode (policy update): need saved hidden states 27 | if hidden_states is None: 28 | raise ValueError("Hidden states not passed to memory module during policy update") 29 | depth_latent = depth_latent.view(*ori_shape[:2], -1) 30 | out, _ = self.rnn(depth_latent, hidden_states) 31 | else: 32 | # inference mode (collection): use hidden states of last step 33 | out, self.hidden_states = self.rnn(depth_latent.unsqueeze(0), self.hidden_states) 34 | out = out.squeeze(0) 35 | out = self.output_mlp(out.squeeze(1)) 36 | 37 | return out 38 | 39 | def detach_hidden_states(self): 40 | self.hidden_states = self.hidden_states.detach().clone() 41 | 42 | def reset(self, dones): 43 | self.hidden_states[..., dones, 
:] = 0.0 44 | 45 | 46 | class DepthOnlyFCBackbone(nn.Module): 47 | def __init__(self, output_dim, hidden_dim, activation, num_frames=1): 48 | super().__init__() 49 | 50 | self.num_frames = num_frames 51 | self.output_dim = output_dim 52 | self.image_compression = nn.Sequential( 53 | # [1, 24, 32] 54 | nn.Conv2d(in_channels=self.num_frames, out_channels=16, kernel_size=5), 55 | # [16, 20, 28] 56 | nn.MaxPool2d(kernel_size=2, stride=2), 57 | # [16, 10, 14] 58 | activation, 59 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3), 60 | # [32, 8, 12] 61 | nn.MaxPool2d(kernel_size=2, stride=2), 62 | # [32, 4, 6] 63 | activation, 64 | nn.Flatten(), 65 | 66 | nn.Linear(32 * 4 * 6, hidden_dim), 67 | activation, 68 | nn.Linear(hidden_dim, output_dim), 69 | activation 70 | ) 71 | 72 | def forward(self, images: torch.Tensor): 73 | latent = self.image_compression(images.unsqueeze(1)) 74 | 75 | return latent -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Preferred Networks, Inc. 2 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | from __future__ import annotations 6 | 7 | import torch 8 | from torch import nn 9 | 10 | 11 | class EmpiricalNormalization(nn.Module): 12 | """Normalize mean and variance of values based on empirical values.""" 13 | 14 | def __init__(self, shape, eps=1e-2, until=None): 15 | """Initialize EmpiricalNormalization module. 16 | 17 | Args: 18 | shape (int or tuple of int): Shape of input values except batch axis. 19 | eps (float): Small value for stability. 20 | until (int or None): If this arg is specified, the link learns input values until the sum of batch sizes 21 | exceeds it. 22 | """ 23 | super().__init__() 24 | self.eps = eps 25 | self.until = until 26 | self.register_buffer("_mean", torch.zeros(shape).unsqueeze(0)) 27 | self.register_buffer("_var", torch.ones(shape).unsqueeze(0)) 28 | self.register_buffer("_std", torch.ones(shape).unsqueeze(0)) 29 | self.count = 0 30 | 31 | @property 32 | def mean(self): 33 | return self._mean.squeeze(0).clone() 34 | 35 | @property 36 | def std(self): 37 | return self._std.squeeze(0).clone() 38 | 39 | def forward(self, x): 40 | """Normalize mean and variance of values based on empirical values. 
41 | 42 | Args: 43 | x (ndarray or Variable): Input values 44 | 45 | Returns: 46 | ndarray or Variable: Normalized output values 47 | """ 48 | 49 | if self.training: 50 | self.update(x) 51 | return (x - self._mean) / (self._std + self.eps) 52 | 53 | @torch.jit.unused 54 | def update(self, x): 55 | """Learn input values without computing the output values of them""" 56 | 57 | if self.until is not None and self.count >= self.until: 58 | return 59 | 60 | count_x = x.shape[0] 61 | self.count += count_x 62 | rate = count_x / self.count 63 | 64 | var_x = torch.var(x, dim=0, unbiased=False, keepdim=True) 65 | mean_x = torch.mean(x, dim=0, keepdim=True) 66 | delta_mean = mean_x - self._mean 67 | self._mean += rate * delta_mean 68 | self._var += rate * (var_x - self._var + delta_mean * (mean_x - self._mean)) 69 | self._std = torch.sqrt(self._var) 70 | 71 | @torch.jit.unused 72 | def inverse(self, y): 73 | return y * (self._std + self.eps) + self._mean 74 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/runners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Implementation of runners for environment-agent interaction.""" 5 | 6 | from .on_policy_runner import OnPolicyRunner 7 | from .on_policy_runner_history import OnPolicyRunnerHistory 8 | 9 | __all__ = ["OnPolicyRunner", "OnPolicyRunnerHistory"] 10 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Implementation of transitions storage for RL-agent.""" 5 | 6 | from .rollout_storage import RolloutStorage 7 | 8 | __all__ = ["RolloutStorage"] 9 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | """Helper functions.""" 5 | 6 | from .utils import split_and_pad_trajectories, store_code_state, unpad_trajectories 7 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/neptune_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from __future__ import annotations 5 | 6 | import os 7 | from dataclasses import asdict 8 | from torch.utils.tensorboard import SummaryWriter 9 | 10 | try: 11 | import neptune 12 | except ModuleNotFoundError: 13 | raise ModuleNotFoundError("neptune-client is required to log to Neptune.") 14 | 15 | 16 | class NeptuneLogger: 17 | def __init__(self, project, token): 18 | self.run = neptune.init_run(project=project, api_token=token) 19 | 20 | def store_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg): 21 | self.run["runner_cfg"] = runner_cfg 22 | self.run["policy_cfg"] = policy_cfg 23 | self.run["alg_cfg"] = alg_cfg 24 | self.run["env_cfg"] = asdict(env_cfg) 25 | 26 | 27 | class NeptuneSummaryWriter(SummaryWriter): 28 | """Summary writer for Neptune.""" 29 | 30 | def __init__(self, log_dir: str, flush_secs: int, cfg): 31 | 
super().__init__(log_dir, flush_secs) 32 | 33 | try: 34 | project = cfg["neptune_project"] 35 | except KeyError: 36 | raise KeyError("Please specify neptune_project in the runner config, e.g. legged_gym.") 37 | 38 | try: 39 | token = os.environ["NEPTUNE_API_TOKEN"] 40 | except KeyError: 41 | raise KeyError( 42 | "Neptune api token not found. Please run or add to ~/.bashrc: export NEPTUNE_API_TOKEN=YOUR_API_TOKEN" 43 | ) 44 | 45 | try: 46 | entity = os.environ["NEPTUNE_USERNAME"] 47 | except KeyError: 48 | raise KeyError( 49 | "Neptune username not found. Please run or add to ~/.bashrc: export NEPTUNE_USERNAME=YOUR_USERNAME" 50 | ) 51 | 52 | neptune_project = entity + "/" + project 53 | 54 | self.neptune_logger = NeptuneLogger(neptune_project, token) 55 | 56 | self.name_map = { 57 | "Train/mean_reward/time": "Train/mean_reward_time", 58 | "Train/mean_episode_length/time": "Train/mean_episode_length_time", 59 | } 60 | 61 | run_name = os.path.split(log_dir)[-1] 62 | 63 | self.neptune_logger.run["log_dir"].log(run_name) 64 | 65 | def _map_path(self, path): 66 | if path in self.name_map: 67 | return self.name_map[path] 68 | else: 69 | return path 70 | 71 | def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False): 72 | super().add_scalar( 73 | tag, 74 | scalar_value, 75 | global_step=global_step, 76 | walltime=walltime, 77 | new_style=new_style, 78 | ) 79 | self.neptune_logger.run[self._map_path(tag)].log(scalar_value, step=global_step) 80 | 81 | def stop(self): 82 | self.neptune_logger.run.stop() 83 | 84 | def log_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg): 85 | self.neptune_logger.store_config(env_cfg, runner_cfg, alg_cfg, policy_cfg) 86 | 87 | def save_model(self, model_path, iter): 88 | self.neptune_logger.run["model/saved_model_" + str(iter)].upload(model_path) 89 | 90 | def save_file(self, path, iter=None): 91 | name = path.rsplit("/", 1)[-1].split(".")[0] 92 | self.neptune_logger.run["git_diff/" + name].upload(path) 93 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from __future__ import annotations 5 | 6 | import git 7 | import os 8 | import pathlib 9 | import torch 10 | 11 | 12 | def split_and_pad_trajectories(tensor, dones): 13 | """Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length og the longest trajectory. 
14 | Returns masks corresponding to valid parts of the trajectories 15 | Example: 16 | Input: [ [a1, a2, a3, a4 | a5, a6], 17 | [b1, b2 | b3, b4, b5 | b6] 18 | ] 19 | 20 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True], 21 | [a5, a6, 0, 0], | [True, True, False, False], 22 | [b1, b2, 0, 0], | [True, True, False, False], 23 | [b3, b4, b5, 0], | [True, True, True, False], 24 | [b6, 0, 0, 0] | [True, False, False, False], 25 | ] | ] 26 | 27 | Assumes that the inputy has the following dimension order: [time, number of envs, additional dimensions] 28 | """ 29 | dones = dones.clone() 30 | dones[-1] = 1 31 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping 32 | flat_dones = dones.transpose(1, 0).reshape(-1, 1) 33 | 34 | # Get length of trajectory by counting the number of successive not done elements 35 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0])) 36 | trajectory_lengths = done_indices[1:] - done_indices[:-1] 37 | trajectory_lengths_list = trajectory_lengths.tolist() 38 | # Extract the individual trajectories 39 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1), trajectory_lengths_list) 40 | # add at least one full length trajectory 41 | trajectories = trajectories + (torch.zeros(tensor.shape[0], tensor.shape[-1], device=tensor.device),) 42 | # pad the trajectories to the length of the longest trajectory 43 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories) 44 | # remove the added tensor 45 | padded_trajectories = padded_trajectories[:, :-1] 46 | 47 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1) 48 | return padded_trajectories, trajectory_masks 49 | 50 | 51 | def unpad_trajectories(trajectories, masks): 52 | """Does the inverse operation of split_and_pad_trajectories()""" 53 | # Need to transpose before and after the masking to have proper reshaping 54 | return ( 55 | trajectories.transpose(1, 0)[masks.transpose(1, 0)] 56 | .view(-1, trajectories.shape[0], trajectories.shape[-1]) 57 | .transpose(1, 0) 58 | ) 59 | 60 | 61 | def store_code_state(logdir, repositories) -> list: 62 | git_log_dir = os.path.join(logdir, "git") 63 | os.makedirs(git_log_dir, exist_ok=True) 64 | file_paths = [] 65 | for repository_file_path in repositories: 66 | try: 67 | repo = git.Repo(repository_file_path, search_parent_directories=True) 68 | except Exception: 69 | print(f"Could not find git repository in {repository_file_path}. 
Skipping.") 70 | # skip if not a git repository 71 | continue 72 | # get the name of the repository 73 | repo_name = pathlib.Path(repo.working_dir).name 74 | t = repo.head.commit.tree 75 | diff_file_name = os.path.join(git_log_dir, f"{repo_name}.diff") 76 | # check if the diff file already exists 77 | if os.path.isfile(diff_file_name): 78 | continue 79 | # write the diff file 80 | print(f"Storing git diff for '{repo_name}' in: {diff_file_name}") 81 | with open(diff_file_name, "x", encoding='utf-8') as f: 82 | content = f"--- git status ---\n{repo.git.status()} \n\n\n--- git diff ---\n{repo.git.diff(t)}" 83 | content = content 84 | f.write(content) 85 | # add the file path to the list of files to be uploaded 86 | file_paths.append(diff_file_name) 87 | return file_paths 88 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/wandb_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from __future__ import annotations 5 | 6 | import os 7 | from dataclasses import asdict 8 | from torch.utils.tensorboard import SummaryWriter 9 | 10 | try: 11 | import wandb 12 | except ModuleNotFoundError: 13 | raise ModuleNotFoundError("Wandb is required to log to Weights and Biases.") 14 | 15 | 16 | class WandbSummaryWriter(SummaryWriter): 17 | """Summary writer for Weights and Biases.""" 18 | 19 | def __init__(self, log_dir: str, flush_secs: int, cfg): 20 | super().__init__(log_dir, flush_secs) 21 | 22 | try: 23 | project = cfg["wandb_project"] 24 | except KeyError: 25 | raise KeyError("Please specify wandb_project in the runner config, e.g. legged_gym.") 26 | 27 | try: 28 | entity = os.environ["WANDB_USERNAME"] 29 | except KeyError: 30 | raise KeyError( 31 | "Wandb username not found. 
Please run or add to ~/.bashrc: export WANDB_USERNAME=YOUR_USERNAME" 32 | ) 33 | 34 | wandb.init(project=project, entity=entity) 35 | 36 | # Change generated name to project-number format 37 | wandb.run.name = project + wandb.run.name.split("-")[-1] 38 | 39 | self.name_map = { 40 | "Train/mean_reward/time": "Train/mean_reward_time", 41 | "Train/mean_episode_length/time": "Train/mean_episode_length_time", 42 | } 43 | 44 | run_name = os.path.split(log_dir)[-1] 45 | 46 | wandb.log({"log_dir": run_name}) 47 | 48 | def store_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg): 49 | wandb.config.update({"runner_cfg": runner_cfg}) 50 | wandb.config.update({"policy_cfg": policy_cfg}) 51 | wandb.config.update({"alg_cfg": alg_cfg}) 52 | wandb.config.update({"env_cfg": asdict(env_cfg)}) 53 | 54 | def _map_path(self, path): 55 | if path in self.name_map: 56 | return self.name_map[path] 57 | else: 58 | return path 59 | 60 | def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False): 61 | super().add_scalar( 62 | tag, 63 | scalar_value, 64 | global_step=global_step, 65 | walltime=walltime, 66 | new_style=new_style, 67 | ) 68 | wandb.log({self._map_path(tag): scalar_value}, step=global_step) 69 | 70 | def stop(self): 71 | wandb.finish() 72 | 73 | def log_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg): 74 | self.store_config(env_cfg, runner_cfg, alg_cfg, policy_cfg) 75 | 76 | def save_model(self, model_path, iter): 77 | wandb.save(model_path, base_path=os.path.dirname(model_path)) 78 | 79 | def save_file(self, path, iter=None): 80 | wandb.save(path, base_path=os.path.dirname(path)) 81 | -------------------------------------------------------------------------------- /rsl_rl/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from setuptools import find_packages, setup 5 | 6 | setup( 7 | name="rsl_rl", 8 | version="2.0.2", 9 | packages=find_packages(), 10 | author="ETH Zurich, NVIDIA CORPORATION", 11 | maintainer="Nikita Rudin, David Hoeller", 12 | maintainer_email="rudinn@ethz.ch", 13 | url="https://github.com/leggedrobotics/rsl_rl", 14 | license="BSD-3", 15 | description="Fast and simple RL algorithms implemented in pytorch", 16 | python_requires=">=3.6", 17 | install_requires=[ 18 | "torch>=1.10.0", 19 | "torchvision>=0.5.0", 20 | "numpy>=1.16.4", 21 | "GitPython", 22 | "onnx", 23 | ], 24 | ) 25 | -------------------------------------------------------------------------------- /scripts/cli_args.py: -------------------------------------------------------------------------------- 1 | # From Isaac Lab's standalone package 2 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers. 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | from __future__ import annotations 8 | 9 | import argparse 10 | from typing import TYPE_CHECKING 11 | 12 | if TYPE_CHECKING: 13 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg 14 | from omni.isaac.lab.envs import ManagerBasedRLEnvCfg 15 | 16 | 17 | def add_rsl_rl_args(parser: argparse.ArgumentParser): 18 | """Add RSL-RL arguments to the parser. 19 | 20 | Args: 21 | parser: The parser to add the arguments to. 
22 | """ 23 | # create a new argument group 24 | arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.") 25 | # -- experiment arguments 26 | arg_group.add_argument( 27 | "--experiment_name", type=str, default=None, help="Name of the experiment folder where logs will be stored." 28 | ) 29 | arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.") 30 | arg_group.add_argument("--save_interval", type=int, default=None, help="Interval to save the model.") 31 | # -- load arguments 32 | arg_group.add_argument("--resume", type=bool, default=None, help="Whether to resume from a checkpoint.") 33 | arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.") 34 | arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.") 35 | # -- logger arguments 36 | arg_group.add_argument( 37 | "--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use." 38 | ) 39 | arg_group.add_argument( 40 | "--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune." 41 | ) 42 | 43 | 44 | def parse_rsl_rl_cfg(task_name, args_cli: argparse.Namespace, play=False) -> RslRlOnPolicyRunnerCfg: 45 | """Parse configuration for RSL-RL agent based on inputs. 46 | 47 | Args: 48 | rslrl_cfg: The default configuration for RSL-RL agent. 49 | args_cli: The command line arguments. 50 | 51 | Returns: 52 | The parsed configuration for RSL-RL agent based on inputs. 53 | """ 54 | from omni.isaac.lab_tasks.utils.parse_cfg import load_cfg_from_registry 55 | 56 | # load the default configuration 57 | rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") 58 | 59 | # override the default configuration with CLI arguments 60 | if args_cli.seed is not None: 61 | rslrl_cfg.seed = args_cli.seed 62 | if args_cli.resume is not None: 63 | rslrl_cfg.resume = args_cli.resume 64 | if args_cli.load_run is not None: 65 | rslrl_cfg.load_run = args_cli.load_run 66 | if args_cli.checkpoint is not None: 67 | rslrl_cfg.load_checkpoint = args_cli.checkpoint 68 | if args_cli.save_interval is not None: 69 | rslrl_cfg.save_interval = args_cli.save_interval 70 | if args_cli.run_name is not None: 71 | rslrl_cfg.run_name = args_cli.run_name 72 | if args_cli.logger is not None: 73 | rslrl_cfg.logger = args_cli.logger 74 | # set the project name for wandb and neptune 75 | if rslrl_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name: 76 | rslrl_cfg.wandb_project = args_cli.log_project_name 77 | rslrl_cfg.neptune_project = args_cli.log_project_name 78 | if args_cli.use_cnn is not None: 79 | rslrl_cfg.use_cnn = args_cli.use_cnn 80 | rslrl_cfg.policy.class_name = "ActorCriticDepthCNN" 81 | rslrl_cfg.policy.obs_depth_shape = (24, 32) 82 | if args_cli.use_rnn: 83 | rslrl_cfg.policy.rnn_input_size = 2 * rslrl_cfg.policy.actor_hidden_dims[-1] 84 | rslrl_cfg.policy.rnn_hidden_size = 2 * rslrl_cfg.policy.actor_hidden_dims[-1] 85 | if not args_cli.use_cnn: 86 | rslrl_cfg.policy.class_name = "ActorCriticRecurrent" 87 | else: 88 | rslrl_cfg.policy.class_name = "ActorCriticDepthCNNRecurrent" 89 | 90 | rslrl_cfg.policy.history_length = args_cli.history_length 91 | 92 | return rslrl_cfg 93 | -------------------------------------------------------------------------------- /scripts/play.py: -------------------------------------------------------------------------------- 1 
| # Copyright (c) 2022-2024, The lab Project Developers. 2 | # All rights reserved. 3 | # 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | """Script to play a checkpoint of an RL agent from RSL-RL.""" 7 | 8 | """Launch Isaac Sim Simulator first.""" 9 | 10 | import argparse 11 | import subprocess 12 | 13 | from omni.isaac.lab.app import AppLauncher 14 | 15 | # local imports 16 | import cli_args # isort: skip 17 | 18 | # add argparse arguments 19 | parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent with RSL-RL.") 20 | # parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.") 21 | parser.add_argument( 22 | "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 23 | ) 24 | parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") 25 | parser.add_argument("--task", type=str, default=None, help="Name of the task.") 26 | parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") 27 | 28 | parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") 29 | parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).") 30 | parser.add_argument("--use_cnn", action="store_true", default=None, help="Use the CNN-based depth actor-critic model.") 31 | parser.add_argument("--arm_fixed", action="store_true", default=False, help="Fix the robot's arms.") 32 | parser.add_argument("--use_rnn", action="store_true", default=False, help="Use RNN in the actor-critic model.") 33 | parser.add_argument("--history_length", default=0, type=int, help="Length of history buffer.") 34 | 35 | # append RSL-RL cli arguments 36 | cli_args.add_rsl_rl_args(parser) 37 | # append AppLauncher cli args 38 | AppLauncher.add_app_launcher_args(parser) 39 | args_cli = parser.parse_args() 40 | 41 | # launch omniverse app 42 | app_launcher = AppLauncher(args_cli) 43 | simulation_app = app_launcher.app 44 | 45 | """Rest everything follows.""" 46 | 47 | import gymnasium as gym 48 | import os 49 | import math 50 | import torch 51 | import imageio 52 | 53 | from rsl_rl.runners import OnPolicyRunner 54 | 55 | from omni.isaac.lab.envs import ManagerBasedRLEnvCfg 56 | from omni.isaac.lab.utils.io import load_yaml 57 | from omni.isaac.lab.utils import update_class_from_dict 58 | 59 | import omni.isaac.lab_tasks # noqa: F401 60 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg 61 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import ( 62 | RslRlOnPolicyRunnerCfg, 63 | RslRlVecEnvWrapper, 64 | export_policy_as_jit, 65 | ) 66 | 67 | from omni.isaac.leggedloco.config import * 68 | from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper 69 | 70 | from utils import quat2eulers 71 | 72 | 73 | def main(): 74 | """Play with RSL-RL agent.""" 75 | # parse configuration 76 | # env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg( 77 | # args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric 78 | # ) 79 | env_cfg = parse_env_cfg(args_cli.task, num_envs=args_cli.num_envs) 80 | agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli) 81 | 82 | # specify directory for logging experiments 83 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) 84 | log_root_path = os.path.abspath(log_root_path) 85 | # print(f"[INFO] Logging experiment in directory: {log_root_path}") 
86 | # specify directory for logging runs: {time-stamp}_{run_name} 87 | log_dir = os.path.join(log_root_path, args_cli.load_run) 88 | print(f"[INFO] Loading run from directory: {log_dir}") 89 | 90 | # update agent config with the one from the loaded run 91 | log_agent_cfg_file_path = os.path.join(log_dir, "params", "agent.yaml") 92 | assert os.path.exists(log_agent_cfg_file_path), f"Agent config file not found: {log_agent_cfg_file_path}" 93 | log_agent_cfg_dict = load_yaml(log_agent_cfg_file_path) 94 | update_class_from_dict(agent_cfg, log_agent_cfg_dict) 95 | 96 | 97 | # create isaac environment 98 | env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) 99 | # wrap around environment for rsl-rl 100 | if args_cli.history_length > 0: 101 | env = RslRlVecEnvHistoryWrapper(env, history_length=args_cli.history_length) 102 | else: 103 | env = RslRlVecEnvWrapper(env) 104 | 105 | # specify directory for logging experiments 106 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) 107 | log_root_path = os.path.abspath(log_root_path) 108 | print(f"[INFO] Loading experiment from directory: {log_root_path}") 109 | resume_path = get_checkpoint_path(log_root_path, args_cli.load_run, agent_cfg.load_checkpoint) 110 | 111 | # load previously trained model 112 | ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) 113 | ppo_runner.load(resume_path) 114 | print(f"[INFO]: Loading model checkpoint from: {resume_path}") 115 | 116 | # obtain the trained policy for inference 117 | policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) 118 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy() 119 | cam_eye = (robot_pos_w[0]+2.5, robot_pos_w[1]+2.5, 20) 120 | cam_target = (robot_pos_w[0], robot_pos_w[1], 0.0) 121 | # set the camera view 122 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target) 123 | 124 | # export policy to onnx 125 | export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") 126 | # export_policy_as_onnx(ppo_runner.alg.actor_critic, export_model_dir, filename="policy.onnx") 127 | export_policy_as_jit(ppo_runner.alg.actor_critic, None, path=export_model_dir, filename="policy.jit") 128 | # reset environment 129 | obs, _ = env.get_observations() 130 | if args_cli.video: 131 | base_env = env.unwrapped 132 | init_frame = base_env.render() 133 | frames = [init_frame] 134 | 135 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy() 136 | cam_eye = (robot_pos_w[0]+2.5, robot_pos_w[1]+2.5, 5) 137 | cam_target = (robot_pos_w[0], robot_pos_w[1], 0.0) 138 | # set the camera view 139 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target) 140 | 141 | # simulate environment 142 | while simulation_app.is_running(): 143 | # run everything in inference mode 144 | with torch.inference_mode(): 145 | # agent stepping 146 | actions = policy(obs) 147 | # env stepping 148 | obs, _, _, infos = env.step(actions) 149 | # import pdb; pdb.set_trace() 150 | 151 | if args_cli.video and len(frames) < args_cli.video_length: 152 | base_env = env.unwrapped 153 | frame = base_env.render() 154 | frames.append(frame) 155 | 156 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy() 157 | robot_quat_w = env.unwrapped.scene["robot"].data.root_quat_w[0].detach().cpu().numpy() 158 | roll, pitch, yaw = quat2eulers(robot_quat_w[0], robot_quat_w[1], robot_quat_w[2], robot_quat_w[3]) 159 | 
cam_eye = (robot_pos_w[0] + 3.0, robot_pos_w[1] + 3.0, robot_pos_w[2] + 3.0) 160 | cam_target = (robot_pos_w[0], robot_pos_w[1], robot_pos_w[2]) 161 | # set the camera view 162 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target) 163 | 164 | if args_cli.video and len(frames) == args_cli.video_length: 165 | break 166 | 167 | writer = imageio.get_writer(os.path.join(log_dir, f"{args_cli.load_run}.mp4"), fps=50) 168 | for frame in frames: 169 | writer.append_data(frame) 170 | writer.close() 171 | 172 | # close the simulator 173 | env.close() 174 | 175 | 176 | if __name__ == "__main__": 177 | # run the main function 178 | main() 179 | # close sim app 180 | simulation_app.close() 181 | -------------------------------------------------------------------------------- /scripts/play_low_matterport_keyboard.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2024, The lab Project Developers. 2 | # All rights reserved. 3 | # 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | """Script to play a checkpoint of an RL agent from RSL-RL.""" 7 | 8 | """Launch Isaac Sim Simulator first.""" 9 | 10 | import argparse 11 | import gymnasium as gym 12 | import os 13 | import math 14 | import torch 15 | import numpy as np 16 | 17 | from omni.isaac.lab.app import AppLauncher 18 | 19 | # local imports 20 | import cli_args # isort: skip 21 | 22 | # add argparse arguments 23 | parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent with RSL-RL.") 24 | parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.") 25 | parser.add_argument( 26 | "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 27 | ) 28 | parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.") 29 | parser.add_argument("--task", type=str, default="h1_rough", help="Name of the task.") 30 | parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") 31 | parser.add_argument("--use_cnn", action="store_true", default=None, help="Use the CNN-based depth actor-critic model.") 32 | parser.add_argument("--arm_fixed", action="store_true", default=False, help="Fix the robot's arms.") 33 | parser.add_argument("--use_rnn", action="store_true", default=False, help="Use RNN in the actor-critic model.") 34 | parser.add_argument("--history_length", default=0, type=int, help="Length of history buffer.") 35 | # append RSL-RL cli arguments 36 | cli_args.add_rsl_rl_args(parser) 37 | # append AppLauncher cli args 38 | AppLauncher.add_app_launcher_args(parser) 39 | args_cli = parser.parse_args() 40 | 41 | # args_cli.experience=f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.kit' 42 | 43 | # launch omniverse app 44 | app_launcher = AppLauncher(args_cli) 45 | simulation_app = app_launcher.app 46 | 47 | """Rest everything follows.""" 48 | 49 | from rsl_rl.runners import OnPolicyRunner 50 | 51 | import omni.isaac.lab_tasks # noqa: F401 52 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg 53 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import ( 54 | RslRlOnPolicyRunnerCfg, 55 | RslRlVecEnvWrapper, 56 | ) 57 | 58 | from omni.isaac.leggedloco.config import * 59 | from omni.isaac.lab.devices.keyboard import Se2Keyboard 60 | from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper 61 | 62 | import omni.isaac.lab.sim as sim_utils 63 | from omni.isaac.lab.markers import VisualizationMarkers, 
VisualizationMarkersCfg 64 | 65 | from utils import quat2eulers 66 | 67 | 68 | def define_markers() -> VisualizationMarkers: 69 | """Define path markers with various different shapes.""" 70 | marker_cfg = VisualizationMarkersCfg( 71 | prim_path="/Visuals/pathMarkers", 72 | markers={ 73 | "waypoint": sim_utils.SphereCfg( 74 | radius=0.1, 75 | visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 0.0)), 76 | ), 77 | }, 78 | ) 79 | return VisualizationMarkers(marker_cfg) 80 | 81 | 82 | def main(): 83 | """Play with RSL-RL agent.""" 84 | # parse configuration 85 | env_cfg = parse_env_cfg(args_cli.task, num_envs=args_cli.num_envs) 86 | agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli, play=True) 87 | 88 | # create isaac environment 89 | env = gym.make(args_cli.task, cfg=env_cfg) 90 | # wrap around environment for rsl-rl 91 | if args_cli.history_length > 0: 92 | env = RslRlVecEnvHistoryWrapper(env, history_length=args_cli.history_length) 93 | else: 94 | env = RslRlVecEnvWrapper(env) 95 | 96 | # specify directory for logging experiments 97 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) 98 | log_root_path = os.path.abspath(log_root_path) 99 | print(f"[INFO] Loading experiment from directory: {log_root_path}") 100 | resume_path = get_checkpoint_path(log_root_path, args_cli.load_run, agent_cfg.load_checkpoint) 101 | print(f"[INFO]: Loading model checkpoint from: {resume_path}") 102 | 103 | # load previously trained model 104 | # import pdb; pdb.set_trace() 105 | ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) 106 | ppo_runner.load(resume_path) 107 | print(f"[INFO]: Loading model checkpoint from: {resume_path}") 108 | 109 | # obtain the trained policy for inference 110 | policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) 111 | 112 | 113 | keyboard_interface = Se2Keyboard(v_x_sensitivity = 0.05, v_y_sensitivity = 0.05, omega_z_sensitivity=0.05) 114 | keyboard_interface.reset() 115 | 116 | # reset environment 117 | obs, _ = env.get_observations() 118 | vel_command_keyboard = torch.zeros(3, device = obs.device) 119 | num_count = 0 120 | 121 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy() 122 | cam_eye = (robot_pos_w[0]+0.0, robot_pos_w[1]+2.0, 1.0) 123 | cam_target = (robot_pos_w[0], robot_pos_w[1], 1.0) 124 | # set the camera view 125 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target) 126 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy() 127 | cam_eye = (robot_pos_w[0]-0.0, robot_pos_w[1]-1.5, 1.2) 128 | robot_quat_w = env.unwrapped.scene["robot"].data.root_quat_w[0].detach().cpu().numpy() 129 | roll, pitch, yaw = quat2eulers(robot_quat_w[0], robot_quat_w[1], robot_quat_w[2], robot_quat_w[3]) 130 | cam_eye = (robot_pos_w[0] - 0.8 * math.sin(-yaw), robot_pos_w[1] - 0.8 * math.cos(-yaw), robot_pos_w[2] + 0.8) 131 | cam_target = (robot_pos_w[0], robot_pos_w[1], robot_pos_w[2]) 132 | 133 | # set the camera view 134 | # env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target) 135 | 136 | # simulate environment 137 | while simulation_app.is_running(): 138 | # run everything in inference mode 139 | with torch.inference_mode(): 140 | commands_key = torch.tensor(keyboard_interface.advance(), device=obs.device) 141 | vel_command_keyboard += commands_key 142 | if num_count % 10 == 0: 143 | print("vel_command_keyboard: ", vel_command_keyboard) 144 | # import pdb; 
pdb.set_trace() 145 | # assert torch.allclose(env.unwrapped.command_manager._terms['base_velocity'].vel_command_b, torch.tensor([0., 0., 0.], device = obs.device)) 146 | # env.command_manager._terms['base_velocity'].vel_command_b[0,:] = commands 147 | # obs[:,9:12] = commands_key 148 | 149 | obs[:,6:9] = torch.tensor([vel_command_keyboard[0], vel_command_keyboard[1], vel_command_keyboard[2]], device = obs.device) 150 | env.update_command(torch.tensor([vel_command_keyboard[0], vel_command_keyboard[1], vel_command_keyboard[2]], device = obs.device)) 151 | # agent stepping 152 | actions = policy(obs) 153 | 154 | # robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy() 155 | # robot_quat_w = env.unwrapped.scene["robot"].data.root_quat_w[0].detach().cpu().numpy() 156 | # print("robot quat: ", robot_quat_w) 157 | # print("robot pos: ", robot_pos_w) 158 | 159 | # roll, pitch, yaw = quat2eulers(robot_quat_w[0], robot_quat_w[1], robot_quat_w[2], robot_quat_w[3]) 160 | # cam_eye = (robot_pos_w[0] - 2.0 * math.cos(-yaw), robot_pos_w[1] - 2.0 * math.sin(yaw), robot_pos_w[2] + 1.0) 161 | # cam_target = (robot_pos_w[0], robot_pos_w[1], robot_pos_w[2]) 162 | # set the camera view 163 | # env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target) 164 | # env stepping 165 | obs, _, _, infos = env.step(actions) 166 | 167 | 168 | # close the simulator 169 | env.close() 170 | 171 | 172 | 173 | 174 | if __name__ == "__main__": 175 | # run the main function 176 | main() 177 | # close sim app 178 | simulation_app.close() 179 | -------------------------------------------------------------------------------- /scripts/run_data_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gzip 3 | import json 4 | import argparse 5 | import subprocess 6 | 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--r2r_data_path", type=str, default="/home/zhaojing/h1-training-isaaclab/assets/vln-ce/R2R_VLNCE_v1-3_preprocessed/train/train_filtered.json.gz") 11 | parser.add_argument("--task", type=str, default="go2_matterport_dataset") 12 | parser.add_argument("--resume", action="store_true") 13 | args = parser.parse_args() 14 | 15 | # Define the base arguments for data collection 16 | collection_args = [ 17 | f"--task={args.task}", 18 | "--headless", 19 | "--enable_cameras" 20 | ] 21 | 22 | with gzip.open(args.r2r_data_path, 'rt') as file: 23 | data = json.load(file) 24 | episodes = data['episodes'] 25 | 26 | start_idx = 0 27 | if args.resume: 28 | # Get already collected episodes from the data directory 29 | data_dir = "collected_data" 30 | if os.path.exists(data_dir): 31 | data_files = os.listdir(data_dir) 32 | data_files = [f for f in data_files if f.endswith(".npy")] 33 | if data_files: 34 | episode_nums = [int(f.split("_")[1].split(".")[0]) for f in data_files] 35 | start_idx = max(episode_nums) + 1 36 | print(f"Resuming from episode {start_idx}.....") 37 | 38 | for i in range(start_idx, len(episodes)): 39 | episode_id = episodes[i]['episode_id'] 40 | 41 | # check if the episode has been collected 42 | if os.path.exists(f"collected_data/episode_{episode_id}.npy"): 43 | print(f"Episode {episode_id} already collected. 
Skipping....") 44 | continue 45 | 46 | # msg = f"\n======================= Collecting Data for Episode {episode_id} =======================" 47 | # # msg += f"\nScene: {episode['scene_id']}" 48 | # # msg += f"\nStart Position: {episode['start_position']}" 49 | # # msg += f"\nInstruction: {episode['instruction']['instruction_text']}\n" 50 | # print(msg) 51 | 52 | # Create episode-specific arguments 53 | episode_args = collection_args + [f"--episode_index={i}"] 54 | 55 | # Run the data collection script 56 | subprocess.run(['python', 'collect_data_matterport.py'] + episode_args) 57 | 58 | # Optional: Add a small delay between episodes 59 | # time.sleep(1) -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers. 2 | # All rights reserved. 3 | # 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | """Script to train RL agent with RSL-RL.""" 7 | 8 | """Launch Isaac Sim Simulator first.""" 9 | 10 | import argparse 11 | 12 | from omni.isaac.lab.app import AppLauncher 13 | 14 | # local imports 15 | import cli_args # isort: skip 16 | 17 | 18 | # add argparse arguments 19 | parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") 20 | parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") 21 | parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") 22 | parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") 23 | # parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.") 24 | parser.add_argument( 25 | "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 
26 | ) 27 | parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") 28 | parser.add_argument("--task", type=str, default=None, help="Name of the task.") 29 | parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") 30 | parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") 31 | parser.add_argument("--use_cnn", action="store_true", default=None, help="Use the CNN-based depth actor-critic model.") 32 | parser.add_argument("--arm_fixed", action="store_true", default=False, help="Fix the robot's arms.") 33 | parser.add_argument("--use_rnn", action="store_true", default=False, help="Use RNN in the actor-critic model.") 34 | parser.add_argument("--history_length", default=0, type=int, help="Length of history buffer.") 35 | 36 | # append RSL-RL cli arguments 37 | cli_args.add_rsl_rl_args(parser) 38 | # append AppLauncher cli args 39 | AppLauncher.add_app_launcher_args(parser) 40 | args_cli = parser.parse_args() 41 | 42 | # launch omniverse app 43 | app_launcher = AppLauncher(args_cli) 44 | simulation_app = app_launcher.app 45 | 46 | """Rest everything follows.""" 47 | 48 | import gymnasium as gym 49 | import os 50 | import torch 51 | from datetime import datetime 52 | 53 | from rsl_rl.runners import OnPolicyRunner 54 | 55 | from omni.isaac.lab.envs import ManagerBasedRLEnvCfg 56 | from omni.isaac.lab.utils.dict import print_dict 57 | from omni.isaac.lab.utils.io import dump_pickle, dump_yaml 58 | 59 | import omni.isaac.lab_tasks # noqa: F401 60 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg 61 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper 62 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg 63 | 64 | # from omni.isaac.viplanner.config import H1RoughEnvCfg, H1BaseRoughEnvCfg, H12DoFRoughEnvCfg, H1VisionRoughEnvCfg, G1VisionRoughEnvCfg 65 | # from omni.isaac.viplanner.config import H1RoughEnvCfg_PLAY, H1BaseRoughEnvCfg_PLAY, H12DoFRoughEnvCfg_PLAY, H1VisionRoughEnvCfg_PLAY, G1VisionRoughEnvCfg_PLAY 66 | from omni.isaac.leggedloco.config import * 67 | from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper 68 | 69 | torch.backends.cuda.matmul.allow_tf32 = True 70 | torch.backends.cudnn.allow_tf32 = True 71 | torch.backends.cudnn.deterministic = False 72 | torch.backends.cudnn.benchmark = False 73 | 74 | 75 | def main(): 76 | """Train with RSL-RL agent.""" 77 | # parse configuration 78 | # env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg( 79 | # args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric 80 | # ) 81 | env_cfg = parse_env_cfg(args_cli.task, num_envs=args_cli.num_envs) 82 | agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli) 83 | 84 | # specify directory for logging experiments 85 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) 86 | log_root_path = os.path.abspath(log_root_path) 87 | print(f"[INFO] Logging experiment in directory: {log_root_path}") 88 | # specify directory for logging runs: {time-stamp}_{run_name} 89 | log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 90 | if agent_cfg.run_name: 91 | log_dir += f"_{agent_cfg.run_name}" 92 | log_dir = os.path.join(log_root_path, log_dir) 93 | 94 | # max iterations for training 95 | if args_cli.max_iterations: 96 | agent_cfg.max_iterations = args_cli.max_iterations 97 | 98 | # create 
isaac environment 99 | env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) 100 | # wrap for video recording 101 | if args_cli.video: 102 | video_kwargs = { 103 | "video_folder": os.path.join(log_dir, "videos"), 104 | "step_trigger": lambda step: step % args_cli.video_interval == 0, 105 | "video_length": args_cli.video_length, 106 | "disable_logger": True, 107 | } 108 | print("[INFO] Recording videos during training.") 109 | print_dict(video_kwargs, nesting=4) 110 | env = gym.wrappers.RecordVideo(env, **video_kwargs) 111 | # wrap around environment for rsl-rl 112 | if args_cli.history_length > 0: 113 | env = RslRlVecEnvHistoryWrapper(env, history_length=args_cli.history_length) 114 | else: 115 | env = RslRlVecEnvWrapper(env) 116 | 117 | # create runner from rsl-rl 118 | runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) 119 | # write git state to logs 120 | runner.add_git_repo_to_log(__file__) 121 | # save resume path before creating a new log_dir 122 | if agent_cfg.resume: 123 | # get path to previous checkpoint 124 | resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) 125 | print(f"[INFO]: Loading model checkpoint from: {resume_path}") 126 | # load previously trained model 127 | runner.load(resume_path) 128 | 129 | # set seed of the environment 130 | env.seed(agent_cfg.seed) 131 | 132 | # dump the configuration into log-directory 133 | dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) 134 | dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) 135 | dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg) 136 | dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg) 137 | 138 | # run training 139 | runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) 140 | 141 | # close the simulator 142 | env.close() 143 | 144 | 145 | if __name__ == "__main__": 146 | # run the main function 147 | main() 148 | # close sim app 149 | simulation_app.close() 150 | -------------------------------------------------------------------------------- /scripts/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def quat2eulers(q0, q1, q2, q3): 5 | """ 6 | Calculates the roll, pitch, and yaw angles from a quaternion. 7 | 8 | Args: 9 | q0: The scalar component of the quaternion. 10 | q1: The x-component of the quaternion. 11 | q2: The y-component of the quaternion. 12 | q3: The z-component of the quaternion. 13 | 14 | Returns: 15 | A tuple containing the roll, pitch, and yaw angles in radians. 
16 | """ 17 | 18 | roll = math.atan2(2 * (q2 * q3 + q0 * q1), q0 ** 2 - q1 ** 2 - q2 ** 2 + q3 ** 2) 19 | pitch = math.asin(2 * (q1 * q3 - q0 * q2)) 20 | yaw = math.atan2(2 * (q1 * q2 + q0 * q3), q0 ** 2 + q1 ** 2 - q2 ** 2 - q3 ** 2) 21 | 22 | return roll, pitch, yaw -------------------------------------------------------------------------------- /src/go2_teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-zj1026/legged-loco/87b0d3d18404e784abc0a62227bc41c940f29ecc/src/go2_teaser.gif -------------------------------------------------------------------------------- /src/h1_teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-zj1026/legged-loco/87b0d3d18404e784abc0a62227bc41c940f29ecc/src/h1_teaser.gif --------------------------------------------------------------------------------