├── .gitignore
├── LICENSE
├── README.md
├── isaaclab_exts
│   └── omni.isaac.leggedloco
│       ├── config
│       │   └── extension.toml
│       ├── omni
│       │   └── isaac
│       │       └── leggedloco
│       │           ├── config
│       │           │   ├── __init__.py
│       │           │   ├── g1
│       │           │   │   ├── __init__.py
│       │           │   │   ├── g1_low_base_cfg.py
│       │           │   │   └── g1_low_vision_cfg.py
│       │           │   ├── go1
│       │           │   │   ├── __init__.py
│       │           │   │   ├── go1_low_base_cfg.py
│       │           │   │   └── go1_low_vision_cfg.py
│       │           │   ├── go2
│       │           │   │   ├── __init__.py
│       │           │   │   ├── go2_low_base_cfg.py
│       │           │   │   └── go2_low_vision_cfg.py
│       │           │   └── h1
│       │           │       ├── __init__.py
│       │           │       ├── h1_low_base_cfg.py
│       │           │       └── h1_low_vision_cfg.py
│       │           ├── leggedloco
│       │           │   ├── __init__.py
│       │           │   └── mdp
│       │           │       ├── __init__.py
│       │           │       ├── actions
│       │           │       │   ├── __init__.py
│       │           │       │   ├── navigation_actions.py
│       │           │       │   ├── vlm_navigation_actions.py
│       │           │       │   └── vlm_navigation_actions_gpt.py
│       │           │       ├── commands
│       │           │       │   ├── __init__.py
│       │           │       │   ├── goal_command_generator.py
│       │           │       │   ├── goal_command_generator_cfg.py
│       │           │       │   ├── lowlevel_command_generator.py
│       │           │       │   ├── lowlevel_command_generator_cfg.py
│       │           │       │   ├── midlevel_command_generator.py
│       │           │       │   ├── midlevel_command_generator_cfg.py
│       │           │       │   ├── path_follower_command_generator.py
│       │           │       │   ├── path_follower_command_generator_cfg.py
│       │           │       │   ├── path_follower_command_generator_gpt.py
│       │           │       │   ├── path_follower_command_generator_gpt_cfg.py
│       │           │       │   ├── rl_command_generator.py
│       │           │       │   ├── rl_command_generator_cfg.py
│       │           │       │   ├── robot_vel_command_generator.py
│       │           │       │   └── robot_vel_command_generator_cfg.py
│       │           │       ├── curriculums.py
│       │           │       ├── events.py
│       │           │       ├── observations.py
│       │           │       └── rewards
│       │           │           ├── __init__.py
│       │           │           └── objnav_rewards.py
│       │           └── utils
│       │               ├── __init__.py
│       │               └── wrappers.py
│       └── setup.py
├── rsl_rl
│   ├── .flake8
│   ├── .gitignore
│   ├── .pre-commit-config.yaml
│   ├── CONTRIBUTORS.md
│   ├── LICENSE
│   ├── README.md
│   ├── config
│   │   └── dummy_config.yaml
│   ├── licenses
│   │   └── dependencies
│   │       ├── black-license.txt
│   │       ├── codespell-license.txt
│   │       ├── flake8-license.txt
│   │       ├── isort-license.txt
│   │       ├── numpy_license.txt
│   │       ├── onnx-license.txt
│   │       ├── pre-commit-hooks-license.txt
│   │       ├── pre-commit-license.txt
│   │       ├── pyright-license.txt
│   │       ├── pyupgrade-license.txt
│   │       └── torch_license.txt
│   ├── pyproject.toml
│   ├── rsl_rl
│   │   ├── __init__.py
│   │   ├── algorithms
│   │   │   ├── __init__.py
│   │   │   └── ppo.py
│   │   ├── env
│   │   │   ├── __init__.py
│   │   │   └── vec_env.py
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   ├── actor_critic.py
│   │   │   ├── actor_critic_depth_cnn.py
│   │   │   ├── actor_critic_history.py
│   │   │   ├── actor_critic_recurrent.py
│   │   │   ├── depth_backbone.py
│   │   │   └── normalizer.py
│   │   ├── runners
│   │   │   ├── __init__.py
│   │   │   ├── on_policy_runner.py
│   │   │   └── on_policy_runner_history.py
│   │   ├── storage
│   │   │   ├── __init__.py
│   │   │   └── rollout_storage.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── neptune_utils.py
│   │       ├── utils.py
│   │       └── wandb_utils.py
│   └── setup.py
├── scripts
│   ├── cli_args.py
│   ├── demo_matterport.py
│   ├── play.py
│   ├── play_low_matterport_keyboard.py
│   ├── run_data_collection.py
│   ├── train.py
│   └── utils.py
└── src
    ├── go2_teaser.gif
    └── h1_teaser.gif
/.gitignore:
--------------------------------------------------------------------------------
1 | assets/
2 | logs/
3 | *.pyc
4 | *.egg-info
5 | .vscode/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 An-Chieh Cheng, Yandong Ji, Zhaojing Yang, Xueyan Zou, Jan Kautz, Erdem Bıyık, Hongxu Yin, Sifei Liu, Xiaolong Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Legged Loco
 2 | This repo is used to train low-level locomotion policies for the Unitree Go2 and H1 in Isaac Lab.
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | ## Installation
12 | 1. Create a new conda environment with python 3.10.
13 | ```shell
14 | conda create -n isaaclab python=3.10
15 | conda activate isaaclab
16 | ```
17 |
18 | 2. Make sure that Isaac Sim is installed on your machine. Otherwise, follow [this guideline](https://docs.omniverse.nvidia.com/isaacsim/latest/installation/install_workstation.html) to install it. If installing via the Omniverse Launcher, please ensure that Isaac Sim 4.1.0 is selected and installed. On Ubuntu 22.04 or higher, you can install it via pip:
19 | ```shell
20 | pip install isaacsim-rl==4.1.0 isaacsim-replicator==4.1.0 isaacsim-extscache-physics==4.1.0 isaacsim-extscache-kit-sdk==4.1.0 isaacsim-extscache-kit==4.1.0 isaacsim-app==4.1.0 --extra-index-url https://pypi.nvidia.com
21 | ```
22 |
23 | 3. Install PyTorch.
24 | ```shell
25 | pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cu121
26 | ```
27 |
28 | 4. Clone the Isaac Lab repository, and link extensions.
29 |
30 | **Note**: This codebase was tested with Isaac Lab 1.1.0 and may not be compatible with newer versions. Please make sure to use the modified version of Isaac Lab provided below, which includes important bug fixes and updates. As Isaac Lab is under active development, we will consider supporting newer versions in the future.
31 | ```shell
32 | git clone git@github.com:yang-zj1026/IsaacLab.git
33 | cd IsaacLab
34 | cd source/extensions
35 | ln -s {THIS_REPO_DIR}/isaaclab_exts/omni.isaac.leggedloco .
36 | cd ../..
37 | ```
38 |
39 | 5. Run the Isaac Lab installer script, then install the `rsl_rl` package provided in this repo.
40 | ```shell
41 | ./isaaclab.sh -i none
42 | ./isaaclab.sh -p -m pip install -e {THIS_REPO_DIR}/rsl_rl
43 | cd ..
44 | ```
45 |
46 |
47 | ## Usage
48 | * train
49 |
50 | ```shell
51 | python scripts/train.py --task=go2_base --history_len=9 --run_name=XXX --max_iterations=2000 --save_interval=200 --headless
52 |
53 | python scripts/train.py --task=h1_base --run_name=XXX --max_iterations=2000 --save_interval=200 --headless
54 | ```
55 |
56 | * test
57 |
58 | ```shell
59 | python scripts/play.py --task=go2_base_play --history_len=9 --load_run=RUN_NAME --num_envs=10
60 | python scripts/play.py --task=h1_base_play --load_run=RUN_NAME --num_envs=10
61 | ```
62 |
63 | Use `--headless` to enable headless mode. Add `--enable_cameras --video` for headless rendering and video saving.
64 |
65 | ## Add New Environments
66 | You can add additional environments by placing them under `isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config`.
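67 |
68 | For example, a new task can be registered with Gym from the config package's `__init__.py`, following the same pattern as the existing robots (a minimal sketch; `MyRobotBaseRoughEnvCfg` and `MyRobotRoughPPORunnerCfg` are placeholder names for your own config classes):
69 | ```python
70 | import gymnasium as gym
71 |
72 | from .myrobot_low_base_cfg import MyRobotBaseRoughEnvCfg, MyRobotRoughPPORunnerCfg
73 |
74 | gym.register(
75 |     id="myrobot_base",
76 |     entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
77 |     disable_env_checker=True,
78 |     kwargs={
79 |         "env_cfg_entry_point": MyRobotBaseRoughEnvCfg,
80 |         "rsl_rl_cfg_entry_point": MyRobotRoughPPORunnerCfg,
81 |     },
82 | )
83 | ```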
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/config/extension.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | version = "0.0.1"
3 | title = "Legged Robot extension"
 4 | description = "Isaac Lab Extension for Legged Robot Locomotion"
 5 | authors = ["Zhaojing Yang", "Yandong Ji"]
6 | repository = "https://github.com/leggedrobotics/viplanner"
7 | category = "robotics"
8 | keywords = ["kit", "robotics"]
9 | readme = "docs/README.md"
10 |
11 | [dependencies]
12 | "omni.kit.uiapp" = {}
13 | "omni.isaac.ui" = {}
14 | "omni.isaac.core" = {}
15 | "omni.isaac.lab" = {}
16 |
17 | # Main python module this extension provides.
18 | [[python.module]]
19 | name = "omni.isaac.leggedloco"
20 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .h1 import *
2 | from .g1 import *
3 | from .go1 import *
4 | from .go2 import *
5 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/g1/__init__.py:
--------------------------------------------------------------------------------
1 | import gymnasium as gym
2 |
3 | from .g1_low_base_cfg import G1BaseRoughEnvCfg, G1BaseRoughEnvCfg_PLAY, G1RoughPPORunnerCfg
4 | from .g1_low_vision_cfg import G1VisionRoughEnvCfg, G1VisionRoughEnvCfg_PLAY, G1VisionRoughPPORunnerCfg
5 |
6 | ##
7 | # Register Gym environments.
8 | ##
9 |
10 |
11 | gym.register(
12 | id="g1_base",
13 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
14 | disable_env_checker=True,
15 | kwargs={
16 | "env_cfg_entry_point": G1BaseRoughEnvCfg,
17 | "rsl_rl_cfg_entry_point": G1RoughPPORunnerCfg,
18 | },
19 | )
20 |
21 |
22 | gym.register(
23 | id="g1_base_play",
24 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
25 | disable_env_checker=True,
26 | kwargs={
27 | "env_cfg_entry_point": G1BaseRoughEnvCfg,
28 | "rsl_rl_cfg_entry_point": G1RoughPPORunnerCfg,
29 | },
30 | )
31 |
32 |
33 | gym.register(
34 | id="g1_vision",
35 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
36 | disable_env_checker=True,
37 | kwargs={
38 | "env_cfg_entry_point": G1VisionRoughEnvCfg,
39 | "rsl_rl_cfg_entry_point": G1VisionRoughPPORunnerCfg,
40 | },
41 | )
42 |
43 |
44 | gym.register(
45 | id="g1_vision_play",
46 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
47 | disable_env_checker=True,
48 | kwargs={
49 | "env_cfg_entry_point": G1VisionRoughEnvCfg_PLAY,
50 | "rsl_rl_cfg_entry_point": G1VisionRoughPPORunnerCfg,
51 | },
52 | )
53 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/go1/__init__.py:
--------------------------------------------------------------------------------
1 | import gymnasium as gym
2 |
3 | from .go1_low_base_cfg import Go1BaseRoughEnvCfg, Go1BaseRoughEnvCfg_PLAY, Go1RoughPPORunnerCfg
4 | from .go1_low_vision_cfg import Go1VisionRoughEnvCfg, Go1VisionRoughEnvCfg_PLAY, Go1VisionRoughPPORunnerCfg
5 |
6 | ##
7 | # Register Gym environments.
8 | ##
9 |
10 |
11 | gym.register(
12 | id="go1_base",
13 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
14 | disable_env_checker=True,
15 | kwargs={
16 | "env_cfg_entry_point": Go1BaseRoughEnvCfg,
17 | "rsl_rl_cfg_entry_point": Go1RoughPPORunnerCfg,
18 | },
19 | )
20 |
21 |
22 | gym.register(
23 | id="go1_base_play",
24 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
25 | disable_env_checker=True,
26 | kwargs={
27 | "env_cfg_entry_point": Go1BaseRoughEnvCfg_PLAY,
28 | "rsl_rl_cfg_entry_point": Go1RoughPPORunnerCfg,
29 | },
30 | )
31 |
32 |
33 | gym.register(
34 | id="go1_vision",
35 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
36 | disable_env_checker=True,
37 | kwargs={
38 | "env_cfg_entry_point": Go1VisionRoughEnvCfg,
39 | "rsl_rl_cfg_entry_point": Go1VisionRoughPPORunnerCfg,
40 | },
41 | )
42 |
43 |
44 | gym.register(
45 | id="go1_vision_play",
46 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
47 | disable_env_checker=True,
48 | kwargs={
49 | "env_cfg_entry_point": Go1VisionRoughEnvCfg_PLAY,
50 | "rsl_rl_cfg_entry_point": Go1VisionRoughPPORunnerCfg,
51 | },
52 | )
53 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/go2/__init__.py:
--------------------------------------------------------------------------------
1 | import gymnasium as gym
2 |
3 | from .go2_low_base_cfg import Go2BaseRoughEnvCfg, Go2BaseRoughEnvCfg_PLAY, Go2RoughPPORunnerCfg
4 | from .go2_low_vision_cfg import Go2VisionRoughEnvCfg, Go2VisionRoughEnvCfg_PLAY, Go2VisionRoughPPORunnerCfg
5 |
6 | ##
7 | # Register Gym environments.
8 | ##
9 |
10 | gym.register(
11 | id="go2_base",
12 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
13 | disable_env_checker=True,
14 | kwargs={
15 | "env_cfg_entry_point": Go2BaseRoughEnvCfg,
16 | "rsl_rl_cfg_entry_point": Go2RoughPPORunnerCfg,
17 | },
18 | )
19 |
20 |
21 | gym.register(
22 | id="go2_base_play",
23 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
24 | disable_env_checker=True,
25 | kwargs={
26 | "env_cfg_entry_point": Go2BaseRoughEnvCfg_PLAY,
27 | "rsl_rl_cfg_entry_point": Go2RoughPPORunnerCfg,
28 | },
29 | )
30 |
31 | gym.register(
32 | id="go2_vision",
33 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
34 | disable_env_checker=True,
35 | kwargs={
36 | "env_cfg_entry_point": Go2VisionRoughEnvCfg,
37 | "rsl_rl_cfg_entry_point": Go2VisionRoughPPORunnerCfg,
38 | },
39 | )
40 |
41 | gym.register(
42 | id="go2_vision_play",
43 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
44 | disable_env_checker=True,
45 | kwargs={
46 | "env_cfg_entry_point": Go2VisionRoughEnvCfg_PLAY,
47 | "rsl_rl_cfg_entry_point": Go2VisionRoughPPORunnerCfg,
48 | },
49 | )
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/config/h1/__init__.py:
--------------------------------------------------------------------------------
1 | import gymnasium as gym
2 |
3 | from .h1_low_base_cfg import H1BaseRoughEnvCfg, H1BaseRoughEnvCfg_PLAY, H1RoughPPORunnerCfg
4 | from .h1_low_vision_cfg import H1VisionRoughEnvCfg, H1VisionRoughEnvCfg_PLAY, H1VisionRoughPPORunnerCfg
5 |
6 | ##
7 | # Register Gym environments.
8 | ##
9 |
10 |
11 | gym.register(
12 | id="h1_base",
13 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
14 | disable_env_checker=True,
15 | kwargs={
16 | "env_cfg_entry_point": H1BaseRoughEnvCfg,
17 | "rsl_rl_cfg_entry_point": H1RoughPPORunnerCfg,
18 | },
19 | )
20 |
21 |
22 | gym.register(
23 | id="h1_base_play",
24 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
25 | disable_env_checker=True,
26 | kwargs={
27 | "env_cfg_entry_point": H1BaseRoughEnvCfg_PLAY,
28 | "rsl_rl_cfg_entry_point": H1RoughPPORunnerCfg,
29 | },
30 | )
31 |
32 |
33 | gym.register(
34 | id="h1_vision",
35 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
36 | disable_env_checker=True,
37 | kwargs={
38 | "env_cfg_entry_point": H1VisionRoughEnvCfg,
39 | "rsl_rl_cfg_entry_point": H1VisionRoughPPORunnerCfg,
40 | },
41 | )
42 |
43 |
44 | gym.register(
45 | id="h1_vision_play",
46 | entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
47 | disable_env_checker=True,
48 | kwargs={
49 | "env_cfg_entry_point": H1VisionRoughEnvCfg_PLAY,
50 | "rsl_rl_cfg_entry_point": H1VisionRoughPPORunnerCfg,
51 | },
52 | )
53 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | """This sub-module contains the functions that are specific to the viplanner environments."""
8 |
9 | from omni.isaac.lab.envs.mdp import * # noqa: F401, F403
10 | from omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp.rewards import *
11 |
12 | from .actions import * # noqa: F401, F403
13 | from .commands import * # noqa: F401, F403
14 | from .observations import * # noqa: F401, F403
15 | from .rewards import * # noqa: F401, F403
16 | from .curriculums import * # noqa: F401, F403
17 | from .events import * # noqa: F401, F403
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | from .navigation_actions import * # noqa: F401, F403
8 | from .vlm_navigation_actions import * # noqa: F401, F403
9 | from .vlm_navigation_actions_gpt import * # noqa: F401, F403
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/navigation_actions.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | from __future__ import annotations
8 |
9 | from dataclasses import MISSING
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torchvision
14 | from omni.isaac.lab.envs import ManagerBasedRLEnv
15 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg
16 | from omni.isaac.lab.utils import configclass
17 | from omni.isaac.lab.utils.assets import check_file_path, read_file
18 |
19 | # class DepthImageProcessor(nn.Module):
20 | # def __init__(self, image_height, image_width, num_output_units):
21 | # super(DepthImageProcessor, self).__init__()
22 | # self.cnn = nn.Sequential(
23 | # nn.Conv2d(1, 32, 3, 1),
24 | # nn.ReLU(),
25 | # nn.Conv2d(32, 64, 3, 1),
26 | # nn.ReLU(),
27 | # nn.MaxPool2d(2),
28 | # )
29 | # self.fc = nn.Sequential(
30 | # nn.Linear(64 * (image_height//4) * (image_width//4), num_output_units) # Adjust based on your specific task
31 | # )
32 |
33 | # def forward(self, x):
34 | # x = self.cnn(x)
35 | # x = x.view(x.size(0), -1)
36 | # x = self.fc(x)
37 | # return x
38 |
39 | # -- Navigation Action
40 | class NavigationAction(ActionTerm):
41 | """Actions to navigate a robot by following some path."""
42 |
43 | cfg: NavigationActionCfg
44 | _env: ManagerBasedRLEnv
45 |
46 | def __init__(self, cfg: NavigationActionCfg, env: ManagerBasedRLEnv):
47 | super().__init__(cfg, env)
48 | # self.depth_cnn = DepthImageProcessor(image_height=self.cfg.image_size[0],
49 | # image_width=self.cfg.image_size[1],num_output_units=32).to(self.device)
50 | # self.resize_transform = torchvision.transforms.Resize((58, 87),
51 | # interpolation=torchvision.transforms.InterpolationMode.BICUBIC)
52 |
53 | self.image_count = 0
54 | # # check if policy file exists
55 | if not check_file_path(self.cfg.low_level_policy_file):
56 | raise FileNotFoundError(f"Policy file '{self.cfg.low_level_policy_file}' does not exist.")
57 | file_bytes = read_file(self.cfg.low_level_policy_file)
58 |
59 | # # # load policies
60 | self.low_level_policy = torch.jit.load(file_bytes, map_location=self.device)
61 | self.low_level_policy = torch.jit.freeze(self.low_level_policy.eval())
62 |
63 | # prepare joint position actions
64 | self.low_level_action_term: ActionTerm = self.cfg.low_level_action.class_type(cfg.low_level_action, env)
65 |
66 | # prepare buffers
67 | self._action_dim = (
68 | 3
69 | ) # [vx, vy, omega] --> vx: [-0.5,1.0], vy: [-0.5,0.5], omega: [-1.0,1.0]
70 | self._raw_navigation_velocity_actions = torch.zeros(self.num_envs, self._action_dim, device=self.device)
71 | self._processed_navigation_velocity_actions = torch.zeros(
72 | (self.num_envs, 3), device=self.device
73 | )
74 |
75 | self._low_level_actions = torch.zeros(self.num_envs, self.low_level_action_term.action_dim, device=self.device)
76 | self._low_level_step_dt = self.cfg.low_level_decimation * self._env.physics_dt
77 |
78 | self._counter = 0
79 |
80 | """
81 | Properties.
82 | """
83 |
84 | @property
85 | def action_dim(self) -> int:
86 | return self._action_dim
87 |
88 | @property
89 | def raw_actions(self) -> torch.Tensor:
90 | return self._raw_navigation_velocity_actions
91 |
92 | @property
93 | def processed_actions(self) -> torch.Tensor:
94 | return self._processed_navigation_velocity_actions
95 |
96 | @property
97 | def low_level_actions(self) -> torch.Tensor:
98 | return self._low_level_actions
99 |
100 | """
101 | Operations.
102 | """
103 |
104 | def process_actions(self, actions):
105 | """Process high-level navigation actions. This function is called with a frequency of 10Hz"""
106 |
107 | # Store low level navigation actions
108 | self._raw_navigation_velocity_actions[:] = actions
109 | # reshape into 3D path
110 | self._processed_navigation_velocity_actions[:] = actions.clone().view(self.num_envs, 3)
111 |
112 | def apply_actions(self):
113 | """Apply low-level actions for the simulator to the physics engine. This functions is called with the
114 | simulation frequency of 200Hz. Since low-level locomotion runs at 50Hz, we need to decimate the actions."""
115 | if self._counter % self.cfg.low_level_decimation == 0:
116 | self._counter = 0
117 | # # -- update command
118 | self._env.command_manager.compute(dt=self._low_level_step_dt)
119 | # Get low level actions from low level policy
120 | self._low_level_actions[:] = self.low_level_policy(
121 | self._env.observation_manager.compute_group(group_name="low_level_policy")
122 | )
123 | # Process low level actions
124 | self.low_level_action_term.process_actions(self._low_level_actions)
125 |
126 | # Apply low level actions
127 | self.low_level_action_term.apply_actions()
128 | self._counter += 1
129 |
130 |
131 | @configclass
132 | class NavigationActionCfg(ActionTermCfg):
133 | class_type: type[ActionTerm] = NavigationAction
134 | """ Class of the action term."""
135 | low_level_decimation: int = 4
136 | """Decimation factor for the low level action term."""
137 | low_level_action: ActionTermCfg = MISSING
138 | """Configuration of the low level action term."""
139 | low_level_policy_file: str = MISSING
140 | """Path to the low level policy file."""
141 | path_length: int = 51
142 | """Length of the path to be followed."""
143 | # low_level_agent_cfg: dict = {}
144 | image_size: tuple = ()
145 |
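146 |
147 | # Example wiring (sketch, not part of the original file): the navigation action term wraps a
148 | # pre-trained low-level policy and a joint-position action term. Names such as LOW_LEVEL_POLICY_PATH
149 | # and the concrete joint-position action cfg below are assumptions for illustration only.
150 | #
151 | #     import omni.isaac.lab.envs.mdp as mdp
152 | #
153 | #     navigation_action = NavigationActionCfg(
154 | #         asset_name="robot",
155 | #         low_level_decimation=4,  # 200 Hz simulation -> 50 Hz low-level policy
156 | #         low_level_action=mdp.JointPositionActionCfg(
157 | #             asset_name="robot", joint_names=[".*"], scale=0.25, use_default_offset=True
158 | #         ),
159 | #         low_level_policy_file=LOW_LEVEL_POLICY_PATH,  # path to an exported TorchScript policy
160 | #     )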
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/vlm_navigation_actions.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | from __future__ import annotations
8 |
9 | from dataclasses import MISSING
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torchvision
14 | from omni.isaac.lab.envs import ManagerBasedRLEnv
15 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg
16 | from omni.isaac.lab.utils import configclass
17 | from omni.isaac.lab.utils.assets import check_file_path, read_file
18 |
19 | # -- Navigation Action
20 | class VLMActions(ActionTerm):
21 | """Actions to navigate a robot by following some path."""
22 |
23 | cfg: VLMActionsCfg
24 | _env: ManagerBasedRLEnv
25 |
26 | def __init__(self, cfg: VLMActionsCfg, env: ManagerBasedRLEnv):
27 | super().__init__(cfg, env)
28 | # self.depth_cnn = DepthImageProcessor(image_height=self.cfg.image_size[0],
29 | # image_width=self.cfg.image_size[1],num_output_units=32).to(self.device)
30 | # self.resize_transform = torchvision.transforms.Resize((58, 87),
31 | # interpolation=torchvision.transforms.InterpolationMode.BICUBIC)
32 |
33 | self.image_count = 0
34 | # # check if policy file exists
35 | if not check_file_path(self.cfg.low_level_policy_file):
36 | raise FileNotFoundError(f"Policy file '{self.cfg.low_level_policy_file}' does not exist.")
37 | file_bytes = read_file(self.cfg.low_level_policy_file)
38 |
39 | # # # load policies
40 | self.low_level_policy = torch.jit.load(file_bytes, map_location=self.device)
41 | self.low_level_policy = torch.jit.freeze(self.low_level_policy.eval())
42 |
43 | # prepare joint position actions
44 | self.low_level_action_term: ActionTerm = self.cfg.low_level_action.class_type(cfg.low_level_action, env)
45 |
46 | # prepare buffers
47 | self._action_dim = (
48 | 3
49 | ) # [vx, vy, omega] --> vx: [-0.5,1.0], vy: [-0.5,0.5], omega: [-1.0,1.0]
50 | self._raw_navigation_velocity_actions = torch.zeros(self.num_envs, self._action_dim, device=self.device)
51 | self._processed_command_velocity_actions = torch.zeros(
52 | (self.num_envs, 3), device=self.device
53 | )
54 |
55 | self._low_level_actions = torch.zeros(self.num_envs, self.low_level_action_term.action_dim, device=self.device)
56 | self._low_level_step_dt = self.cfg.low_level_decimation * self._env.physics_dt
57 |
58 | self._counter = 0
59 |
60 | """
61 | Properties.
62 | """
63 |
64 | @property
65 | def action_dim(self) -> int:
66 | return self._action_dim
67 |
68 | @property
69 | def raw_actions(self) -> torch.Tensor:
70 | return self._raw_navigation_velocity_actions
71 |
72 | @property
73 | def processed_actions(self) -> torch.Tensor:
74 | return self._processed_command_velocity_actions
75 |
76 | @property
77 | def low_level_actions(self) -> torch.Tensor:
78 | return self._low_level_actions
79 |
80 | """
81 | Operations.
82 | """
83 |
84 | def process_actions(self, actions):
85 | """Process high-level navigation actions. This function is called with a frequency of 10Hz"""
86 |
87 | # Store low level navigation actions
88 | self._raw_navigation_velocity_actions[:] = actions
89 | # reshape into 3D path
90 | self._processed_command_velocity_actions[:] = actions.clone().view(self.num_envs, 3)
91 |
92 | def apply_actions(self):
93 | """Apply low-level actions for the simulator to the physics engine. This functions is called with the
94 | simulation frequency of 200Hz. Since low-level locomotion runs at 50Hz, we need to decimate the actions."""
95 | if self._counter % self.cfg.low_level_decimation == 0:
96 | self._counter = 0
97 | # # -- update command
98 | self._env.command_manager.compute(dt=self._low_level_step_dt)
99 | # Get low level actions from low level policy
100 | self._low_level_actions[:] = self.low_level_policy(
101 | self._env.observation_manager.compute_group(group_name="low_level_policy")
102 | )
103 | # Process low level actions
104 | self.low_level_action_term.process_actions(self._low_level_actions)
105 |
106 | # Apply low level actions
107 | self.low_level_action_term.apply_actions()
108 | self._counter += 1
109 |
110 |
111 | @configclass
112 | class VLMActionsCfg(ActionTermCfg):
113 | class_type: type[ActionTerm] = VLMActions
114 | """ Class of the action term."""
115 | low_level_decimation: int = 4
116 | """Decimation factor for the low level action term."""
117 | low_level_action: ActionTermCfg = MISSING
118 | """Configuration of the low level action term."""
119 | low_level_policy_file: str = MISSING
120 | """Path to the low level policy file."""
121 | # path_length: int = 51
122 | # """Length of the path to be followed."""
123 | # # low_level_agent_cfg: dict = {}
124 | # image_size: tuple = ()
125 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/actions/vlm_navigation_actions_gpt.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | from __future__ import annotations
8 |
9 | from dataclasses import MISSING
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torchvision
14 | from omni.isaac.lab.envs import ManagerBasedRLEnv
15 | from omni.isaac.lab.managers.action_manager import ActionTerm, ActionTermCfg
16 | from omni.isaac.lab.utils import configclass
17 | from omni.isaac.lab.utils.assets import check_file_path, read_file
18 |
19 | # -- Navigation Action
20 | class VLMActionsGPT(ActionTerm):
21 | """Actions to navigate a robot by following some path."""
22 |
23 | cfg: VLMActionsGPTCfg
24 | _env: ManagerBasedRLEnv
25 |
26 | def __init__(self, cfg: VLMActionsGPTCfg, env: ManagerBasedRLEnv):
27 | super().__init__(cfg, env)
28 | # self.depth_cnn = DepthImageProcessor(image_height=self.cfg.image_size[0],
29 | # image_width=self.cfg.image_size[1],num_output_units=32).to(self.device)
30 | # self.resize_transform = torchvision.transforms.Resize((58, 87),
31 | # interpolation=torchvision.transforms.InterpolationMode.BICUBIC)
32 |
33 | self.image_count = 0
34 | # # check if policy file exists
35 | if not check_file_path(self.cfg.low_level_policy_file):
36 | raise FileNotFoundError(f"Policy file '{self.cfg.low_level_policy_file}' does not exist.")
37 | file_bytes = read_file(self.cfg.low_level_policy_file)
38 |
39 | # # # load policies
40 | self.low_level_policy = torch.jit.load(file_bytes, map_location=self.device)
41 | self.low_level_policy = torch.jit.freeze(self.low_level_policy.eval())
42 |
43 | # prepare joint position actions
44 | self.low_level_action_term: ActionTerm = self.cfg.low_level_action.class_type(cfg.low_level_action, env)
45 |
46 | # prepare buffers
47 | self._action_dim = (
48 | 3
49 | ) # [vx, vy, omega] --> vx: [-0.5,1.0], vy: [-0.5,0.5], omega: [-1.0,1.0]
50 | self._raw_navigation_velocity_actions = torch.zeros(self.num_envs, self._action_dim, device=self.device)
51 | self._processed_command_velocity_actions = torch.zeros(
52 | (self.num_envs, 3), device=self.device
53 | )
54 |
55 | self._low_level_actions = torch.zeros(self.num_envs, self.low_level_action_term.action_dim, device=self.device)
56 | self._low_level_step_dt = self.cfg.low_level_decimation * self._env.physics_dt
57 |
58 | self._counter = 0
59 |
60 | """
61 | Properties.
62 | """
63 |
64 | @property
65 | def action_dim(self) -> int:
66 | return self._action_dim
67 |
68 | @property
69 | def raw_actions(self) -> torch.Tensor:
70 | return self._raw_navigation_velocity_actions
71 |
72 | @property
73 | def processed_actions(self) -> torch.Tensor:
74 | return self._processed_command_velocity_actions
75 |
76 | @property
77 | def low_level_actions(self) -> torch.Tensor:
78 | return self._low_level_actions
79 |
80 | """
81 | Operations.
82 | """
83 |
84 | def process_actions(self, actions):
85 | """Process high-level navigation actions. This function is called with a frequency of 10Hz"""
86 |
87 | # Store low level navigation actions
88 | self._raw_navigation_velocity_actions[:] = actions
89 | # reshape into 3D path
90 | self._processed_command_velocity_actions[:] = actions.clone().view(self.num_envs, 3)
91 |
92 | def apply_actions(self):
93 | """Apply low-level actions for the simulator to the physics engine. This functions is called with the
94 | simulation frequency of 200Hz. Since low-level locomotion runs at 50Hz, we need to decimate the actions."""
95 | if self._counter % self.cfg.low_level_decimation == 0:
96 | self._counter = 0
97 | # # -- update command
98 | self._env.command_manager.compute(dt=self._low_level_step_dt)
99 | # Get low level actions from low level policy
100 | self._low_level_actions[:] = self.low_level_policy(
101 | self._env.observation_manager.compute_group(group_name="low_level_policy")
102 | )
103 | # Process low level actions
104 | self.low_level_action_term.process_actions(self._low_level_actions)
105 |
106 | # Apply low level actions
107 | self.low_level_action_term.apply_actions()
108 | self._counter += 1
109 |
110 |
111 | @configclass
112 | class VLMActionsGPTCfg(ActionTermCfg):
113 | class_type: type[ActionTerm] = VLMActionsGPT
114 | """ Class of the action term."""
115 | low_level_decimation: int = 4
116 | """Decimation factor for the low level action term."""
117 | low_level_action: ActionTermCfg = MISSING
118 | """Configuration of the low level action term."""
119 | low_level_policy_file: str = MISSING
120 | """Path to the low level policy file."""
121 | # path_length: int = 51
122 | # """Length of the path to be followed."""
123 | # # low_level_agent_cfg: dict = {}
124 | # image_size: tuple = ()
125 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | from .path_follower_command_generator import PathFollowerCommandGenerator
8 | from .path_follower_command_generator_cfg import PathFollowerCommandGeneratorCfg
9 |
10 | from .path_follower_command_generator_gpt import PathFollowerCommandGeneratorGPT
11 | from .path_follower_command_generator_gpt_cfg import PathFollowerCommandGeneratorGPTCfg
12 |
13 | from .rl_command_generator import RLCommandGenerator
14 | from .rl_command_generator_cfg import RLCommandGeneratorCfg
15 |
16 | from .midlevel_command_generator import MidLevelCommandGenerator
17 | from .midlevel_command_generator_cfg import MidLevelCommandGeneratorCfg
18 |
19 | from .lowlevel_command_generator import LowLevelCommandGenerator
20 | from .lowlevel_command_generator_cfg import LowLevelCommandGeneratorCfg
21 |
22 | from .goal_command_generator import GoalCommandGenerator
23 | from .goal_command_generator_cfg import GoalCommandGeneratorCfg
24 |
25 | from .robot_vel_command_generator import RobotVelCommandGenerator
26 | from .robot_vel_command_generator_cfg import RobotVelCommandGeneratorCfg
27 |
28 | __all__ = ["PathFollowerCommandGeneratorCfg", "PathFollowerCommandGenerator",
29 | "PathFollowerCommandGeneratorGPTCfg", "PathFollowerCommandGeneratorGPT",
30 | "RLCommandGeneratorCfg", "RLCommandGenerator",
31 | "MidLevelCommandGeneratorCfg", "MidLevelCommandGenerator",
32 | "LowLevelCommandGeneratorCfg", "LowLevelCommandGenerator",
33 | "GoalCommandGenerator", "GoalCommandGeneratorCfg",
34 | "RobotVelCommandGenerator", "RobotVelCommandGeneratorCfg"]
35 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/goal_command_generator.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import math
4 | from typing import TYPE_CHECKING, Sequence
5 |
6 | import omni.isaac.lab.utils.math as math_utils
7 | from omni.isaac.lab.utils.math import wrap_to_pi, quat_rotate_inverse, yaw_quat
8 | import torch
9 | from omni.isaac.lab.assets.articulation import Articulation
10 | from omni.isaac.lab.envs import ManagerBasedRLEnv
11 | from omni.isaac.lab.managers import CommandTerm
12 | from omni.isaac.lab.markers import VisualizationMarkers
13 | from omni.isaac.lab.markers.config import (
14 | BLUE_ARROW_X_MARKER_CFG,
15 | GREEN_ARROW_X_MARKER_CFG,
16 | )
17 | from omni.isaac.lab.sim import SimulationContext
18 |
19 | if TYPE_CHECKING:
20 | from .goal_command_generator_cfg import GoalCommandGeneratorCfg
21 |
22 |
23 |
24 |
25 |
26 | class GoalCommandGenerator(CommandTerm):
27 | """Command generator that generates pose commands containing a 3-D position and heading.
28 |
29 | The command generator samples uniform 2D positions around the environment origin. It sets
30 | the height of the position command to the default root height of the robot. The heading
31 | command is either set to point towards the target or is sampled uniformly.
32 | This can be configured through the :attr:`GoalCommandGeneratorCfg.simple_heading` parameter in
33 | the configuration.
34 | """
35 |
36 | cfg: GoalCommandGeneratorCfg
37 | """Configuration for the command generator."""
38 |
39 | def __init__(self, cfg: GoalCommandGeneratorCfg, env: ManagerBasedRLEnv):
40 | """Initialize the command generator class.
41 |
42 | Args:
43 | cfg: The configuration parameters for the command generator.
44 | env: The environment object.
45 | """
46 | # initialize the base class
47 | super().__init__(cfg, env)
48 |
49 | # obtain the robot and terrain assets
50 | # -- robot
51 | self.robot: Articulation = env.scene[cfg.asset_name]
52 |
53 | # create buffers to store the command
54 | # -- commands: (x, y, z, heading)
55 | self.pos_command_w = torch.zeros(self.num_envs, 3, device=self.device)
56 | self.heading_command_w = torch.zeros(self.num_envs, device=self.device)
57 | self.pos_command_b = torch.zeros_like(self.pos_command_w)
58 | self.heading_command_b = torch.zeros_like(self.heading_command_w)
59 | # -- metrics
60 | self.metrics["error_pos_2d"] = torch.zeros(self.num_envs, device=self.device)
61 | self.metrics["error_heading"] = torch.zeros(self.num_envs, device=self.device)
62 |
63 | def __str__(self) -> str:
64 | msg = "PositionCommand:\n"
65 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
66 | msg += f"\tResampling time range: {self.cfg.resampling_time_range}"
67 | return msg
68 |
69 | """
70 | Properties
71 | """
72 |
73 | @property
74 | def command(self) -> torch.Tensor:
75 | """The desired 2D-pose in base frame. Shape is (num_envs, 4)."""
76 | return torch.cat([self.pos_command_b, self.heading_command_b.unsqueeze(1)], dim=1)
77 |
78 | """
79 | Implementation specific functions.
80 | """
81 |
82 | def _update_metrics(self):
83 | # logs data
84 | self.metrics["error_pos_2d"] = torch.norm(self.pos_command_w[:, :2] - self.robot.data.root_pos_w[:, :2], dim=1)
85 | self.metrics["error_heading"] = torch.abs(wrap_to_pi(self.heading_command_w - self.robot.data.heading_w))
86 |
87 | def _resample_command(self, env_ids: Sequence[int]):
88 | # import ipdb; ipdb.set_trace()
89 | r = torch.empty(len(env_ids), device=self.device)
90 | self.pos_command_b[env_ids, 0] = r.uniform_(*self.cfg.ranges.pos_x)#6.4756#, r.uniform_(*self.cfg.ranges.pos_x)
91 | self.pos_command_b[env_ids, 1] = r.uniform_(*self.cfg.ranges.pos_y)#-0.2762#r.uniform_(*self.cfg.ranges.pos_y)
92 | self.pos_command_b[env_ids, 2] = 0.0
93 | # self.pos_command_b[env_ids, 0] = 6.4756#6.4756#, r.uniform_(*self.cfg.ranges.pos_x)
94 | # self.pos_command_b[env_ids, 1] = -0.2762#-0.2762#r.uniform_(*self.cfg.ranges.pos_y)
95 | # self.pos_command_b[env_ids, 2] = 0.0
96 | # print("resampled command: ", self.pos_command_b[env_ids,:3])
97 |
98 | self.pos_command_w[env_ids,:3] = math_utils.quat_apply(math_utils.yaw_quat(self.robot.data.root_quat_w[env_ids,:]), self.pos_command_b[env_ids,:3])
99 | self.pos_command_w[env_ids,:3] += self.robot.data.root_pos_w[env_ids,:3]
100 | self.pos_command_w[env_ids, 2] = self._env.scene.env_origins[env_ids, 2] + self.robot.data.default_root_state[env_ids, 2]
101 | # self.pos_command_w[env_ids, 1] = -100
102 | # self.pos_command_w[env_ids, 0] = torch.clamp(self.pos_command_w[env_ids, 0], torch.min(self._env.scene.env_origins[:, 0]), torch.max(self._env.scene.env_origins[:, 0]))
103 | # self.pos_command_w[env_ids, 1] = torch.clamp(self.pos_command_w[env_ids, 1], torch.min(self._env.scene.env_origins[:, 1]), torch.max(self._env.scene.env_origins[:, 1]))
104 | # print("world command: ", self.pos_command_w[env_ids,:3])
105 | # import ipdb; ipdb.set_trace()
106 | # # obtain env origins for the environments
107 | # self.pos_command_w[env_ids] = self._env.scene.env_origins[env_ids]
108 | # # offset the position command by the current root position
109 | # r = torch.empty(len(env_ids), device=self.device)
110 | # self.pos_command_w[env_ids, 0] += r.uniform_(*self.cfg.ranges.pos_x)
111 | # self.pos_command_w[env_ids, 1] += r.uniform_(*self.cfg.ranges.pos_y)
112 | # self.pos_command_w[env_ids, 2] += self.robot.data.default_root_state[env_ids, 2]
113 |
114 | if self.cfg.simple_heading:
115 | # set heading command to point towards target
116 | target_vec = self.pos_command_w[env_ids] - self.robot.data.root_pos_w[env_ids]
117 | target_direction = torch.atan2(target_vec[:, 1], target_vec[:, 0])
118 | flipped_target_direction = wrap_to_pi(target_direction + torch.pi)
119 |
120 | # compute errors to find the closest direction to the current heading
121 | # this is done to avoid the discontinuity at the -pi/pi boundary
122 | curr_to_target = wrap_to_pi(target_direction - self.robot.data.heading_w[env_ids]).abs()
123 | curr_to_flipped_target = wrap_to_pi(flipped_target_direction - self.robot.data.heading_w[env_ids]).abs()
124 |
125 | # set the heading command to the closest direction
126 | self.heading_command_w[env_ids] = torch.where(
127 | curr_to_target < curr_to_flipped_target,
128 | target_direction,
129 | flipped_target_direction,
130 | )
131 | else:
132 | # random heading command
133 | self.heading_command_w[env_ids] = r.uniform_(*self.cfg.ranges.heading)
134 |
135 | def _update_command(self):
136 | """Re-target the position command to the current root state."""
137 | target_vec = self.pos_command_w - self.robot.data.root_pos_w[:, :3]
138 | self.pos_command_b[:] = quat_rotate_inverse(yaw_quat(self.robot.data.root_quat_w), target_vec)
139 | self.pos_command_b[:, 2] = 0.0
140 | self.heading_command_b[:] = wrap_to_pi(self.heading_command_w - self.robot.data.heading_w)*0.0
141 |
142 | def _set_debug_vis_impl(self, debug_vis: bool):
143 | # create markers if necessary for the first time
144 | if debug_vis:
145 | if not hasattr(self, "arrow_goal_visualizer"):
146 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy()
147 | marker_cfg.markers["arrow"].scale = (0.2, 0.2, 0.8)
148 | marker_cfg.prim_path = "/Visuals/Command/pose_goal"
149 | self.arrow_goal_visualizer = VisualizationMarkers(marker_cfg)
150 | # set their visibility to true
151 | self.arrow_goal_visualizer.set_visibility(True)
152 | else:
153 | if hasattr(self, "arrow_goal_visualizer"):
154 | self.arrow_goal_visualizer.set_visibility(False)
155 |
156 | def _debug_vis_callback(self, event):
157 | # update the box marker
158 | self.arrow_goal_visualizer.visualize(
159 | translations=self.pos_command_w,
160 | orientations=math_utils.quat_from_euler_xyz(
161 | torch.zeros_like(self.heading_command_w),
162 | torch.zeros_like(self.heading_command_w),
163 | self.heading_command_w,
164 | ),
165 | )
166 |
167 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/goal_command_generator_cfg.py:
--------------------------------------------------------------------------------
1 | import math
2 | from dataclasses import MISSING
3 |
4 | from omni.isaac.lab.managers import CommandTermCfg
5 | from omni.isaac.lab.utils.configclass import configclass
6 | from typing_extensions import Literal
7 |
8 | from .goal_command_generator import GoalCommandGenerator
9 |
10 |
11 | @configclass
12 | class GoalCommandGeneratorCfg(CommandTermCfg):
13 | """Configuration for the uniform 2D-pose command generator."""
14 |
15 | class_type: type = GoalCommandGenerator
16 |
17 | asset_name: str = MISSING
18 | """Name of the asset in the environment for which the commands are generated."""
19 |
20 | simple_heading: bool = MISSING
21 | """Whether to use simple heading or not.
22 |
23 | If True, the heading is in the direction of the target position.
24 | """
25 |
26 | @configclass
27 | class Ranges:
28 | """Uniform distribution ranges for the position commands."""
29 |
30 | pos_x: tuple[float, float] = MISSING
31 | """Range for the x position (in m)."""
32 | pos_y: tuple[float, float] = MISSING
33 | """Range for the y position (in m)."""
34 | heading: tuple[float, float] = MISSING
35 | """Heading range for the position commands (in rad).
36 |
37 | Used only if :attr:`simple_heading` is False.
38 | """
39 |
40 | ranges: Ranges = MISSING
41 | """Distribution ranges for the position commands."""
42 |
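43 | # Example configuration (sketch, not part of the original file; the range values below are
44 | # illustrative assumptions):
45 | #
46 | #     goal_command = GoalCommandGeneratorCfg(
47 | #         asset_name="robot",
48 | #         simple_heading=True,
49 | #         resampling_time_range=(10.0, 10.0),
50 | #         ranges=GoalCommandGeneratorCfg.Ranges(
51 | #             pos_x=(2.0, 6.0), pos_y=(-3.0, 3.0), heading=(-math.pi, math.pi)
52 | #         ),
53 | #     )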
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/lowlevel_command_generator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 | from __future__ import annotations
14 |
15 | import os
16 | import math
17 | from typing import TYPE_CHECKING, Sequence
18 |
19 | import omni.isaac.lab.utils.math as math_utils
20 | import torch
21 | from omni.isaac.lab.assets.articulation import Articulation
22 | from omni.isaac.lab.envs import ManagerBasedRLEnv
23 | from omni.isaac.lab.managers import CommandTerm
24 | from omni.isaac.lab.markers import VisualizationMarkers
25 | from omni.isaac.lab.markers.config import (
26 | BLUE_ARROW_X_MARKER_CFG,
27 | GREEN_ARROW_X_MARKER_CFG,
28 | CUBOID_MARKER_CFG,
29 | )
30 | from omni.isaac.lab.sim import SimulationContext
31 | from omni.isaac.lab.utils.assets import check_file_path, read_file
32 | from omni.isaac.lab.utils.assets import LOCAL_ISAAC_DIR
33 |
34 |
35 | if TYPE_CHECKING:
36 | from .lowlevel_command_generator_cfg import LowLevelCommandGeneratorCfg
37 |
38 |
39 | class LowLevelCommandGenerator(CommandTerm):
40 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner.
41 |
42 | The command comprises of a linear velocity in x and y direction and an angular velocity around
43 | the z-axis. It is given in the robot's base frame.
44 |
45 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance
46 | and uses it to compute the steering angle and the linear velocity.
47 | """
48 |
49 | cfg: LowLevelCommandGeneratorCfg
50 | """The configuration of the command generator."""
51 |
52 | def __init__(self, cfg: LowLevelCommandGeneratorCfg, env: ManagerBasedRLEnv):
53 | """Initialize the command generator.
54 |
55 | Args:
56 | cfg (LowLevelCommandGeneratorCfg): The configuration of the command generator.
57 | env (object): The environment.
58 | """
59 | super().__init__(cfg, env)
60 | # -- robot
61 | self.robot: Articulation = env.scene[cfg.robot_attr]
62 | # -- Simulation Context
63 | self.sim: SimulationContext = SimulationContext.instance()
64 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device)
65 | # -- debug vis
66 | self._base_vel_goal_markers = None
67 | self._base_vel_markers = None
68 |
69 | # Rotation mark
70 | self.rotation_mark = False
71 | self.initialized = False
72 | self.goal_reached = False
73 | self.identity_quat = torch.tensor([1.0, 0.0, 0.0, 0.0], device=self._env.device).repeat(self._env.num_envs, 1)
74 |
75 | # self.objnav_policy_path = os.path.join(LOCAL_ISAAC_DIR, "low_level_policy", "mid_policy_height_scan.jit")  # alternative mid-level policy (overridden below)
76 | self.objnav_policy_path = os.path.join(LOCAL_ISAAC_DIR, "low_level_policy", "mid_policy_depth.jit")
77 | if not check_file_path(self.objnav_policy_path):
78 | raise FileNotFoundError(f"Policy file '{self.objnav_policy_path}' does not exist.")
79 | file_bytes = read_file(self.objnav_policy_path)
80 | self.mid_level_policy = torch.jit.load(file_bytes, map_location=self.device)
81 | self.mid_level_policy = torch.jit.freeze(self.mid_level_policy.eval())
82 |
83 |
84 | def __str__(self) -> str:
85 | """Return a string representation of the command generator."""
86 | msg = "rlCommandGenerator:\n"
87 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
88 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n"
89 | return msg
90 |
91 | """
92 | Properties
93 | """
94 |
95 | @property
96 | def command(self) -> torch.Tensor:
97 | """The desired base velocity command in the base frame. Shape is (num_envs, 3)."""
98 | # print("twist: ", self.twist)
99 | return self.twist
100 |
101 | """
102 | Operations.
103 | """
104 |
105 | def reset(self, env_ids: Sequence[int] | None = None) -> dict:
106 | """Reset the command generator.
107 |
108 | This function resets the command generator. It should be called whenever the environment is reset.
109 |
110 | Args:
111 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None.
112 | """
113 | if env_ids is None:
114 | env_ids = ...
115 |
116 | self.twist = torch.zeros((self.num_envs, 3), device=self.device)
117 | self.goal_reached = False
118 |
119 | return {}
120 |
121 | def compute(self, dt: float):
122 | """Compute the command.
123 |
124 | Paths as a tensor of shape (num_envs, N, 3) where N is number of poses on the path. The paths
125 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all
126 | environments.
127 | """
128 | if not self.rotation_mark and not self.goal_reached and self.initialized:
129 | # get paths
130 | self.twist = self.mid_level_policy(self._env.observation_manager.compute_group(group_name="mid_level_planner")).reshape(self._env.num_envs, 3)
131 | # self.twist = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone()
132 | # import ipdb; ipdb.set_trace()
133 | self.twist[:, 0] = torch.clip(self.twist[:, 0], 0.0, 0.5)
134 | self.twist[:,1] = 0.0
135 | self.twist[:,2] = torch.clip(self.twist[:,2], -math.pi, math.pi)
136 | elif self.goal_reached or (not self.initialized):
137 | # self.twist[:, 0] = torch.clip(self.twist[:, 0], 0.0, 0.5)
138 | # self.twist[:,1] = 0.0
139 | self.twist[:,:2] = torch.zeros((self.num_envs, 2), device=self.device)
140 | else:
141 | # self.twist = torch.zeros((self.num_envs, 3), device=self.device)
142 | self.twist[:,:2] = torch.zeros((self.num_envs, 2), device=self.device)
143 | self.twist[:, 2] += 0.1
144 | print("rotation")
145 | self.goal_reached = self._env.command_manager._terms['midlevel_command'].command[:, :2].norm(dim=1) < 2.5
146 | return self.twist
147 |
148 | """
149 | Implementation specific functions.
150 | """
151 |
152 | def _update_command(self):
153 | pass
154 |
155 | def _update_metrics(self):
156 | pass
157 |
158 | def _resample_command(self, env_ids: Sequence[int]):
159 | pass
160 |
161 | def _set_debug_vis_impl(self, debug_vis: bool):
162 | # set visibility of markers
163 | # note: parent only deals with callbacks. not their visibility
164 | if debug_vis:
165 | # create markers if necessary for the first time
166 | if not hasattr(self, "base_vel_goal_visualizer"):
167 | # -- goal
168 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy()
169 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal"
170 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
171 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg)
172 | # -- current
173 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy()
174 | marker_cfg.prim_path = "/Visuals/Command/velocity_current"
175 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
176 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg)
177 | # -- goal command
178 | marker_cfg = CUBOID_MARKER_CFG.copy()
179 | marker_cfg.prim_path = "/Visuals/Command/pos_goal_command"
180 | marker_cfg.markers["cuboid"].scale = (0.5, 0.5, 0.5)
181 | # self.base_vel_goal_command_visualizer = VisualizationMarkers(marker_cfg)
182 | # set their visibility to true
183 | self.base_vel_goal_visualizer.set_visibility(True)
184 | self.base_vel_visualizer.set_visibility(True)
185 | # self.base_vel_goal_command_visualizer.set_visibility(True)
186 | else:
187 | if hasattr(self, "base_vel_goal_visualizer"):
188 | self.base_vel_goal_visualizer.set_visibility(False)
189 | self.base_vel_visualizer.set_visibility(False)
190 | # self.base_vel_goal_command_visualizer.set_visibility(False)
191 |
192 | def _debug_vis_callback(self, event):
193 | # get marker location
194 | # -- base state
195 | base_pos_w = self.robot.data.root_pos_w.clone()
196 | base_pos_w[:, 2] += 0.5
197 | base_quat_w = self.robot.data.root_quat_w.clone()
198 |
199 | # -- resolve the scales and quaternions
200 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2])
201 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2])
202 | # display markers
203 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale)
204 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale)
205 |
206 | # pos_command_w = self._env.command_manager._terms['midlevel_command'].pos_command_w.clone()
207 | # default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale
208 | # larger_scale = 2.0*torch.tensor(default_scale, device=self.device).repeat(pos_command_w.shape[0], 1)
209 | # self.base_vel_goal_command_visualizer.visualize(pos_command_w, self.identity_quat,larger_scale)
210 |
211 | """
212 | Internal helpers.
213 | """
214 |
215 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
216 | """Converts the XY base velocity command to arrow direction rotation."""
217 | # obtain default scale of the marker
218 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale
219 | # arrow-scale
220 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1)
221 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1)
222 | # arrow-direction
223 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0])
224 | zeros = torch.zeros_like(heading_angle)
225 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle)
226 |
227 | return arrow_scale, arrow_quat
228 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/lowlevel_command_generator_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 |
14 | import math
15 | from dataclasses import MISSING
16 |
17 | from omni.isaac.lab.managers import CommandTermCfg
18 | from omni.isaac.lab.utils.configclass import configclass
19 | from typing_extensions import Literal
20 |
21 | from .lowlevel_command_generator import LowLevelCommandGenerator
22 |
23 |
24 | @configclass
25 | class LowLevelCommandGeneratorCfg(CommandTermCfg):
26 | class_type: type[LowLevelCommandGenerator] = LowLevelCommandGenerator
27 | """The command generator class."""
28 |
29 | robot_attr: str = MISSING
30 | """Name of the robot attribute from the environment."""
31 |
32 | path_frame: Literal["world", "robot"] = "world"
33 | """Frame in which the path is defined.
34 | - ``world``: the path is defined in the world frame. Also called ``odom``.
35 | - ``robot``: the path is defined in the robot frame. Also called ``base``.
36 | """
37 |
38 |
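39 | # Example configuration (sketch, not part of the original file; values are illustrative assumptions):
40 | #
41 | #     lowlevel_command = LowLevelCommandGeneratorCfg(
42 | #         robot_attr="robot",
43 | #         path_frame="robot",
44 | #         resampling_time_range=(10.0, 10.0),
45 | #         debug_vis=True,
46 | #     )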
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/midlevel_command_generator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 | from __future__ import annotations
14 |
15 | import math
16 | from typing import TYPE_CHECKING, Sequence
17 |
18 | import omni.isaac.lab.utils.math as math_utils
19 | import torch
20 | from omni.isaac.lab.assets.articulation import Articulation
21 | from omni.isaac.lab.envs import ManagerBasedRLEnv
22 | from omni.isaac.lab.managers import CommandTerm
23 | from omni.isaac.lab.sim import SimulationContext
24 |
25 | if TYPE_CHECKING:
26 | from .midlevel_command_generator_cfg import MidLevelCommandGeneratorCfg
27 |
28 |
29 | class MidLevelCommandGenerator(CommandTerm):
30 | r"""Command generator that forwards the velocity command produced by a navigation action term.
31 |
32 | The command comprises a linear velocity in the x and y directions and an angular velocity around
33 | the z-axis. It is given in the robot's base frame.
34 |
35 | The command is read from the processed navigation velocity actions of the ``paths`` action term
36 | of the environment's action manager.
37 | """
38 |
39 | cfg: MidLevelCommandGeneratorCfg
40 | """The configuration of the command generator."""
41 |
42 | def __init__(self, cfg: MidLevelCommandGeneratorCfg, env: ManagerBasedRLEnv):
43 | """Initialize the command generator.
44 |
45 | Args:
46 | cfg (MidLevelCommandGeneratorCfg): The configuration of the command generator.
47 | env (object): The environment.
48 | """
49 | super().__init__(cfg, env)
50 |
51 | # -- robot
52 | self.robot: Articulation = env.scene[cfg.robot_attr]
53 | # -- Simulation Context
54 | self.sim: SimulationContext = SimulationContext.instance()
55 | self.command_b: torch.Tensor = torch.zeros((self.num_envs, 4), device=self.device)
56 | # -- debug vis
57 | self._base_vel_goal_markers = None
58 | self._base_vel_markers = None
59 |
60 | # Rotation mark
61 | self.rotation_mark = False
62 | self.initialized = False
63 |
64 | def __str__(self) -> str:
65 | """Return a string representation of the command generator."""
66 | msg = "MidLevelCommandGenerator:\n"
67 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
68 | msg += f"\tPath frame: {self.cfg.path_frame}\n"
69 | return msg
70 |
71 | """
72 | Properties
73 | """
74 |
75 | @property
76 | def command(self) -> torch.Tensor:
77 | """The desired base velocity command in the base frame. Shape is (num_envs, 3)."""
78 | # print("twist: ", self.twist)
79 | return self.command_b
80 |
81 | """
82 | Operations.
83 | """
84 |
85 | def reset(self, env_ids: Sequence[int] | None = None) -> dict:
86 | """Reset the command generator.
87 |
88 | This function resets the command generator. It should be called whenever the environment is reset.
89 |
90 | Args:
91 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None.
92 | """
93 | if env_ids is None:
94 | env_ids = ...
95 |
96 | self.command_b = torch.zeros((self.num_envs, 4), device=self.device)
97 | self.goal_reached = False
98 |
99 | return {}
100 |
101 | def compute(self, dt: float):
102 | """Compute the command.
103 |
104 | Paths are given as a tensor of shape (num_envs, N, 3), where N is the number of poses on the path. The paths
105 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all
106 | environments.
107 | """
108 | # get paths
109 | self.command_b[:,:2] = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone()[:,:2]
110 |
111 | return self.command_b
112 |
113 | """
114 | Implementation specific functions.
115 | """
116 |
117 | def _update_command(self):
118 | pass
119 |
120 | def _update_metrics(self):
121 | pass
122 |
123 | def _resample_command(self, env_ids: Sequence[int]):
124 | pass
125 |
--------------------------------------------------------------------------------
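Note: the generator above simply mirrors the processed navigation actions of the ``paths`` action term into a 4-dimensional command buffer. A minimal sketch of how that command could be consumed elsewhere, assuming the term is registered under the illustrative name "midlevel_command":

import torch

from omni.isaac.lab.envs import ManagerBasedRLEnv


def midlevel_velocity_command(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Return the (num_envs, 4) mid-level command buffer exposed by the term's ``command`` property."""
    # CommandManager.get_command looks up a registered term by name and returns its command tensor
    return env.command_manager.get_command("midlevel_command")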
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/midlevel_command_generator_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 |
14 | import math
15 | from dataclasses import MISSING
16 |
17 | from omni.isaac.lab.managers import CommandTermCfg
18 | from omni.isaac.lab.utils.configclass import configclass
19 | from typing_extensions import Literal
20 |
21 | from .midlevel_command_generator import MidLevelCommandGenerator
22 |
23 |
24 | @configclass
25 | class MidLevelCommandGeneratorCfg(CommandTermCfg):
26 | class_type: MidLevelCommandGenerator = MidLevelCommandGenerator
27 | """Name of the command generator class."""
28 |
29 | robot_attr: str = MISSING
30 | """Name of the robot attribute from the environment."""
31 |
32 | path_frame: Literal["world", "robot"] = "world"
33 | """Frame in which the path is defined.
34 | - ``world``: the path is defined in the world frame. Also called ``odom``.
35 | - ``robot``: the path is defined in the robot frame. Also called ``base``.
36 | """
37 |
38 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 | from __future__ import annotations
14 |
15 | import math
16 | from typing import TYPE_CHECKING, Sequence
17 |
18 | import omni.isaac.lab.utils.math as math_utils
19 | import torch
20 | from omni.isaac.lab.assets.articulation import Articulation
21 | from omni.isaac.lab.envs import ManagerBasedRLEnv
22 | from omni.isaac.lab.managers import CommandTerm
23 | from omni.isaac.lab.markers import VisualizationMarkers
24 | from omni.isaac.lab.markers.config import (
25 | BLUE_ARROW_X_MARKER_CFG,
26 | GREEN_ARROW_X_MARKER_CFG,
27 | )
28 | from omni.isaac.lab.sim import SimulationContext
29 |
30 | if TYPE_CHECKING:
31 | from .path_follower_command_generator_cfg import PathFollowerCommandGeneratorCfg
32 |
33 |
34 | class PathFollowerCommandGenerator(CommandTerm):
35 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner.
36 |
37 | The command comprises a linear velocity in the x and y directions and an angular velocity around
38 | the z-axis. It is given in the robot's base frame.
39 |
40 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance
41 | and uses it to compute the steering angle and the linear velocity.
42 | """
43 |
44 | cfg: PathFollowerCommandGeneratorCfg
45 | """The configuration of the command generator."""
46 |
47 | def __init__(self, cfg: PathFollowerCommandGeneratorCfg, env: ManagerBasedRLEnv):
48 | """Initialize the command generator.
49 |
50 | Args:
51 | cfg (PathFollowerCommandGeneratorCfg): The configuration of the command generator.
52 | env (object): The environment.
53 | """
54 | super().__init__(cfg, env)
55 | # -- robot
56 | self.robot: Articulation = env.scene[cfg.robot_attr]
57 | # -- Simulation Context
58 | self.sim: SimulationContext = SimulationContext.instance()
59 | # -- buffers
60 | self.vehicleSpeed: torch.Tensor = torch.zeros(self.num_envs, device=self.device)
61 | self.switch_time: torch.Tensor = torch.zeros(self.num_envs, device=self.device)
62 | self.vehicleYawRate: torch.Tensor = torch.zeros(self.num_envs, device=self.device)
63 | self.navigation_forward: torch.Tensor = torch.ones(self.num_envs, device=self.device, dtype=torch.bool)
64 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device)
65 | # -- debug vis
66 | self._base_vel_goal_markers = None
67 | self._base_vel_markers = None
68 |
69 | # Rotation mark
70 | self.rotation_mark = False
71 | self.initialized = False
72 | self.goal_reached = False
73 |
74 | def __str__(self) -> str:
75 | """Return a string representation of the command generator."""
76 | msg = "PathFollowerCommandGenerator:\n"
77 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
78 | msg += f"\tLookahead distance: {self.cfg.lookAheadDistance}\n"
79 | return msg
80 |
81 | """
82 | Properties
83 | """
84 |
85 | @property
86 | def command(self) -> torch.Tensor:
87 | """The desired base velocity command in the base frame. Shape is (num_envs, 3)."""
88 | return self.twist
89 |
90 | """
91 | Operations.
92 | """
93 |
94 | def reset(self, env_ids: Sequence[int] | None = None) -> dict:
95 | """Reset the command generator.
96 |
97 | This function resets the command generator. It should be called whenever the environment is reset.
98 |
99 | Args:
100 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None.
101 | """
102 | if env_ids is None:
103 | env_ids = ...
104 |
105 | self.vehicleSpeed = torch.zeros(self.num_envs, device=self.device)
106 | self.switch_time = torch.zeros(self.num_envs, device=self.device)
107 | self.vehicleYawRate = torch.zeros(self.num_envs, device=self.device)
108 | self.navigation_forward = torch.ones(self.num_envs, device=self.device, dtype=torch.bool)
109 | self.twist = torch.zeros((self.num_envs, 3), device=self.device)
110 | self.goal_reached = False
111 |
112 | return {}
113 |
114 | def compute(self, dt: float):
115 | """Compute the command.
116 |
117 | Paths are given as a tensor of shape (num_envs, N, 3), where N is the number of poses on the path. The paths
118 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all
119 | environments.
120 | """
121 | # get paths
122 | paths = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone()
123 | # get number of poses of the paths
124 | num_envs, N = paths.shape
125 | assert N > 0, "PathFollowerCommandGenerator: paths must have at least one pose."
126 | # get the current simulation time
127 | curr_time = self.sim.current_time
128 | # define current maxSpeed for the velocities
129 | max_speed = torch.ones(num_envs, device=self.device) * self.cfg.maxSpeed
130 |
131 | # # transform path in base/ robot frame if given in world frame
132 | # if self.cfg.path_frame == "world":
133 | # paths = math_utils.quat_apply(
134 | # math_utils.quat_inv(self.robot.data.root_quat_w[:, None, :].repeat(1, N, 1)),
135 | # paths - self.robot.data.root_pos_w[:, None, :],
136 | # )
137 | self.paths_diff_global = paths[:,:2] - self.robot.data.root_pos_w[:, :2]
138 |
139 | if self.initialized and not self.goal_reached:
140 | if not self.rotation_mark:
141 | # if abs(self.paths_diff_global[0,0,0]) < 0.5:
142 | # self.twist[:, 0] = 0.0
143 | # else:
144 | self.twist[:, 0] = min(max(self.paths_diff_global[0,0], -0.5), 0.5)
145 | # if abs(self.paths_diff_global[0,0,1]) < 2.0:
146 | # self.twist[:, 1] = 0.0
147 | # else:
148 | self.twist[:, 1] = min(max(self.paths_diff_global[0,1], -0.3), 0.3)
149 | self.twist[:, 2] = min(max(0.01*paths[0,2], -0.2), 0.2)
150 | # TODO: add yaw rotation mechanism
151 | else:
152 | self.twist[:,0] = 0.0
153 | self.twist[:,1] = 0.0
154 | self.twist[:,2] = 0.5
155 | else:
156 | self.twist[:,0] = 0.0
157 | self.twist[:,1] = 0.0
158 | self.twist[:,2] = 0.0
159 | if (torch.linalg.norm(self.paths_diff_global[0,:2], dim=-1))<3.0 and self.initialized:
160 | self.goal_reached = True
161 | # print("goal_reached: ", self.goal_reached, " rotation_mark: ", self.rotation_mark, " twist: ", self.twist[:,:3], " norm: ", torch.linalg.norm(self.paths_diff_global[0,0,:2], dim=-1), " initialized: ", self.initialized)
162 |
163 | return self.twist
164 |
165 | """
166 | Implementation specific functions.
167 | """
168 |
169 | def _update_command(self):
170 | pass
171 |
172 | def _update_metrics(self):
173 | pass
174 |
175 | def _resample_command(self, env_ids: Sequence[int]):
176 | pass
177 |
178 | def _set_debug_vis_impl(self, debug_vis: bool):
179 | # set visibility of markers
180 | # note: parent only deals with callbacks. not their visibility
181 | if debug_vis:
182 | # create markers if necessary for the first time
183 | if not hasattr(self, "base_vel_goal_visualizer"):
184 | # -- goal
185 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy()
186 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal"
187 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
188 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg)
189 | # -- current
190 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy()
191 | marker_cfg.prim_path = "/Visuals/Command/velocity_current"
192 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
193 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg)
194 | # set their visibility to true
195 | self.base_vel_goal_visualizer.set_visibility(True)
196 | self.base_vel_visualizer.set_visibility(True)
197 | else:
198 | if hasattr(self, "base_vel_goal_visualizer"):
199 | self.base_vel_goal_visualizer.set_visibility(False)
200 | self.base_vel_visualizer.set_visibility(False)
201 |
202 | def _debug_vis_callback(self, event):
203 | # get marker location
204 | # -- base state
205 | base_pos_w = self.robot.data.root_pos_w.clone()
206 | base_pos_w[:, 2] += 0.5
207 | # -- resolve the scales and quaternions
208 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2])
209 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2])
210 | # display markers
211 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale)
212 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale)
213 |
214 | """
215 | Internal helpers.
216 | """
217 |
218 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
219 | """Converts the XY base velocity command to arrow direction rotation."""
220 | # obtain default scale of the marker
221 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale
222 | # arrow-scale
223 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1)
224 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1)
225 | # arrow-direction
226 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0])
227 | zeros = torch.zeros_like(heading_angle)
228 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle)
229 |
230 | return arrow_scale, arrow_quat
231 |
--------------------------------------------------------------------------------
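Note: in its non-rotating branch, the compute() method above reduces to a per-axis clamp of the global goal offset. A self-contained sketch of that mapping (plain PyTorch, no simulator required; the helper name is hypothetical and the bounds mirror the hard-coded values above):

import torch


def goal_offset_to_twist(goal_offset_xy: torch.Tensor, yaw_term: float) -> torch.Tensor:
    """Map an (x, y) goal offset and a raw yaw term to a bounded (vx, vy, wz) twist."""
    twist = torch.zeros(3)
    twist[0] = goal_offset_xy[0].clamp(-0.5, 0.5)              # forward velocity bound [m/s]
    twist[1] = goal_offset_xy[1].clamp(-0.3, 0.3)              # lateral velocity bound [m/s]
    twist[2] = torch.tensor(0.01 * yaw_term).clamp(-0.2, 0.2)  # scaled-down yaw rate bound [rad/s]
    return twist


# example: a goal 2 m ahead and 0.1 m to the left with a large yaw term saturates vx and wz
print(goal_offset_to_twist(torch.tensor([2.0, 0.1]), 30.0))  # ~ tensor([0.5, 0.1, 0.2])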
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 |
14 | import math
15 | from dataclasses import MISSING
16 |
17 | from omni.isaac.lab.managers import CommandTermCfg
18 | from omni.isaac.lab.utils.configclass import configclass
19 | from typing_extensions import Literal
20 |
21 | from .path_follower_command_generator import PathFollowerCommandGenerator
22 |
23 |
24 | @configclass
25 | class PathFollowerCommandGeneratorCfg(CommandTermCfg):
26 | class_type: PathFollowerCommandGenerator = PathFollowerCommandGenerator
27 | """Name of the command generator class."""
28 |
29 | robot_attr: str = MISSING
30 | """Name of the robot attribute from the environment."""
31 |
32 | path_frame: Literal["world", "robot"] = "world"
33 | """Frame in which the path is defined.
34 | - ``world``: the path is defined in the world frame. Also called ``odom``.
35 | - ``robot``: the path is defined in the robot frame. Also called ``base``.
36 | """
37 |
38 | lookAheadDistance: float = MISSING
39 | """The lookahead distance for the path follower."""
40 | two_way_drive: bool = False
41 | """Allow robot to use reverse gear."""
42 | switch_time_threshold: float = 1.0
43 | """Time threshold to switch between the forward and backward drive."""
44 | maxSpeed: float = 0.5
45 | """Maximum speed of the robot."""
46 | maxAccel: float = 2.5 / 100.0 # 2.5 / 100
47 | """Maximum acceleration of the robot."""
48 | joyYaw: float = 1.0
49 | """TODO: add description"""
50 | yawRateGain: float = 7.0 # 3.5
51 | """Gain for the yaw rate."""
52 | stopYawRateGain: float = 7.0 # 3.5
53 | """"""
54 | maxYawRate: float = 90.0 * math.pi / 360
55 | dirDiffThre: float = 0.7
56 | stopDisThre: float = 0.2
57 | slowDwnDisThre: float = 0.3
58 | slowRate1: float = 0.25
59 | slowRate2: float = 0.5
60 | noRotAtGoal: bool = True
61 | autonomyMode: bool = False
62 |
63 | dynamic_lookahead: bool = False
64 | min_points_within_lookahead: int = 3
65 |
--------------------------------------------------------------------------------
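Note: ``robot_attr`` and ``lookAheadDistance`` are left as MISSING above and must be supplied when the term is declared. A minimal instantiation sketch (values and the import path are illustrative, and ``resampling_time_range`` is assumed to come from ``CommandTermCfg``):

# hypothetical import path; the commands package is assumed to re-export the cfg class
from omni.isaac.leggedloco.leggedloco.mdp.commands import PathFollowerCommandGeneratorCfg

path_follower_cfg = PathFollowerCommandGeneratorCfg(
    robot_attr="robot",                      # scene entity name of the robot
    lookAheadDistance=1.0,                   # [m] distance along the path used to pick the tracked point
    maxSpeed=0.5,                            # [m/s]
    resampling_time_range=(1000.0, 1000.0),  # effectively never resample
)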
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator_gpt.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 | from __future__ import annotations
14 |
15 | import math
16 | from typing import TYPE_CHECKING, Sequence
17 |
18 | import omni.isaac.lab.utils.math as math_utils
19 | import torch
20 | from omni.isaac.lab.assets.articulation import Articulation
21 | from omni.isaac.lab.envs import ManagerBasedRLEnv
22 | from omni.isaac.lab.managers import CommandTerm
23 | from omni.isaac.lab.markers import VisualizationMarkers
24 | from omni.isaac.lab.markers.config import (
25 | BLUE_ARROW_X_MARKER_CFG,
26 | GREEN_ARROW_X_MARKER_CFG,
27 | )
28 | from omni.isaac.lab.sim import SimulationContext
29 |
30 | if TYPE_CHECKING:
31 | from .path_follower_command_generator_gpt_cfg import PathFollowerCommandGeneratorGPTCfg
32 |
33 |
34 | class PathFollowerCommandGeneratorGPT(CommandTerm):
35 | r"""Command generator that generates a velocity command in SE(2) from a path given by a local planner.
36 |
37 | The command comprises a linear velocity in the x and y directions and an angular velocity around
38 | the z-axis. It is given in the robot's base frame.
39 |
40 | The path follower acts as a PD-controller that checks for the last point on the path within a lookahead distance
41 | and uses it to compute the steering angle and the linear velocity.
42 | """
43 |
44 | cfg: PathFollowerCommandGeneratorGPTCfg
45 | """The configuration of the command generator."""
46 |
47 | def __init__(self, cfg: PathFollowerCommandGeneratorGPTCfg, env: ManagerBasedRLEnv):
48 | """Initialize the command generator.
49 |
50 | Args:
51 | cfg (PathFollowerCommandGeneratorGPTCfg): The configuration of the command generator.
52 | env (object): The environment.
53 | """
54 | super().__init__(cfg, env)
55 | # -- robot
56 | self.robot: Articulation = env.scene[cfg.robot_attr]
57 | # -- Simulation Context
58 | self.sim: SimulationContext = SimulationContext.instance()
59 | # -- buffers
60 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device)
61 | # -- debug vis
62 | self._base_vel_goal_markers = None
63 | self._base_vel_markers = None
64 |
65 | # Rotation mark
66 | self.rotation_mark = False
67 | self.initialized = False
68 | self.goal_reached = False
69 |
70 | def __str__(self) -> str:
71 | """Return a string representation of the command generator."""
72 | msg = "PathFollowerCommandGeneratorGPT:\n"
73 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
74 | return msg
75 |
76 | """
77 | Properties
78 | """
79 |
80 | @property
81 | def command(self) -> torch.Tensor:
82 | """The desired base velocity command in the base frame. Shape is (num_envs, 3)."""
83 | return self.twist
84 |
85 | """
86 | Operations.
87 | """
88 |
89 | def reset(self, env_ids: Sequence[int] | None = None) -> dict:
90 | """Reset the command generator.
91 |
92 | This function resets the command generator. It should be called whenever the environment is reset.
93 |
94 | Args:
95 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None.
96 | """
97 | if env_ids is None:
98 | env_ids = ...
99 |
100 | self.twist = torch.zeros((self.num_envs, 3), device=self.device)
101 | self.goal_reached = False
102 |
103 | return {}
104 |
105 | def compute(self, dt: float):
106 | """Compute the command.
107 |
108 | Paths are given as a tensor of shape (num_envs, N, 3), where N is the number of poses on the path. The paths
109 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all
110 | environments.
111 | """
112 | # get paths
113 | goals = self._env.action_manager._terms['vlm_actions_gpt']._processed_command_velocity_actions.clone()
114 | # get number of poses of the goals
115 | num_envs, N = goals.shape
116 | assert N > 0, "PathFollowerCommandGeneratorGPT: goals must have at least one pose."
117 | # get the current simulation time
118 | curr_time = self.sim.current_time
119 | # define current maxSpeed for the velocities
120 | max_speed = torch.ones(num_envs, device=self.device) * self.cfg.maxSpeed
121 |
122 | # # transform path in base/ robot frame if given in world frame
123 | # if self.cfg.path_frame == "world":
124 | # paths = math_utils.quat_apply(
125 | # math_utils.quat_inv(self.robot.data.root_quat_w[:, None, :].repeat(1, N, 1)),
126 | # paths - self.robot.data.root_pos_w[:, None, :],
127 | # )
128 | # self.paths_diff_global = goals[:,:2] #- self.robot.data.root_pos_w[:, :2]
129 | # yaw_angle_
130 |
131 | # if self.initialized and not self.goal_reached:
132 | # if not self.rotation_mark:
133 | # # if abs(self.paths_diff_global[0,0,0]) < 0.5:
134 | # # self.twist[:, 0] = 0.0
135 | # # else:
136 | # self.twist[:, 0] = min(max(self.paths_diff_global[0,0], -0.5), 0.5)
137 | # # if abs(self.paths_diff_global[0,0,1]) < 2.0:
138 | # # self.twist[:, 1] = 0.0
139 | # # else:
140 | # self.twist[:, 1] = 0.0#min(max(self.paths_diff_global[0,1], -0.3), 0.3)
141 | # self.twist[:, 2] = min(max(0.01*goals[0,2], -0.2), 0.2)
142 | # # TODO: add yaw rotation mechanism
143 | # else:
144 | # self.twist[:,0] = 0.0
145 | # self.twist[:,1] = 0.0
146 | # self.twist[:,2] = 0.5
147 | # else:
148 | # self.twist[:,0] = 0.0
149 | # self.twist[:,1] = 0.0
150 | # self.twist[:,2] = 0.0
151 | self.twist[0,:3] = goals[0,:3]
152 | # if (torch.linalg.norm(self.paths_diff_global[0,:2], dim=-1))<3.0 and self.initialized:
153 | # self.goal_reached = True
154 | # print("goal_reached: ", self.goal_reached, " rotation_mark: ", self.rotation_mark, " twist: ", self.twist[:,:3], " norm: ", torch.linalg.norm(self.paths_diff_global[0,0,:2], dim=-1), " initialized: ", self.initialized)
155 |
156 | return self.twist
157 |
158 | """
159 | Implementation specific functions.
160 | """
161 |
162 | def _update_command(self):
163 | pass
164 |
165 | def _update_metrics(self):
166 | pass
167 |
168 | def _resample_command(self, env_ids: Sequence[int]):
169 | pass
170 |
171 | def _set_debug_vis_impl(self, debug_vis: bool):
172 | # set visibility of markers
173 | # note: parent only deals with callbacks. not their visibility
174 | if debug_vis:
175 | # create markers if necessary for the first time
176 | if not hasattr(self, "base_vel_goal_visualizer"):
177 | # -- goal
178 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy()
179 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal"
180 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
181 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg)
182 | # -- current
183 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy()
184 | marker_cfg.prim_path = "/Visuals/Command/velocity_current"
185 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
186 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg)
187 | # set their visibility to true
188 | self.base_vel_goal_visualizer.set_visibility(True)
189 | self.base_vel_visualizer.set_visibility(True)
190 | else:
191 | if hasattr(self, "base_vel_goal_visualizer"):
192 | self.base_vel_goal_visualizer.set_visibility(False)
193 | self.base_vel_visualizer.set_visibility(False)
194 |
195 | def _debug_vis_callback(self, event):
196 | # get marker location
197 | # -- base state
198 | base_pos_w = self.robot.data.root_pos_w.clone()
199 | base_pos_w[:, 2] += 0.5
200 | # -- resolve the scales and quaternions
201 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2])
202 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2])
203 | # display markers
204 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale)
205 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale)
206 |
207 | """
208 | Internal helpers.
209 | """
210 |
211 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
212 | """Converts the XY base velocity command to arrow direction rotation."""
213 | # obtain default scale of the marker
214 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale
215 | # arrow-scale
216 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1)
217 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1)
218 | # arrow-direction
219 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0])
220 | zeros = torch.zeros_like(heading_angle)
221 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle)
222 |
223 | return arrow_scale, arrow_quat
224 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/path_follower_command_generator_gpt_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 |
14 | import math
15 | from dataclasses import MISSING
16 |
17 | from omni.isaac.lab.managers import CommandTermCfg
18 | from omni.isaac.lab.utils.configclass import configclass
19 | from typing_extensions import Literal
20 |
21 | from .path_follower_command_generator_gpt import PathFollowerCommandGeneratorGPT
22 |
23 |
24 | @configclass
25 | class PathFollowerCommandGeneratorGPTCfg(CommandTermCfg):
26 | class_type: PathFollowerCommandGeneratorGPT = PathFollowerCommandGeneratorGPT
27 | """Name of the command generator class."""
28 |
29 | robot_attr: str = MISSING
30 | """Name of the robot attribute from the environment."""
31 |
32 | path_frame: Literal["world", "robot"] = "world"
33 | """Frame in which the path is defined.
34 | - ``world``: the path is defined in the world frame. Also called ``odom``.
35 | - ``robot``: the path is defined in the robot frame. Also called ``base``.
36 | """
37 |
38 | lookAheadDistance: float = MISSING
39 | """The lookahead distance for the path follower."""
40 | two_way_drive: bool = False
41 | """Allow robot to use reverse gear."""
42 | switch_time_threshold: float = 1.0
43 | """Time threshold to switch between the forward and backward drive."""
44 | maxSpeed: float = 0.5
45 | """Maximum speed of the robot."""
46 | maxAccel: float = 2.5 / 100.0 # 2.5 / 100
47 | """Maximum acceleration of the robot."""
48 | joyYaw: float = 1.0
49 | """TODO: add description"""
50 | yawRateGain: float = 7.0 # 3.5
51 | """Gain for the yaw rate."""
52 | stopYawRateGain: float = 7.0 # 3.5
53 | """"""
54 | maxYawRate: float = 90.0 * math.pi / 360
55 | dirDiffThre: float = 0.7
56 | stopDisThre: float = 0.2
57 | slowDwnDisThre: float = 0.3
58 | slowRate1: float = 0.25
59 | slowRate2: float = 0.5
60 | noRotAtGoal: bool = True
61 | autonomyMode: bool = False
62 |
63 | dynamic_lookahead: bool = False
64 | min_points_within_lookahead: int = 3
65 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/rl_command_generator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 | from __future__ import annotations
14 |
15 | import math
16 | from typing import TYPE_CHECKING, Sequence
17 |
18 | import omni.isaac.lab.utils.math as math_utils
19 | import torch
20 | from omni.isaac.lab.assets.articulation import Articulation
21 | from omni.isaac.lab.envs import ManagerBasedRLEnv
22 | from omni.isaac.lab.managers import CommandTerm
23 | from omni.isaac.lab.markers import VisualizationMarkers
24 | from omni.isaac.lab.markers.config import (
25 | BLUE_ARROW_X_MARKER_CFG,
26 | GREEN_ARROW_X_MARKER_CFG,
27 | CUBOID_MARKER_CFG,
28 | )
29 | from omni.isaac.lab.sim import SimulationContext
30 |
31 | if TYPE_CHECKING:
32 | from .rl_command_generator_cfg import RLCommandGeneratorCfg
33 |
34 |
35 | class RLCommandGenerator(CommandTerm):
36 | r"""Command generator that forwards and clips the velocity command produced by a navigation action term.
37 |
38 | The command comprises a linear velocity in the x and y directions and an angular velocity around
39 | the z-axis. It is given in the robot's base frame.
40 |
41 | The command is read from the processed navigation velocity actions of the ``paths`` action term; the forward
42 | velocity is clipped to [0.0, 0.5], the lateral velocity is zeroed, and the yaw rate is clipped to [-pi, pi].
43 | """
44 |
45 | cfg: RLCommandGeneratorCfg
46 | """The configuration of the command generator."""
47 |
48 | def __init__(self, cfg: RLCommandGeneratorCfg, env: ManagerBasedRLEnv):
49 | """Initialize the command generator.
50 |
51 | Args:
52 | cfg (RLCommandGeneratorCfg): The configuration of the command generator.
53 | env (object): The environment.
54 | """
55 | super().__init__(cfg, env)
56 | # -- robot
57 | self.robot: Articulation = env.scene[cfg.robot_attr]
58 | # -- Simulation Context
59 | self.sim: SimulationContext = SimulationContext.instance()
60 | self.twist: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device)
61 | # -- debug vis
62 | self._base_vel_goal_markers = None
63 | self._base_vel_markers = None
64 |
65 | # Rotation mark
66 | self.rotation_mark = False
67 | self.initialized = False
68 | self.goal_reached = False
69 | self.identity_quat = torch.tensor([1.0, 0.0, 0.0, 0.0], device=self._env.device).repeat(self._env.num_envs, 1)
70 |
71 | def __str__(self) -> str:
72 | """Return a string representation of the command generator."""
73 | msg = "RLCommandGenerator:\n"
74 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
75 | msg += f"\tPath frame: {self.cfg.path_frame}\n"
76 | return msg
77 |
78 | """
79 | Properties
80 | """
81 |
82 | @property
83 | def command(self) -> torch.Tensor:
84 | """The desired base velocity command in the base frame. Shape is (num_envs, 3)."""
85 | # print("twist: ", self.twist)
86 | return self.twist
87 |
88 | """
89 | Operations.
90 | """
91 |
92 | def reset(self, env_ids: Sequence[int] | None = None) -> dict:
93 | """Reset the command generator.
94 |
95 | This function resets the command generator. It should be called whenever the environment is reset.
96 |
97 | Args:
98 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None.
99 | """
100 | if env_ids is None:
101 | env_ids = ...
102 |
103 | self.twist = torch.zeros((self.num_envs, 3), device=self.device)
104 | self.goal_reached = False
105 |
106 | return {}
107 |
108 | def compute(self, dt: float):
109 | """Compute the command.
110 |
111 | Paths are given as a tensor of shape (num_envs, N, 3), where N is the number of poses on the path. The paths
112 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all
113 | environments.
114 | """
115 | # get paths
116 | self.twist = self._env.action_manager._terms['paths']._processed_navigation_velocity_actions.clone()
117 | self.twist[:, 0] = torch.clip(self.twist[:, 0], 0.0, 0.5)
118 | self.twist[:,1] = 0.0
119 | self.twist[:,2] = torch.clip(self.twist[:,2], -math.pi, math.pi)
120 |
121 | return self.twist
122 |
123 | """
124 | Implementation specific functions.
125 | """
126 |
127 | def _update_command(self):
128 | pass
129 |
130 | def _update_metrics(self):
131 | pass
132 |
133 | def _resample_command(self, env_ids: Sequence[int]):
134 | pass
135 |
136 | def _set_debug_vis_impl(self, debug_vis: bool):
137 | # set visibility of markers
138 | # note: parent only deals with callbacks. not their visibility
139 | if debug_vis:
140 | # create markers if necessary for the first time
141 | if not hasattr(self, "base_vel_goal_visualizer"):
142 | # -- goal
143 | marker_cfg = GREEN_ARROW_X_MARKER_CFG.copy()
144 | marker_cfg.prim_path = "/Visuals/Command/velocity_goal"
145 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
146 | self.base_vel_goal_visualizer = VisualizationMarkers(marker_cfg)
147 | # -- current
148 | marker_cfg = BLUE_ARROW_X_MARKER_CFG.copy()
149 | marker_cfg.prim_path = "/Visuals/Command/velocity_current"
150 | marker_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
151 | self.base_vel_visualizer = VisualizationMarkers(marker_cfg)
152 | # -- goal command
153 | marker_cfg = CUBOID_MARKER_CFG.copy()
154 | marker_cfg.prim_path = "/Visuals/Command/pos_goal_command"
155 | marker_cfg.markers["cuboid"].scale = (0.5, 0.5, 0.5)
156 | self.base_vel_goal_command_visualizer = VisualizationMarkers(marker_cfg)
157 | # set their visibility to true
158 | self.base_vel_goal_visualizer.set_visibility(True)
159 | self.base_vel_visualizer.set_visibility(True)
160 | self.base_vel_goal_command_visualizer.set_visibility(True)
161 | else:
162 | if hasattr(self, "base_vel_goal_visualizer"):
163 | self.base_vel_goal_visualizer.set_visibility(False)
164 | self.base_vel_visualizer.set_visibility(False)
165 | self.base_vel_goal_command_visualizer.set_visibility(False)
166 |
167 | def _debug_vis_callback(self, event):
168 | # get marker location
169 | # -- base state
170 | base_pos_w = self.robot.data.root_pos_w.clone()
171 | base_pos_w[:, 2] += 0.5
172 | base_quat_w = self.robot.data.root_quat_w.clone()
173 |
174 | # -- resolve the scales and quaternions
175 | vel_des_arrow_scale, vel_des_arrow_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2])
176 | vel_arrow_scale, vel_arrow_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2])
177 | # display markers
178 | self.base_vel_goal_visualizer.visualize(base_pos_w, vel_des_arrow_quat, vel_des_arrow_scale)
179 | self.base_vel_visualizer.visualize(base_pos_w, vel_arrow_quat, vel_arrow_scale)
180 |
181 | pos_command_w = self._env.command_manager._terms['goal_command'].pos_command_w.clone()
182 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale
183 | larger_scale = 2.0*torch.tensor(default_scale, device=self.device).repeat(pos_command_w.shape[0], 1)
184 | self.base_vel_goal_command_visualizer.visualize(pos_command_w, self.identity_quat,larger_scale)
185 |
186 | """
187 | Internal helpers.
188 | """
189 |
190 | def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
191 | """Converts the XY base velocity command to arrow direction rotation."""
192 | # obtain default scale of the marker
193 | default_scale = self.base_vel_goal_visualizer.cfg.markers["arrow"].scale
194 | # arrow-scale
195 | arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1)
196 | arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1)
197 | # arrow-direction
198 | heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0])
199 | zeros = torch.zeros_like(heading_angle)
200 | arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle)
201 |
202 | return arrow_scale, arrow_quat
203 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/rl_command_generator_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 |
14 | import math
15 | from dataclasses import MISSING
16 |
17 | from omni.isaac.lab.managers import CommandTermCfg
18 | from omni.isaac.lab.utils.configclass import configclass
19 | from typing_extensions import Literal
20 |
21 | from .rl_command_generator import RLCommandGenerator
22 |
23 |
24 | @configclass
25 | class RLCommandGeneratorCfg(CommandTermCfg):
26 | class_type: RLCommandGenerator = RLCommandGenerator
27 | """Name of the command generator class."""
28 |
29 | robot_attr: str = MISSING
30 | """Name of the robot attribute from the environment."""
31 |
32 | path_frame: Literal["world", "robot"] = "world"
33 | """Frame in which the path is defined.
34 | - ``world``: the path is defined in the world frame. Also called ``odom``.
35 | - ``robot``: the path is defined in the robot frame. Also called ``base``.
36 | """
37 |
38 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/robot_vel_command_generator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 | from __future__ import annotations
14 |
15 | import math
16 | from typing import TYPE_CHECKING, Sequence
17 |
18 | import omni.isaac.lab.utils.math as math_utils
19 | import torch
20 | from omni.isaac.lab.assets.articulation import Articulation
21 | from omni.isaac.lab.envs import ManagerBasedRLEnv
22 | from omni.isaac.lab.managers import CommandTerm
23 | from omni.isaac.lab.sim import SimulationContext
24 |
25 | if TYPE_CHECKING:
26 | from .robot_vel_command_generator_cfg import RobotVelCommandGeneratorCfg
27 |
28 |
29 | class RobotVelCommandGenerator(CommandTerm):
30 | r"""Command generator that forwards the velocity command produced by a VLM-based navigation action term.
31 |
32 | The command comprises a linear velocity in the x and y directions and an angular velocity around
33 | the z-axis. It is given in the robot's base frame.
34 |
35 | The command is read from the processed command velocity actions of the ``vlm_actions`` action term
36 | of the environment's action manager.
37 | """
38 |
39 | cfg: RobotVelCommandGeneratorCfg
40 | """The configuration of the command generator."""
41 |
42 | def __init__(self, cfg: RobotVelCommandGeneratorCfg, env: ManagerBasedRLEnv):
43 | """Initialize the command generator.
44 |
45 | Args:
46 | cfg (RobotVelCommandGeneratorCfg): The configuration of the command generator.
47 | env (object): The environment.
48 | """
49 | super().__init__(cfg, env)
50 |
51 | # -- robot
52 | self.robot: Articulation = env.scene[cfg.robot_attr]
53 | # -- Simulation Context
54 | self.sim: SimulationContext = SimulationContext.instance()
55 | self.command_b: torch.Tensor = torch.zeros((self.num_envs, 3), device=self.device)
56 | # -- debug vis
57 | self._base_vel_goal_markers = None
58 | self._base_vel_markers = None
59 |
60 | # Rotation mark
61 | self.rotation_mark = False
62 | self.initialized = False
63 |
64 | def __str__(self) -> str:
65 | """Return a string representation of the command generator."""
66 | msg = "RobotVelCommandGenerator:\n"
67 | msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
68 | msg += f"\tRobot attribute: {self.cfg.robot_attr}\n"
69 | return msg
70 |
71 | """
72 | Properties
73 | """
74 |
75 | @property
76 | def command(self) -> torch.Tensor:
77 | """The desired base velocity command in the base frame. Shape is (num_envs, 3)."""
78 | # print("twist: ", self.twist)
79 | return self.command_b
80 |
81 | """
82 | Operations.
83 | """
84 |
85 | def reset(self, env_ids: Sequence[int] | None = None) -> dict:
86 | """Reset the command generator.
87 |
88 | This function resets the command generator. It should be called whenever the environment is reset.
89 |
90 | Args:
91 | env_ids (Optional[Sequence[int]], optional): The list of environment IDs to reset. Defaults to None.
92 | """
93 | if env_ids is None:
94 | env_ids = ...
95 |
96 | self.command_b = torch.zeros((self.num_envs, 3), device=self.device)
97 | self.goal_reached = False
98 |
99 | return {}
100 |
101 | def compute(self, dt: float):
102 | """Compute the command.
103 |
104 | Paths are given as a tensor of shape (num_envs, N, 3), where N is the number of poses on the path. The paths
105 | should be given in the base frame of the robot. Num_envs is equal to the number of robots spawned in all
106 | environments.
107 | """
108 | # get paths
109 | self.command_b[:,:3] = self._env.action_manager._terms['vlm_actions']._processed_command_velocity_actions.clone()[:,:3]
110 |
111 | return self.command_b
112 |
113 | """
114 | Implementation specific functions.
115 | """
116 |
117 | def _update_command(self):
118 | pass
119 |
120 | def _update_metrics(self):
121 | pass
122 |
123 | def _resample_command(self, env_ids: Sequence[int]):
124 | pass
125 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/commands/robot_vel_command_generator_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, ETH Zurich (Robotics Systems Lab)
2 | # Author: Pascal Roth
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | # Copyright (c) 2022-2024, The lab Project Developers.
8 | # All rights reserved.
9 | #
10 | # SPDX-License-Identifier: BSD-3-Clause
11 |
12 | """Sub-module containing command generators for the velocity-based locomotion task."""
13 |
14 | import math
15 | from dataclasses import MISSING
16 |
17 | from omni.isaac.lab.managers import CommandTermCfg
18 | from omni.isaac.lab.utils.configclass import configclass
19 | from typing_extensions import Literal
20 |
21 | from .robot_vel_command_generator import RobotVelCommandGenerator
22 |
23 |
24 | @configclass
25 | class RobotVelCommandGeneratorCfg(CommandTermCfg):
26 | class_type: RobotVelCommandGenerator = RobotVelCommandGenerator
27 | """Name of the command generator class."""
28 |
29 | robot_attr: str = MISSING
30 | """Name of the robot attribute from the environment."""
31 |
32 | # path_frame: Literal["world", "robot"] = "world"
33 | """Frame in which the path is defined.
34 | - ``world``: the path is defined in the world frame. Also called ``odom``.
35 | - ``robot``: the path is defined in the robot frame. Also called ``base``.
36 | """
37 |
38 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/curriculums.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, The lab Project Developers.
2 | # All rights reserved.
3 | #
4 | # SPDX-License-Identifier: BSD-3-Clause
5 |
6 | """Common functions that can be used to create curriculum for the learning environment.
7 |
8 | The functions can be passed to the :class:`omni.isaac.lab.managers.CurriculumTermCfg` object to enable
9 | the curriculum introduced by the function.
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import torch
15 | from collections.abc import Sequence
16 | from typing import TYPE_CHECKING
17 |
18 | from omni.isaac.lab.assets import Articulation
19 | from omni.isaac.lab.managers import SceneEntityCfg
20 | from omni.isaac.lab.terrains import TerrainImporter
21 |
22 | if TYPE_CHECKING:
23 | from omni.isaac.lab.envs import ManagerBasedRLEnv
24 |
25 |
26 | def terrain_levels_vel(
27 | env: ManagerBasedRLEnv, env_ids: Sequence[int], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
28 | ) -> torch.Tensor:
29 | """Curriculum based on the distance the robot walked when commanded to move at a desired velocity.
30 |
31 | This term is used to increase the difficulty of the terrain when the robot walks far enough and decrease the
32 | difficulty when the robot walks less than half of the distance required by the commanded velocity.
33 |
34 | .. note::
35 | It is only possible to use this term with the terrain type ``generator``. For further information
36 | on different terrain types, check the :class:`omni.isaac.lab.terrains.TerrainImporter` class.
37 |
38 | Returns:
39 | The mean terrain level for the given environment ids.
40 | """
41 | # extract the used quantities (to enable type-hinting)
42 | asset: Articulation = env.scene[asset_cfg.name]
43 | terrain: TerrainImporter = env.scene.terrain
44 | command = env.command_manager.get_command("base_velocity")
45 | # compute the distance the robot walked
46 | distance = torch.norm(asset.data.root_pos_w[env_ids, :2] - env.scene.env_origins[env_ids, :2], dim=1)
47 | # robots that walked far enough progress to harder terrains
48 | move_up = distance > terrain.cfg.terrain_generator.size[0] / 2
49 | # robots that walked less than half of their required distance go to simpler terrains
50 | move_down = distance < torch.norm(command[env_ids, :2], dim=1) * env.max_episode_length_s * 0.5
51 | move_down *= ~move_up
52 | # update terrain levels
53 | terrain.update_env_origins(env_ids, move_up, move_down)
54 | # return the mean terrain level
55 | return torch.mean(terrain.terrain_levels.float())
56 |
--------------------------------------------------------------------------------
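Note: as the module docstring states, ``terrain_levels_vel`` is meant to be wired in through a ``CurriculumTermCfg``. A minimal registration sketch (the import path for this extension's mdp module and the term name are illustrative):

from omni.isaac.lab.managers import CurriculumTermCfg
from omni.isaac.lab.utils.configclass import configclass

from omni.isaac.leggedloco.leggedloco import mdp  # hypothetical import path


@configclass
class CurriculumCfg:
    """Curriculum terms for the MDP (illustrative sketch)."""

    # promote/demote terrain difficulty based on distance walked under the "base_velocity" command
    terrain_levels = CurriculumTermCfg(func=mdp.terrain_levels_vel)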
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/events.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import omni.isaac.lab.sim as sim_utils
4 | import omni.isaac.lab.utils.math as math_utils
5 | from omni.isaac.lab.actuators import ImplicitActuator
6 | from omni.isaac.lab.assets import Articulation, RigidObject
7 | from omni.isaac.lab.managers import SceneEntityCfg
8 | from omni.isaac.lab.terrains import TerrainImporter
9 |
10 |
11 | from omni.isaac.lab.envs import ManagerBasedEnv
12 |
13 |
14 | def reset_camera_pos_uniform(
15 | env: ManagerBasedEnv,
16 | env_ids: torch.Tensor,
17 | asset_cfg: SceneEntityCfg,
18 | pose_delta_range: dict[str, tuple[float, float]],
19 | ):
20 | """Reset the camera to a random pose uniformly within the given ranges.
21 |
22 | This function randomizes the pose of the camera asset.
23 |
24 | * It samples a delta position from the given ranges and adds it to the current camera position before setting
25 | the result into the physics simulation.
26 | * It samples a delta orientation from the given ranges and applies it to the current camera orientation before
27 | setting the result into the physics simulation.
28 |
29 | The function takes a dictionary of pose delta ranges for each axis and rotation. The keys of the
30 | dictionary are ``x``, ``y``, ``z``, ``roll``, ``pitch``, and ``yaw``. The values are tuples of the form
31 | ``(min, max)``. If the dictionary does not contain a key, the delta is set to zero for that axis.
32 | """
33 | # extract the used quantities (to enable type-hinting)
34 | camera: RigidObject | Articulation = env.scene[asset_cfg.name]
35 | # get default root state
36 | pos_w, quat_w = camera._compute_view_world_poses(env_ids)
37 |
38 | # poses
39 | range_list = [pose_delta_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]]
40 | ranges = torch.tensor(range_list, device=camera.device)
41 | rand_samples = math_utils.sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 6), device=camera.device)
42 |
43 | positions = pos_w + rand_samples[:, 0:3]
44 | orientations_delta = math_utils.quat_from_euler_xyz(rand_samples[:, 3], rand_samples[:, 4], rand_samples[:, 5])
45 | orientations = math_utils.quat_mul(quat_w, orientations_delta)
46 |
47 | # set into the physics simulation
48 | camera.set_world_poses(positions, orientations, env_ids=env_ids, convention="world")
--------------------------------------------------------------------------------
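Note: a minimal sketch of registering ``reset_camera_pos_uniform`` with the event manager of an Isaac Lab manager-based environment. The asset name, ranges, and the import path for this extension's mdp module are illustrative assumptions:

from omni.isaac.lab.managers import EventTermCfg, SceneEntityCfg
from omni.isaac.lab.utils.configclass import configclass

from omni.isaac.leggedloco.leggedloco import mdp  # hypothetical import path


@configclass
class EventCfg:
    """Event terms for the MDP (illustrative sketch)."""

    reset_camera = EventTermCfg(
        func=mdp.reset_camera_pos_uniform,
        mode="reset",  # run every time the environment is reset
        params={
            "asset_cfg": SceneEntityCfg("camera"),
            "pose_delta_range": {
                "x": (-0.05, 0.05),    # [m]
                "y": (-0.05, 0.05),    # [m]
                "pitch": (-0.1, 0.1),  # [rad]
            },
        },
    )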
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/leggedloco/mdp/rewards/__init__.py:
--------------------------------------------------------------------------------
1 | from .objnav_rewards import *
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/utils/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from .wrappers import RslRlVecEnvHistoryWrapper
4 |
5 | ASSETS_DIR = os.path.abspath("assets")
6 |
7 | __all__ = [
8 | "ASSETS_DIR",
9 | "RslRlVecEnvHistoryWrapper",
10 | ]
11 |
--------------------------------------------------------------------------------
/isaaclab_exts/omni.isaac.leggedloco/omni/isaac/leggedloco/utils/wrappers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers.
2 | # All rights reserved.
3 | #
4 | # SPDX-License-Identifier: BSD-3-Clause
5 |
6 | """Wrapper to configure a :class:`ManagerBasedRLEnv` instance for the RSL-RL vectorized environment interface.
7 |
8 | The following example shows how to wrap an environment for RSL-RL:
9 |
10 | .. code-block:: python
11 |
12 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
13 |
14 | env = RslRlVecEnvWrapper(env)
15 |
16 | """
17 |
18 |
19 | import gymnasium as gym
20 | import torch
21 |
22 | from rsl_rl.env import VecEnv
23 |
24 | from omni.isaac.lab.envs import DirectRLEnv, ManagerBasedRLEnv
25 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
26 |
27 |
28 | def get_proprio_obs_dim(env: ManagerBasedRLEnv) -> int:
29 | """Returns the dimension of the proprioceptive observations."""
30 | return env.unwrapped.observation_manager.compute_group("proprio").shape[1]
31 |
32 |
33 | class RslRlVecEnvHistoryWrapper(RslRlVecEnvWrapper):
34 | """Wraps an Isaac Lab environment for RSL-RL and adds a history buffer to the proprioceptive observations.
35 |
36 | .. caution::
37 |
38 | This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow
39 | the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this
40 | wrapper.
41 |
42 | Reference:
43 | https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
44 | """
45 |
46 | def __init__(self, env: ManagerBasedRLEnv, history_length: int = 1):
47 | """Initializes the wrapper."""
48 | super().__init__(env)
49 |
50 | self.history_length = history_length
51 | self.proprio_obs_dim = get_proprio_obs_dim(env)
52 | self.proprio_obs_buf = torch.zeros(self.num_envs, self.history_length, self.proprio_obs_dim,
53 | dtype=torch.float, device=self.unwrapped.device)
54 |
55 | self.clip_actions = 20.0
56 |
57 | """
58 | Properties
59 | """
60 | def get_observations(self) -> tuple[torch.Tensor, dict]:
61 | """Returns the current observations of the environment."""
62 | if hasattr(self.unwrapped, "observation_manager"):
63 | obs_dict = self.unwrapped.observation_manager.compute()
64 | else:
65 | obs_dict = self.unwrapped._get_observations()
66 | proprio_obs, obs = obs_dict["proprio"], obs_dict["policy"]
67 | self.proprio_obs_buf = torch.cat([proprio_obs.unsqueeze(1)] * self.history_length, dim=1)
68 | proprio_obs_history = self.proprio_obs_buf.view(self.num_envs, -1)
69 | curr_obs = torch.cat([obs, proprio_obs_history], dim=1)
70 | obs_dict["policy"] = curr_obs
71 |
72 | return curr_obs, {"observations": obs_dict}
73 |
74 | def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
75 | # clip the actions (for testing only)
76 | actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
77 |
78 | # record step information
79 | obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
80 | # compute dones for compatibility with RSL-RL
81 | dones = (terminated | truncated).to(dtype=torch.long)
82 | # move extra observations to the extras dict
83 | proprio_obs, obs = obs_dict["proprio"], obs_dict["policy"]
84 | # print("============== Height Map ==============")
85 | # print(obs_dict["test_height_map"])
86 | extras["observations"] = obs_dict
87 | # move time out information to the extras dict
88 | # this is only needed for infinite horizon tasks
89 | if not self.unwrapped.cfg.is_finite_horizon:
90 | extras["time_outs"] = truncated
91 |
92 | # update observation history buffer & reset the history buffer for done environments
93 | self.proprio_obs_buf = torch.where(
94 | (self.episode_length_buf < 1)[:, None, None],
95 | torch.stack([torch.zeros_like(proprio_obs)] * self.history_length, dim=1),
96 | torch.cat([
97 | self.proprio_obs_buf[:, 1:],
98 | proprio_obs.unsqueeze(1)
99 | ], dim=1)
100 | )
101 | proprio_obs_history = self.proprio_obs_buf.view(self.num_envs, -1)
102 | curr_obs = torch.cat([obs, proprio_obs_history], dim=1)
103 | extras["observations"]["policy"] = curr_obs
104 |
105 | # return the step information
106 | return curr_obs, rew, dones, extras
107 |
108 | def update_command(self, command: torch.Tensor) -> None:
109 | """Updates the command for the environment."""
110 | self.proprio_obs_buf[:, -1, 6:9] = command
111 |
112 | def close(self): # noqa: D102
113 | return self.env.close()
--------------------------------------------------------------------------------
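Note: mirroring the module docstring's example for the base wrapper, a minimal usage sketch for ``RslRlVecEnvHistoryWrapper`` (the history length is illustrative; the environment is assumed to expose a "proprio" observation group):

from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper  # assumed import path for this extension

# `env` is an existing ManagerBasedRLEnv instance created elsewhere (e.g. through gym.make)
env = RslRlVecEnvHistoryWrapper(env, history_length=5)
obs, extras = env.get_observations()  # policy obs concatenated with the flattened proprio history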
/isaaclab_exts/omni.isaac.leggedloco/setup.py:
--------------------------------------------------------------------------------
1 | """Installation script for the 'omni.isaac.leggedloco' python package."""
2 |
3 |
4 | from setuptools import setup
5 |
6 | # Minimum dependencies required prior to installation
7 | INSTALL_REQUIRES = [
8 | # generic
9 | "numpy",
10 | "scipy>=1.7.1",
11 | # RL
12 | "torch>=1.9.0",
13 | ]
14 |
15 | # Installation operation
16 | setup(
17 | name="omni-isaac-leggedloco",
18 | version="0.0.1",
19 | keywords=["robotics", "rl"],
20 | include_package_data=True,
21 | python_requires=">=3.10",
22 | install_requires=INSTALL_REQUIRES,
23 | packages=["omni.isaac.leggedloco"],
24 | classifiers=["Natural Language :: English", "Programming Language :: Python :: 3.10"],
25 | zip_safe=False,
26 | )
27 |
28 | # EOF
29 |
--------------------------------------------------------------------------------
/rsl_rl/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | show-source=True
3 | statistics=True
4 | per-file-ignores=*/__init__.py:F401
5 | # E402: Module level import not at top of file
6 | # E501: Line too long
7 | # W503: Line break before binary operator
8 | # E203: Whitespace before ':' -> conflicts with black
9 | # D401: First line should be in imperative mood
10 | # R504: Unnecessary variable assignment before return statement.
11 | # R505: Unnecessary elif after return statement
12 | # SIM102: Use a single if-statement instead of nested if-statements
13 | # SIM117: Merge with statements for context managers that have same scope.
14 | ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117
15 | max-line-length = 120
16 | max-complexity = 18
17 | exclude=_*,.vscode,.git,docs/**
18 | # docstrings
19 | docstring-convention=google
20 | # annotations
21 | suppress-none-returning=True
22 | allow-star-arg-any=True
23 |
--------------------------------------------------------------------------------
/rsl_rl/.gitignore:
--------------------------------------------------------------------------------
1 | # IDEs
2 | .idea
3 |
4 | # builds
5 | *.egg-info
6 |
7 | # cache
8 | __pycache__
9 | .pytest_cache
10 |
11 | # vs code
12 | .vscode
13 |
--------------------------------------------------------------------------------
/rsl_rl/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/python/black
3 | rev: 23.10.1
4 | hooks:
5 | - id: black
6 | args: ["--line-length", "120", "--preview"]
7 | - repo: https://github.com/pycqa/flake8
8 | rev: 6.1.0
9 | hooks:
10 | - id: flake8
11 | additional_dependencies: [flake8-simplify, flake8-return]
12 | - repo: https://github.com/pre-commit/pre-commit-hooks
13 | rev: v4.5.0
14 | hooks:
15 | - id: trailing-whitespace
16 | - id: check-symlinks
17 | - id: destroyed-symlinks
18 | - id: check-yaml
19 | - id: check-merge-conflict
20 | - id: check-case-conflict
21 | - id: check-executables-have-shebangs
22 | - id: check-toml
23 | - id: end-of-file-fixer
24 | - id: check-shebang-scripts-are-executable
25 | - id: detect-private-key
26 | - id: debug-statements
27 | - repo: https://github.com/pycqa/isort
28 | rev: 5.12.0
29 | hooks:
30 | - id: isort
31 | name: isort (python)
32 | args: ["--profile", "black", "--filter-files"]
33 | - repo: https://github.com/asottile/pyupgrade
34 | rev: v3.15.0
35 | hooks:
36 | - id: pyupgrade
37 | args: ["--py37-plus"]
38 | - repo: https://github.com/codespell-project/codespell
39 | rev: v2.2.6
40 | hooks:
41 | - id: codespell
42 | additional_dependencies:
43 | - tomli
44 | # FIXME: Figure out why this is getting stuck under VPN.
45 | # - repo: https://github.com/RobertCraigie/pyright-python
46 | # rev: v1.1.315
47 | # hooks:
48 | # - id: pyright
49 | # Note: We disable this by default since not all code is compatible with it.
50 | # - repo: https://github.com/Lucas-C/pre-commit-hooks
51 | # rev: v1.5.1
52 | # hooks:
53 | # - id: insert-license
54 | # files: \.py$
55 | # args:
56 | # # - --remove-header # Remove existing license headers. Useful when updating license.
57 | # - --license-filepath
58 | # - .github/LICENSE_HEADER.txt
59 |
--------------------------------------------------------------------------------
/rsl_rl/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | # RSL-RL Maintainers and Contributors
2 |
3 | This is the official list of developers and contributors.
4 |
5 | To see the full list of contributors, see the revision history in the source control.
6 |
7 | Names should be added to this file as: individual names or organizations.
8 |
9 | Email addresses are tracked elsewhere to avoid spam.
10 |
11 | Please keep the lists sorted alphabetically.
12 |
13 | ## Maintainers
14 |
15 | * Robotic Systems Lab, ETH Zurich
16 | * NVIDIA Corporation
17 |
18 | ---
19 |
20 | * David Hoeller
21 | * Nikita Rudin
22 |
23 | ## Contributors
24 |
25 | * Eric Vollenweider
26 | * Fabian Jenelten
27 | * Lorenzo Terenzi
28 | * Marko Bjelonic
29 | * Matthijs van der Boon
30 | * Mayank Mittal
31 | * Zhang Chong
32 |
--------------------------------------------------------------------------------
/rsl_rl/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin
2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software without
17 | specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | See licenses/dependencies for license information of dependencies of this package.
31 |
--------------------------------------------------------------------------------
/rsl_rl/README.md:
--------------------------------------------------------------------------------
1 | # RSL RL
2 |
3 | Fast and simple implementation of RL algorithms, designed to run fully on GPU.
4 | This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac Gym.
5 |
6 | | :zap: The `algorithms` branch supports additional algorithms (SAC, DDPG, DSAC, and more)! |
7 | | ------------------------------------------------------------------------------------------------ |
8 |
9 | Only PPO is implemented for now. More algorithms will be added later.
10 | Contributions are welcome.
11 |
12 | **Maintainers**: David Hoeller and Nikita Rudin
13 | **Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA
14 | **Contact**: rudinn@ethz.ch
15 |
16 | ## Setup
17 |
18 | Follow these instructions to set up the repository for your workspace:
19 |
20 | ```bash
21 | git clone https://github.com/leggedrobotics/rsl_rl
22 | cd rsl_rl
23 | pip install -e .
24 | ```
25 |
26 | The framework supports the following logging frameworks which can be configured through `logger`:
27 |
28 | * Tensorboard: https://www.tensorflow.org/tensorboard/
29 | * Weights & Biases: https://wandb.ai/site
30 | * Neptune: https://docs.neptune.ai/
31 |
32 | For a demo configuration of PPO, please check the [dummy_config.yaml](config/dummy_config.yaml) file.
33 |
34 |
35 | ## Contribution Guidelines
36 |
37 | For documentation, we adopt the [Google Style Guide](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for docstrings. We use [Sphinx](https://www.sphinx-doc.org/en/master/) for generating the documentation. Please make sure that your code is well-documented and follows the guidelines.
38 |
39 | We use the following tools for maintaining code quality:
40 |
41 | - [pre-commit](https://pre-commit.com/): Runs a list of formatters and linters over the codebase.
42 | - [black](https://black.readthedocs.io/en/stable/): The uncompromising code formatter.
43 | - [flake8](https://flake8.pycqa.org/en/latest/): A wrapper around PyFlakes, pycodestyle, and McCabe complexity checker.
44 |
45 | Please check [here](https://pre-commit.com/#install) for instructions to set these up. To run over the entire repository, please execute the following command in the terminal:
46 |
47 |
48 | ```bash
49 | # for installation (only once)
50 | pre-commit install
51 | # for running
52 | pre-commit run --all-files
53 | ```
54 |
55 | ### Useful Links
56 |
57 | Environment repositories using the framework:
58 |
59 | * `Legged-Gym` (built on top of NVIDIA Isaac Gym): https://leggedrobotics.github.io/legged_gym/
60 | * `Isaac Lab` (built on top of NVIDIA Isaac Sim): https://isaac-lab.github.io/
61 |
--------------------------------------------------------------------------------
/rsl_rl/config/dummy_config.yaml:
--------------------------------------------------------------------------------
1 | algorithm:
2 | class_name: PPO
3 | # training parameters
4 | # -- value function
5 | value_loss_coef: 1.0
6 | clip_param: 0.2
7 | use_clipped_value_loss: true
8 | # -- surrogate loss
9 | desired_kl: 0.01
10 | entropy_coef: 0.01
11 | gamma: 0.99
12 | lam: 0.95
13 | max_grad_norm: 1.0
14 | # -- training
15 | learning_rate: 0.001
16 | num_learning_epochs: 5
17 | num_mini_batches: 4 # mini batch size = num_envs * num_steps / num_mini_batches
18 | schedule: adaptive # adaptive, fixed
19 | policy:
20 | class_name: ActorCritic
21 | # for MLP i.e. `ActorCritic`
22 | activation: elu
23 | actor_hidden_dims: [128, 128, 128]
24 | critic_hidden_dims: [128, 128, 128]
25 | init_noise_std: 1.0
26 | # only needed for `ActorCriticRecurrent`
27 | # rnn_type: 'lstm'
28 | # rnn_hidden_size: 512
29 | # rnn_num_layers: 1
30 | runner:
31 | num_steps_per_env: 24 # number of steps per environment per iteration
32 | max_iterations: 1500 # number of policy updates
33 | empirical_normalization: false
34 | # -- logging parameters
35 | save_interval: 50 # check for potential saves every `save_interval` iterations
36 | experiment_name: walking_experiment
37 | run_name: ""
38 | # -- logging writer
39 | logger: tensorboard # tensorboard, neptune, wandb
40 | neptune_project: legged_gym
41 | wandb_project: legged_gym
42 | # -- load and resuming
43 | resume: false
44 | load_run: -1 # -1 means load latest run
45 | resume_path: null # updated from load_run and checkpoint
46 | checkpoint: -1 # -1 means load latest checkpoint
47 | runner_class_name: OnPolicyRunner
48 | seed: 1
49 |
--------------------------------------------------------------------------------
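
As a hedged sketch of how the `policy` and `algorithm` groups of this demo file line up with the constructors defined later in this package (`ActorCritic` and `PPO`), the snippet below loads the YAML and forwards each group as keyword arguments. PyYAML availability and the observation size (48) and action size (12) are assumptions for illustration only.

```python
import torch
import yaml  # PyYAML, assumed to be available

from rsl_rl.algorithms import PPO
from rsl_rl.modules import ActorCritic

with open("config/dummy_config.yaml") as f:
    cfg = yaml.safe_load(f)

policy_cfg = dict(cfg["policy"])
policy_cfg.pop("class_name")    # "ActorCritic"
algo_cfg = dict(cfg["algorithm"])
algo_cfg.pop("class_name")      # "PPO"

# 48 observations / 12 actions are placeholder sizes for illustration.
actor_critic = ActorCritic(num_actor_obs=48, num_critic_obs=48, num_actions=12, **policy_cfg)
ppo = PPO(actor_critic, device="cuda:0" if torch.cuda.is_available() else "cpu", **algo_cfg)
```
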
/rsl_rl/licenses/dependencies/black-license.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2018 Łukasz Langa
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/flake8-license.txt:
--------------------------------------------------------------------------------
1 | == Flake8 License (MIT) ==
2 |
3 | Copyright (C) 2011-2013 Tarek Ziade
4 | Copyright (C) 2012-2016 Ian Cordasco
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | this software and associated documentation files (the "Software"), to deal in
8 | the Software without restriction, including without limitation the rights to
9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 | of the Software, and to permit persons to whom the Software is furnished to do
11 | so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/isort-license.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2013 Timothy Edmund Crosley
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/numpy_license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2005-2021, NumPy Developers.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * Neither the name of the NumPy Developers nor the names of any
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/pre-commit-hooks-license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/pre-commit-license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/pyright-license.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Robert Craigie
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
24 | ===============================================================================
25 |
26 | MIT License
27 |
28 | Pyright - A static type checker for the Python language
29 | Copyright (c) Microsoft Corporation. All rights reserved.
30 |
31 | Permission is hereby granted, free of charge, to any person obtaining a copy
32 | of this software and associated documentation files (the "Software"), to deal
33 | in the Software without restriction, including without limitation the rights
34 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
35 | copies of the Software, and to permit persons to whom the Software is
36 | furnished to do so, subject to the following conditions:
37 |
38 | The above copyright notice and this permission notice shall be included in all
39 | copies or substantial portions of the Software.
40 |
41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
47 | SOFTWARE
48 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/pyupgrade-license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2017 Anthony Sottile
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/rsl_rl/licenses/dependencies/torch_license.txt:
--------------------------------------------------------------------------------
1 | From PyTorch:
2 |
3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke)
4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
8 | Copyright (c) 2011-2013 NYU (Clement Farabet)
9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
12 |
13 | From Caffe2:
14 |
15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved.
16 |
17 | All contributions by Facebook:
18 | Copyright (c) 2016 Facebook Inc.
19 |
20 | All contributions by Google:
21 | Copyright (c) 2015 Google Inc.
22 | All rights reserved.
23 |
24 | All contributions by Yangqing Jia:
25 | Copyright (c) 2015 Yangqing Jia
26 | All rights reserved.
27 |
28 | All contributions by Kakao Brain:
29 | Copyright 2019-2020 Kakao Brain
30 |
31 | All contributions from Caffe:
32 | Copyright(c) 2013, 2014, 2015, the respective contributors
33 | All rights reserved.
34 |
35 | All other contributions:
36 | Copyright(c) 2015, 2016 the respective contributors
37 | All rights reserved.
38 |
39 | Caffe2 uses a copyright model similar to Caffe: each contributor holds
40 | copyright over their contributions to Caffe2. The project versioning records
41 | all such contribution and copyright details. If a contributor wants to further
42 | mark their specific copyright on a particular contribution, they should
43 | indicate their copyright solely in the commit message of the change when it is
44 | committed.
45 |
46 | All rights reserved.
47 |
48 | Redistribution and use in source and binary forms, with or without
49 | modification, are permitted provided that the following conditions are met:
50 |
51 | 1. Redistributions of source code must retain the above copyright
52 | notice, this list of conditions and the following disclaimer.
53 |
54 | 2. Redistributions in binary form must reproduce the above copyright
55 | notice, this list of conditions and the following disclaimer in the
56 | documentation and/or other materials provided with the distribution.
57 |
58 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
59 | and IDIAP Research Institute nor the names of its contributors may be
60 | used to endorse or promote products derived from this software without
61 | specific prior written permission.
62 |
63 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
64 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
67 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
68 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
69 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
70 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
71 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
72 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
73 | POSSIBILITY OF SUCH DAMAGE.
74 |
--------------------------------------------------------------------------------
/rsl_rl/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.isort]
2 |
3 | py_version = 37
4 | line_length = 120
5 | group_by_package = true
6 |
7 | # Files to skip
8 | skip_glob = [".vscode/*"]
9 |
10 | # Order of imports
11 | sections = [
12 | "FUTURE",
13 | "STDLIB",
14 | "THIRDPARTY",
15 | "FIRSTPARTY",
16 | "LOCALFOLDER",
17 | ]
18 |
19 | # Extra standard libraries considered as part of python (permissive licenses)
20 | extra_standard_library = [
21 | "numpy",
22 | "torch",
23 | "tensordict",
24 | "warp",
25 | "typing_extensions",
26 | "git",
27 | ]
28 | # Imports from this repository
29 | known_first_party = "rsl_rl"
30 |
31 | [tool.pyright]
32 |
33 | include = ["rsl_rl"]
34 |
35 | typeCheckingMode = "basic"
36 | pythonVersion = "3.7"
37 | pythonPlatform = "Linux"
38 | enableTypeIgnoreComments = true
39 |
40 | # This is required as the CI pre-commit does not install the modules (e.g. numpy, torch, prettytable)
41 | # Therefore, we have to ignore missing imports
42 | reportMissingImports = "none"
43 | # This is required to ignore for type checks of modules with stubs missing.
44 | reportMissingModuleSource = "none" # -> most common: prettytable in mdp managers
45 |
46 | reportGeneralTypeIssues = "none" # -> raises 218 errors (usage of literal MISSING in dataclasses)
47 | reportOptionalMemberAccess = "warning" # -> raises 8 errors
48 | reportPrivateUsage = "warning"
49 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Main module for the rsl_rl package."""
5 |
6 | __version__ = "2.0.1"
7 | __license__ = "BSD-3"
8 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/algorithms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Implementation of different RL agents."""
5 |
6 | from .ppo import PPO
7 |
8 | __all__ = ["PPO"]
9 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/algorithms/ppo.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from __future__ import annotations
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.optim as optim
9 |
10 | from rsl_rl.modules import ActorCritic, ActorCriticDepthCNN
11 | from rsl_rl.storage import RolloutStorage
12 |
13 |
14 | class PPO:
15 | # actor_critic: ActorCritic | ActorCriticDepthCNN
16 |
17 | def __init__(
18 | self,
19 | actor_critic,
20 | num_learning_epochs=1,
21 | num_mini_batches=1,
22 | clip_param=0.2,
23 | gamma=0.998,
24 | lam=0.95,
25 | value_loss_coef=1.0,
26 | entropy_coef=0.0,
27 | learning_rate=1e-3,
28 | max_grad_norm=1.0,
29 | use_clipped_value_loss=True,
30 | schedule="fixed",
31 | desired_kl=0.01,
32 | device="cpu",
33 | ):
34 | self.device = device
35 |
36 | self.desired_kl = desired_kl
37 | self.schedule = schedule
38 | self.learning_rate = learning_rate
39 |
40 | # PPO components
41 | self.actor_critic = actor_critic
42 | self.actor_critic.to(self.device)
43 | self.storage = None # initialized later
44 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate)
45 | self.transition = RolloutStorage.Transition()
46 |
47 | # PPO parameters
48 | self.clip_param = clip_param
49 | self.num_learning_epochs = num_learning_epochs
50 | self.num_mini_batches = num_mini_batches
51 | self.value_loss_coef = value_loss_coef
52 | self.entropy_coef = entropy_coef
53 | self.gamma = gamma
54 | self.lam = lam
55 | self.max_grad_norm = max_grad_norm
56 | self.use_clipped_value_loss = use_clipped_value_loss
57 |
58 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape):
59 | self.storage = RolloutStorage(
60 | num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device
61 | )
62 |
63 | def test_mode(self):
64 | self.actor_critic.test()
65 |
66 | def train_mode(self):
67 | self.actor_critic.train()
68 |
69 | def act(self, obs, critic_obs):
70 | if self.actor_critic.is_recurrent:
71 | self.transition.hidden_states = self.actor_critic.get_hidden_states()
72 | # Compute the actions and values
73 | self.transition.actions = self.actor_critic.act(obs).detach()
74 | self.transition.values = self.actor_critic.evaluate(critic_obs).detach()
75 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach()
76 | self.transition.action_mean = self.actor_critic.action_mean.detach()
77 | self.transition.action_sigma = self.actor_critic.action_std.detach()
78 | # need to record obs and critic_obs before env.step()
79 | self.transition.observations = obs
80 | self.transition.critic_observations = critic_obs
81 | return self.transition.actions
82 |
83 | def process_env_step(self, rewards, dones, infos):
84 | self.transition.rewards = rewards.clone()
85 | self.transition.dones = dones
86 | # Bootstrapping on time outs
87 | if "time_outs" in infos:
88 | self.transition.rewards += self.gamma * torch.squeeze(
89 | self.transition.values * infos["time_outs"].unsqueeze(1).to(self.device), 1
90 | )
91 |
92 | # Record the transition
93 | self.storage.add_transitions(self.transition)
94 | self.transition.clear()
95 | self.actor_critic.reset(dones)
96 |
97 | def compute_returns(self, last_critic_obs):
98 | last_values = self.actor_critic.evaluate(last_critic_obs).detach()
99 | self.storage.compute_returns(last_values, self.gamma, self.lam)
100 |
101 | def update(self):
102 | mean_value_loss = 0
103 | mean_surrogate_loss = 0
104 | if self.actor_critic.is_recurrent:
105 | generator = self.storage.reccurent_mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
106 | else:
107 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
108 | for (
109 | obs_batch,
110 | critic_obs_batch,
111 | actions_batch,
112 | target_values_batch,
113 | advantages_batch,
114 | returns_batch,
115 | old_actions_log_prob_batch,
116 | old_mu_batch,
117 | old_sigma_batch,
118 | hid_states_batch,
119 | masks_batch,
120 | ) in generator:
121 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0])
122 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
123 | value_batch = self.actor_critic.evaluate(
124 | critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1]
125 | )
126 | mu_batch = self.actor_critic.action_mean
127 | sigma_batch = self.actor_critic.action_std
128 | entropy_batch = self.actor_critic.entropy
129 |
130 | # KL
131 | if self.desired_kl is not None and self.schedule == "adaptive":
132 | with torch.inference_mode():
133 | kl = torch.sum(
134 | torch.log(sigma_batch / old_sigma_batch + 1.0e-5)
135 | + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch))
136 | / (2.0 * torch.square(sigma_batch))
137 | - 0.5,
138 | axis=-1,
139 | )
140 | kl_mean = torch.mean(kl)
141 |
142 | if kl_mean > self.desired_kl * 2.0:
143 | self.learning_rate = max(1e-5, self.learning_rate / 1.5)
144 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0:
145 | self.learning_rate = min(1e-2, self.learning_rate * 1.5)
146 |
147 | for param_group in self.optimizer.param_groups:
148 | param_group["lr"] = self.learning_rate
149 |
150 | # Surrogate loss
151 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
152 | surrogate = -torch.squeeze(advantages_batch) * ratio
153 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(
154 | ratio, 1.0 - self.clip_param, 1.0 + self.clip_param
155 | )
156 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
157 |
158 | # Value function loss
159 | if self.use_clipped_value_loss:
160 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(
161 | -self.clip_param, self.clip_param
162 | )
163 | value_losses = (value_batch - returns_batch).pow(2)
164 | value_losses_clipped = (value_clipped - returns_batch).pow(2)
165 | value_loss = torch.max(value_losses, value_losses_clipped).mean()
166 | else:
167 | value_loss = (returns_batch - value_batch).pow(2).mean()
168 |
169 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean()
170 |
171 | # Gradient step
172 | self.optimizer.zero_grad()
173 | loss.backward()
174 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
175 | self.optimizer.step()
176 |
177 | mean_value_loss += value_loss.item()
178 | mean_surrogate_loss += surrogate_loss.item()
179 |
180 | num_updates = self.num_learning_epochs * self.num_mini_batches
181 | mean_value_loss /= num_updates
182 | mean_surrogate_loss /= num_updates
183 | self.storage.clear()
184 |
185 | return mean_value_loss, mean_surrogate_loss
186 |
--------------------------------------------------------------------------------
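
To make the PPO interface above concrete, here is a minimal, hypothetical collect-then-update loop in the spirit of the on-policy runner. `env` is assumed to be a `VecEnv` whose extras carry an `{"observations": {...}}` dict, and `ppo` a `PPO` instance built beforehand; the variable names and iteration count are illustrative, not from this repository.

```python
import torch

num_steps_per_env = 24
obs, extras = env.get_observations()
critic_obs = extras["observations"].get("critic", obs)

ppo.init_storage(
    env.num_envs, num_steps_per_env, [obs.shape[1]], [critic_obs.shape[1]], [env.num_actions]
)

for it in range(100):
    # Rollout phase: collect num_steps_per_env transitions per environment.
    with torch.inference_mode():
        for _ in range(num_steps_per_env):
            actions = ppo.act(obs, critic_obs)
            obs, rewards, dones, infos = env.step(actions)
            critic_obs = infos["observations"].get("critic", obs)
            ppo.process_env_step(rewards, dones, infos)
        # Bootstrap returns from the value of the last critic observation.
        ppo.compute_returns(critic_obs)
    # Learning phase: clipped-surrogate and value-function updates.
    mean_value_loss, mean_surrogate_loss = ppo.update()
```
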
/rsl_rl/rsl_rl/env/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Submodule defining the environment definitions."""
5 |
6 | from .vec_env import VecEnv
7 |
8 | __all__ = ["VecEnv"]
9 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/env/vec_env.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from __future__ import annotations
5 |
6 | import torch
7 | from abc import ABC, abstractmethod
8 |
9 |
10 | class VecEnv(ABC):
11 | """Abstract class for vectorized environment.
12 |
13 | The vectorized environment is a collection of environments that are synchronized. This means that
14 | the same action is applied to all environments and the same observation is returned from all environments.
15 |
16 | All extra observations must be provided as a dictionary to "extras" in the step() method. Based on the
17 | configuration, the extra observations are used for different purposes. The following keys are reserved
18 | in the "observations" dictionary (if they are present):
19 |
20 | - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
21 | """
22 |
23 | num_envs: int
24 | """Number of environments."""
25 | num_obs: int
26 | """Number of observations."""
27 | num_privileged_obs: int
28 | """Number of privileged observations."""
29 | num_actions: int
30 | """Number of actions."""
31 | max_episode_length: int
32 | """Maximum episode length."""
33 | privileged_obs_buf: torch.Tensor
34 | """Buffer for privileged observations."""
35 | obs_buf: torch.Tensor
36 | """Buffer for observations."""
37 | rew_buf: torch.Tensor
38 | """Buffer for rewards."""
39 | reset_buf: torch.Tensor
40 | """Buffer for resets."""
41 | episode_length_buf: torch.Tensor # current episode duration
42 | """Buffer for current episode lengths."""
43 | extras: dict
44 | """Extra information (metrics).
45 |
46 | Extra information is stored in a dictionary. This includes metrics such as the episode reward, episode length,
47 | etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
48 | """
49 | device: torch.device
50 | """Device to use."""
51 |
52 | """
53 | Operations.
54 | """
55 |
56 | @abstractmethod
57 | def get_observations(self) -> tuple[torch.Tensor, dict]:
58 | """Return the current observations.
59 |
60 | Returns:
61 | Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
62 | """
63 | raise NotImplementedError
64 |
65 | @abstractmethod
66 | def reset(self) -> tuple[torch.Tensor, dict]:
67 | """Reset all environment instances.
68 |
69 | Returns:
70 | Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
71 | """
72 | raise NotImplementedError
73 |
74 | @abstractmethod
75 | def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
76 | """Apply input action on the environment.
77 |
78 | Args:
79 | actions (torch.Tensor): Input actions to apply. Shape: (num_envs, num_actions)
80 |
81 | Returns:
82 | Tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
83 | A tuple containing the observations, rewards, dones and extra information (metrics).
84 | """
85 | raise NotImplementedError
86 |
--------------------------------------------------------------------------------
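
For illustration, a toy `VecEnv` subclass that satisfies the abstract interface above. It is a random-walk environment, not a real simulator; all sizes, rewards, and dynamics are made up.

```python
from __future__ import annotations

import torch

from rsl_rl.env import VecEnv


class ToyVecEnv(VecEnv):
    """Random-walk toy environment implementing the VecEnv contract."""

    def __init__(self, num_envs=8, num_obs=4, num_actions=2, device="cpu"):
        self.num_envs = num_envs
        self.num_obs = num_obs
        self.num_privileged_obs = num_obs
        self.num_actions = num_actions
        self.max_episode_length = 100
        self.device = torch.device(device)
        self.obs_buf = torch.zeros(num_envs, num_obs, device=self.device)
        self.privileged_obs_buf = self.obs_buf.clone()
        self.rew_buf = torch.zeros(num_envs, device=self.device)
        self.reset_buf = torch.zeros(num_envs, device=self.device)
        self.episode_length_buf = torch.zeros(num_envs, dtype=torch.long, device=self.device)
        self.extras = {}

    def get_observations(self):
        return self.obs_buf, {"observations": {"critic": self.privileged_obs_buf}}

    def reset(self):
        self.obs_buf.zero_()
        self.episode_length_buf.zero_()
        return self.get_observations()

    def step(self, actions):
        self.episode_length_buf += 1
        # Random-walk "dynamics": the observation drifts regardless of the action.
        self.obs_buf += 0.1 * torch.randn_like(self.obs_buf)
        self.rew_buf = -self.obs_buf.pow(2).mean(dim=1)
        dones = (self.episode_length_buf >= self.max_episode_length).long()
        self.episode_length_buf[dones.bool()] = 0
        return self.obs_buf, self.rew_buf, dones, {"observations": {"critic": self.privileged_obs_buf}}
```
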
/rsl_rl/rsl_rl/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Definitions for neural-network components for RL-agents."""
5 |
6 | from .actor_critic import ActorCritic
7 | from .actor_critic_recurrent import ActorCriticRecurrent
8 | from .actor_critic_depth_cnn import ActorCriticDepthCNN, ActorCriticDepthCNNRecurrent
9 | from .actor_critic_history import ActorCriticHistory
10 | from .normalizer import EmpiricalNormalization
11 |
12 | __all__ = [
13 | "ActorCritic",
14 | "ActorCriticRecurrent",
15 | "ActorCriticDepthCNN",
16 | "ActorCriticDepthCNNRecurrent",
17 | "EmpiricalNormalization",
18 | "ActorCriticHistory"
19 | ]
20 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/modules/actor_critic.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 |
5 | from __future__ import annotations
6 |
7 | import torch
8 | import torch.nn as nn
9 | from torch.distributions import Normal
10 |
11 |
12 | class ActorCritic(nn.Module):
13 | is_recurrent = False
14 |
15 | def __init__(
16 | self,
17 | num_actor_obs,
18 | num_critic_obs,
19 | num_actions,
20 | actor_hidden_dims=[256, 256, 256],
21 | critic_hidden_dims=[256, 256, 256],
22 | activation="elu",
23 | init_noise_std=1.0,
24 | **kwargs,
25 | ):
26 | if kwargs:
27 | print(
28 | "ActorCritic.__init__ got unexpected arguments, which will be ignored: "
29 | + str([key for key in kwargs.keys()])
30 | )
31 | super().__init__()
32 | activation = get_activation(activation)
33 |
34 | mlp_input_dim_a = num_actor_obs
35 | mlp_input_dim_c = num_critic_obs
36 | # Policy
37 | actor_layers = []
38 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0]))
39 | actor_layers.append(activation)
40 | for layer_index in range(len(actor_hidden_dims)):
41 | if layer_index == len(actor_hidden_dims) - 1:
42 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], num_actions))
43 | else:
44 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], actor_hidden_dims[layer_index + 1]))
45 | actor_layers.append(activation)
46 | self.actor = nn.Sequential(*actor_layers)
47 |
48 | # Value function
49 | critic_layers = []
50 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0]))
51 | critic_layers.append(activation)
52 | for layer_index in range(len(critic_hidden_dims)):
53 | if layer_index == len(critic_hidden_dims) - 1:
54 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1))
55 | else:
56 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1]))
57 | critic_layers.append(activation)
58 | self.critic = nn.Sequential(*critic_layers)
59 |
60 | print(f"Actor MLP: {self.actor}")
61 | print(f"Critic MLP: {self.critic}")
62 |
63 | # Action noise
64 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
65 | self.distribution = None
66 | # disable args validation for speedup
67 | Normal.set_default_validate_args = False
68 |
69 | # seems that we get better performance without init
70 | # self.init_memory_weights(self.memory_a, 0.001, 0.)
71 | # self.init_memory_weights(self.memory_c, 0.001, 0.)
72 |
73 | @staticmethod
74 | # not used at the moment
75 | def init_weights(sequential, scales):
76 | [
77 | torch.nn.init.orthogonal_(module.weight, gain=scales[idx])
78 | for idx, module in enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))
79 | ]
80 |
81 | def reset(self, dones=None):
82 | pass
83 |
84 | def forward(self):
85 | raise NotImplementedError
86 |
87 | @property
88 | def action_mean(self):
89 | return self.distribution.mean
90 |
91 | @property
92 | def action_std(self):
93 | return self.distribution.stddev
94 |
95 | @property
96 | def entropy(self):
97 | return self.distribution.entropy().sum(dim=-1)
98 |
99 | def update_distribution(self, observations):
100 | mean = self.actor(observations)
101 | self.distribution = Normal(mean, mean * 0.0 + self.std)
102 |
103 | def act(self, observations, **kwargs):
104 | self.update_distribution(observations)
105 | return self.distribution.sample()
106 |
107 | def get_actions_log_prob(self, actions):
108 | return self.distribution.log_prob(actions).sum(dim=-1)
109 |
110 | def act_inference(self, observations):
111 | actions_mean = self.actor(observations)
112 | return actions_mean
113 |
114 | def evaluate(self, critic_observations, **kwargs):
115 | value = self.critic(critic_observations)
116 | return value
117 |
118 |
119 | def get_activation(act_name):
120 | if act_name == "elu":
121 | return nn.ELU()
122 | elif act_name == "selu":
123 | return nn.SELU()
124 | elif act_name == "relu":
125 | return nn.ReLU()
126 | elif act_name == "crelu":
127 | return nn.CReLU()
128 | elif act_name == "lrelu":
129 | return nn.LeakyReLU()
130 | elif act_name == "tanh":
131 | return nn.Tanh()
132 | elif act_name == "sigmoid":
133 | return nn.Sigmoid()
134 | else:
135 | print("invalid activation function!")
136 | return None
137 |
--------------------------------------------------------------------------------
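
A quick usage sketch of the `ActorCritic` module above, with illustrative observation/action sizes, contrasting the stochastic `act()` used during rollout with the deterministic `act_inference()` used at deployment.

```python
import torch

from rsl_rl.modules import ActorCritic

policy = ActorCritic(num_actor_obs=48, num_critic_obs=48, num_actions=12, activation="elu")
obs = torch.randn(16, 48)  # a batch of 16 observations

actions = policy.act(obs)                        # stochastic sample used during training rollouts
log_prob = policy.get_actions_log_prob(actions)  # per-sample log-probability, shape (16,)
values = policy.evaluate(obs)                    # critic values, shape (16, 1)
mean_actions = policy.act_inference(obs)         # deterministic action mean used at deployment
```
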
/rsl_rl/rsl_rl/modules/actor_critic_depth_cnn.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch.distributions import Normal
6 |
7 | from rsl_rl.modules.actor_critic import get_activation
8 | from rsl_rl.modules.depth_backbone import DepthOnlyFCBackbone, DepthBackbone
9 | from rsl_rl.modules.actor_critic_recurrent import Memory
10 | from rsl_rl.utils import unpad_trajectories
11 |
12 |
13 | class ActorDepthCNN(nn.Module):
14 | def __init__(self,
15 | num_obs_proprio,
16 | obs_depth_shape,
17 | num_actions,
18 | activation,
19 | hidden_dims=[256, 256, 128],
20 | ):
21 | super().__init__()
22 |
23 | self.prop_mlp = nn.Sequential(
24 | nn.Linear(num_obs_proprio, hidden_dims[0]),
25 | activation,
26 | nn.Linear(hidden_dims[0], hidden_dims[1]),
27 | activation,
28 | nn.Linear(hidden_dims[1], hidden_dims[2]),
29 | activation,
30 | )
31 | self.depth_backbone = DepthOnlyFCBackbone(
32 | output_dim=hidden_dims[2],
33 | hidden_dim=hidden_dims[1],
34 | activation=activation,
35 | num_frames=1,
36 | )
37 | # base_backbone = DepthOnlyFCBackbone(
38 | # output_dim=hidden_dims[2],
39 | # hidden_dim=hidden_dims[1],
40 | # activation=activation,
41 | # num_frames=1,
42 | # )
43 | # self.depth_backbone = DepthBackbone(base_backbone, hidden_dims[2], hidden_dims[2])
44 |
45 | self.action_head = nn.Linear(2 * hidden_dims[2], num_actions)
46 |
47 | self.num_obs_proprio = num_obs_proprio
48 | self.obs_depth_shape = obs_depth_shape
49 |
50 | def forward(self, x):
51 | prop_input = x[..., :self.num_obs_proprio]
52 | prop_latent = self.prop_mlp(prop_input)
53 |
54 | depth_input = x[..., self.num_obs_proprio:]
55 | ori_shape = depth_input.shape
56 | depth_input = depth_input.reshape(-1, *self.obs_depth_shape)
57 | depth_latent = self.depth_backbone(depth_input)
58 |
59 | actions = self.action_head(torch.cat((prop_latent, depth_latent), dim=-1))
60 | return actions
61 |
62 | def encode(self, observations):
63 | original_shape = observations.shape
64 |
65 | if observations.dim() == 3:
66 | observations = observations.reshape(-1, original_shape[-1])
67 |
68 | prop_input = observations[..., :self.num_obs_proprio]
69 | prop_latent = self.prop_mlp(prop_input)
70 |
71 | depth_input = observations[..., self.num_obs_proprio:]
72 | depth_input = depth_input.reshape(-1, *self.obs_depth_shape)
73 | depth_latent = self.depth_backbone(depth_input)
74 |
75 | if len(original_shape) == 3:
76 | return torch.cat((prop_latent, depth_latent), dim=-1).reshape(*original_shape[:-1], -1)
77 |
78 | return torch.cat((prop_latent, depth_latent), dim=-1)
79 |
80 | def reset(self, dones=None):
81 | self.depth_backbone.reset(dones)
82 |
83 |
84 | class ActorCriticDepthCNN(nn.Module):
85 | is_recurrent = False
86 |
87 | def __init__(
88 | self,
89 | num_actor_obs,
90 | num_critic_obs,
91 | num_actions,
92 | num_actor_obs_prop=48,
93 | obs_depth_shape=(15, 15),
94 | actor_hidden_dims=[256, 256, 128],
95 | critic_hidden_dims=[256, 256, 128],
96 | activation="elu",
97 | init_noise_std=1.0,
98 | **kwargs,
99 | ):
100 | if kwargs:
101 | print(
102 | "ActorCriticDepth.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),
103 | )
104 | super().__init__()
105 | activation = get_activation(activation)
106 | mlp_input_dim_c = num_critic_obs
107 |
108 | # Policy Function
109 | self.actor = ActorDepthCNN(num_actor_obs_prop, obs_depth_shape, num_actions, activation, actor_hidden_dims)
110 |
111 | # Value function
112 | critic_layers = []
113 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0]))
114 | critic_layers.append(activation)
115 | for layer_index in range(len(critic_hidden_dims)):
116 | if layer_index == len(critic_hidden_dims) - 1:
117 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1))
118 | else:
119 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1]))
120 | critic_layers.append(activation)
121 | self.critic = nn.Sequential(*critic_layers)
122 |
123 | print(f"Actor MLP+CNN: {self.actor}")
124 | print(f"Critic MLP: {self.critic}")
125 |
126 | # Action noise
127 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
128 | self.distribution = None
129 | # disable args validation for speedup
130 | Normal.set_default_validate_args = False
131 |
132 | def reset(self, dones=None):
133 | pass
134 |
135 | def forward(self):
136 | raise NotImplementedError
137 |
138 | @property
139 | def action_mean(self):
140 | return self.distribution.mean
141 |
142 | @property
143 | def action_std(self):
144 | return self.distribution.stddev
145 |
146 | @property
147 | def entropy(self):
148 | return self.distribution.entropy().sum(dim=-1)
149 |
150 | def update_distribution(self, observations):
151 | mean = self.actor(observations)
152 | self.distribution = Normal(mean, mean * 0.0 + self.std)
153 |
154 | def act(self, observations, **kwargs):
155 | self.update_distribution(observations)
156 | return self.distribution.sample()
157 |
158 | def get_actions_log_prob(self, actions):
159 | return self.distribution.log_prob(actions).sum(dim=-1)
160 |
161 | def act_inference(self, observations):
162 | actions_mean = self.actor(observations)
163 | return actions_mean
164 |
165 | def evaluate(self, critic_observations, **kwargs):
166 | value = self.critic(critic_observations)
167 | return value
168 |
169 | def act_hidden(self, hidden_states):
170 | mean = self.actor.action_head(hidden_states)
171 | self.distribution = Normal(mean, mean * 0.0 + self.std)
172 | return self.distribution.sample()
173 |
174 | def act_hidden_inference(self, hidden_states):
175 | actions_mean = self.actor.action_head(hidden_states)
176 | return actions_mean
177 |
178 | def evaluate_hidden(self, hidden_states):
179 | return self.critic.value_head(hidden_states)
180 |
181 | def get_hidden_states(self):
182 | return self.actor.depth_backbone.hidden_states, self.actor.depth_backbone.hidden_states
183 |
184 |
185 |
186 | class ActorCriticDepthCNNRecurrent(ActorCriticDepthCNN):
187 | is_recurrent = True
188 |
189 | def __init__(
190 | self,
191 | num_actor_obs,
192 | num_critic_obs,
193 | num_actions,
194 | num_actor_obs_prop=48,
195 | num_critic_obs_prop=48,
196 | obs_depth_shape=(15, 15),
197 | actor_hidden_dims=[256, 256, 128],
198 | critic_hidden_dims=[256, 256, 128],
199 | activation="elu",
200 | rnn_type="lstm",
201 | rnn_input_size=256,
202 | rnn_hidden_size=256,
203 | rnn_num_layers=1,
204 | init_noise_std=1.0,
205 | **kwargs,
206 | ):
207 | if kwargs:
208 | print(
209 | "ActorCriticDepthCNNRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),
210 | )
211 |
212 | super().__init__(
213 | num_actor_obs=num_actor_obs,
214 | num_critic_obs=num_critic_obs,
215 | num_actions=num_actions,
216 | num_actor_obs_prop=num_actor_obs_prop,
217 | num_critic_obs_prop=num_critic_obs_prop,
218 | obs_depth_shape=obs_depth_shape,
219 | actor_hidden_dims=actor_hidden_dims,
220 | critic_hidden_dims=critic_hidden_dims,
221 | activation=activation,
222 | init_noise_std=init_noise_std,
223 | )
224 |
225 | self.memory_a = Memory(rnn_input_size, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
226 | self.memory_c = Memory(rnn_input_size, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
227 |
228 | print(f"Actor RNN: {self.memory_a}")
229 | print(f"Critic RNN: {self.memory_c}")
230 |
231 | def reset(self, dones=None):
232 | self.memory_a.reset(dones)
233 | self.memory_c.reset(dones)
234 |
235 | def act(self, observations, masks=None, hidden_states=None):
236 | observations = self.actor.encode(observations)
237 | input_a = self.memory_a(observations, masks, hidden_states)
238 | return super().act_hidden(input_a.squeeze(0))
239 |
240 | def act_inference(self, observations):
241 | observations = self.actor.encode(observations)
242 | input_a = self.memory_a(observations)
243 | return super().act_hidden_inference(input_a.squeeze(0))
244 |
245 | def evaluate(self, critic_observations, masks=None, hidden_states=None):
246 | critic_observations = self.critic.encode(critic_observations)
247 | input_c = self.memory_c(critic_observations, masks, hidden_states)
248 | return super().evaluate_hidden(input_c.squeeze(0))
249 |
250 | def get_hidden_states(self):
251 | return self.memory_a.hidden_states, self.memory_c.hidden_states
252 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/modules/actor_critic_history.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch.distributions import Normal
6 |
7 | from rsl_rl.modules.actor_critic import get_activation
8 |
9 |
10 | class ActorHistory(nn.Module):
11 | def __init__(
12 | self,
13 | num_actor_obs,
14 | num_actions,
15 | activation,
16 | actor_hidden_dims=[256, 256, 256],
17 | ):
18 | super().__init__()
19 |
20 | mlp_input_dim_a = num_actor_obs
21 | # Policy (use mlp for now)
22 | actor_layers = []
23 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0]))
24 | actor_layers.append(activation)
25 | for layer_index in range(len(actor_hidden_dims)):
26 | if layer_index == len(actor_hidden_dims) - 1:
27 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], num_actions))
28 | else:
29 | actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], actor_hidden_dims[layer_index + 1]))
30 | actor_layers.append(activation)
31 | self.model = nn.Sequential(*actor_layers)
32 |
33 | def forward(self, x):
34 | # x is of shape (batch_size, history_len, num_single_observations)
35 | x = x.view(*x.shape[:-2], -1)
36 | return self.model(x)
37 |
38 |
39 | class CriticHistory(nn.Module):
40 | def __init__(
41 | self,
42 | num_critic_obs,
43 | activation,
44 | critic_hidden_dims=[256, 256, 256],
45 | ):
46 | super().__init__()
47 |
48 | mlp_input_dim_c = num_critic_obs
49 | # Value function (use mlp for now)
50 | critic_layers = []
51 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0]))
52 | critic_layers.append(activation)
53 | for layer_index in range(len(critic_hidden_dims)):
54 | if layer_index == len(critic_hidden_dims) - 1:
55 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1))
56 | else:
57 | critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1]))
58 | critic_layers.append(activation)
59 | self.model = nn.Sequential(*critic_layers)
60 |
61 | def forward(self, x):
62 | # x is of shape (batch_size, history_len, num_single_observations)
63 | x = x.view(*x.shape[:-2], -1)
64 | return self.model(x)
65 |
66 |
67 | class ActorCriticHistory(nn.Module):
68 | is_recurrent = False
69 |
70 | def __init__(
71 | self,
72 | num_actor_obs,
73 | num_critic_obs,
74 | num_actions,
75 | actor_hidden_dims=[256, 256, 256],
76 | critic_hidden_dims=[256, 256, 256],
77 | activation="elu",
78 | init_noise_std=1.0,
79 | **kwargs,
80 | ):
81 | if kwargs:
82 | print(
83 | "ActorCritic.__init__ got unexpected arguments, which will be ignored: "
84 | + str([key for key in kwargs.keys()])
85 | )
86 | super().__init__()
87 | activation = get_activation(activation)
88 |
89 | self.actor = ActorHistory(
90 | num_actor_obs=num_actor_obs,
91 | num_actions=num_actions,
92 | actor_hidden_dims=actor_hidden_dims,
93 | activation=activation,
94 | )
95 | self.critic = CriticHistory(
96 | num_critic_obs=num_critic_obs,
97 | critic_hidden_dims=critic_hidden_dims,
98 | activation=activation,
99 | )
100 |
101 | print(f"Actor MLP: {self.actor}")
102 | print(f"Critic MLP: {self.critic}")
103 |
104 | # Action noise
105 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
106 | self.distribution = None
107 | # disable args validation for speedup
108 |         Normal.set_default_validate_args(False)
109 |
110 | def reset(self, dones=None):
111 | pass
112 |
113 | def forward(self):
114 | raise NotImplementedError
115 |
116 | @property
117 | def action_mean(self):
118 | return self.distribution.mean
119 |
120 | @property
121 | def action_std(self):
122 | return self.distribution.stddev
123 |
124 | @property
125 | def entropy(self):
126 | return self.distribution.entropy().sum(dim=-1)
127 |
128 | def update_distribution(self, observations):
129 | mean = self.actor(observations)
130 | self.distribution = Normal(mean, mean * 0.0 + self.std)
131 |
132 | def act(self, observations, **kwargs):
133 | self.update_distribution(observations)
134 | return self.distribution.sample()
135 |
136 | def get_actions_log_prob(self, actions):
137 | return self.distribution.log_prob(actions).sum(dim=-1)
138 |
139 | def act_inference(self, observations):
140 | actions_mean = self.actor(observations)
141 | return actions_mean
142 |
143 | def evaluate(self, critic_observations, **kwargs):
144 | value = self.critic(critic_observations)
145 | return value
--------------------------------------------------------------------------------
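A minimal usage sketch for ActorCriticHistory (the sizes below are illustrative assumptions, not values taken from this repo's configs). Both sub-networks flatten the trailing (history_len, num_single_obs) axes before their MLPs, so num_actor_obs and num_critic_obs must equal history_length times the per-step observation size:

import torch
from rsl_rl.modules.actor_critic_history import ActorCriticHistory

history_len, num_single_obs, num_actions = 5, 48, 12       # hypothetical sizes
model = ActorCriticHistory(
    num_actor_obs=history_len * num_single_obs,            # flattened history fed to the actor MLP
    num_critic_obs=history_len * num_single_obs,
    num_actions=num_actions,
)

obs = torch.randn(4, history_len, num_single_obs)          # (batch, history, per-step obs)
actions = model.act(obs)                                   # stochastic sample, shape (4, num_actions)
mean_actions = model.act_inference(obs)                    # deterministic mean
values = model.evaluate(obs)                               # shape (4, 1)
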
/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from __future__ import annotations
5 |
6 | import torch
7 | import torch.nn as nn
8 |
9 | from rsl_rl.modules.actor_critic import ActorCritic, get_activation
10 | from rsl_rl.utils import unpad_trajectories
11 |
12 |
13 | class ActorCriticRecurrent(ActorCritic):
14 | is_recurrent = True
15 |
16 | def __init__(
17 | self,
18 | num_actor_obs,
19 | num_critic_obs,
20 | num_actions,
21 | actor_hidden_dims=[256, 256, 256],
22 | critic_hidden_dims=[256, 256, 256],
23 | activation="elu",
24 | rnn_type="lstm",
25 | rnn_hidden_size=256,
26 | rnn_num_layers=1,
27 | init_noise_std=1.0,
28 | **kwargs,
29 | ):
30 | if kwargs:
31 | print(
32 | "ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),
33 | )
34 |
35 | super().__init__(
36 | num_actor_obs=rnn_hidden_size,
37 | num_critic_obs=rnn_hidden_size,
38 | num_actions=num_actions,
39 | actor_hidden_dims=actor_hidden_dims,
40 | critic_hidden_dims=critic_hidden_dims,
41 | activation=activation,
42 | init_noise_std=init_noise_std,
43 | )
44 |
45 | activation = get_activation(activation)
46 |
47 | self.memory_a = Memory(num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
48 | self.memory_c = Memory(num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size)
49 |
50 | print(f"Actor RNN: {self.memory_a}")
51 | print(f"Critic RNN: {self.memory_c}")
52 |
53 | def reset(self, dones=None):
54 | self.memory_a.reset(dones)
55 | self.memory_c.reset(dones)
56 |
57 | def act(self, observations, masks=None, hidden_states=None):
58 | input_a = self.memory_a(observations, masks, hidden_states)
59 | return super().act(input_a.squeeze(0))
60 |
61 | def act_inference(self, observations):
62 | input_a = self.memory_a(observations)
63 | return super().act_inference(input_a.squeeze(0))
64 |
65 | def evaluate(self, critic_observations, masks=None, hidden_states=None):
66 | input_c = self.memory_c(critic_observations, masks, hidden_states)
67 | return super().evaluate(input_c.squeeze(0))
68 |
69 | def get_hidden_states(self):
70 | return self.memory_a.hidden_states, self.memory_c.hidden_states
71 |
72 |
73 | class Memory(torch.nn.Module):
74 | def __init__(self, input_size, type="lstm", num_layers=1, hidden_size=256):
75 | super().__init__()
76 | # RNN
77 | rnn_cls = nn.GRU if type.lower() == "gru" else nn.LSTM
78 | self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
79 | self.hidden_states = None
80 |
81 | def forward(self, input, masks=None, hidden_states=None):
82 | batch_mode = masks is not None
83 | if batch_mode:
84 | # batch mode (policy update): need saved hidden states
85 | if hidden_states is None:
86 | raise ValueError("Hidden states not passed to memory module during policy update")
87 | out, _ = self.rnn(input, hidden_states)
88 | out = unpad_trajectories(out, masks)
89 | else:
90 | # inference mode (collection): use hidden states of last step
91 | out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states)
92 | return out
93 |
94 | def reset(self, dones=None):
95 |         # When the RNN is an LSTM, self.hidden_states is a tuple with the hidden state and the cell state
96 | for hidden_state in self.hidden_states:
97 | hidden_state[..., dones, :] = 0.0
98 |
--------------------------------------------------------------------------------
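A minimal collection-time sketch for ActorCriticRecurrent (sizes are hypothetical). Without masks, the Memory modules run in inference mode and cache their own hidden states between calls; reset() zeroes the states of environments that just finished an episode:

import torch
from rsl_rl.modules.actor_critic_recurrent import ActorCriticRecurrent

num_obs, num_actions, num_envs = 48, 12, 16                # hypothetical sizes
policy = ActorCriticRecurrent(num_obs, num_obs, num_actions, rnn_type="lstm", rnn_hidden_size=256)

with torch.inference_mode():                               # mirrors how rollouts use the policy
    obs = torch.randn(num_envs, num_obs)
    actions = policy.act(obs)                              # hidden state cached internally
    values = policy.evaluate(obs)                          # critic memory also advances its state
    hidden_a, hidden_c = policy.get_hidden_states()        # LSTM (h, c) tuples for actor/critic

    dones = torch.zeros(num_envs, dtype=torch.bool)
    dones[3] = True
    policy.reset(dones)                                    # zero hidden states of the finished env
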
/rsl_rl/rsl_rl/modules/depth_backbone.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class DepthBackbone(nn.Module):
6 | def __init__(self, base_backbone, rnn_hidden_dim, output_dim) -> None:
7 | super().__init__()
8 | last_activation = nn.Tanh()
9 | self.base_backbone = base_backbone
10 | self.rnn = nn.GRU(
11 | input_size=base_backbone.output_dim,
12 | hidden_size=rnn_hidden_dim,
13 | num_layers=1,
14 | )
15 | self.output_mlp = nn.Sequential(
16 | nn.Linear(rnn_hidden_dim, output_dim),
17 | last_activation
18 | )
19 | self.hidden_states = None
20 |
21 | def forward(self, depth_input, ori_shape, masks=None, hidden_states=None):
22 | depth_latent = self.base_backbone(depth_input)
23 | # depth_latent = self.base_backbone(depth_image)
24 | batch_mode = masks is not None
25 | if batch_mode:
26 | # batch mode (policy update): need saved hidden states
27 | if hidden_states is None:
28 | raise ValueError("Hidden states not passed to memory module during policy update")
29 | depth_latent = depth_latent.view(*ori_shape[:2], -1)
30 | out, _ = self.rnn(depth_latent, hidden_states)
31 | else:
32 | # inference mode (collection): use hidden states of last step
33 | out, self.hidden_states = self.rnn(depth_latent.unsqueeze(0), self.hidden_states)
34 | out = out.squeeze(0)
35 | out = self.output_mlp(out.squeeze(1))
36 |
37 | return out
38 |
39 | def detach_hidden_states(self):
40 | self.hidden_states = self.hidden_states.detach().clone()
41 |
42 | def reset(self, dones):
43 | self.hidden_states[..., dones, :] = 0.0
44 |
45 |
46 | class DepthOnlyFCBackbone(nn.Module):
47 | def __init__(self, output_dim, hidden_dim, activation, num_frames=1):
48 | super().__init__()
49 |
50 | self.num_frames = num_frames
51 | self.output_dim = output_dim
52 | self.image_compression = nn.Sequential(
53 | # [1, 24, 32]
54 | nn.Conv2d(in_channels=self.num_frames, out_channels=16, kernel_size=5),
55 | # [16, 20, 28]
56 | nn.MaxPool2d(kernel_size=2, stride=2),
57 | # [16, 10, 14]
58 | activation,
59 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
60 | # [32, 8, 12]
61 | nn.MaxPool2d(kernel_size=2, stride=2),
62 | # [32, 4, 6]
63 | activation,
64 | nn.Flatten(),
65 |
66 | nn.Linear(32 * 4 * 6, hidden_dim),
67 | activation,
68 | nn.Linear(hidden_dim, output_dim),
69 | activation
70 | )
71 |
72 | def forward(self, images: torch.Tensor):
73 | latent = self.image_compression(images.unsqueeze(1))
74 |
75 | return latent
--------------------------------------------------------------------------------
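A shape-trace sketch for the depth encoder (assuming the 24x32 depth resolution annotated in the layer comments above and used for obs_depth_shape in scripts/cli_args.py; batch size and hidden sizes are hypothetical):

import torch
import torch.nn as nn
from rsl_rl.modules.depth_backbone import DepthBackbone, DepthOnlyFCBackbone

# DepthOnlyFCBackbone takes (batch, 24, 32) depth images and adds the channel dim itself.
cnn = DepthOnlyFCBackbone(output_dim=32, hidden_dim=128, activation=nn.ELU())
depth = torch.randn(8, 24, 32)
latent = cnn(depth)                                        # -> (8, 32)

# DepthBackbone adds a GRU on top; without masks it runs in single-step inference mode
# and keeps its hidden state between calls (ori_shape is only needed in batch mode).
backbone = DepthBackbone(cnn, rnn_hidden_dim=64, output_dim=32)
out = backbone(depth, ori_shape=depth.shape)               # -> (8, 32)
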
/rsl_rl/rsl_rl/modules/normalizer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 Preferred Networks, Inc.
2 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
3 | # SPDX-License-Identifier: BSD-3-Clause
4 |
5 | from __future__ import annotations
6 |
7 | import torch
8 | from torch import nn
9 |
10 |
11 | class EmpiricalNormalization(nn.Module):
12 | """Normalize mean and variance of values based on empirical values."""
13 |
14 | def __init__(self, shape, eps=1e-2, until=None):
15 | """Initialize EmpiricalNormalization module.
16 |
17 | Args:
18 | shape (int or tuple of int): Shape of input values except batch axis.
19 | eps (float): Small value for stability.
20 | until (int or None): If this arg is specified, the link learns input values until the sum of batch sizes
21 | exceeds it.
22 | """
23 | super().__init__()
24 | self.eps = eps
25 | self.until = until
26 | self.register_buffer("_mean", torch.zeros(shape).unsqueeze(0))
27 | self.register_buffer("_var", torch.ones(shape).unsqueeze(0))
28 | self.register_buffer("_std", torch.ones(shape).unsqueeze(0))
29 | self.count = 0
30 |
31 | @property
32 | def mean(self):
33 | return self._mean.squeeze(0).clone()
34 |
35 | @property
36 | def std(self):
37 | return self._std.squeeze(0).clone()
38 |
39 | def forward(self, x):
40 | """Normalize mean and variance of values based on empirical values.
41 |
42 | Args:
43 | x (ndarray or Variable): Input values
44 |
45 | Returns:
46 | ndarray or Variable: Normalized output values
47 | """
48 |
49 | if self.training:
50 | self.update(x)
51 | return (x - self._mean) / (self._std + self.eps)
52 |
53 | @torch.jit.unused
54 | def update(self, x):
55 | """Learn input values without computing the output values of them"""
56 |
57 | if self.until is not None and self.count >= self.until:
58 | return
59 |
60 | count_x = x.shape[0]
61 | self.count += count_x
62 | rate = count_x / self.count
63 |
64 | var_x = torch.var(x, dim=0, unbiased=False, keepdim=True)
65 | mean_x = torch.mean(x, dim=0, keepdim=True)
66 | delta_mean = mean_x - self._mean
67 | self._mean += rate * delta_mean
68 | self._var += rate * (var_x - self._var + delta_mean * (mean_x - self._mean))
69 | self._std = torch.sqrt(self._var)
70 |
71 | @torch.jit.unused
72 | def inverse(self, y):
73 | return y * (self._std + self.eps) + self._mean
74 |
--------------------------------------------------------------------------------
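A small usage sketch for EmpiricalNormalization: in training mode, forward() first folds the batch into the running mean and variance (a batch-weighted incremental update) and then normalizes; in eval mode the statistics stay frozen. The shape and cutoff below are hypothetical:

import torch
from rsl_rl.modules.normalizer import EmpiricalNormalization

norm = EmpiricalNormalization(shape=(4,), until=1_000_000)  # stop updating after ~1e6 samples
x = torch.randn(32, 4) * 3.0 + 5.0

norm.train()
y = norm(x)                       # updates the running statistics, then normalizes
norm.eval()
y_fixed = norm(x)                 # normalizes with the frozen statistics only
x_back = norm.inverse(y_fixed)    # approximately recovers x
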
/rsl_rl/rsl_rl/runners/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Implementation of runners for environment-agent interaction."""
5 |
6 | from .on_policy_runner import OnPolicyRunner
7 | from .on_policy_runner_history import OnPolicyRunnerHistory
8 |
9 | __all__ = ["OnPolicyRunner", "OnPolicyRunnerHistory"]
10 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/storage/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Implementation of transitions storage for RL-agent."""
5 |
6 | from .rollout_storage import RolloutStorage
7 |
8 | __all__ = ["RolloutStorage"]
9 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | """Helper functions."""
5 |
6 | from .utils import split_and_pad_trajectories, store_code_state, unpad_trajectories
7 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/utils/neptune_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from __future__ import annotations
5 |
6 | import os
7 | from dataclasses import asdict
8 | from torch.utils.tensorboard import SummaryWriter
9 |
10 | try:
11 | import neptune
12 | except ModuleNotFoundError:
13 | raise ModuleNotFoundError("neptune-client is required to log to Neptune.")
14 |
15 |
16 | class NeptuneLogger:
17 | def __init__(self, project, token):
18 | self.run = neptune.init_run(project=project, api_token=token)
19 |
20 | def store_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
21 | self.run["runner_cfg"] = runner_cfg
22 | self.run["policy_cfg"] = policy_cfg
23 | self.run["alg_cfg"] = alg_cfg
24 | self.run["env_cfg"] = asdict(env_cfg)
25 |
26 |
27 | class NeptuneSummaryWriter(SummaryWriter):
28 | """Summary writer for Neptune."""
29 |
30 | def __init__(self, log_dir: str, flush_secs: int, cfg):
31 | super().__init__(log_dir, flush_secs)
32 |
33 | try:
34 | project = cfg["neptune_project"]
35 | except KeyError:
36 | raise KeyError("Please specify neptune_project in the runner config, e.g. legged_gym.")
37 |
38 | try:
39 | token = os.environ["NEPTUNE_API_TOKEN"]
40 | except KeyError:
41 | raise KeyError(
42 | "Neptune api token not found. Please run or add to ~/.bashrc: export NEPTUNE_API_TOKEN=YOUR_API_TOKEN"
43 | )
44 |
45 | try:
46 | entity = os.environ["NEPTUNE_USERNAME"]
47 | except KeyError:
48 | raise KeyError(
49 | "Neptune username not found. Please run or add to ~/.bashrc: export NEPTUNE_USERNAME=YOUR_USERNAME"
50 | )
51 |
52 | neptune_project = entity + "/" + project
53 |
54 | self.neptune_logger = NeptuneLogger(neptune_project, token)
55 |
56 | self.name_map = {
57 | "Train/mean_reward/time": "Train/mean_reward_time",
58 | "Train/mean_episode_length/time": "Train/mean_episode_length_time",
59 | }
60 |
61 | run_name = os.path.split(log_dir)[-1]
62 |
63 | self.neptune_logger.run["log_dir"].log(run_name)
64 |
65 | def _map_path(self, path):
66 | if path in self.name_map:
67 | return self.name_map[path]
68 | else:
69 | return path
70 |
71 | def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False):
72 | super().add_scalar(
73 | tag,
74 | scalar_value,
75 | global_step=global_step,
76 | walltime=walltime,
77 | new_style=new_style,
78 | )
79 | self.neptune_logger.run[self._map_path(tag)].log(scalar_value, step=global_step)
80 |
81 | def stop(self):
82 | self.neptune_logger.run.stop()
83 |
84 | def log_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
85 | self.neptune_logger.store_config(env_cfg, runner_cfg, alg_cfg, policy_cfg)
86 |
87 | def save_model(self, model_path, iter):
88 | self.neptune_logger.run["model/saved_model_" + str(iter)].upload(model_path)
89 |
90 | def save_file(self, path, iter=None):
91 | name = path.rsplit("/", 1)[-1].split(".")[0]
92 | self.neptune_logger.run["git_diff/" + name].upload(path)
93 |
--------------------------------------------------------------------------------
/rsl_rl/rsl_rl/utils/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from __future__ import annotations
5 |
6 | import git
7 | import os
8 | import pathlib
9 | import torch
10 |
11 |
12 | def split_and_pad_trajectories(tensor, dones):
13 |     """Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
14 | Returns masks corresponding to valid parts of the trajectories
15 | Example:
16 | Input: [ [a1, a2, a3, a4 | a5, a6],
17 | [b1, b2 | b3, b4, b5 | b6]
18 | ]
19 |
20 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
21 | [a5, a6, 0, 0], | [True, True, False, False],
22 | [b1, b2, 0, 0], | [True, True, False, False],
23 | [b3, b4, b5, 0], | [True, True, True, False],
24 | [b6, 0, 0, 0] | [True, False, False, False],
25 | ] | ]
26 |
27 |     Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
28 | """
29 | dones = dones.clone()
30 | dones[-1] = 1
31 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
32 | flat_dones = dones.transpose(1, 0).reshape(-1, 1)
33 |
34 | # Get length of trajectory by counting the number of successive not done elements
35 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
36 | trajectory_lengths = done_indices[1:] - done_indices[:-1]
37 | trajectory_lengths_list = trajectory_lengths.tolist()
38 | # Extract the individual trajectories
39 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
40 | # add at least one full length trajectory
41 | trajectories = trajectories + (torch.zeros(tensor.shape[0], tensor.shape[-1], device=tensor.device),)
42 | # pad the trajectories to the length of the longest trajectory
43 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
44 | # remove the added tensor
45 | padded_trajectories = padded_trajectories[:, :-1]
46 |
47 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
48 | return padded_trajectories, trajectory_masks
49 |
50 |
51 | def unpad_trajectories(trajectories, masks):
52 | """Does the inverse operation of split_and_pad_trajectories()"""
53 | # Need to transpose before and after the masking to have proper reshaping
54 | return (
55 | trajectories.transpose(1, 0)[masks.transpose(1, 0)]
56 | .view(-1, trajectories.shape[0], trajectories.shape[-1])
57 | .transpose(1, 0)
58 | )
59 |
60 |
61 | def store_code_state(logdir, repositories) -> list:
62 | git_log_dir = os.path.join(logdir, "git")
63 | os.makedirs(git_log_dir, exist_ok=True)
64 | file_paths = []
65 | for repository_file_path in repositories:
66 | try:
67 | repo = git.Repo(repository_file_path, search_parent_directories=True)
68 | except Exception:
69 | print(f"Could not find git repository in {repository_file_path}. Skipping.")
70 | # skip if not a git repository
71 | continue
72 | # get the name of the repository
73 | repo_name = pathlib.Path(repo.working_dir).name
74 | t = repo.head.commit.tree
75 | diff_file_name = os.path.join(git_log_dir, f"{repo_name}.diff")
76 | # check if the diff file already exists
77 | if os.path.isfile(diff_file_name):
78 | continue
79 | # write the diff file
80 | print(f"Storing git diff for '{repo_name}' in: {diff_file_name}")
81 | with open(diff_file_name, "x", encoding='utf-8') as f:
82 | content = f"--- git status ---\n{repo.git.status()} \n\n\n--- git diff ---\n{repo.git.diff(t)}"
83 | content = content
84 | f.write(content)
85 | # add the file path to the list of files to be uploaded
86 | file_paths.append(diff_file_name)
87 | return file_paths
88 |
--------------------------------------------------------------------------------
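A round-trip sketch for the trajectory helpers above (shapes are hypothetical): split_and_pad_trajectories cuts each environment's sequence at its done flags and zero-pads the pieces, and unpad_trajectories restores the original (time, num_envs, dim) layout:

import torch
from rsl_rl.utils import split_and_pad_trajectories, unpad_trajectories

T, N, D = 6, 2, 3                                # time steps, envs, feature dim
obs = torch.randn(T, N, D)
dones = torch.zeros(T, N)
dones[3, 0] = 1                                  # env 0 finishes an episode at t=3
dones[1, 1] = 1                                  # env 1 finishes at t=1 and t=4
dones[4, 1] = 1

padded, masks = split_and_pad_trajectories(obs, dones)
# padded: (padded length, num trajectories, D); masks flags the valid (unpadded) steps
recovered = unpad_trajectories(padded, masks)    # back to (T, N, D)
assert torch.allclose(recovered, obs)
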
/rsl_rl/rsl_rl/utils/wandb_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from __future__ import annotations
5 |
6 | import os
7 | from dataclasses import asdict
8 | from torch.utils.tensorboard import SummaryWriter
9 |
10 | try:
11 | import wandb
12 | except ModuleNotFoundError:
13 | raise ModuleNotFoundError("Wandb is required to log to Weights and Biases.")
14 |
15 |
16 | class WandbSummaryWriter(SummaryWriter):
17 | """Summary writer for Weights and Biases."""
18 |
19 | def __init__(self, log_dir: str, flush_secs: int, cfg):
20 | super().__init__(log_dir, flush_secs)
21 |
22 | try:
23 | project = cfg["wandb_project"]
24 | except KeyError:
25 | raise KeyError("Please specify wandb_project in the runner config, e.g. legged_gym.")
26 |
27 | try:
28 | entity = os.environ["WANDB_USERNAME"]
29 | except KeyError:
30 | raise KeyError(
31 | "Wandb username not found. Please run or add to ~/.bashrc: export WANDB_USERNAME=YOUR_USERNAME"
32 | )
33 |
34 | wandb.init(project=project, entity=entity)
35 |
36 | # Change generated name to project-number format
37 | wandb.run.name = project + wandb.run.name.split("-")[-1]
38 |
39 | self.name_map = {
40 | "Train/mean_reward/time": "Train/mean_reward_time",
41 | "Train/mean_episode_length/time": "Train/mean_episode_length_time",
42 | }
43 |
44 | run_name = os.path.split(log_dir)[-1]
45 |
46 | wandb.log({"log_dir": run_name})
47 |
48 | def store_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
49 | wandb.config.update({"runner_cfg": runner_cfg})
50 | wandb.config.update({"policy_cfg": policy_cfg})
51 | wandb.config.update({"alg_cfg": alg_cfg})
52 | wandb.config.update({"env_cfg": asdict(env_cfg)})
53 |
54 | def _map_path(self, path):
55 | if path in self.name_map:
56 | return self.name_map[path]
57 | else:
58 | return path
59 |
60 | def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False):
61 | super().add_scalar(
62 | tag,
63 | scalar_value,
64 | global_step=global_step,
65 | walltime=walltime,
66 | new_style=new_style,
67 | )
68 | wandb.log({self._map_path(tag): scalar_value}, step=global_step)
69 |
70 | def stop(self):
71 | wandb.finish()
72 |
73 | def log_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
74 | self.store_config(env_cfg, runner_cfg, alg_cfg, policy_cfg)
75 |
76 | def save_model(self, model_path, iter):
77 | wandb.save(model_path, base_path=os.path.dirname(model_path))
78 |
79 | def save_file(self, path, iter=None):
80 | wandb.save(path, base_path=os.path.dirname(path))
81 |
--------------------------------------------------------------------------------
/rsl_rl/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from setuptools import find_packages, setup
5 |
6 | setup(
7 | name="rsl_rl",
8 | version="2.0.2",
9 | packages=find_packages(),
10 | author="ETH Zurich, NVIDIA CORPORATION",
11 | maintainer="Nikita Rudin, David Hoeller",
12 | maintainer_email="rudinn@ethz.ch",
13 | url="https://github.com/leggedrobotics/rsl_rl",
14 | license="BSD-3",
15 | description="Fast and simple RL algorithms implemented in pytorch",
16 | python_requires=">=3.6",
17 | install_requires=[
18 | "torch>=1.10.0",
19 | "torchvision>=0.5.0",
20 | "numpy>=1.16.4",
21 | "GitPython",
22 | "onnx",
23 | ],
24 | )
25 |
--------------------------------------------------------------------------------
/scripts/cli_args.py:
--------------------------------------------------------------------------------
1 | # From Isaac Lab's standalone package
2 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers.
3 | # All rights reserved.
4 | #
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | from __future__ import annotations
8 |
9 | import argparse
10 | from typing import TYPE_CHECKING
11 |
12 | if TYPE_CHECKING:
13 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg
14 | from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
15 |
16 |
17 | def add_rsl_rl_args(parser: argparse.ArgumentParser):
18 | """Add RSL-RL arguments to the parser.
19 |
20 | Args:
21 | parser: The parser to add the arguments to.
22 | """
23 | # create a new argument group
24 | arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.")
25 | # -- experiment arguments
26 | arg_group.add_argument(
27 | "--experiment_name", type=str, default=None, help="Name of the experiment folder where logs will be stored."
28 | )
29 | arg_group.add_argument("--run_name", type=str, default=None, help="Run name suffix to the log directory.")
30 | arg_group.add_argument("--save_interval", type=int, default=None, help="Interval to save the model.")
31 | # -- load arguments
32 | arg_group.add_argument("--resume", type=bool, default=None, help="Whether to resume from a checkpoint.")
33 | arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.")
34 | arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.")
35 | # -- logger arguments
36 | arg_group.add_argument(
37 | "--logger", type=str, default=None, choices={"wandb", "tensorboard", "neptune"}, help="Logger module to use."
38 | )
39 | arg_group.add_argument(
40 | "--log_project_name", type=str, default=None, help="Name of the logging project when using wandb or neptune."
41 | )
42 |
43 |
44 | def parse_rsl_rl_cfg(task_name, args_cli: argparse.Namespace, play=False) -> RslRlOnPolicyRunnerCfg:
45 | """Parse configuration for RSL-RL agent based on inputs.
46 |
47 | Args:
48 |         task_name: The name of the task to load the default RSL-RL configuration for.
49 | args_cli: The command line arguments.
50 |
51 | Returns:
52 | The parsed configuration for RSL-RL agent based on inputs.
53 | """
54 | from omni.isaac.lab_tasks.utils.parse_cfg import load_cfg_from_registry
55 |
56 | # load the default configuration
57 | rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point")
58 |
59 | # override the default configuration with CLI arguments
60 | if args_cli.seed is not None:
61 | rslrl_cfg.seed = args_cli.seed
62 | if args_cli.resume is not None:
63 | rslrl_cfg.resume = args_cli.resume
64 | if args_cli.load_run is not None:
65 | rslrl_cfg.load_run = args_cli.load_run
66 | if args_cli.checkpoint is not None:
67 | rslrl_cfg.load_checkpoint = args_cli.checkpoint
68 | if args_cli.save_interval is not None:
69 | rslrl_cfg.save_interval = args_cli.save_interval
70 | if args_cli.run_name is not None:
71 | rslrl_cfg.run_name = args_cli.run_name
72 | if args_cli.logger is not None:
73 | rslrl_cfg.logger = args_cli.logger
74 | # set the project name for wandb and neptune
75 | if rslrl_cfg.logger in {"wandb", "neptune"} and args_cli.log_project_name:
76 | rslrl_cfg.wandb_project = args_cli.log_project_name
77 | rslrl_cfg.neptune_project = args_cli.log_project_name
78 | if args_cli.use_cnn is not None:
79 | rslrl_cfg.use_cnn = args_cli.use_cnn
80 | rslrl_cfg.policy.class_name = "ActorCriticDepthCNN"
81 | rslrl_cfg.policy.obs_depth_shape = (24, 32)
82 | if args_cli.use_rnn:
83 | rslrl_cfg.policy.rnn_input_size = 2 * rslrl_cfg.policy.actor_hidden_dims[-1]
84 | rslrl_cfg.policy.rnn_hidden_size = 2 * rslrl_cfg.policy.actor_hidden_dims[-1]
85 | if not args_cli.use_cnn:
86 | rslrl_cfg.policy.class_name = "ActorCriticRecurrent"
87 | else:
88 | rslrl_cfg.policy.class_name = "ActorCriticDepthCNNRecurrent"
89 |
90 | rslrl_cfg.policy.history_length = args_cli.history_length
91 |
92 | return rslrl_cfg
93 |
--------------------------------------------------------------------------------
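A small sketch of how these helpers are wired by the training and playing scripts (the task name is a placeholder, and parse_rsl_rl_cfg also expects seed, use_cnn, use_rnn and history_length on the namespace, which scripts/train.py and scripts/play.py define):

import argparse
import cli_args

parser = argparse.ArgumentParser(description="Example")
parser.add_argument("--task", type=str, default=None)
parser.add_argument("--seed", type=int, default=None)
parser.add_argument("--use_cnn", action="store_true", default=None)
parser.add_argument("--use_rnn", action="store_true", default=False)
parser.add_argument("--history_length", default=0, type=int)
cli_args.add_rsl_rl_args(parser)

args = parser.parse_args(["--task", "<task_name>", "--run_name", "debug", "--logger", "wandb"])
# agent_cfg = cli_args.parse_rsl_rl_cfg(args.task, args)   # needs the Isaac Lab task registry at runtime
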
/scripts/play.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers.
2 | # All rights reserved.
3 | #
4 | # SPDX-License-Identifier: BSD-3-Clause
5 |
6 | """Script to play a checkpoint if an RL agent from RSL-RL."""
7 |
8 | """Launch Isaac Sim Simulator first."""
9 |
10 | import argparse
11 | import subprocess
12 |
13 | from omni.isaac.lab.app import AppLauncher
14 |
15 | # local imports
16 | import cli_args # isort: skip
17 |
18 | # add argparse arguments
19 | parser = argparse.ArgumentParser(description="Play an RL agent with RSL-RL.")
20 | # parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
21 | parser.add_argument(
22 | "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
23 | )
24 | parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
25 | parser.add_argument("--task", type=str, default=None, help="Name of the task.")
26 | parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
27 |
28 | parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
29 | parser.add_argument("--video_length", type=int, default=2000, help="Length of the recorded video (in steps).")
30 | parser.add_argument("--use_cnn", action="store_true", default=None, help="Name of the run folder to resume from.")
31 | parser.add_argument("--arm_fixed", action="store_true", default=False, help="Fix the robot's arms.")
32 | parser.add_argument("--use_rnn", action="store_true", default=False, help="Use RNN in the actor-critic model.")
33 | parser.add_argument("--history_length", default=0, type=int, help="Length of history buffer.")
34 |
35 | # append RSL-RL cli arguments
36 | cli_args.add_rsl_rl_args(parser)
37 | # append AppLauncher cli args
38 | AppLauncher.add_app_launcher_args(parser)
39 | args_cli = parser.parse_args()
40 |
41 | # launch omniverse app
42 | app_launcher = AppLauncher(args_cli)
43 | simulation_app = app_launcher.app
44 |
45 | """Rest everything follows."""
46 |
47 | import gymnasium as gym
48 | import os
49 | import math
50 | import torch
51 | import imageio
52 |
53 | from rsl_rl.runners import OnPolicyRunner
54 |
55 | from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
56 | from omni.isaac.lab.utils.io import load_yaml
57 | from omni.isaac.lab.utils import update_class_from_dict
58 |
59 | import omni.isaac.lab_tasks # noqa: F401
60 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
61 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
62 | RslRlOnPolicyRunnerCfg,
63 | RslRlVecEnvWrapper,
64 | export_policy_as_jit,
65 | )
66 |
67 | from omni.isaac.leggedloco.config import *
68 | from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper
69 |
70 | from utils import quat2eulers
71 |
72 |
73 | def main():
74 |     """Play with RSL-RL agent."""
75 | # parse configuration
76 | # env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(
77 | # args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
78 | # )
79 | env_cfg = parse_env_cfg(args_cli.task, num_envs=args_cli.num_envs)
80 | agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
81 |
82 | # specify directory for logging experiments
83 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
84 | log_root_path = os.path.abspath(log_root_path)
85 | # print(f"[INFO] Logging experiment in directory: {log_root_path}")
86 | # specify directory for logging runs: {time-stamp}_{run_name}
87 | log_dir = os.path.join(log_root_path, args_cli.load_run)
88 | print(f"[INFO] Loading run from directory: {log_dir}")
89 |
90 | # update agent config with the one from the loaded run
91 | log_agent_cfg_file_path = os.path.join(log_dir, "params", "agent.yaml")
92 | assert os.path.exists(log_agent_cfg_file_path), f"Agent config file not found: {log_agent_cfg_file_path}"
93 | log_agent_cfg_dict = load_yaml(log_agent_cfg_file_path)
94 | update_class_from_dict(agent_cfg, log_agent_cfg_dict)
95 |
96 |
97 | # create isaac environment
98 | env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
99 | # wrap around environment for rsl-rl
100 | if args_cli.history_length > 0:
101 | env = RslRlVecEnvHistoryWrapper(env, history_length=args_cli.history_length)
102 | else:
103 | env = RslRlVecEnvWrapper(env)
104 |
105 | # specify directory for logging experiments
106 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
107 | log_root_path = os.path.abspath(log_root_path)
108 | print(f"[INFO] Loading experiment from directory: {log_root_path}")
109 | resume_path = get_checkpoint_path(log_root_path, args_cli.load_run, agent_cfg.load_checkpoint)
110 |
111 | # load previously trained model
112 | ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
113 | ppo_runner.load(resume_path)
114 | print(f"[INFO]: Loading model checkpoint from: {resume_path}")
115 |
116 | # obtain the trained policy for inference
117 | policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
118 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy()
119 | cam_eye = (robot_pos_w[0]+2.5, robot_pos_w[1]+2.5, 20)
120 | cam_target = (robot_pos_w[0], robot_pos_w[1], 0.0)
121 | # set the camera view
122 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target)
123 |
124 | # export policy to onnx
125 | export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
126 | # export_policy_as_onnx(ppo_runner.alg.actor_critic, export_model_dir, filename="policy.onnx")
127 | export_policy_as_jit(ppo_runner.alg.actor_critic, None, path=export_model_dir, filename="policy.jit")
128 | # reset environment
129 | obs, _ = env.get_observations()
130 | if args_cli.video:
131 | base_env = env.unwrapped
132 | init_frame = base_env.render()
133 | frames = [init_frame]
134 |
135 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy()
136 | cam_eye = (robot_pos_w[0]+2.5, robot_pos_w[1]+2.5, 5)
137 | cam_target = (robot_pos_w[0], robot_pos_w[1], 0.0)
138 | # set the camera view
139 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target)
140 |
141 | # simulate environment
142 | while simulation_app.is_running():
143 | # run everything in inference mode
144 | with torch.inference_mode():
145 | # agent stepping
146 | actions = policy(obs)
147 | # env stepping
148 | obs, _, _, infos = env.step(actions)
149 | # import pdb; pdb.set_trace()
150 |
151 | if args_cli.video and len(frames) < args_cli.video_length:
152 | base_env = env.unwrapped
153 | frame = base_env.render()
154 | frames.append(frame)
155 |
156 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy()
157 | robot_quat_w = env.unwrapped.scene["robot"].data.root_quat_w[0].detach().cpu().numpy()
158 | roll, pitch, yaw = quat2eulers(robot_quat_w[0], robot_quat_w[1], robot_quat_w[2], robot_quat_w[3])
159 | cam_eye = (robot_pos_w[0] + 3.0, robot_pos_w[1] + 3.0, robot_pos_w[2] + 3.0)
160 | cam_target = (robot_pos_w[0], robot_pos_w[1], robot_pos_w[2])
161 | # set the camera view
162 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target)
163 |
164 | if args_cli.video and len(frames) == args_cli.video_length:
165 | break
166 |
167 |     if args_cli.video:
168 |         writer = imageio.get_writer(os.path.join(log_dir, f"{args_cli.load_run}.mp4"), fps=50)
169 |         for frame in frames:
170 |             writer.append_data(frame)
171 |         writer.close()
172 | # close the simulator
173 | env.close()
174 |
175 |
176 | if __name__ == "__main__":
177 | # run the main function
178 | main()
179 | # close sim app
180 | simulation_app.close()
181 |
--------------------------------------------------------------------------------
/scripts/play_low_matterport_keyboard.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers.
2 | # All rights reserved.
3 | #
4 | # SPDX-License-Identifier: BSD-3-Clause
5 |
6 | """Script to play a checkpoint if an RL agent from RSL-RL."""
7 |
8 | """Launch Isaac Sim Simulator first."""
9 |
10 | import argparse
11 | import gymnasium as gym
12 | import os
13 | import math
14 | import torch
15 | import numpy as np
16 |
17 | from omni.isaac.lab.app import AppLauncher
18 |
19 | # local imports
20 | import cli_args # isort: skip
21 |
22 | # add argparse arguments
23 | parser = argparse.ArgumentParser(description="Play an RL agent with RSL-RL.")
24 | parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
25 | parser.add_argument(
26 | "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
27 | )
28 | parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
29 | parser.add_argument("--task", type=str, default="h1_rough", help="Name of the task.")
30 | parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
31 | parser.add_argument("--use_cnn", action="store_true", default=None, help="Name of the run folder to resume from.")
32 | parser.add_argument("--arm_fixed", action="store_true", default=False, help="Fix the robot's arms.")
33 | parser.add_argument("--use_rnn", action="store_true", default=False, help="Use RNN in the actor-critic model.")
34 | parser.add_argument("--history_length", default=0, type=int, help="Length of history buffer.")
35 | # append RSL-RL cli arguments
36 | cli_args.add_rsl_rl_args(parser)
37 | # append AppLauncher cli args
38 | AppLauncher.add_app_launcher_args(parser)
39 | args_cli = parser.parse_args()
40 |
41 | # args_cli.experience=f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.kit'
42 |
43 | # launch omniverse app
44 | app_launcher = AppLauncher(args_cli)
45 | simulation_app = app_launcher.app
46 |
47 | """Rest everything follows."""
48 |
49 | from rsl_rl.runners import OnPolicyRunner
50 |
51 | import omni.isaac.lab_tasks # noqa: F401
52 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
53 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
54 | RslRlOnPolicyRunnerCfg,
55 | RslRlVecEnvWrapper,
56 | )
57 |
58 | from omni.isaac.leggedloco.config import *
59 | from omni.isaac.lab.devices.keyboard import Se2Keyboard
60 | from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper
61 |
62 | import omni.isaac.lab.sim as sim_utils
63 | from omni.isaac.lab.markers import VisualizationMarkers, VisualizationMarkersCfg
64 |
65 | from utils import quat2eulers
66 |
67 |
68 | def define_markers() -> VisualizationMarkers:
69 | """Define path markers with various different shapes."""
70 | marker_cfg = VisualizationMarkersCfg(
71 | prim_path="/Visuals/pathMarkers",
72 | markers={
73 | "waypoint": sim_utils.SphereCfg(
74 | radius=0.1,
75 | visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.0, 1.0, 0.0)),
76 | ),
77 | },
78 | )
79 | return VisualizationMarkers(marker_cfg)
80 |
81 |
82 | def main():
83 | """Play with RSL-RL agent."""
84 | # parse configuration
85 | env_cfg = parse_env_cfg(args_cli.task, num_envs=args_cli.num_envs)
86 | agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli, play=True)
87 |
88 | # create isaac environment
89 | env = gym.make(args_cli.task, cfg=env_cfg)
90 | # wrap around environment for rsl-rl
91 | if args_cli.history_length > 0:
92 | env = RslRlVecEnvHistoryWrapper(env, history_length=args_cli.history_length)
93 | else:
94 | env = RslRlVecEnvWrapper(env)
95 |
96 | # specify directory for logging experiments
97 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
98 | log_root_path = os.path.abspath(log_root_path)
99 | print(f"[INFO] Loading experiment from directory: {log_root_path}")
100 | resume_path = get_checkpoint_path(log_root_path, args_cli.load_run, agent_cfg.load_checkpoint)
101 | print(f"[INFO]: Loading model checkpoint from: {resume_path}")
102 |
103 | # load previously trained model
104 | # import pdb; pdb.set_trace()
105 | ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
106 | ppo_runner.load(resume_path)
107 | print(f"[INFO]: Loading model checkpoint from: {resume_path}")
108 |
109 | # obtain the trained policy for inference
110 | policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
111 |
112 |
113 |     keyboard_interface = Se2Keyboard(v_x_sensitivity=0.05, v_y_sensitivity=0.05, omega_z_sensitivity=0.05)
114 | keyboard_interface.reset()
115 |
116 | # reset environment
117 | obs, _ = env.get_observations()
118 |     vel_command_keyboard = torch.zeros(3, device=obs.device)
119 | num_count = 0
120 |
121 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy()
122 | cam_eye = (robot_pos_w[0]+0.0, robot_pos_w[1]+2.0, 1.0)
123 | cam_target = (robot_pos_w[0], robot_pos_w[1], 1.0)
124 | # set the camera view
125 | env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target)
126 | robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy()
127 | cam_eye = (robot_pos_w[0]-0.0, robot_pos_w[1]-1.5, 1.2)
128 | robot_quat_w = env.unwrapped.scene["robot"].data.root_quat_w[0].detach().cpu().numpy()
129 | roll, pitch, yaw = quat2eulers(robot_quat_w[0], robot_quat_w[1], robot_quat_w[2], robot_quat_w[3])
130 | cam_eye = (robot_pos_w[0] - 0.8 * math.sin(-yaw), robot_pos_w[1] - 0.8 * math.cos(-yaw), robot_pos_w[2] + 0.8)
131 | cam_target = (robot_pos_w[0], robot_pos_w[1], robot_pos_w[2])
132 |
133 | # set the camera view
134 | # env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target)
135 |
136 | # simulate environment
137 | while simulation_app.is_running():
138 | # run everything in inference mode
139 | with torch.inference_mode():
140 | commands_key = torch.tensor(keyboard_interface.advance(), device=obs.device)
141 | vel_command_keyboard += commands_key
142 | if num_count % 10 == 0:
143 | print("vel_command_keyboard: ", vel_command_keyboard)
144 | # import pdb; pdb.set_trace()
145 | # assert torch.allclose(env.unwrapped.command_manager._terms['base_velocity'].vel_command_b, torch.tensor([0., 0., 0.], device = obs.device))
146 | # env.command_manager._terms['base_velocity'].vel_command_b[0,:] = commands
147 | # obs[:,9:12] = commands_key
148 |
149 |             obs[:, 6:9] = torch.tensor([vel_command_keyboard[0], vel_command_keyboard[1], vel_command_keyboard[2]], device=obs.device)
150 |             env.update_command(torch.tensor([vel_command_keyboard[0], vel_command_keyboard[1], vel_command_keyboard[2]], device=obs.device))
151 | # agent stepping
152 | actions = policy(obs)
153 |
154 | # robot_pos_w = env.unwrapped.scene["robot"].data.root_pos_w[0].detach().cpu().numpy()
155 | # robot_quat_w = env.unwrapped.scene["robot"].data.root_quat_w[0].detach().cpu().numpy()
156 | # print("robot quat: ", robot_quat_w)
157 | # print("robot pos: ", robot_pos_w)
158 |
159 | # roll, pitch, yaw = quat2eulers(robot_quat_w[0], robot_quat_w[1], robot_quat_w[2], robot_quat_w[3])
160 | # cam_eye = (robot_pos_w[0] - 2.0 * math.cos(-yaw), robot_pos_w[1] - 2.0 * math.sin(yaw), robot_pos_w[2] + 1.0)
161 | # cam_target = (robot_pos_w[0], robot_pos_w[1], robot_pos_w[2])
162 | # set the camera view
163 | # env.unwrapped.sim.set_camera_view(eye=cam_eye, target=cam_target)
164 | # env stepping
165 | obs, _, _, infos = env.step(actions)
166 |
167 |
168 | # close the simulator
169 | env.close()
170 |
171 |
172 |
173 |
174 | if __name__ == "__main__":
175 | # run the main function
176 | main()
177 | # close sim app
178 | simulation_app.close()
179 |
--------------------------------------------------------------------------------
/scripts/run_data_collection.py:
--------------------------------------------------------------------------------
1 | import os
2 | import gzip
3 | import json
4 | import argparse
5 | import subprocess
6 |
7 |
8 | if __name__ == "__main__":
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument("--r2r_data_path", type=str, default="/home/zhaojing/h1-training-isaaclab/assets/vln-ce/R2R_VLNCE_v1-3_preprocessed/train/train_filtered.json.gz")
11 | parser.add_argument("--task", type=str, default="go2_matterport_dataset")
12 | parser.add_argument("--resume", action="store_true")
13 | args = parser.parse_args()
14 |
15 | # Define the base arguments for data collection
16 | collection_args = [
17 | f"--task={args.task}",
18 | "--headless",
19 | "--enable_cameras"
20 | ]
21 |
22 | with gzip.open(args.r2r_data_path, 'rt') as file:
23 | data = json.load(file)
24 | episodes = data['episodes']
25 |
26 | start_idx = 0
27 | if args.resume:
28 | # Get already collected episodes from the data directory
29 | data_dir = "collected_data"
30 | if os.path.exists(data_dir):
31 | data_files = os.listdir(data_dir)
32 | data_files = [f for f in data_files if f.endswith(".npy")]
33 | if data_files:
34 | episode_nums = [int(f.split("_")[1].split(".")[0]) for f in data_files]
35 | start_idx = max(episode_nums) + 1
36 | print(f"Resuming from episode {start_idx}.....")
37 |
38 | for i in range(start_idx, len(episodes)):
39 | episode_id = episodes[i]['episode_id']
40 |
41 | # check if the episode has been collected
42 | if os.path.exists(f"collected_data/episode_{episode_id}.npy"):
43 | print(f"Episode {episode_id} already collected. Skipping....")
44 | continue
45 |
46 | # msg = f"\n======================= Collecting Data for Episode {episode_id} ======================="
47 | # # msg += f"\nScene: {episode['scene_id']}"
48 | # # msg += f"\nStart Position: {episode['start_position']}"
49 | # # msg += f"\nInstruction: {episode['instruction']['instruction_text']}\n"
50 | # print(msg)
51 |
52 | # Create episode-specific arguments
53 | episode_args = collection_args + [f"--episode_index={i}"]
54 |
55 | # Run the data collection script
56 | subprocess.run(['python', 'collect_data_matterport.py'] + episode_args)
57 |
58 | # Optional: Add a small delay between episodes
59 | # time.sleep(1)
--------------------------------------------------------------------------------
/scripts/train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, The Isaac Lab Project Developers.
2 | # All rights reserved.
3 | #
4 | # SPDX-License-Identifier: BSD-3-Clause
5 |
6 | """Script to train RL agent with RSL-RL."""
7 |
8 | """Launch Isaac Sim Simulator first."""
9 |
10 | import argparse
11 |
12 | from omni.isaac.lab.app import AppLauncher
13 |
14 | # local imports
15 | import cli_args # isort: skip
16 |
17 |
18 | # add argparse arguments
19 | parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
20 | parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
21 | parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
22 | parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
23 | # parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
24 | parser.add_argument(
25 | "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
26 | )
27 | parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
28 | parser.add_argument("--task", type=str, default=None, help="Name of the task.")
29 | parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
30 | parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
31 | parser.add_argument("--use_cnn", action="store_true", default=None, help="Name of the run folder to resume from.")
32 | parser.add_argument("--arm_fixed", action="store_true", default=False, help="Fix the robot's arms.")
33 | parser.add_argument("--use_rnn", action="store_true", default=False, help="Use RNN in the actor-critic model.")
34 | parser.add_argument("--history_length", default=0, type=int, help="Length of history buffer.")
35 |
36 | # append RSL-RL cli arguments
37 | cli_args.add_rsl_rl_args(parser)
38 | # append AppLauncher cli args
39 | AppLauncher.add_app_launcher_args(parser)
40 | args_cli = parser.parse_args()
41 |
42 | # launch omniverse app
43 | app_launcher = AppLauncher(args_cli)
44 | simulation_app = app_launcher.app
45 |
46 | """Rest everything follows."""
47 |
48 | import gymnasium as gym
49 | import os
50 | import torch
51 | from datetime import datetime
52 |
53 | from rsl_rl.runners import OnPolicyRunner
54 |
55 | from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
56 | from omni.isaac.lab.utils.dict import print_dict
57 | from omni.isaac.lab.utils.io import dump_pickle, dump_yaml
58 |
59 | import omni.isaac.lab_tasks # noqa: F401
60 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
61 | from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper
62 | from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
63 |
64 | # from omni.isaac.viplanner.config import H1RoughEnvCfg, H1BaseRoughEnvCfg, H12DoFRoughEnvCfg, H1VisionRoughEnvCfg, G1VisionRoughEnvCfg
65 | # from omni.isaac.viplanner.config import H1RoughEnvCfg_PLAY, H1BaseRoughEnvCfg_PLAY, H12DoFRoughEnvCfg_PLAY, H1VisionRoughEnvCfg_PLAY, G1VisionRoughEnvCfg_PLAY
66 | from omni.isaac.leggedloco.config import *
67 | from omni.isaac.leggedloco.utils import RslRlVecEnvHistoryWrapper
68 |
69 | torch.backends.cuda.matmul.allow_tf32 = True
70 | torch.backends.cudnn.allow_tf32 = True
71 | torch.backends.cudnn.deterministic = False
72 | torch.backends.cudnn.benchmark = False
73 |
74 |
75 | def main():
76 | """Train with RSL-RL agent."""
77 | # parse configuration
78 | # env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(
79 | # args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
80 | # )
81 | env_cfg = parse_env_cfg(args_cli.task, num_envs=args_cli.num_envs)
82 | agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
83 |
84 | # specify directory for logging experiments
85 | log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
86 | log_root_path = os.path.abspath(log_root_path)
87 | print(f"[INFO] Logging experiment in directory: {log_root_path}")
88 | # specify directory for logging runs: {time-stamp}_{run_name}
89 | log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
90 | if agent_cfg.run_name:
91 | log_dir += f"_{agent_cfg.run_name}"
92 | log_dir = os.path.join(log_root_path, log_dir)
93 |
94 | # max iterations for training
95 | if args_cli.max_iterations:
96 | agent_cfg.max_iterations = args_cli.max_iterations
97 |
98 | # create isaac environment
99 | env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
100 | # wrap for video recording
101 | if args_cli.video:
102 | video_kwargs = {
103 | "video_folder": os.path.join(log_dir, "videos"),
104 | "step_trigger": lambda step: step % args_cli.video_interval == 0,
105 | "video_length": args_cli.video_length,
106 | "disable_logger": True,
107 | }
108 | print("[INFO] Recording videos during training.")
109 | print_dict(video_kwargs, nesting=4)
110 | env = gym.wrappers.RecordVideo(env, **video_kwargs)
111 | # wrap around environment for rsl-rl
112 | if args_cli.history_length > 0:
113 | env = RslRlVecEnvHistoryWrapper(env, history_length=args_cli.history_length)
114 | else:
115 | env = RslRlVecEnvWrapper(env)
116 |
117 | # create runner from rsl-rl
118 | runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
119 | # write git state to logs
120 | runner.add_git_repo_to_log(__file__)
121 | # save resume path before creating a new log_dir
122 | if agent_cfg.resume:
123 | # get path to previous checkpoint
124 | resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
125 | print(f"[INFO]: Loading model checkpoint from: {resume_path}")
126 | # load previously trained model
127 | runner.load(resume_path)
128 |
129 | # set seed of the environment
130 | env.seed(agent_cfg.seed)
131 |
132 | # dump the configuration into log-directory
133 | dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
134 | dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
135 | dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
136 | dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
137 |
138 | # run training
139 | runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
140 |
141 | # close the simulator
142 | env.close()
143 |
144 |
145 | if __name__ == "__main__":
146 | # run the main function
147 | main()
148 | # close sim app
149 | simulation_app.close()
150 |
--------------------------------------------------------------------------------
/scripts/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
4 | def quat2eulers(q0, q1, q2, q3):
5 | """
6 | Calculates the roll, pitch, and yaw angles from a quaternion.
7 |
8 | Args:
9 | q0: The scalar component of the quaternion.
10 | q1: The x-component of the quaternion.
11 | q2: The y-component of the quaternion.
12 | q3: The z-component of the quaternion.
13 |
14 | Returns:
15 | A tuple containing the roll, pitch, and yaw angles in radians.
16 | """
17 |
18 | roll = math.atan2(2 * (q2 * q3 + q0 * q1), q0 ** 2 - q1 ** 2 - q2 ** 2 + q3 ** 2)
19 | pitch = math.asin(2 * (q1 * q3 - q0 * q2))
20 | yaw = math.atan2(2 * (q1 * q2 + q0 * q3), q0 ** 2 + q1 ** 2 - q2 ** 2 - q3 ** 2)
21 |
22 | return roll, pitch, yaw
--------------------------------------------------------------------------------
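A quick sanity check for quat2eulers (scalar-first quaternion convention, as documented above): the identity quaternion gives zero angles, and a 90-degree rotation about z gives yaw = pi/2:

import math
from utils import quat2eulers   # scripts/utils.py

assert quat2eulers(1.0, 0.0, 0.0, 0.0) == (0.0, 0.0, 0.0)   # identity quaternion (w, x, y, z)

# 90-degree rotation about z: q = (cos(pi/4), 0, 0, sin(pi/4))
roll, pitch, yaw = quat2eulers(math.cos(math.pi / 4), 0.0, 0.0, math.sin(math.pi / 4))
assert abs(yaw - math.pi / 2) < 1e-6 and abs(roll) < 1e-6 and abs(pitch) < 1e-6
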
/src/go2_teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-zj1026/legged-loco/87b0d3d18404e784abc0a62227bc41c940f29ecc/src/go2_teaser.gif
--------------------------------------------------------------------------------
/src/h1_teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-zj1026/legged-loco/87b0d3d18404e784abc0a62227bc41c940f29ecc/src/h1_teaser.gif
--------------------------------------------------------------------------------