├── .gitattributes
├── .gitignore
├── README.md
├── humanoid
│   ├── __init__.py
│   ├── algo
│   │   ├── __init__.py
│   │   ├── ppo
│   │   │   ├── __init__.py
│   │   │   ├── actor_critic.py
│   │   │   ├── on_policy_runner.py
│   │   │   ├── ppo.py
│   │   │   └── rollout_storage.py
│   │   └── vec_env.py
│   ├── envs
│   │   ├── __init__.py
│   │   ├── base
│   │   │   ├── LICENSE
│   │   │   ├── base_config.py
│   │   │   ├── base_task.py
│   │   │   ├── legged_robot.py
│   │   │   └── legged_robot_config.py
│   │   └── custom
│   │       ├── humanoid_config.py
│   │       └── humanoid_env.py
│   ├── scripts
│   │   ├── play.py
│   │   ├── sim2sim.py
│   │   └── train.py
│   └── utils
│       ├── __init__.py
│       ├── calculate_gait.py
│       ├── helpers.py
│       ├── logger.py
│       ├── math.py
│       ├── task_registry.py
│       └── terrain.py
├── images
│   └── demo.gif
├── logs
│   └── XBot_ppo
│       └── exported
│           └── policies
│               └── policy_example.pt
├── resources
│   └── robots
│       └── XBot
│           ├── meshes
│           │   ├── base_link.STL
│           │   ├── left_ankle_pitch_link.STL
│           │   ├── left_ankle_pitch_linkage1_link.STL
│           │   ├── left_ankle_pitch_linkage2_link.STL
│           │   ├── left_ankle_pitch_motor1_link.STL
│           │   ├── left_ankle_pitch_motor2_link.STL
│           │   ├── left_ankle_roll_link.STL
│           │   ├── left_arm_base_link.STL
│           │   ├── left_arm_yaw_link.STL
│           │   ├── left_elbow_pitch_link.STL
│           │   ├── left_elbow_yaw_link.STL
│           │   ├── left_foot_ee_link.STL
│           │   ├── left_hand_ee_link.STL
│           │   ├── left_hand_index_bend_link.STL
│           │   ├── left_hand_index_rota_link1.STL
│           │   ├── left_hand_index_rota_link2.STL
│           │   ├── left_hand_index_tip.STL
│           │   ├── left_hand_link.STL
│           │   ├── left_hand_mid_link1.STL
│           │   ├── left_hand_mid_link2.STL
│           │   ├── left_hand_mid_tip.STL
│           │   ├── left_hand_pinky_link1.STL
│           │   ├── left_hand_pinky_link2.STL
│           │   ├── left_hand_pinky_tip.STL
│           │   ├── left_hand_ring_link1.STL
│           │   ├── left_hand_ring_link2.STL
│           │   ├── left_hand_ring_tip.STL
│           │   ├── left_hand_thumb_bend_link.STL
│           │   ├── left_hand_thumb_rota_link1.STL
│           │   ├── left_hand_thumb_rota_link2.STL
│           │   ├── left_hand_thumb_tip.STL
│           │   ├── left_knee_link.STL
│           │   ├── left_knee_linkage_link.STL
│           │   ├── left_knee_motor_link.STL
│           │   ├── left_leg_pitch_link.STL
│           │   ├── left_leg_roll_link.STL
│           │   ├── left_leg_yaw_link.STL
│           │   ├── left_shoulder_pitch_link.STL
│           │   ├── left_shoulder_roll_link.STL
│           │   ├── left_wrist_roll_link.STL
│           │   ├── left_wrist_yaw_link.STL
│           │   ├── neck_base_link.STL
│           │   ├── neck_pitch_link.STL
│           │   ├── neck_yaw_link.STL
│           │   ├── realsense_link.STL
│           │   ├── right_ankle_pitch_link.STL
│           │   ├── right_ankle_pitch_linkage1_link.STL
│           │   ├── right_ankle_pitch_linkage2_link.STL
│           │   ├── right_ankle_pitch_motor1_link.STL
│           │   ├── right_ankle_pitch_motor2_link.STL
│           │   ├── right_ankle_roll_link.STL
│           │   ├── right_arm_base_link.STL
│           │   ├── right_arm_yaw_link.STL
│           │   ├── right_elbow_pitch_link.STL
│           │   ├── right_elbow_yaw_link.STL
│           │   ├── right_foot_ee_link.STL
│           │   ├── right_hand_ee_link.STL
│           │   ├── right_hand_index_bend_link.STL
│           │   ├── right_hand_index_rota_link1.STL
│           │   ├── right_hand_index_rota_link2.STL
│           │   ├── right_hand_index_tip.STL
│           │   ├── right_hand_link.STL
│           │   ├── right_hand_mid_link1.STL
│           │   ├── right_hand_mid_link2.STL
│           │   ├── right_hand_mid_tip.STL
│           │   ├── right_hand_pinky_link1.STL
│           │   ├── right_hand_pinky_link2.STL
│           │   ├── right_hand_pinky_tip.STL
│           │   ├── right_hand_ring_link1.STL
│           │   ├── right_hand_ring_link2.STL
│           │   ├── right_hand_ring_tip.STL
│           │   ├── right_hand_thumb_bend_link.STL
│           │   ├── right_hand_thumb_rota_link1.STL
│           │   ├── right_hand_thumb_rota_link2.STL
│           │   ├── right_hand_thumb_rota_tip.STL
│           │   ├── right_knee_link.STL
│           │   ├── right_knee_linkage_link.STL
│           │   ├── right_knee_motor_link.STL
│           │   ├── right_leg_pitch_link.STL
│           │   ├── right_leg_roll_link.STL
│           │   ├── right_leg_yaw_link.STL
│           │   ├── right_shoulder_pitch_link.STL
│           │   ├── right_shoulder_roll_link.STL
│           │   ├── right_wrist_roll_link.STL
│           │   ├── right_wrist_yaw_link.STL
│           │   ├── waist_roll_link.STL
│           │   └── waist_yaw_link.STL
│           ├── mjcf
│           │   ├── XBot-L-terrain.xml
│           │   └── XBot-L.xml
│           ├── terrain
│           │   └── uneven.png
│           └── urdf
│               └── XBot-L.urdf
└── setup.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.dae filter=lfs diff=lfs merge=lfs -text
2 | *.obj filter=lfs diff=lfs merge=lfs -text
3 | *.obj text !filter !merge !diff
4 | *.dae text !filter !merge !diff
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # These are some examples of commonly ignored file patterns.
2 | # You should customize this list as applicable to your project.
3 | # Learn more about .gitignore:
4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore
5 |
6 | # Node artifact files
7 | node_modules/
8 | dist/
9 | wandb/
10 | .ipynb_checkpoints/
11 | *.npy
12 | test_data/
13 | *.ipynb
14 | # Compiled Java class files
15 | *.class
16 |
17 | # Compiled Python bytecode
18 | *.py[cod]
19 |
20 | # Log files
21 | *.log
22 |
23 | # Package files
24 | *.jar
25 |
26 | # Maven
27 | target/
28 | dist/
29 |
30 | # JetBrains IDE
31 | .idea/
32 |
33 | # Unit test reports
34 | TEST*.xml
35 |
36 | # Generated by MacOS
37 | .DS_Store
38 |
39 | # Generated by Windows
40 | Thumbs.db
41 |
42 | # Applications
43 | *.app
44 | *.exe
45 | *.war
46 |
47 | # Large media files
48 | *.mp4
49 | *.tiff
50 | *.avi
51 | *.flv
52 | *.mov
53 | *.wmv
54 |
55 | # VS Code
56 | .vscode
57 | # logs
58 | logs
59 | runs
60 | videos
61 | # resources
62 |
63 | # other
64 | *.egg-info
65 | __pycache__
66 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer
2 |
3 | Project Page
4 | |
5 | arXiv
6 | |
7 | Twitter
8 |
9 | Xinyang Gu*,
10 | Yen-Jen Wang*,
11 | Jianyu Chen†
12 |
13 | *: Equal contribution. Project Co-lead., †: Corresponding Author.
14 |
15 | ![Demo](images/demo.gif)
16 |
17 | Humanoid-Gym is an easy-to-use reinforcement learning (RL) framework based on Nvidia Isaac Gym, designed to train locomotion skills for humanoid robots, emphasizing zero-shot transfer from simulation to the real-world environment. Humanoid-Gym also integrates a sim-to-sim framework from Isaac Gym to Mujoco that allows users to verify the trained policies in different physical simulations to ensure the robustness and generalization of the policies.
18 |
19 | This codebase is verified by RobotEra's XBot-S (1.2-meter tall humanoid robot) and XBot-L (1.65-meter tall humanoid robot) in a real-world environment with zero-shot sim-to-real transfer.
20 |
21 | ## Features
22 |
23 | ### 1. Humanoid Robot Training
24 | This repository offers comprehensive guidance and scripts for the training of humanoid robots. Humanoid-Gym features rewards specialized for humanoid robots, reducing the difficulty of sim-to-real transfer. In this repository, we use RobotEra's XBot-L as a primary example. It can also be used for other robots with minimal adjustments. Our resources cover setup, configuration, and execution. Our goal is to fully prepare the robot for real-world locomotion by providing in-depth training and optimization.
25 |
26 |
27 | - **Comprehensive Training Guidelines**: We offer thorough walkthroughs for each stage of the training process.
28 | - **Step-by-Step Configuration Instructions**: Our guidance is clear and succinct, ensuring an efficient setup process.
29 | - **Execution Scripts for Easy Deployment**: Utilize our pre-prepared scripts to streamline the training workflow.
30 |
31 | ### 2. Sim2Sim Support
32 | We also share our sim2sim pipeline, which allows you to transfer trained policies to highly accurate and carefully designed simulated environments. Once you acquire the robot, you can confidently deploy the RL-trained policies in real-world settings.
33 |
34 | Our simulator settings, particularly with Mujoco, are finely tuned to closely mimic real-world scenarios. This careful calibration ensures that the performances in both simulated and real-world environments are closely aligned. This improvement makes our simulations more trustworthy and enhances our confidence in their applicability to real-world scenarios.
35 |
36 |
37 | ### 3. Denoising World Model Learning
38 | #### Robotics: Science and Systems (RSS), 2024 (Best Paper Award Finalist)
39 | Paper
40 | |
41 | Twitter
42 |
43 | Xinyang Gu*,
44 | Yen-Jen Wang*,
45 | Xiang Zhu*, Chengming Shi*, Yanjiang Guo, Yichen Liu,
46 | Jianyu Chen†
47 |
48 | *: Equal contribution. Project Co-lead., †: Corresponding Author.
49 |
50 | Denoising World Model Learning (DWL) presents an advanced sim-to-real framework that integrates state estimation and system identification. This dual-method approach ensures the robot's learning and adaptation are both practical and effective in real-world contexts.
51 |
52 | - **Enhanced Sim-to-real Adaptability**: Techniques to optimize the robot's transition from simulated to real environments.
53 | - **Improved State Estimation Capabilities**: Advanced tools for precise and reliable state analysis.
54 |
55 | ### Perceptive Locomotion Learning for Humanoid Robots (Coming Soon!)
56 | Twitter
57 |
58 | ### Dexterous Hand Manipulation (Coming Soon!)
59 | Twitter
60 |
61 | ## Installation
62 |
63 | 1. Create a new Python 3.8 virtual environment with `conda create -n myenv python=3.8`.
64 | 2. For the best performance, we recommend NVIDIA driver version 525 (`sudo apt install nvidia-driver-525`). The minimum supported driver version is 515; if you're unable to install version 525, ensure that your system has at least version 515 to maintain basic functionality.
65 | 3. Install PyTorch 1.13 with CUDA 11.7:
66 | - `conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia`
67 | 4. Install numpy-1.23 with `conda install numpy=1.23`.
68 | 5. Install Isaac Gym:
69 | - Download and install Isaac Gym Preview 4 from https://developer.nvidia.com/isaac-gym.
70 | - `cd isaacgym/python && pip install -e .`
71 | - Run an example with `cd examples && python 1080_balls_of_solitude.py`.
72 | - Consult `isaacgym/docs/index.html` for troubleshooting.
73 | 6. Install humanoid-gym:
74 | - Clone this repository.
75 | - `cd humanoid-gym && pip install -e .`
76 |
77 |
78 |
79 | ## Usage Guide
80 |
81 | #### Examples
82 |
83 | ```bash
84 | # Under the directory humanoid-gym/humanoid
85 | # Launching PPO Policy Training for 'v1' Across 4096 Environments
86 | # This command initiates the PPO algorithm-based training for the humanoid task.
87 | python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 4096
88 |
89 | # Evaluating the Trained PPO Policy 'v1'
90 | # This command loads the 'v1' policy for performance assessment in its environment.
91 | # Additionally, it automatically exports a JIT model, suitable for deployment purposes.
92 | python scripts/play.py --task=humanoid_ppo --run_name v1
93 |
94 | # Implementing Simulation-to-Simulation Model Transformation
95 | # This command facilitates a sim-to-sim transformation using the exported 'v1' policy.
96 | # Note: run play.py first to export the JIT model, then pass it to sim2sim.py.
97 | python scripts/sim2sim.py --load_model /path/to/logs/XBot_ppo/exported/policies/policy_1.pt
98 |
99 | # Run our trained policy
100 | python scripts/sim2sim.py --load_model /path/to/logs/XBot_ppo/exported/policies/policy_example.pt
101 |
102 | ```
103 |
104 | #### 1. Default Tasks
105 |
106 |
107 | - **humanoid_ppo**
108 | - Purpose: Baseline, PPO policy, Multi-frame low-level control
109 | - Observation Space: Variable $(47 \times H)$ dimensions, where $H$ is the number of frames
110 | - $[O_{t-H} ... O_t]$ (see the frame-stacking sketch below)
111 | - Privileged Information: $73$ dimensions
112 |
113 | - **humanoid_dwl (coming soon)**
114 |
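The stacked observation above is simply the last $H$ single-frame observations concatenated along the feature dimension. Below is a minimal sketch of such a frame-stack buffer, assuming a 47-dimensional single frame as above; the class and variable names are illustrative, not the repository's actual implementation:

```python
import torch

class FrameStack:
    """Keep the last H single-frame observations; return them concatenated."""

    def __init__(self, num_envs, num_single_obs, H, device="cpu"):
        self.H = H
        # buffer of shape (num_envs, H, num_single_obs), oldest frame first
        self.buf = torch.zeros(num_envs, H, num_single_obs, device=device)

    def push(self, obs):
        # drop the oldest frame, append the newest (obs: (num_envs, num_single_obs))
        self.buf = torch.cat((self.buf[:, 1:], obs.unsqueeze(1)), dim=1)

    def get(self):
        # flatten to (num_envs, H * num_single_obs), i.e. the stacked history
        return self.buf.flatten(1)
```
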
115 | #### 2. PPO Policy
116 | - **Training Command**: For training the PPO policy, execute:
117 | ```
118 | python humanoid/scripts/train.py --task=humanoid_ppo --load_run log_file_path --name run_name
119 | ```
120 | - **Running a Trained Policy**: To deploy a trained PPO policy, use:
121 | ```
122 | python humanoid/scripts/play.py --task=humanoid_ppo --load_run log_file_path --name run_name
123 | ```
124 | - By default, the latest model of the last run from the experiment folder is loaded. However, other run iterations/models can be selected by adjusting `load_run` and `checkpoint` in the training config (see the sketch below).
125 |
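For reference, a sketch of where these fields typically live in a legged_gym-style training config; the `-1` defaults meaning "latest" follow that convention and are an assumption here, not verbatim repository code:

```python
class LeggedRobotCfgPPO:
    class runner:
        load_run = -1    # -1 = most recent run in the experiment folder
        checkpoint = -1  # -1 = latest saved model of the selected run
```
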
126 | #### 3. Sim-to-sim
127 | - **Please note: Before initiating the sim-to-sim process, ensure that you run `play.py` to export a JIT policy.**
128 | - **Mujoco-based Sim2Sim Deployment**: Utilize Mujoco for executing simulation-to-simulation (sim2sim) deployments with the command below:
129 | ```
130 | python scripts/sim2sim.py --load_model /path/to/export/model.pt
131 | ```
132 |
133 |
134 | #### 4. Parameters
135 | - **CPU and GPU Usage**: To run simulations on the CPU, set both `--sim_device=cpu` and `--rl_device=cpu`. For GPU operations, specify `--sim_device=cuda:{0,1,2...}` and `--rl_device=cuda:{0,1,2...}` accordingly. Please note that `CUDA_VISIBLE_DEVICES` is not applicable, and it's essential that the `--sim_device` and `--rl_device` settings match (see the examples after this list).
136 | - **Headless Operation**: Include `--headless` for operations without rendering.
137 | - **Rendering Control**: Press 'v' to toggle rendering during training.
138 | - **Policy Location**: Trained policies are saved in `humanoid/logs/<experiment_name>/<date_time>_<run_name>/model_<iteration>.pt`.
139 |
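For example, matching CPU-only and single-GPU training invocations might look like this (run name is illustrative):

```bash
# Train entirely on the CPU, without rendering
python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --sim_device=cpu --rl_device=cpu

# Train on the second GPU; --sim_device and --rl_device must match
python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --sim_device=cuda:1 --rl_device=cuda:1
```
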
140 | #### 5. Command-Line Arguments
141 | For RL training, please refer to `humanoid/utils/helpers.py#L161`.
142 | For the sim-to-sim process, please refer to `humanoid/scripts/sim2sim.py#L169`.
143 |
144 | ## Code Structure
145 |
146 | 1. Every environment hinges on an `env` file (`legged_robot.py`) and a `configuration` file (`legged_robot_config.py`). The latter houses two classes: `LeggedRobotCfg` (encompassing all environmental parameters) and `LeggedRobotCfgPPO` (denoting all training parameters).
147 | 2. Both `env` and `config` classes use inheritance.
148 | 3. Every non-zero reward scale specified in `cfg` adds a reward function of the corresponding name to the total reward.
149 | 4. Tasks must be registered with `task_registry.register(name, EnvClass, EnvConfig, TrainConfig)`. Registration may occur within `envs/__init__.py`, or outside of this repository (see the sketch below).
150 |
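To make points 3 and 4 concrete, here is a hypothetical config fragment and registration call; the class names, reward names, and import path are assumptions modeled on the XBot example, not verbatim repository code:

```python
# In the environment config: each non-zero scale below requires a matching
# `_reward_<name>(self)` method on the environment class.
class rewards:
    class scales:
        tracking_lin_vel = 1.0   # summed via _reward_tracking_lin_vel()
        torques = -1e-5          # penalty via _reward_torques()
        feet_air_time = 0.0      # zero scale: this reward is skipped

# In humanoid/envs/__init__.py (or your own package):
from humanoid.utils.task_registry import task_registry
task_registry.register("humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO())
```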
151 |
152 | ## Add a new environment
153 |
154 | The base environment `legged_robot` constructs a rough-terrain locomotion task. The corresponding configuration does not specify a robot asset (URDF/MJCF) and defines no reward scales.
155 |
156 | 1. If you need to add a new environment, create a new folder in the `envs/` directory with a configuration file named `<your_env>_config.py`. The new configuration should inherit from existing environment configurations.
157 | 2. If proposing a new robot:
158 | - Insert the corresponding assets in the `resources/` folder.
159 | - In the `cfg` file, set the path to the asset, define body names, default_joint_positions, and PD gains. Specify the desired `train_cfg` and the environment's name (python class).
160 | - In the `train_cfg`, set the `experiment_name` and `run_name`.
161 | 3. If needed, create your environment in `<your_env>.py`. Inherit from existing environments, override desired functions, and/or add your reward functions (see the skeleton after this list).
162 | 4. Register your environment in `humanoid/envs/__init__.py`.
163 | 5. Modify or tune other parameters in your `cfg` or `cfg_train` as needed. To remove a reward, set its scale to zero. Avoid modifying the parameters of other environments!
164 | 6. If you want a new robot/environment to perform sim2sim, you may need to modify `humanoid/scripts/sim2sim.py`:
165 | - Check the joint mapping of the robot between MJCF and URDF.
166 | - Change the initial joint position of the robot according to your trained policy.
167 |
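Putting steps 1-5 together, a minimal skeleton for a hypothetical robot `MyBot` might look like the following. Every class name, import path, and attribute here is an assumption modeled on the XBot example and legged_gym conventions; adjust them to the actual definitions in `envs/custom/`:

```python
import torch

# envs/mybot/mybot_config.py -- inherit and override only what changes
from humanoid.envs.custom.humanoid_config import XBotLCfg, XBotLCfgPPO

class MyBotCfg(XBotLCfg):
    class asset(XBotLCfg.asset):
        file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/MyBot/urdf/MyBot.urdf'

class MyBotCfgPPO(XBotLCfgPPO):
    class runner(XBotLCfgPPO.runner):
        experiment_name = 'MyBot_ppo'
        run_name = 'v1'

# envs/mybot/mybot_env.py -- override or add reward functions as needed
from humanoid.envs.custom.humanoid_env import XBotLFreeEnv

class MyBotEnv(XBotLFreeEnv):
    def _reward_flat_orientation(self):
        # assumed attribute: penalize tilt of the base (gravity projected
        # into the base frame should stay close to [0, 0, -1])
        return torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1)

# humanoid/envs/__init__.py -- make the task visible to train.py / play.py
from humanoid.utils.task_registry import task_registry
task_registry.register("mybot_ppo", MyBotEnv, MyBotCfg(), MyBotCfgPPO())
```
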
168 | ## Troubleshooting
169 |
170 | Observe the following cases:
171 |
172 | ```bash
173 | # error
174 | ImportError: libpython3.8.so.1.0: cannot open shared object file: No such file or directory
175 |
176 | # solution
177 | # set the correct path
178 | export LD_LIBRARY_PATH="$HOME/miniconda3/envs/your_env/lib:$LD_LIBRARY_PATH"
179 |
180 | # OR
181 | sudo apt install libpython3.8
182 |
183 | # error
184 | AttributeError: module 'distutils' has no attribute 'version'
185 |
186 | # solution
187 | # install pytorch 1.12.0
188 | conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch
189 |
190 | # error, caused by the libstdc++ version distributed with conda differing from the one used on your system to build Isaac Gym
191 | ImportError: /home/roboterax/anaconda3/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.20` not found (required by /home/roboterax/carbgym/python/isaacgym/_bindings/linux64/gym_36.so)
192 |
193 | # solution
194 | mkdir ${YOUR_CONDA_ENV}/lib/_unused
195 | mv ${YOUR_CONDA_ENV}/lib/libstdc++* ${YOUR_CONDA_ENV}/lib/_unused
196 | ```
197 |
198 | ## Citation
199 |
200 | Please cite the following if you use this code or parts of it:
201 | ```
202 | @article{gu2024humanoid,
203 | title={Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer},
204 | author={Gu, Xinyang and Wang, Yen-Jen and Chen, Jianyu},
205 | journal={arXiv preprint arXiv:2404.05695},
206 | year={2024}
207 | }
208 |
209 | @inproceedings{gu2024advancing,
210 | title={Advancing Humanoid Locomotion: Mastering Challenging Terrains with Denoising World Model Learning},
211 | author={Gu, Xinyang and Wang, Yen-Jen and Zhu, Xiang and Shi, Chengming and Guo, Yanjiang and Liu, Yichen and Chen, Jianyu},
212 | booktitle={Robotics: Science and Systems},
213 | year={2024},
214 | url={https://enriquecoronadozu.github.io/rssproceedings2024/rss20/p058.pdf}
215 | }
216 | ```
217 |
218 | ## Acknowledgment
219 |
220 | The implementation of Humanoid-Gym relies on resources from [legged_gym](https://github.com/leggedrobotics/legged_gym) and [rsl_rl](https://github.com/leggedrobotics/rsl_rl) projects, created by the Robotic Systems Lab. We specifically utilize the `LeggedRobot` implementation from their research to enhance our codebase.
221 |
222 | ## Any Questions?
223 |
224 | If you have any more questions, please contact [support@robotera.com](mailto:support@robotera.com) or create an issue in this repository.
225 |
--------------------------------------------------------------------------------
/humanoid/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | import os
34 |
35 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
36 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'humanoid', 'envs')
37 |
--------------------------------------------------------------------------------
/humanoid/algo/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | from .vec_env import VecEnv
34 | from .ppo import *
--------------------------------------------------------------------------------
/humanoid/algo/ppo/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | from .ppo import PPO
34 | from .on_policy_runner import OnPolicyRunner
35 | from .actor_critic import ActorCritic
36 | from .rollout_storage import RolloutStorage
37 |
--------------------------------------------------------------------------------
/humanoid/algo/ppo/actor_critic.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import torch
33 | import torch.nn as nn
34 | from torch.distributions import Normal
35 |
36 | class ActorCritic(nn.Module):
37 | def __init__(self, num_actor_obs,
38 | num_critic_obs,
39 | num_actions,
40 | actor_hidden_dims=[256, 256, 256],
41 | critic_hidden_dims=[256, 256, 256],
42 | init_noise_std=1.0,
43 | activation = nn.ELU(),
44 | **kwargs):
45 | if kwargs:
46 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()]))
47 | super(ActorCritic, self).__init__()
48 |
49 |
50 | mlp_input_dim_a = num_actor_obs
51 | mlp_input_dim_c = num_critic_obs
52 | # Policy
53 | actor_layers = []
54 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0]))
55 | actor_layers.append(activation)
56 | for l in range(len(actor_hidden_dims)):
57 | if l == len(actor_hidden_dims) - 1:
58 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions))
59 | else:
60 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1]))
61 | actor_layers.append(activation)
62 | self.actor = nn.Sequential(*actor_layers)
63 |
64 | # Value function
65 | critic_layers = []
66 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0]))
67 | critic_layers.append(activation)
68 | for l in range(len(critic_hidden_dims)):
69 | if l == len(critic_hidden_dims) - 1:
70 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1))
71 | else:
72 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1]))
73 | critic_layers.append(activation)
74 | self.critic = nn.Sequential(*critic_layers)
75 |
76 | print(f"Actor MLP: {self.actor}")
77 | print(f"Critic MLP: {self.critic}")
78 |
79 | # Action noise
80 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
81 | self.distribution = None
82 | # disable args validation for speedup
83 | Normal.set_default_validate_args = False
84 |
85 |
86 | @staticmethod
87 | # not used at the moment
88 | def init_weights(sequential, scales):
89 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in
90 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))]
91 |
92 |
93 | def reset(self, dones=None):
94 | pass
95 |
96 | def forward(self):
97 | raise NotImplementedError
98 |
99 | @property
100 | def action_mean(self):
101 | return self.distribution.mean
102 |
103 | @property
104 | def action_std(self):
105 | return self.distribution.stddev
106 |
107 | @property
108 | def entropy(self):
109 | return self.distribution.entropy().sum(dim=-1)
110 |
111 | def update_distribution(self, observations):
112 | mean = self.actor(observations)
113 | self.distribution = Normal(mean, mean*0. + self.std)  # broadcast the learned std to the batch shape
114 |
115 | def act(self, observations, **kwargs):
116 | self.update_distribution(observations)
117 | return self.distribution.sample()
118 |
119 | def get_actions_log_prob(self, actions):
120 | return self.distribution.log_prob(actions).sum(dim=-1)
121 |
122 | def act_inference(self, observations):
123 | actions_mean = self.actor(observations)
124 | return actions_mean
125 |
126 | def evaluate(self, critic_observations, **kwargs):
127 | value = self.critic(critic_observations)
128 | return value
--------------------------------------------------------------------------------
/humanoid/algo/ppo/on_policy_runner.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import os
33 | import time
34 | import torch
35 | import wandb
36 | import statistics
37 | from collections import deque
38 | from datetime import datetime
39 | from .ppo import PPO
40 | from .actor_critic import ActorCritic
41 | from humanoid.algo.vec_env import VecEnv
42 | from torch.utils.tensorboard import SummaryWriter
43 |
44 |
45 | class OnPolicyRunner:
46 |
47 | def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
48 |
49 | self.cfg = train_cfg["runner"]
50 | self.alg_cfg = train_cfg["algorithm"]
51 | self.policy_cfg = train_cfg["policy"]
52 | self.all_cfg = train_cfg
53 | self.wandb_run_name = (
54 | datetime.now().strftime("%b%d_%H-%M-%S")
55 | + "_"
56 | + train_cfg["runner"]["experiment_name"]
57 | + "_"
58 | + train_cfg["runner"]["run_name"]
59 | )
60 | self.device = device
61 | self.env = env
62 | if self.env.num_privileged_obs is not None:
63 | num_critic_obs = self.env.num_privileged_obs
64 | else:
65 | num_critic_obs = self.env.num_obs
66 | actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic
67 | actor_critic: ActorCritic = actor_critic_class(
68 | self.env.num_obs, num_critic_obs, self.env.num_actions, **self.policy_cfg
69 | ).to(self.device)
70 | alg_class = eval(self.cfg["algorithm_class_name"]) # PPO
71 | self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg)
72 | self.num_steps_per_env = self.cfg["num_steps_per_env"]
73 | self.save_interval = self.cfg["save_interval"]
74 |
75 | # init storage and model
76 | self.alg.init_storage(
77 | self.env.num_envs,
78 | self.num_steps_per_env,
79 | [self.env.num_obs],
80 | [self.env.num_privileged_obs],
81 | [self.env.num_actions],
82 | )
83 |
84 | # Log
85 | self.log_dir = log_dir
86 | self.writer = None
87 | self.tot_timesteps = 0
88 | self.tot_time = 0
89 | self.current_learning_iteration = 0
90 |
91 | _, _ = self.env.reset()
92 |
93 | def learn(self, num_learning_iterations, init_at_random_ep_len=False):
94 | # initialize writer
95 | if self.log_dir is not None and self.writer is None:
96 | wandb.init(
97 | project="XBot",
98 | sync_tensorboard=True,
99 | name=self.wandb_run_name,
100 | config=self.all_cfg,
101 | )
102 | self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10)
103 | if init_at_random_ep_len:
104 | self.env.episode_length_buf = torch.randint_like(
105 | self.env.episode_length_buf, high=int(self.env.max_episode_length)
106 | )
107 | obs = self.env.get_observations()
108 | privileged_obs = self.env.get_privileged_observations()
109 | critic_obs = privileged_obs if privileged_obs is not None else obs
110 | obs, critic_obs = obs.to(self.device), critic_obs.to(self.device)
111 | self.alg.actor_critic.train() # switch to train mode (for dropout for example)
112 |
113 | ep_infos = []
114 | rewbuffer = deque(maxlen=100)
115 | lenbuffer = deque(maxlen=100)
116 | cur_reward_sum = torch.zeros(
117 | self.env.num_envs, dtype=torch.float, device=self.device
118 | )
119 | cur_episode_length = torch.zeros(
120 | self.env.num_envs, dtype=torch.float, device=self.device
121 | )
122 |
123 | tot_iter = self.current_learning_iteration + num_learning_iterations
124 | for it in range(self.current_learning_iteration, tot_iter):
125 | start = time.time()
126 | # Rollout
127 | with torch.inference_mode():
128 | for i in range(self.num_steps_per_env):
129 | actions = self.alg.act(obs, critic_obs)
130 | obs, privileged_obs, rewards, dones, infos = self.env.step(actions)
131 | critic_obs = privileged_obs if privileged_obs is not None else obs
132 | obs, critic_obs, rewards, dones = (
133 | obs.to(self.device),
134 | critic_obs.to(self.device),
135 | rewards.to(self.device),
136 | dones.to(self.device),
137 | )
138 | self.alg.process_env_step(rewards, dones, infos)
139 |
140 | if self.log_dir is not None:
141 | # Book keeping
142 | if "episode" in infos:
143 | ep_infos.append(infos["episode"])
144 | cur_reward_sum += rewards
145 | cur_episode_length += 1
146 | new_ids = (dones > 0).nonzero(as_tuple=False)
147 | rewbuffer.extend(
148 | cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist()
149 | )
150 | lenbuffer.extend(
151 | cur_episode_length[new_ids][:, 0].cpu().numpy().tolist()
152 | )
153 | cur_reward_sum[new_ids] = 0
154 | cur_episode_length[new_ids] = 0
155 |
156 | stop = time.time()
157 | collection_time = stop - start
158 |
159 | # Learning step
160 | start = stop
161 | self.alg.compute_returns(critic_obs)
162 |
163 | mean_value_loss, mean_surrogate_loss = self.alg.update()
164 | stop = time.time()
165 | learn_time = stop - start
166 | if self.log_dir is not None:
167 | self.log(locals())
168 | if it % self.save_interval == 0:
169 | self.save(os.path.join(self.log_dir, "model_{}.pt".format(it)))
170 | ep_infos.clear()
171 |
172 | self.current_learning_iteration += num_learning_iterations
173 | self.save(
174 | os.path.join(
175 | self.log_dir, "model_{}.pt".format(self.current_learning_iteration)
176 | )
177 | )
178 |
179 | def log(self, locs, width=80, pad=35):
180 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
181 | self.tot_time += locs["collection_time"] + locs["learn_time"]
182 | iteration_time = locs["collection_time"] + locs["learn_time"]
183 |
184 | ep_string = f""
185 | if locs["ep_infos"]:
186 | for key in locs["ep_infos"][0]:
187 | infotensor = torch.tensor([], device=self.device)
188 | for ep_info in locs["ep_infos"]:
189 | # handle scalar and zero dimensional tensor infos
190 | if not isinstance(ep_info[key], torch.Tensor):
191 | ep_info[key] = torch.Tensor([ep_info[key]])
192 | if len(ep_info[key].shape) == 0:
193 | ep_info[key] = ep_info[key].unsqueeze(0)
194 | infotensor = torch.cat((infotensor, ep_info[key].to(self.device)))
195 | value = torch.mean(infotensor)
196 | self.writer.add_scalar("Episode/" + key, value, locs["it"])
197 | ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n"""
198 | mean_std = self.alg.actor_critic.std.mean()
199 | fps = int(
200 | self.num_steps_per_env
201 | * self.env.num_envs
202 | / (locs["collection_time"] + locs["learn_time"])
203 | )
204 |
205 | self.writer.add_scalar(
206 | "Loss/value_function", locs["mean_value_loss"], locs["it"]
207 | )
208 | self.writer.add_scalar(
209 | "Loss/surrogate", locs["mean_surrogate_loss"], locs["it"]
210 | )
211 | self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"])
212 | self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"])
213 | self.writer.add_scalar("Perf/total_fps", fps, locs["it"])
214 | self.writer.add_scalar(
215 | "Perf/collection time", locs["collection_time"], locs["it"]
216 | )
217 | self.writer.add_scalar("Perf/learning_time", locs["learn_time"], locs["it"])
218 | if len(locs["rewbuffer"]) > 0:
219 | self.writer.add_scalar(
220 | "Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"]
221 | )
222 | self.writer.add_scalar(
223 | "Train/mean_episode_length",
224 | statistics.mean(locs["lenbuffer"]),
225 | locs["it"],
226 | )
227 | self.writer.add_scalar(
228 | "Train/mean_reward/time",
229 | statistics.mean(locs["rewbuffer"]),
230 | self.tot_time,
231 | )
232 | self.writer.add_scalar(
233 | "Train/mean_episode_length/time",
234 | statistics.mean(locs["lenbuffer"]),
235 | self.tot_time,
236 | )
237 |
238 | str = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m "
239 |
240 | if len(locs["rewbuffer"]) > 0:
241 | log_string = (
242 | f"""{'#' * width}\n"""
243 | f"""{str.center(width, ' ')}\n\n"""
244 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[
245 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
246 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
247 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
248 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
249 | f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
250 | f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n"""
251 | )
252 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
253 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""")
254 | else:
255 | log_string = (
256 | f"""{'#' * width}\n"""
257 | f"""{str.center(width, ' ')}\n\n"""
258 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[
259 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
260 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"""
261 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"""
262 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
263 | )
264 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
265 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""")
266 |
267 | log_string += ep_string
268 | log_string += (
269 | f"""{'-' * width}\n"""
270 | f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"""
271 | f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"""
272 | f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n"""
273 | f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * (
274 | locs['num_learning_iterations'] - locs['it']):.1f}s\n"""
275 | )
276 | print(log_string)
277 |
278 | def save(self, path, infos=None):
279 | torch.save(
280 | {
281 | "model_state_dict": self.alg.actor_critic.state_dict(),
282 | "optimizer_state_dict": self.alg.optimizer.state_dict(),
283 | "iter": self.current_learning_iteration,
284 | "infos": infos,
285 | },
286 | path,
287 | )
288 |
289 | def load(self, path, load_optimizer=True):
290 | loaded_dict = torch.load(path)
291 | self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
292 | if load_optimizer:
293 | self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
294 | self.current_learning_iteration = loaded_dict["iter"]
295 | return loaded_dict["infos"]
296 |
297 | def get_inference_policy(self, device=None):
298 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
299 | if device is not None:
300 | self.alg.actor_critic.to(device)
301 | return self.alg.actor_critic.act_inference
302 |
303 | def get_inference_critic(self, device=None):
304 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
305 | if device is not None:
306 | self.alg.actor_critic.to(device)
307 | return self.alg.actor_critic.evaluate
308 |
--------------------------------------------------------------------------------
/humanoid/algo/ppo/ppo.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import torch
33 | import torch.nn as nn
34 | import torch.optim as optim
35 |
36 | from .actor_critic import ActorCritic
37 | from .rollout_storage import RolloutStorage
38 |
39 | class PPO:
40 | actor_critic: ActorCritic
41 | def __init__(self,
42 | actor_critic,
43 | num_learning_epochs=1,
44 | num_mini_batches=1,
45 | clip_param=0.2,
46 | gamma=0.998,
47 | lam=0.95,
48 | value_loss_coef=1.0,
49 | entropy_coef=0.0,
50 | learning_rate=1e-3,
51 | max_grad_norm=1.0,
52 | use_clipped_value_loss=True,
53 | schedule="fixed",
54 | desired_kl=0.01,
55 | device='cpu',
56 | ):
57 |
58 | self.device = device
59 |
60 | self.desired_kl = desired_kl
61 | self.schedule = schedule
62 | self.learning_rate = learning_rate
63 |
64 | # PPO components
65 | self.actor_critic = actor_critic
66 | self.actor_critic.to(self.device)
67 | self.storage = None # initialized later
68 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate)
69 | self.transition = RolloutStorage.Transition()
70 |
71 | # PPO parameters
72 | self.clip_param = clip_param
73 | self.num_learning_epochs = num_learning_epochs
74 | self.num_mini_batches = num_mini_batches
75 | self.value_loss_coef = value_loss_coef
76 | self.entropy_coef = entropy_coef
77 | self.gamma = gamma
78 | self.lam = lam
79 | self.max_grad_norm = max_grad_norm
80 | self.use_clipped_value_loss = use_clipped_value_loss
81 |
82 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape):
83 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device)
84 |
85 | def test_mode(self):
86 | self.actor_critic.test()
87 |
88 | def train_mode(self):
89 | self.actor_critic.train()
90 |
91 | def act(self, obs, critic_obs):
92 | # Compute the actions and values
93 | self.transition.actions = self.actor_critic.act(obs).detach()
94 | self.transition.values = self.actor_critic.evaluate(critic_obs).detach()
95 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach()
96 | self.transition.action_mean = self.actor_critic.action_mean.detach()
97 | self.transition.action_sigma = self.actor_critic.action_std.detach()
98 | # need to record obs and critic_obs before env.step()
99 | self.transition.observations = obs
100 | self.transition.critic_observations = critic_obs
101 | return self.transition.actions
102 |
103 | def process_env_step(self, rewards, dones, infos):
104 | self.transition.rewards = rewards.clone()
105 | self.transition.dones = dones
106 | # Bootstrapping on time outs
107 | if 'time_outs' in infos:
108 | self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1)
109 |
110 | # Record the transition
111 | self.storage.add_transitions(self.transition)
112 | self.transition.clear()
113 | self.actor_critic.reset(dones)
114 |
115 | def compute_returns(self, last_critic_obs):
116 | last_values= self.actor_critic.evaluate(last_critic_obs).detach()
117 | self.storage.compute_returns(last_values, self.gamma, self.lam)
118 |
119 | def update(self):
120 | mean_value_loss = 0
121 | mean_surrogate_loss = 0
122 |
123 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
124 | for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
125 | old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in generator:
126 |
127 |
128 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0])
129 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
130 | value_batch = self.actor_critic.evaluate(critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1])
131 | mu_batch = self.actor_critic.action_mean
132 | sigma_batch = self.actor_critic.action_std
133 | entropy_batch = self.actor_critic.entropy
134 |
135 | # KL
136 | if self.desired_kl is not None and self.schedule == 'adaptive':
137 | with torch.inference_mode():
138 | kl = torch.sum(
139 | torch.log(sigma_batch / old_sigma_batch + 1.e-5) + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, axis=-1)
140 | kl_mean = torch.mean(kl)
141 |
142 | if kl_mean > self.desired_kl * 2.0:
143 | self.learning_rate = max(1e-5, self.learning_rate / 1.5)
144 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0:
145 | self.learning_rate = min(1e-2, self.learning_rate * 1.5)
146 |
147 | for param_group in self.optimizer.param_groups:
148 | param_group['lr'] = self.learning_rate
149 |
150 |
151 | # Surrogate loss
152 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
153 | surrogate = -torch.squeeze(advantages_batch) * ratio
154 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param,
155 | 1.0 + self.clip_param)
156 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
157 |
158 | # Value function loss
159 | if self.use_clipped_value_loss:
160 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param,
161 | self.clip_param)
162 | value_losses = (value_batch - returns_batch).pow(2)
163 | value_losses_clipped = (value_clipped - returns_batch).pow(2)
164 | value_loss = torch.max(value_losses, value_losses_clipped).mean()
165 | else:
166 | value_loss = (returns_batch - value_batch).pow(2).mean()
167 |
168 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean()
169 |
170 | # Gradient step
171 | self.optimizer.zero_grad()
172 | loss.backward()
173 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
174 | self.optimizer.step()
175 |
176 | mean_value_loss += value_loss.item()
177 | mean_surrogate_loss += surrogate_loss.item()
178 |
179 | num_updates = self.num_learning_epochs * self.num_mini_batches
180 | mean_value_loss /= num_updates
181 | mean_surrogate_loss /= num_updates
182 | self.storage.clear()
183 |
184 | return mean_value_loss, mean_surrogate_loss
185 |
--------------------------------------------------------------------------------
/humanoid/algo/ppo/rollout_storage.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | import torch
34 |
35 | class RolloutStorage:
36 | class Transition:
37 | def __init__(self):
38 | self.observations = None
39 | self.critic_observations = None
40 | self.actions = None
41 | self.rewards = None
42 | self.dones = None
43 | self.values = None
44 | self.actions_log_prob = None
45 | self.action_mean = None
46 | self.action_sigma = None
47 | self.hidden_states = None
48 |
49 | def clear(self):
50 | self.__init__()
51 |
52 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'):
53 |
54 | self.device = device
55 |
56 | self.obs_shape = obs_shape
57 | self.privileged_obs_shape = privileged_obs_shape
58 | self.actions_shape = actions_shape
59 |
60 | # Core
61 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device)
62 | if privileged_obs_shape[0] is not None:
63 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device)
64 | else:
65 | self.privileged_observations = None
66 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
67 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
68 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
69 |
70 | # For PPO
71 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
72 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
73 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
74 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
75 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
76 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
77 |
78 | self.num_transitions_per_env = num_transitions_per_env
79 | self.num_envs = num_envs
80 |
81 | # rnn
82 | self.saved_hidden_states_a = None
83 | self.saved_hidden_states_c = None
84 |
85 | self.step = 0
86 |
87 | def add_transitions(self, transition: Transition):
88 | if self.step >= self.num_transitions_per_env:
89 | raise AssertionError("Rollout buffer overflow")
90 | self.observations[self.step].copy_(transition.observations)
91 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations)
92 | self.actions[self.step].copy_(transition.actions)
93 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
94 | self.dones[self.step].copy_(transition.dones.view(-1, 1))
95 | self.values[self.step].copy_(transition.values)
96 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1))
97 | self.mu[self.step].copy_(transition.action_mean)
98 | self.sigma[self.step].copy_(transition.action_sigma)
99 | self._save_hidden_states(transition.hidden_states)
100 | self.step += 1
101 |
102 | def _save_hidden_states(self, hidden_states):
103 | if hidden_states is None or hidden_states==(None, None):
104 | return
105 | # make a tuple out of GRU hidden states to match the LSTM format
106 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],)
107 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],)
108 |
109 | # initialize if needed
110 | if self.saved_hidden_states_a is None:
111 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))]
112 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))]
113 | # copy the states
114 | for i in range(len(hid_a)):
115 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i])
116 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i])
117 |
118 |
119 | def clear(self):
120 | self.step = 0
121 |
122 | def compute_returns(self, last_values, gamma, lam):
123 | advantage = 0
124 | for step in reversed(range(self.num_transitions_per_env)):
125 | if step == self.num_transitions_per_env - 1:
126 | next_values = last_values
127 | else:
128 | next_values = self.values[step + 1]
129 | next_is_not_terminal = 1.0 - self.dones[step].float()
130 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step]
131 | advantage = delta + next_is_not_terminal * gamma * lam * advantage
132 | self.returns[step] = advantage + self.values[step]
133 |
134 | # Compute and normalize the advantages
135 | self.advantages = self.returns - self.values
136 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8)
137 |
138 | def get_statistics(self):
139 | done = self.dones
140 | done[-1] = 1
141 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
142 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0]))
143 | trajectory_lengths = (done_indices[1:] - done_indices[:-1])
144 | return trajectory_lengths.float().mean(), self.rewards.mean()
145 |
146 | def mini_batch_generator(self, num_mini_batches, num_epochs=8):
147 | batch_size = self.num_envs * self.num_transitions_per_env
148 | mini_batch_size = batch_size // num_mini_batches
149 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device)
150 |
151 | observations = self.observations.flatten(0, 1)
152 | if self.privileged_observations is not None:
153 | critic_observations = self.privileged_observations.flatten(0, 1)
154 | else:
155 | critic_observations = observations
156 |
157 | actions = self.actions.flatten(0, 1)
158 | values = self.values.flatten(0, 1)
159 | returns = self.returns.flatten(0, 1)
160 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1)
161 | advantages = self.advantages.flatten(0, 1)
162 | old_mu = self.mu.flatten(0, 1)
163 | old_sigma = self.sigma.flatten(0, 1)
164 |
165 | for epoch in range(num_epochs):
166 | for i in range(num_mini_batches):
167 |
168 | start = i*mini_batch_size
169 | end = (i+1)*mini_batch_size
170 | batch_idx = indices[start:end]
171 |
172 | obs_batch = observations[batch_idx]
173 | critic_observations_batch = critic_observations[batch_idx]
174 | actions_batch = actions[batch_idx]
175 | target_values_batch = values[batch_idx]
176 | returns_batch = returns[batch_idx]
177 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx]
178 | advantages_batch = advantages[batch_idx]
179 | old_mu_batch = old_mu[batch_idx]
180 | old_sigma_batch = old_sigma[batch_idx]
181 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \
182 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None
183 |
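# NOTE: illustrative sketch, not part of this file. A minimal, self-contained
# re-run of the GAE recursion implemented in compute_returns() above, on a toy
# 3-step, single-env rollout; all reward/value numbers are hypothetical.
import torch

gamma, lam = 0.99, 0.95
rewards = torch.tensor([[1.0], [0.5], [2.0]])  # (T, num_envs), num_envs = 1
values = torch.tensor([[0.8], [0.9], [1.1]])   # critic values V(s_t)
dones = torch.tensor([[0.0], [0.0], [1.0]])    # episode terminates at t = 2
last_values = torch.tensor([1.0])              # bootstrap value V(s_T)

returns = torch.zeros_like(rewards)
advantage = 0.0
for t in reversed(range(3)):
    next_values = last_values if t == 2 else values[t + 1]
    not_terminal = 1.0 - dones[t]
    delta = rewards[t] + not_terminal * gamma * next_values - values[t]
    advantage = delta + not_terminal * gamma * lam * advantage
    returns[t] = advantage + values[t]

advantages = returns - values
advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)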
--------------------------------------------------------------------------------
/humanoid/algo/vec_env.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import torch
33 | from typing import Tuple, Union
34 | from abc import ABC, abstractmethod
35 |
36 | # minimal interface of the environment
37 | class VecEnv(ABC):
38 | num_envs: int
39 | num_obs: int
40 | num_privileged_obs: int
41 | num_actions: int
42 | max_episode_length: int
43 | privileged_obs_buf: torch.Tensor
44 | obs_buf: torch.Tensor
45 | rew_buf: torch.Tensor
46 | reset_buf: torch.Tensor
47 | episode_length_buf: torch.Tensor # current episode duration
48 | extras: dict
49 | device: torch.device
50 | @abstractmethod
51 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]:
52 | pass
53 | @abstractmethod
54 | def reset(self, env_ids: Union[list, torch.Tensor]):
55 | pass
56 | @abstractmethod
57 | def get_observations(self) -> torch.Tensor:
58 | pass
59 | @abstractmethod
60 | def get_privileged_observations(self) -> Union[torch.Tensor, None]:
61 | pass
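
# NOTE: illustrative sketch, not part of this file. A minimal concrete
# implementation of the interface above; DummyVecEnv and its trivial dynamics
# are hypothetical and only show the contract an on-policy runner expects.
class DummyVecEnv(VecEnv):
    def __init__(self, num_envs: int = 4, num_obs: int = 8, num_actions: int = 2):
        self.num_envs = num_envs
        self.num_obs = num_obs
        self.num_privileged_obs = None
        self.num_actions = num_actions
        self.max_episode_length = 100
        self.device = torch.device('cpu')
        self.obs_buf = torch.zeros(num_envs, num_obs)
        self.privileged_obs_buf = None
        self.rew_buf = torch.zeros(num_envs)
        self.reset_buf = torch.zeros(num_envs)
        self.episode_length_buf = torch.zeros(num_envs, dtype=torch.long)
        self.extras = {}

    def step(self, actions: torch.Tensor):
        # trivial dynamics: reward the mean action, never terminate
        self.episode_length_buf += 1
        self.rew_buf = actions.mean(dim=-1)
        return self.obs_buf, None, self.rew_buf, self.reset_buf, self.extras

    def reset(self, env_ids):
        self.episode_length_buf[env_ids] = 0

    def get_observations(self) -> torch.Tensor:
        return self.obs_buf

    def get_privileged_observations(self):
        return None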
--------------------------------------------------------------------------------
/humanoid/envs/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
34 | from .base.legged_robot import LeggedRobot
35 |
36 | from .custom.humanoid_config import XBotLCfg, XBotLCfgPPO
37 | from .custom.humanoid_env import XBotLFreeEnv
38 |
39 | from humanoid.utils.task_registry import task_registry
40 |
41 |
42 | task_registry.register("humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO())
43 |
--------------------------------------------------------------------------------
/humanoid/envs/base/LICENSE:
--------------------------------------------------------------------------------
1 | SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | SPDX-License-Identifier: BSD-3-Clause
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
--------------------------------------------------------------------------------
/humanoid/envs/base/base_config.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import inspect
33 |
34 | class BaseConfig:
35 | def __init__(self) -> None:
36 | """ Initializes all member classes recursively. Ignores all namse starting with '__' (buit-in methods)."""
37 | self.init_member_classes(self)
38 |
39 | @staticmethod
40 | def init_member_classes(obj):
41 | # iterate over all attribute names
42 | for key in dir(obj):
43 | # disregard builtin attributes
44 | # if key.startswith("__"):
45 | if key=="__class__":
46 | continue
47 | # get the corresponding attribute object
48 | var = getattr(obj, key)
49 | # check if the attribute is a class
50 | if inspect.isclass(var):
51 | # instantiate the class
52 | i_var = var()
53 | # set the attribute to the instance instead of the type
54 | setattr(obj, key, i_var)
55 | # recursively init members of the attribute
56 | BaseConfig.init_member_classes(i_var)
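
# NOTE: illustrative usage sketch, not part of this file; DemoCfg is
# hypothetical. Because every nested class is replaced by an instance during
# __init__, per-object overrides do not mutate the class-level defaults.
class DemoCfg(BaseConfig):
    class env:
        num_envs = 4096
        class ranges:
            lin_vel_x = [-1.0, 1.0]

cfg = DemoCfg()
cfg.env.num_envs = 1024                 # instance-level override
assert DemoCfg.env.num_envs == 4096     # class default untouched
assert cfg.env.ranges.lin_vel_x == [-1.0, 1.0]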
--------------------------------------------------------------------------------
/humanoid/envs/base/base_task.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import sys
33 | from isaacgym import gymapi
34 | from isaacgym import gymutil
35 | import numpy as np
36 | import torch
37 |
38 | # Base class for RL tasks
39 |
40 |
41 | class BaseTask():
42 |
43 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
44 | self.gym = gymapi.acquire_gym()
45 |
46 | self.sim_params = sim_params
47 | self.physics_engine = physics_engine
48 | self.sim_device = sim_device
49 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(
50 | self.sim_device)
51 | self.headless = headless
52 |
53 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX.
54 | if sim_device_type == 'cuda' and sim_params.use_gpu_pipeline:
55 | self.device = self.sim_device
56 | else:
57 | self.device = 'cpu'
58 |
59 | # graphics device for rendering, -1 for no rendering
60 | self.graphics_device_id = self.sim_device_id
61 |
62 | self.num_envs = cfg.env.num_envs
63 | self.num_obs = cfg.env.num_observations
64 | self.num_privileged_obs = cfg.env.num_privileged_obs
65 | self.num_actions = cfg.env.num_actions
66 |
67 | # optimization flags for pytorch JIT
68 | torch._C._jit_set_profiling_mode(False)
69 | torch._C._jit_set_profiling_executor(False)
70 |
71 | # allocate buffers
72 | self.obs_buf = torch.zeros(
73 | self.num_envs, self.num_obs, device=self.device, dtype=torch.float)
74 | self.rew_buf = torch.zeros(
75 | self.num_envs, device=self.device, dtype=torch.float)
76 | # new reward buffers for exp rewards
77 | self.neg_reward_buf = torch.zeros(
78 | self.num_envs, device=self.device, dtype=torch.float)
79 | self.pos_reward_buf = torch.zeros(
80 | self.num_envs, device=self.device, dtype=torch.float)
81 |
82 | self.reset_buf = torch.ones(
83 | self.num_envs, device=self.device, dtype=torch.long)
84 | self.episode_length_buf = torch.zeros(
85 | self.num_envs, device=self.device, dtype=torch.long)
86 | self.time_out_buf = torch.zeros(
87 | self.num_envs, device=self.device, dtype=torch.bool)
88 | if self.num_privileged_obs is not None:
89 | self.privileged_obs_buf = torch.zeros(
90 | self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float)
91 | else:
92 | self.privileged_obs_buf = None
93 |
94 | self.extras = {}
95 |
96 | # create envs, sim and viewer
97 | self.create_sim()
98 | self.gym.prepare_sim(self.sim)
99 | self.enable_viewer_sync = True
100 | self.viewer = None
101 |
102 | # if running with a viewer, set up keyboard shortcuts and camera
103 | if not self.headless:
104 | # subscribe to keyboard shortcuts
105 | self.viewer = self.gym.create_viewer(
106 | self.sim, gymapi.CameraProperties())
107 | self.gym.subscribe_viewer_keyboard_event(
108 | self.viewer, gymapi.KEY_ESCAPE, "QUIT")
109 | self.gym.subscribe_viewer_keyboard_event(
110 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync")
111 |
112 | # create a camera sensor in both windowed and headless modes
113 | camera_properties = gymapi.CameraProperties()
114 | camera_properties.width = 720
115 | camera_properties.height = 480
116 | camera_handle = self.gym.create_camera_sensor(
117 | self.envs[0], camera_properties)
118 | self.camera_handle = camera_handle
126 |
127 | def get_observations(self):
128 | return self.obs_buf
129 |
130 | def get_privileged_observations(self):
131 | return self.privileged_obs_buf
132 |
133 | def get_rma_observations(self):
134 | return self.rma_obs_buf  # note: expected to be allocated by subclasses that use RMA observations; BaseTask does not create it
135 |
136 | def reset_idx(self, env_ids):
137 | """Reset selected robots"""
138 | raise NotImplementedError
139 |
140 | def reset(self):
141 | """ Reset all robots"""
142 | self.reset_idx(torch.arange(self.num_envs, device=self.device))
143 | obs, privileged_obs, _, _, _ = self.step(torch.zeros(
144 | self.num_envs, self.num_actions, device=self.device, requires_grad=False))
145 | return obs, privileged_obs
146 |
147 | def step(self, actions):
148 | raise NotImplementedError
149 |
150 | def render(self, sync_frame_time=True):
151 | if self.viewer:
152 | # check for window closed
153 | if self.gym.query_viewer_has_closed(self.viewer):
154 | sys.exit()
155 |
156 | # check for keyboard events
157 | for evt in self.gym.query_viewer_action_events(self.viewer):
158 | if evt.action == "QUIT" and evt.value > 0:
159 | sys.exit()
160 | elif evt.action == "toggle_viewer_sync" and evt.value > 0:
161 | self.enable_viewer_sync = not self.enable_viewer_sync
162 |
163 | # fetch results
164 | if self.device != 'cpu':
165 | self.gym.fetch_results(self.sim, True)
166 |
167 | # step graphics
168 | if self.enable_viewer_sync:
169 | self.gym.step_graphics(self.sim)
170 | self.gym.draw_viewer(self.viewer, self.sim, True)
171 | if sync_frame_time:
172 | self.gym.sync_frame_time(self.sim)
173 | else:
174 | self.gym.poll_viewer_events(self.viewer)
175 |
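# NOTE: illustrative sketch, not part of this file. The env-device rule from
# __init__ above, restated in isolation: returned tensors live on the GPU only
# when the sim device is CUDA *and* use_gpu_pipeline is set; otherwise PhysX
# copies results to the CPU. select_env_device is a hypothetical helper.
def select_env_device(sim_device: str, use_gpu_pipeline: bool) -> str:
    device_type = sim_device.split(':')[0]
    return sim_device if (device_type == 'cuda' and use_gpu_pipeline) else 'cpu'

assert select_env_device('cuda:0', True) == 'cuda:0'
assert select_env_device('cuda:0', False) == 'cpu'
assert select_env_device('cpu', True) == 'cpu'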
--------------------------------------------------------------------------------
/humanoid/envs/base/legged_robot_config.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | from .base_config import BaseConfig
33 |
34 | class LeggedRobotCfg(BaseConfig):
35 | class env:
36 | num_envs = 4096
37 | num_observations = 235
38 | num_privileged_obs = None # if not None a privileged_obs_buf will be returned by step() (critic obs for asymmetric training). None is returned otherwise
39 | num_actions = 12
40 | env_spacing = 3. # not used with heightfields/trimeshes
41 | send_timeouts = True # send time out information to the algorithm
42 | episode_length_s = 20 # episode length in seconds
43 |
44 | class terrain:
45 | mesh_type = 'trimesh' # options: none, plane, heightfield, trimesh
46 | horizontal_scale = 0.1 # [m]
47 | vertical_scale = 0.005 # [m]
48 | border_size = 25 # [m]
49 | curriculum = True
50 | static_friction = 1.0
51 | dynamic_friction = 1.0
52 | restitution = 0.
53 | # rough terrain only:
54 | measure_heights = True
55 | measured_points_x = [-0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] # 1mx1.6m rectangle (without center line)
56 | measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5]
57 | selected = False # select a unique terrain type and pass all arguments
58 | terrain_kwargs = None # Dict of arguments for selected terrain
59 | max_init_terrain_level = 5 # starting curriculum state
60 | terrain_length = 8.
61 | terrain_width = 8.
62 | num_rows= 10 # number of terrain rows (levels)
63 | num_cols = 20 # number of terrain cols (types)
64 | # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
65 | terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2]
66 | # trimesh only:
67 | slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces
68 |
69 | class commands:
70 | curriculum = False
71 | max_curriculum = 1.
72 | num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
73 | resampling_time = 10. # time before commands are changed [s]
74 | heading_command = True # if true: compute ang vel command from heading error
75 | class ranges:
76 | lin_vel_x = [-1.0, 1.0] # min max [m/s]
77 | lin_vel_y = [-1.0, 1.0] # min max [m/s]
78 | ang_vel_yaw = [-1, 1] # min max [rad/s]
79 | heading = [-3.14, 3.14]
80 |
81 | class init_state:
82 | pos = [0.0, 0.0, 1.] # x,y,z [m]
83 | rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
84 | lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s]
85 | ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s]
86 | default_joint_angles = { # target angles when action = 0.0
87 | "joint_a": 0.,
88 | "joint_b": 0.}
89 |
90 | class control:
91 | # PD Drive parameters:
92 | stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad]
93 | damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad]
94 | # action scale: target angle = actionScale * action + defaultAngle
95 | action_scale = 0.5
96 | # decimation: Number of control action updates @ sim DT per policy DT
97 | decimation = 4
98 |
99 | class asset:
100 | file = ""
101 | name = "legged_robot" # actor name
102 | foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors
103 | penalize_contacts_on = []
104 | terminate_after_contacts_on = []
105 | disable_gravity = False
106 | collapse_fixed_joints = True # merge bodies connected by fixed joints; specific joints can be kept by adding <... dont_collapse="true"> to the URDF
107 | fix_base_link = False # fix the base of the robot
108 | default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort)
109 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
110 | replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation
111 | flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up
112 |
113 | density = 0.001
114 | angular_damping = 0.
115 | linear_damping = 0.
116 | max_angular_velocity = 1000.
117 | max_linear_velocity = 1000.
118 | armature = 0.
119 | thickness = 0.01
120 |
121 |
122 | class domain_rand:
123 | randomize_friction = True
124 | friction_range = [0.5, 1.25]
125 | randomize_base_mass = False
126 | added_mass_range = [-1., 1.]
127 | push_robots = True
128 | push_interval_s = 15
129 | max_push_vel_xy = 1.
130 |
131 |
132 | class rewards:
133 | class scales:
134 | termination = -0.0
135 | tracking_lin_vel = 1.0
136 | tracking_ang_vel = 0.5
137 | lin_vel_z = -2.0
138 | ang_vel_xy = -0.05
139 | orientation = -0.
140 | torques = -0.00001
141 | dof_vel = -0.
142 | dof_acc = -2.5e-7
143 | base_height = -0.
144 | feet_air_time = 1.0
145 | collision = -1.
146 | feet_stumble = -0.0
147 | action_rate = -0.
148 | stand_still = -0.
149 |
150 | only_positive_rewards = True # if true negative total rewards are clipped at zero (avoids early termination problems)
151 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
152 | max_contact_force = 100. # forces above this value are penalized
153 |
154 | class normalization:
155 | class obs_scales:
156 | lin_vel = 2.0
157 | ang_vel = 0.25
158 | dof_pos = 1.0
159 | dof_vel = 0.05
160 | height_measurements = 5.0
161 | clip_observations = 100.
162 | clip_actions = 100.
163 |
164 | class noise:
165 | add_noise = True
166 | noise_level = 1.0 # scales other values
167 | class noise_scales:
168 | dof_pos = 0.01
169 | dof_vel = 1.5
170 | lin_vel = 0.1
171 | ang_vel = 0.2
172 | gravity = 0.05
173 | height_measurements = 0.1
174 |
175 | # viewer camera:
176 | class viewer:
177 | ref_env = 0
178 | pos = [10, 0, 6] # [m]
179 | lookat = [11., 5, 3.] # [m]
180 |
181 | class sim:
182 | dt = 0.005
183 | substeps = 1
184 | gravity = [0., 0. ,-9.81] # [m/s^2]
185 | up_axis = 1 # 0 is y, 1 is z
186 |
187 | class physx:
188 | num_threads = 10
189 | solver_type = 1 # 0: pgs, 1: tgs
190 | num_position_iterations = 4
191 | num_velocity_iterations = 0
192 | contact_offset = 0.01 # [m]
193 | rest_offset = 0.0 # [m]
194 | bounce_threshold_velocity = 0.5 #0.5 [m/s]
195 | max_depenetration_velocity = 1.0
196 | max_gpu_contact_pairs = 2**23 # 2**24 needed for 8000+ envs
197 | default_buffer_size_multiplier = 5
198 | contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
199 |
200 | class LeggedRobotCfgPPO(BaseConfig):
201 | seed = 1
202 | runner_class_name = 'OnPolicyRunner'
203 | class policy:
204 | init_noise_std = 1.0
205 | actor_hidden_dims = [512, 256, 128]
206 | critic_hidden_dims = [512, 256, 128]
207 |
208 | class algorithm:
209 | # training params
210 | value_loss_coef = 1.0
211 | use_clipped_value_loss = True
212 | clip_param = 0.2
213 | entropy_coef = 0.01
214 | num_learning_epochs = 5
215 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches
216 | learning_rate = 1.e-3 #5.e-4
217 | schedule = 'adaptive' # could be adaptive, fixed
218 | gamma = 0.99
219 | lam = 0.95
220 | desired_kl = 0.01
221 | max_grad_norm = 1.
222 |
223 | class runner:
224 | policy_class_name = 'ActorCritic'
225 | algorithm_class_name = 'PPO'
226 | num_steps_per_env = 24 # per iteration
227 | max_iterations = 1500 # number of policy updates
228 |
229 | # logging
230 | save_interval = 100 # check for potential saves every this many iterations
231 | experiment_name = 'test'
232 | run_name = ''
233 | # load and resume
234 | resume = False
235 | load_run = -1 # -1 = last run
236 | checkpoint = -1 # -1 = last saved model
237 | resume_path = None # updated from load_run and chkpt
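
# NOTE: illustrative arithmetic, not part of this file. With the defaults
# above (sim.dt = 0.005, control.decimation = 4) the policy acts once every
# 4 physics steps.
sim_dt, decimation = 0.005, 4
policy_dt = sim_dt * decimation                # 0.02 s between policy actions
print(f"policy rate: {1 / policy_dt:.0f} Hz")  # -> 50 Hz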
--------------------------------------------------------------------------------
/humanoid/envs/custom/humanoid_config.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: BSD-3-Clause
2 | #
3 | # Redistribution and use in source and binary forms, with or without
4 | # modification, are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | #
9 | # 2. Redistributions in binary form must reproduce the above copyright notice,
10 | # this list of conditions and the following disclaimer in the documentation
11 | # and/or other materials provided with the distribution.
12 | #
13 | # 3. Neither the name of the copyright holder nor the names of its
14 | # contributors may be used to endorse or promote products derived from
15 | # this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | #
28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
29 |
30 |
31 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
32 |
33 |
34 | class XBotLCfg(LeggedRobotCfg):
35 | """
36 | Configuration class for the XBotL humanoid robot.
37 | """
38 | class env(LeggedRobotCfg.env):
39 | # change the observation dim
40 | frame_stack = 15
41 | c_frame_stack = 3
42 | num_single_obs = 47
43 | num_observations = int(frame_stack * num_single_obs)
44 | single_num_privileged_obs = 73
45 | num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
46 | num_actions = 12
47 | num_envs = 4096
48 | episode_length_s = 24 # episode length in seconds
49 | use_ref_actions = False # speed up training by using reference actions
50 |
51 | class safety:
52 | # safety factors
53 | pos_limit = 1.0
54 | vel_limit = 1.0
55 | torque_limit = 0.85
56 |
57 | class asset(LeggedRobotCfg.asset):
58 | file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf'
59 |
60 | name = "XBot-L"
61 | foot_name = "ankle_roll"
62 | knee_name = "knee"
63 |
64 | terminate_after_contacts_on = ['base_link']
65 | penalize_contacts_on = ["base_link"]
66 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
67 | flip_visual_attachments = False
68 | replace_cylinder_with_capsule = False
69 | fix_base_link = False
70 |
71 | class terrain(LeggedRobotCfg.terrain):
72 | mesh_type = 'plane'
73 | # mesh_type = 'trimesh'
74 | curriculum = False
75 | # rough terrain only:
76 | measure_heights = False
77 | static_friction = 0.6
78 | dynamic_friction = 0.6
79 | terrain_length = 8.
80 | terrain_width = 8.
81 | num_rows = 20 # number of terrain rows (levels)
82 | num_cols = 20 # number of terrain cols (types)
83 | max_init_terrain_level = 10 # starting curriculum state
84 | # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
85 | terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
86 | restitution = 0.
87 |
88 | class noise:
89 | add_noise = True
90 | noise_level = 0.6 # scales other values
91 |
92 | class noise_scales:
93 | dof_pos = 0.05
94 | dof_vel = 0.5
95 | ang_vel = 0.1
96 | lin_vel = 0.05
97 | quat = 0.03
98 | height_measurements = 0.1
99 |
100 | class init_state(LeggedRobotCfg.init_state):
101 | pos = [0.0, 0.0, 0.95]
102 |
103 | default_joint_angles = { # = target angles [rad] when action = 0.0
104 | 'left_leg_roll_joint': 0.,
105 | 'left_leg_yaw_joint': 0.,
106 | 'left_leg_pitch_joint': 0.,
107 | 'left_knee_joint': 0.,
108 | 'left_ankle_pitch_joint': 0.,
109 | 'left_ankle_roll_joint': 0.,
110 | 'right_leg_roll_joint': 0.,
111 | 'right_leg_yaw_joint': 0.,
112 | 'right_leg_pitch_joint': 0.,
113 | 'right_knee_joint': 0.,
114 | 'right_ankle_pitch_joint': 0.,
115 | 'right_ankle_roll_joint': 0.,
116 | }
117 |
118 | class control(LeggedRobotCfg.control):
119 | # PD Drive parameters:
120 | stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0, 'leg_yaw': 200.0,
121 | 'knee': 350.0, 'ankle': 15}
122 | damping = {'leg_roll': 10, 'leg_pitch': 10, 'leg_yaw': 10,
123 | 'knee': 10, 'ankle': 10}
124 |
125 | # action scale: target angle = actionScale * action + defaultAngle
126 | action_scale = 0.25
127 | # decimation: Number of control action updates @ sim DT per policy DT
128 | decimation = 10 # 100 Hz policy rate
129 |
130 | class sim(LeggedRobotCfg.sim):
131 | dt = 0.001 # 1000 Hz
132 | substeps = 1
133 | up_axis = 1 # 0 is y, 1 is z
134 |
135 | class physx(LeggedRobotCfg.sim.physx):
136 | num_threads = 10
137 | solver_type = 1 # 0: pgs, 1: tgs
138 | num_position_iterations = 4
139 | num_velocity_iterations = 1
140 | contact_offset = 0.01 # [m]
141 | rest_offset = 0.0 # [m]
142 | bounce_threshold_velocity = 0.1 # [m/s]
143 | max_depenetration_velocity = 1.0
144 | max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
145 | default_buffer_size_multiplier = 5
146 | # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
147 | contact_collection = 2
148 |
149 | class domain_rand:
150 | randomize_friction = True
151 | friction_range = [0.1, 2.0]
152 | randomize_base_mass = True
153 | added_mass_range = [-5., 5.]
154 | push_robots = True
155 | push_interval_s = 4
156 | max_push_vel_xy = 0.2
157 | max_push_ang_vel = 0.4
158 | # dynamic randomization
159 | action_delay = 0.5
160 | action_noise = 0.02
161 |
162 | class commands(LeggedRobotCfg.commands):
163 | # commands: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
164 | num_commands = 4
165 | resampling_time = 8. # time before commands are changed [s]
166 | heading_command = True # if true: compute ang vel command from heading error
167 |
168 | class ranges:
169 | lin_vel_x = [-0.3, 0.6] # min max [m/s]
170 | lin_vel_y = [-0.3, 0.3] # min max [m/s]
171 | ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
172 | heading = [-3.14, 3.14]
173 |
174 | class rewards:
175 | base_height_target = 0.89
176 | min_dist = 0.2
177 | max_dist = 0.5
178 | # put some settings here for LLM parameter tuning
179 | target_joint_pos_scale = 0.17 # rad
180 | target_feet_height = 0.06 # m
181 | cycle_time = 0.64 # sec
182 | # if true negative total rewards are clipped at zero (avoids early termination problems)
183 | only_positive_rewards = True
184 | # tracking reward = exp(-error^2 * sigma)
185 | tracking_sigma = 5
186 | max_contact_force = 700 # Forces above this value are penalized
187 |
188 | class scales:
189 | # reference motion tracking
190 | joint_pos = 1.6
191 | feet_clearance = 1.
192 | feet_contact_number = 1.2
193 | # gait
194 | feet_air_time = 1.
195 | foot_slip = -0.05
196 | feet_distance = 0.2
197 | knee_distance = 0.2
198 | # contact
199 | feet_contact_forces = -0.01
200 | # vel tracking
201 | tracking_lin_vel = 1.2
202 | tracking_ang_vel = 1.1
203 | vel_mismatch_exp = 0.5 # lin_z; ang x,y
204 | low_speed = 0.2
205 | track_vel_hard = 0.5
206 | # base pos
207 | default_joint_pos = 0.5
208 | orientation = 1.
209 | base_height = 0.2
210 | base_acc = 0.2
211 | # energy
212 | action_smoothness = -0.002
213 | torques = -1e-5
214 | dof_vel = -5e-4
215 | dof_acc = -1e-7
216 | collision = -1.
217 |
218 | class normalization:
219 | class obs_scales:
220 | lin_vel = 2.
221 | ang_vel = 1.
222 | dof_pos = 1.
223 | dof_vel = 0.05
224 | quat = 1.
225 | height_measurements = 5.0
226 | clip_observations = 18.
227 | clip_actions = 18.
228 |
229 |
230 | class XBotLCfgPPO(LeggedRobotCfgPPO):
231 | seed = 5
232 | runner_class_name = 'OnPolicyRunner' # DWLOnPolicyRunner
233 |
234 | class policy:
235 | init_noise_std = 1.0
236 | actor_hidden_dims = [512, 256, 128]
237 | critic_hidden_dims = [768, 256, 128]
238 |
239 | class algorithm(LeggedRobotCfgPPO.algorithm):
240 | entropy_coef = 0.001
241 | learning_rate = 1e-5
242 | num_learning_epochs = 2
243 | gamma = 0.994
244 | lam = 0.9
245 | num_mini_batches = 4
246 |
247 | class runner:
248 | policy_class_name = 'ActorCritic'
249 | algorithm_class_name = 'PPO'
250 | num_steps_per_env = 60 # per iteration
251 | max_iterations = 3001 # number of policy updates
252 |
253 | # logging
254 | save_interval = 100 # check for potential saves every save_interval iterations
255 | experiment_name = 'XBot_ppo'
256 | run_name = ''
257 | # Load and resume
258 | resume = False
259 | load_run = -1 # -1 = last run
260 | checkpoint = -1 # -1 = last saved model
261 | resume_path = None # updated from load_run and chkpt
262 |
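# NOTE: illustrative arithmetic, not part of this file. The stacked
# observation sizes implied by XBotLCfg.env above; the 47-dim single frame
# is laid out in compute_observations() of humanoid_env.py.
frame_stack, num_single_obs = 15, 47
c_frame_stack, single_num_privileged_obs = 3, 73
assert frame_stack * num_single_obs == 705               # actor input dim
assert c_frame_stack * single_num_privileged_obs == 219  # critic input dim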
--------------------------------------------------------------------------------
/humanoid/envs/custom/humanoid_env.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: BSD-3-Clause
2 | #
3 | # Redistribution and use in source and binary forms, with or without
4 | # modification, are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | #
9 | # 2. Redistributions in binary form must reproduce the above copyright notice,
10 | # this list of conditions and the following disclaimer in the documentation
11 | # and/or other materials provided with the distribution.
12 | #
13 | # 3. Neither the name of the copyright holder nor the names of its
14 | # contributors may be used to endorse or promote products derived from
15 | # this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | #
28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
29 |
30 |
31 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg
32 |
33 | from isaacgym.torch_utils import *
34 | from isaacgym import gymtorch, gymapi
35 |
36 | import torch
37 | from humanoid.envs import LeggedRobot
38 |
39 | from humanoid.utils.terrain import HumanoidTerrain
40 |
41 |
42 | class XBotLFreeEnv(LeggedRobot):
43 | '''
44 | XBotLFreeEnv is a class that represents a custom environment for a legged robot.
45 |
46 | Args:
47 | cfg (LeggedRobotCfg): Configuration object for the legged robot.
48 | sim_params: Parameters for the simulation.
49 | physics_engine: Physics engine used in the simulation.
50 | sim_device: Device used for the simulation.
51 | headless: Flag indicating whether the simulation should be run in headless mode.
52 |
53 | Attributes:
54 | last_feet_z (float): The z-coordinate of the last feet position.
55 | feet_height (torch.Tensor): Tensor representing the height of the feet.
56 | sim (gymtorch.GymSim): The simulation object.
57 | terrain (HumanoidTerrain): The terrain object.
58 | up_axis_idx (int): The index representing the up axis.
59 | command_input (torch.Tensor): Tensor representing the command input.
60 | privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer.
61 | obs_buf (torch.Tensor): Tensor representing the observations buffer.
62 | obs_history (collections.deque): Deque containing the history of observations.
63 | critic_history (collections.deque): Deque containing the history of critic observations.
64 |
65 | Methods:
66 | _push_robots(): Randomly pushes the robots by setting a randomized base velocity.
67 | _get_phase(): Calculates the phase of the gait cycle.
68 | _get_gait_phase(): Calculates the gait phase.
69 | compute_ref_state(): Computes the reference state.
70 | create_sim(): Creates the simulation, terrain, and environments.
71 | _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations.
72 | step(actions): Performs a simulation step with the given actions.
73 | compute_observations(): Computes the observations.
74 | reset_idx(env_ids): Resets the environment for the specified environment IDs.
75 | '''
76 | def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless):
77 | super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
78 | self.last_feet_z = 0.05
79 | self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
80 | self.reset_idx(torch.tensor(range(self.num_envs), device=self.device))
81 | self.compute_observations()
82 |
83 | def _push_robots(self):
84 | """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
85 | """
86 | max_vel = self.cfg.domain_rand.max_push_vel_xy
87 | max_push_angular = self.cfg.domain_rand.max_push_ang_vel
88 | self.rand_push_force[:, :2] = torch_rand_float(
89 | -max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y
90 | self.root_states[:, 7:9] = self.rand_push_force[:, :2]
91 |
92 | self.rand_push_torque = torch_rand_float(
93 | -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device)
94 |
95 | self.root_states[:, 10:13] = self.rand_push_torque
96 |
97 | self.gym.set_actor_root_state_tensor(
98 | self.sim, gymtorch.unwrap_tensor(self.root_states))
99 |
100 | def _get_phase(self):
101 | cycle_time = self.cfg.rewards.cycle_time
102 | phase = self.episode_length_buf * self.dt / cycle_time
103 | return phase
104 |
105 | def _get_gait_phase(self):
106 | # return float mask 1 is stance, 0 is swing
107 | phase = self._get_phase()
108 | sin_pos = torch.sin(2 * torch.pi * phase)
109 | # Add double support phase
110 | stance_mask = torch.zeros((self.num_envs, 2), device=self.device)
111 | # left foot stance
112 | stance_mask[:, 0] = sin_pos >= 0
113 | # right foot stance
114 | stance_mask[:, 1] = sin_pos < 0
115 | # Double support phase
116 | stance_mask[torch.abs(sin_pos) < 0.1] = 1
117 |
118 | return stance_mask
119 |
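# NOTE: illustrative snippet, not part of this class; run it standalone.
# It evaluates the stance logic above at a few sample phases:
#
#     import torch
#     phase = torch.tensor([0.0, 0.25, 0.5, 0.75])  # fractions of one cycle
#     sin_pos = torch.sin(2 * torch.pi * phase)
#     stance_mask = torch.zeros(4, 2)
#     stance_mask[:, 0] = sin_pos >= 0               # left foot stance
#     stance_mask[:, 1] = sin_pos < 0                # right foot stance
#     stance_mask[torch.abs(sin_pos) < 0.1] = 1      # double support
#     # phases 0.0 and 0.5 -> double support; 0.25 left, 0.75 right stance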
120 |
121 | def compute_ref_state(self):
122 | phase = self._get_phase()
123 | sin_pos = torch.sin(2 * torch.pi * phase)
124 | sin_pos_l = sin_pos.clone()
125 | sin_pos_r = sin_pos.clone()
126 | self.ref_dof_pos = torch.zeros_like(self.dof_pos)
127 | scale_1 = self.cfg.rewards.target_joint_pos_scale
128 | scale_2 = 2 * scale_1
129 | # left foot stance phase set to default joint pos
130 | sin_pos_l[sin_pos_l > 0] = 0
131 | self.ref_dof_pos[:, 2] = sin_pos_l * scale_1
132 | self.ref_dof_pos[:, 3] = sin_pos_l * scale_2
133 | self.ref_dof_pos[:, 4] = sin_pos_l * scale_1
134 | # right foot stance phase set to default joint pos
135 | sin_pos_r[sin_pos_r < 0] = 0
136 | self.ref_dof_pos[:, 8] = sin_pos_r * scale_1
137 | self.ref_dof_pos[:, 9] = sin_pos_r * scale_2
138 | self.ref_dof_pos[:, 10] = sin_pos_r * scale_1
139 | # Double support phase
140 | self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0
141 |
142 | self.ref_action = 2 * self.ref_dof_pos
143 |
144 |
145 | def create_sim(self):
146 | """ Creates simulation, terrain and evironments
147 | """
148 | self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
149 | self.sim = self.gym.create_sim(
150 | self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
151 | mesh_type = self.cfg.terrain.mesh_type
152 | if mesh_type in ['heightfield', 'trimesh']:
153 | self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs)
154 | if mesh_type == 'plane':
155 | self._create_ground_plane()
156 | elif mesh_type == 'heightfield':
157 | self._create_heightfield()
158 | elif mesh_type == 'trimesh':
159 | self._create_trimesh()
160 | elif mesh_type is not None:
161 | raise ValueError(
162 | "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]")
163 | self._create_envs()
164 |
165 |
166 | def _get_noise_scale_vec(self, cfg):
167 | """ Sets a vector used to scale the noise added to the observations.
168 | [NOTE]: Must be adapted when changing the observations structure
169 |
170 | Args:
171 | cfg (Dict): Environment config file
172 |
173 | Returns:
174 | [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
175 | """
176 | noise_vec = torch.zeros(
177 | self.cfg.env.num_single_obs, device=self.device)
178 | self.add_noise = self.cfg.noise.add_noise
179 | noise_scales = self.cfg.noise.noise_scales
180 | noise_vec[0: 5] = 0. # commands
181 | noise_vec[5: 17] = noise_scales.dof_pos * self.obs_scales.dof_pos
182 | noise_vec[17: 29] = noise_scales.dof_vel * self.obs_scales.dof_vel
183 | noise_vec[29: 41] = 0. # previous actions
184 | noise_vec[41: 44] = noise_scales.ang_vel * self.obs_scales.ang_vel # ang vel
185 | noise_vec[44: 47] = noise_scales.quat * self.obs_scales.quat # euler x,y,z
186 | return noise_vec
187 |
188 |
189 | def step(self, actions):
190 | if self.cfg.env.use_ref_actions:
191 | actions += self.ref_action
192 | actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
193 | # dynamic randomization
194 | delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay
195 | actions = (1 - delay) * actions + delay * self.actions
196 | actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions
197 | return super().step(actions)
198 |
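# NOTE: illustrative snippet, not part of this class; run it standalone.
# It isolates the action-delay mixing from step() above: each env blends the
# new action with its previous one by a random factor in [0, action_delay).
#
#     import torch
#     action_delay, num_envs = 0.5, 3
#     new_actions = torch.ones(num_envs, 2)
#     prev_actions = torch.zeros(num_envs, 2)
#     delay = torch.rand(num_envs, 1) * action_delay
#     mixed = (1 - delay) * new_actions + delay * prev_actions
#     # every row of mixed lies in (0.5, 1.0] times the new action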
199 |
200 | def compute_observations(self):
201 |
202 | phase = self._get_phase()
203 | self.compute_ref_state()
204 |
205 | sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1)
206 | cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1)
207 |
208 | stance_mask = self._get_gait_phase()
209 | contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5.
210 |
211 | self.command_input = torch.cat(
212 | (sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1)
213 |
214 | q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos
215 | dq = self.dof_vel * self.obs_scales.dof_vel
216 |
217 | diff = self.dof_pos - self.ref_dof_pos
218 |
219 | self.privileged_obs_buf = torch.cat((
220 | self.command_input, # 2 + 3
221 | (self.dof_pos - self.default_joint_pd_target) * \
222 | self.obs_scales.dof_pos, # 12
223 | self.dof_vel * self.obs_scales.dof_vel, # 12
224 | self.actions, # 12
225 | diff, # 12
226 | self.base_lin_vel * self.obs_scales.lin_vel, # 3
227 | self.base_ang_vel * self.obs_scales.ang_vel, # 3
228 | self.base_euler_xyz * self.obs_scales.quat, # 3
229 | self.rand_push_force[:, :2], # 2
230 | self.rand_push_torque, # 3
231 | self.env_frictions, # 1
232 | self.body_mass / 30., # 1
233 | stance_mask, # 2
234 | contact_mask, # 2
235 | ), dim=-1)
236 |
237 | obs_buf = torch.cat((
238 | self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, ang_vel_yaw)
239 | q, # 12D
240 | dq, # 12D
241 | self.actions, # 12D
242 | self.base_ang_vel * self.obs_scales.ang_vel, # 3
243 | self.base_euler_xyz * self.obs_scales.quat, # 3
244 | ), dim=-1)
245 |
246 | if self.cfg.terrain.measure_heights:
247 | heights = torch.clip(self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, -1, 1.) * self.obs_scales.height_measurements
248 | self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1)
249 |
250 | if self.add_noise:
251 | obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level
252 | else:
253 | obs_now = obs_buf.clone()
254 | self.obs_history.append(obs_now)
255 | self.critic_history.append(self.privileged_obs_buf)
256 |
257 |
258 | obs_buf_all = torch.stack([self.obs_history[i]
259 | for i in range(self.obs_history.maxlen)], dim=1) # N,T,K
260 |
261 | self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K
262 | self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1)
263 |
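# NOTE: illustrative snippet, not part of this class; run it standalone.
# It shows the frame-stacking pattern used above: a deque pre-filled to
# maxlen holds the T most recent frames, stacked and flattened to (N, T*K).
#
#     import collections, torch
#     N, K, T = 2, 4, 3
#     history = collections.deque(maxlen=T)
#     for _ in range(T):
#         history.append(torch.zeros(N, K))
#     history.append(torch.ones(N, K))            # oldest frame drops out
#     stacked = torch.stack(list(history), dim=1) # (N, T, K)
#     flat = stacked.reshape(N, -1)               # (N, T*K) policy input
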
264 | def reset_idx(self, env_ids):
265 | super().reset_idx(env_ids)
266 | for i in range(self.obs_history.maxlen):
267 | self.obs_history[i][env_ids] *= 0
268 | for i in range(self.critic_history.maxlen):
269 | self.critic_history[i][env_ids] *= 0
270 |
271 | # ================================================ Rewards ================================================== #
272 | def _reward_joint_pos(self):
273 | """
274 | Calculates the reward based on the difference between the current joint positions and the target joint positions.
275 | """
276 | joint_pos = self.dof_pos.clone()
277 | pos_target = self.ref_dof_pos.clone()
278 | diff = joint_pos - pos_target
279 | r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5)
280 | return r
281 |
282 | def _reward_feet_distance(self):
283 | """
284 | Calculates the reward based on the distance between the feet. Penalizes feet that are too close together or too far apart.
285 | """
286 | foot_pos = self.rigid_state[:, self.feet_indices, :2]
287 | foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
288 | fd = self.cfg.rewards.min_dist
289 | max_df = self.cfg.rewards.max_dist
290 | d_min = torch.clamp(foot_dist - fd, -0.5, 0.)
291 | d_max = torch.clamp(foot_dist - max_df, 0, 0.5)
292 | return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2
293 |
294 |
295 | def _reward_knee_distance(self):
296 | """
297 | Calculates the reward based on the distance between the knees of the humanoid.
298 | """
299 | knee_pos = self.rigid_state[:, self.knee_indices, :2]
300 | knee_dist = torch.norm(knee_pos[:, 0, :] - knee_pos[:, 1, :], dim=1)
301 | fd = self.cfg.rewards.min_dist
302 | max_df = self.cfg.rewards.max_dist / 2
303 | d_min = torch.clamp(knee_dist - fd, -0.5, 0.)
304 | d_max = torch.clamp(knee_dist - max_df, 0, 0.5)
305 | return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2
306 |
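# NOTE: illustrative snippet, not part of this class; run it standalone.
# It isolates the distance-band shaping shared by the two rewards above:
# full reward inside [min_dist, max_dist], decaying sharply outside.
#
#     import torch
#     min_d, max_d = 0.2, 0.5
#     dist = torch.tensor([0.1, 0.3, 0.7])
#     d_min = torch.clamp(dist - min_d, -0.5, 0.)  # negative when too close
#     d_max = torch.clamp(dist - max_d, 0., 0.5)   # positive when too far
#     rew = (torch.exp(-torch.abs(d_min) * 100)
#            + torch.exp(-torch.abs(d_max) * 100)) / 2
#     # rew ~ [0.5, 1.0, 0.5]: only the in-band distance earns full reward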
307 |
308 | def _reward_foot_slip(self):
309 | """
310 | Calculates the reward for minimizing foot slip. The reward is based on the contact forces
311 | and the speed of the feet. A contact threshold is used to determine if the foot is in contact
312 | with the ground. The speed of the foot is calculated and scaled by the contact condition.
313 | """
314 | contact = self.contact_forces[:, self.feet_indices, 2] > 5.
315 | foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
316 | rew = torch.sqrt(foot_speed_norm)
317 | rew *= contact
318 | return torch.sum(rew, dim=1)
319 |
320 | def _reward_feet_air_time(self):
321 | """
322 | Calculates the reward for feet air time, promoting longer steps. This is achieved by
323 | checking the first contact with the ground after being in the air. The air time is
324 | limited to a maximum value for reward calculation.
325 | """
326 | contact = self.contact_forces[:, self.feet_indices, 2] > 5.
327 | stance_mask = self._get_gait_phase()
328 | self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts)
329 | self.last_contacts = contact
330 | first_contact = (self.feet_air_time > 0.) * self.contact_filt
331 | self.feet_air_time += self.dt
332 | air_time = self.feet_air_time.clamp(0, 0.5) * first_contact
333 | self.feet_air_time *= ~self.contact_filt
334 | return air_time.sum(dim=1)
335 |
336 | def _reward_feet_contact_number(self):
337 | """
338 | Calculates a reward based on the number of feet contacts aligning with the gait phase.
339 | Rewards or penalizes depending on whether the foot contact matches the expected gait phase.
340 | """
341 | contact = self.contact_forces[:, self.feet_indices, 2] > 5.
342 | stance_mask = self._get_gait_phase()
343 | reward = torch.where(contact == stance_mask, 1.0, -0.3)
344 | return torch.mean(reward, dim=1)
345 |
346 | def _reward_orientation(self):
347 | """
348 | Calculates the reward for maintaining a flat base orientation. It penalizes deviation
349 | from the desired base orientation using the base euler angles and the projected gravity vector.
350 | """
351 | quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10)
352 | orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20)
353 | return (quat_mismatch + orientation) / 2.
354 |
355 | def _reward_feet_contact_forces(self):
356 | """
357 | Calculates the reward for keeping contact forces within a specified range. Penalizes
358 | high contact forces on the feet.
359 | """
360 | return torch.sum((torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force).clip(0, 400), dim=1)
361 |
362 | def _reward_default_joint_pos(self):
363 | """
364 | Calculates the reward for keeping joint positions close to default positions, with a focus
365 | on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty.
366 | """
367 | joint_diff = self.dof_pos - self.default_joint_pd_target
368 | left_yaw_roll = joint_diff[:, :2]
369 | right_yaw_roll = joint_diff[:, 6: 8]
370 | yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1)
371 | yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50)
372 | return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1)
373 |
374 | def _reward_base_height(self):
375 | """
376 | Calculates the reward based on the robot's base height. Penalizes deviation from a target base height.
377 | The reward is computed based on the height difference between the robot's base and the average height
378 | of its feet when they are in contact with the ground.
379 | """
380 | stance_mask = self._get_gait_phase()
381 | measured_heights = torch.sum(
382 | self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(stance_mask, dim=1)
383 | base_height = self.root_states[:, 2] - (measured_heights - 0.05)
384 | return torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100)
385 |
386 | def _reward_base_acc(self):
387 | """
388 | Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base,
389 | encouraging smoother motion.
390 | """
391 |         root_acc = self.last_root_vel - self.root_states[:, 7:13]  # per-step velocity change (not divided by dt)
392 | rew = torch.exp(-torch.norm(root_acc, dim=1) * 3)
393 | return rew
394 |
395 |
396 | def _reward_vel_mismatch_exp(self):
397 | """
398 | Computes a reward based on the mismatch in the robot's linear and angular velocities.
399 | Encourages the robot to maintain a stable velocity by penalizing large deviations.
400 | """
401 | lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10)
402 | ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.)
403 |
404 | c_update = (lin_mismatch + ang_mismatch) / 2.
405 |
406 | return c_update
407 |
408 | def _reward_track_vel_hard(self):
409 | """
410 | Calculates a reward for accurately tracking both linear and angular velocity commands.
411 | Penalizes deviations from specified linear and angular velocity targets.
412 | """
413 | # Tracking of linear velocity commands (xy axes)
414 | lin_vel_error = torch.norm(
415 | self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1)
416 | lin_vel_error_exp = torch.exp(-lin_vel_error * 10)
417 |
418 | # Tracking of angular velocity commands (yaw)
419 | ang_vel_error = torch.abs(
420 | self.commands[:, 2] - self.base_ang_vel[:, 2])
421 | ang_vel_error_exp = torch.exp(-ang_vel_error * 10)
422 |
423 |         combined_error = 0.2 * (lin_vel_error + ang_vel_error)
424 |
425 |         return (lin_vel_error_exp + ang_vel_error_exp) / 2. - combined_error
426 |
427 | def _reward_tracking_lin_vel(self):
428 | """
429 | Tracks linear velocity commands along the xy axes.
430 | Calculates a reward based on how closely the robot's linear velocity matches the commanded values.
431 | """
432 | lin_vel_error = torch.sum(torch.square(
433 | self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
434 | return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma)
435 |
436 | def _reward_tracking_ang_vel(self):
437 | """
438 | Tracks angular velocity commands for yaw rotation.
439 | Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values.
440 | """
441 |
442 | ang_vel_error = torch.square(
443 | self.commands[:, 2] - self.base_ang_vel[:, 2])
444 | return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma)
445 |
446 | def _reward_feet_clearance(self):
447 | """
448 | Calculates reward based on the clearance of the swing leg from the ground during movement.
449 | Encourages appropriate lift of the feet during the swing phase of the gait.
450 | """
451 | # Compute feet contact mask
452 | contact = self.contact_forces[:, self.feet_indices, 2] > 5.
453 |
454 | # Get the z-position of the feet and compute the change in z-position
455 | feet_z = self.rigid_state[:, self.feet_indices, 2] - 0.05
456 | delta_z = feet_z - self.last_feet_z
457 | self.feet_height += delta_z
458 | self.last_feet_z = feet_z
459 |
460 | # Compute swing mask
461 | swing_mask = 1 - self._get_gait_phase()
462 |
463 |         # foot height should be close to the target height at the swing peak
464 | rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01
465 | rew_pos = torch.sum(rew_pos * swing_mask, dim=1)
466 | self.feet_height *= ~contact
467 | return rew_pos
468 |
469 | def _reward_low_speed(self):
470 | """
471 | Rewards or penalizes the robot based on its speed relative to the commanded speed.
472 |         This function checks whether the robot is moving too slowly, too fast, or within the
473 |         desired speed range, and whether the movement direction matches the command.
474 | """
475 | # Calculate the absolute value of speed and command for comparison
476 | absolute_speed = torch.abs(self.base_lin_vel[:, 0])
477 | absolute_command = torch.abs(self.commands[:, 0])
478 |
479 | # Define speed criteria for desired range
480 | speed_too_low = absolute_speed < 0.5 * absolute_command
481 | speed_too_high = absolute_speed > 1.2 * absolute_command
482 | speed_desired = ~(speed_too_low | speed_too_high)
483 |
484 | # Check if the speed and command directions are mismatched
485 | sign_mismatch = torch.sign(
486 | self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0])
487 |
488 | # Initialize reward tensor
489 | reward = torch.zeros_like(self.base_lin_vel[:, 0])
490 |
491 | # Assign rewards based on conditions
492 | # Speed too low
493 | reward[speed_too_low] = -1.0
494 | # Speed too high
495 | reward[speed_too_high] = 0.
496 | # Speed within desired range
497 | reward[speed_desired] = 1.2
498 | # Sign mismatch has the highest priority
499 | reward[sign_mismatch] = -2.0
500 | return reward * (self.commands[:, 0].abs() > 0.1)
501 |
502 | def _reward_torques(self):
503 | """
504 | Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing
505 | the necessary force exerted by the motors.
506 | """
507 | return torch.sum(torch.square(self.torques), dim=1)
508 |
509 | def _reward_dof_vel(self):
510 | """
511 | Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and
512 | more controlled movements.
513 | """
514 | return torch.sum(torch.square(self.dof_vel), dim=1)
515 |
516 | def _reward_dof_acc(self):
517 | """
518 | Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring
519 | smooth and stable motion, reducing wear on the robot's mechanical parts.
520 | """
521 | return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1)
522 |
523 | def _reward_collision(self):
524 | """
525 | Penalizes collisions of the robot with the environment, specifically focusing on selected body parts.
526 | This encourages the robot to avoid undesired contact with objects or surfaces.
527 | """
528 | return torch.sum(1.*(torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1)
529 |
530 | def _reward_action_smoothness(self):
531 | """
532 | Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions.
533 | This is important for achieving fluid motion and reducing mechanical stress.
534 | """
535 | term_1 = torch.sum(torch.square(
536 | self.last_actions - self.actions), dim=1)
537 | term_2 = torch.sum(torch.square(
538 | self.actions + self.last_last_actions - 2 * self.last_actions), dim=1)
539 | term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1)
540 | return term_1 + term_2 + term_3
541 |
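
The air-time and clearance rewards above carry per-foot state (feet_air_time, last_contacts, feet_height) across control steps, which makes them easy to misread. Below is a minimal standalone sketch of the air-time bookkeeping using dummy tensors; dt and contact_seq are illustrative values, and the real reward additionally ORs the gait-phase stance mask into the contact filter.

import torch

dt = 0.01                                            # assumed policy step, in seconds
feet_air_time = torch.zeros(1, 2)                    # (num_envs, num_feet)
last_contacts = torch.zeros(1, 2, dtype=torch.bool)

# Toy contact sequence for two feet over six steps: foot 0 lifts off and lands again.
contact_seq = torch.tensor([[1, 0], [1, 0], [0, 0], [0, 0], [1, 0], [1, 1]],
                           dtype=torch.bool)

for step in range(contact_seq.shape[0]):
    contact = contact_seq[step].unsqueeze(0)
    contact_filt = contact | last_contacts           # filters out one-step contact dropouts
    last_contacts = contact
    first_contact = (feet_air_time > 0.) & contact_filt
    feet_air_time += dt
    reward = (feet_air_time.clamp(0, 0.5) * first_contact).sum(dim=1)
    feet_air_time *= ~contact_filt                   # reset the clock for grounded feet
    print(step, reward.item())                       # non-zero only on a landing step
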
--------------------------------------------------------------------------------
/humanoid/scripts/play.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import os
33 | import cv2
34 | import numpy as np
35 | from isaacgym import gymapi
36 | from humanoid import LEGGED_GYM_ROOT_DIR
37 |
38 | # import isaacgym
39 | from humanoid.envs import *
40 | from humanoid.utils import get_args, export_policy_as_jit, task_registry, Logger
41 | from isaacgym.torch_utils import *
42 |
43 | import torch
44 | from tqdm import tqdm
45 | from datetime import datetime
46 |
47 |
48 | def play(args):
49 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
50 | # override some parameters for testing
51 | env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
52 | env_cfg.sim.max_gpu_contact_pairs = 2**10
53 | # env_cfg.terrain.mesh_type = 'trimesh'
54 | env_cfg.terrain.mesh_type = 'plane'
55 | env_cfg.terrain.num_rows = 5
56 | env_cfg.terrain.num_cols = 5
57 | env_cfg.terrain.curriculum = False
58 | env_cfg.terrain.max_init_terrain_level = 5
59 | env_cfg.noise.add_noise = True
60 | env_cfg.domain_rand.push_robots = False
61 | env_cfg.domain_rand.joint_angle_noise = 0.
62 | env_cfg.noise.curriculum = False
63 | env_cfg.noise.noise_level = 0.5
64 |
65 |
66 | train_cfg.seed = 123145
67 | print("train_cfg.runner_class_name:", train_cfg.runner_class_name)
68 |
69 | # prepare environment
70 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
71 | env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat)
72 |
73 | obs = env.get_observations()
74 |
75 | # load policy
76 | train_cfg.runner.resume = True
77 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
78 | policy = ppo_runner.get_inference_policy(device=env.device)
79 |
80 | # export policy as a jit module (used to run it from C++)
81 | if EXPORT_POLICY:
82 | path = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name, 'exported', 'policies')
83 | export_policy_as_jit(ppo_runner.alg.actor_critic, path)
84 | print('Exported policy as jit script to: ', path)
85 |
86 | logger = Logger(env.dt)
87 | robot_index = 0 # which robot is used for logging
88 | joint_index = 1 # which joint is used for logging
89 | stop_state_log = 1200 # number of steps before plotting states
90 | if RENDER:
91 | camera_properties = gymapi.CameraProperties()
92 | camera_properties.width = 1920
93 | camera_properties.height = 1080
94 | h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties)
95 | camera_offset = gymapi.Vec3(1, -1, 0.5)
96 | camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1),
97 | np.deg2rad(135))
98 | actor_handle = env.gym.get_actor_handle(env.envs[0], 0)
99 | body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0)
100 | env.gym.attach_camera_to_body(
101 | h1, env.envs[0], body_handle,
102 | gymapi.Transform(camera_offset, camera_rotation),
103 | gymapi.FOLLOW_POSITION)
104 |
105 | fourcc = cv2.VideoWriter_fourcc(*"mp4v")
106 | video_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'videos')
107 | experiment_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'videos', train_cfg.runner.experiment_name)
108 |         video_path = os.path.join(experiment_dir, datetime.now().strftime('%b%d_%H-%M-%S') + args.run_name + '.mp4')
109 | if not os.path.exists(video_dir):
110 | os.mkdir(video_dir)
111 | if not os.path.exists(experiment_dir):
112 | os.mkdir(experiment_dir)
113 |         video = cv2.VideoWriter(video_path, fourcc, 50.0, (1920, 1080))
114 |
115 | for i in tqdm(range(stop_state_log)):
116 |
117 | actions = policy(obs.detach()) # * 0.
118 |
119 | if FIX_COMMAND:
120 | env.commands[:, 0] = 0.5 # 1.0
121 | env.commands[:, 1] = 0.
122 | env.commands[:, 2] = 0.
123 | env.commands[:, 3] = 0.
124 |
125 | obs, critic_obs, rews, dones, infos = env.step(actions.detach())
126 |
127 | if RENDER:
128 | env.gym.fetch_results(env.sim, True)
129 | env.gym.step_graphics(env.sim)
130 | env.gym.render_all_camera_sensors(env.sim)
131 | img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
132 | img = np.reshape(img, (1080, 1920, 4))
133 |             img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)  # camera buffer is RGBA; convert to 3-channel BGR
134 | video.write(img[..., :3])
135 |
136 | logger.log_states(
137 | {
138 | 'dof_pos_target': actions[robot_index, joint_index].item() * env.cfg.control.action_scale,
139 | 'dof_pos': env.dof_pos[robot_index, joint_index].item(),
140 | 'dof_vel': env.dof_vel[robot_index, joint_index].item(),
141 | 'dof_torque': env.torques[robot_index, joint_index].item(),
142 | 'command_x': env.commands[robot_index, 0].item(),
143 | 'command_y': env.commands[robot_index, 1].item(),
144 | 'command_yaw': env.commands[robot_index, 2].item(),
145 | 'base_vel_x': env.base_lin_vel[robot_index, 0].item(),
146 | 'base_vel_y': env.base_lin_vel[robot_index, 1].item(),
147 | 'base_vel_z': env.base_lin_vel[robot_index, 2].item(),
148 | 'base_vel_yaw': env.base_ang_vel[robot_index, 2].item(),
149 | 'contact_forces_z': env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy()
150 | }
151 | )
152 | # ====================== Log states ======================
153 | if infos["episode"]:
154 | num_episodes = torch.sum(env.reset_buf).item()
155 | if num_episodes>0:
156 | logger.log_rewards(infos["episode"], num_episodes)
157 |
158 | logger.print_rewards()
159 | logger.plot_states()
160 |
161 | if RENDER:
162 | video.release()
163 |
164 | if __name__ == '__main__':
165 | EXPORT_POLICY = True
166 | RENDER = True
167 | FIX_COMMAND = True
168 | args = get_args()
169 | play(args)
170 |
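
play.py shares the argument parser with training (get_args in humanoid/utils/helpers.py) and forces resume=True, so the latest run and checkpoint are loaded by default. Note that the video filename concatenates args.run_name directly, so pass --run_name whenever RENDER is enabled. A typical invocation (the task name is the parser default; the run name is illustrative):

    python humanoid/scripts/play.py --task XBotL_free --run_name demo
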
--------------------------------------------------------------------------------
/humanoid/scripts/sim2sim.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: BSD-3-Clause
2 | #
3 | # Redistribution and use in source and binary forms, with or without
4 | # modification, are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | #
9 | # 2. Redistributions in binary form must reproduce the above copyright notice,
10 | # this list of conditions and the following disclaimer in the documentation
11 | # and/or other materials provided with the distribution.
12 | #
13 | # 3. Neither the name of the copyright holder nor the names of its
14 | # contributors may be used to endorse or promote products derived from
15 | # this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | #
28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
29 |
30 |
31 | import math
32 | import numpy as np
33 | import mujoco, mujoco_viewer
34 | from tqdm import tqdm
35 | from collections import deque
36 | from scipy.spatial.transform import Rotation as R
37 | from humanoid import LEGGED_GYM_ROOT_DIR
38 | from humanoid.envs import XBotLCfg
39 | import torch
40 |
41 |
42 | class cmd:
43 | vx = 0.4
44 | vy = 0.0
45 | dyaw = 0.0
46 |
47 |
48 | def quaternion_to_euler_array(quat):
49 | # Ensure quaternion is in the correct format [x, y, z, w]
50 | x, y, z, w = quat
51 |
52 | # Roll (x-axis rotation)
53 | t0 = +2.0 * (w * x + y * z)
54 | t1 = +1.0 - 2.0 * (x * x + y * y)
55 | roll_x = np.arctan2(t0, t1)
56 |
57 | # Pitch (y-axis rotation)
58 | t2 = +2.0 * (w * y - z * x)
59 | t2 = np.clip(t2, -1.0, 1.0)
60 | pitch_y = np.arcsin(t2)
61 |
62 | # Yaw (z-axis rotation)
63 | t3 = +2.0 * (w * z + x * y)
64 | t4 = +1.0 - 2.0 * (y * y + z * z)
65 | yaw_z = np.arctan2(t3, t4)
66 |
67 | # Returns roll, pitch, yaw in a NumPy array in radians
68 | return np.array([roll_x, pitch_y, yaw_z])
69 |
70 | def get_obs(data):
71 | '''Extracts an observation from the mujoco data structure
72 | '''
73 | q = data.qpos.astype(np.double)
74 | dq = data.qvel.astype(np.double)
75 |     quat = data.sensor('orientation').data[[1, 2, 3, 0]].astype(np.double)  # MuJoCo (w, x, y, z) -> scipy (x, y, z, w)
76 | r = R.from_quat(quat)
77 | v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame
78 | omega = data.sensor('angular-velocity').data.astype(np.double)
79 | gvec = r.apply(np.array([0., 0., -1.]), inverse=True).astype(np.double)
80 | return (q, dq, quat, v, omega, gvec)
81 |
82 | def pd_control(target_q, q, kp, target_dq, dq, kd):
83 | '''Calculates torques from position commands
84 | '''
85 | return (target_q - q) * kp + (target_dq - dq) * kd
86 |
87 | def run_mujoco(policy, cfg):
88 | """
89 | Run the Mujoco simulation using the provided policy and configuration.
90 |
91 | Args:
92 | policy: The policy used for controlling the simulation.
93 | cfg: The configuration object containing simulation settings.
94 |
95 | Returns:
96 | None
97 | """
98 | model = mujoco.MjModel.from_xml_path(cfg.sim_config.mujoco_model_path)
99 | model.opt.timestep = cfg.sim_config.dt
100 | data = mujoco.MjData(model)
101 | mujoco.mj_step(model, data)
102 | viewer = mujoco_viewer.MujocoViewer(model, data)
103 |
104 | target_q = np.zeros((cfg.env.num_actions), dtype=np.double)
105 | action = np.zeros((cfg.env.num_actions), dtype=np.double)
106 |
107 | hist_obs = deque()
108 | for _ in range(cfg.env.frame_stack):
109 | hist_obs.append(np.zeros([1, cfg.env.num_single_obs], dtype=np.double))
110 |
111 | count_lowlevel = 0
112 |
113 |
114 | for _ in tqdm(range(int(cfg.sim_config.sim_duration / cfg.sim_config.dt)), desc="Simulating..."):
115 |
116 | # Obtain an observation
117 | q, dq, quat, v, omega, gvec = get_obs(data)
118 | q = q[-cfg.env.num_actions:]
119 | dq = dq[-cfg.env.num_actions:]
120 |
121 | # 1000hz -> 100hz
122 | if count_lowlevel % cfg.sim_config.decimation == 0:
123 |
124 | obs = np.zeros([1, cfg.env.num_single_obs], dtype=np.float32)
125 | eu_ang = quaternion_to_euler_array(quat)
126 | eu_ang[eu_ang > math.pi] -= 2 * math.pi
127 |
128 | obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * cfg.sim_config.dt / 0.64)
129 | obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * cfg.sim_config.dt / 0.64)
130 | obs[0, 2] = cmd.vx * cfg.normalization.obs_scales.lin_vel
131 | obs[0, 3] = cmd.vy * cfg.normalization.obs_scales.lin_vel
132 | obs[0, 4] = cmd.dyaw * cfg.normalization.obs_scales.ang_vel
133 | obs[0, 5:17] = q * cfg.normalization.obs_scales.dof_pos
134 | obs[0, 17:29] = dq * cfg.normalization.obs_scales.dof_vel
135 | obs[0, 29:41] = action
136 | obs[0, 41:44] = omega
137 | obs[0, 44:47] = eu_ang
138 |
139 | obs = np.clip(obs, -cfg.normalization.clip_observations, cfg.normalization.clip_observations)
140 |
141 | hist_obs.append(obs)
142 | hist_obs.popleft()
143 |
144 | policy_input = np.zeros([1, cfg.env.num_observations], dtype=np.float32)
145 | for i in range(cfg.env.frame_stack):
146 | policy_input[0, i * cfg.env.num_single_obs : (i + 1) * cfg.env.num_single_obs] = hist_obs[i][0, :]
147 | action[:] = policy(torch.tensor(policy_input))[0].detach().numpy()
148 | action = np.clip(action, -cfg.normalization.clip_actions, cfg.normalization.clip_actions)
149 |
150 | target_q = action * cfg.control.action_scale
151 |
152 |
153 | target_dq = np.zeros((cfg.env.num_actions), dtype=np.double)
154 | # Generate PD control
155 | tau = pd_control(target_q, q, cfg.robot_config.kps,
156 | target_dq, dq, cfg.robot_config.kds) # Calc torques
157 | tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques
158 | data.ctrl = tau
159 |
160 | mujoco.mj_step(model, data)
161 | viewer.render()
162 | count_lowlevel += 1
163 |
164 | viewer.close()
165 |
166 |
167 | if __name__ == '__main__':
168 | import argparse
169 |
170 | parser = argparse.ArgumentParser(description='Deployment script.')
171 | parser.add_argument('--load_model', type=str, required=True,
172 | help='Run to load from.')
173 | parser.add_argument('--terrain', action='store_true', help='terrain or plane')
174 | args = parser.parse_args()
175 |
176 | class Sim2simCfg(XBotLCfg):
177 |
178 | class sim_config:
179 | if args.terrain:
180 | mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L-terrain.xml'
181 | else:
182 | mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L.xml'
183 | sim_duration = 60.0
184 | dt = 0.001
185 | decimation = 10
186 |
187 | class robot_config:
188 | kps = np.array([200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double)
189 | kds = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], dtype=np.double)
190 | tau_limit = 200. * np.ones(12, dtype=np.double)
191 |
192 | policy = torch.jit.load(args.load_model)
193 | run_mujoco(policy, Sim2simCfg())
194 |
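
The single-frame observation assembled in run_mujoco is 47-dimensional for the 12 actuated joints. The slice layout below is read off the indexing code above; treat it as an illustration rather than a spec, and the frame_stack value is an assumption that must match cfg.env.frame_stack used at training time.

num_actions = 12
layout = {
    "gait clock (sin, cos)":   (0, 2),
    "commands (vx, vy, dyaw)": (2, 5),
    "joint positions":         (5, 5 + num_actions),    # 5:17
    "joint velocities":        (17, 17 + num_actions),  # 17:29
    "previous action":         (29, 29 + num_actions),  # 29:41
    "base angular velocity":   (41, 44),
    "base euler angles":       (44, 47),
}
num_single_obs = max(end for _, end in layout.values())
assert num_single_obs == 47

frame_stack = 15  # assumed value; the stacked policy input is a flat vector
print("policy input size:", frame_stack * num_single_obs)
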
--------------------------------------------------------------------------------
/humanoid/scripts/train.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | from humanoid.envs import *
34 | from humanoid.utils import get_args, task_registry
35 |
36 | def train(args):
37 | env, env_cfg = task_registry.make_env(name=args.task, args=args)
38 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args)
39 | ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True)
40 |
41 | if __name__ == '__main__':
42 | args = get_args()
43 | train(args)
44 |
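
A typical headless training launch, using the flags defined by get_args in humanoid/utils/helpers.py (the task name is the parser default; the environment count is illustrative):

    python humanoid/scripts/train.py --task XBotL_free --headless --num_envs 4096
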
--------------------------------------------------------------------------------
/humanoid/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict
34 | from .task_registry import task_registry
35 | from .logger import Logger
36 | from .math import *
37 | from .terrain import Terrain
--------------------------------------------------------------------------------
/humanoid/utils/calculate_gait.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import numpy as np
33 | from scipy.optimize import fsolve
34 | import matplotlib.pyplot as plt
35 |
36 | def get_coefficients(h0, hswing, v0, vswing, hmax, swing_time):
37 |
38 | def equations(coeffs):
39 | a5, a4, a3, a2, a1, a0 = coeffs
40 |
41 | # Height at t=0 should be h0
42 | eq1 = a0 - h0
43 |
44 | # Height at t=swing_time should be hswing
45 | eq2 = a5 * swing_time**5 + a4 * swing_time**4 + a3 * swing_time**3 + a2 * swing_time**2 + a1 * swing_time + a0 - hswing
46 |
47 | # Velocity at t=0 should be v0
48 | eq3 = a1 - v0
49 |
50 | # Velocity at t=swing_time should be vswing
51 | eq4 = 5 * a5 * swing_time**4 + 4 * a4 * swing_time**3 + 3 * a3 * swing_time**2 + 2 * a2 * swing_time + a1 - vswing
52 |
53 | # Height at t=swing_time/2 should be hmax
54 | eq5 = a5 * (swing_time/2)**5 + a4 * (swing_time/2)**4 + a3 * (swing_time/2)**3 + a2 * (swing_time/2)**2 + a1 * (swing_time/2) + a0 - hmax
55 |
56 |         # Return the residuals; fsolve drives them to zero. The sixth expression pins h(1) = 0 so the system is square.
57 | return (eq1, eq2, eq3, eq4, eq5, a5 + a4 + a3 + a2 + a1 + a0)
58 |
59 | # Solve for the coefficients using the equations above
60 | return fsolve(equations, (1, 1, 1, 1, 1, 1))
61 |
62 |
63 | def plot_curves(coeffs, swing_time):
64 | a5, a4, a3, a2, a1, a0 = coeffs
65 |
66 | def h(t):
67 | return a5 * t**5 + a4 * t**4 + a3 * t**3 + a2 * t**2 + a1 * t + a0
68 |
69 | def v(t):
70 | return 5 * a5 * t**4 + 4 * a4 * t**3 + 3 * a3 * t**2 + 2 * a2 * t + a1
71 |
72 | # Define the acceleration function based on the coefficients
73 | def a(t):
74 | return 20 * a5 * t**3 + 12 * a4 * t**2 + 6 * a3 * t + 2 * a2
75 |
76 | t_values = np.linspace(0, swing_time, 500)
77 | h_values = h(t_values)
78 | v_values = v(t_values)
79 | a_values = a(t_values) # Compute acceleration values
80 |
81 |
82 | discrete_t_values = np.linspace(0, swing_time, 14)
83 |
84 | plt.figure(figsize=(12, 9))
85 |
86 | plt.subplot(3, 1, 1)
87 | plt.plot(t_values, h_values, label='Height (h(t))')
88 | plt.scatter(discrete_t_values, h(discrete_t_values), color='black', label='Discrete Height')
89 | plt.title('Height Curve')
90 | plt.grid(True)
91 | plt.legend()
92 |
93 | plt.subplot(3, 1, 2)
94 | plt.plot(t_values, v_values, label='Velocity (v(t))', color='red')
95 | plt.scatter(discrete_t_values, v(discrete_t_values), color='black', label='Discrete Velocity')
96 | # print(v(discrete_t_values))
97 | plt.title('Velocity Curve')
98 | plt.grid(True)
99 | plt.legend()
100 |
101 | # Plotting the acceleration curve
102 | plt.subplot(3, 1, 3)
103 | plt.plot(t_values, a_values/50, label='Acceleration (a(t))', color='green')
104 | plt.scatter(discrete_t_values, a(discrete_t_values)/50, color='black', label='Discrete Acceleration')
105 | # print(a(discrete_t_values)/50)
106 | plt.title('Acceleration Curve')
107 | plt.grid(True)
108 | plt.legend()
109 |
110 | plt.tight_layout()
111 | plt.show()
112 |
113 |
114 | # Boundary conditions: h0 = 0, hswing = 0, v0 = 0, vswing = -0.1, hmax = 0.04, swing_time = 0.26
115 | coeffs = get_coefficients(0, 0, 0, -0.1, 0.04, 0.26)
116 |
117 | print("Coefficients (a5, a4, a3, a2, a1, a0):")
118 | print(f"a5 = {coeffs[0]:.15f}")
119 | print(f"a4 = {coeffs[1]:.15f}")
120 | print(f"a3 = {coeffs[2]:.15f}")
121 | print(f"a2 = {coeffs[3]:.15f}")
122 | print(f"a1 = {coeffs[4]:.15f}")
123 | print(f"a0 = {coeffs[5]:.15f}")
124 |
125 | plot_curves(coeffs, 0.26)
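
Since all six conditions are linear in the six coefficients, fsolve lands on the unique exact solution; a quick check that the recovered quintic honors the imposed boundary values (a sketch reusing the definitions above, with a loose tolerance):

h0, hswing, v0, vswing, hmax, swing_time = 0.0, 0.0, 0.0, -0.1, 0.04, 0.26
a5, a4, a3, a2, a1, a0 = get_coefficients(h0, hswing, v0, vswing, hmax, swing_time)

def h(t):
    return a5 * t**5 + a4 * t**4 + a3 * t**3 + a2 * t**2 + a1 * t + a0

def v(t):
    return 5 * a5 * t**4 + 4 * a4 * t**3 + 3 * a3 * t**2 + 2 * a2 * t + a1

for got, want in [(h(0), h0), (h(swing_time), hswing), (v(0), v0),
                  (v(swing_time), vswing), (h(swing_time / 2), hmax), (h(1.0), 0.0)]:
    assert abs(got - want) < 1e-6, (got, want)
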
--------------------------------------------------------------------------------
/humanoid/utils/helpers.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import datetime
33 | import os
34 | import copy
35 | import torch
36 | import numpy as np
37 | import random
38 | from isaacgym import gymapi
39 | from isaacgym import gymutil
40 |
41 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
42 |
43 |
44 | def class_to_dict(obj) -> dict:
45 | if not hasattr(obj, "__dict__"):
46 | return obj
47 | result = {}
48 | for key in dir(obj):
49 | if key.startswith("_"):
50 | continue
51 | element = []
52 | val = getattr(obj, key)
53 | if isinstance(val, list):
54 | for item in val:
55 | element.append(class_to_dict(item))
56 | else:
57 | element = class_to_dict(val)
58 | result[key] = element
59 | return result
60 |
61 |
62 | def update_class_from_dict(obj, dict):
63 | for key, val in dict.items():
64 | attr = getattr(obj, key, None)
65 | if isinstance(attr, type):
66 | update_class_from_dict(attr, val)
67 | else:
68 | setattr(obj, key, val)
69 | return
70 |
71 |
72 | def set_seed(seed):
73 | if seed == -1:
74 | seed = np.random.randint(0, 10000)
75 | print("Setting seed: {}".format(seed))
76 |
77 | random.seed(seed)
78 | np.random.seed(seed)
79 | torch.manual_seed(seed)
80 | os.environ["PYTHONHASHSEED"] = str(seed)
81 | torch.cuda.manual_seed(seed)
82 | torch.cuda.manual_seed_all(seed)
83 |
84 |
85 | def parse_sim_params(args, cfg):
86 | # code from Isaac Gym Preview 2
87 | # initialize sim params
88 | sim_params = gymapi.SimParams()
89 |
90 | # set some values from args
91 | if args.physics_engine == gymapi.SIM_FLEX:
92 | if args.device != "cpu":
93 | print("WARNING: Using Flex with GPU instead of PHYSX!")
94 | elif args.physics_engine == gymapi.SIM_PHYSX:
95 | sim_params.physx.use_gpu = args.use_gpu
96 | sim_params.physx.num_subscenes = args.subscenes
97 | sim_params.use_gpu_pipeline = args.use_gpu_pipeline
98 |
99 | # if sim options are provided in cfg, parse them and update/override above:
100 | if "sim" in cfg:
101 | gymutil.parse_sim_config(cfg["sim"], sim_params)
102 |
103 | # Override num_threads if passed on the command line
104 | if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0:
105 | sim_params.physx.num_threads = args.num_threads
106 |
107 | return sim_params
108 |
109 |
110 | def get_load_path(root, load_run=-1, checkpoint=-1):
111 | def month_to_number(month):
112 | return datetime.datetime.strptime(month, "%b").month
113 |
114 | try:
115 | runs = os.listdir(root)
116 | try:
117 | runs.sort(key=lambda x: (month_to_number(x[:3]), int(x[3:5]), x[6:]))
118 | except ValueError as e:
119 | print("WARNING - Could not sort runs by month: " + str(e))
120 | runs.sort()
121 | if "exported" in runs:
122 | runs.remove("exported")
123 | last_run = os.path.join(root, runs[-1])
124 |     except (OSError, IndexError):
125 | raise ValueError("No runs in this directory: " + root)
126 | if load_run == -1:
127 | load_run = last_run
128 | else:
129 | load_run = os.path.join(root, load_run)
130 | if checkpoint == -1:
131 | models = [file for file in os.listdir(load_run) if "model" in file]
132 | models.sort(key=lambda m: "{0:0>15}".format(m))
133 | model = models[-1]
134 | else:
135 | model = "model_{}.pt".format(checkpoint)
136 |
137 | load_path = os.path.join(load_run, model)
138 | return load_path
139 |
140 |
141 | def update_cfg_from_args(env_cfg, cfg_train, args):
142 | # seed
143 | if env_cfg is not None:
144 | # num envs
145 | if args.num_envs is not None:
146 | env_cfg.env.num_envs = args.num_envs
147 | if cfg_train is not None:
148 | if args.seed is not None:
149 | cfg_train.seed = args.seed
150 | # alg runner parameters
151 | if args.max_iterations is not None:
152 | cfg_train.runner.max_iterations = args.max_iterations
153 | if args.resume:
154 | cfg_train.runner.resume = args.resume
155 | if args.experiment_name is not None:
156 | cfg_train.runner.experiment_name = args.experiment_name
157 | if args.run_name is not None:
158 | cfg_train.runner.run_name = args.run_name
159 | if args.load_run is not None:
160 | cfg_train.runner.load_run = args.load_run
161 | if args.checkpoint is not None:
162 | cfg_train.runner.checkpoint = args.checkpoint
163 |
164 | return env_cfg, cfg_train
165 |
166 |
167 | def get_args():
168 | custom_parameters = [
169 | {
170 | "name": "--task",
171 | "type": str,
172 | "default": "XBotL_free",
173 |             "help": "Name of the registered task to train or play.",
174 | },
175 | {
176 | "name": "--resume",
177 | "action": "store_true",
178 | "default": False,
179 | "help": "Resume training from a checkpoint",
180 | },
181 | {
182 | "name": "--experiment_name",
183 | "type": str,
184 | "help": "Name of the experiment to run or load. Overrides config file if provided.",
185 | },
186 | {
187 | "name": "--run_name",
188 | "type": str,
189 | "help": "Name of the run. Overrides config file if provided.",
190 | },
191 | {
192 | "name": "--load_run",
193 | "type": str,
194 | "help": "Name of the run to load when resume=True. If -1: will load the last run. Overrides config file if provided.",
195 | },
196 | {
197 | "name": "--checkpoint",
198 | "type": int,
199 | "help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided.",
200 | },
201 | {
202 | "name": "--headless",
203 | "action": "store_true",
204 | "default": False,
205 | "help": "Force display off at all times",
206 | },
207 | {
208 | "name": "--horovod",
209 | "action": "store_true",
210 | "default": False,
211 | "help": "Use horovod for multi-gpu training",
212 | },
213 | {
214 | "name": "--rl_device",
215 | "type": str,
216 | "default": "cuda:0",
217 | "help": "Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)",
218 | },
219 | {
220 | "name": "--num_envs",
221 | "type": int,
222 | "help": "Number of environments to create. Overrides config file if provided.",
223 | },
224 | {
225 | "name": "--seed",
226 | "type": int,
227 | "help": "Random seed. Overrides config file if provided.",
228 | },
229 | {
230 | "name": "--max_iterations",
231 | "type": int,
232 | "help": "Maximum number of training iterations. Overrides config file if provided.",
233 | },
234 | ]
235 | # parse arguments
236 | args = gymutil.parse_arguments(
237 | description="RL Policy", custom_parameters=custom_parameters
238 | )
239 |
240 |     # name alignment
241 | args.sim_device_id = args.compute_device_id
242 | args.sim_device = args.sim_device_type
243 | if args.sim_device == "cuda":
244 | args.sim_device += f":{args.sim_device_id}"
245 | return args
246 |
247 |
248 | def export_policy_as_jit(actor_critic, path):
249 | os.makedirs(path, exist_ok=True)
250 | path = os.path.join(path, "policy_1.pt")
251 | model = copy.deepcopy(actor_critic.actor).to("cpu")
252 | traced_script_module = torch.jit.script(model)
253 | traced_script_module.save(path)
254 |
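
A small usage sketch for class_to_dict and update_class_from_dict with a toy config class (the real configs are the nested classes in humanoid/envs/base/legged_robot_config.py):

class DemoCfg:
    class env:
        num_envs = 4096
        episode_length_s = 24

cfg_dict = class_to_dict(DemoCfg)
assert cfg_dict["env"]["num_envs"] == 4096

update_class_from_dict(DemoCfg, {"env": {"num_envs": 1024}})
assert DemoCfg.env.num_envs == 1024
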
--------------------------------------------------------------------------------
/humanoid/utils/logger.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import matplotlib.pyplot as plt
33 | import numpy as np
34 | from collections import defaultdict
35 | from multiprocessing import Process, Value
36 |
37 | class Logger:
38 | def __init__(self, dt):
39 | self.state_log = defaultdict(list)
40 | self.rew_log = defaultdict(list)
41 | self.dt = dt
42 | self.num_episodes = 0
43 | self.plot_process = None
44 |
45 | def log_state(self, key, value):
46 | self.state_log[key].append(value)
47 |
48 | def log_states(self, dict):
49 | for key, value in dict.items():
50 | self.log_state(key, value)
51 |
52 | def log_rewards(self, dict, num_episodes):
53 | for key, value in dict.items():
54 | if 'rew' in key:
55 | self.rew_log[key].append(value.item() * num_episodes)
56 | self.num_episodes += num_episodes
57 |
58 | def reset(self):
59 | self.state_log.clear()
60 | self.rew_log.clear()
61 |
62 | def plot_states(self):
63 | self.plot_process = Process(target=self._plot)
64 | self.plot_process.start()
65 |
66 | def _plot(self):
67 | nb_rows = 3
68 | nb_cols = 3
69 | fig, axs = plt.subplots(nb_rows, nb_cols)
70 | for key, value in self.state_log.items():
71 | time = np.linspace(0, len(value)*self.dt, len(value))
72 | break
73 |         log = self.state_log
74 | # plot joint targets and measured positions
75 | a = axs[1, 0]
76 | if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured')
77 | if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target')
78 | a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position')
79 | a.legend()
80 | # plot joint velocity
81 | a = axs[1, 1]
82 | if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured')
83 | if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target')
84 | a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity')
85 | a.legend()
86 | # plot base vel x
87 | a = axs[0, 0]
88 | if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured')
89 | if log["command_x"]: a.plot(time, log["command_x"], label='commanded')
90 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x')
91 | a.legend()
92 | # plot base vel y
93 | a = axs[0, 1]
94 | if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured')
95 | if log["command_y"]: a.plot(time, log["command_y"], label='commanded')
96 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y')
97 | a.legend()
98 | # plot base vel yaw
99 | a = axs[0, 2]
100 | if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured')
101 | if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded')
102 | a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw')
103 | a.legend()
104 | # plot base vel z
105 | a = axs[1, 2]
106 | if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured')
107 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z')
108 | a.legend()
109 | # plot contact forces
110 | a = axs[2, 0]
111 | if log["contact_forces_z"]:
112 | forces = np.array(log["contact_forces_z"])
113 | for i in range(forces.shape[1]):
114 | a.plot(time, forces[:, i], label=f'force {i}')
115 | a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces')
116 | a.legend()
117 | # plot torque/vel curves
118 | a = axs[2, 1]
119 | if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
120 | a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
121 | a.legend()
122 | # plot torques
123 | a = axs[2, 2]
124 | if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured')
125 | a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque')
126 | a.legend()
127 | plt.show()
128 |
129 | def print_rewards(self):
130 | print("Average rewards per second:")
131 | for key, values in self.rew_log.items():
132 | mean = np.sum(np.array(values)) / self.num_episodes
133 | print(f" - {key}: {mean}")
134 | print(f"Total number of episodes: {self.num_episodes}")
135 |
136 | def __del__(self):
137 | if self.plot_process is not None:
138 | self.plot_process.kill()
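
A minimal usage sketch with synthetic values (in play.py the Logger is fed real simulation states each step); plot_states spawns a separate process, which assumes a fork start method as on Linux:

import numpy as np

logger = Logger(dt=0.01)
for t in range(100):
    logger.log_states({
        "base_vel_x": 0.5 + 0.05 * np.sin(0.1 * t),
        "command_x": 0.5,
    })
logger.plot_states()  # opens the 3x3 state figure; panels without data stay empty
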
--------------------------------------------------------------------------------
/humanoid/utils/math.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | import torch
33 | from torch import Tensor
34 | import numpy as np
35 | from isaacgym.torch_utils import quat_apply, normalize
36 | from typing import Tuple
37 |
38 | # @ torch.jit.script
39 | def quat_apply_yaw(quat, vec):
40 | quat_yaw = quat.clone().view(-1, 4)
41 | quat_yaw[:, :2] = 0.
42 | quat_yaw = normalize(quat_yaw)
43 | return quat_apply(quat_yaw, vec)
44 |
45 | # @ torch.jit.script
46 | def wrap_to_pi(angles):
47 | angles %= 2*np.pi
48 | angles -= 2*np.pi * (angles > np.pi)
49 | return angles
50 |
51 | # @ torch.jit.script
52 | def torch_rand_sqrt_float(lower, upper, shape, device):
53 | # type: (float, float, Tuple[int, int], str) -> Tensor
54 | r = 2*torch.rand(*shape, device=device) - 1
55 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r))
56 | r = (r + 1.) / 2.
57 | return (upper - lower) * r + lower
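
Quick sanity checks for the helpers above (hand-picked values; note that wrap_to_pi modifies its input in place via the %= operator):

import torch
import numpy as np

angles = torch.tensor([0.0, np.pi + 0.1, -np.pi - 0.1, 3 * np.pi])
wrapped = wrap_to_pi(angles)
assert torch.all(wrapped <= np.pi + 1e-6) and torch.all(wrapped >= -np.pi - 1e-6)

# torch_rand_sqrt_float skews samples toward the interval edges relative to a
# uniform draw, but the output always stays inside [lower, upper].
r = torch_rand_sqrt_float(-1.0, 1.0, (1000, 1), device="cpu")
assert r.min() >= -1.0 and r.max() <= 1.0
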
--------------------------------------------------------------------------------
/humanoid/utils/task_registry.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | import os
34 | from typing import Tuple
35 | from datetime import datetime
36 |
37 | from humanoid.algo import VecEnv
38 | from humanoid.algo import OnPolicyRunner
39 |
40 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
41 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params
42 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
43 |
44 | class TaskRegistry():
45 | def __init__(self):
46 | self.task_classes = {}
47 | self.env_cfgs = {}
48 | self.train_cfgs = {}
49 |
50 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO):
51 | self.task_classes[name] = task_class
52 | self.env_cfgs[name] = env_cfg
53 | self.train_cfgs[name] = train_cfg
54 |
55 | def get_task_class(self, name: str) -> VecEnv:
56 | return self.task_classes[name]
57 |
58 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]:
59 | train_cfg = self.train_cfgs[name]
60 | env_cfg = self.env_cfgs[name]
61 | # copy seed
62 | env_cfg.seed = train_cfg.seed
63 | return env_cfg, train_cfg
64 |
65 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]:
66 |         """ Creates an environment either from a registered name or from the provided config file.
67 |
68 | Args:
69 | name (string): Name of a registered env.
70 |             args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None.
71 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None.
72 |
73 | Raises:
74 | ValueError: Error if no registered env corresponds to 'name'
75 |
76 | Returns:
77 | isaacgym.VecTaskPython: The created environment
78 | Dict: the corresponding config file
79 | """
80 | # if no args passed get command line arguments
81 | if args is None:
82 | args = get_args()
83 | # check if there is a registered env with that name
84 | if name in self.task_classes:
85 | task_class = self.get_task_class(name)
86 | else:
87 | raise ValueError(f"Task with name: {name} was not registered")
88 | if env_cfg is None:
89 | # load config files
90 | env_cfg, _ = self.get_cfgs(name)
91 | # override cfg from args (if specified)
92 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args)
93 | set_seed(env_cfg.seed)
94 | # parse sim params (convert to dict first)
95 | sim_params = {"sim": class_to_dict(env_cfg.sim)}
96 | sim_params = parse_sim_params(args, sim_params)
97 | env = task_class( cfg=env_cfg,
98 | sim_params=sim_params,
99 | physics_engine=args.physics_engine,
100 | sim_device=args.sim_device,
101 | headless=args.headless)
102 | self.env_cfg_for_wandb = env_cfg
103 | return env, env_cfg
104 |
105 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default") -> Tuple[OnPolicyRunner, LeggedRobotCfgPPO]:
106 |         """ Creates the training algorithm either from a registered name or from the provided config file.
107 |
108 | Args:
109 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm)
110 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None.
111 |             args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None.
112 | train_cfg (Dict, optional): Training config file. If None 'name' will be used to get the config file. Defaults to None.
113 |             log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example).
114 |                                       Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>.
115 |
116 | Raises:
117 | ValueError: Error if neither 'name' or 'train_cfg' are provided
118 |             Warning: If both 'name' and 'train_cfg' are provided, 'name' is ignored
119 |
120 | Returns:
121 | PPO: The created algorithm
122 | Dict: the corresponding config file
123 | """
124 | # if no args passed get command line arguments
125 | if args is None:
126 | args = get_args()
127 | # if config files are passed use them, otherwise load from the name
128 | if train_cfg is None:
129 | if name is None:
130 |                 raise ValueError("Either 'name' or 'train_cfg' must be provided")
131 | # load config files
132 | _, train_cfg = self.get_cfgs(name)
133 | else:
134 | if name is not None:
135 | print(f"'train_cfg' provided -> Ignoring 'name={name}'")
136 | # override cfg from args (if specified)
137 | _, train_cfg = update_cfg_from_args(None, train_cfg, args)
138 |
139 | if log_root=="default":
140 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name)
141 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name)
142 | elif log_root is None:
143 | log_dir = None
144 | else:
145 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name)
146 |
147 | train_cfg_dict = class_to_dict(train_cfg)
148 | env_cfg_dict = class_to_dict(self.env_cfg_for_wandb)
149 | all_cfg = {**train_cfg_dict, **env_cfg_dict}
150 |
151 |         runner_class = eval(train_cfg_dict["runner_class_name"])  # resolve the runner class (e.g. OnPolicyRunner) by name
152 | runner = runner_class(env, all_cfg, log_dir, device=args.rl_device)
153 |         # save resume path before creating a new log_dir
154 | resume = train_cfg.runner.resume
155 | if resume:
156 | # load previously trained model
157 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint)
158 | print(f"Loading model from: {resume_path}")
159 | runner.load(resume_path, load_optimizer=False)
160 | return runner, train_cfg
161 |
162 | # make global task registry
163 | task_registry = TaskRegistry()
--------------------------------------------------------------------------------
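
For orientation, here is a minimal usage sketch of the TaskRegistry above, in the spirit of scripts/train.py. This is a sketch under assumptions: the task name "humanoid_ppo" and the XBotLFreeEnv/XBotLCfg/XBotLCfgPPO classes are illustrative placeholders for whatever env and config classes this repo actually registers in humanoid/envs/__init__.py.

    # sketch: register a task once, then build env + runner from CLI args
    from humanoid.utils import get_args, task_registry
    from humanoid.envs import XBotLFreeEnv, XBotLCfg, XBotLCfgPPO  # assumed names, for illustration

    task_registry.register("humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO())

    args = get_args()
    # build the vectorized env from the registered config (overridden by CLI args)
    env, env_cfg = task_registry.make_env(name="humanoid_ppo", args=args)
    # build the runner and wire up logging under <LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>
    runner, train_cfg = task_registry.make_alg_runner(env=env, name="humanoid_ppo", args=args)
    runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True)
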
/humanoid/utils/terrain.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 |
33 | import numpy as np
34 |
35 | from isaacgym import terrain_utils
36 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg
37 |
38 | class Terrain:
39 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None:
40 |
41 | self.cfg = cfg
42 | self.num_robots = num_robots
43 | self.type = cfg.mesh_type
44 | if self.type in ["none", 'plane']:
45 | return
46 | self.env_length = cfg.terrain_length
47 | self.env_width = cfg.terrain_width
48 | self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))]
49 |
50 | self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols
51 | self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3))
52 |
53 | self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale)
54 | self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale)
55 |
56 | self.border = int(cfg.border_size/self.cfg.horizontal_scale)
57 | self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border
58 | self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border
59 |
60 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16)
61 | if cfg.curriculum:
62 |             self.curriculum()
63 | elif cfg.selected:
64 | self.selected_terrain()
65 | else:
66 | self.randomized_terrain()
67 |
68 | self.heightsamples = self.height_field_raw
69 | if self.type=="trimesh":
70 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh( self.height_field_raw,
71 | self.cfg.horizontal_scale,
72 | self.cfg.vertical_scale,
73 | self.cfg.slope_treshold)
74 |
75 | def randomized_terrain(self):
76 | for k in range(self.cfg.num_sub_terrains):
77 | # Env coordinates in the world
78 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
79 |
80 | choice = np.random.uniform(0, 1)
81 | difficulty = np.random.choice([0.5, 0.75, 0.9])
82 | terrain = self.make_terrain(choice, difficulty)
83 | self.add_terrain_to_map(terrain, i, j)
84 |
85 |     def curriculum(self):
86 | for j in range(self.cfg.num_cols):
87 | for i in range(self.cfg.num_rows):
88 | difficulty = i / self.cfg.num_rows
89 | choice = j / self.cfg.num_cols + 0.001
90 |
91 | terrain = self.make_terrain(choice, difficulty)
92 | self.add_terrain_to_map(terrain, i, j)
93 |
94 | def selected_terrain(self):
95 | terrain_type = self.cfg.terrain_kwargs.pop('type')
96 | for k in range(self.cfg.num_sub_terrains):
97 | # Env coordinates in the world
98 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
99 |
100 | terrain = terrain_utils.SubTerrain("terrain",
101 | width=self.width_per_env_pixels,
102 | length=self.width_per_env_pixels,
103 |                               vertical_scale=self.cfg.vertical_scale,
104 |                               horizontal_scale=self.cfg.horizontal_scale)
105 |
106 |             eval(terrain_type)(terrain, **self.cfg.terrain_kwargs)  # call the selected terrain generator by name
107 | self.add_terrain_to_map(terrain, i, j)
108 |
109 | def make_terrain(self, choice, difficulty):
110 | terrain = terrain_utils.SubTerrain( "terrain",
111 | width=self.width_per_env_pixels,
112 | length=self.width_per_env_pixels,
113 | vertical_scale=self.cfg.vertical_scale,
114 | horizontal_scale=self.cfg.horizontal_scale)
115 | slope = difficulty * 0.4
116 | step_height = 0.05 + 0.18 * difficulty
117 | discrete_obstacles_height = 0.05 + difficulty * 0.2
118 | stepping_stones_size = 1.5 * (1.05 - difficulty)
119 | stone_distance = 0.05 if difficulty==0 else 0.1
120 | gap_size = 1. * difficulty
121 | pit_depth = 1. * difficulty
122 | if choice < self.proportions[0]:
123 | if choice < self.proportions[0]/ 2:
124 | slope *= -1
125 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.)
126 | elif choice < self.proportions[1]:
127 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.)
128 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, step=0.005, downsampled_scale=0.2)
129 | elif choice < self.proportions[3]:
130 |             if choice < self.proportions[2]:
131 |                 step_height *= -1
132 |             terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.)
133 |         elif choice < self.proportions[4]:
134 |             num_rectangles = 20
135 |             rectangle_min_size = 1.
136 |             rectangle_max_size = 2.
137 |             terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.)
138 |         elif choice < self.proportions[5]:
139 |             terrain_utils.stepping_stones_terrain(terrain, stone_size=stepping_stones_size, stone_distance=stone_distance, max_height=0., platform_size=4.)
140 |         elif choice < self.proportions[6]:
141 |             gap_terrain(terrain, gap_size=gap_size, platform_size=3.)
142 |         else:
143 |             pit_terrain(terrain, depth=pit_depth, platform_size=4.)
144 |
145 |         return terrain
146 |
147 |     def add_terrain_to_map(self, terrain, row, col):
148 |         i = row
149 |         j = col
150 |         # map coordinate system
151 |         start_x = self.border + i * self.length_per_env_pixels
152 |         end_x = self.border + (i + 1) * self.length_per_env_pixels
153 |         start_y = self.border + j * self.width_per_env_pixels
154 |         end_y = self.border + (j + 1) * self.width_per_env_pixels
155 |         self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw
156 |
157 |         env_origin_x = (i + 0.5) * self.env_length
158 |         env_origin_y = (j + 0.5) * self.env_width
159 |         x1 = int((self.env_length/2. - 1) / terrain.horizontal_scale)
160 |         x2 = int((self.env_length/2. + 1) / terrain.horizontal_scale)
161 |         y1 = int((self.env_width/2. - 1) / terrain.horizontal_scale)
162 |         y2 = int((self.env_width/2. + 1) / terrain.horizontal_scale)
163 |         env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2]) * terrain.vertical_scale
164 |         self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z]
165 |
166 | def gap_terrain(terrain, gap_size, platform_size=1.):
167 |     gap_size = int(gap_size / terrain.horizontal_scale)
168 |     platform_size = int(platform_size / terrain.horizontal_scale)
169 |
170 |     center_x = terrain.length // 2
171 |     center_y = terrain.width // 2
172 |     x1 = (terrain.length - platform_size) // 2
173 |     x2 = x1 + gap_size
174 |     y1 = (terrain.width - platform_size) // 2
175 |     y2 = y1 + gap_size
176 |
177 |     terrain.height_field_raw[center_x - x2:center_x + x2, center_y - y2:center_y + y2] = -1000
178 |     terrain.height_field_raw[center_x - x1:center_x + x1, center_y - y1:center_y + y1] = 0
179 |
180 | def pit_terrain(terrain, depth, platform_size=1.):
181 |     depth = int(depth / terrain.vertical_scale)
182 |     platform_size = int(platform_size / terrain.horizontal_scale / 2)
183 |     x1 = terrain.length // 2 - platform_size
184 |     x2 = terrain.length // 2 + platform_size
185 |     y1 = terrain.width // 2 - platform_size
186 |     y2 = terrain.width // 2 + platform_size
187 |     terrain.height_field_raw[x1:x2, y1:y2] = -depth
188 |
189 | class HumanoidTerrain(Terrain):
190 |     def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None:
191 | super().__init__(cfg, num_robots)
192 |
193 | def randomized_terrain(self):
194 | for k in range(self.cfg.num_sub_terrains):
195 | # Env coordinates in the world
196 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
197 |
198 | choice = np.random.uniform(0, 1)
199 | difficulty = np.random.uniform(0, 1)
200 | terrain = self.make_terrain(choice, difficulty)
201 | self.add_terrain_to_map(terrain, i, j)
202 |
203 | def make_terrain(self, choice, difficulty):
204 | terrain = terrain_utils.SubTerrain( "terrain",
205 | width=self.width_per_env_pixels,
206 | length=self.width_per_env_pixels,
207 | vertical_scale=self.cfg.vertical_scale,
208 | horizontal_scale=self.cfg.horizontal_scale)
209 | discrete_obstacles_height = difficulty * 0.04
210 | r_height = difficulty * 0.07
211 | h_slope = difficulty * 0.15
212 | if choice < self.proportions[0]:
213 | pass
214 | elif choice < self.proportions[1]:
215 | num_rectangles = 20
216 | rectangle_min_size = 1.
217 | rectangle_max_size = 2.
218 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.)
219 | elif choice < self.proportions[2]:
220 | terrain_utils.random_uniform_terrain(terrain, min_height=-r_height, max_height=r_height, step=0.005, downsampled_scale=0.2)
221 | elif choice < self.proportions[3]:
222 | terrain_utils.pyramid_sloped_terrain(terrain, slope=h_slope, platform_size=0.1)
223 | elif choice < self.proportions[4]:
224 | terrain_utils.pyramid_sloped_terrain(terrain, slope=-h_slope, platform_size=0.1)
225 | elif choice < self.proportions[5]:
226 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.4, step_height=discrete_obstacles_height, platform_size=1.)
227 | elif choice < self.proportions[6]:
228 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.4, step_height=-discrete_obstacles_height, platform_size=1.)
229 | else:
230 | pass
231 | return terrain
232 |
--------------------------------------------------------------------------------
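A note on how the terrain sampling above works: self.proportions stores the cumulative sums of cfg.terrain_proportions, so a uniform choice in [0, 1) falls into exactly one bucket of the elif chain in make_terrain, selecting each terrain type with its configured probability. A small self-contained illustration (the proportion values here are made up, not the repo defaults):

    import numpy as np

    terrain_proportions = [0.2, 0.2, 0.1, 0.1, 0.2, 0.1, 0.1]  # example values only
    # cumulative sums, as computed in Terrain.__init__
    proportions = [np.sum(terrain_proportions[:i + 1]) for i in range(len(terrain_proportions))]
    # -> [0.2, 0.4, 0.5, 0.6, 0.8, 0.9, 1.0]

    choice = np.random.uniform(0, 1)
    # the first cumulative bound exceeding `choice` picks the branch,
    # mirroring the `elif choice < self.proportions[k]` chain in make_terrain
    terrain_index = next(i for i, p in enumerate(proportions) if choice < p)

--------------------------------------------------------------------------------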
/images/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/images/demo.gif
--------------------------------------------------------------------------------
/logs/XBot_ppo/exported/policies/policy_example.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/logs/XBot_ppo/exported/policies/policy_example.pt
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/base_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/base_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_ankle_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_ankle_pitch_linkage1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_linkage1_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_ankle_pitch_linkage2_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_linkage2_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_ankle_pitch_motor1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_motor1_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_ankle_pitch_motor2_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_motor2_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_ankle_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_arm_base_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_arm_base_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_arm_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_arm_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_elbow_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_elbow_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_elbow_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_elbow_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_foot_ee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_foot_ee_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_ee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ee_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_index_bend_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_bend_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_index_rota_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_rota_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_index_rota_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_rota_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_index_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_mid_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_mid_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_mid_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_pinky_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_pinky_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_pinky_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_ring_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_ring_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_ring_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_thumb_bend_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_bend_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_thumb_rota_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_rota_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_thumb_rota_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_rota_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_hand_thumb_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_knee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_knee_linkage_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_linkage_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_knee_motor_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_motor_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_leg_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_leg_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_leg_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_shoulder_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_shoulder_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_shoulder_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_shoulder_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_wrist_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_wrist_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/left_wrist_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_wrist_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/neck_base_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_base_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/neck_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/neck_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/realsense_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/realsense_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_ankle_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_ankle_pitch_linkage1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_linkage1_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_ankle_pitch_linkage2_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_linkage2_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_ankle_pitch_motor1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_motor1_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_ankle_pitch_motor2_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_motor2_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_ankle_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_arm_base_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_arm_base_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_arm_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_arm_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_elbow_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_elbow_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_elbow_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_elbow_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_foot_ee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_foot_ee_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_ee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ee_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_index_bend_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_bend_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_index_rota_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_rota_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_index_rota_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_rota_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_index_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_mid_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_mid_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_mid_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_pinky_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_pinky_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_pinky_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_ring_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_ring_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_ring_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_thumb_bend_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_bend_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_thumb_rota_link1.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_link1.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_thumb_rota_link2.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_link2.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_hand_thumb_rota_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_tip.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_knee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_knee_linkage_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_linkage_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_knee_motor_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_motor_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_leg_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_leg_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_leg_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_shoulder_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_shoulder_pitch_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_shoulder_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_shoulder_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_wrist_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_wrist_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/right_wrist_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_wrist_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/waist_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/waist_roll_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/meshes/waist_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/waist_yaw_link.STL
--------------------------------------------------------------------------------
/resources/robots/XBot/terrain/uneven.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/terrain/uneven.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
31 |
32 | from setuptools import find_packages
33 | from setuptools import setup
34 |
35 | setup(
36 | name='humanoid',
37 | version='1.0.0',
38 | author='Xinyang Gu, Yen-Jen Wang, Jianyu Chen',
39 | license="BSD-3-Clause",
40 | packages=find_packages(),
41 | author_email='zlw21gxy@gmail.com, wangyenjen@berkeley.edu, jianyuchen@tsinghua.edu.cn',
42 | description='Isaac Gym environments for humanoid robot',
43 | install_requires=['isaacgym', # preview4
44 | 'wandb',
45 |                       'DateTime', # used to sort runs by month
46 | 'tensorboard',
47 | 'tqdm',
48 | 'numpy==1.23.5',
49 | 'opencv-python',
50 | 'mujoco==2.3.6',
51 | 'mujoco-python-viewer',
52 | 'matplotlib']
53 | )
54 |
--------------------------------------------------------------------------------
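
As a usage note on setup.py above: the package is normally installed in editable mode from the repository root with pip install -e . , and since the 'isaacgym' dependency is not available on PyPI, Isaac Gym (Preview 4) has to be installed separately from NVIDIA's developer site beforehand.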