├── .gitattributes ├── .gitignore ├── README.md ├── humanoid ├── __init__.py ├── algo │ ├── __init__.py │ ├── ppo │ │ ├── __init__.py │ │ ├── actor_critic.py │ │ ├── on_policy_runner.py │ │ ├── ppo.py │ │ └── rollout_storage.py │ └── vec_env.py ├── envs │ ├── __init__.py │ ├── base │ │ ├── LICENSE │ │ ├── base_config.py │ │ ├── base_task.py │ │ ├── legged_robot.py │ │ └── legged_robot_config.py │ └── custom │ │ ├── humanoid_config.py │ │ └── humanoid_env.py ├── scripts │ ├── play.py │ ├── sim2sim.py │ └── train.py └── utils │ ├── __init__.py │ ├── calculate_gait.py │ ├── helpers.py │ ├── logger.py │ ├── math.py │ ├── task_registry.py │ └── terrain.py ├── images └── demo.gif ├── logs └── XBot_ppo │ └── exported │ └── policies │ └── policy_example.pt ├── resources └── robots │ └── XBot │ ├── meshes │ ├── base_link.STL │ ├── left_ankle_pitch_link.STL │ ├── left_ankle_pitch_linkage1_link.STL │ ├── left_ankle_pitch_linkage2_link.STL │ ├── left_ankle_pitch_motor1_link.STL │ ├── left_ankle_pitch_motor2_link.STL │ ├── left_ankle_roll_link.STL │ ├── left_arm_base_link.STL │ ├── left_arm_yaw_link.STL │ ├── left_elbow_pitch_link.STL │ ├── left_elbow_yaw_link.STL │ ├── left_foot_ee_link.STL │ ├── left_hand_ee_link.STL │ ├── left_hand_index_bend_link.STL │ ├── left_hand_index_rota_link1.STL │ ├── left_hand_index_rota_link2.STL │ ├── left_hand_index_tip.STL │ ├── left_hand_link.STL │ ├── left_hand_mid_link1.STL │ ├── left_hand_mid_link2.STL │ ├── left_hand_mid_tip.STL │ ├── left_hand_pinky_link1.STL │ ├── left_hand_pinky_link2.STL │ ├── left_hand_pinky_tip.STL │ ├── left_hand_ring_link1.STL │ ├── left_hand_ring_link2.STL │ ├── left_hand_ring_tip.STL │ ├── left_hand_thumb_bend_link.STL │ ├── left_hand_thumb_rota_link1.STL │ ├── left_hand_thumb_rota_link2.STL │ ├── left_hand_thumb_tip.STL │ ├── left_knee_link.STL │ ├── left_knee_linkage_link.STL │ ├── left_knee_motor_link.STL │ ├── left_leg_pitch_link.STL │ ├── left_leg_roll_link.STL │ ├── left_leg_yaw_link.STL │ ├── left_shoulder_pitch_link.STL │ ├── left_shoulder_roll_link.STL │ ├── left_wrist_roll_link.STL │ ├── left_wrist_yaw_link.STL │ ├── neck_base_link.STL │ ├── neck_pitch_link.STL │ ├── neck_yaw_link.STL │ ├── realsense_link.STL │ ├── right_ankle_pitch_link.STL │ ├── right_ankle_pitch_linkage1_link.STL │ ├── right_ankle_pitch_linkage2_link.STL │ ├── right_ankle_pitch_motor1_link.STL │ ├── right_ankle_pitch_motor2_link.STL │ ├── right_ankle_roll_link.STL │ ├── right_arm_base_link.STL │ ├── right_arm_yaw_link.STL │ ├── right_elbow_pitch_link.STL │ ├── right_elbow_yaw_link.STL │ ├── right_foot_ee_link.STL │ ├── right_hand_ee_link.STL │ ├── right_hand_index_bend_link.STL │ ├── right_hand_index_rota_link1.STL │ ├── right_hand_index_rota_link2.STL │ ├── right_hand_index_tip.STL │ ├── right_hand_link.STL │ ├── right_hand_mid_link1.STL │ ├── right_hand_mid_link2.STL │ ├── right_hand_mid_tip.STL │ ├── right_hand_pinky_link1.STL │ ├── right_hand_pinky_link2.STL │ ├── right_hand_pinky_tip.STL │ ├── right_hand_ring_link1.STL │ ├── right_hand_ring_link2.STL │ ├── right_hand_ring_tip.STL │ ├── right_hand_thumb_bend_link.STL │ ├── right_hand_thumb_rota_link1.STL │ ├── right_hand_thumb_rota_link2.STL │ ├── right_hand_thumb_rota_tip.STL │ ├── right_knee_link.STL │ ├── right_knee_linkage_link.STL │ ├── right_knee_motor_link.STL │ ├── right_leg_pitch_link.STL │ ├── right_leg_roll_link.STL │ ├── right_leg_yaw_link.STL │ ├── right_shoulder_pitch_link.STL │ ├── right_shoulder_roll_link.STL │ ├── right_wrist_roll_link.STL │ ├── right_wrist_yaw_link.STL │ ├── 
waist_roll_link.STL │ └── waist_yaw_link.STL │ ├── mjcf │ ├── XBot-L-terrain.xml │ └── XBot-L.xml │ ├── terrain │ └── uneven.png │ └── urdf │ └── XBot-L.urdf └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.dae filter=lfs diff=lfs merge=lfs -text 2 | *.obj filter=lfs diff=lfs merge=lfs -text 3 | *.obj text !filter !merge !diff 4 | *.dae text !filter !merge !diff 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # These are some examples of commonly ignored file patterns. 2 | # You should customize this list as applicable to your project. 3 | # Learn more about .gitignore: 4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore 5 | 6 | # Node artifact files 7 | node_modules/ 8 | dist/ 9 | wandb/ 10 | .ipynb_checkpoints/ 11 | *.npy 12 | test_data/ 13 | *.ipynb 14 | # Compiled Java class files 15 | *.class 16 | 17 | # Compiled Python bytecode 18 | *.py[cod] 19 | 20 | # Log files 21 | *.log 22 | 23 | # Package files 24 | *.jar 25 | 26 | # Maven 27 | target/ 28 | dist/ 29 | 30 | # JetBrains IDE 31 | .idea/ 32 | 33 | # Unit test reports 34 | TEST*.xml 35 | 36 | # Generated by macOS 37 | .DS_Store 38 | 39 | # Generated by Windows 40 | Thumbs.db 41 | 42 | # Applications 43 | *.app 44 | *.exe 45 | *.war 46 | 47 | # Large media files 48 | *.mp4 49 | *.tiff 50 | *.avi 51 | *.flv 52 | *.mov 53 | *.wmv 54 | 55 | # VS Code 56 | .vscode 57 | # logs 58 | logs 59 | runs 60 | videos 61 | # resources 62 | 63 | # other 64 | *.egg-info 65 | __pycache__ 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer 2 | 3 | Project Page 4 | | 5 | arXiv 6 | | 7 | Twitter 8 | 9 | Xinyang Gu*, 10 | Yen-Jen Wang*, 11 | Jianyu Chen† 12 | 13 | *: Equal contribution. Project co-leads. †: Corresponding author. 14 | 15 | ![Demo](./images/demo.gif) 16 | 17 | Humanoid-Gym is an easy-to-use reinforcement learning (RL) framework based on Nvidia Isaac Gym, designed to train locomotion skills for humanoid robots, emphasizing zero-shot transfer from simulation to the real world. Humanoid-Gym also integrates a sim-to-sim framework from Isaac Gym to Mujoco that allows users to verify the trained policies in different physical simulations to ensure the robustness and generalization of the policies. 18 | 19 | This codebase is verified by RobotEra's XBot-S (1.2-meter tall humanoid robot) and XBot-L (1.65-meter tall humanoid robot) in a real-world environment with zero-shot sim-to-real transfer. 20 | 21 | ## Features 22 | 23 | ### 1. Humanoid Robot Training 24 | This repository offers comprehensive guidance and scripts for the training of humanoid robots. Humanoid-Gym features specialized rewards for humanoid robots, reducing the difficulty of sim-to-real transfer. In this repository, we use RobotEra's XBot-L as a primary example. It can also be used for other robots with minimal adjustments. Our resources cover setup, configuration, and execution. Our goal is to fully prepare the robot for real-world locomotion by providing in-depth training and optimization. 25 | 26 | 27 | - **Comprehensive Training Guidelines**: We offer thorough walkthroughs for each stage of the training process.
28 | - **Step-by-Step Configuration Instructions**: Our guidance is clear and succinct, ensuring an efficient setup process. 29 | - **Execution Scripts for Easy Deployment**: Utilize our pre-prepared scripts to streamline the training workflow. 30 | 31 | ### 2. Sim2Sim Support 32 | We also share our sim2sim pipeline, which allows you to transfer trained policies to highly accurate and carefully designed simulated environments. Once you acquire the robot, you can confidently deploy the RL-trained policies in real-world settings. 33 | 34 | Our simulator settings, particularly with Mujoco, are finely tuned to closely mimic real-world scenarios. This careful calibration ensures that performance in the simulated and real-world environments is closely aligned, making our simulations more trustworthy and strengthening our confidence in their real-world applicability. 35 | 36 | 37 | ### 3. Denoising World Model Learning 38 | #### Robotics: Science and Systems (RSS), 2024 (Best Paper Award Finalist) 39 | Paper 40 | | 41 | Twitter 42 | 43 | Xinyang Gu*, 44 | Yen-Jen Wang*, 45 | Xiang Zhu*, Chengming Shi*, Yanjiang Guo, Yichen Liu, 46 | Jianyu Chen† 47 | 48 | *: Equal contribution. Project co-leads. †: Corresponding author. 49 | 50 | Denoising World Model Learning (DWL) presents an advanced sim-to-real framework that integrates state estimation and system identification. This dual-method approach ensures the robot's learning and adaptation are both practical and effective in real-world contexts. 51 | 52 | - **Enhanced Sim-to-real Adaptability**: Techniques to optimize the robot's transition from simulated to real environments. 53 | - **Improved State Estimation Capabilities**: Advanced tools for precise and reliable state analysis. 54 | 55 | ### Perceptive Locomotion Learning for Humanoid Robots (Coming Soon!) 56 | Twitter 57 | 58 | ### Dexterous Hand Manipulation (Coming Soon!) 59 | Twitter 60 | 61 | ## Installation 62 | 63 | 1. Create a new Python 3.8 virtual environment with `conda create -n myenv python=3.8`. 64 | 2. For best performance, we recommend NVIDIA driver version 525 (`sudo apt install nvidia-driver-525`). The minimum supported driver version is 515; if you cannot install version 525, make sure your system has at least version 515. 65 | 3. Install PyTorch 1.13 with CUDA 11.7: 66 | - `conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia` 67 | 4. Install NumPy 1.23 with `conda install numpy=1.23`. 68 | 5. Install Isaac Gym: 69 | - Download and install Isaac Gym Preview 4 from https://developer.nvidia.com/isaac-gym. 70 | - `cd isaacgym/python && pip install -e .` 71 | - Run an example with `cd examples && python 1080_balls_of_solitude.py`. 72 | - Consult `isaacgym/docs/index.html` for troubleshooting. 73 | 6. Install humanoid-gym: 74 | - Clone this repository. 75 | - `cd humanoid-gym && pip install -e .` 76 | 77 | 78 | 79 | ## Usage Guide 80 | 81 | #### Examples 82 | 83 | ```bash 84 | # Under the directory humanoid-gym/humanoid 85 | # Launching PPO Policy Training for 'v1' Across 4096 Environments 86 | # This command initiates the PPO algorithm-based training for the humanoid task. 87 | python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 4096 88 | 89 | # Evaluating the Trained PPO Policy 'v1' 90 | # This command loads the 'v1' policy for performance assessment in its environment. 91 | # Additionally, it automatically exports a JIT model, suitable for deployment purposes. 92 | python scripts/play.py --task=humanoid_ppo --run_name v1 93 | 94 | # Implementing Simulation-to-Simulation Model Transformation 95 | # This command facilitates a sim-to-sim transformation using the exported 'v1' policy. 96 | # You must run play.py first to export the JIT model before using it with sim2sim.py. 97 | python scripts/sim2sim.py --load_model /path/to/logs/XBot_ppo/exported/policies/policy_1.pt 98 | 99 | # Run our trained policy 100 | python scripts/sim2sim.py --load_model /path/to/logs/XBot_ppo/exported/policies/policy_example.pt 101 | 102 | ``` 103 | 104 | #### 1. Default Tasks 105 | 106 | 107 | - **humanoid_ppo** 108 | - Purpose: Baseline, PPO policy, multi-frame low-level control 109 | - Observation Space: Variable $(47 \times H)$ dimensions, where $H$ is the number of frames 110 | - $[O_{t-H} \dots O_t]$ 111 | - Privileged Information: $73$ dimensions 112 | 113 | - **humanoid_dwl (coming soon)** 114 |
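To make the frame-stacked observation concrete, here is a minimal sketch of how a $(47 \times H)$ policy input can be assembled from single-frame observations. The helper name, the value of $H$, and the padding logic are illustrative assumptions, not code from this repository:

```python
# Minimal sketch (illustrative, not repo code): assembling the frame-stacked
# policy input [O_{t-H} ... O_t] from single 47-dimensional observation frames.
from collections import deque

import torch

H = 15                # assumed number of stacked frames, for illustration only
SINGLE_OBS_DIM = 47   # per-frame observation dimension stated above

history = deque(maxlen=H)

def stacked_obs(obs_t: torch.Tensor) -> torch.Tensor:
    """Append the newest frame and return the (num_envs, 47 * H) policy input."""
    if not history:                 # at episode start, pad with the first frame
        history.extend([obs_t] * H)
    history.append(obs_t)           # the deque drops the oldest frame automatically
    return torch.cat(list(history), dim=-1)

# Example: stacked_obs(torch.zeros(4096, SINGLE_OBS_DIM)).shape -> (4096, 705)
```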
115 | #### 2. PPO Policy 116 | - **Training Command**: For training the PPO policy, execute: 117 | ``` 118 | python humanoid/scripts/train.py --task=humanoid_ppo --load_run log_file_path --run_name run_name 119 | ``` 120 | - **Running a Trained Policy**: To deploy a trained PPO policy, use: 121 | ``` 122 | python humanoid/scripts/play.py --task=humanoid_ppo --load_run log_file_path --run_name run_name 123 | ``` 124 | - By default, the latest model of the last run from the experiment folder is loaded. However, other runs and model iterations can be selected by adjusting `load_run` and `checkpoint` in the training config. 125 | 126 | #### 3. Sim-to-sim 127 | - **Please note: Before initiating the sim-to-sim process, ensure that you run `play.py` to export a JIT policy.** 128 | - **Mujoco-based Sim2Sim Deployment**: Utilize Mujoco for executing simulation-to-simulation (sim2sim) deployments with the command below: 129 | ``` 130 | python scripts/sim2sim.py --load_model /path/to/export/model.pt 131 | ``` 132 | 133 | 134 | #### 4. Parameters 135 | - **CPU and GPU Usage**: To run simulations on the CPU, set both `--sim_device=cpu` and `--rl_device=cpu`. For GPU operations, specify `--sim_device=cuda:{0,1,2...}` and `--rl_device=cuda:{0,1,2...}` accordingly. Please note that `CUDA_VISIBLE_DEVICES` is not applicable, and the `--sim_device` and `--rl_device` settings must refer to the same device. 136 | - **Headless Operation**: Include `--headless` for operations without rendering. 137 | - **Rendering Control**: Press 'v' to toggle rendering during training. 138 | - **Policy Location**: Trained policies are saved in `humanoid/logs/<experiment_name>/<date_time>_<run_name>/model_<iteration>.pt`. 139 | 140 | #### 5. Command-Line Arguments 141 | For RL training, please refer to `humanoid/utils/helpers.py#L161`. 142 | For the sim-to-sim process, please refer to `humanoid/scripts/sim2sim.py#L169`. 143 | 144 | ## Code Structure 145 | 146 | 1. Every environment hinges on an `env` file (`legged_robot.py`) and a `configuration` file (`legged_robot_config.py`). The latter houses two classes: `LeggedRobotCfg` (all environment parameters) and `LeggedRobotCfgPPO` (all training parameters). 147 | 2. Both `env` and `config` classes use inheritance. 148 | 3. Each non-zero reward scale specified in `cfg` adds a reward function of the corresponding name to the total reward. 149 | 4. Tasks must be registered with `task_registry.register(name, EnvClass, EnvConfig, TrainConfig)`. Registration may occur within `envs/__init__.py`, or outside of this repository.
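As a concrete illustration of points 3 and 4, the sketch below follows the legged_gym-style conventions this codebase inherits. The class names (`MyRobotCfg`, `MyRobotCfgPPO`, `MyRobotEnv`), the exact import paths, and the `_reward_<name>` lookup are assumptions for illustration, not code from this repository:

```python
# Illustrative sketch (names, import paths, and the _reward_<name> lookup follow
# legged_gym-style conventions and are assumptions, not code from this repo).
import torch

from humanoid.envs.base.legged_robot import LeggedRobot
from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
from humanoid.utils.task_registry import task_registry

class MyRobotCfg(LeggedRobotCfg):          # all environment parameters
    class rewards(LeggedRobotCfg.rewards):
        class scales:
            tracking_lin_vel = 1.0         # summed via _reward_tracking_lin_vel()
            torques = -1e-5                # negative scale acts as a penalty
            feet_air_time = 0.0            # zero scale: this term is skipped

class MyRobotCfgPPO(LeggedRobotCfgPPO):    # all training parameters
    class runner(LeggedRobotCfgPPO.runner):
        experiment_name = "my_robot"
        run_name = "v1"

class MyRobotEnv(LeggedRobot):
    def _reward_tracking_lin_vel(self):
        # Example reward term: track the commanded planar base velocity.
        error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
        return torch.exp(-error / self.cfg.rewards.tracking_sigma)

# Point 4: register the task, here or in humanoid/envs/__init__.py.
task_registry.register("my_robot_ppo", MyRobotEnv, MyRobotCfg(), MyRobotCfgPPO())
```

If registered like this, the new task would then be selectable by name, e.g. `python scripts/train.py --task=my_robot_ppo`.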
150 | 151 | 152 | ## Add a new environment 153 | 154 | The base environment `legged_robot` constructs a rough-terrain locomotion task. The corresponding configuration does not specify a robot asset (URDF/MJCF) and defines no reward scales. 155 | 156 | 1. If you need to add a new environment, create a new folder in the `envs/` directory with a configuration file named `<your_env>_config.py`. The new configuration should inherit from existing environment configurations. 157 | 2. If adding a new robot: 158 | - Insert the corresponding assets in the `resources/` folder. 159 | - In the `cfg` file, set the path to the asset, define body names, default_joint_positions, and PD gains. Specify the desired `train_cfg` and the environment's name (Python class). 160 | - In the `train_cfg`, set the `experiment_name` and `run_name`. 161 | 3. If needed, create your environment in `<your_env>.py`. Inherit from existing environments, override desired functions, and/or add your reward functions. 162 | 4. Register your environment in `humanoid/envs/__init__.py`. 163 | 5. Modify or tune other parameters in your `cfg` or `cfg_train` as needed. To remove a reward, set its scale to zero. Avoid modifying the parameters of other environments! 164 | 6. If you want a new robot/environment to perform sim2sim, you may need to modify `humanoid/scripts/sim2sim.py`: 165 | - Check the joint mapping of the robot between MJCF and URDF. 166 | - Change the initial joint positions of the robot according to your trained policy. 167 | 168 | ## Troubleshooting 169 | 170 | Common errors and their fixes: 171 | 172 | ```bash 173 | # error 174 | ImportError: libpython3.8.so.1.0: cannot open shared object file: No such file or directory 175 | 176 | # solution 177 | # set the correct path 178 | export LD_LIBRARY_PATH="$HOME/miniconda3/envs/your_env/lib:$LD_LIBRARY_PATH" 179 | 180 | # OR 181 | sudo apt install libpython3.8 182 | 183 | # error 184 | AttributeError: module 'distutils' has no attribute 'version' 185 | 186 | # solution 187 | # reinstall PyTorch 1.12.0 188 | conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch 189 | 190 | # error: results from the libstdc++ version distributed with conda differing from the one used on your system to build Isaac Gym 191 | ImportError: /home/roboterax/anaconda3/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.20` not found (required by /home/roboterax/carbgym/python/isaacgym/_bindings/linux64/gym_36.so) 192 | 193 | # solution 194 | mkdir ${YOUR_CONDA_ENV}/lib/_unused 195 | mv ${YOUR_CONDA_ENV}/lib/libstdc++* ${YOUR_CONDA_ENV}/lib/_unused 196 | ``` 197 | 198 | ## Citation 199 | 200 | Please cite the following if you use this code or parts of it: 201 | ``` 202 | @article{gu2024humanoid, 203 | title={Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer}, 204 | author={Gu, Xinyang and Wang, Yen-Jen and Chen, Jianyu}, 205 | journal={arXiv preprint arXiv:2404.05695}, 206 | year={2024} 207 | } 208 | 209 | @inproceedings{gu2024advancing, 210 | title={Advancing Humanoid Locomotion: Mastering Challenging Terrains with Denoising World Model Learning}, 211 | author={Gu, Xinyang and Wang, Yen-Jen and Zhu, Xiang and Shi, Chengming and Guo, Yanjiang and Liu, Yichen and Chen, Jianyu}, 212 | booktitle={Robotics: Science and Systems}, 213 | year={2024}, 214 | url={https://enriquecoronadozu.github.io/rssproceedings2024/rss20/p058.pdf} 215 | } 216 | ``` 217 | 218 | ## Acknowledgment 219 | 220 | The implementation of Humanoid-Gym relies on resources from
[legged_gym](https://github.com/leggedrobotics/legged_gym) and [rsl_rl](https://github.com/leggedrobotics/rsl_rl) projects, created by the Robotic Systems Lab. We specifically utilize the `LeggedRobot` implementation from their research to enhance our codebase. 221 | 222 | ## Any Questions? 223 | 224 | If you have any more questions, please contact [support@robotera.com](mailto:support@robotera.com) or create an issue in this repository. 225 | -------------------------------------------------------------------------------- /humanoid/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import os 34 | 35 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 36 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'humanoid', 'envs') 37 | -------------------------------------------------------------------------------- /humanoid/algo/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | from .vec_env import VecEnv 34 | from .ppo import * -------------------------------------------------------------------------------- /humanoid/algo/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | 33 | from .ppo import PPO 34 | from .on_policy_runner import OnPolicyRunner 35 | from .actor_critic import ActorCritic 36 | from .rollout_storage import RolloutStorage 37 | -------------------------------------------------------------------------------- /humanoid/algo/ppo/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | import torch 33 | import torch.nn as nn 34 | from torch.distributions import Normal 35 | 36 | class ActorCritic(nn.Module): 37 | def __init__(self, num_actor_obs, 38 | num_critic_obs, 39 | num_actions, 40 | actor_hidden_dims=[256, 256, 256], 41 | critic_hidden_dims=[256, 256, 256], 42 | init_noise_std=1.0, 43 | activation = nn.ELU(), 44 | **kwargs): 45 | if kwargs: 46 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()])) 47 | super(ActorCritic, self).__init__() 48 | 49 | 50 | mlp_input_dim_a = num_actor_obs 51 | mlp_input_dim_c = num_critic_obs 52 | # Policy 53 | actor_layers = [] 54 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 55 | actor_layers.append(activation) 56 | for l in range(len(actor_hidden_dims)): 57 | if l == len(actor_hidden_dims) - 1: 58 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 59 | else: 60 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 61 | actor_layers.append(activation) 62 | self.actor = nn.Sequential(*actor_layers) 63 | 64 | # Value function 65 | critic_layers = [] 66 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 67 | critic_layers.append(activation) 68 | for l in range(len(critic_hidden_dims)): 69 | if l == len(critic_hidden_dims) - 1: 70 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 71 | else: 72 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 73 | critic_layers.append(activation) 74 | self.critic = nn.Sequential(*critic_layers) 75 | 76 | print(f"Actor MLP: {self.actor}") 77 | print(f"Critic MLP: {self.critic}") 78 | 79 | # Action noise 80 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 81 | self.distribution = None 82 | # disable args validation for speedup 83 | Normal.set_default_validate_args = False 84 | 85 | 86 | @staticmethod 87 | # not used at the moment 88 | def init_weights(sequential, scales): 89 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 90 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 91 | 92 | 93 | def reset(self, dones=None): 94 | pass 95 | 96 | def forward(self): 97 | raise NotImplementedError 98 | 99 | @property 100 | def action_mean(self): 101 | return self.distribution.mean 102 | 103 | @property 104 | def action_std(self): 105 | return self.distribution.stddev 106 | 107 | @property 108 | def entropy(self): 109 | return self.distribution.entropy().sum(dim=-1) 110 | 111 | def update_distribution(self, observations): 112 | mean = self.actor(observations) 113 | self.distribution = Normal(mean, mean*0. + self.std) 114 | 115 | def act(self, observations, **kwargs): 116 | self.update_distribution(observations) 117 | return self.distribution.sample() 118 | 119 | def get_actions_log_prob(self, actions): 120 | return self.distribution.log_prob(actions).sum(dim=-1) 121 | 122 | def act_inference(self, observations): 123 | actions_mean = self.actor(observations) 124 | return actions_mean 125 | 126 | def evaluate(self, critic_observations, **kwargs): 127 | value = self.critic(critic_observations) 128 | return value -------------------------------------------------------------------------------- /humanoid/algo/ppo/on_policy_runner.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | import os 33 | import time 34 | import torch 35 | import wandb 36 | import statistics 37 | from collections import deque 38 | from datetime import datetime 39 | from .ppo import PPO 40 | from .actor_critic import ActorCritic 41 | from humanoid.algo.vec_env import VecEnv 42 | from torch.utils.tensorboard import SummaryWriter 43 | 44 | 45 | class OnPolicyRunner: 46 | 47 | def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"): 48 | 49 | self.cfg = train_cfg["runner"] 50 | self.alg_cfg = train_cfg["algorithm"] 51 | self.policy_cfg = train_cfg["policy"] 52 | self.all_cfg = train_cfg 53 | self.wandb_run_name = ( 54 | datetime.now().strftime("%b%d_%H-%M-%S") 55 | + "_" 56 | + train_cfg["runner"]["experiment_name"] 57 | + "_" 58 | + train_cfg["runner"]["run_name"] 59 | ) 60 | self.device = device 61 | self.env = env 62 | if self.env.num_privileged_obs is not None: 63 | num_critic_obs = self.env.num_privileged_obs 64 | else: 65 | num_critic_obs = self.env.num_obs 66 | actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic 67 | actor_critic: ActorCritic = actor_critic_class( 68 | self.env.num_obs, num_critic_obs, self.env.num_actions, **self.policy_cfg 69 | ).to(self.device) 70 | alg_class = eval(self.cfg["algorithm_class_name"]) # PPO 71 | self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg) 72 | self.num_steps_per_env = self.cfg["num_steps_per_env"] 73 | self.save_interval = self.cfg["save_interval"] 74 | 75 | # init storage and model 76 | self.alg.init_storage( 77 | self.env.num_envs, 78 | self.num_steps_per_env, 79 | [self.env.num_obs], 80 | [self.env.num_privileged_obs], 81 | [self.env.num_actions], 82 | ) 83 | 84 | # Log 85 | self.log_dir = log_dir 86 | self.writer = None 87 | self.tot_timesteps = 0 88 | self.tot_time = 0 89 | self.current_learning_iteration = 0 90 | 91 | _, _ = self.env.reset() 92 | 93 | def learn(self, num_learning_iterations, init_at_random_ep_len=False): 94 | # initialize writer 95 | if self.log_dir is not None and self.writer is None: 96 | wandb.init( 97 | project="XBot", 98 | sync_tensorboard=True, 99 | name=self.wandb_run_name, 100 | config=self.all_cfg, 101 | ) 102 | self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10) 103 | if init_at_random_ep_len: 104 | self.env.episode_length_buf = torch.randint_like( 105 | self.env.episode_length_buf, high=int(self.env.max_episode_length) 106 | ) 107 | obs = self.env.get_observations() 108 | privileged_obs = self.env.get_privileged_observations() 109 | critic_obs = privileged_obs if privileged_obs is not None else obs 110 | obs, critic_obs = obs.to(self.device), critic_obs.to(self.device) 111 | self.alg.actor_critic.train() # switch to train mode (for dropout for example) 112 | 113 | ep_infos = [] 114 | rewbuffer = deque(maxlen=100) 115 | lenbuffer = deque(maxlen=100) 116 | cur_reward_sum = torch.zeros( 117 | self.env.num_envs, dtype=torch.float, device=self.device 118 | ) 119 | cur_episode_length = torch.zeros( 120 | self.env.num_envs, dtype=torch.float, device=self.device 121 | ) 122 | 123 | tot_iter = self.current_learning_iteration + num_learning_iterations 124 | for it in range(self.current_learning_iteration, tot_iter): 125 | start = time.time() 126 | # Rollout 127 | with torch.inference_mode(): 128 | for i in range(self.num_steps_per_env): 129 | actions = self.alg.act(obs, critic_obs) 130 | obs, privileged_obs, rewards, dones, infos = self.env.step(actions) 131 | critic_obs = privileged_obs if privileged_obs is not None else obs 132 | obs, 
critic_obs, rewards, dones = ( 133 | obs.to(self.device), 134 | critic_obs.to(self.device), 135 | rewards.to(self.device), 136 | dones.to(self.device), 137 | ) 138 | self.alg.process_env_step(rewards, dones, infos) 139 | 140 | if self.log_dir is not None: 141 | # Book keeping 142 | if "episode" in infos: 143 | ep_infos.append(infos["episode"]) 144 | cur_reward_sum += rewards 145 | cur_episode_length += 1 146 | new_ids = (dones > 0).nonzero(as_tuple=False) 147 | rewbuffer.extend( 148 | cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist() 149 | ) 150 | lenbuffer.extend( 151 | cur_episode_length[new_ids][:, 0].cpu().numpy().tolist() 152 | ) 153 | cur_reward_sum[new_ids] = 0 154 | cur_episode_length[new_ids] = 0 155 | 156 | stop = time.time() 157 | collection_time = stop - start 158 | 159 | # Learning step 160 | start = stop 161 | self.alg.compute_returns(critic_obs) 162 | 163 | mean_value_loss, mean_surrogate_loss = self.alg.update() 164 | stop = time.time() 165 | learn_time = stop - start 166 | if self.log_dir is not None: 167 | self.log(locals()) 168 | if it % self.save_interval == 0: 169 | self.save(os.path.join(self.log_dir, "model_{}.pt".format(it))) 170 | ep_infos.clear() 171 | 172 | self.current_learning_iteration += num_learning_iterations 173 | self.save( 174 | os.path.join( 175 | self.log_dir, "model_{}.pt".format(self.current_learning_iteration) 176 | ) 177 | ) 178 | 179 | def log(self, locs, width=80, pad=35): 180 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs 181 | self.tot_time += locs["collection_time"] + locs["learn_time"] 182 | iteration_time = locs["collection_time"] + locs["learn_time"] 183 | 184 | ep_string = f"" 185 | if locs["ep_infos"]: 186 | for key in locs["ep_infos"][0]: 187 | infotensor = torch.tensor([], device=self.device) 188 | for ep_info in locs["ep_infos"]: 189 | # handle scalar and zero dimensional tensor infos 190 | if not isinstance(ep_info[key], torch.Tensor): 191 | ep_info[key] = torch.Tensor([ep_info[key]]) 192 | if len(ep_info[key].shape) == 0: 193 | ep_info[key] = ep_info[key].unsqueeze(0) 194 | infotensor = torch.cat((infotensor, ep_info[key].to(self.device))) 195 | value = torch.mean(infotensor) 196 | self.writer.add_scalar("Episode/" + key, value, locs["it"]) 197 | ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n""" 198 | mean_std = self.alg.actor_critic.std.mean() 199 | fps = int( 200 | self.num_steps_per_env 201 | * self.env.num_envs 202 | / (locs["collection_time"] + locs["learn_time"]) 203 | ) 204 | 205 | self.writer.add_scalar( 206 | "Loss/value_function", locs["mean_value_loss"], locs["it"] 207 | ) 208 | self.writer.add_scalar( 209 | "Loss/surrogate", locs["mean_surrogate_loss"], locs["it"] 210 | ) 211 | self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"]) 212 | self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"]) 213 | self.writer.add_scalar("Perf/total_fps", fps, locs["it"]) 214 | self.writer.add_scalar( 215 | "Perf/collection time", locs["collection_time"], locs["it"] 216 | ) 217 | self.writer.add_scalar("Perf/learning_time", locs["learn_time"], locs["it"]) 218 | if len(locs["rewbuffer"]) > 0: 219 | self.writer.add_scalar( 220 | "Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"] 221 | ) 222 | self.writer.add_scalar( 223 | "Train/mean_episode_length", 224 | statistics.mean(locs["lenbuffer"]), 225 | locs["it"], 226 | ) 227 | self.writer.add_scalar( 228 | "Train/mean_reward/time", 229 | statistics.mean(locs["rewbuffer"]), 230 | self.tot_time, 
231 | ) 232 | self.writer.add_scalar( 233 | "Train/mean_episode_length/time", 234 | statistics.mean(locs["lenbuffer"]), 235 | self.tot_time, 236 | ) 237 | 238 | str = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m " 239 | 240 | if len(locs["rewbuffer"]) > 0: 241 | log_string = ( 242 | f"""{'#' * width}\n""" 243 | f"""{str.center(width, ' ')}\n\n""" 244 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 245 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 246 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 247 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 248 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""" 249 | f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n""" 250 | f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n""" 251 | ) 252 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 253 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 254 | else: 255 | log_string = ( 256 | f"""{'#' * width}\n""" 257 | f"""{str.center(width, ' ')}\n\n""" 258 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 259 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 260 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 261 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 262 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""" 263 | ) 264 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 265 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 266 | 267 | log_string += ep_string 268 | log_string += ( 269 | f"""{'-' * width}\n""" 270 | f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n""" 271 | f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n""" 272 | f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n""" 273 | f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * ( 274 | locs['num_learning_iterations'] - locs['it']):.1f}s\n""" 275 | ) 276 | print(log_string) 277 | 278 | def save(self, path, infos=None): 279 | torch.save( 280 | { 281 | "model_state_dict": self.alg.actor_critic.state_dict(), 282 | "optimizer_state_dict": self.alg.optimizer.state_dict(), 283 | "iter": self.current_learning_iteration, 284 | "infos": infos, 285 | }, 286 | path, 287 | ) 288 | 289 | def load(self, path, load_optimizer=True): 290 | loaded_dict = torch.load(path) 291 | self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"]) 292 | if load_optimizer: 293 | self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"]) 294 | self.current_learning_iteration = loaded_dict["iter"] 295 | return loaded_dict["infos"] 296 | 297 | def get_inference_policy(self, device=None): 298 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 299 | if device is not None: 300 | self.alg.actor_critic.to(device) 301 | return self.alg.actor_critic.act_inference 302 | 303 | def get_inference_critic(self, device=None): 304 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 305 | if device is not None: 306 | self.alg.actor_critic.to(device) 307 | return self.alg.actor_critic.evaluate 308 | -------------------------------------------------------------------------------- /humanoid/algo/ppo/ppo.py: 
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | import torch 33 | import torch.nn as nn 34 | import torch.optim as optim 35 | 36 | from .actor_critic import ActorCritic 37 | from .rollout_storage import RolloutStorage 38 | 39 | class PPO: 40 | actor_critic: ActorCritic 41 | def __init__(self, 42 | actor_critic, 43 | num_learning_epochs=1, 44 | num_mini_batches=1, 45 | clip_param=0.2, 46 | gamma=0.998, 47 | lam=0.95, 48 | value_loss_coef=1.0, 49 | entropy_coef=0.0, 50 | learning_rate=1e-3, 51 | max_grad_norm=1.0, 52 | use_clipped_value_loss=True, 53 | schedule="fixed", 54 | desired_kl=0.01, 55 | device='cpu', 56 | ): 57 | 58 | self.device = device 59 | 60 | self.desired_kl = desired_kl 61 | self.schedule = schedule 62 | self.learning_rate = learning_rate 63 | 64 | # PPO components 65 | self.actor_critic = actor_critic 66 | self.actor_critic.to(self.device) 67 | self.storage = None # initialized later 68 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate) 69 | self.transition = RolloutStorage.Transition() 70 | 71 | # PPO parameters 72 | self.clip_param = clip_param 73 | self.num_learning_epochs = num_learning_epochs 74 | self.num_mini_batches = num_mini_batches 75 | self.value_loss_coef = value_loss_coef 76 | self.entropy_coef = entropy_coef 77 | self.gamma = gamma 78 | self.lam = lam 79 | self.max_grad_norm = max_grad_norm 80 | self.use_clipped_value_loss = use_clipped_value_loss 81 | 82 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape): 83 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device) 84 | 85 | def test_mode(self): 86 | self.actor_critic.test() 87 | 88 | def train_mode(self): 89 | self.actor_critic.train() 90 | 91 | def act(self, obs, critic_obs): 92 | # Compute the actions and values 93 | self.transition.actions = self.actor_critic.act(obs).detach() 94 | self.transition.values = self.actor_critic.evaluate(critic_obs).detach() 95 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach() 96 | self.transition.action_mean = self.actor_critic.action_mean.detach() 97 | self.transition.action_sigma = self.actor_critic.action_std.detach() 98 | # need to record obs and critic_obs before env.step() 99 | self.transition.observations = obs 100 | self.transition.critic_observations = critic_obs 101 | return self.transition.actions 102 | 103 | def process_env_step(self, rewards, dones, infos): 104 | self.transition.rewards = rewards.clone() 105 | self.transition.dones = dones 106 | # Bootstrapping on time outs 107 | if 'time_outs' in infos: 108 | self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1) 109 | 110 | # Record the transition 111 | self.storage.add_transitions(self.transition) 112 | self.transition.clear() 113 | self.actor_critic.reset(dones) 114 | 115 | def compute_returns(self, last_critic_obs): 116 | last_values= self.actor_critic.evaluate(last_critic_obs).detach() 117 | self.storage.compute_returns(last_values, self.gamma, self.lam) 118 | 119 | def update(self): 120 | mean_value_loss = 0 121 | mean_surrogate_loss = 0 122 | 123 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 124 | for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \ 125 | old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in 
generator: 126 | 127 | 128 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0]) 129 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 130 | value_batch = self.actor_critic.evaluate(critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1]) 131 | mu_batch = self.actor_critic.action_mean 132 | sigma_batch = self.actor_critic.action_std 133 | entropy_batch = self.actor_critic.entropy 134 | 135 | # KL 136 | if self.desired_kl != None and self.schedule == 'adaptive': 137 | with torch.inference_mode(): 138 | kl = torch.sum( 139 | torch.log(sigma_batch / old_sigma_batch + 1.e-5) + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, axis=-1) 140 | kl_mean = torch.mean(kl) 141 | 142 | if kl_mean > self.desired_kl * 2.0: 143 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 144 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 145 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 146 | 147 | for param_group in self.optimizer.param_groups: 148 | param_group['lr'] = self.learning_rate 149 | 150 | 151 | # Surrogate loss 152 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 153 | surrogate = -torch.squeeze(advantages_batch) * ratio 154 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param, 155 | 1.0 + self.clip_param) 156 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 157 | 158 | # Value function loss 159 | if self.use_clipped_value_loss: 160 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param, 161 | self.clip_param) 162 | value_losses = (value_batch - returns_batch).pow(2) 163 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 164 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 165 | else: 166 | value_loss = (returns_batch - value_batch).pow(2).mean() 167 | 168 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean() 169 | 170 | # Gradient step 171 | self.optimizer.zero_grad() 172 | loss.backward() 173 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 174 | self.optimizer.step() 175 | 176 | mean_value_loss += value_loss.item() 177 | mean_surrogate_loss += surrogate_loss.item() 178 | 179 | num_updates = self.num_learning_epochs * self.num_mini_batches 180 | mean_value_loss /= num_updates 181 | mean_surrogate_loss /= num_updates 182 | self.storage.clear() 183 | 184 | return mean_value_loss, mean_surrogate_loss 185 | -------------------------------------------------------------------------------- /humanoid/algo/ppo/rollout_storage.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import torch 34 | 35 | class RolloutStorage: 36 | class Transition: 37 | def __init__(self): 38 | self.observations = None 39 | self.critic_observations = None 40 | self.actions = None 41 | self.rewards = None 42 | self.dones = None 43 | self.values = None 44 | self.actions_log_prob = None 45 | self.action_mean = None 46 | self.action_sigma = None 47 | self.hidden_states = None 48 | 49 | def clear(self): 50 | self.__init__() 51 | 52 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'): 53 | 54 | self.device = device 55 | 56 | self.obs_shape = obs_shape 57 | self.privileged_obs_shape = privileged_obs_shape 58 | self.actions_shape = actions_shape 59 | 60 | # Core 61 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device) 62 | if privileged_obs_shape[0] is not None: 63 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device) 64 | else: 65 | self.privileged_observations = None 66 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 67 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 68 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 69 | 70 | # For PPO 71 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 72 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 73 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 74 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 75 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 76 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 77 | 78 | self.num_transitions_per_env = num_transitions_per_env 79 | self.num_envs = num_envs 80 | 81 | # rnn 82 | self.saved_hidden_states_a = None 83 | self.saved_hidden_states_c = None 84 | 85 | self.step = 0 86 | 87 | def 
add_transitions(self, transition: Transition): 88 | if self.step >= self.num_transitions_per_env: 89 | raise AssertionError("Rollout buffer overflow") 90 | self.observations[self.step].copy_(transition.observations) 91 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations) 92 | self.actions[self.step].copy_(transition.actions) 93 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 94 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 95 | self.values[self.step].copy_(transition.values) 96 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 97 | self.mu[self.step].copy_(transition.action_mean) 98 | self.sigma[self.step].copy_(transition.action_sigma) 99 | self._save_hidden_states(transition.hidden_states) 100 | self.step += 1 101 | 102 | def _save_hidden_states(self, hidden_states): 103 | if hidden_states is None or hidden_states==(None, None): 104 | return 105 | # make a tuple out of GRU hidden state sto match the LSTM format 106 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],) 107 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],) 108 | 109 | # initialize if needed 110 | if self.saved_hidden_states_a is None: 111 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))] 112 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))] 113 | # copy the states 114 | for i in range(len(hid_a)): 115 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i]) 116 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i]) 117 | 118 | 119 | def clear(self): 120 | self.step = 0 121 | 122 | def compute_returns(self, last_values, gamma, lam): 123 | advantage = 0 124 | for step in reversed(range(self.num_transitions_per_env)): 125 | if step == self.num_transitions_per_env - 1: 126 | next_values = last_values 127 | else: 128 | next_values = self.values[step + 1] 129 | next_is_not_terminal = 1.0 - self.dones[step].float() 130 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 131 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 132 | self.returns[step] = advantage + self.values[step] 133 | 134 | # Compute and normalize the advantages 135 | self.advantages = self.returns - self.values 136 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 137 | 138 | def get_statistics(self): 139 | done = self.dones 140 | done[-1] = 1 141 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 142 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 143 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 144 | return trajectory_lengths.float().mean(), self.rewards.mean() 145 | 146 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 147 | batch_size = self.num_envs * self.num_transitions_per_env 148 | mini_batch_size = batch_size // num_mini_batches 149 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 150 | 151 | observations = self.observations.flatten(0, 1) 152 | if self.privileged_observations is not None: 153 | critic_observations = self.privileged_observations.flatten(0, 1) 154 | else: 155 | critic_observations = 
observations 156 | 157 | actions = self.actions.flatten(0, 1) 158 | values = self.values.flatten(0, 1) 159 | returns = self.returns.flatten(0, 1) 160 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 161 | advantages = self.advantages.flatten(0, 1) 162 | old_mu = self.mu.flatten(0, 1) 163 | old_sigma = self.sigma.flatten(0, 1) 164 | 165 | for epoch in range(num_epochs): 166 | for i in range(num_mini_batches): 167 | 168 | start = i*mini_batch_size 169 | end = (i+1)*mini_batch_size 170 | batch_idx = indices[start:end] 171 | 172 | obs_batch = observations[batch_idx] 173 | critic_observations_batch = critic_observations[batch_idx] 174 | actions_batch = actions[batch_idx] 175 | target_values_batch = values[batch_idx] 176 | returns_batch = returns[batch_idx] 177 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 178 | advantages_batch = advantages[batch_idx] 179 | old_mu_batch = old_mu[batch_idx] 180 | old_sigma_batch = old_sigma[batch_idx] 181 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 182 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None 183 | -------------------------------------------------------------------------------- /humanoid/algo/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
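An aside on the rollout storage above: `compute_returns` implements Generalized Advantage Estimation, GAE(lambda). For intuition, here is a minimal single-environment sketch of the same backward recursion. It is standalone and illustrative only, not part of the repo; all names (`gae`, the toy tensors) are made up for the example.

```python
# Minimal single-env GAE sketch mirroring RolloutStorage.compute_returns:
# delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
# A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}
import torch

def gae(rewards, values, dones, last_value, gamma=0.99, lam=0.95):
    T = rewards.shape[0]
    advantages = torch.zeros(T)
    adv = 0.0
    for t in reversed(range(T)):
        next_value = last_value if t == T - 1 else values[t + 1]
        not_terminal = 1.0 - dones[t]
        delta = rewards[t] + gamma * not_terminal * next_value - values[t]
        adv = delta + gamma * lam * not_terminal * adv
        advantages[t] = adv
    returns = advantages + values   # same identity as returns[step] above
    return advantages, returns

# toy rollout of 3 steps, episode ends at the last step
adv, ret = gae(torch.tensor([1.0, 1.0, 1.0]),
               torch.tensor([0.5, 0.5, 0.5]),
               torch.tensor([0.0, 0.0, 1.0]),
               last_value=torch.tensor(0.5))
```

The storage class then normalizes these advantages over the whole batch before `mini_batch_generator` shuffles and slices them for the PPO epochs.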
31 | 32 | import torch 33 | from typing import Tuple, Union 34 | from abc import ABC, abstractmethod 35 | 36 | # minimal interface of the environment 37 | class VecEnv(ABC): 38 | num_envs: int 39 | num_obs: int 40 | num_privileged_obs: int 41 | num_actions: int 42 | max_episode_length: int 43 | privileged_obs_buf: torch.Tensor 44 | obs_buf: torch.Tensor 45 | rew_buf: torch.Tensor 46 | reset_buf: torch.Tensor 47 | episode_length_buf: torch.Tensor # current episode duration 48 | extras: dict 49 | device: torch.device 50 | @abstractmethod 51 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 52 | pass 53 | @abstractmethod 54 | def reset(self, env_ids: Union[list, torch.Tensor]): 55 | pass 56 | @abstractmethod 57 | def get_observations(self) -> torch.Tensor: 58 | pass 59 | @abstractmethod 60 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 61 | pass -------------------------------------------------------------------------------- /humanoid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
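The `VecEnv` ABC just defined is the minimal contract the PPO runner programs against: typed buffers plus four abstract methods. As a hedged sketch of the smallest conforming implementation (illustrative only; the real environments are `BaseTask`/`LeggedRobot` further down, and `ZeroVecEnv` with its dummy dynamics is invented here):

```python
# Illustrative stub satisfying the VecEnv interface above.
import torch
from humanoid.algo.vec_env import VecEnv

class ZeroVecEnv(VecEnv):
    def __init__(self, num_envs=4, num_obs=8, num_actions=2, device='cpu'):
        self.num_envs, self.num_obs, self.num_actions = num_envs, num_obs, num_actions
        self.num_privileged_obs = None
        self.max_episode_length = 100
        self.device = torch.device(device)
        self.obs_buf = torch.zeros(num_envs, num_obs, device=self.device)
        self.privileged_obs_buf = None
        self.rew_buf = torch.zeros(num_envs, device=self.device)
        self.reset_buf = torch.zeros(num_envs, device=self.device)
        self.episode_length_buf = torch.zeros(num_envs, dtype=torch.long, device=self.device)
        self.extras = {}

    def step(self, actions):
        # dummy dynamics: reward the mean action, never terminate
        self.episode_length_buf += 1
        self.rew_buf = actions.mean(dim=-1)
        return self.obs_buf, None, self.rew_buf, self.reset_buf, self.extras

    def reset(self, env_ids):
        self.episode_length_buf[env_ids] = 0

    def get_observations(self):
        return self.obs_buf

    def get_privileged_observations(self):
        return None
```

The five-tuple returned by `step()` (obs, privileged obs or None, rewards, resets, extras) is exactly what `OnPolicyRunner` unpacks each iteration.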
31 | 32 | 33 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 34 | from .base.legged_robot import LeggedRobot 35 | 36 | from .custom.humanoid_config import XBotLCfg, XBotLCfgPPO 37 | from .custom.humanoid_env import XBotLFreeEnv 38 | 39 | from humanoid.utils.task_registry import task_registry 40 | 41 | 42 | task_registry.register( "humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO() ) 43 | -------------------------------------------------------------------------------- /humanoid/envs/base/LICENSE: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | SPDX-License-Identifier: BSD-3-Clause 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. -------------------------------------------------------------------------------- /humanoid/envs/base/base_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. 
Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import inspect 33 | 34 | class BaseConfig: 35 | def __init__(self) -> None: 36 | """ Initializes all member classes recursively. Ignores all names starting with '__' (built-in methods).""" 37 | self.init_member_classes(self) 38 | 39 | @staticmethod 40 | def init_member_classes(obj): 41 | # iterate over all attribute names 42 | for key in dir(obj): 43 | # disregard builtin attributes 44 | # if key.startswith("__"): 45 | if key == "__class__": 46 | continue 47 | # get the corresponding attribute object 48 | var = getattr(obj, key) 49 | # check if the attribute is a class 50 | if inspect.isclass(var): 51 | # instantiate the class 52 | i_var = var() 53 | # set the attribute to the instance instead of the type 54 | setattr(obj, key, i_var) 55 | # recursively init members of the attribute 56 | BaseConfig.init_member_classes(i_var) -------------------------------------------------------------------------------- /humanoid/envs/base/base_task.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import sys 33 | from isaacgym import gymapi 34 | from isaacgym import gymutil 35 | import numpy as np 36 | import torch 37 | 38 | # Base class for RL tasks 39 | 40 | 41 | class BaseTask(): 42 | 43 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): 44 | self.gym = gymapi.acquire_gym() 45 | 46 | self.sim_params = sim_params 47 | self.physics_engine = physics_engine 48 | self.sim_device = sim_device 49 | sim_device_type, self.sim_device_id = gymutil.parse_device_str( 50 | self.sim_device) 51 | self.headless = headless 52 | 53 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by PhysX. 54 | if sim_device_type == 'cuda' and sim_params.use_gpu_pipeline: 55 | self.device = self.sim_device 56 | else: 57 | self.device = 'cpu' 58 | 59 | # graphics device for rendering, -1 for no rendering 60 | self.graphics_device_id = self.sim_device_id 61 | 62 | self.num_envs = cfg.env.num_envs 63 | self.num_obs = cfg.env.num_observations 64 | self.num_privileged_obs = cfg.env.num_privileged_obs 65 | self.num_actions = cfg.env.num_actions 66 | 67 | # optimization flags for pytorch JIT 68 | torch._C._jit_set_profiling_mode(False) 69 | torch._C._jit_set_profiling_executor(False) 70 | 71 | # allocate buffers 72 | self.obs_buf = torch.zeros( 73 | self.num_envs, self.num_obs, device=self.device, dtype=torch.float) 74 | self.rew_buf = torch.zeros( 75 | self.num_envs, device=self.device, dtype=torch.float) 76 | # new reward buffers for exp rewards 77 | self.neg_reward_buf = torch.zeros( 78 | self.num_envs, device=self.device, dtype=torch.float) 79 | self.pos_reward_buf = torch.zeros( 80 | self.num_envs, device=self.device, dtype=torch.float) 81 | 82 | self.reset_buf = torch.ones( 83 | self.num_envs, device=self.device, dtype=torch.long) 84 | self.episode_length_buf = torch.zeros( 85 | self.num_envs, device=self.device, dtype=torch.long) 86 | self.time_out_buf = torch.zeros( 87 | self.num_envs, device=self.device, dtype=torch.bool) 88 | if self.num_privileged_obs is not None: 89 | self.privileged_obs_buf = torch.zeros( 90 | self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float) 91 | else: 92 | self.privileged_obs_buf = None 93 | 94 | self.extras = {} 95 | 96 | # create envs, sim and viewer 97 | self.create_sim() 98 | self.gym.prepare_sim(self.sim) 99 | self.enable_viewer_sync = True 100 | self.viewer = None 101 | 102 | # if running with a viewer, set up keyboard shortcuts and camera 103 | if not self.headless: 104 | # subscribe to keyboard shortcuts 105 | self.viewer = self.gym.create_viewer( 106 | self.sim, gymapi.CameraProperties()) 107 | self.gym.subscribe_viewer_keyboard_event( 108 | self.viewer, gymapi.KEY_ESCAPE, "QUIT") 109 | self.gym.subscribe_viewer_keyboard_event( 110 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync") 111 | 112 | camera_properties
= gymapi.CameraProperties() 113 | camera_properties.width = 720 114 | camera_properties.height = 480 115 | camera_handle = self.gym.create_camera_sensor( 116 | self.envs[0], camera_properties) 117 | self.camera_handle = camera_handle 118 | else: 119 | # pass 120 | camera_properties = gymapi.CameraProperties() 121 | camera_properties.width = 720 122 | camera_properties.height = 480 123 | camera_handle = self.gym.create_camera_sensor( 124 | self.envs[0], camera_properties) 125 | self.camera_handle = camera_handle 126 | 127 | def get_observations(self): 128 | return self.obs_buf 129 | 130 | def get_privileged_observations(self): 131 | return self.privileged_obs_buf 132 | 133 | def get_rma_observations(self): 134 | return self.rma_obs_buf 135 | 136 | def reset_idx(self, env_ids): 137 | """Reset selected robots""" 138 | raise NotImplementedError 139 | 140 | def reset(self): 141 | """ Reset all robots""" 142 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 143 | obs, privileged_obs, _, _, _ = self.step(torch.zeros( 144 | self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 145 | return obs, privileged_obs 146 | 147 | def step(self, actions): 148 | raise NotImplementedError 149 | 150 | def render(self, sync_frame_time=True): 151 | if self.viewer: 152 | # check for window closed 153 | if self.gym.query_viewer_has_closed(self.viewer): 154 | sys.exit() 155 | 156 | # check for keyboard events 157 | for evt in self.gym.query_viewer_action_events(self.viewer): 158 | if evt.action == "QUIT" and evt.value > 0: 159 | sys.exit() 160 | elif evt.action == "toggle_viewer_sync" and evt.value > 0: 161 | self.enable_viewer_sync = not self.enable_viewer_sync 162 | 163 | # fetch results 164 | if self.device != 'cpu': 165 | self.gym.fetch_results(self.sim, True) 166 | 167 | # step graphics 168 | if self.enable_viewer_sync: 169 | self.gym.step_graphics(self.sim) 170 | self.gym.draw_viewer(self.viewer, self.sim, True) 171 | if sync_frame_time: 172 | self.gym.sync_frame_time(self.sim) 173 | else: 174 | self.gym.poll_viewer_events(self.viewer) 175 | -------------------------------------------------------------------------------- /humanoid/envs/base/legged_robot_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 
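The `BaseTask` API above fixes the step/reset/render contract every script in the repo relies on. Below is a hedged usage sketch of that contract; `env` and `policy` are assumed to already exist (building a real env requires Isaac Gym and goes through `task_registry`), and `rollout` is a name invented for this example.

```python
# Sketch of the typical interaction loop with a BaseTask-derived env.
import torch

def rollout(env, policy, num_steps=1000):
    obs, _ = env.reset()                      # reset() performs one zero-action step
    for _ in range(num_steps):
        with torch.no_grad():
            actions = policy(obs)             # shape: (num_envs, num_actions)
        obs, privileged_obs, rewards, dones, infos = env.step(actions)
        env.render()                          # no-op when no viewer exists
```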
18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | from .base_config import BaseConfig 33 | 34 | class LeggedRobotCfg(BaseConfig): 35 | class env: 36 | num_envs = 4096 37 | num_observations = 235 38 | num_privileged_obs = None # if not None, a privileged_obs_buf is returned by step() (critic obs for asymmetric training); otherwise None is returned 39 | num_actions = 12 40 | env_spacing = 3. # not used with heightfields/trimeshes 41 | send_timeouts = True # send time out information to the algorithm 42 | episode_length_s = 20 # episode length in seconds 43 | 44 | class terrain: 45 | mesh_type = 'trimesh' # "heightfield" # none, plane, heightfield or trimesh 46 | horizontal_scale = 0.1 # [m] 47 | vertical_scale = 0.005 # [m] 48 | border_size = 25 # [m] 49 | curriculum = True 50 | static_friction = 1.0 51 | dynamic_friction = 1.0 52 | restitution = 0. 53 | # rough terrain only: 54 | measure_heights = True 55 | measured_points_x = [-0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] # 1m x 1.6m rectangle (without center line) 56 | measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5] 57 | selected = False # select a unique terrain type and pass all arguments 58 | terrain_kwargs = None # Dict of arguments for selected terrain 59 | max_init_terrain_level = 5 # starting curriculum state 60 | terrain_length = 8. 61 | terrain_width = 8. 62 | num_rows = 10 # number of terrain rows (levels) 63 | num_cols = 20 # number of terrain cols (types) 64 | # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete] 65 | terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2] 66 | # trimesh only: 67 | slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces 68 | 69 | class commands: 70 | curriculum = False 71 | max_curriculum = 1. 72 | num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) 73 | resampling_time = 10. # time before commands are changed [s] 74 | heading_command = True # if true: compute ang vel command from heading error 75 | class ranges: 76 | lin_vel_x = [-1.0, 1.0] # min max [m/s] 77 | lin_vel_y = [-1.0, 1.0] # min max [m/s] 78 | ang_vel_yaw = [-1, 1] # min max [rad/s] 79 | heading = [-3.14, 3.14] 80 | 81 | class init_state: 82 | pos = [0.0, 0.0, 1.]
# x,y,z [m] 83 | rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] 84 | lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] 85 | ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] 86 | default_joint_angles = { # target angles when action = 0.0 87 | "joint_a": 0., 88 | "joint_b": 0.} 89 | 90 | class control: 91 | # PD Drive parameters: 92 | stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad] 93 | damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad] 94 | # action scale: target angle = actionScale * action + defaultAngle 95 | action_scale = 0.5 96 | # decimation: Number of control action updates @ sim DT per policy DT 97 | decimation = 4 98 | 99 | class asset: 100 | file = "" 101 | name = "legged_robot" # actor name 102 | foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors 103 | penalize_contacts_on = [] 104 | terminate_after_contacts_on = [] 105 | disable_gravity = False 106 | collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true"> 107 | fix_base_link = False # fix the base of the robot 108 | default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 is effort) 109 | self_collisions = 0 # 1 to disable, 0 to enable... bitwise filter 110 | replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation 111 | flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up 112 | 113 | density = 0.001 114 | angular_damping = 0. 115 | linear_damping = 0. 116 | max_angular_velocity = 1000. 117 | max_linear_velocity = 1000. 118 | armature = 0. 119 | thickness = 0.01 120 | 121 | 122 | class domain_rand: 123 | randomize_friction = True 124 | friction_range = [0.5, 1.25] 125 | randomize_base_mass = False 126 | added_mass_range = [-1., 1.] 127 | push_robots = True 128 | push_interval_s = 15 129 | max_push_vel_xy = 1. 130 | 131 | 132 | class rewards: 133 | class scales: 134 | termination = -0.0 135 | tracking_lin_vel = 1.0 136 | tracking_ang_vel = 0.5 137 | lin_vel_z = -2.0 138 | ang_vel_xy = -0.05 139 | orientation = -0. 140 | torques = -0.00001 141 | dof_vel = -0. 142 | dof_acc = -2.5e-7 143 | base_height = -0. 144 | feet_air_time = 1.0 145 | collision = -1. 146 | feet_stumble = -0.0 147 | action_rate = -0. 148 | stand_still = -0. 149 | 150 | only_positive_rewards = True # if true, negative total rewards are clipped at zero (avoids early termination problems) 151 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 152 | max_contact_force = 100. # forces above this value are penalized 153 | 154 | class normalization: 155 | class obs_scales: 156 | lin_vel = 2.0 157 | ang_vel = 0.25 158 | dof_pos = 1.0 159 | dof_vel = 0.05 160 | height_measurements = 5.0 161 | clip_observations = 100. 162 | clip_actions = 100. 163 | 164 | class noise: 165 | add_noise = True 166 | noise_level = 1.0 # scales other values 167 | class noise_scales: 168 | dof_pos = 0.01 169 | dof_vel = 1.5 170 | lin_vel = 0.1 171 | ang_vel = 0.2 172 | gravity = 0.05 173 | height_measurements = 0.1 174 | 175 | # viewer camera: 176 | class viewer: 177 | ref_env = 0 178 | pos = [10, 0, 6] # [m] 179 | lookat = [11., 5, 3.] # [m] 180 | 181 | class sim: 182 | dt = 0.005 183 | substeps = 1 184 | gravity = [0., 0.
,-9.81] # [m/s^2] 185 | up_axis = 1 # 0 is y, 1 is z 186 | 187 | class physx: 188 | num_threads = 10 189 | solver_type = 1 # 0: pgs, 1: tgs 190 | num_position_iterations = 4 191 | num_velocity_iterations = 0 192 | contact_offset = 0.01 # [m] 193 | rest_offset = 0.0 # [m] 194 | bounce_threshold_velocity = 0.5 #0.5 [m/s] 195 | max_depenetration_velocity = 1.0 196 | max_gpu_contact_pairs = 2**23 #2**24 -> needed for 8000 envs and more 197 | default_buffer_size_multiplier = 5 198 | contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 199 | 200 | class LeggedRobotCfgPPO(BaseConfig): 201 | seed = 1 202 | runner_class_name = 'OnPolicyRunner' 203 | class policy: 204 | init_noise_std = 1.0 205 | actor_hidden_dims = [512, 256, 128] 206 | critic_hidden_dims = [512, 256, 128] 207 | 208 | class algorithm: 209 | # training params 210 | value_loss_coef = 1.0 211 | use_clipped_value_loss = True 212 | clip_param = 0.2 213 | entropy_coef = 0.01 214 | num_learning_epochs = 5 215 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 216 | learning_rate = 1.e-3 #5.e-4 217 | schedule = 'adaptive' # could be adaptive, fixed 218 | gamma = 0.99 219 | lam = 0.95 220 | desired_kl = 0.01 221 | max_grad_norm = 1. 222 | 223 | class runner: 224 | policy_class_name = 'ActorCritic' 225 | algorithm_class_name = 'PPO' 226 | num_steps_per_env = 24 # per iteration 227 | max_iterations = 1500 # number of policy updates 228 | 229 | # logging 230 | save_interval = 100 # check for potential saves every this many iterations 231 | experiment_name = 'test' 232 | run_name = '' 233 | # load and resume 234 | resume = False 235 | load_run = -1 # -1 = last run 236 | checkpoint = -1 # -1 = last saved model 237 | resume_path = None # updated from load_run and chkpt -------------------------------------------------------------------------------- /humanoid/envs/custom/humanoid_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # 3. Neither the name of the copyright holder nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 29 | 30 | 31 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 32 | 33 | 34 | class XBotLCfg(LeggedRobotCfg): 35 | """ 36 | Configuration class for the XBotL humanoid robot. 37 | """ 38 | class env(LeggedRobotCfg.env): 39 | # change the observation dim 40 | frame_stack = 15 41 | c_frame_stack = 3 42 | num_single_obs = 47 43 | num_observations = int(frame_stack * num_single_obs) 44 | single_num_privileged_obs = 73 45 | num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) 46 | num_actions = 12 47 | num_envs = 4096 48 | episode_length_s = 24 # episode length in seconds 49 | use_ref_actions = False # speed up training by using reference actions 50 | 51 | class safety: 52 | # safety factors 53 | pos_limit = 1.0 54 | vel_limit = 1.0 55 | torque_limit = 0.85 56 | 57 | class asset(LeggedRobotCfg.asset): 58 | file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf' 59 | 60 | name = "XBot-L" 61 | foot_name = "ankle_roll" 62 | knee_name = "knee" 63 | 64 | terminate_after_contacts_on = ['base_link'] 65 | penalize_contacts_on = ["base_link"] 66 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter 67 | flip_visual_attachments = False 68 | replace_cylinder_with_capsule = False 69 | fix_base_link = False 70 | 71 | class terrain(LeggedRobotCfg.terrain): 72 | mesh_type = 'plane' 73 | # mesh_type = 'trimesh' 74 | curriculum = False 75 | # rough terrain only: 76 | measure_heights = False 77 | static_friction = 0.6 78 | dynamic_friction = 0.6 79 | terrain_length = 8. 80 | terrain_width = 8. 81 | num_rows = 20 # number of terrain rows (levels) 82 | num_cols = 20 # number of terrain cols (types) 83 | max_init_terrain_level = 10 # starting curriculum state 84 | # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down 85 | terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] 86 | restitution = 0. 
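A note on the dimensions in `XBotLCfg.env` above: the actor and critic inputs are frame-stacked single observations. A quick check of the arithmetic (values copied from the config; the 47-dim breakdown follows `compute_observations` later in humanoid_env.py):

```python
# Single-frame actor obs: 5 (sin/cos phase + 3 velocity commands)
# + 12 (joint pos) + 12 (joint vel) + 12 (last actions)
# + 3 (base ang vel) + 3 (base euler) = 47, stacked over 15 frames.
frame_stack, num_single_obs = 15, 47
c_frame_stack, single_num_privileged_obs = 3, 73

assert frame_stack * num_single_obs == 705                # actor input dim
assert c_frame_stack * single_num_privileged_obs == 219   # critic input dim
```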
87 | 88 | class noise: 89 | add_noise = True 90 | noise_level = 0.6 # scales other values 91 | 92 | class noise_scales: 93 | dof_pos = 0.05 94 | dof_vel = 0.5 95 | ang_vel = 0.1 96 | lin_vel = 0.05 97 | quat = 0.03 98 | height_measurements = 0.1 99 | 100 | class init_state(LeggedRobotCfg.init_state): 101 | pos = [0.0, 0.0, 0.95] 102 | 103 | default_joint_angles = { # = target angles [rad] when action = 0.0 104 | 'left_leg_roll_joint': 0., 105 | 'left_leg_yaw_joint': 0., 106 | 'left_leg_pitch_joint': 0., 107 | 'left_knee_joint': 0., 108 | 'left_ankle_pitch_joint': 0., 109 | 'left_ankle_roll_joint': 0., 110 | 'right_leg_roll_joint': 0., 111 | 'right_leg_yaw_joint': 0., 112 | 'right_leg_pitch_joint': 0., 113 | 'right_knee_joint': 0., 114 | 'right_ankle_pitch_joint': 0., 115 | 'right_ankle_roll_joint': 0., 116 | } 117 | 118 | class control(LeggedRobotCfg.control): 119 | # PD Drive parameters: 120 | stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0, 'leg_yaw': 200.0, 121 | 'knee': 350.0, 'ankle': 15} 122 | damping = {'leg_roll': 10, 'leg_pitch': 10, 'leg_yaw': 123 | 10, 'knee': 10, 'ankle': 10} 124 | 125 | # action scale: target angle = actionScale * action + defaultAngle 126 | action_scale = 0.25 127 | # decimation: Number of control action updates @ sim DT per policy DT 128 | decimation = 10 # 100 Hz 129 | 130 | class sim(LeggedRobotCfg.sim): 131 | dt = 0.001 # 1000 Hz 132 | substeps = 1 133 | up_axis = 1 # 0 is y, 1 is z 134 | 135 | class physx(LeggedRobotCfg.sim.physx): 136 | num_threads = 10 137 | solver_type = 1 # 0: pgs, 1: tgs 138 | num_position_iterations = 4 139 | num_velocity_iterations = 1 140 | contact_offset = 0.01 # [m] 141 | rest_offset = 0.0 # [m] 142 | bounce_threshold_velocity = 0.1 # [m/s] 143 | max_depenetration_velocity = 1.0 144 | max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more 145 | default_buffer_size_multiplier = 5 146 | # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 147 | contact_collection = 2 148 | 149 | class domain_rand: 150 | randomize_friction = True 151 | friction_range = [0.1, 2.0] 152 | randomize_base_mass = True 153 | added_mass_range = [-5., 5.] 154 | push_robots = True 155 | push_interval_s = 4 156 | max_push_vel_xy = 0.2 157 | max_push_ang_vel = 0.4 158 | # dynamic randomization 159 | action_delay = 0.5 160 | action_noise = 0.02 161 | 162 | class commands(LeggedRobotCfg.commands): 163 | # Commands: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) 164 | num_commands = 4 165 | resampling_time = 8. # time before commands are changed [s] 166 | heading_command = True # if true: compute ang vel command from heading error 167 | 168 | class ranges: 169 | lin_vel_x = [-0.3, 0.6] # min max [m/s] 170 | lin_vel_y = [-0.3, 0.3] # min max [m/s] 171 | ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] 172 | heading = [-3.14, 3.14] 173 | 174 | class rewards: 175 | base_height_target = 0.89 176 | min_dist = 0.2 177 | max_dist = 0.5 178 | # put some settings here for LLM parameter tuning 179 | target_joint_pos_scale = 0.17 # rad 180 | target_feet_height = 0.06 # m 181 | cycle_time = 0.64 # sec 182 | # if true, negative total rewards are clipped at zero (avoids early termination problems) 183 | only_positive_rewards = True 184 | # tracking reward = exp(-error*sigma) 185 | tracking_sigma = 5 186 | max_contact_force = 700 # Forces above this value are penalized 187 | 188 | class scales: 189 | # reference motion tracking 190 | joint_pos = 1.6 191 | feet_clearance = 1.
192 | feet_contact_number = 1.2 193 | # gait 194 | feet_air_time = 1. 195 | foot_slip = -0.05 196 | feet_distance = 0.2 197 | knee_distance = 0.2 198 | # contact 199 | feet_contact_forces = -0.01 200 | # vel tracking 201 | tracking_lin_vel = 1.2 202 | tracking_ang_vel = 1.1 203 | vel_mismatch_exp = 0.5 # lin_z; ang x,y 204 | low_speed = 0.2 205 | track_vel_hard = 0.5 206 | # base pos 207 | default_joint_pos = 0.5 208 | orientation = 1. 209 | base_height = 0.2 210 | base_acc = 0.2 211 | # energy 212 | action_smoothness = -0.002 213 | torques = -1e-5 214 | dof_vel = -5e-4 215 | dof_acc = -1e-7 216 | collision = -1. 217 | 218 | class normalization: 219 | class obs_scales: 220 | lin_vel = 2. 221 | ang_vel = 1. 222 | dof_pos = 1. 223 | dof_vel = 0.05 224 | quat = 1. 225 | height_measurements = 5.0 226 | clip_observations = 18. 227 | clip_actions = 18. 228 | 229 | 230 | class XBotLCfgPPO(LeggedRobotCfgPPO): 231 | seed = 5 232 | runner_class_name = 'OnPolicyRunner' # DWLOnPolicyRunner 233 | 234 | class policy: 235 | init_noise_std = 1.0 236 | actor_hidden_dims = [512, 256, 128] 237 | critic_hidden_dims = [768, 256, 128] 238 | 239 | class algorithm(LeggedRobotCfgPPO.algorithm): 240 | entropy_coef = 0.001 241 | learning_rate = 1e-5 242 | num_learning_epochs = 2 243 | gamma = 0.994 244 | lam = 0.9 245 | num_mini_batches = 4 246 | 247 | class runner: 248 | policy_class_name = 'ActorCritic' 249 | algorithm_class_name = 'PPO' 250 | num_steps_per_env = 60 # per iteration 251 | max_iterations = 3001 # number of policy updates 252 | 253 | # logging 254 | save_interval = 100 # check for potential saves every `save_interval` iterations 255 | experiment_name = 'XBot_ppo' 256 | run_name = '' 257 | # load and resume 258 | resume = False 259 | load_run = -1 # -1 = last run 260 | checkpoint = -1 # -1 = last saved model 261 | resume_path = None # updated from load_run and chkpt 262 | -------------------------------------------------------------------------------- /humanoid/envs/custom/humanoid_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # 3. Neither the name of the copyright holder nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 29 | 30 | 31 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg 32 | 33 | from isaacgym.torch_utils import * 34 | from isaacgym import gymtorch, gymapi 35 | 36 | import torch 37 | from humanoid.envs import LeggedRobot 38 | 39 | from humanoid.utils.terrain import HumanoidTerrain 40 | 41 | 42 | class XBotLFreeEnv(LeggedRobot): 43 | ''' 44 | XBotLFreeEnv is a class that represents a custom environment for a legged robot. 45 | 46 | Args: 47 | cfg (LeggedRobotCfg): Configuration object for the legged robot. 48 | sim_params: Parameters for the simulation. 49 | physics_engine: Physics engine used in the simulation. 50 | sim_device: Device used for the simulation. 51 | headless: Flag indicating whether the simulation should be run in headless mode. 52 | 53 | Attributes: 54 | last_feet_z (float): The z-coordinate of the last feet position. 55 | feet_height (torch.Tensor): Tensor representing the height of the feet. 56 | sim (gymtorch.GymSim): The simulation object. 57 | terrain (HumanoidTerrain): The terrain object. 58 | up_axis_idx (int): The index representing the up axis. 59 | command_input (torch.Tensor): Tensor representing the command input. 60 | privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. 61 | obs_buf (torch.Tensor): Tensor representing the observations buffer. 62 | obs_history (collections.deque): Deque containing the history of observations. 63 | critic_history (collections.deque): Deque containing the history of critic observations. 64 | 65 | Methods: 66 | _push_robots(): Randomly pushes the robots by setting a randomized base velocity. 67 | _get_phase(): Calculates the phase of the gait cycle. 68 | _get_gait_phase(): Calculates the gait phase. 69 | compute_ref_state(): Computes the reference state. 70 | create_sim(): Creates the simulation, terrain, and environments. 71 | _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. 72 | step(actions): Performs a simulation step with the given actions. 73 | compute_observations(): Computes the observations. 74 | reset_idx(env_ids): Resets the environment for the specified environment IDs. 75 | ''' 76 | def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless): 77 | super().__init__(cfg, sim_params, physics_engine, sim_device, headless) 78 | self.last_feet_z = 0.05 79 | self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) 80 | self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) 81 | self.compute_observations() 82 | 83 | def _push_robots(self): 84 | """ Randomly pushes the robots. Emulates an impulse by setting a randomized base velocity.
85 | """ 86 | max_vel = self.cfg.domain_rand.max_push_vel_xy 87 | max_push_angular = self.cfg.domain_rand.max_push_ang_vel 88 | self.rand_push_force[:, :2] = torch_rand_float( 89 | -max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y 90 | self.root_states[:, 7:9] = self.rand_push_force[:, :2] 91 | 92 | self.rand_push_torque = torch_rand_float( 93 | -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device) 94 | 95 | self.root_states[:, 10:13] = self.rand_push_torque 96 | 97 | self.gym.set_actor_root_state_tensor( 98 | self.sim, gymtorch.unwrap_tensor(self.root_states)) 99 | 100 | def _get_phase(self): 101 | cycle_time = self.cfg.rewards.cycle_time 102 | phase = self.episode_length_buf * self.dt / cycle_time 103 | return phase 104 | 105 | def _get_gait_phase(self): 106 | # return float mask 1 is stance, 0 is swing 107 | phase = self._get_phase() 108 | sin_pos = torch.sin(2 * torch.pi * phase) 109 | # Add double support phase 110 | stance_mask = torch.zeros((self.num_envs, 2), device=self.device) 111 | # left foot stance 112 | stance_mask[:, 0] = sin_pos >= 0 113 | # right foot stance 114 | stance_mask[:, 1] = sin_pos < 0 115 | # Double support phase 116 | stance_mask[torch.abs(sin_pos) < 0.1] = 1 117 | 118 | return stance_mask 119 | 120 | 121 | def compute_ref_state(self): 122 | phase = self._get_phase() 123 | sin_pos = torch.sin(2 * torch.pi * phase) 124 | sin_pos_l = sin_pos.clone() 125 | sin_pos_r = sin_pos.clone() 126 | self.ref_dof_pos = torch.zeros_like(self.dof_pos) 127 | scale_1 = self.cfg.rewards.target_joint_pos_scale 128 | scale_2 = 2 * scale_1 129 | # left foot stance phase set to default joint pos 130 | sin_pos_l[sin_pos_l > 0] = 0 131 | self.ref_dof_pos[:, 2] = sin_pos_l * scale_1 132 | self.ref_dof_pos[:, 3] = sin_pos_l * scale_2 133 | self.ref_dof_pos[:, 4] = sin_pos_l * scale_1 134 | # right foot stance phase set to default joint pos 135 | sin_pos_r[sin_pos_r < 0] = 0 136 | self.ref_dof_pos[:, 8] = sin_pos_r * scale_1 137 | self.ref_dof_pos[:, 9] = sin_pos_r * scale_2 138 | self.ref_dof_pos[:, 10] = sin_pos_r * scale_1 139 | # Double support phase 140 | self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 141 | 142 | self.ref_action = 2 * self.ref_dof_pos 143 | 144 | 145 | def create_sim(self): 146 | """ Creates simulation, terrain and evironments 147 | """ 148 | self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly 149 | self.sim = self.gym.create_sim( 150 | self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params) 151 | mesh_type = self.cfg.terrain.mesh_type 152 | if mesh_type in ['heightfield', 'trimesh']: 153 | self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs) 154 | if mesh_type == 'plane': 155 | self._create_ground_plane() 156 | elif mesh_type == 'heightfield': 157 | self._create_heightfield() 158 | elif mesh_type == 'trimesh': 159 | self._create_trimesh() 160 | elif mesh_type is not None: 161 | raise ValueError( 162 | "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") 163 | self._create_envs() 164 | 165 | 166 | def _get_noise_scale_vec(self, cfg): 167 | """ Sets a vector used to scale the noise added to the observations. 
168 | [NOTE]: Must be adapted when changing the observations structure 169 | 170 | Args: 171 | cfg (Dict): Environment config file 172 | 173 | Returns: 174 | [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] 175 | """ 176 | noise_vec = torch.zeros( 177 | self.cfg.env.num_single_obs, device=self.device) 178 | self.add_noise = self.cfg.noise.add_noise 179 | noise_scales = self.cfg.noise.noise_scales 180 | noise_vec[0: 5] = 0. # commands 181 | noise_vec[5: 17] = noise_scales.dof_pos * self.obs_scales.dof_pos 182 | noise_vec[17: 29] = noise_scales.dof_vel * self.obs_scales.dof_vel 183 | noise_vec[29: 41] = 0. # previous actions 184 | noise_vec[41: 44] = noise_scales.ang_vel * self.obs_scales.ang_vel # ang vel 185 | noise_vec[44: 47] = noise_scales.quat * self.obs_scales.quat # euler x,y,z 186 | return noise_vec 187 | 188 | 189 | def step(self, actions): 190 | if self.cfg.env.use_ref_actions: 191 | actions += self.ref_action 192 | actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) 193 | # dynamic randomization 194 | delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay 195 | actions = (1 - delay) * actions + delay * self.actions 196 | actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions 197 | return super().step(actions) 198 | 199 | 200 | def compute_observations(self): 201 | 202 | phase = self._get_phase() 203 | self.compute_ref_state() 204 | 205 | sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) 206 | cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) 207 | 208 | stance_mask = self._get_gait_phase() 209 | contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5. 210 | 211 | self.command_input = torch.cat( 212 | (sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) 213 | 214 | q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos 215 | dq = self.dof_vel * self.obs_scales.dof_vel 216 | 217 | diff = self.dof_pos - self.ref_dof_pos 218 | 219 | self.privileged_obs_buf = torch.cat(( 220 | self.command_input, # 2 + 3 221 | (self.dof_pos - self.default_joint_pd_target) * \ 222 | self.obs_scales.dof_pos, # 12 223 | self.dof_vel * self.obs_scales.dof_vel, # 12 224 | self.actions, # 12 225 | diff, # 12 226 | self.base_lin_vel * self.obs_scales.lin_vel, # 3 227 | self.base_ang_vel * self.obs_scales.ang_vel, # 3 228 | self.base_euler_xyz * self.obs_scales.quat, # 3 229 | self.rand_push_force[:, :2], # 2 230 | self.rand_push_torque, # 3 231 | self.env_frictions, # 1 232 | self.body_mass / 30., # 1 233 | stance_mask, # 2 234 | contact_mask, # 2 235 | ), dim=-1) 236 | 237 | obs_buf = torch.cat(( 238 | self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, ang_vel_yaw) 239 | q, # 12D 240 | dq, # 12D 241 | self.actions, # 12D 242 | self.base_ang_vel * self.obs_scales.ang_vel, # 3 243 | self.base_euler_xyz * self.obs_scales.quat, # 3 244 | ), dim=-1) 245 | 246 | if self.cfg.terrain.measure_heights: 247 | heights = torch.clip(self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, -1, 1.)
* self.obs_scales.height_measurements 248 | self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) 249 | 250 | if self.add_noise: 251 | obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level 252 | else: 253 | obs_now = obs_buf.clone() 254 | self.obs_history.append(obs_now) 255 | self.critic_history.append(self.privileged_obs_buf) 256 | 257 | 258 | obs_buf_all = torch.stack([self.obs_history[i] 259 | for i in range(self.obs_history.maxlen)], dim=1) # N,T,K 260 | 261 | self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K 262 | self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) 263 | 264 | def reset_idx(self, env_ids): 265 | super().reset_idx(env_ids) 266 | for i in range(self.obs_history.maxlen): 267 | self.obs_history[i][env_ids] *= 0 268 | for i in range(self.critic_history.maxlen): 269 | self.critic_history[i][env_ids] *= 0 270 | 271 | # ================================================ Rewards ================================================== # 272 | def _reward_joint_pos(self): 273 | """ 274 | Calculates the reward based on the difference between the current joint positions and the target joint positions. 275 | """ 276 | joint_pos = self.dof_pos.clone() 277 | pos_target = self.ref_dof_pos.clone() 278 | diff = joint_pos - pos_target 279 | r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) 280 | return r 281 | 282 | def _reward_feet_distance(self): 283 | """ 284 | Calculates the reward based on the distance between the feet. Penalizes feet that get too close to each other or too far apart. 285 | """ 286 | foot_pos = self.rigid_state[:, self.feet_indices, :2] 287 | foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) 288 | fd = self.cfg.rewards.min_dist 289 | max_df = self.cfg.rewards.max_dist 290 | d_min = torch.clamp(foot_dist - fd, -0.5, 0.) 291 | d_max = torch.clamp(foot_dist - max_df, 0, 0.5) 292 | return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 293 | 294 | 295 | def _reward_knee_distance(self): 296 | """ 297 | Calculates the reward based on the distance between the knees of the humanoid. 298 | """ 299 | knee_pos = self.rigid_state[:, self.knee_indices, :2] 300 | knee_dist = torch.norm(knee_pos[:, 0, :] - knee_pos[:, 1, :], dim=1) 301 | fd = self.cfg.rewards.min_dist 302 | max_df = self.cfg.rewards.max_dist / 2 303 | d_min = torch.clamp(knee_dist - fd, -0.5, 0.) 304 | d_max = torch.clamp(knee_dist - max_df, 0, 0.5) 305 | return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 306 | 307 | 308 | def _reward_foot_slip(self): 309 | """ 310 | Calculates the reward for minimizing foot slip. The reward is based on the contact forces 311 | and the speed of the feet. A contact threshold is used to determine if the foot is in contact 312 | with the ground. The speed of the foot is calculated and scaled by the contact condition. 313 | """ 314 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 315 | foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2) 316 | rew = torch.sqrt(foot_speed_norm) 317 | rew *= contact 318 | return torch.sum(rew, dim=1) 319 | 320 | def _reward_feet_air_time(self): 321 | """ 322 | Calculates the reward for feet air time, promoting longer steps. This is achieved by 323 | checking the first contact with the ground after being in the air.
The air time is 324 | limited to a maximum value for reward calculation. 325 | """ 326 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 327 | stance_mask = self._get_gait_phase() 328 | self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) 329 | self.last_contacts = contact 330 | first_contact = (self.feet_air_time > 0.) * self.contact_filt 331 | self.feet_air_time += self.dt 332 | air_time = self.feet_air_time.clamp(0, 0.5) * first_contact 333 | self.feet_air_time *= ~self.contact_filt 334 | return air_time.sum(dim=1) 335 | 336 | def _reward_feet_contact_number(self): 337 | """ 338 | Calculates a reward based on the number of feet contacts aligning with the gait phase. 339 | Rewards or penalizes depending on whether the foot contact matches the expected gait phase. 340 | """ 341 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 342 | stance_mask = self._get_gait_phase() 343 | reward = torch.where(contact == stance_mask, 1.0, -0.3) 344 | return torch.mean(reward, dim=1) 345 | 346 | def _reward_orientation(self): 347 | """ 348 | Calculates the reward for maintaining a flat base orientation. It penalizes deviation 349 | from the desired base orientation using the base euler angles and the projected gravity vector. 350 | """ 351 | quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) 352 | orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) 353 | return (quat_mismatch + orientation) / 2. 354 | 355 | def _reward_feet_contact_forces(self): 356 | """ 357 | Calculates the reward for keeping contact forces within a specified range. Penalizes 358 | high contact forces on the feet. 359 | """ 360 | return torch.sum((torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force).clip(0, 400), dim=1) 361 | 362 | def _reward_default_joint_pos(self): 363 | """ 364 | Calculates the reward for keeping joint positions close to the default positions, focusing 365 | on the yaw and roll joints: their deviation carries the main exponential penalty, while the remaining joints only receive a small norm penalty. 366 | """ 367 | joint_diff = self.dof_pos - self.default_joint_pd_target 368 | left_yaw_roll = joint_diff[:, :2] 369 | right_yaw_roll = joint_diff[:, 6: 8] 370 | yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) 371 | yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) 372 | return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) 373 | 374 | def _reward_base_height(self): 375 | """ 376 | Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. 377 | The reward is computed based on the height difference between the robot's base and the average height 378 | of its feet when they are in contact with the ground. 379 | """ 380 | stance_mask = self._get_gait_phase() 381 | measured_heights = torch.sum( 382 | self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(stance_mask, dim=1) 383 | base_height = self.root_states[:, 2] - (measured_heights - 0.05) 384 | return torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) 385 | 386 | def _reward_base_acc(self): 387 | """ 388 | Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, 389 | encouraging smoother motion.
390 | """ 391 | root_acc = self.last_root_vel - self.root_states[:, 7:13] 392 | rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) 393 | return rew 394 | 395 | 396 | def _reward_vel_mismatch_exp(self): 397 | """ 398 | Computes a reward based on the mismatch in the robot's linear and angular velocities. 399 | Encourages the robot to maintain a stable velocity by penalizing large deviations. 400 | """ 401 | lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) 402 | ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.) 403 | 404 | c_update = (lin_mismatch + ang_mismatch) / 2. 405 | 406 | return c_update 407 | 408 | def _reward_track_vel_hard(self): 409 | """ 410 | Calculates a reward for accurately tracking both linear and angular velocity commands. 411 | Penalizes deviations from specified linear and angular velocity targets. 412 | """ 413 | # Tracking of linear velocity commands (xy axes) 414 | lin_vel_error = torch.norm( 415 | self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) 416 | lin_vel_error_exp = torch.exp(-lin_vel_error * 10) 417 | 418 | # Tracking of angular velocity commands (yaw) 419 | ang_vel_error = torch.abs( 420 | self.commands[:, 2] - self.base_ang_vel[:, 2]) 421 | ang_vel_error_exp = torch.exp(-ang_vel_error * 10) 422 | 423 | linear_error = 0.2 * (lin_vel_error + ang_vel_error) 424 | 425 | return (lin_vel_error_exp + ang_vel_error_exp) / 2. - linear_error 426 | 427 | def _reward_tracking_lin_vel(self): 428 | """ 429 | Tracks linear velocity commands along the xy axes. 430 | Calculates a reward based on how closely the robot's linear velocity matches the commanded values. 431 | """ 432 | lin_vel_error = torch.sum(torch.square( 433 | self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) 434 | return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) 435 | 436 | def _reward_tracking_ang_vel(self): 437 | """ 438 | Tracks angular velocity commands for yaw rotation. 439 | Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. 440 | """ 441 | 442 | ang_vel_error = torch.square( 443 | self.commands[:, 2] - self.base_ang_vel[:, 2]) 444 | return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma) 445 | 446 | def _reward_feet_clearance(self): 447 | """ 448 | Calculates reward based on the clearance of the swing leg from the ground during movement. 449 | Encourages appropriate lift of the feet during the swing phase of the gait. 450 | """ 451 | # Compute feet contact mask 452 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 453 | 454 | # Get the z-position of the feet and compute the change in z-position 455 | feet_z = self.rigid_state[:, self.feet_indices, 2] - 0.05 456 | delta_z = feet_z - self.last_feet_z 457 | self.feet_height += delta_z 458 | self.last_feet_z = feet_z 459 | 460 | # Compute swing mask 461 | swing_mask = 1 - self._get_gait_phase() 462 | 463 | # feet height should be closed to target feet height at the peak 464 | rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01 465 | rew_pos = torch.sum(rew_pos * swing_mask, dim=1) 466 | self.feet_height *= ~contact 467 | return rew_pos 468 | 469 | def _reward_low_speed(self): 470 | """ 471 | Rewards or penalizes the robot based on its speed relative to the commanded speed. 472 | This function checks if the robot is moving too slow, too fast, or at the desired speed, 473 | and if the movement direction matches the command. 
474 | """ 475 | # Calculate the absolute value of speed and command for comparison 476 | absolute_speed = torch.abs(self.base_lin_vel[:, 0]) 477 | absolute_command = torch.abs(self.commands[:, 0]) 478 | 479 | # Define speed criteria for desired range 480 | speed_too_low = absolute_speed < 0.5 * absolute_command 481 | speed_too_high = absolute_speed > 1.2 * absolute_command 482 | speed_desired = ~(speed_too_low | speed_too_high) 483 | 484 | # Check if the speed and command directions are mismatched 485 | sign_mismatch = torch.sign( 486 | self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) 487 | 488 | # Initialize reward tensor 489 | reward = torch.zeros_like(self.base_lin_vel[:, 0]) 490 | 491 | # Assign rewards based on conditions 492 | # Speed too low 493 | reward[speed_too_low] = -1.0 494 | # Speed too high 495 | reward[speed_too_high] = 0. 496 | # Speed within desired range 497 | reward[speed_desired] = 1.2 498 | # Sign mismatch has the highest priority 499 | reward[sign_mismatch] = -2.0 500 | return reward * (self.commands[:, 0].abs() > 0.1) 501 | 502 | def _reward_torques(self): 503 | """ 504 | Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing 505 | the necessary force exerted by the motors. 506 | """ 507 | return torch.sum(torch.square(self.torques), dim=1) 508 | 509 | def _reward_dof_vel(self): 510 | """ 511 | Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and 512 | more controlled movements. 513 | """ 514 | return torch.sum(torch.square(self.dof_vel), dim=1) 515 | 516 | def _reward_dof_acc(self): 517 | """ 518 | Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring 519 | smooth and stable motion, reducing wear on the robot's mechanical parts. 520 | """ 521 | return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) 522 | 523 | def _reward_collision(self): 524 | """ 525 | Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. 526 | This encourages the robot to avoid undesired contact with objects or surfaces. 527 | """ 528 | return torch.sum(1.*(torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1) 529 | 530 | def _reward_action_smoothness(self): 531 | """ 532 | Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. 533 | This is important for achieving fluid motion and reducing mechanical stress. 534 | """ 535 | term_1 = torch.sum(torch.square( 536 | self.last_actions - self.actions), dim=1) 537 | term_2 = torch.sum(torch.square( 538 | self.actions + self.last_last_actions - 2 * self.last_actions), dim=1) 539 | term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) 540 | return term_1 + term_2 + term_3 541 | -------------------------------------------------------------------------------- /humanoid/scripts/play.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import os 33 | import cv2 34 | import numpy as np 35 | from isaacgym import gymapi 36 | from humanoid import LEGGED_GYM_ROOT_DIR 37 | 38 | # import isaacgym 39 | from humanoid.envs import * 40 | from humanoid.utils import get_args, export_policy_as_jit, task_registry, Logger 41 | from isaacgym.torch_utils import * 42 | 43 | import torch 44 | from tqdm import tqdm 45 | from datetime import datetime 46 | 47 | 48 | def play(args): 49 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 50 | # override some parameters for testing 51 | env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1) 52 | env_cfg.sim.max_gpu_contact_pairs = 2**10 53 | # env_cfg.terrain.mesh_type = 'trimesh' 54 | env_cfg.terrain.mesh_type = 'plane' 55 | env_cfg.terrain.num_rows = 5 56 | env_cfg.terrain.num_cols = 5 57 | env_cfg.terrain.curriculum = False 58 | env_cfg.terrain.max_init_terrain_level = 5 59 | env_cfg.noise.add_noise = True 60 | env_cfg.domain_rand.push_robots = False 61 | env_cfg.domain_rand.joint_angle_noise = 0. 
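    # The overrides in this block trade training-time realism for a repeatable
    # evaluation rollout: a single robot on flat ground, curriculum disabled,
    # no random pushes, and no joint-angle noise, while observation noise stays
    # enabled (add_noise above, with noise_level set just below).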
62 | env_cfg.noise.curriculum = False 63 | env_cfg.noise.noise_level = 0.5 64 | 65 | 66 | train_cfg.seed = 123145 67 | print("train_cfg.runner_class_name:", train_cfg.runner_class_name) 68 | 69 | # prepare environment 70 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 71 | env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat) 72 | 73 | obs = env.get_observations() 74 | 75 | # load policy 76 | train_cfg.runner.resume = True 77 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg) 78 | policy = ppo_runner.get_inference_policy(device=env.device) 79 | 80 | # export policy as a jit module (used to run it from C++) 81 | if EXPORT_POLICY: 82 | path = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name, 'exported', 'policies') 83 | export_policy_as_jit(ppo_runner.alg.actor_critic, path) 84 | print('Exported policy as jit script to: ', path) 85 | 86 | logger = Logger(env.dt) 87 | robot_index = 0 # which robot is used for logging 88 | joint_index = 1 # which joint is used for logging 89 | stop_state_log = 1200 # number of steps before plotting states 90 | if RENDER: 91 | camera_properties = gymapi.CameraProperties() 92 | camera_properties.width = 1920 93 | camera_properties.height = 1080 94 | h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties) 95 | camera_offset = gymapi.Vec3(1, -1, 0.5) 96 | camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), 97 | np.deg2rad(135)) 98 | actor_handle = env.gym.get_actor_handle(env.envs[0], 0) 99 | body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0) 100 | env.gym.attach_camera_to_body( 101 | h1, env.envs[0], body_handle, 102 | gymapi.Transform(camera_offset, camera_rotation), 103 | gymapi.FOLLOW_POSITION) 104 | 105 | fourcc = cv2.VideoWriter_fourcc(*"mp4v") 106 | video_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'videos') 107 | experiment_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'videos', train_cfg.runner.experiment_name) 108 | dir = os.path.join(experiment_dir, datetime.now().strftime('%b%d_%H-%M-%S')+ args.run_name + '.mp4') 109 | if not os.path.exists(video_dir): 110 | os.mkdir(video_dir) 111 | if not os.path.exists(experiment_dir): 112 | os.mkdir(experiment_dir) 113 | video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) 114 | 115 | for i in tqdm(range(stop_state_log)): 116 | 117 | actions = policy(obs.detach()) # * 0. 118 | 119 | if FIX_COMMAND: 120 | env.commands[:, 0] = 0.5 # 1.0 121 | env.commands[:, 1] = 0. 122 | env.commands[:, 2] = 0. 123 | env.commands[:, 3] = 0. 
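            # Command buffer layout (legged_gym convention): commands[:, 0] is the
            # forward velocity target v_x in m/s, [:, 1] the lateral velocity v_y,
            # [:, 2] the yaw rate in rad/s, and [:, 3] a heading setpoint from
            # which the yaw command is recomputed when heading tracking is
            # enabled in the config.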
124 | 125 | obs, critic_obs, rews, dones, infos = env.step(actions.detach()) 126 | 127 | if RENDER: 128 | env.gym.fetch_results(env.sim, True) 129 | env.gym.step_graphics(env.sim) 130 | env.gym.render_all_camera_sensors(env.sim) 131 | img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) 132 | img = np.reshape(img, (1080, 1920, 4)) 133 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 134 | video.write(img[..., :3]) 135 | 136 | logger.log_states( 137 | { 138 | 'dof_pos_target': actions[robot_index, joint_index].item() * env.cfg.control.action_scale, 139 | 'dof_pos': env.dof_pos[robot_index, joint_index].item(), 140 | 'dof_vel': env.dof_vel[robot_index, joint_index].item(), 141 | 'dof_torque': env.torques[robot_index, joint_index].item(), 142 | 'command_x': env.commands[robot_index, 0].item(), 143 | 'command_y': env.commands[robot_index, 1].item(), 144 | 'command_yaw': env.commands[robot_index, 2].item(), 145 | 'base_vel_x': env.base_lin_vel[robot_index, 0].item(), 146 | 'base_vel_y': env.base_lin_vel[robot_index, 1].item(), 147 | 'base_vel_z': env.base_lin_vel[robot_index, 2].item(), 148 | 'base_vel_yaw': env.base_ang_vel[robot_index, 2].item(), 149 | 'contact_forces_z': env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy() 150 | } 151 | ) 152 | # ====================== Log states ====================== 153 | if infos["episode"]: 154 | num_episodes = torch.sum(env.reset_buf).item() 155 | if num_episodes>0: 156 | logger.log_rewards(infos["episode"], num_episodes) 157 | 158 | logger.print_rewards() 159 | logger.plot_states() 160 | 161 | if RENDER: 162 | video.release() 163 | 164 | if __name__ == '__main__': 165 | EXPORT_POLICY = True 166 | RENDER = True 167 | FIX_COMMAND = True 168 | args = get_args() 169 | play(args) 170 | -------------------------------------------------------------------------------- /humanoid/scripts/sim2sim.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # 3. Neither the name of the copyright holder nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 29 | 30 | 31 | import math 32 | import numpy as np 33 | import mujoco, mujoco_viewer 34 | from tqdm import tqdm 35 | from collections import deque 36 | from scipy.spatial.transform import Rotation as R 37 | from humanoid import LEGGED_GYM_ROOT_DIR 38 | from humanoid.envs import XBotLCfg 39 | import torch 40 | 41 | 42 | class cmd: 43 | vx = 0.4 44 | vy = 0.0 45 | dyaw = 0.0 46 | 47 | 48 | def quaternion_to_euler_array(quat): 49 | # Ensure quaternion is in the correct format [x, y, z, w] 50 | x, y, z, w = quat 51 | 52 | # Roll (x-axis rotation) 53 | t0 = +2.0 * (w * x + y * z) 54 | t1 = +1.0 - 2.0 * (x * x + y * y) 55 | roll_x = np.arctan2(t0, t1) 56 | 57 | # Pitch (y-axis rotation) 58 | t2 = +2.0 * (w * y - z * x) 59 | t2 = np.clip(t2, -1.0, 1.0) 60 | pitch_y = np.arcsin(t2) 61 | 62 | # Yaw (z-axis rotation) 63 | t3 = +2.0 * (w * z + x * y) 64 | t4 = +1.0 - 2.0 * (y * y + z * z) 65 | yaw_z = np.arctan2(t3, t4) 66 | 67 | # Returns roll, pitch, yaw in a NumPy array in radians 68 | return np.array([roll_x, pitch_y, yaw_z]) 69 | 70 | def get_obs(data): 71 | '''Extracts an observation from the mujoco data structure 72 | ''' 73 | q = data.qpos.astype(np.double) 74 | dq = data.qvel.astype(np.double) 75 | quat = data.sensor('orientation').data[[1, 2, 3, 0]].astype(np.double) 76 | r = R.from_quat(quat) 77 | v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame 78 | omega = data.sensor('angular-velocity').data.astype(np.double) 79 | gvec = r.apply(np.array([0., 0., -1.]), inverse=True).astype(np.double) 80 | return (q, dq, quat, v, omega, gvec) 81 | 82 | def pd_control(target_q, q, kp, target_dq, dq, kd): 83 | '''Calculates torques from position commands 84 | ''' 85 | return (target_q - q) * kp + (target_dq - dq) * kd 86 | 87 | def run_mujoco(policy, cfg): 88 | """ 89 | Run the Mujoco simulation using the provided policy and configuration. 90 | 91 | Args: 92 | policy: The policy used for controlling the simulation. 93 | cfg: The configuration object containing simulation settings. 
94 | 95 | Returns: 96 | None 97 | """ 98 | model = mujoco.MjModel.from_xml_path(cfg.sim_config.mujoco_model_path) 99 | model.opt.timestep = cfg.sim_config.dt 100 | data = mujoco.MjData(model) 101 | mujoco.mj_step(model, data) 102 | viewer = mujoco_viewer.MujocoViewer(model, data) 103 | 104 | target_q = np.zeros((cfg.env.num_actions), dtype=np.double) 105 | action = np.zeros((cfg.env.num_actions), dtype=np.double) 106 | 107 | hist_obs = deque() 108 | for _ in range(cfg.env.frame_stack): 109 | hist_obs.append(np.zeros([1, cfg.env.num_single_obs], dtype=np.double)) 110 | 111 | count_lowlevel = 0 112 | 113 | 114 | for _ in tqdm(range(int(cfg.sim_config.sim_duration / cfg.sim_config.dt)), desc="Simulating..."): 115 | 116 | # Obtain an observation 117 | q, dq, quat, v, omega, gvec = get_obs(data) 118 | q = q[-cfg.env.num_actions:] 119 | dq = dq[-cfg.env.num_actions:] 120 | 121 | # 1000hz -> 100hz 122 | if count_lowlevel % cfg.sim_config.decimation == 0: 123 | 124 | obs = np.zeros([1, cfg.env.num_single_obs], dtype=np.float32) 125 | eu_ang = quaternion_to_euler_array(quat) 126 | eu_ang[eu_ang > math.pi] -= 2 * math.pi 127 | 128 | obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * cfg.sim_config.dt / 0.64) 129 | obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * cfg.sim_config.dt / 0.64) 130 | obs[0, 2] = cmd.vx * cfg.normalization.obs_scales.lin_vel 131 | obs[0, 3] = cmd.vy * cfg.normalization.obs_scales.lin_vel 132 | obs[0, 4] = cmd.dyaw * cfg.normalization.obs_scales.ang_vel 133 | obs[0, 5:17] = q * cfg.normalization.obs_scales.dof_pos 134 | obs[0, 17:29] = dq * cfg.normalization.obs_scales.dof_vel 135 | obs[0, 29:41] = action 136 | obs[0, 41:44] = omega 137 | obs[0, 44:47] = eu_ang 138 | 139 | obs = np.clip(obs, -cfg.normalization.clip_observations, cfg.normalization.clip_observations) 140 | 141 | hist_obs.append(obs) 142 | hist_obs.popleft() 143 | 144 | policy_input = np.zeros([1, cfg.env.num_observations], dtype=np.float32) 145 | for i in range(cfg.env.frame_stack): 146 | policy_input[0, i * cfg.env.num_single_obs : (i + 1) * cfg.env.num_single_obs] = hist_obs[i][0, :] 147 | action[:] = policy(torch.tensor(policy_input))[0].detach().numpy() 148 | action = np.clip(action, -cfg.normalization.clip_actions, cfg.normalization.clip_actions) 149 | 150 | target_q = action * cfg.control.action_scale 151 | 152 | 153 | target_dq = np.zeros((cfg.env.num_actions), dtype=np.double) 154 | # Generate PD control 155 | tau = pd_control(target_q, q, cfg.robot_config.kps, 156 | target_dq, dq, cfg.robot_config.kds) # Calc torques 157 | tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques 158 | data.ctrl = tau 159 | 160 | mujoco.mj_step(model, data) 161 | viewer.render() 162 | count_lowlevel += 1 163 | 164 | viewer.close() 165 | 166 | 167 | if __name__ == '__main__': 168 | import argparse 169 | 170 | parser = argparse.ArgumentParser(description='Deployment script.') 171 | parser.add_argument('--load_model', type=str, required=True, 172 | help='Run to load from.') 173 | parser.add_argument('--terrain', action='store_true', help='terrain or plane') 174 | args = parser.parse_args() 175 | 176 | class Sim2simCfg(XBotLCfg): 177 | 178 | class sim_config: 179 | if args.terrain: 180 | mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L-terrain.xml' 181 | else: 182 | mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L.xml' 183 | sim_duration = 60.0 184 | dt = 0.001 185 | decimation = 10 186 | 187 | class robot_config: 188 | kps = 
np.array([200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double) 189 | kds = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], dtype=np.double) 190 | tau_limit = 200. * np.ones(12, dtype=np.double) 191 | 192 | policy = torch.jit.load(args.load_model) 193 | run_mujoco(policy, Sim2simCfg()) 194 | -------------------------------------------------------------------------------- /humanoid/scripts/train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | from humanoid.envs import * 34 | from humanoid.utils import get_args, task_registry 35 | 36 | def train(args): 37 | env, env_cfg = task_registry.make_env(name=args.task, args=args) 38 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args) 39 | ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True) 40 | 41 | if __name__ == '__main__': 42 | args = get_args() 43 | train(args) 44 | -------------------------------------------------------------------------------- /humanoid/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict 34 | from .task_registry import task_registry 35 | from .logger import Logger 36 | from .math import * 37 | from .terrain import Terrain -------------------------------------------------------------------------------- /humanoid/utils/calculate_gait.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import numpy as np 33 | from scipy.optimize import fsolve 34 | import matplotlib.pyplot as plt 35 | 36 | def get_coefficients(h0, hswing, v0, vswing, hmax, swing_time): 37 | 38 | def equations(coeffs): 39 | a5, a4, a3, a2, a1, a0 = coeffs 40 | 41 | # Height at t=0 should be h0 42 | eq1 = a0 - h0 43 | 44 | # Height at t=swing_time should be hswing 45 | eq2 = a5 * swing_time**5 + a4 * swing_time**4 + a3 * swing_time**3 + a2 * swing_time**2 + a1 * swing_time + a0 - hswing 46 | 47 | # Velocity at t=0 should be v0 48 | eq3 = a1 - v0 49 | 50 | # Velocity at t=swing_time should be vswing 51 | eq4 = 5 * a5 * swing_time**4 + 4 * a4 * swing_time**3 + 3 * a3 * swing_time**2 + 2 * a2 * swing_time + a1 - vswing 52 | 53 | # Height at t=swing_time/2 should be hmax 54 | eq5 = a5 * (swing_time/2)**5 + a4 * (swing_time/2)**4 + a3 * (swing_time/2)**3 + a2 * (swing_time/2)**2 + a1 * (swing_time/2) + a0 - hmax 55 | 56 | # Return the deviations from the expected values. These will be minimized by fsolve. 57 | return (eq1, eq2, eq3, eq4, eq5, a5 + a4 + a3 + a2 + a1 + a0) 58 | 59 | # Solve for the coefficients using the equations above 60 | return fsolve(equations, (1, 1, 1, 1, 1, 1)) 61 | 62 | 63 | def plot_curves(coeffs, swing_time): 64 | a5, a4, a3, a2, a1, a0 = coeffs 65 | 66 | def h(t): 67 | return a5 * t**5 + a4 * t**4 + a3 * t**3 + a2 * t**2 + a1 * t + a0 68 | 69 | def v(t): 70 | return 5 * a5 * t**4 + 4 * a4 * t**3 + 3 * a3 * t**2 + 2 * a2 * t + a1 71 | 72 | # Define the acceleration function based on the coefficients 73 | def a(t): 74 | return 20 * a5 * t**3 + 12 * a4 * t**2 + 6 * a3 * t + 2 * a2 75 | 76 | t_values = np.linspace(0, swing_time, 500) 77 | h_values = h(t_values) 78 | v_values = v(t_values) 79 | a_values = a(t_values) # Compute acceleration values 80 | 81 | 82 | discrete_t_values = np.linspace(0, swing_time, 14) 83 | 84 | plt.figure(figsize=(12, 9)) 85 | 86 | plt.subplot(3, 1, 1) 87 | plt.plot(t_values, h_values, label='Height (h(t))') 88 | plt.scatter(discrete_t_values, h(discrete_t_values), color='black', label='Discrete Height') 89 | plt.title('Height Curve') 90 | plt.grid(True) 91 | plt.legend() 92 | 93 | plt.subplot(3, 1, 2) 94 | plt.plot(t_values, v_values, label='Velocity (v(t))', color='red') 95 | plt.scatter(discrete_t_values, v(discrete_t_values), color='black', label='Discrete Velocity') 96 | # print(v(discrete_t_values)) 97 | plt.title('Velocity Curve') 98 | plt.grid(True) 99 | plt.legend() 100 | 101 | # Plotting the acceleration curve 102 | plt.subplot(3, 1, 3) 103 | plt.plot(t_values, a_values/50, label='Acceleration (a(t))', color='green') 104 | plt.scatter(discrete_t_values, a(discrete_t_values)/50, color='black', label='Discrete Acceleration') 105 | # print(a(discrete_t_values)/50) 106 | plt.title('Acceleration Curve') 107 | plt.grid(True) 108 | plt.legend() 109 | 110 | plt.tight_layout() 111 | plt.show() 112 | 113 | 114 | # Set 
the constraints and swing time 115 | coeffs = get_coefficients(0, 0, 0, -0.1, 0.04, 0.26) 116 | 117 | print("Coefficients (a5, a4, a3, a2, a1, a0):") 118 | print(f"a5 = {coeffs[0]:.15f}") 119 | print(f"a4 = {coeffs[1]:.15f}") 120 | print(f"a3 = {coeffs[2]:.15f}") 121 | print(f"a2 = {coeffs[3]:.15f}") 122 | print(f"a1 = {coeffs[4]:.15f}") 123 | print(f"a0 = {coeffs[5]:.15f}") 124 | 125 | plot_curves(coeffs, 0.26) -------------------------------------------------------------------------------- /humanoid/utils/helpers.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
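# A self-contained sanity check for the swing-foot quintic solved in
# calculate_gait.py above (a sketch: it re-states the same boundary-value
# problem as the get_coefficients(0, 0, 0, -0.1, 0.04, 0.26) call rather than
# importing that module, since importing would trigger its module-level plotting).
from scipy.optimize import fsolve

h0, hswing, v0, vswing, hmax, T = 0.0, 0.0, 0.0, -0.1, 0.04, 0.26

def _gait_equations(c):
    a5, a4, a3, a2, a1, a0 = c
    h = lambda t: a5*t**5 + a4*t**4 + a3*t**3 + a2*t**2 + a1*t + a0
    v = lambda t: 5*a5*t**4 + 4*a4*t**3 + 3*a3*t**2 + 2*a2*t + a1
    # Six constraints: start/end height, start/end velocity, the mid-swing
    # apex, and the closure condition h(1) = 0.
    return (h(0) - h0, h(T) - hswing, v(0) - v0, v(T) - vswing,
            h(T/2) - hmax, a5 + a4 + a3 + a2 + a1 + a0)

a5, a4, a3, a2, a1, a0 = fsolve(_gait_equations, (1, 1, 1, 1, 1, 1))
h = lambda t: a5*t**5 + a4*t**4 + a3*t**3 + a2*t**2 + a1*t + a0
assert abs(h(0.0)) < 1e-6 and abs(h(T)) < 1e-6 and abs(h(T/2) - hmax) < 1e-6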
31 | 32 | import datetime 33 | import os 34 | import copy 35 | import torch 36 | import numpy as np 37 | import random 38 | from isaacgym import gymapi 39 | from isaacgym import gymutil 40 | 41 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 42 | 43 | 44 | def class_to_dict(obj) -> dict: 45 | if not hasattr(obj, "__dict__"): 46 | return obj 47 | result = {} 48 | for key in dir(obj): 49 | if key.startswith("_"): 50 | continue 51 | element = [] 52 | val = getattr(obj, key) 53 | if isinstance(val, list): 54 | for item in val: 55 | element.append(class_to_dict(item)) 56 | else: 57 | element = class_to_dict(val) 58 | result[key] = element 59 | return result 60 | 61 | 62 | def update_class_from_dict(obj, dict): 63 | for key, val in dict.items(): 64 | attr = getattr(obj, key, None) 65 | if isinstance(attr, type): 66 | update_class_from_dict(attr, val) 67 | else: 68 | setattr(obj, key, val) 69 | return 70 | 71 | 72 | def set_seed(seed): 73 | if seed == -1: 74 | seed = np.random.randint(0, 10000) 75 | print("Setting seed: {}".format(seed)) 76 | 77 | random.seed(seed) 78 | np.random.seed(seed) 79 | torch.manual_seed(seed) 80 | os.environ["PYTHONHASHSEED"] = str(seed) 81 | torch.cuda.manual_seed(seed) 82 | torch.cuda.manual_seed_all(seed) 83 | 84 | 85 | def parse_sim_params(args, cfg): 86 | # code from Isaac Gym Preview 2 87 | # initialize sim params 88 | sim_params = gymapi.SimParams() 89 | 90 | # set some values from args 91 | if args.physics_engine == gymapi.SIM_FLEX: 92 | if args.device != "cpu": 93 | print("WARNING: Using Flex with GPU instead of PHYSX!") 94 | elif args.physics_engine == gymapi.SIM_PHYSX: 95 | sim_params.physx.use_gpu = args.use_gpu 96 | sim_params.physx.num_subscenes = args.subscenes 97 | sim_params.use_gpu_pipeline = args.use_gpu_pipeline 98 | 99 | # if sim options are provided in cfg, parse them and update/override above: 100 | if "sim" in cfg: 101 | gymutil.parse_sim_config(cfg["sim"], sim_params) 102 | 103 | # Override num_threads if passed on the command line 104 | if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0: 105 | sim_params.physx.num_threads = args.num_threads 106 | 107 | return sim_params 108 | 109 | 110 | def get_load_path(root, load_run=-1, checkpoint=-1): 111 | def month_to_number(month): 112 | return datetime.datetime.strptime(month, "%b").month 113 | 114 | try: 115 | runs = os.listdir(root) 116 | try: 117 | runs.sort(key=lambda x: (month_to_number(x[:3]), int(x[3:5]), x[6:])) 118 | except ValueError as e: 119 | print("WARNING - Could not sort runs by month: " + str(e)) 120 | runs.sort() 121 | if "exported" in runs: 122 | runs.remove("exported") 123 | last_run = os.path.join(root, runs[-1]) 124 | except: 125 | raise ValueError("No runs in this directory: " + root) 126 | if load_run == -1: 127 | load_run = last_run 128 | else: 129 | load_run = os.path.join(root, load_run) 130 | if checkpoint == -1: 131 | models = [file for file in os.listdir(load_run) if "model" in file] 132 | models.sort(key=lambda m: "{0:0>15}".format(m)) 133 | model = models[-1] 134 | else: 135 | model = "model_{}.pt".format(checkpoint) 136 | 137 | load_path = os.path.join(load_run, model) 138 | return load_path 139 | 140 | 141 | def update_cfg_from_args(env_cfg, cfg_train, args): 142 | # seed 143 | if env_cfg is not None: 144 | # num envs 145 | if args.num_envs is not None: 146 | env_cfg.env.num_envs = args.num_envs 147 | if cfg_train is not None: 148 | if args.seed is not None: 149 | cfg_train.seed = args.seed 150 | # alg runner parameters 151 | if 
args.max_iterations is not None: 152 | cfg_train.runner.max_iterations = args.max_iterations 153 | if args.resume: 154 | cfg_train.runner.resume = args.resume 155 | if args.experiment_name is not None: 156 | cfg_train.runner.experiment_name = args.experiment_name 157 | if args.run_name is not None: 158 | cfg_train.runner.run_name = args.run_name 159 | if args.load_run is not None: 160 | cfg_train.runner.load_run = args.load_run 161 | if args.checkpoint is not None: 162 | cfg_train.runner.checkpoint = args.checkpoint 163 | 164 | return env_cfg, cfg_train 165 | 166 | 167 | def get_args(): 168 | custom_parameters = [ 169 | { 170 | "name": "--task", 171 | "type": str, 172 | "default": "XBotL_free", 173 | "help": "Name of the registered task to run. Overrides config file if provided.", 174 | }, 175 | { 176 | "name": "--resume", 177 | "action": "store_true", 178 | "default": False, 179 | "help": "Resume training from a checkpoint", 180 | }, 181 | { 182 | "name": "--experiment_name", 183 | "type": str, 184 | "help": "Name of the experiment to run or load. Overrides config file if provided.", 185 | }, 186 | { 187 | "name": "--run_name", 188 | "type": str, 189 | "help": "Name of the run. Overrides config file if provided.", 190 | }, 191 | { 192 | "name": "--load_run", 193 | "type": str, 194 | "help": "Name of the run to load when resume=True. If -1: will load the last run. Overrides config file if provided.", 195 | }, 196 | { 197 | "name": "--checkpoint", 198 | "type": int, 199 | "help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided.", 200 | }, 201 | { 202 | "name": "--headless", 203 | "action": "store_true", 204 | "default": False, 205 | "help": "Force display off at all times", 206 | }, 207 | { 208 | "name": "--horovod", 209 | "action": "store_true", 210 | "default": False, 211 | "help": "Use horovod for multi-gpu training", 212 | }, 213 | { 214 | "name": "--rl_device", 215 | "type": str, 216 | "default": "cuda:0", 217 | "help": "Device used by the RL algorithm (cpu, cuda:0, cuda:1, etc.)", 218 | }, 219 | { 220 | "name": "--num_envs", 221 | "type": int, 222 | "help": "Number of environments to create. Overrides config file if provided.", 223 | }, 224 | { 225 | "name": "--seed", 226 | "type": int, 227 | "help": "Random seed. Overrides config file if provided.", 228 | }, 229 | { 230 | "name": "--max_iterations", 231 | "type": int, 232 | "help": "Maximum number of training iterations. Overrides config file if provided.", 233 | }, 234 | ] 235 | # parse arguments 236 | args = gymutil.parse_arguments( 237 | description="RL Policy", custom_parameters=custom_parameters 238 | ) 239 | 240 | # name alignment 241 | args.sim_device_id = args.compute_device_id 242 | args.sim_device = args.sim_device_type 243 | if args.sim_device == "cuda": 244 | args.sim_device += f":{args.sim_device_id}" 245 | return args 246 | 247 | 248 | def export_policy_as_jit(actor_critic, path): 249 | os.makedirs(path, exist_ok=True) 250 | path = os.path.join(path, "policy_1.pt") 251 | model = copy.deepcopy(actor_critic.actor).to("cpu") 252 | traced_script_module = torch.jit.script(model) 253 | traced_script_module.save(path) 254 | -------------------------------------------------------------------------------- /humanoid/utils/logger.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
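# A minimal sketch of the TorchScript round trip used across this repo:
# export_policy_as_jit() (helpers.py above) scripts the actor with
# torch.jit.script() and saves it, and sim2sim.py later reloads the file with
# torch.jit.load(). TinyActor is a hypothetical stand-in for the real PPO
# actor, using the single-frame observation size (47) and the 12 actuated leg
# joints for illustration; the deployed policy actually consumes the
# frame-stacked observation history.
import torch
import torch.nn as nn

class TinyActor(nn.Module):
    def __init__(self, num_obs: int = 47, num_actions: int = 12):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(num_obs, 64), nn.ELU(),
                                 nn.Linear(64, num_actions))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)

scripted = torch.jit.script(TinyActor().to("cpu"))  # same mechanism as export_policy_as_jit
scripted.save("/tmp/policy_1.pt")
policy = torch.jit.load("/tmp/policy_1.pt")         # what sim2sim.py does at startup
print(policy(torch.zeros(1, 47)).shape)             # -> torch.Size([1, 12])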
31 | 32 | import matplotlib.pyplot as plt 33 | import numpy as np 34 | from collections import defaultdict 35 | from multiprocessing import Process, Value 36 | 37 | class Logger: 38 | def __init__(self, dt): 39 | self.state_log = defaultdict(list) 40 | self.rew_log = defaultdict(list) 41 | self.dt = dt 42 | self.num_episodes = 0 43 | self.plot_process = None 44 | 45 | def log_state(self, key, value): 46 | self.state_log[key].append(value) 47 | 48 | def log_states(self, dict): 49 | for key, value in dict.items(): 50 | self.log_state(key, value) 51 | 52 | def log_rewards(self, dict, num_episodes): 53 | for key, value in dict.items(): 54 | if 'rew' in key: 55 | self.rew_log[key].append(value.item() * num_episodes) 56 | self.num_episodes += num_episodes 57 | 58 | def reset(self): 59 | self.state_log.clear() 60 | self.rew_log.clear() 61 | 62 | def plot_states(self): 63 | self.plot_process = Process(target=self._plot) 64 | self.plot_process.start() 65 | 66 | def _plot(self): 67 | nb_rows = 3 68 | nb_cols = 3 69 | fig, axs = plt.subplots(nb_rows, nb_cols) 70 | for key, value in self.state_log.items(): 71 | time = np.linspace(0, len(value)*self.dt, len(value)) 72 | break 73 | log= self.state_log 74 | # plot joint targets and measured positions 75 | a = axs[1, 0] 76 | if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured') 77 | if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target') 78 | a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position') 79 | a.legend() 80 | # plot joint velocity 81 | a = axs[1, 1] 82 | if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured') 83 | if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target') 84 | a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity') 85 | a.legend() 86 | # plot base vel x 87 | a = axs[0, 0] 88 | if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured') 89 | if log["command_x"]: a.plot(time, log["command_x"], label='commanded') 90 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x') 91 | a.legend() 92 | # plot base vel y 93 | a = axs[0, 1] 94 | if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured') 95 | if log["command_y"]: a.plot(time, log["command_y"], label='commanded') 96 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y') 97 | a.legend() 98 | # plot base vel yaw 99 | a = axs[0, 2] 100 | if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured') 101 | if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') 102 | a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw') 103 | a.legend() 104 | # plot base vel z 105 | a = axs[1, 2] 106 | if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured') 107 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z') 108 | a.legend() 109 | # plot contact forces 110 | a = axs[2, 0] 111 | if log["contact_forces_z"]: 112 | forces = np.array(log["contact_forces_z"]) 113 | for i in range(forces.shape[1]): 114 | a.plot(time, forces[:, i], label=f'force {i}') 115 | a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') 116 | a.legend() 117 | # plot torque/vel curves 118 | a = axs[2, 1] 119 | if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') 120 | a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') 121 | 
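        # The torque-velocity scatter is a handy sim-to-real check: sampled
        # operating points should stay inside the actuator's torque-speed envelope.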
a.legend() 122 | # plot torques 123 | a = axs[2, 2] 124 | if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') 125 | a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque') 126 | a.legend() 127 | plt.show() 128 | 129 | def print_rewards(self): 130 | print("Average rewards per second:") 131 | for key, values in self.rew_log.items(): 132 | mean = np.sum(np.array(values)) / self.num_episodes 133 | print(f" - {key}: {mean}") 134 | print(f"Total number of episodes: {self.num_episodes}") 135 | 136 | def __del__(self): 137 | if self.plot_process is not None: 138 | self.plot_process.kill() -------------------------------------------------------------------------------- /humanoid/utils/math.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import torch 33 | from torch import Tensor 34 | import numpy as np 35 | from isaacgym.torch_utils import quat_apply, normalize 36 | from typing import Tuple 37 | 38 | # @ torch.jit.script 39 | def quat_apply_yaw(quat, vec): 40 | quat_yaw = quat.clone().view(-1, 4) 41 | quat_yaw[:, :2] = 0. 42 | quat_yaw = normalize(quat_yaw) 43 | return quat_apply(quat_yaw, vec) 44 | 45 | # @ torch.jit.script 46 | def wrap_to_pi(angles): 47 | angles %= 2*np.pi 48 | angles -= 2*np.pi * (angles > np.pi) 49 | return angles 50 | 51 | # @ torch.jit.script 52 | def torch_rand_sqrt_float(lower, upper, shape, device): 53 | # type: (float, float, Tuple[int, int], str) -> Tensor 54 | r = 2*torch.rand(*shape, device=device) - 1 55 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r)) 56 | r = (r + 1.) / 2. 
57 | return (upper - lower) * r + lower -------------------------------------------------------------------------------- /humanoid/utils/task_registry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import os 34 | from typing import Tuple 35 | from datetime import datetime 36 | 37 | from humanoid.algo import VecEnv 38 | from humanoid.algo import OnPolicyRunner 39 | 40 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 41 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params 42 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 43 | 44 | class TaskRegistry(): 45 | def __init__(self): 46 | self.task_classes = {} 47 | self.env_cfgs = {} 48 | self.train_cfgs = {} 49 | 50 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO): 51 | self.task_classes[name] = task_class 52 | self.env_cfgs[name] = env_cfg 53 | self.train_cfgs[name] = train_cfg 54 | 55 | def get_task_class(self, name: str) -> VecEnv: 56 | return self.task_classes[name] 57 | 58 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]: 59 | train_cfg = self.train_cfgs[name] 60 | env_cfg = self.env_cfgs[name] 61 | # copy seed 62 | env_cfg.seed = train_cfg.seed 63 | return env_cfg, train_cfg 64 | 65 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]: 66 | """ Creates an environment either from a registered name or from the provided config file. 67 | 68 | Args: 69 | name (string): Name of a registered env.
70 | args (Args, optional): Isaac Gym command line arguments. If None, get_args() will be called. Defaults to None. 71 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None. 72 | 73 | Raises: 74 | ValueError: Error if no registered env corresponds to 'name' 75 | 76 | Returns: 77 | isaacgym.VecTaskPython: The created environment 78 | Dict: the corresponding config file 79 | """ 80 | # if no args passed get command line arguments 81 | if args is None: 82 | args = get_args() 83 | # check if there is a registered env with that name 84 | if name in self.task_classes: 85 | task_class = self.get_task_class(name) 86 | else: 87 | raise ValueError(f"Task with name: {name} was not registered") 88 | if env_cfg is None: 89 | # load config files 90 | env_cfg, _ = self.get_cfgs(name) 91 | # override cfg from args (if specified) 92 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args) 93 | set_seed(env_cfg.seed) 94 | # parse sim params (convert to dict first) 95 | sim_params = {"sim": class_to_dict(env_cfg.sim)} 96 | sim_params = parse_sim_params(args, sim_params) 97 | env = task_class( cfg=env_cfg, 98 | sim_params=sim_params, 99 | physics_engine=args.physics_engine, 100 | sim_device=args.sim_device, 101 | headless=args.headless) 102 | self.env_cfg_for_wandb = env_cfg 103 | return env, env_cfg 104 | 105 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default") -> Tuple[OnPolicyRunner, LeggedRobotCfgPPO]: 106 | """ Creates the training algorithm either from a registered name or from the provided config file. 107 | 108 | Args: 109 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm) 110 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None. 111 | args (Args, optional): Isaac Gym command line arguments. If None, get_args() will be called. Defaults to None. 112 | train_cfg (Dict, optional): Training config file. If None, 'name' will be used to get the config file. Defaults to None. 113 | log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example). 114 | Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<path_to_LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>.
115 | 116 | Raises: 117 | ValueError: Error if neither 'name' nor 'train_cfg' is provided 118 | Warning: If both 'name' and 'train_cfg' are provided, 'name' is ignored 119 | 120 | Returns: 121 | OnPolicyRunner: The created training runner 122 | Dict: the corresponding config file 123 | """ 124 | # if no args passed get command line arguments 125 | if args is None: 126 | args = get_args() 127 | # if config files are passed use them, otherwise load from the name 128 | if train_cfg is None: 129 | if name is None: 130 | raise ValueError("Either 'name' or 'train_cfg' must not be None") 131 | # load config files 132 | _, train_cfg = self.get_cfgs(name) 133 | else: 134 | if name is not None: 135 | print(f"'train_cfg' provided -> Ignoring 'name={name}'") 136 | # override cfg from args (if specified) 137 | _, train_cfg = update_cfg_from_args(None, train_cfg, args) 138 | 139 | if log_root=="default": 140 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name) 141 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 142 | elif log_root is None: 143 | log_dir = None 144 | else: 145 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 146 | 147 | train_cfg_dict = class_to_dict(train_cfg) 148 | env_cfg_dict = class_to_dict(self.env_cfg_for_wandb) 149 | all_cfg = {**train_cfg_dict, **env_cfg_dict} 150 | 151 | runner_class = eval(train_cfg_dict["runner_class_name"]) 152 | runner = runner_class(env, all_cfg, log_dir, device=args.rl_device) 153 | # save resume path before creating a new log_dir 154 | resume = train_cfg.runner.resume 155 | if resume: 156 | # load previously trained model 157 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint) 158 | print(f"Loading model from: {resume_path}") 159 | runner.load(resume_path, load_optimizer=False) 160 | return runner, train_cfg 161 | 162 | # make global task registry 163 | task_registry = TaskRegistry() -------------------------------------------------------------------------------- /humanoid/utils/terrain.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import numpy as np 34 | 35 | from isaacgym import terrain_utils 36 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg 37 | 38 | class Terrain: 39 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None: 40 | 41 | self.cfg = cfg 42 | self.num_robots = num_robots 43 | self.type = cfg.mesh_type 44 | if self.type in ["none", 'plane']: 45 | return 46 | self.env_length = cfg.terrain_length 47 | self.env_width = cfg.terrain_width 48 | self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))] 49 | 50 | self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols 51 | self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3)) 52 | 53 | self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale) 54 | self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale) 55 | 56 | self.border = int(cfg.border_size/self.cfg.horizontal_scale) 57 | self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border 58 | self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border 59 | 60 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16) 61 | if cfg.curriculum: 62 | self.curiculum() 63 | elif cfg.selected: 64 | self.selected_terrain() 65 | else: 66 | self.randomized_terrain() 67 | 68 | self.heightsamples = self.height_field_raw 69 | if self.type=="trimesh": 70 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh( self.height_field_raw, 71 | self.cfg.horizontal_scale, 72 | self.cfg.vertical_scale, 73 | self.cfg.slope_treshold) 74 | 75 | def randomized_terrain(self): 76 | for k in range(self.cfg.num_sub_terrains): 77 | # Env coordinates in the world 78 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 79 | 80 | choice = np.random.uniform(0, 1) 81 | difficulty = np.random.choice([0.5, 0.75, 0.9]) 82 | terrain = self.make_terrain(choice, difficulty) 83 | self.add_terrain_to_map(terrain, i, j) 84 | 85 | def curiculum(self): 86 | for j in range(self.cfg.num_cols): 87 | for i in range(self.cfg.num_rows): 88 | difficulty = i / self.cfg.num_rows 89 | choice = j / self.cfg.num_cols + 0.001 90 | 91 | terrain = self.make_terrain(choice, difficulty) 92 | self.add_terrain_to_map(terrain, i, j) 93 | 94 | def selected_terrain(self): 95 | terrain_type = self.cfg.terrain_kwargs.pop('type') 96 | for k in range(self.cfg.num_sub_terrains): 97 | # Env coordinates in the world 98 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 99 | 100 | terrain = terrain_utils.SubTerrain("terrain", 101 | width=self.width_per_env_pixels, 102 | length=self.width_per_env_pixels, 103 | vertical_scale=self.vertical_scale, 104 | horizontal_scale=self.horizontal_scale) 105 | 106 | eval(terrain_type)(terrain, **self.cfg.terrain_kwargs.terrain_kwargs) 107 | 
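            # eval() resolves the configured terrain-type string (e.g.
            # "terrain_utils.random_uniform_terrain") to the generator function
            # itself; getattr(terrain_utils, name) would be a stricter alternative.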
self.add_terrain_to_map(terrain, i, j) 108 | 109 | def make_terrain(self, choice, difficulty): 110 | terrain = terrain_utils.SubTerrain("terrain", 111 | width=self.width_per_env_pixels, 112 | length=self.width_per_env_pixels, 113 | vertical_scale=self.cfg.vertical_scale, 114 | horizontal_scale=self.cfg.horizontal_scale) 115 | slope = difficulty * 0.4 116 | step_height = 0.05 + 0.18 * difficulty 117 | discrete_obstacles_height = 0.05 + difficulty * 0.2 118 | stepping_stones_size = 1.5 * (1.05 - difficulty) 119 | stone_distance = 0.05 if difficulty == 0 else 0.1 120 | gap_size = 1. * difficulty 121 | pit_depth = 1. * difficulty 122 | if choice < self.proportions[0]: 123 | if choice < self.proportions[0] / 2: 124 | slope *= -1 125 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 126 | elif choice < self.proportions[1]: 127 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 128 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, step=0.005, downsampled_scale=0.2) 129 | elif choice < self.proportions[3]: 130 | if choice < self.proportions[2]: 131 | step_height *= -1 132 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.) 133 | elif choice < self.proportions[4]: 134 | num_rectangles = 20 135 | rectangle_min_size = 1. 136 | rectangle_max_size = 2. 137 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.) 138 | elif choice < self.proportions[5]: 139 | terrain_utils.stepping_stones_terrain(terrain, stone_size=stepping_stones_size, stone_distance=stone_distance, max_height=0., platform_size=4.) 140 | elif choice < self.proportions[6]: 141 | gap_terrain(terrain, gap_size=gap_size, platform_size=3.) 142 | else: 143 | pit_terrain(terrain, depth=pit_depth, platform_size=4.) 144 | 145 | return terrain 146 | 147 | def add_terrain_to_map(self, terrain, row, col): 148 | i = row 149 | j = col 150 | # map coordinate system 151 | start_x = self.border + i * self.length_per_env_pixels 152 | end_x = self.border + (i + 1) * self.length_per_env_pixels 153 | start_y = self.border + j * self.width_per_env_pixels 154 | end_y = self.border + (j + 1) * self.width_per_env_pixels 155 | self.height_field_raw[start_x:end_x, start_y:end_y] = terrain.height_field_raw 156 | 157 | env_origin_x = (i + 0.5) * self.env_length 158 | env_origin_y = (j + 0.5) * self.env_width 159 | x1 = int((self.env_length / 2. - 1) / terrain.horizontal_scale) 160 | x2 = int((self.env_length / 2. + 1) / terrain.horizontal_scale) 161 | y1 = int((self.env_width / 2. - 1) / terrain.horizontal_scale) 162 | y2 = int((self.env_width / 2. + 1) / terrain.horizontal_scale) 163 | env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2]) * terrain.vertical_scale 164 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 165 | 166 | 167 | def gap_terrain(terrain, gap_size, platform_size=1.): 168 | gap_size = int(gap_size / terrain.horizontal_scale) 169 | platform_size = int(platform_size / terrain.horizontal_scale) 170 | center_x = terrain.length // 2 171 | center_y = terrain.width // 2 172 | x1 = (terrain.length - platform_size) // 2 173 | x2 = x1 + gap_size 174 | y1 = (terrain.width - platform_size) // 2 175 | y2 = y1 + gap_size 176 | terrain.height_field_raw[center_x - x2:center_x + x2, center_y - y2:center_y + y2] = -1000 177 | terrain.height_field_raw[center_x - x1:center_x + x1, center_y - y1:center_y + y1] = 0 178 | 179 | 180 | def pit_terrain(terrain, depth, platform_size=1.): 181 | depth = int(depth / terrain.vertical_scale) 182 | platform_size = int(platform_size / terrain.horizontal_scale / 2) 183 | x1 = terrain.length // 2 - platform_size 184 | x2 = terrain.length // 2 + platform_size 185 | y1 = terrain.width // 2 - platform_size 186 | y2 = terrain.width // 2 + platform_size 187 | terrain.height_field_raw[x1:x2, y1:y2] = -depth 188 | 189 | class HumanoidTerrain(Terrain): 190 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None: 191 | super().__init__(cfg, num_robots) 192 | 193 | def randomized_terrain(self): 194 | for k in range(self.cfg.num_sub_terrains): 195 | # Env coordinates in the world 196 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 197 | 198 | choice = np.random.uniform(0, 1) 199 | difficulty = np.random.uniform(0, 1) 200 | terrain = self.make_terrain(choice, difficulty) 201 | self.add_terrain_to_map(terrain, i, j) 202 | 203 | def make_terrain(self, choice, difficulty): 204 | terrain = terrain_utils.SubTerrain("terrain", 205 | width=self.width_per_env_pixels, 206 | length=self.width_per_env_pixels, 207 | vertical_scale=self.cfg.vertical_scale, 208 | horizontal_scale=self.cfg.horizontal_scale) 209 | discrete_obstacles_height = difficulty * 0.04 210 | r_height = difficulty * 0.07 211 | h_slope = difficulty * 0.15 212 | if choice < self.proportions[0]: 213 | pass 214 | elif choice < self.proportions[1]: 215 | num_rectangles = 20 216 | rectangle_min_size = 1. 217 | rectangle_max_size = 2. 218 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.) 219 | elif choice < self.proportions[2]: 220 | terrain_utils.random_uniform_terrain(terrain, min_height=-r_height, max_height=r_height, step=0.005, downsampled_scale=0.2) 221 | elif choice < self.proportions[3]: 222 | terrain_utils.pyramid_sloped_terrain(terrain, slope=h_slope, platform_size=0.1) 223 | elif choice < self.proportions[4]: 224 | terrain_utils.pyramid_sloped_terrain(terrain, slope=-h_slope, platform_size=0.1) 225 | elif choice < self.proportions[5]: 226 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.4, step_height=discrete_obstacles_height, platform_size=1.) 227 | elif choice < self.proportions[6]: 228 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.4, step_height=-discrete_obstacles_height, platform_size=1.)
229 | else: 230 | pass 231 | return terrain 232 | -------------------------------------------------------------------------------- /images/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/images/demo.gif -------------------------------------------------------------------------------- /logs/XBot_ppo/exported/policies/policy_example.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/logs/XBot_ppo/exported/policies/policy_example.pt -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_linkage1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_linkage1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_linkage2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_linkage2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_motor1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_motor1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_motor2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_motor2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_arm_base_link.STL: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_arm_base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_arm_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_arm_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_elbow_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_elbow_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_elbow_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_elbow_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_foot_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_foot_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_rota_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_tip.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_mid_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_mid_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_mid_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_pinky_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_pinky_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_pinky_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_ring_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_ring_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_link2.STL -------------------------------------------------------------------------------- 
/resources/robots/XBot/meshes/left_hand_ring_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_rota_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_knee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_knee_linkage_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_linkage_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_knee_motor_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_motor_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_leg_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_leg_roll_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_leg_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_shoulder_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_shoulder_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_shoulder_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_shoulder_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_wrist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_wrist_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_wrist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_wrist_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/neck_base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/neck_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/neck_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/realsense_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/realsense_link.STL -------------------------------------------------------------------------------- 
/resources/robots/XBot/meshes/right_ankle_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_linkage1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_linkage1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_linkage2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_linkage2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_motor1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_motor1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_motor2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_motor2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_arm_base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_arm_base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_arm_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_arm_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_elbow_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_elbow_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_elbow_yaw_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_elbow_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_foot_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_foot_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_rota_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_mid_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_mid_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_link2.STL 
-------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_mid_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_pinky_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_pinky_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_pinky_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ring_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ring_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ring_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_rota_link2.STL: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_rota_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_knee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_knee_linkage_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_linkage_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_knee_motor_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_motor_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_leg_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_leg_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_leg_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_shoulder_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_shoulder_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_shoulder_roll_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_shoulder_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_wrist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_wrist_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_wrist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_wrist_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/waist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/waist_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/waist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/waist_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/terrain/uneven.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/terrain/uneven.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | from setuptools import find_packages, setup  # setuptools replaces the deprecated distutils.core 33 | 34 | 35 | setup( 36 | name='humanoid', 37 | version='1.0.0', 38 | author='Xinyang Gu, Yen-Jen Wang, Jianyu Chen', 39 | license="BSD-3-Clause", 40 | packages=find_packages(), 41 | author_email='zlw21gxy@gmail.com, wangyenjen@berkeley.edu, jianyuchen@tsinghua.edu.cn', 42 | description='Isaac Gym environments for humanoid robots', 43 | install_requires=['isaacgym', # Isaac Gym Preview 4 44 | 'wandb', 45 | 'DateTime', # used to sort log directories by month 46 | 'tensorboard', 47 | 'tqdm', 48 | 'numpy==1.23.5', 49 | 'opencv-python', 50 | 'mujoco==2.3.6', 51 | 'mujoco-python-viewer', 52 | 'matplotlib'] 53 | ) 54 | --------------------------------------------------------------------------------
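
Usage note: the pieces above are typically wired together by a small training entry point (humanoid/scripts/train.py in the tree listing). The sketch below is illustrative only and assumes the legged_gym-style API this codebase derives from: task_registry.make_env() and the args.task field are assumptions based on that lineage, while make_alg_runner() and the global task_registry appear verbatim in humanoid/utils/task_registry.py above.

# minimal training sketch (hypothetical; not a file from the repository above)
from humanoid.utils.task_registry import task_registry
from humanoid.utils.helpers import get_args

def train():
    args = get_args()  # parse command-line arguments (task name, RL device, etc.)
    # build the vectorized Isaac Gym environment for the requested task (assumed API)
    env, env_cfg = task_registry.make_env(name=args.task, args=args)
    # build the PPO runner; returns (runner, train_cfg) as documented in the docstring above
    runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args)
    # run the on-policy training loop for the configured number of iterations
    runner.learn(num_learning_iterations=train_cfg.runner.max_iterations)

if __name__ == '__main__':
    train()

One detail worth spelling out: Terrain's cumulative proportions act as thresholds on the uniform choice sample. For example, terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1] yields thresholds [0.2, 0.4, 0.8, 0.9, 1.0], so choice = 0.55 falls below the third threshold and selects the third terrain type; curriculum() exploits this by mapping each column index j to a fixed choice value, so each column of sub-terrains gets one terrain type at increasing difficulty along the rows.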