├── .gitattributes ├── .gitignore ├── README.md ├── humanoid ├── __init__.py ├── algo │ ├── __init__.py │ ├── ppo │ │ ├── __init__.py │ │ ├── actor_critic.py │ │ ├── on_policy_runner.py │ │ ├── ppo.py │ │ └── rollout_storage.py │ └── vec_env.py ├── envs │ ├── __init__.py │ ├── base │ │ ├── LICENSE │ │ ├── base_config.py │ │ ├── base_task.py │ │ ├── legged_robot.py │ │ └── legged_robot_config.py │ └── custom │ │ ├── humanoid_config.py │ │ └── humanoid_env.py ├── scripts │ ├── play.py │ ├── sim2sim.py │ └── train.py └── utils │ ├── __init__.py │ ├── calculate_gait.py │ ├── helpers.py │ ├── logger.py │ ├── math.py │ ├── task_registry.py │ └── terrain.py ├── images └── demo.gif ├── logs └── XBot_ppo │ └── exported │ └── policies │ └── policy_example.pt ├── resources └── robots │ └── XBot │ ├── meshes │ ├── base_link.STL │ ├── left_ankle_pitch_link.STL │ ├── left_ankle_pitch_linkage1_link.STL │ ├── left_ankle_pitch_linkage2_link.STL │ ├── left_ankle_pitch_motor1_link.STL │ ├── left_ankle_pitch_motor2_link.STL │ ├── left_ankle_roll_link.STL │ ├── left_arm_base_link.STL │ ├── left_arm_yaw_link.STL │ ├── left_elbow_pitch_link.STL │ ├── left_elbow_yaw_link.STL │ ├── left_foot_ee_link.STL │ ├── left_hand_ee_link.STL │ ├── left_hand_index_bend_link.STL │ ├── left_hand_index_rota_link1.STL │ ├── left_hand_index_rota_link2.STL │ ├── left_hand_index_tip.STL │ ├── left_hand_link.STL │ ├── left_hand_mid_link1.STL │ ├── left_hand_mid_link2.STL │ ├── left_hand_mid_tip.STL │ ├── left_hand_pinky_link1.STL │ ├── left_hand_pinky_link2.STL │ ├── left_hand_pinky_tip.STL │ ├── left_hand_ring_link1.STL │ ├── left_hand_ring_link2.STL │ ├── left_hand_ring_tip.STL │ ├── left_hand_thumb_bend_link.STL │ ├── left_hand_thumb_rota_link1.STL │ ├── left_hand_thumb_rota_link2.STL │ ├── left_hand_thumb_tip.STL │ ├── left_knee_link.STL │ ├── left_knee_linkage_link.STL │ ├── left_knee_motor_link.STL │ ├── left_leg_pitch_link.STL │ ├── left_leg_roll_link.STL │ ├── left_leg_yaw_link.STL │ ├── left_shoulder_pitch_link.STL │ ├── left_shoulder_roll_link.STL │ ├── left_wrist_roll_link.STL │ ├── left_wrist_yaw_link.STL │ ├── neck_base_link.STL │ ├── neck_pitch_link.STL │ ├── neck_yaw_link.STL │ ├── realsense_link.STL │ ├── right_ankle_pitch_link.STL │ ├── right_ankle_pitch_linkage1_link.STL │ ├── right_ankle_pitch_linkage2_link.STL │ ├── right_ankle_pitch_motor1_link.STL │ ├── right_ankle_pitch_motor2_link.STL │ ├── right_ankle_roll_link.STL │ ├── right_arm_base_link.STL │ ├── right_arm_yaw_link.STL │ ├── right_elbow_pitch_link.STL │ ├── right_elbow_yaw_link.STL │ ├── right_foot_ee_link.STL │ ├── right_hand_ee_link.STL │ ├── right_hand_index_bend_link.STL │ ├── right_hand_index_rota_link1.STL │ ├── right_hand_index_rota_link2.STL │ ├── right_hand_index_tip.STL │ ├── right_hand_link.STL │ ├── right_hand_mid_link1.STL │ ├── right_hand_mid_link2.STL │ ├── right_hand_mid_tip.STL │ ├── right_hand_pinky_link1.STL │ ├── right_hand_pinky_link2.STL │ ├── right_hand_pinky_tip.STL │ ├── right_hand_ring_link1.STL │ ├── right_hand_ring_link2.STL │ ├── right_hand_ring_tip.STL │ ├── right_hand_thumb_bend_link.STL │ ├── right_hand_thumb_rota_link1.STL │ ├── right_hand_thumb_rota_link2.STL │ ├── right_hand_thumb_rota_tip.STL │ ├── right_knee_link.STL │ ├── right_knee_linkage_link.STL │ ├── right_knee_motor_link.STL │ ├── right_leg_pitch_link.STL │ ├── right_leg_roll_link.STL │ ├── right_leg_yaw_link.STL │ ├── right_shoulder_pitch_link.STL │ ├── right_shoulder_roll_link.STL │ ├── right_wrist_roll_link.STL │ ├── right_wrist_yaw_link.STL │ ├── 
waist_roll_link.STL │ └── waist_yaw_link.STL │ ├── mjcf │ ├── XBot-L-terrain.xml │ └── XBot-L.xml │ ├── terrain │ └── uneven.png │ └── urdf │ └── XBot-L.urdf └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.dae filter=lfs diff=lfs merge=lfs -text 2 | *.obj filter=lfs diff=lfs merge=lfs -text 3 | *.obj text !filter !merge !diff 4 | *.dae text !filter !merge !diff 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # These are some examples of commonly ignored file patterns. 2 | # You should customize this list as applicable to your project. 3 | # Learn more about .gitignore: 4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore 5 | 6 | # Node artifact files 7 | node_modules/ 8 | dist/ 9 | wandb/ 10 | .ipynb_checkpoints/ 11 | *.npy 12 | test_data/ 13 | *.ipynb 14 | # Compiled Java class files 15 | *.class 16 | 17 | # Compiled Python bytecode 18 | *.py[cod] 19 | 20 | # Log files 21 | *.log 22 | 23 | # Package files 24 | *.jar 25 | 26 | # Maven 27 | target/ 28 | dist/ 29 | 30 | # JetBrains IDE 31 | .idea/ 32 | 33 | # Unit test reports 34 | TEST*.xml 35 | 36 | # Generated by macOS 37 | .DS_Store 38 | 39 | # Generated by Windows 40 | Thumbs.db 41 | 42 | # Applications 43 | *.app 44 | *.exe 45 | *.war 46 | 47 | # Large media files 48 | *.mp4 49 | *.tiff 50 | *.avi 51 | *.flv 52 | *.mov 53 | *.wmv 54 | 55 | # VS Code 56 | .vscode 57 | # logs 58 | logs 59 | runs 60 | videos 61 | # resources 62 | 63 | # other 64 | *.egg-info 65 | __pycache__ 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer 2 | 3 | Project Page 4 | | 5 | arXiv 6 | | 7 | Twitter 8 | 9 | Xinyang Gu*, 10 | Yen-Jen Wang*, 11 | Jianyu Chen† 12 | 13 | *: Equal contribution. Project co-leads. †: Corresponding author. 14 | 15 | ![Demo](./images/demo.gif) 16 | 17 | Humanoid-Gym is an easy-to-use reinforcement learning (RL) framework based on Nvidia Isaac Gym, designed to train locomotion skills for humanoid robots, emphasizing zero-shot transfer from simulation to the real world. Humanoid-Gym also integrates a sim-to-sim framework from Isaac Gym to Mujoco that allows users to verify the trained policies in different physical simulations to ensure the robustness and generalization of the policies. 18 | 19 | This codebase is verified by RobotEra's XBot-S (1.2-meter tall humanoid robot) and XBot-L (1.65-meter tall humanoid robot) in a real-world environment with zero-shot sim-to-real transfer. 20 | 21 | ## Features 22 | 23 | ### 1. Humanoid Robot Training 24 | This repository offers comprehensive guidance and scripts for the training of humanoid robots. Humanoid-Gym features specialized rewards for humanoid robots, reducing the difficulty of sim-to-real transfer. In this repository, we use RobotEra's XBot-L as a primary example. It can also be used for other robots with minimal adjustments. Our resources cover setup, configuration, and execution. Our goal is to fully prepare the robot for real-world locomotion by providing in-depth training and optimization. 25 | 26 | 27 | - **Comprehensive Training Guidelines**: We offer thorough walkthroughs for each stage of the training process.
28 | - **Step-by-Step Configuration Instructions**: Our guidance is clear and succinct, ensuring an efficient setup process. 29 | - **Execution Scripts for Easy Deployment**: Utilize our pre-prepared scripts to streamline the training workflow. 30 | 31 | ### 2. Sim2Sim Support 32 | We also share our sim2sim pipeline, which allows you to transfer trained policies to highly accurate and carefully designed simulated environments. Once you acquire the robot, you can confidently deploy the RL-trained policies in real-world settings. 33 | 34 | Our simulator settings, particularly with Mujoco, are finely tuned to closely mimic real-world scenarios. This careful calibration ensures that performance in the simulated and real-world environments is closely aligned, making our simulations more trustworthy and strengthening our confidence in their real-world applicability. 35 | 36 | 37 | ### 3. Denoising World Model Learning 38 | #### Robotics: Science and Systems (RSS), 2024 (Best Paper Award Finalist) 39 | Paper 40 | | 41 | Twitter 42 | 43 | Xinyang Gu*, 44 | Yen-Jen Wang*, 45 | Xiang Zhu*, Chengming Shi*, Yanjiang Guo, Yichen Liu, 46 | Jianyu Chen† 47 | 48 | *: Equal contribution. Project co-leads. †: Corresponding author. 49 | 50 | Denoising World Model Learning (DWL) presents an advanced sim-to-real framework that integrates state estimation and system identification. This dual-method approach ensures the robot's learning and adaptation are both practical and effective in real-world contexts. 51 | 52 | - **Enhanced Sim-to-real Adaptability**: Techniques to optimize the robot's transition from simulated to real environments. 53 | - **Improved State Estimation Capabilities**: Advanced tools for precise and reliable state analysis. 54 | 55 | ### Perceptive Locomotion Learning for Humanoid Robots (Coming Soon!) 56 | Twitter 57 | 58 | ### Dexterous Hand Manipulation (Coming Soon!) 59 | Twitter 60 | 61 | ## Installation 62 | 63 | 1. Create a new Python 3.8 virtual environment with `conda create -n myenv python=3.8`. 64 | 2. For best performance, we recommend NVIDIA driver version 525 (`sudo apt install nvidia-driver-525`). The minimum supported driver version is 515; if you cannot install version 525, make sure your system has at least version 515. 65 | 3. Install PyTorch 1.13 with CUDA 11.7: 66 | - `conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia` 67 | 4. Install NumPy 1.23 with `conda install numpy=1.23`. 68 | 5. Install Isaac Gym: 69 | - Download and install Isaac Gym Preview 4 from https://developer.nvidia.com/isaac-gym. 70 | - `cd isaacgym/python && pip install -e .` 71 | - Run an example with `cd examples && python 1080_balls_of_solitude.py`. 72 | - Consult `isaacgym/docs/index.html` for troubleshooting. 73 | 6. Install humanoid-gym: 74 | - Clone this repository. 75 | - `cd humanoid-gym && pip install -e .` 76 | 77 | 78 | 79 | ## Usage Guide 80 | 81 | #### Examples 82 | 83 | ```bash 84 | # Under the directory humanoid-gym/humanoid 85 | # Launching PPO Policy Training for 'v1' Across 4096 Environments 86 | # This command initiates the PPO algorithm-based training for the humanoid task. 87 | python scripts/train.py --task=humanoid_ppo --run_name v1 --headless --num_envs 4096 88 | 89 | # Evaluating the Trained PPO Policy 'v1' 90 | # This command loads the 'v1' policy for performance assessment in its environment. 91 | # Additionally, it automatically exports a JIT model, suitable for deployment purposes. 92 | python scripts/play.py --task=humanoid_ppo --run_name v1 93 | 94 | # Implementing Simulation-to-Simulation Model Transformation 95 | # This command facilitates a sim-to-sim transformation using the exported 'v1' policy. 96 | # You must run play.py first to export the JIT model before using it with sim2sim.py. 97 | python scripts/sim2sim.py --load_model /path/to/logs/XBot_ppo/exported/policies/policy_1.pt 98 | 99 | # Run our trained policy 100 | python scripts/sim2sim.py --load_model /path/to/logs/XBot_ppo/exported/policies/policy_example.pt 101 | 102 | ``` 103 | 104 | #### 1. Default Tasks 105 | 106 | 107 | - **humanoid_ppo** 108 | - Purpose: Baseline, PPO policy, multi-frame low-level control 109 | - Observation Space: Variable $(47 \times H)$ dimensions, where $H$ is the number of frames 110 | - $[O_{t-H} \dots O_t]$ 111 | - Privileged Information: $73$ dimensions 112 | 113 | - **humanoid_dwl (coming soon)** 114 |
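To make the frame-stacked observation concrete, here is a minimal sketch of how a $(47 \times H)$ policy input can be assembled from single-frame observations. The helper name, the value of $H$, and the padding logic are illustrative assumptions, not code from this repository:

```python
# Minimal sketch (illustrative, not repo code): assembling the frame-stacked
# policy input [O_{t-H} ... O_t] from single 47-dimensional observation frames.
from collections import deque

import torch

H = 15                # assumed number of stacked frames, for illustration only
SINGLE_OBS_DIM = 47   # per-frame observation dimension stated above

history = deque(maxlen=H)

def stacked_obs(obs_t: torch.Tensor) -> torch.Tensor:
    """Append the newest frame and return the (num_envs, 47 * H) policy input."""
    if not history:                 # at episode start, pad with the first frame
        history.extend([obs_t] * H)
    history.append(obs_t)           # the deque drops the oldest frame automatically
    return torch.cat(list(history), dim=-1)

# Example: stacked_obs(torch.zeros(4096, SINGLE_OBS_DIM)).shape -> (4096, 705)
```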
115 | #### 2. PPO Policy 116 | - **Training Command**: For training the PPO policy, execute: 117 | ``` 118 | python humanoid/scripts/train.py --task=humanoid_ppo --load_run log_file_path --run_name run_name 119 | ``` 120 | - **Running a Trained Policy**: To deploy a trained PPO policy, use: 121 | ``` 122 | python humanoid/scripts/play.py --task=humanoid_ppo --load_run log_file_path --run_name run_name 123 | ``` 124 | - By default, the latest model of the last run from the experiment folder is loaded. However, other runs and model iterations can be selected by adjusting `load_run` and `checkpoint` in the training config. 125 | 126 | #### 3. Sim-to-sim 127 | - **Please note: Before initiating the sim-to-sim process, ensure that you run `play.py` to export a JIT policy.** 128 | - **Mujoco-based Sim2Sim Deployment**: Utilize Mujoco for executing simulation-to-simulation (sim2sim) deployments with the command below: 129 | ``` 130 | python scripts/sim2sim.py --load_model /path/to/export/model.pt 131 | ``` 132 | 133 | 134 | #### 4. Parameters 135 | - **CPU and GPU Usage**: To run simulations on the CPU, set both `--sim_device=cpu` and `--rl_device=cpu`. For GPU operations, specify `--sim_device=cuda:{0,1,2...}` and `--rl_device=cuda:{0,1,2...}` accordingly. Please note that `CUDA_VISIBLE_DEVICES` is not applicable, and the `--sim_device` and `--rl_device` settings must refer to the same device. 136 | - **Headless Operation**: Include `--headless` for operations without rendering. 137 | - **Rendering Control**: Press 'v' to toggle rendering during training. 138 | - **Policy Location**: Trained policies are saved in `humanoid/logs/<experiment_name>/<date_time>_<run_name>/model_<iteration>.pt`. 139 | 140 | #### 5. Command-Line Arguments 141 | For RL training, please refer to `humanoid/utils/helpers.py#L161`. 142 | For the sim-to-sim process, please refer to `humanoid/scripts/sim2sim.py#L169`. 143 | 144 | ## Code Structure 145 | 146 | 1. Every environment hinges on an `env` file (`legged_robot.py`) and a `configuration` file (`legged_robot_config.py`). The latter houses two classes: `LeggedRobotCfg` (all environment parameters) and `LeggedRobotCfgPPO` (all training parameters). 147 | 2. Both `env` and `config` classes use inheritance. 148 | 3. Each non-zero reward scale specified in `cfg` adds a reward function of the corresponding name to the total reward. 149 | 4. Tasks must be registered with `task_registry.register(name, EnvClass, EnvConfig, TrainConfig)`. Registration may occur within `envs/__init__.py`, or outside of this repository.
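As a concrete illustration of points 3 and 4, the sketch below follows the legged_gym-style conventions this codebase inherits. The class names (`MyRobotCfg`, `MyRobotCfgPPO`, `MyRobotEnv`), the exact import paths, and the `_reward_<name>` lookup are assumptions for illustration, not code from this repository:

```python
# Illustrative sketch (names, import paths, and the _reward_<name> lookup follow
# legged_gym-style conventions and are assumptions, not code from this repo).
import torch

from humanoid.envs.base.legged_robot import LeggedRobot
from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
from humanoid.utils.task_registry import task_registry

class MyRobotCfg(LeggedRobotCfg):          # all environment parameters
    class rewards(LeggedRobotCfg.rewards):
        class scales:
            tracking_lin_vel = 1.0         # summed via _reward_tracking_lin_vel()
            torques = -1e-5                # negative scale acts as a penalty
            feet_air_time = 0.0            # zero scale: this term is skipped

class MyRobotCfgPPO(LeggedRobotCfgPPO):    # all training parameters
    class runner(LeggedRobotCfgPPO.runner):
        experiment_name = "my_robot"
        run_name = "v1"

class MyRobotEnv(LeggedRobot):
    def _reward_tracking_lin_vel(self):
        # Example reward term: track the commanded planar base velocity.
        error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
        return torch.exp(-error / self.cfg.rewards.tracking_sigma)

# Point 4: register the task, here or in humanoid/envs/__init__.py.
task_registry.register("my_robot_ppo", MyRobotEnv, MyRobotCfg(), MyRobotCfgPPO())
```

If registered like this, the new task would then be selectable by name, e.g. `python scripts/train.py --task=my_robot_ppo`.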
150 | 151 | 152 | ## Add a new environment 153 | 154 | The base environment `legged_robot` constructs a rough-terrain locomotion task. The corresponding configuration does not specify a robot asset (URDF/MJCF) and defines no reward scales. 155 | 156 | 1. If you need to add a new environment, create a new folder in the `envs/` directory with a configuration file named `<your_env>_config.py`. The new configuration should inherit from existing environment configurations. 157 | 2. If adding a new robot: 158 | - Insert the corresponding assets in the `resources/` folder. 159 | - In the `cfg` file, set the path to the asset, define body names, default_joint_positions, and PD gains. Specify the desired `train_cfg` and the environment's name (Python class). 160 | - In the `train_cfg`, set the `experiment_name` and `run_name`. 161 | 3. If needed, create your environment in `<your_env>.py`. Inherit from existing environments, override desired functions, and/or add your reward functions. 162 | 4. Register your environment in `humanoid/envs/__init__.py`. 163 | 5. Modify or tune other parameters in your `cfg` or `cfg_train` as needed. To remove a reward, set its scale to zero. Avoid modifying the parameters of other environments! 164 | 6. If you want a new robot/environment to perform sim2sim, you may need to modify `humanoid/scripts/sim2sim.py`: 165 | - Check the joint mapping of the robot between MJCF and URDF. 166 | - Change the initial joint positions of the robot according to your trained policy. 167 | 168 | ## Troubleshooting 169 | 170 | Common errors and their fixes: 171 | 172 | ```bash 173 | # error 174 | ImportError: libpython3.8.so.1.0: cannot open shared object file: No such file or directory 175 | 176 | # solution 177 | # set the correct path 178 | export LD_LIBRARY_PATH="$HOME/miniconda3/envs/your_env/lib:$LD_LIBRARY_PATH" 179 | 180 | # OR 181 | sudo apt install libpython3.8 182 | 183 | # error 184 | AttributeError: module 'distutils' has no attribute 'version' 185 | 186 | # solution 187 | # reinstall PyTorch 1.12.0 188 | conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch 189 | 190 | # error: results from the libstdc++ version distributed with conda differing from the one used on your system to build Isaac Gym 191 | ImportError: /home/roboterax/anaconda3/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.20` not found (required by /home/roboterax/carbgym/python/isaacgym/_bindings/linux64/gym_36.so) 192 | 193 | # solution 194 | mkdir ${YOUR_CONDA_ENV}/lib/_unused 195 | mv ${YOUR_CONDA_ENV}/lib/libstdc++* ${YOUR_CONDA_ENV}/lib/_unused 196 | ``` 197 | 198 | ## Citation 199 | 200 | Please cite the following if you use this code or parts of it: 201 | ``` 202 | @article{gu2024humanoid, 203 | title={Humanoid-Gym: Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer}, 204 | author={Gu, Xinyang and Wang, Yen-Jen and Chen, Jianyu}, 205 | journal={arXiv preprint arXiv:2404.05695}, 206 | year={2024} 207 | } 208 | 209 | @inproceedings{gu2024advancing, 210 | title={Advancing Humanoid Locomotion: Mastering Challenging Terrains with Denoising World Model Learning}, 211 | author={Gu, Xinyang and Wang, Yen-Jen and Zhu, Xiang and Shi, Chengming and Guo, Yanjiang and Liu, Yichen and Chen, Jianyu}, 212 | booktitle={Robotics: Science and Systems}, 213 | year={2024}, 214 | url={https://enriquecoronadozu.github.io/rssproceedings2024/rss20/p058.pdf} 215 | } 216 | ``` 217 | 218 | ## Acknowledgment 219 | 220 | The implementation of Humanoid-Gym relies on resources from
[legged_gym](https://github.com/leggedrobotics/legged_gym) and [rsl_rl](https://github.com/leggedrobotics/rsl_rl) projects, created by the Robotic Systems Lab. We specifically utilize the `LeggedRobot` implementation from their research to enhance our codebase. 221 | 222 | ## Any Questions? 223 | 224 | If you have any more questions, please contact [support@robotera.com](mailto:support@robotera.com) or create an issue in this repository. 225 | -------------------------------------------------------------------------------- /humanoid/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import os 34 | 35 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 36 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'humanoid', 'envs') 37 | -------------------------------------------------------------------------------- /humanoid/algo/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | from .vec_env import VecEnv 34 | from .ppo import * -------------------------------------------------------------------------------- /humanoid/algo/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | 33 | from .ppo import PPO 34 | from .on_policy_runner import OnPolicyRunner 35 | from .actor_critic import ActorCritic 36 | from .rollout_storage import RolloutStorage 37 | -------------------------------------------------------------------------------- /humanoid/algo/ppo/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | import torch 33 | import torch.nn as nn 34 | from torch.distributions import Normal 35 | 36 | class ActorCritic(nn.Module): 37 | def __init__(self, num_actor_obs, 38 | num_critic_obs, 39 | num_actions, 40 | actor_hidden_dims=[256, 256, 256], 41 | critic_hidden_dims=[256, 256, 256], 42 | init_noise_std=1.0, 43 | activation = nn.ELU(), 44 | **kwargs): 45 | if kwargs: 46 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()])) 47 | super(ActorCritic, self).__init__() 48 | 49 | 50 | mlp_input_dim_a = num_actor_obs 51 | mlp_input_dim_c = num_critic_obs 52 | # Policy 53 | actor_layers = [] 54 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 55 | actor_layers.append(activation) 56 | for l in range(len(actor_hidden_dims)): 57 | if l == len(actor_hidden_dims) - 1: 58 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 59 | else: 60 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 61 | actor_layers.append(activation) 62 | self.actor = nn.Sequential(*actor_layers) 63 | 64 | # Value function 65 | critic_layers = [] 66 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 67 | critic_layers.append(activation) 68 | for l in range(len(critic_hidden_dims)): 69 | if l == len(critic_hidden_dims) - 1: 70 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 71 | else: 72 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 73 | critic_layers.append(activation) 74 | self.critic = nn.Sequential(*critic_layers) 75 | 76 | print(f"Actor MLP: {self.actor}") 77 | print(f"Critic MLP: {self.critic}") 78 | 79 | # Action noise 80 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 81 | self.distribution = None 82 | # disable args validation for speedup 83 | Normal.set_default_validate_args = False 84 | 85 | 86 | @staticmethod 87 | # not used at the moment 88 | def init_weights(sequential, scales): 89 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 90 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 91 | 92 | 93 | def reset(self, dones=None): 94 | pass 95 | 96 | def forward(self): 97 | raise NotImplementedError 98 | 99 | @property 100 | def action_mean(self): 101 | return self.distribution.mean 102 | 103 | @property 104 | def action_std(self): 105 | return self.distribution.stddev 106 | 107 | @property 108 | def entropy(self): 109 | return self.distribution.entropy().sum(dim=-1) 110 | 111 | def update_distribution(self, observations): 112 | mean = self.actor(observations) 113 | self.distribution = Normal(mean, mean*0. + self.std) 114 | 115 | def act(self, observations, **kwargs): 116 | self.update_distribution(observations) 117 | return self.distribution.sample() 118 | 119 | def get_actions_log_prob(self, actions): 120 | return self.distribution.log_prob(actions).sum(dim=-1) 121 | 122 | def act_inference(self, observations): 123 | actions_mean = self.actor(observations) 124 | return actions_mean 125 | 126 | def evaluate(self, critic_observations, **kwargs): 127 | value = self.critic(critic_observations) 128 | return value -------------------------------------------------------------------------------- /humanoid/algo/ppo/on_policy_runner.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | import os 33 | import time 34 | import torch 35 | import wandb 36 | import statistics 37 | from collections import deque 38 | from datetime import datetime 39 | from .ppo import PPO 40 | from .actor_critic import ActorCritic 41 | from humanoid.algo.vec_env import VecEnv 42 | from torch.utils.tensorboard import SummaryWriter 43 | 44 | 45 | class OnPolicyRunner: 46 | 47 | def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"): 48 | 49 | self.cfg = train_cfg["runner"] 50 | self.alg_cfg = train_cfg["algorithm"] 51 | self.policy_cfg = train_cfg["policy"] 52 | self.all_cfg = train_cfg 53 | self.wandb_run_name = ( 54 | datetime.now().strftime("%b%d_%H-%M-%S") 55 | + "_" 56 | + train_cfg["runner"]["experiment_name"] 57 | + "_" 58 | + train_cfg["runner"]["run_name"] 59 | ) 60 | self.device = device 61 | self.env = env 62 | if self.env.num_privileged_obs is not None: 63 | num_critic_obs = self.env.num_privileged_obs 64 | else: 65 | num_critic_obs = self.env.num_obs 66 | actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic 67 | actor_critic: ActorCritic = actor_critic_class( 68 | self.env.num_obs, num_critic_obs, self.env.num_actions, **self.policy_cfg 69 | ).to(self.device) 70 | alg_class = eval(self.cfg["algorithm_class_name"]) # PPO 71 | self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg) 72 | self.num_steps_per_env = self.cfg["num_steps_per_env"] 73 | self.save_interval = self.cfg["save_interval"] 74 | 75 | # init storage and model 76 | self.alg.init_storage( 77 | self.env.num_envs, 78 | self.num_steps_per_env, 79 | [self.env.num_obs], 80 | [self.env.num_privileged_obs], 81 | [self.env.num_actions], 82 | ) 83 | 84 | # Log 85 | self.log_dir = log_dir 86 | self.writer = None 87 | self.tot_timesteps = 0 88 | self.tot_time = 0 89 | self.current_learning_iteration = 0 90 | 91 | _, _ = self.env.reset() 92 | 93 | def learn(self, num_learning_iterations, init_at_random_ep_len=False): 94 | # initialize writer 95 | if self.log_dir is not None and self.writer is None: 96 | wandb.init( 97 | project="XBot", 98 | sync_tensorboard=True, 99 | name=self.wandb_run_name, 100 | config=self.all_cfg, 101 | ) 102 | self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10) 103 | if init_at_random_ep_len: 104 | self.env.episode_length_buf = torch.randint_like( 105 | self.env.episode_length_buf, high=int(self.env.max_episode_length) 106 | ) 107 | obs = self.env.get_observations() 108 | privileged_obs = self.env.get_privileged_observations() 109 | critic_obs = privileged_obs if privileged_obs is not None else obs 110 | obs, critic_obs = obs.to(self.device), critic_obs.to(self.device) 111 | self.alg.actor_critic.train() # switch to train mode (for dropout for example) 112 | 113 | ep_infos = [] 114 | rewbuffer = deque(maxlen=100) 115 | lenbuffer = deque(maxlen=100) 116 | cur_reward_sum = torch.zeros( 117 | self.env.num_envs, dtype=torch.float, device=self.device 118 | ) 119 | cur_episode_length = torch.zeros( 120 | self.env.num_envs, dtype=torch.float, device=self.device 121 | ) 122 | 123 | tot_iter = self.current_learning_iteration + num_learning_iterations 124 | for it in range(self.current_learning_iteration, tot_iter): 125 | start = time.time() 126 | # Rollout 127 | with torch.inference_mode(): 128 | for i in range(self.num_steps_per_env): 129 | actions = self.alg.act(obs, critic_obs) 130 | obs, privileged_obs, rewards, dones, infos = self.env.step(actions) 131 | critic_obs = privileged_obs if privileged_obs is not None else obs 132 | obs, 
critic_obs, rewards, dones = ( 133 | obs.to(self.device), 134 | critic_obs.to(self.device), 135 | rewards.to(self.device), 136 | dones.to(self.device), 137 | ) 138 | self.alg.process_env_step(rewards, dones, infos) 139 | 140 | if self.log_dir is not None: 141 | # Book keeping 142 | if "episode" in infos: 143 | ep_infos.append(infos["episode"]) 144 | cur_reward_sum += rewards 145 | cur_episode_length += 1 146 | new_ids = (dones > 0).nonzero(as_tuple=False) 147 | rewbuffer.extend( 148 | cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist() 149 | ) 150 | lenbuffer.extend( 151 | cur_episode_length[new_ids][:, 0].cpu().numpy().tolist() 152 | ) 153 | cur_reward_sum[new_ids] = 0 154 | cur_episode_length[new_ids] = 0 155 | 156 | stop = time.time() 157 | collection_time = stop - start 158 | 159 | # Learning step 160 | start = stop 161 | self.alg.compute_returns(critic_obs) 162 | 163 | mean_value_loss, mean_surrogate_loss = self.alg.update() 164 | stop = time.time() 165 | learn_time = stop - start 166 | if self.log_dir is not None: 167 | self.log(locals()) 168 | if it % self.save_interval == 0: 169 | self.save(os.path.join(self.log_dir, "model_{}.pt".format(it))) 170 | ep_infos.clear() 171 | 172 | self.current_learning_iteration += num_learning_iterations 173 | self.save( 174 | os.path.join( 175 | self.log_dir, "model_{}.pt".format(self.current_learning_iteration) 176 | ) 177 | ) 178 | 179 | def log(self, locs, width=80, pad=35): 180 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs 181 | self.tot_time += locs["collection_time"] + locs["learn_time"] 182 | iteration_time = locs["collection_time"] + locs["learn_time"] 183 | 184 | ep_string = f"" 185 | if locs["ep_infos"]: 186 | for key in locs["ep_infos"][0]: 187 | infotensor = torch.tensor([], device=self.device) 188 | for ep_info in locs["ep_infos"]: 189 | # handle scalar and zero dimensional tensor infos 190 | if not isinstance(ep_info[key], torch.Tensor): 191 | ep_info[key] = torch.Tensor([ep_info[key]]) 192 | if len(ep_info[key].shape) == 0: 193 | ep_info[key] = ep_info[key].unsqueeze(0) 194 | infotensor = torch.cat((infotensor, ep_info[key].to(self.device))) 195 | value = torch.mean(infotensor) 196 | self.writer.add_scalar("Episode/" + key, value, locs["it"]) 197 | ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n""" 198 | mean_std = self.alg.actor_critic.std.mean() 199 | fps = int( 200 | self.num_steps_per_env 201 | * self.env.num_envs 202 | / (locs["collection_time"] + locs["learn_time"]) 203 | ) 204 | 205 | self.writer.add_scalar( 206 | "Loss/value_function", locs["mean_value_loss"], locs["it"] 207 | ) 208 | self.writer.add_scalar( 209 | "Loss/surrogate", locs["mean_surrogate_loss"], locs["it"] 210 | ) 211 | self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"]) 212 | self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"]) 213 | self.writer.add_scalar("Perf/total_fps", fps, locs["it"]) 214 | self.writer.add_scalar( 215 | "Perf/collection time", locs["collection_time"], locs["it"] 216 | ) 217 | self.writer.add_scalar("Perf/learning_time", locs["learn_time"], locs["it"]) 218 | if len(locs["rewbuffer"]) > 0: 219 | self.writer.add_scalar( 220 | "Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"] 221 | ) 222 | self.writer.add_scalar( 223 | "Train/mean_episode_length", 224 | statistics.mean(locs["lenbuffer"]), 225 | locs["it"], 226 | ) 227 | self.writer.add_scalar( 228 | "Train/mean_reward/time", 229 | statistics.mean(locs["rewbuffer"]), 230 | self.tot_time, 
231 | ) 232 | self.writer.add_scalar( 233 | "Train/mean_episode_length/time", 234 | statistics.mean(locs["lenbuffer"]), 235 | self.tot_time, 236 | ) 237 | 238 | str = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m " 239 | 240 | if len(locs["rewbuffer"]) > 0: 241 | log_string = ( 242 | f"""{'#' * width}\n""" 243 | f"""{str.center(width, ' ')}\n\n""" 244 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 245 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 246 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 247 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 248 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""" 249 | f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n""" 250 | f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n""" 251 | ) 252 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 253 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 254 | else: 255 | log_string = ( 256 | f"""{'#' * width}\n""" 257 | f"""{str.center(width, ' ')}\n\n""" 258 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 259 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 260 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 261 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 262 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""" 263 | ) 264 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 265 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 266 | 267 | log_string += ep_string 268 | log_string += ( 269 | f"""{'-' * width}\n""" 270 | f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n""" 271 | f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n""" 272 | f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n""" 273 | f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * ( 274 | locs['num_learning_iterations'] - locs['it']):.1f}s\n""" 275 | ) 276 | print(log_string) 277 | 278 | def save(self, path, infos=None): 279 | torch.save( 280 | { 281 | "model_state_dict": self.alg.actor_critic.state_dict(), 282 | "optimizer_state_dict": self.alg.optimizer.state_dict(), 283 | "iter": self.current_learning_iteration, 284 | "infos": infos, 285 | }, 286 | path, 287 | ) 288 | 289 | def load(self, path, load_optimizer=True): 290 | loaded_dict = torch.load(path) 291 | self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"]) 292 | if load_optimizer: 293 | self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"]) 294 | self.current_learning_iteration = loaded_dict["iter"] 295 | return loaded_dict["infos"] 296 | 297 | def get_inference_policy(self, device=None): 298 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 299 | if device is not None: 300 | self.alg.actor_critic.to(device) 301 | return self.alg.actor_critic.act_inference 302 | 303 | def get_inference_critic(self, device=None): 304 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 305 | if device is not None: 306 | self.alg.actor_critic.to(device) 307 | return self.alg.actor_critic.evaluate 308 | -------------------------------------------------------------------------------- /humanoid/algo/ppo/ppo.py: 
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
31 | 32 | import torch 33 | import torch.nn as nn 34 | import torch.optim as optim 35 | 36 | from .actor_critic import ActorCritic 37 | from .rollout_storage import RolloutStorage 38 | 39 | class PPO: 40 | actor_critic: ActorCritic 41 | def __init__(self, 42 | actor_critic, 43 | num_learning_epochs=1, 44 | num_mini_batches=1, 45 | clip_param=0.2, 46 | gamma=0.998, 47 | lam=0.95, 48 | value_loss_coef=1.0, 49 | entropy_coef=0.0, 50 | learning_rate=1e-3, 51 | max_grad_norm=1.0, 52 | use_clipped_value_loss=True, 53 | schedule="fixed", 54 | desired_kl=0.01, 55 | device='cpu', 56 | ): 57 | 58 | self.device = device 59 | 60 | self.desired_kl = desired_kl 61 | self.schedule = schedule 62 | self.learning_rate = learning_rate 63 | 64 | # PPO components 65 | self.actor_critic = actor_critic 66 | self.actor_critic.to(self.device) 67 | self.storage = None # initialized later 68 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate) 69 | self.transition = RolloutStorage.Transition() 70 | 71 | # PPO parameters 72 | self.clip_param = clip_param 73 | self.num_learning_epochs = num_learning_epochs 74 | self.num_mini_batches = num_mini_batches 75 | self.value_loss_coef = value_loss_coef 76 | self.entropy_coef = entropy_coef 77 | self.gamma = gamma 78 | self.lam = lam 79 | self.max_grad_norm = max_grad_norm 80 | self.use_clipped_value_loss = use_clipped_value_loss 81 | 82 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape): 83 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device) 84 | 85 | def test_mode(self): 86 | self.actor_critic.test() 87 | 88 | def train_mode(self): 89 | self.actor_critic.train() 90 | 91 | def act(self, obs, critic_obs): 92 | # Compute the actions and values 93 | self.transition.actions = self.actor_critic.act(obs).detach() 94 | self.transition.values = self.actor_critic.evaluate(critic_obs).detach() 95 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach() 96 | self.transition.action_mean = self.actor_critic.action_mean.detach() 97 | self.transition.action_sigma = self.actor_critic.action_std.detach() 98 | # need to record obs and critic_obs before env.step() 99 | self.transition.observations = obs 100 | self.transition.critic_observations = critic_obs 101 | return self.transition.actions 102 | 103 | def process_env_step(self, rewards, dones, infos): 104 | self.transition.rewards = rewards.clone() 105 | self.transition.dones = dones 106 | # Bootstrapping on time outs 107 | if 'time_outs' in infos: 108 | self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1) 109 | 110 | # Record the transition 111 | self.storage.add_transitions(self.transition) 112 | self.transition.clear() 113 | self.actor_critic.reset(dones) 114 | 115 | def compute_returns(self, last_critic_obs): 116 | last_values= self.actor_critic.evaluate(last_critic_obs).detach() 117 | self.storage.compute_returns(last_values, self.gamma, self.lam) 118 | 119 | def update(self): 120 | mean_value_loss = 0 121 | mean_surrogate_loss = 0 122 | 123 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 124 | for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \ 125 | old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in 
generator: 126 | 127 | 128 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0]) 129 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 130 | value_batch = self.actor_critic.evaluate(critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1]) 131 | mu_batch = self.actor_critic.action_mean 132 | sigma_batch = self.actor_critic.action_std 133 | entropy_batch = self.actor_critic.entropy 134 | 135 | # KL 136 | if self.desired_kl != None and self.schedule == 'adaptive': 137 | with torch.inference_mode(): 138 | kl = torch.sum( 139 | torch.log(sigma_batch / old_sigma_batch + 1.e-5) + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, axis=-1) 140 | kl_mean = torch.mean(kl) 141 | 142 | if kl_mean > self.desired_kl * 2.0: 143 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 144 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 145 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 146 | 147 | for param_group in self.optimizer.param_groups: 148 | param_group['lr'] = self.learning_rate 149 | 150 | 151 | # Surrogate loss 152 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 153 | surrogate = -torch.squeeze(advantages_batch) * ratio 154 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param, 155 | 1.0 + self.clip_param) 156 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 157 | 158 | # Value function loss 159 | if self.use_clipped_value_loss: 160 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param, 161 | self.clip_param) 162 | value_losses = (value_batch - returns_batch).pow(2) 163 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 164 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 165 | else: 166 | value_loss = (returns_batch - value_batch).pow(2).mean() 167 | 168 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean() 169 | 170 | # Gradient step 171 | self.optimizer.zero_grad() 172 | loss.backward() 173 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 174 | self.optimizer.step() 175 | 176 | mean_value_loss += value_loss.item() 177 | mean_surrogate_loss += surrogate_loss.item() 178 | 179 | num_updates = self.num_learning_epochs * self.num_mini_batches 180 | mean_value_loss /= num_updates 181 | mean_surrogate_loss /= num_updates 182 | self.storage.clear() 183 | 184 | return mean_value_loss, mean_surrogate_loss 185 | -------------------------------------------------------------------------------- /humanoid/algo/ppo/rollout_storage.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import torch 34 | 35 | class RolloutStorage: 36 | class Transition: 37 | def __init__(self): 38 | self.observations = None 39 | self.critic_observations = None 40 | self.actions = None 41 | self.rewards = None 42 | self.dones = None 43 | self.values = None 44 | self.actions_log_prob = None 45 | self.action_mean = None 46 | self.action_sigma = None 47 | self.hidden_states = None 48 | 49 | def clear(self): 50 | self.__init__() 51 | 52 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'): 53 | 54 | self.device = device 55 | 56 | self.obs_shape = obs_shape 57 | self.privileged_obs_shape = privileged_obs_shape 58 | self.actions_shape = actions_shape 59 | 60 | # Core 61 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device) 62 | if privileged_obs_shape[0] is not None: 63 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device) 64 | else: 65 | self.privileged_observations = None 66 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 67 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 68 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 69 | 70 | # For PPO 71 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 72 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 73 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 74 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 75 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 76 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 77 | 78 | self.num_transitions_per_env = num_transitions_per_env 79 | self.num_envs = num_envs 80 | 81 | # rnn 82 | self.saved_hidden_states_a = None 83 | self.saved_hidden_states_c = None 84 | 85 | self.step = 0 86 | 87 | def 
add_transitions(self, transition: Transition): 88 | if self.step >= self.num_transitions_per_env: 89 | raise AssertionError("Rollout buffer overflow") 90 | self.observations[self.step].copy_(transition.observations) 91 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations) 92 | self.actions[self.step].copy_(transition.actions) 93 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 94 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 95 | self.values[self.step].copy_(transition.values) 96 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 97 | self.mu[self.step].copy_(transition.action_mean) 98 | self.sigma[self.step].copy_(transition.action_sigma) 99 | self._save_hidden_states(transition.hidden_states) 100 | self.step += 1 101 | 102 | def _save_hidden_states(self, hidden_states): 103 | if hidden_states is None or hidden_states==(None, None): 104 | return 105 | # make a tuple out of GRU hidden state sto match the LSTM format 106 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],) 107 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],) 108 | 109 | # initialize if needed 110 | if self.saved_hidden_states_a is None: 111 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))] 112 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))] 113 | # copy the states 114 | for i in range(len(hid_a)): 115 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i]) 116 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i]) 117 | 118 | 119 | def clear(self): 120 | self.step = 0 121 | 122 | def compute_returns(self, last_values, gamma, lam): 123 | advantage = 0 124 | for step in reversed(range(self.num_transitions_per_env)): 125 | if step == self.num_transitions_per_env - 1: 126 | next_values = last_values 127 | else: 128 | next_values = self.values[step + 1] 129 | next_is_not_terminal = 1.0 - self.dones[step].float() 130 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 131 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 132 | self.returns[step] = advantage + self.values[step] 133 | 134 | # Compute and normalize the advantages 135 | self.advantages = self.returns - self.values 136 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 137 | 138 | def get_statistics(self): 139 | done = self.dones 140 | done[-1] = 1 141 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 142 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 143 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 144 | return trajectory_lengths.float().mean(), self.rewards.mean() 145 | 146 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 147 | batch_size = self.num_envs * self.num_transitions_per_env 148 | mini_batch_size = batch_size // num_mini_batches 149 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 150 | 151 | observations = self.observations.flatten(0, 1) 152 | if self.privileged_observations is not None: 153 | critic_observations = self.privileged_observations.flatten(0, 1) 154 | else: 155 | critic_observations = 
observations 156 | 157 | actions = self.actions.flatten(0, 1) 158 | values = self.values.flatten(0, 1) 159 | returns = self.returns.flatten(0, 1) 160 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 161 | advantages = self.advantages.flatten(0, 1) 162 | old_mu = self.mu.flatten(0, 1) 163 | old_sigma = self.sigma.flatten(0, 1) 164 | 165 | for epoch in range(num_epochs): 166 | for i in range(num_mini_batches): 167 | 168 | start = i*mini_batch_size 169 | end = (i+1)*mini_batch_size 170 | batch_idx = indices[start:end] 171 | 172 | obs_batch = observations[batch_idx] 173 | critic_observations_batch = critic_observations[batch_idx] 174 | actions_batch = actions[batch_idx] 175 | target_values_batch = values[batch_idx] 176 | returns_batch = returns[batch_idx] 177 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 178 | advantages_batch = advantages[batch_idx] 179 | old_mu_batch = old_mu[batch_idx] 180 | old_sigma_batch = old_sigma[batch_idx] 181 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 182 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None 183 | -------------------------------------------------------------------------------- /humanoid/algo/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
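An aside on the rollout storage above: `compute_returns` implements Generalized Advantage Estimation, GAE(lambda). For intuition, here is a minimal single-environment sketch of the same backward recursion. It is standalone and illustrative only, not part of the repo; all names (`gae`, the toy tensors) are made up for the example.

```python
# Minimal single-env GAE sketch mirroring RolloutStorage.compute_returns:
# delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
# A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}
import torch

def gae(rewards, values, dones, last_value, gamma=0.99, lam=0.95):
    T = rewards.shape[0]
    advantages = torch.zeros(T)
    adv = 0.0
    for t in reversed(range(T)):
        next_value = last_value if t == T - 1 else values[t + 1]
        not_terminal = 1.0 - dones[t]
        delta = rewards[t] + gamma * not_terminal * next_value - values[t]
        adv = delta + gamma * lam * not_terminal * adv
        advantages[t] = adv
    returns = advantages + values   # same identity as returns[step] above
    return advantages, returns

# toy rollout of 3 steps, episode ends at the last step
adv, ret = gae(torch.tensor([1.0, 1.0, 1.0]),
               torch.tensor([0.5, 0.5, 0.5]),
               torch.tensor([0.0, 0.0, 1.0]),
               last_value=torch.tensor(0.5))
```

The storage class then normalizes these advantages over the whole batch before `mini_batch_generator` shuffles and slices them for the PPO epochs.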
31 | 32 | import torch 33 | from typing import Tuple, Union 34 | from abc import ABC, abstractmethod 35 | 36 | # minimal interface of the environment 37 | class VecEnv(ABC): 38 | num_envs: int 39 | num_obs: int 40 | num_privileged_obs: int 41 | num_actions: int 42 | max_episode_length: int 43 | privileged_obs_buf: torch.Tensor 44 | obs_buf: torch.Tensor 45 | rew_buf: torch.Tensor 46 | reset_buf: torch.Tensor 47 | episode_length_buf: torch.Tensor # current episode duration 48 | extras: dict 49 | device: torch.device 50 | @abstractmethod 51 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 52 | pass 53 | @abstractmethod 54 | def reset(self, env_ids: Union[list, torch.Tensor]): 55 | pass 56 | @abstractmethod 57 | def get_observations(self) -> torch.Tensor: 58 | pass 59 | @abstractmethod 60 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 61 | pass -------------------------------------------------------------------------------- /humanoid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
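The `VecEnv` ABC just defined is the minimal contract the PPO runner programs against: typed buffers plus four abstract methods. As a hedged sketch of the smallest conforming implementation (illustrative only; the real environments are `BaseTask`/`LeggedRobot` further down, and `ZeroVecEnv` with its dummy dynamics is invented here):

```python
# Illustrative stub satisfying the VecEnv interface above.
import torch
from humanoid.algo.vec_env import VecEnv

class ZeroVecEnv(VecEnv):
    def __init__(self, num_envs=4, num_obs=8, num_actions=2, device='cpu'):
        self.num_envs, self.num_obs, self.num_actions = num_envs, num_obs, num_actions
        self.num_privileged_obs = None
        self.max_episode_length = 100
        self.device = torch.device(device)
        self.obs_buf = torch.zeros(num_envs, num_obs, device=self.device)
        self.privileged_obs_buf = None
        self.rew_buf = torch.zeros(num_envs, device=self.device)
        self.reset_buf = torch.zeros(num_envs, device=self.device)
        self.episode_length_buf = torch.zeros(num_envs, dtype=torch.long, device=self.device)
        self.extras = {}

    def step(self, actions):
        # dummy dynamics: reward the mean action, never terminate
        self.episode_length_buf += 1
        self.rew_buf = actions.mean(dim=-1)
        return self.obs_buf, None, self.rew_buf, self.reset_buf, self.extras

    def reset(self, env_ids):
        self.episode_length_buf[env_ids] = 0

    def get_observations(self):
        return self.obs_buf

    def get_privileged_observations(self):
        return None
```

The five-tuple returned by `step()` (obs, privileged obs or None, rewards, resets, extras) is exactly what `OnPolicyRunner` unpacks each iteration.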
31 | 32 | 33 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 34 | from .base.legged_robot import LeggedRobot 35 | 36 | from .custom.humanoid_config import XBotLCfg, XBotLCfgPPO 37 | from .custom.humanoid_env import XBotLFreeEnv 38 | 39 | from humanoid.utils.task_registry import task_registry 40 | 41 | 42 | task_registry.register( "humanoid_ppo", XBotLFreeEnv, XBotLCfg(), XBotLCfgPPO() ) 43 | -------------------------------------------------------------------------------- /humanoid/envs/base/LICENSE: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | SPDX-License-Identifier: BSD-3-Clause 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. -------------------------------------------------------------------------------- /humanoid/envs/base/base_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. 
Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import inspect 33 | 34 | class BaseConfig: 35 | def __init__(self) -> None: 36 | """ Initializes all member classes recursively. Ignores all names starting with '__' (built-in methods).""" 37 | self.init_member_classes(self) 38 | 39 | @staticmethod 40 | def init_member_classes(obj): 41 | # iterate over all attribute names 42 | for key in dir(obj): 43 | # disregard builtin attributes 44 | # if key.startswith("__"): 45 | if key == "__class__": 46 | continue 47 | # get the corresponding attribute object 48 | var = getattr(obj, key) 49 | # check if the attribute is a class 50 | if inspect.isclass(var): 51 | # instantiate the class 52 | i_var = var() 53 | # set the attribute to the instance instead of the type 54 | setattr(obj, key, i_var) 55 | # recursively init members of the attribute 56 | BaseConfig.init_member_classes(i_var) -------------------------------------------------------------------------------- /humanoid/envs/base/base_task.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import sys 33 | from isaacgym import gymapi 34 | from isaacgym import gymutil 35 | import numpy as np 36 | import torch 37 | 38 | # Base class for RL tasks 39 | 40 | 41 | class BaseTask(): 42 | 43 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): 44 | self.gym = gymapi.acquire_gym() 45 | 46 | self.sim_params = sim_params 47 | self.physics_engine = physics_engine 48 | self.sim_device = sim_device 49 | sim_device_type, self.sim_device_id = gymutil.parse_device_str( 50 | self.sim_device) 51 | self.headless = headless 52 | 53 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by PhysX. 54 | if sim_device_type == 'cuda' and sim_params.use_gpu_pipeline: 55 | self.device = self.sim_device 56 | else: 57 | self.device = 'cpu' 58 | 59 | # graphics device for rendering, -1 for no rendering 60 | self.graphics_device_id = self.sim_device_id 61 | 62 | self.num_envs = cfg.env.num_envs 63 | self.num_obs = cfg.env.num_observations 64 | self.num_privileged_obs = cfg.env.num_privileged_obs 65 | self.num_actions = cfg.env.num_actions 66 | 67 | # optimization flags for pytorch JIT 68 | torch._C._jit_set_profiling_mode(False) 69 | torch._C._jit_set_profiling_executor(False) 70 | 71 | # allocate buffers 72 | self.obs_buf = torch.zeros( 73 | self.num_envs, self.num_obs, device=self.device, dtype=torch.float) 74 | self.rew_buf = torch.zeros( 75 | self.num_envs, device=self.device, dtype=torch.float) 76 | # new reward buffers for exp rewards 77 | self.neg_reward_buf = torch.zeros( 78 | self.num_envs, device=self.device, dtype=torch.float) 79 | self.pos_reward_buf = torch.zeros( 80 | self.num_envs, device=self.device, dtype=torch.float) 81 | 82 | self.reset_buf = torch.ones( 83 | self.num_envs, device=self.device, dtype=torch.long) 84 | self.episode_length_buf = torch.zeros( 85 | self.num_envs, device=self.device, dtype=torch.long) 86 | self.time_out_buf = torch.zeros( 87 | self.num_envs, device=self.device, dtype=torch.bool) 88 | if self.num_privileged_obs is not None: 89 | self.privileged_obs_buf = torch.zeros( 90 | self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float) 91 | else: 92 | self.privileged_obs_buf = None 93 | 94 | self.extras = {} 95 | 96 | # create envs, sim and viewer 97 | self.create_sim() 98 | self.gym.prepare_sim(self.sim) 99 | self.enable_viewer_sync = True 100 | self.viewer = None 101 | 102 | # if running with a viewer, set up keyboard shortcuts and camera 103 | if not self.headless: 104 | # subscribe to keyboard shortcuts 105 | self.viewer = self.gym.create_viewer( 106 | self.sim, gymapi.CameraProperties()) 107 | self.gym.subscribe_viewer_keyboard_event( 108 | self.viewer, gymapi.KEY_ESCAPE, "QUIT") 109 | self.gym.subscribe_viewer_keyboard_event( 110 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync") 111 | 112 | camera_properties
= gymapi.CameraProperties() 113 | camera_properties.width = 720 114 | camera_properties.height = 480 115 | camera_handle = self.gym.create_camera_sensor( 116 | self.envs[0], camera_properties) 117 | self.camera_handle = camera_handle 118 | else: 119 | # pass 120 | camera_properties = gymapi.CameraProperties() 121 | camera_properties.width = 720 122 | camera_properties.height = 480 123 | camera_handle = self.gym.create_camera_sensor( 124 | self.envs[0], camera_properties) 125 | self.camera_handle = camera_handle 126 | 127 | def get_observations(self): 128 | return self.obs_buf 129 | 130 | def get_privileged_observations(self): 131 | return self.privileged_obs_buf 132 | 133 | def get_rma_observations(self): 134 | return self.rma_obs_buf 135 | 136 | def reset_idx(self, env_ids): 137 | """Reset selected robots""" 138 | raise NotImplementedError 139 | 140 | def reset(self): 141 | """ Reset all robots""" 142 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 143 | obs, privileged_obs, _, _, _ = self.step(torch.zeros( 144 | self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 145 | return obs, privileged_obs 146 | 147 | def step(self, actions): 148 | raise NotImplementedError 149 | 150 | def render(self, sync_frame_time=True): 151 | if self.viewer: 152 | # check for window closed 153 | if self.gym.query_viewer_has_closed(self.viewer): 154 | sys.exit() 155 | 156 | # check for keyboard events 157 | for evt in self.gym.query_viewer_action_events(self.viewer): 158 | if evt.action == "QUIT" and evt.value > 0: 159 | sys.exit() 160 | elif evt.action == "toggle_viewer_sync" and evt.value > 0: 161 | self.enable_viewer_sync = not self.enable_viewer_sync 162 | 163 | # fetch results 164 | if self.device != 'cpu': 165 | self.gym.fetch_results(self.sim, True) 166 | 167 | # step graphics 168 | if self.enable_viewer_sync: 169 | self.gym.step_graphics(self.sim) 170 | self.gym.draw_viewer(self.viewer, self.sim, True) 171 | if sync_frame_time: 172 | self.gym.sync_frame_time(self.sim) 173 | else: 174 | self.gym.poll_viewer_events(self.viewer) 175 | -------------------------------------------------------------------------------- /humanoid/envs/base/legged_robot_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 
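The `BaseTask` API above fixes the step/reset/render contract every script in the repo relies on. Below is a hedged usage sketch of that contract; `env` and `policy` are assumed to already exist (building a real env requires Isaac Gym and goes through `task_registry`), and `rollout` is a name invented for this example.

```python
# Sketch of the typical interaction loop with a BaseTask-derived env.
import torch

def rollout(env, policy, num_steps=1000):
    obs, _ = env.reset()                      # reset() performs one zero-action step
    for _ in range(num_steps):
        with torch.no_grad():
            actions = policy(obs)             # shape: (num_envs, num_actions)
        obs, privileged_obs, rewards, dones, infos = env.step(actions)
        env.render()                          # no-op when no viewer exists
```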
18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | from .base_config import BaseConfig 33 | 34 | class LeggedRobotCfg(BaseConfig): 35 | class env: 36 | num_envs = 4096 37 | num_observations = 235 38 | num_privileged_obs = None # if not None, a privileged_obs_buf is returned by step() (critic obs for asymmetric training); otherwise None is returned 39 | num_actions = 12 40 | env_spacing = 3. # not used with heightfields/trimeshes 41 | send_timeouts = True # send time out information to the algorithm 42 | episode_length_s = 20 # episode length in seconds 43 | 44 | class terrain: 45 | mesh_type = 'trimesh' # "heightfield" # none, plane, heightfield or trimesh 46 | horizontal_scale = 0.1 # [m] 47 | vertical_scale = 0.005 # [m] 48 | border_size = 25 # [m] 49 | curriculum = True 50 | static_friction = 1.0 51 | dynamic_friction = 1.0 52 | restitution = 0. 53 | # rough terrain only: 54 | measure_heights = True 55 | measured_points_x = [-0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] # 1m x 1.6m rectangle (without center line) 56 | measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5] 57 | selected = False # select a unique terrain type and pass all arguments 58 | terrain_kwargs = None # Dict of arguments for selected terrain 59 | max_init_terrain_level = 5 # starting curriculum state 60 | terrain_length = 8. 61 | terrain_width = 8. 62 | num_rows = 10 # number of terrain rows (levels) 63 | num_cols = 20 # number of terrain cols (types) 64 | # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete] 65 | terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2] 66 | # trimesh only: 67 | slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces 68 | 69 | class commands: 70 | curriculum = False 71 | max_curriculum = 1. 72 | num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) 73 | resampling_time = 10. # time before commands are changed [s] 74 | heading_command = True # if true: compute ang vel command from heading error 75 | class ranges: 76 | lin_vel_x = [-1.0, 1.0] # min max [m/s] 77 | lin_vel_y = [-1.0, 1.0] # min max [m/s] 78 | ang_vel_yaw = [-1, 1] # min max [rad/s] 79 | heading = [-3.14, 3.14] 80 | 81 | class init_state: 82 | pos = [0.0, 0.0, 1.]
# x,y,z [m] 83 | rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] 84 | lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] 85 | ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] 86 | default_joint_angles = { # target angles when action = 0.0 87 | "joint_a": 0., 88 | "joint_b": 0.} 89 | 90 | class control: 91 | # PD Drive parameters: 92 | stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad] 93 | damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad] 94 | # action scale: target angle = actionScale * action + defaultAngle 95 | action_scale = 0.5 96 | # decimation: Number of control action updates @ sim DT per policy DT 97 | decimation = 4 98 | 99 | class asset: 100 | file = "" 101 | name = "legged_robot" # actor name 102 | foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors 103 | penalize_contacts_on = [] 104 | terminate_after_contacts_on = [] 105 | disable_gravity = False 106 | collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true"> 107 | fix_base_link = False # fix the base of the robot 108 | default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 is effort) 109 | self_collisions = 0 # 1 to disable, 0 to enable... bitwise filter 110 | replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation 111 | flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up 112 | 113 | density = 0.001 114 | angular_damping = 0. 115 | linear_damping = 0. 116 | max_angular_velocity = 1000. 117 | max_linear_velocity = 1000. 118 | armature = 0. 119 | thickness = 0.01 120 | 121 | 122 | class domain_rand: 123 | randomize_friction = True 124 | friction_range = [0.5, 1.25] 125 | randomize_base_mass = False 126 | added_mass_range = [-1., 1.] 127 | push_robots = True 128 | push_interval_s = 15 129 | max_push_vel_xy = 1. 130 | 131 | 132 | class rewards: 133 | class scales: 134 | termination = -0.0 135 | tracking_lin_vel = 1.0 136 | tracking_ang_vel = 0.5 137 | lin_vel_z = -2.0 138 | ang_vel_xy = -0.05 139 | orientation = -0. 140 | torques = -0.00001 141 | dof_vel = -0. 142 | dof_acc = -2.5e-7 143 | base_height = -0. 144 | feet_air_time = 1.0 145 | collision = -1. 146 | feet_stumble = -0.0 147 | action_rate = -0. 148 | stand_still = -0. 149 | 150 | only_positive_rewards = True # if true, negative total rewards are clipped at zero (avoids early termination problems) 151 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 152 | max_contact_force = 100. # forces above this value are penalized 153 | 154 | class normalization: 155 | class obs_scales: 156 | lin_vel = 2.0 157 | ang_vel = 0.25 158 | dof_pos = 1.0 159 | dof_vel = 0.05 160 | height_measurements = 5.0 161 | clip_observations = 100. 162 | clip_actions = 100. 163 | 164 | class noise: 165 | add_noise = True 166 | noise_level = 1.0 # scales other values 167 | class noise_scales: 168 | dof_pos = 0.01 169 | dof_vel = 1.5 170 | lin_vel = 0.1 171 | ang_vel = 0.2 172 | gravity = 0.05 173 | height_measurements = 0.1 174 | 175 | # viewer camera: 176 | class viewer: 177 | ref_env = 0 178 | pos = [10, 0, 6] # [m] 179 | lookat = [11., 5, 3.] # [m] 180 | 181 | class sim: 182 | dt = 0.005 183 | substeps = 1 184 | gravity = [0., 0.
,-9.81] # [m/s^2] 185 | up_axis = 1 # 0 is y, 1 is z 186 | 187 | class physx: 188 | num_threads = 10 189 | solver_type = 1 # 0: pgs, 1: tgs 190 | num_position_iterations = 4 191 | num_velocity_iterations = 0 192 | contact_offset = 0.01 # [m] 193 | rest_offset = 0.0 # [m] 194 | bounce_threshold_velocity = 0.5 #0.5 [m/s] 195 | max_depenetration_velocity = 1.0 196 | max_gpu_contact_pairs = 2**23 #2**24 -> needed for 8000 envs and more 197 | default_buffer_size_multiplier = 5 198 | contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 199 | 200 | class LeggedRobotCfgPPO(BaseConfig): 201 | seed = 1 202 | runner_class_name = 'OnPolicyRunner' 203 | class policy: 204 | init_noise_std = 1.0 205 | actor_hidden_dims = [512, 256, 128] 206 | critic_hidden_dims = [512, 256, 128] 207 | 208 | class algorithm: 209 | # training params 210 | value_loss_coef = 1.0 211 | use_clipped_value_loss = True 212 | clip_param = 0.2 213 | entropy_coef = 0.01 214 | num_learning_epochs = 5 215 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 216 | learning_rate = 1.e-3 #5.e-4 217 | schedule = 'adaptive' # could be adaptive, fixed 218 | gamma = 0.99 219 | lam = 0.95 220 | desired_kl = 0.01 221 | max_grad_norm = 1. 222 | 223 | class runner: 224 | policy_class_name = 'ActorCritic' 225 | algorithm_class_name = 'PPO' 226 | num_steps_per_env = 24 # per iteration 227 | max_iterations = 1500 # number of policy updates 228 | 229 | # logging 230 | save_interval = 100 # check for potential saves every this many iterations 231 | experiment_name = 'test' 232 | run_name = '' 233 | # load and resume 234 | resume = False 235 | load_run = -1 # -1 = last run 236 | checkpoint = -1 # -1 = last saved model 237 | resume_path = None # updated from load_run and chkpt -------------------------------------------------------------------------------- /humanoid/envs/custom/humanoid_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # 3. Neither the name of the copyright holder nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 29 | 30 | 31 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 32 | 33 | 34 | class XBotLCfg(LeggedRobotCfg): 35 | """ 36 | Configuration class for the XBotL humanoid robot. 37 | """ 38 | class env(LeggedRobotCfg.env): 39 | # change the observation dim 40 | frame_stack = 15 41 | c_frame_stack = 3 42 | num_single_obs = 47 43 | num_observations = int(frame_stack * num_single_obs) 44 | single_num_privileged_obs = 73 45 | num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) 46 | num_actions = 12 47 | num_envs = 4096 48 | episode_length_s = 24 # episode length in seconds 49 | use_ref_actions = False # speed up training by using reference actions 50 | 51 | class safety: 52 | # safety factors 53 | pos_limit = 1.0 54 | vel_limit = 1.0 55 | torque_limit = 0.85 56 | 57 | class asset(LeggedRobotCfg.asset): 58 | file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/urdf/XBot-L.urdf' 59 | 60 | name = "XBot-L" 61 | foot_name = "ankle_roll" 62 | knee_name = "knee" 63 | 64 | terminate_after_contacts_on = ['base_link'] 65 | penalize_contacts_on = ["base_link"] 66 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter 67 | flip_visual_attachments = False 68 | replace_cylinder_with_capsule = False 69 | fix_base_link = False 70 | 71 | class terrain(LeggedRobotCfg.terrain): 72 | mesh_type = 'plane' 73 | # mesh_type = 'trimesh' 74 | curriculum = False 75 | # rough terrain only: 76 | measure_heights = False 77 | static_friction = 0.6 78 | dynamic_friction = 0.6 79 | terrain_length = 8. 80 | terrain_width = 8. 81 | num_rows = 20 # number of terrain rows (levels) 82 | num_cols = 20 # number of terrain cols (types) 83 | max_init_terrain_level = 10 # starting curriculum state 84 | # plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down 85 | terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0] 86 | restitution = 0. 
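A note on the dimensions in `XBotLCfg.env` above: the actor and critic inputs are frame-stacked single observations. A quick check of the arithmetic (values copied from the config; the 47-dim breakdown follows `compute_observations` later in humanoid_env.py):

```python
# Single-frame actor obs: 5 (sin/cos phase + 3 velocity commands)
# + 12 (joint pos) + 12 (joint vel) + 12 (last actions)
# + 3 (base ang vel) + 3 (base euler) = 47, stacked over 15 frames.
frame_stack, num_single_obs = 15, 47
c_frame_stack, single_num_privileged_obs = 3, 73

assert frame_stack * num_single_obs == 705                # actor input dim
assert c_frame_stack * single_num_privileged_obs == 219   # critic input dim
```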
87 | 88 | class noise: 89 | add_noise = True 90 | noise_level = 0.6 # scales other values 91 | 92 | class noise_scales: 93 | dof_pos = 0.05 94 | dof_vel = 0.5 95 | ang_vel = 0.1 96 | lin_vel = 0.05 97 | quat = 0.03 98 | height_measurements = 0.1 99 | 100 | class init_state(LeggedRobotCfg.init_state): 101 | pos = [0.0, 0.0, 0.95] 102 | 103 | default_joint_angles = { # = target angles [rad] when action = 0.0 104 | 'left_leg_roll_joint': 0., 105 | 'left_leg_yaw_joint': 0., 106 | 'left_leg_pitch_joint': 0., 107 | 'left_knee_joint': 0., 108 | 'left_ankle_pitch_joint': 0., 109 | 'left_ankle_roll_joint': 0., 110 | 'right_leg_roll_joint': 0., 111 | 'right_leg_yaw_joint': 0., 112 | 'right_leg_pitch_joint': 0., 113 | 'right_knee_joint': 0., 114 | 'right_ankle_pitch_joint': 0., 115 | 'right_ankle_roll_joint': 0., 116 | } 117 | 118 | class control(LeggedRobotCfg.control): 119 | # PD Drive parameters: 120 | stiffness = {'leg_roll': 200.0, 'leg_pitch': 350.0, 'leg_yaw': 200.0, 121 | 'knee': 350.0, 'ankle': 15} 122 | damping = {'leg_roll': 10, 'leg_pitch': 10, 'leg_yaw': 123 | 10, 'knee': 10, 'ankle': 10} 124 | 125 | # action scale: target angle = actionScale * action + defaultAngle 126 | action_scale = 0.25 127 | # decimation: Number of control action updates @ sim DT per policy DT 128 | decimation = 10 # 100 Hz 129 | 130 | class sim(LeggedRobotCfg.sim): 131 | dt = 0.001 # 1000 Hz 132 | substeps = 1 133 | up_axis = 1 # 0 is y, 1 is z 134 | 135 | class physx(LeggedRobotCfg.sim.physx): 136 | num_threads = 10 137 | solver_type = 1 # 0: pgs, 1: tgs 138 | num_position_iterations = 4 139 | num_velocity_iterations = 1 140 | contact_offset = 0.01 # [m] 141 | rest_offset = 0.0 # [m] 142 | bounce_threshold_velocity = 0.1 # [m/s] 143 | max_depenetration_velocity = 1.0 144 | max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more 145 | default_buffer_size_multiplier = 5 146 | # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 147 | contact_collection = 2 148 | 149 | class domain_rand: 150 | randomize_friction = True 151 | friction_range = [0.1, 2.0] 152 | randomize_base_mass = True 153 | added_mass_range = [-5., 5.] 154 | push_robots = True 155 | push_interval_s = 4 156 | max_push_vel_xy = 0.2 157 | max_push_ang_vel = 0.4 158 | # dynamic randomization 159 | action_delay = 0.5 160 | action_noise = 0.02 161 | 162 | class commands(LeggedRobotCfg.commands): 163 | # Commands: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) 164 | num_commands = 4 165 | resampling_time = 8. # time before commands are changed [s] 166 | heading_command = True # if true: compute ang vel command from heading error 167 | 168 | class ranges: 169 | lin_vel_x = [-0.3, 0.6] # min max [m/s] 170 | lin_vel_y = [-0.3, 0.3] # min max [m/s] 171 | ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] 172 | heading = [-3.14, 3.14] 173 | 174 | class rewards: 175 | base_height_target = 0.89 176 | min_dist = 0.2 177 | max_dist = 0.5 178 | # put some settings here for LLM parameter tuning 179 | target_joint_pos_scale = 0.17 # rad 180 | target_feet_height = 0.06 # m 181 | cycle_time = 0.64 # sec 182 | # if true, negative total rewards are clipped at zero (avoids early termination problems) 183 | only_positive_rewards = True 184 | # tracking reward = exp(-error*sigma) 185 | tracking_sigma = 5 186 | max_contact_force = 700 # Forces above this value are penalized 187 | 188 | class scales: 189 | # reference motion tracking 190 | joint_pos = 1.6 191 | feet_clearance = 1.
192 | feet_contact_number = 1.2 193 | # gait 194 | feet_air_time = 1. 195 | foot_slip = -0.05 196 | feet_distance = 0.2 197 | knee_distance = 0.2 198 | # contact 199 | feet_contact_forces = -0.01 200 | # vel tracking 201 | tracking_lin_vel = 1.2 202 | tracking_ang_vel = 1.1 203 | vel_mismatch_exp = 0.5 # lin_z; ang x,y 204 | low_speed = 0.2 205 | track_vel_hard = 0.5 206 | # base pos 207 | default_joint_pos = 0.5 208 | orientation = 1. 209 | base_height = 0.2 210 | base_acc = 0.2 211 | # energy 212 | action_smoothness = -0.002 213 | torques = -1e-5 214 | dof_vel = -5e-4 215 | dof_acc = -1e-7 216 | collision = -1. 217 | 218 | class normalization: 219 | class obs_scales: 220 | lin_vel = 2. 221 | ang_vel = 1. 222 | dof_pos = 1. 223 | dof_vel = 0.05 224 | quat = 1. 225 | height_measurements = 5.0 226 | clip_observations = 18. 227 | clip_actions = 18. 228 | 229 | 230 | class XBotLCfgPPO(LeggedRobotCfgPPO): 231 | seed = 5 232 | runner_class_name = 'OnPolicyRunner' # DWLOnPolicyRunner 233 | 234 | class policy: 235 | init_noise_std = 1.0 236 | actor_hidden_dims = [512, 256, 128] 237 | critic_hidden_dims = [768, 256, 128] 238 | 239 | class algorithm(LeggedRobotCfgPPO.algorithm): 240 | entropy_coef = 0.001 241 | learning_rate = 1e-5 242 | num_learning_epochs = 2 243 | gamma = 0.994 244 | lam = 0.9 245 | num_mini_batches = 4 246 | 247 | class runner: 248 | policy_class_name = 'ActorCritic' 249 | algorithm_class_name = 'PPO' 250 | num_steps_per_env = 60 # per iteration 251 | max_iterations = 3001 # number of policy updates 252 | 253 | # logging 254 | save_interval = 100 # check for potential saves every `save_interval` iterations 255 | experiment_name = 'XBot_ppo' 256 | run_name = '' 257 | # load and resume 258 | resume = False 259 | load_run = -1 # -1 = last run 260 | checkpoint = -1 # -1 = last saved model 261 | resume_path = None # updated from load_run and chkpt 262 | -------------------------------------------------------------------------------- /humanoid/envs/custom/humanoid_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # 3. Neither the name of the copyright holder nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 29 | 30 | 31 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg 32 | 33 | from isaacgym.torch_utils import * 34 | from isaacgym import gymtorch, gymapi 35 | 36 | import torch 37 | from humanoid.envs import LeggedRobot 38 | 39 | from humanoid.utils.terrain import HumanoidTerrain 40 | 41 | 42 | class XBotLFreeEnv(LeggedRobot): 43 | ''' 44 | XBotLFreeEnv is a class that represents a custom environment for a legged robot. 45 | 46 | Args: 47 | cfg (LeggedRobotCfg): Configuration object for the legged robot. 48 | sim_params: Parameters for the simulation. 49 | physics_engine: Physics engine used in the simulation. 50 | sim_device: Device used for the simulation. 51 | headless: Flag indicating whether the simulation should be run in headless mode. 52 | 53 | Attributes: 54 | last_feet_z (float): The z-coordinate of the last feet position. 55 | feet_height (torch.Tensor): Tensor representing the height of the feet. 56 | sim (gymtorch.GymSim): The simulation object. 57 | terrain (HumanoidTerrain): The terrain object. 58 | up_axis_idx (int): The index representing the up axis. 59 | command_input (torch.Tensor): Tensor representing the command input. 60 | privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. 61 | obs_buf (torch.Tensor): Tensor representing the observations buffer. 62 | obs_history (collections.deque): Deque containing the history of observations. 63 | critic_history (collections.deque): Deque containing the history of critic observations. 64 | 65 | Methods: 66 | _push_robots(): Randomly pushes the robots by setting a randomized base velocity. 67 | _get_phase(): Calculates the phase of the gait cycle. 68 | _get_gait_phase(): Calculates the gait phase. 69 | compute_ref_state(): Computes the reference state. 70 | create_sim(): Creates the simulation, terrain, and environments. 71 | _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. 72 | step(actions): Performs a simulation step with the given actions. 73 | compute_observations(): Computes the observations. 74 | reset_idx(env_ids): Resets the environment for the specified environment IDs. 75 | ''' 76 | def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless): 77 | super().__init__(cfg, sim_params, physics_engine, sim_device, headless) 78 | self.last_feet_z = 0.05 79 | self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) 80 | self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) 81 | self.compute_observations() 82 | 83 | def _push_robots(self): 84 | """ Randomly pushes the robots. Emulates an impulse by setting a randomized base velocity.
85 | """ 86 | max_vel = self.cfg.domain_rand.max_push_vel_xy 87 | max_push_angular = self.cfg.domain_rand.max_push_ang_vel 88 | self.rand_push_force[:, :2] = torch_rand_float( 89 | -max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y 90 | self.root_states[:, 7:9] = self.rand_push_force[:, :2] 91 | 92 | self.rand_push_torque = torch_rand_float( 93 | -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device) 94 | 95 | self.root_states[:, 10:13] = self.rand_push_torque 96 | 97 | self.gym.set_actor_root_state_tensor( 98 | self.sim, gymtorch.unwrap_tensor(self.root_states)) 99 | 100 | def _get_phase(self): 101 | cycle_time = self.cfg.rewards.cycle_time 102 | phase = self.episode_length_buf * self.dt / cycle_time 103 | return phase 104 | 105 | def _get_gait_phase(self): 106 | # return float mask 1 is stance, 0 is swing 107 | phase = self._get_phase() 108 | sin_pos = torch.sin(2 * torch.pi * phase) 109 | # Add double support phase 110 | stance_mask = torch.zeros((self.num_envs, 2), device=self.device) 111 | # left foot stance 112 | stance_mask[:, 0] = sin_pos >= 0 113 | # right foot stance 114 | stance_mask[:, 1] = sin_pos < 0 115 | # Double support phase 116 | stance_mask[torch.abs(sin_pos) < 0.1] = 1 117 | 118 | return stance_mask 119 | 120 | 121 | def compute_ref_state(self): 122 | phase = self._get_phase() 123 | sin_pos = torch.sin(2 * torch.pi * phase) 124 | sin_pos_l = sin_pos.clone() 125 | sin_pos_r = sin_pos.clone() 126 | self.ref_dof_pos = torch.zeros_like(self.dof_pos) 127 | scale_1 = self.cfg.rewards.target_joint_pos_scale 128 | scale_2 = 2 * scale_1 129 | # left foot stance phase set to default joint pos 130 | sin_pos_l[sin_pos_l > 0] = 0 131 | self.ref_dof_pos[:, 2] = sin_pos_l * scale_1 132 | self.ref_dof_pos[:, 3] = sin_pos_l * scale_2 133 | self.ref_dof_pos[:, 4] = sin_pos_l * scale_1 134 | # right foot stance phase set to default joint pos 135 | sin_pos_r[sin_pos_r < 0] = 0 136 | self.ref_dof_pos[:, 8] = sin_pos_r * scale_1 137 | self.ref_dof_pos[:, 9] = sin_pos_r * scale_2 138 | self.ref_dof_pos[:, 10] = sin_pos_r * scale_1 139 | # Double support phase 140 | self.ref_dof_pos[torch.abs(sin_pos) < 0.1] = 0 141 | 142 | self.ref_action = 2 * self.ref_dof_pos 143 | 144 | 145 | def create_sim(self): 146 | """ Creates simulation, terrain and evironments 147 | """ 148 | self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly 149 | self.sim = self.gym.create_sim( 150 | self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params) 151 | mesh_type = self.cfg.terrain.mesh_type 152 | if mesh_type in ['heightfield', 'trimesh']: 153 | self.terrain = HumanoidTerrain(self.cfg.terrain, self.num_envs) 154 | if mesh_type == 'plane': 155 | self._create_ground_plane() 156 | elif mesh_type == 'heightfield': 157 | self._create_heightfield() 158 | elif mesh_type == 'trimesh': 159 | self._create_trimesh() 160 | elif mesh_type is not None: 161 | raise ValueError( 162 | "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") 163 | self._create_envs() 164 | 165 | 166 | def _get_noise_scale_vec(self, cfg): 167 | """ Sets a vector used to scale the noise added to the observations. 
168 | [NOTE]: Must be adapted when changing the observations structure 169 | 170 | Args: 171 | cfg (Dict): Environment config file 172 | 173 | Returns: 174 | [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] 175 | """ 176 | noise_vec = torch.zeros( 177 | self.cfg.env.num_single_obs, device=self.device) 178 | self.add_noise = self.cfg.noise.add_noise 179 | noise_scales = self.cfg.noise.noise_scales 180 | noise_vec[0: 5] = 0. # commands 181 | noise_vec[5: 17] = noise_scales.dof_pos * self.obs_scales.dof_pos 182 | noise_vec[17: 29] = noise_scales.dof_vel * self.obs_scales.dof_vel 183 | noise_vec[29: 41] = 0. # previous actions 184 | noise_vec[41: 44] = noise_scales.ang_vel * self.obs_scales.ang_vel # ang vel 185 | noise_vec[44: 47] = noise_scales.quat * self.obs_scales.quat # euler x,y,z 186 | return noise_vec 187 | 188 | 189 | def step(self, actions): 190 | if self.cfg.env.use_ref_actions: 191 | actions += self.ref_action 192 | actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) 193 | # dynamic randomization 194 | delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay 195 | actions = (1 - delay) * actions + delay * self.actions 196 | actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions 197 | return super().step(actions) 198 | 199 | 200 | def compute_observations(self): 201 | 202 | phase = self._get_phase() 203 | self.compute_ref_state() 204 | 205 | sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) 206 | cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) 207 | 208 | stance_mask = self._get_gait_phase() 209 | contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5. 210 | 211 | self.command_input = torch.cat( 212 | (sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) 213 | 214 | q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos 215 | dq = self.dof_vel * self.obs_scales.dof_vel 216 | 217 | diff = self.dof_pos - self.ref_dof_pos 218 | 219 | self.privileged_obs_buf = torch.cat(( 220 | self.command_input, # 2 + 3 221 | (self.dof_pos - self.default_joint_pd_target) * \ 222 | self.obs_scales.dof_pos, # 12 223 | self.dof_vel * self.obs_scales.dof_vel, # 12 224 | self.actions, # 12 225 | diff, # 12 226 | self.base_lin_vel * self.obs_scales.lin_vel, # 3 227 | self.base_ang_vel * self.obs_scales.ang_vel, # 3 228 | self.base_euler_xyz * self.obs_scales.quat, # 3 229 | self.rand_push_force[:, :2], # 2 230 | self.rand_push_torque, # 3 231 | self.env_frictions, # 1 232 | self.body_mass / 30., # 1 233 | stance_mask, # 2 234 | contact_mask, # 2 235 | ), dim=-1) 236 | 237 | obs_buf = torch.cat(( 238 | self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, ang_vel_yaw) 239 | q, # 12D 240 | dq, # 12D 241 | self.actions, # 12D 242 | self.base_ang_vel * self.obs_scales.ang_vel, # 3 243 | self.base_euler_xyz * self.obs_scales.quat, # 3 244 | ), dim=-1) 245 | 246 | if self.cfg.terrain.measure_heights: 247 | heights = torch.clip(self.root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, -1, 1.)
* self.obs_scales.height_measurements 248 | self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) 249 | 250 | if self.add_noise: 251 | obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec * self.cfg.noise.noise_level 252 | else: 253 | obs_now = obs_buf.clone() 254 | self.obs_history.append(obs_now) 255 | self.critic_history.append(self.privileged_obs_buf) 256 | 257 | 258 | obs_buf_all = torch.stack([self.obs_history[i] 259 | for i in range(self.obs_history.maxlen)], dim=1) # N,T,K 260 | 261 | self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K 262 | self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) 263 | 264 | def reset_idx(self, env_ids): 265 | super().reset_idx(env_ids) 266 | for i in range(self.obs_history.maxlen): 267 | self.obs_history[i][env_ids] *= 0 268 | for i in range(self.critic_history.maxlen): 269 | self.critic_history[i][env_ids] *= 0 270 | 271 | # ================================================ Rewards ================================================== # 272 | def _reward_joint_pos(self): 273 | """ 274 | Calculates the reward based on the difference between the current joint positions and the target joint positions. 275 | """ 276 | joint_pos = self.dof_pos.clone() 277 | pos_target = self.ref_dof_pos.clone() 278 | diff = joint_pos - pos_target 279 | r = torch.exp(-2 * torch.norm(diff, dim=1)) - 0.2 * torch.norm(diff, dim=1).clamp(0, 0.5) 280 | return r 281 | 282 | def _reward_feet_distance(self): 283 | """ 284 | Calculates the reward based on the distance between the feet. Penalizes feet that get too close to each other or too far apart. 285 | """ 286 | foot_pos = self.rigid_state[:, self.feet_indices, :2] 287 | foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) 288 | fd = self.cfg.rewards.min_dist 289 | max_df = self.cfg.rewards.max_dist 290 | d_min = torch.clamp(foot_dist - fd, -0.5, 0.) 291 | d_max = torch.clamp(foot_dist - max_df, 0, 0.5) 292 | return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 293 | 294 | 295 | def _reward_knee_distance(self): 296 | """ 297 | Calculates the reward based on the distance between the knees of the humanoid. 298 | """ 299 | knee_pos = self.rigid_state[:, self.knee_indices, :2] 300 | knee_dist = torch.norm(knee_pos[:, 0, :] - knee_pos[:, 1, :], dim=1) 301 | fd = self.cfg.rewards.min_dist 302 | max_df = self.cfg.rewards.max_dist / 2 303 | d_min = torch.clamp(knee_dist - fd, -0.5, 0.) 304 | d_max = torch.clamp(knee_dist - max_df, 0, 0.5) 305 | return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2 306 | 307 | 308 | def _reward_foot_slip(self): 309 | """ 310 | Calculates the reward for minimizing foot slip. The reward is based on the contact forces 311 | and the speed of the feet. A contact threshold is used to determine if the foot is in contact 312 | with the ground. The speed of the foot is calculated and scaled by the contact condition. 313 | """ 314 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 315 | foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2) 316 | rew = torch.sqrt(foot_speed_norm) 317 | rew *= contact 318 | return torch.sum(rew, dim=1) 319 | 320 | def _reward_feet_air_time(self): 321 | """ 322 | Calculates the reward for feet air time, promoting longer steps. This is achieved by 323 | checking the first contact with the ground after being in the air.
The air time is 324 | limited to a maximum value for reward calculation. 325 | """ 326 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 327 | stance_mask = self._get_gait_phase() 328 | self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) 329 | self.last_contacts = contact 330 | first_contact = (self.feet_air_time > 0.) * self.contact_filt 331 | self.feet_air_time += self.dt 332 | air_time = self.feet_air_time.clamp(0, 0.5) * first_contact 333 | self.feet_air_time *= ~self.contact_filt 334 | return air_time.sum(dim=1) 335 | 336 | def _reward_feet_contact_number(self): 337 | """ 338 | Calculates a reward based on the number of feet contacts aligning with the gait phase. 339 | Rewards or penalizes depending on whether the foot contact matches the expected gait phase. 340 | """ 341 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 342 | stance_mask = self._get_gait_phase() 343 | reward = torch.where(contact == stance_mask, 1.0, -0.3) 344 | return torch.mean(reward, dim=1) 345 | 346 | def _reward_orientation(self): 347 | """ 348 | Calculates the reward for maintaining a flat base orientation. It penalizes deviation 349 | from the desired base orientation using the base euler angles and the projected gravity vector. 350 | """ 351 | quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) 352 | orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) 353 | return (quat_mismatch + orientation) / 2. 354 | 355 | def _reward_feet_contact_forces(self): 356 | """ 357 | Calculates the reward for keeping contact forces within a specified range. Penalizes 358 | high contact forces on the feet. 359 | """ 360 | return torch.sum((torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force).clip(0, 400), dim=1) 361 | 362 | def _reward_default_joint_pos(self): 363 | """ 364 | Calculates the reward for keeping joint positions close to the default positions, focusing 365 | on the yaw and roll joints: their deviation carries the main exponential penalty, while the remaining joints only receive a small norm penalty. 366 | """ 367 | joint_diff = self.dof_pos - self.default_joint_pd_target 368 | left_yaw_roll = joint_diff[:, :2] 369 | right_yaw_roll = joint_diff[:, 6: 8] 370 | yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) 371 | yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) 372 | return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) 373 | 374 | def _reward_base_height(self): 375 | """ 376 | Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. 377 | The reward is computed based on the height difference between the robot's base and the average height 378 | of its feet when they are in contact with the ground. 379 | """ 380 | stance_mask = self._get_gait_phase() 381 | measured_heights = torch.sum( 382 | self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(stance_mask, dim=1) 383 | base_height = self.root_states[:, 2] - (measured_heights - 0.05) 384 | return torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) 385 | 386 | def _reward_base_acc(self): 387 | """ 388 | Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, 389 | encouraging smoother motion.
390 | """ 391 | root_acc = self.last_root_vel - self.root_states[:, 7:13] 392 | rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) 393 | return rew 394 | 395 | 396 | def _reward_vel_mismatch_exp(self): 397 | """ 398 | Computes a reward based on the mismatch in the robot's linear and angular velocities. 399 | Encourages the robot to maintain a stable velocity by penalizing large deviations. 400 | """ 401 | lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) 402 | ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.) 403 | 404 | c_update = (lin_mismatch + ang_mismatch) / 2. 405 | 406 | return c_update 407 | 408 | def _reward_track_vel_hard(self): 409 | """ 410 | Calculates a reward for accurately tracking both linear and angular velocity commands. 411 | Penalizes deviations from specified linear and angular velocity targets. 412 | """ 413 | # Tracking of linear velocity commands (xy axes) 414 | lin_vel_error = torch.norm( 415 | self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) 416 | lin_vel_error_exp = torch.exp(-lin_vel_error * 10) 417 | 418 | # Tracking of angular velocity commands (yaw) 419 | ang_vel_error = torch.abs( 420 | self.commands[:, 2] - self.base_ang_vel[:, 2]) 421 | ang_vel_error_exp = torch.exp(-ang_vel_error * 10) 422 | 423 | linear_error = 0.2 * (lin_vel_error + ang_vel_error) 424 | 425 | return (lin_vel_error_exp + ang_vel_error_exp) / 2. - linear_error 426 | 427 | def _reward_tracking_lin_vel(self): 428 | """ 429 | Tracks linear velocity commands along the xy axes. 430 | Calculates a reward based on how closely the robot's linear velocity matches the commanded values. 431 | """ 432 | lin_vel_error = torch.sum(torch.square( 433 | self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) 434 | return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma) 435 | 436 | def _reward_tracking_ang_vel(self): 437 | """ 438 | Tracks angular velocity commands for yaw rotation. 439 | Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. 440 | """ 441 | 442 | ang_vel_error = torch.square( 443 | self.commands[:, 2] - self.base_ang_vel[:, 2]) 444 | return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma) 445 | 446 | def _reward_feet_clearance(self): 447 | """ 448 | Calculates reward based on the clearance of the swing leg from the ground during movement. 449 | Encourages appropriate lift of the feet during the swing phase of the gait. 450 | """ 451 | # Compute feet contact mask 452 | contact = self.contact_forces[:, self.feet_indices, 2] > 5. 453 | 454 | # Get the z-position of the feet and compute the change in z-position 455 | feet_z = self.rigid_state[:, self.feet_indices, 2] - 0.05 456 | delta_z = feet_z - self.last_feet_z 457 | self.feet_height += delta_z 458 | self.last_feet_z = feet_z 459 | 460 | # Compute swing mask 461 | swing_mask = 1 - self._get_gait_phase() 462 | 463 | # feet height should be closed to target feet height at the peak 464 | rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01 465 | rew_pos = torch.sum(rew_pos * swing_mask, dim=1) 466 | self.feet_height *= ~contact 467 | return rew_pos 468 | 469 | def _reward_low_speed(self): 470 | """ 471 | Rewards or penalizes the robot based on its speed relative to the commanded speed. 472 | This function checks if the robot is moving too slow, too fast, or at the desired speed, 473 | and if the movement direction matches the command. 
474 | """ 475 | # Calculate the absolute value of speed and command for comparison 476 | absolute_speed = torch.abs(self.base_lin_vel[:, 0]) 477 | absolute_command = torch.abs(self.commands[:, 0]) 478 | 479 | # Define speed criteria for desired range 480 | speed_too_low = absolute_speed < 0.5 * absolute_command 481 | speed_too_high = absolute_speed > 1.2 * absolute_command 482 | speed_desired = ~(speed_too_low | speed_too_high) 483 | 484 | # Check if the speed and command directions are mismatched 485 | sign_mismatch = torch.sign( 486 | self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) 487 | 488 | # Initialize reward tensor 489 | reward = torch.zeros_like(self.base_lin_vel[:, 0]) 490 | 491 | # Assign rewards based on conditions 492 | # Speed too low 493 | reward[speed_too_low] = -1.0 494 | # Speed too high 495 | reward[speed_too_high] = 0. 496 | # Speed within desired range 497 | reward[speed_desired] = 1.2 498 | # Sign mismatch has the highest priority 499 | reward[sign_mismatch] = -2.0 500 | return reward * (self.commands[:, 0].abs() > 0.1) 501 | 502 | def _reward_torques(self): 503 | """ 504 | Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing 505 | the necessary force exerted by the motors. 506 | """ 507 | return torch.sum(torch.square(self.torques), dim=1) 508 | 509 | def _reward_dof_vel(self): 510 | """ 511 | Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and 512 | more controlled movements. 513 | """ 514 | return torch.sum(torch.square(self.dof_vel), dim=1) 515 | 516 | def _reward_dof_acc(self): 517 | """ 518 | Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring 519 | smooth and stable motion, reducing wear on the robot's mechanical parts. 520 | """ 521 | return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) 522 | 523 | def _reward_collision(self): 524 | """ 525 | Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. 526 | This encourages the robot to avoid undesired contact with objects or surfaces. 527 | """ 528 | return torch.sum(1.*(torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1) 529 | 530 | def _reward_action_smoothness(self): 531 | """ 532 | Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. 533 | This is important for achieving fluid motion and reducing mechanical stress. 534 | """ 535 | term_1 = torch.sum(torch.square( 536 | self.last_actions - self.actions), dim=1) 537 | term_2 = torch.sum(torch.square( 538 | self.actions + self.last_last_actions - 2 * self.last_actions), dim=1) 539 | term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) 540 | return term_1 + term_2 + term_3 541 | -------------------------------------------------------------------------------- /humanoid/scripts/play.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import os 33 | import cv2 34 | import numpy as np 35 | from isaacgym import gymapi 36 | from humanoid import LEGGED_GYM_ROOT_DIR 37 | 38 | # import isaacgym 39 | from humanoid.envs import * 40 | from humanoid.utils import get_args, export_policy_as_jit, task_registry, Logger 41 | from isaacgym.torch_utils import * 42 | 43 | import torch 44 | from tqdm import tqdm 45 | from datetime import datetime 46 | 47 | 48 | def play(args): 49 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 50 | # override some parameters for testing 51 | env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1) 52 | env_cfg.sim.max_gpu_contact_pairs = 2**10 53 | # env_cfg.terrain.mesh_type = 'trimesh' 54 | env_cfg.terrain.mesh_type = 'plane' 55 | env_cfg.terrain.num_rows = 5 56 | env_cfg.terrain.num_cols = 5 57 | env_cfg.terrain.curriculum = False 58 | env_cfg.terrain.max_init_terrain_level = 5 59 | env_cfg.noise.add_noise = True 60 | env_cfg.domain_rand.push_robots = False 61 | env_cfg.domain_rand.joint_angle_noise = 0. 
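    # The overrides in this block trade training-time realism for a repeatable
    # evaluation rollout: a single robot on flat ground, curriculum disabled,
    # no random pushes, and no joint-angle noise, while observation noise stays
    # enabled (add_noise above, with noise_level set just below).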
62 | env_cfg.noise.curriculum = False 63 | env_cfg.noise.noise_level = 0.5 64 | 65 | 66 | train_cfg.seed = 123145 67 | print("train_cfg.runner_class_name:", train_cfg.runner_class_name) 68 | 69 | # prepare environment 70 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 71 | env.set_camera(env_cfg.viewer.pos, env_cfg.viewer.lookat) 72 | 73 | obs = env.get_observations() 74 | 75 | # load policy 76 | train_cfg.runner.resume = True 77 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg) 78 | policy = ppo_runner.get_inference_policy(device=env.device) 79 | 80 | # export policy as a jit module (used to run it from C++) 81 | if EXPORT_POLICY: 82 | path = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name, 'exported', 'policies') 83 | export_policy_as_jit(ppo_runner.alg.actor_critic, path) 84 | print('Exported policy as jit script to: ', path) 85 | 86 | logger = Logger(env.dt) 87 | robot_index = 0 # which robot is used for logging 88 | joint_index = 1 # which joint is used for logging 89 | stop_state_log = 1200 # number of steps before plotting states 90 | if RENDER: 91 | camera_properties = gymapi.CameraProperties() 92 | camera_properties.width = 1920 93 | camera_properties.height = 1080 94 | h1 = env.gym.create_camera_sensor(env.envs[0], camera_properties) 95 | camera_offset = gymapi.Vec3(1, -1, 0.5) 96 | camera_rotation = gymapi.Quat.from_axis_angle(gymapi.Vec3(-0.3, 0.2, 1), 97 | np.deg2rad(135)) 98 | actor_handle = env.gym.get_actor_handle(env.envs[0], 0) 99 | body_handle = env.gym.get_actor_rigid_body_handle(env.envs[0], actor_handle, 0) 100 | env.gym.attach_camera_to_body( 101 | h1, env.envs[0], body_handle, 102 | gymapi.Transform(camera_offset, camera_rotation), 103 | gymapi.FOLLOW_POSITION) 104 | 105 | fourcc = cv2.VideoWriter_fourcc(*"mp4v") 106 | video_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'videos') 107 | experiment_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'videos', train_cfg.runner.experiment_name) 108 | dir = os.path.join(experiment_dir, datetime.now().strftime('%b%d_%H-%M-%S')+ args.run_name + '.mp4') 109 | if not os.path.exists(video_dir): 110 | os.mkdir(video_dir) 111 | if not os.path.exists(experiment_dir): 112 | os.mkdir(experiment_dir) 113 | video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) 114 | 115 | for i in tqdm(range(stop_state_log)): 116 | 117 | actions = policy(obs.detach()) # * 0. 118 | 119 | if FIX_COMMAND: 120 | env.commands[:, 0] = 0.5 # 1.0 121 | env.commands[:, 1] = 0. 122 | env.commands[:, 2] = 0. 123 | env.commands[:, 3] = 0. 
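            # Command buffer layout (legged_gym convention): commands[:, 0] is the
            # forward velocity target v_x in m/s, [:, 1] the lateral velocity v_y,
            # [:, 2] the yaw rate in rad/s, and [:, 3] a heading setpoint from
            # which the yaw command is recomputed when heading tracking is
            # enabled in the config.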
124 | 125 | obs, critic_obs, rews, dones, infos = env.step(actions.detach()) 126 | 127 | if RENDER: 128 | env.gym.fetch_results(env.sim, True) 129 | env.gym.step_graphics(env.sim) 130 | env.gym.render_all_camera_sensors(env.sim) 131 | img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) 132 | img = np.reshape(img, (1080, 1920, 4)) 133 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 134 | video.write(img[..., :3]) 135 | 136 | logger.log_states( 137 | { 138 | 'dof_pos_target': actions[robot_index, joint_index].item() * env.cfg.control.action_scale, 139 | 'dof_pos': env.dof_pos[robot_index, joint_index].item(), 140 | 'dof_vel': env.dof_vel[robot_index, joint_index].item(), 141 | 'dof_torque': env.torques[robot_index, joint_index].item(), 142 | 'command_x': env.commands[robot_index, 0].item(), 143 | 'command_y': env.commands[robot_index, 1].item(), 144 | 'command_yaw': env.commands[robot_index, 2].item(), 145 | 'base_vel_x': env.base_lin_vel[robot_index, 0].item(), 146 | 'base_vel_y': env.base_lin_vel[robot_index, 1].item(), 147 | 'base_vel_z': env.base_lin_vel[robot_index, 2].item(), 148 | 'base_vel_yaw': env.base_ang_vel[robot_index, 2].item(), 149 | 'contact_forces_z': env.contact_forces[robot_index, env.feet_indices, 2].cpu().numpy() 150 | } 151 | ) 152 | # ====================== Log states ====================== 153 | if infos["episode"]: 154 | num_episodes = torch.sum(env.reset_buf).item() 155 | if num_episodes>0: 156 | logger.log_rewards(infos["episode"], num_episodes) 157 | 158 | logger.print_rewards() 159 | logger.plot_states() 160 | 161 | if RENDER: 162 | video.release() 163 | 164 | if __name__ == '__main__': 165 | EXPORT_POLICY = True 166 | RENDER = True 167 | FIX_COMMAND = True 168 | args = get_args() 169 | play(args) 170 | -------------------------------------------------------------------------------- /humanoid/scripts/sim2sim.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # 3. Neither the name of the copyright holder nor the names of its 14 | # contributors may be used to endorse or promote products derived from 15 | # this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 29 | 30 | 31 | import math 32 | import numpy as np 33 | import mujoco, mujoco_viewer 34 | from tqdm import tqdm 35 | from collections import deque 36 | from scipy.spatial.transform import Rotation as R 37 | from humanoid import LEGGED_GYM_ROOT_DIR 38 | from humanoid.envs import XBotLCfg 39 | import torch 40 | 41 | 42 | class cmd: 43 | vx = 0.4 44 | vy = 0.0 45 | dyaw = 0.0 46 | 47 | 48 | def quaternion_to_euler_array(quat): 49 | # Ensure quaternion is in the correct format [x, y, z, w] 50 | x, y, z, w = quat 51 | 52 | # Roll (x-axis rotation) 53 | t0 = +2.0 * (w * x + y * z) 54 | t1 = +1.0 - 2.0 * (x * x + y * y) 55 | roll_x = np.arctan2(t0, t1) 56 | 57 | # Pitch (y-axis rotation) 58 | t2 = +2.0 * (w * y - z * x) 59 | t2 = np.clip(t2, -1.0, 1.0) 60 | pitch_y = np.arcsin(t2) 61 | 62 | # Yaw (z-axis rotation) 63 | t3 = +2.0 * (w * z + x * y) 64 | t4 = +1.0 - 2.0 * (y * y + z * z) 65 | yaw_z = np.arctan2(t3, t4) 66 | 67 | # Returns roll, pitch, yaw in a NumPy array in radians 68 | return np.array([roll_x, pitch_y, yaw_z]) 69 | 70 | def get_obs(data): 71 | '''Extracts an observation from the mujoco data structure 72 | ''' 73 | q = data.qpos.astype(np.double) 74 | dq = data.qvel.astype(np.double) 75 | quat = data.sensor('orientation').data[[1, 2, 3, 0]].astype(np.double) 76 | r = R.from_quat(quat) 77 | v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame 78 | omega = data.sensor('angular-velocity').data.astype(np.double) 79 | gvec = r.apply(np.array([0., 0., -1.]), inverse=True).astype(np.double) 80 | return (q, dq, quat, v, omega, gvec) 81 | 82 | def pd_control(target_q, q, kp, target_dq, dq, kd): 83 | '''Calculates torques from position commands 84 | ''' 85 | return (target_q - q) * kp + (target_dq - dq) * kd 86 | 87 | def run_mujoco(policy, cfg): 88 | """ 89 | Run the Mujoco simulation using the provided policy and configuration. 90 | 91 | Args: 92 | policy: The policy used for controlling the simulation. 93 | cfg: The configuration object containing simulation settings. 
94 | 95 | Returns: 96 | None 97 | """ 98 | model = mujoco.MjModel.from_xml_path(cfg.sim_config.mujoco_model_path) 99 | model.opt.timestep = cfg.sim_config.dt 100 | data = mujoco.MjData(model) 101 | mujoco.mj_step(model, data) 102 | viewer = mujoco_viewer.MujocoViewer(model, data) 103 | 104 | target_q = np.zeros((cfg.env.num_actions), dtype=np.double) 105 | action = np.zeros((cfg.env.num_actions), dtype=np.double) 106 | 107 | hist_obs = deque() 108 | for _ in range(cfg.env.frame_stack): 109 | hist_obs.append(np.zeros([1, cfg.env.num_single_obs], dtype=np.double)) 110 | 111 | count_lowlevel = 0 112 | 113 | 114 | for _ in tqdm(range(int(cfg.sim_config.sim_duration / cfg.sim_config.dt)), desc="Simulating..."): 115 | 116 | # Obtain an observation 117 | q, dq, quat, v, omega, gvec = get_obs(data) 118 | q = q[-cfg.env.num_actions:] 119 | dq = dq[-cfg.env.num_actions:] 120 | 121 | # 1000hz -> 100hz 122 | if count_lowlevel % cfg.sim_config.decimation == 0: 123 | 124 | obs = np.zeros([1, cfg.env.num_single_obs], dtype=np.float32) 125 | eu_ang = quaternion_to_euler_array(quat) 126 | eu_ang[eu_ang > math.pi] -= 2 * math.pi 127 | 128 | obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * cfg.sim_config.dt / 0.64) 129 | obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * cfg.sim_config.dt / 0.64) 130 | obs[0, 2] = cmd.vx * cfg.normalization.obs_scales.lin_vel 131 | obs[0, 3] = cmd.vy * cfg.normalization.obs_scales.lin_vel 132 | obs[0, 4] = cmd.dyaw * cfg.normalization.obs_scales.ang_vel 133 | obs[0, 5:17] = q * cfg.normalization.obs_scales.dof_pos 134 | obs[0, 17:29] = dq * cfg.normalization.obs_scales.dof_vel 135 | obs[0, 29:41] = action 136 | obs[0, 41:44] = omega 137 | obs[0, 44:47] = eu_ang 138 | 139 | obs = np.clip(obs, -cfg.normalization.clip_observations, cfg.normalization.clip_observations) 140 | 141 | hist_obs.append(obs) 142 | hist_obs.popleft() 143 | 144 | policy_input = np.zeros([1, cfg.env.num_observations], dtype=np.float32) 145 | for i in range(cfg.env.frame_stack): 146 | policy_input[0, i * cfg.env.num_single_obs : (i + 1) * cfg.env.num_single_obs] = hist_obs[i][0, :] 147 | action[:] = policy(torch.tensor(policy_input))[0].detach().numpy() 148 | action = np.clip(action, -cfg.normalization.clip_actions, cfg.normalization.clip_actions) 149 | 150 | target_q = action * cfg.control.action_scale 151 | 152 | 153 | target_dq = np.zeros((cfg.env.num_actions), dtype=np.double) 154 | # Generate PD control 155 | tau = pd_control(target_q, q, cfg.robot_config.kps, 156 | target_dq, dq, cfg.robot_config.kds) # Calc torques 157 | tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques 158 | data.ctrl = tau 159 | 160 | mujoco.mj_step(model, data) 161 | viewer.render() 162 | count_lowlevel += 1 163 | 164 | viewer.close() 165 | 166 | 167 | if __name__ == '__main__': 168 | import argparse 169 | 170 | parser = argparse.ArgumentParser(description='Deployment script.') 171 | parser.add_argument('--load_model', type=str, required=True, 172 | help='Run to load from.') 173 | parser.add_argument('--terrain', action='store_true', help='terrain or plane') 174 | args = parser.parse_args() 175 | 176 | class Sim2simCfg(XBotLCfg): 177 | 178 | class sim_config: 179 | if args.terrain: 180 | mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L-terrain.xml' 181 | else: 182 | mujoco_model_path = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/XBot/mjcf/XBot-L.xml' 183 | sim_duration = 60.0 184 | dt = 0.001 185 | decimation = 10 186 | 187 | class robot_config: 188 | kps = 
np.array([200, 200, 350, 350, 15, 15, 200, 200, 350, 350, 15, 15], dtype=np.double) 189 | kds = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], dtype=np.double) 190 | tau_limit = 200. * np.ones(12, dtype=np.double) 191 | 192 | policy = torch.jit.load(args.load_model) 193 | run_mujoco(policy, Sim2simCfg()) 194 | -------------------------------------------------------------------------------- /humanoid/scripts/train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | from humanoid.envs import * 34 | from humanoid.utils import get_args, task_registry 35 | 36 | def train(args): 37 | env, env_cfg = task_registry.make_env(name=args.task, args=args) 38 | ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args) 39 | ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True) 40 | 41 | if __name__ == '__main__': 42 | args = get_args() 43 | train(args) 44 | -------------------------------------------------------------------------------- /humanoid/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict 34 | from .task_registry import task_registry 35 | from .logger import Logger 36 | from .math import * 37 | from .terrain import Terrain -------------------------------------------------------------------------------- /humanoid/utils/calculate_gait.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import numpy as np 33 | from scipy.optimize import fsolve 34 | import matplotlib.pyplot as plt 35 | 36 | def get_coefficients(h0, hswing, v0, vswing, hmax, swing_time): 37 | 38 | def equations(coeffs): 39 | a5, a4, a3, a2, a1, a0 = coeffs 40 | 41 | # Height at t=0 should be h0 42 | eq1 = a0 - h0 43 | 44 | # Height at t=swing_time should be hswing 45 | eq2 = a5 * swing_time**5 + a4 * swing_time**4 + a3 * swing_time**3 + a2 * swing_time**2 + a1 * swing_time + a0 - hswing 46 | 47 | # Velocity at t=0 should be v0 48 | eq3 = a1 - v0 49 | 50 | # Velocity at t=swing_time should be vswing 51 | eq4 = 5 * a5 * swing_time**4 + 4 * a4 * swing_time**3 + 3 * a3 * swing_time**2 + 2 * a2 * swing_time + a1 - vswing 52 | 53 | # Height at t=swing_time/2 should be hmax 54 | eq5 = a5 * (swing_time/2)**5 + a4 * (swing_time/2)**4 + a3 * (swing_time/2)**3 + a2 * (swing_time/2)**2 + a1 * (swing_time/2) + a0 - hmax 55 | 56 | # Return the deviations from the expected values. These will be minimized by fsolve. 57 | return (eq1, eq2, eq3, eq4, eq5, a5 + a4 + a3 + a2 + a1 + a0) 58 | 59 | # Solve for the coefficients using the equations above 60 | return fsolve(equations, (1, 1, 1, 1, 1, 1)) 61 | 62 | 63 | def plot_curves(coeffs, swing_time): 64 | a5, a4, a3, a2, a1, a0 = coeffs 65 | 66 | def h(t): 67 | return a5 * t**5 + a4 * t**4 + a3 * t**3 + a2 * t**2 + a1 * t + a0 68 | 69 | def v(t): 70 | return 5 * a5 * t**4 + 4 * a4 * t**3 + 3 * a3 * t**2 + 2 * a2 * t + a1 71 | 72 | # Define the acceleration function based on the coefficients 73 | def a(t): 74 | return 20 * a5 * t**3 + 12 * a4 * t**2 + 6 * a3 * t + 2 * a2 75 | 76 | t_values = np.linspace(0, swing_time, 500) 77 | h_values = h(t_values) 78 | v_values = v(t_values) 79 | a_values = a(t_values) # Compute acceleration values 80 | 81 | 82 | discrete_t_values = np.linspace(0, swing_time, 14) 83 | 84 | plt.figure(figsize=(12, 9)) 85 | 86 | plt.subplot(3, 1, 1) 87 | plt.plot(t_values, h_values, label='Height (h(t))') 88 | plt.scatter(discrete_t_values, h(discrete_t_values), color='black', label='Discrete Height') 89 | plt.title('Height Curve') 90 | plt.grid(True) 91 | plt.legend() 92 | 93 | plt.subplot(3, 1, 2) 94 | plt.plot(t_values, v_values, label='Velocity (v(t))', color='red') 95 | plt.scatter(discrete_t_values, v(discrete_t_values), color='black', label='Discrete Velocity') 96 | # print(v(discrete_t_values)) 97 | plt.title('Velocity Curve') 98 | plt.grid(True) 99 | plt.legend() 100 | 101 | # Plotting the acceleration curve 102 | plt.subplot(3, 1, 3) 103 | plt.plot(t_values, a_values/50, label='Acceleration (a(t))', color='green') 104 | plt.scatter(discrete_t_values, a(discrete_t_values)/50, color='black', label='Discrete Acceleration') 105 | # print(a(discrete_t_values)/50) 106 | plt.title('Acceleration Curve') 107 | plt.grid(True) 108 | plt.legend() 109 | 110 | plt.tight_layout() 111 | plt.show() 112 | 113 | 114 | # Set 
the constraints and swing time 115 | coeffs = get_coefficients(0, 0, 0, -0.1, 0.04, 0.26) 116 | 117 | print("Coefficients (a5, a4, a3, a2, a1, a0):") 118 | print(f"a5 = {coeffs[0]:.15f}") 119 | print(f"a4 = {coeffs[1]:.15f}") 120 | print(f"a3 = {coeffs[2]:.15f}") 121 | print(f"a2 = {coeffs[3]:.15f}") 122 | print(f"a1 = {coeffs[4]:.15f}") 123 | print(f"a0 = {coeffs[5]:.15f}") 124 | 125 | plot_curves(coeffs, 0.26) -------------------------------------------------------------------------------- /humanoid/utils/helpers.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
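# A self-contained sanity check for the swing-foot quintic solved in
# calculate_gait.py above (a sketch: it re-states the same boundary-value
# problem as the get_coefficients(0, 0, 0, -0.1, 0.04, 0.26) call rather than
# importing that module, since importing would trigger its module-level plotting).
from scipy.optimize import fsolve

h0, hswing, v0, vswing, hmax, T = 0.0, 0.0, 0.0, -0.1, 0.04, 0.26

def _gait_equations(c):
    a5, a4, a3, a2, a1, a0 = c
    h = lambda t: a5*t**5 + a4*t**4 + a3*t**3 + a2*t**2 + a1*t + a0
    v = lambda t: 5*a5*t**4 + 4*a4*t**3 + 3*a3*t**2 + 2*a2*t + a1
    # Six constraints: start/end height, start/end velocity, the mid-swing
    # apex, and the closure condition h(1) = 0.
    return (h(0) - h0, h(T) - hswing, v(0) - v0, v(T) - vswing,
            h(T/2) - hmax, a5 + a4 + a3 + a2 + a1 + a0)

a5, a4, a3, a2, a1, a0 = fsolve(_gait_equations, (1, 1, 1, 1, 1, 1))
h = lambda t: a5*t**5 + a4*t**4 + a3*t**3 + a2*t**2 + a1*t + a0
assert abs(h(0.0)) < 1e-6 and abs(h(T)) < 1e-6 and abs(h(T/2) - hmax) < 1e-6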
31 | 32 | import datetime 33 | import os 34 | import copy 35 | import torch 36 | import numpy as np 37 | import random 38 | from isaacgym import gymapi 39 | from isaacgym import gymutil 40 | 41 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 42 | 43 | 44 | def class_to_dict(obj) -> dict: 45 | if not hasattr(obj, "__dict__"): 46 | return obj 47 | result = {} 48 | for key in dir(obj): 49 | if key.startswith("_"): 50 | continue 51 | element = [] 52 | val = getattr(obj, key) 53 | if isinstance(val, list): 54 | for item in val: 55 | element.append(class_to_dict(item)) 56 | else: 57 | element = class_to_dict(val) 58 | result[key] = element 59 | return result 60 | 61 | 62 | def update_class_from_dict(obj, dict): 63 | for key, val in dict.items(): 64 | attr = getattr(obj, key, None) 65 | if isinstance(attr, type): 66 | update_class_from_dict(attr, val) 67 | else: 68 | setattr(obj, key, val) 69 | return 70 | 71 | 72 | def set_seed(seed): 73 | if seed == -1: 74 | seed = np.random.randint(0, 10000) 75 | print("Setting seed: {}".format(seed)) 76 | 77 | random.seed(seed) 78 | np.random.seed(seed) 79 | torch.manual_seed(seed) 80 | os.environ["PYTHONHASHSEED"] = str(seed) 81 | torch.cuda.manual_seed(seed) 82 | torch.cuda.manual_seed_all(seed) 83 | 84 | 85 | def parse_sim_params(args, cfg): 86 | # code from Isaac Gym Preview 2 87 | # initialize sim params 88 | sim_params = gymapi.SimParams() 89 | 90 | # set some values from args 91 | if args.physics_engine == gymapi.SIM_FLEX: 92 | if args.device != "cpu": 93 | print("WARNING: Using Flex with GPU instead of PHYSX!") 94 | elif args.physics_engine == gymapi.SIM_PHYSX: 95 | sim_params.physx.use_gpu = args.use_gpu 96 | sim_params.physx.num_subscenes = args.subscenes 97 | sim_params.use_gpu_pipeline = args.use_gpu_pipeline 98 | 99 | # if sim options are provided in cfg, parse them and update/override above: 100 | if "sim" in cfg: 101 | gymutil.parse_sim_config(cfg["sim"], sim_params) 102 | 103 | # Override num_threads if passed on the command line 104 | if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0: 105 | sim_params.physx.num_threads = args.num_threads 106 | 107 | return sim_params 108 | 109 | 110 | def get_load_path(root, load_run=-1, checkpoint=-1): 111 | def month_to_number(month): 112 | return datetime.datetime.strptime(month, "%b").month 113 | 114 | try: 115 | runs = os.listdir(root) 116 | try: 117 | runs.sort(key=lambda x: (month_to_number(x[:3]), int(x[3:5]), x[6:])) 118 | except ValueError as e: 119 | print("WARNING - Could not sort runs by month: " + str(e)) 120 | runs.sort() 121 | if "exported" in runs: 122 | runs.remove("exported") 123 | last_run = os.path.join(root, runs[-1]) 124 | except: 125 | raise ValueError("No runs in this directory: " + root) 126 | if load_run == -1: 127 | load_run = last_run 128 | else: 129 | load_run = os.path.join(root, load_run) 130 | if checkpoint == -1: 131 | models = [file for file in os.listdir(load_run) if "model" in file] 132 | models.sort(key=lambda m: "{0:0>15}".format(m)) 133 | model = models[-1] 134 | else: 135 | model = "model_{}.pt".format(checkpoint) 136 | 137 | load_path = os.path.join(load_run, model) 138 | return load_path 139 | 140 | 141 | def update_cfg_from_args(env_cfg, cfg_train, args): 142 | # seed 143 | if env_cfg is not None: 144 | # num envs 145 | if args.num_envs is not None: 146 | env_cfg.env.num_envs = args.num_envs 147 | if cfg_train is not None: 148 | if args.seed is not None: 149 | cfg_train.seed = args.seed 150 | # alg runner parameters 151 | if 
args.max_iterations is not None: 152 | cfg_train.runner.max_iterations = args.max_iterations 153 | if args.resume: 154 | cfg_train.runner.resume = args.resume 155 | if args.experiment_name is not None: 156 | cfg_train.runner.experiment_name = args.experiment_name 157 | if args.run_name is not None: 158 | cfg_train.runner.run_name = args.run_name 159 | if args.load_run is not None: 160 | cfg_train.runner.load_run = args.load_run 161 | if args.checkpoint is not None: 162 | cfg_train.runner.checkpoint = args.checkpoint 163 | 164 | return env_cfg, cfg_train 165 | 166 | 167 | def get_args(): 168 | custom_parameters = [ 169 | { 170 | "name": "--task", 171 | "type": str, 172 | "default": "XBotL_free", 173 | "help": "Name of the registered task to run. Overrides config file if provided.", 174 | }, 175 | { 176 | "name": "--resume", 177 | "action": "store_true", 178 | "default": False, 179 | "help": "Resume training from a checkpoint", 180 | }, 181 | { 182 | "name": "--experiment_name", 183 | "type": str, 184 | "help": "Name of the experiment to run or load. Overrides config file if provided.", 185 | }, 186 | { 187 | "name": "--run_name", 188 | "type": str, 189 | "help": "Name of the run. Overrides config file if provided.", 190 | }, 191 | { 192 | "name": "--load_run", 193 | "type": str, 194 | "help": "Name of the run to load when resume=True. If -1: will load the last run. Overrides config file if provided.", 195 | }, 196 | { 197 | "name": "--checkpoint", 198 | "type": int, 199 | "help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided.", 200 | }, 201 | { 202 | "name": "--headless", 203 | "action": "store_true", 204 | "default": False, 205 | "help": "Force display off at all times", 206 | }, 207 | { 208 | "name": "--horovod", 209 | "action": "store_true", 210 | "default": False, 211 | "help": "Use horovod for multi-gpu training", 212 | }, 213 | { 214 | "name": "--rl_device", 215 | "type": str, 216 | "default": "cuda:0", 217 | "help": "Device used by the RL algorithm (cpu, cuda:0, cuda:1, etc.)", 218 | }, 219 | { 220 | "name": "--num_envs", 221 | "type": int, 222 | "help": "Number of environments to create. Overrides config file if provided.", 223 | }, 224 | { 225 | "name": "--seed", 226 | "type": int, 227 | "help": "Random seed. Overrides config file if provided.", 228 | }, 229 | { 230 | "name": "--max_iterations", 231 | "type": int, 232 | "help": "Maximum number of training iterations. Overrides config file if provided.", 233 | }, 234 | ] 235 | # parse arguments 236 | args = gymutil.parse_arguments( 237 | description="RL Policy", custom_parameters=custom_parameters 238 | ) 239 | 240 | # name alignment 241 | args.sim_device_id = args.compute_device_id 242 | args.sim_device = args.sim_device_type 243 | if args.sim_device == "cuda": 244 | args.sim_device += f":{args.sim_device_id}" 245 | return args 246 | 247 | 248 | def export_policy_as_jit(actor_critic, path): 249 | os.makedirs(path, exist_ok=True) 250 | path = os.path.join(path, "policy_1.pt") 251 | model = copy.deepcopy(actor_critic.actor).to("cpu") 252 | traced_script_module = torch.jit.script(model) 253 | traced_script_module.save(path) 254 | -------------------------------------------------------------------------------- /humanoid/utils/logger.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 
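# A minimal sketch of the TorchScript round trip used across this repo:
# export_policy_as_jit() (helpers.py above) scripts the actor with
# torch.jit.script() and saves it, and sim2sim.py later reloads the file with
# torch.jit.load(). TinyActor is a hypothetical stand-in for the real PPO
# actor, using the single-frame observation size (47) and the 12 actuated leg
# joints for illustration; the deployed policy actually consumes the
# frame-stacked observation history.
import torch
import torch.nn as nn

class TinyActor(nn.Module):
    def __init__(self, num_obs: int = 47, num_actions: int = 12):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(num_obs, 64), nn.ELU(),
                                 nn.Linear(64, num_actions))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)

scripted = torch.jit.script(TinyActor().to("cpu"))  # same mechanism as export_policy_as_jit
scripted.save("/tmp/policy_1.pt")
policy = torch.jit.load("/tmp/policy_1.pt")         # what sim2sim.py does at startup
print(policy(torch.zeros(1, 47)).shape)             # -> torch.Size([1, 12])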
31 | 32 | import matplotlib.pyplot as plt 33 | import numpy as np 34 | from collections import defaultdict 35 | from multiprocessing import Process, Value 36 | 37 | class Logger: 38 | def __init__(self, dt): 39 | self.state_log = defaultdict(list) 40 | self.rew_log = defaultdict(list) 41 | self.dt = dt 42 | self.num_episodes = 0 43 | self.plot_process = None 44 | 45 | def log_state(self, key, value): 46 | self.state_log[key].append(value) 47 | 48 | def log_states(self, dict): 49 | for key, value in dict.items(): 50 | self.log_state(key, value) 51 | 52 | def log_rewards(self, dict, num_episodes): 53 | for key, value in dict.items(): 54 | if 'rew' in key: 55 | self.rew_log[key].append(value.item() * num_episodes) 56 | self.num_episodes += num_episodes 57 | 58 | def reset(self): 59 | self.state_log.clear() 60 | self.rew_log.clear() 61 | 62 | def plot_states(self): 63 | self.plot_process = Process(target=self._plot) 64 | self.plot_process.start() 65 | 66 | def _plot(self): 67 | nb_rows = 3 68 | nb_cols = 3 69 | fig, axs = plt.subplots(nb_rows, nb_cols) 70 | for key, value in self.state_log.items(): 71 | time = np.linspace(0, len(value)*self.dt, len(value)) 72 | break 73 | log= self.state_log 74 | # plot joint targets and measured positions 75 | a = axs[1, 0] 76 | if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured') 77 | if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target') 78 | a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position') 79 | a.legend() 80 | # plot joint velocity 81 | a = axs[1, 1] 82 | if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured') 83 | if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target') 84 | a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity') 85 | a.legend() 86 | # plot base vel x 87 | a = axs[0, 0] 88 | if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured') 89 | if log["command_x"]: a.plot(time, log["command_x"], label='commanded') 90 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x') 91 | a.legend() 92 | # plot base vel y 93 | a = axs[0, 1] 94 | if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured') 95 | if log["command_y"]: a.plot(time, log["command_y"], label='commanded') 96 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y') 97 | a.legend() 98 | # plot base vel yaw 99 | a = axs[0, 2] 100 | if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured') 101 | if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') 102 | a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw') 103 | a.legend() 104 | # plot base vel z 105 | a = axs[1, 2] 106 | if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured') 107 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z') 108 | a.legend() 109 | # plot contact forces 110 | a = axs[2, 0] 111 | if log["contact_forces_z"]: 112 | forces = np.array(log["contact_forces_z"]) 113 | for i in range(forces.shape[1]): 114 | a.plot(time, forces[:, i], label=f'force {i}') 115 | a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') 116 | a.legend() 117 | # plot torque/vel curves 118 | a = axs[2, 1] 119 | if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') 120 | a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') 121 | 
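        # The torque-velocity scatter is a handy sim-to-real check: sampled
        # operating points should stay inside the actuator's torque-speed envelope.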
a.legend() 122 | # plot torques 123 | a = axs[2, 2] 124 | if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') 125 | a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque') 126 | a.legend() 127 | plt.show() 128 | 129 | def print_rewards(self): 130 | print("Average rewards per second:") 131 | for key, values in self.rew_log.items(): 132 | mean = np.sum(np.array(values)) / self.num_episodes 133 | print(f" - {key}: {mean}") 134 | print(f"Total number of episodes: {self.num_episodes}") 135 | 136 | def __del__(self): 137 | if self.plot_process is not None: 138 | self.plot_process.kill() -------------------------------------------------------------------------------- /humanoid/utils/math.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | import torch 33 | from torch import Tensor 34 | import numpy as np 35 | from isaacgym.torch_utils import quat_apply, normalize 36 | from typing import Tuple 37 | 38 | # @ torch.jit.script 39 | def quat_apply_yaw(quat, vec): 40 | quat_yaw = quat.clone().view(-1, 4) 41 | quat_yaw[:, :2] = 0. 42 | quat_yaw = normalize(quat_yaw) 43 | return quat_apply(quat_yaw, vec) 44 | 45 | # @ torch.jit.script 46 | def wrap_to_pi(angles): 47 | angles %= 2*np.pi 48 | angles -= 2*np.pi * (angles > np.pi) 49 | return angles 50 | 51 | # @ torch.jit.script 52 | def torch_rand_sqrt_float(lower, upper, shape, device): 53 | # type: (float, float, Tuple[int, int], str) -> Tensor 54 | r = 2*torch.rand(*shape, device=device) - 1 55 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r)) 56 | r = (r + 1.) / 2. 
57 | return (upper - lower) * r + lower -------------------------------------------------------------------------------- /humanoid/utils/task_registry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import os 34 | from typing import Tuple 35 | from datetime import datetime 36 | 37 | from humanoid.algo import VecEnv 38 | from humanoid.algo import OnPolicyRunner 39 | 40 | from humanoid import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 41 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params 42 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 43 | 44 | class TaskRegistry(): 45 | def __init__(self): 46 | self.task_classes = {} 47 | self.env_cfgs = {} 48 | self.train_cfgs = {} 49 | 50 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO): 51 | self.task_classes[name] = task_class 52 | self.env_cfgs[name] = env_cfg 53 | self.train_cfgs[name] = train_cfg 54 | 55 | def get_task_class(self, name: str) -> VecEnv: 56 | return self.task_classes[name] 57 | 58 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]: 59 | train_cfg = self.train_cfgs[name] 60 | env_cfg = self.env_cfgs[name] 61 | # copy seed 62 | env_cfg.seed = train_cfg.seed 63 | return env_cfg, train_cfg 64 | 65 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]: 66 | """ Creates an environment either from a registered name or from the provided config file. 67 | 68 | Args: 69 | name (string): Name of a registered env.
70 | args (Args, optional): Isaac Gym command line arguments. If None, get_args() will be called. Defaults to None. 71 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None. 72 | 73 | Raises: 74 | ValueError: Error if no registered env corresponds to 'name' 75 | 76 | Returns: 77 | isaacgym.VecTaskPython: The created environment 78 | Dict: the corresponding config file 79 | """ 80 | # if no args passed get command line arguments 81 | if args is None: 82 | args = get_args() 83 | # check if there is a registered env with that name 84 | if name in self.task_classes: 85 | task_class = self.get_task_class(name) 86 | else: 87 | raise ValueError(f"Task with name: {name} was not registered") 88 | if env_cfg is None: 89 | # load config files 90 | env_cfg, _ = self.get_cfgs(name) 91 | # override cfg from args (if specified) 92 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args) 93 | set_seed(env_cfg.seed) 94 | # parse sim params (convert to dict first) 95 | sim_params = {"sim": class_to_dict(env_cfg.sim)} 96 | sim_params = parse_sim_params(args, sim_params) 97 | env = task_class( cfg=env_cfg, 98 | sim_params=sim_params, 99 | physics_engine=args.physics_engine, 100 | sim_device=args.sim_device, 101 | headless=args.headless) 102 | self.env_cfg_for_wandb = env_cfg 103 | return env, env_cfg 104 | 105 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default") -> Tuple[OnPolicyRunner, LeggedRobotCfgPPO]: 106 | """ Creates the training algorithm either from a registered name or from the provided config file. 107 | 108 | Args: 109 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm) 110 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None. 111 | args (Args, optional): Isaac Gym command line arguments. If None, get_args() will be called. Defaults to None. 112 | train_cfg (Dict, optional): Training config file. If None, 'name' will be used to get the config file. Defaults to None. 113 | log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example). 114 | Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<path_to_LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>.
115 | 116 | Raises: 117 | ValueError: Error if neither 'name' nor 'train_cfg' is provided 118 | Warning: If both 'name' and 'train_cfg' are provided, 'name' is ignored 119 | 120 | Returns: 121 | OnPolicyRunner: The created training runner 122 | Dict: the corresponding config file 123 | """ 124 | # if no args passed get command line arguments 125 | if args is None: 126 | args = get_args() 127 | # if config files are passed use them, otherwise load from the name 128 | if train_cfg is None: 129 | if name is None: 130 | raise ValueError("Either 'name' or 'train_cfg' must not be None") 131 | # load config files 132 | _, train_cfg = self.get_cfgs(name) 133 | else: 134 | if name is not None: 135 | print(f"'train_cfg' provided -> Ignoring 'name={name}'") 136 | # override cfg from args (if specified) 137 | _, train_cfg = update_cfg_from_args(None, train_cfg, args) 138 | 139 | if log_root=="default": 140 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name) 141 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 142 | elif log_root is None: 143 | log_dir = None 144 | else: 145 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 146 | 147 | train_cfg_dict = class_to_dict(train_cfg) 148 | env_cfg_dict = class_to_dict(self.env_cfg_for_wandb) 149 | all_cfg = {**train_cfg_dict, **env_cfg_dict} 150 | 151 | runner_class = eval(train_cfg_dict["runner_class_name"]) 152 | runner = runner_class(env, all_cfg, log_dir, device=args.rl_device) 153 | # save resume path before creating a new log_dir 154 | resume = train_cfg.runner.resume 155 | if resume: 156 | # load previously trained model 157 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint) 158 | print(f"Loading model from: {resume_path}") 159 | runner.load(resume_path, load_optimizer=False) 160 | return runner, train_cfg 161 | 162 | # make global task registry 163 | task_registry = TaskRegistry() -------------------------------------------------------------------------------- /humanoid/utils/terrain.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | 33 | import numpy as np 34 | 35 | from isaacgym import terrain_utils 36 | from humanoid.envs.base.legged_robot_config import LeggedRobotCfg 37 | 38 | class Terrain: 39 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None: 40 | 41 | self.cfg = cfg 42 | self.num_robots = num_robots 43 | self.type = cfg.mesh_type 44 | if self.type in ["none", 'plane']: 45 | return 46 | self.env_length = cfg.terrain_length 47 | self.env_width = cfg.terrain_width 48 | self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))] 49 | 50 | self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols 51 | self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3)) 52 | 53 | self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale) 54 | self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale) 55 | 56 | self.border = int(cfg.border_size/self.cfg.horizontal_scale) 57 | self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border 58 | self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border 59 | 60 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16) 61 | if cfg.curriculum: 62 | self.curiculum() 63 | elif cfg.selected: 64 | self.selected_terrain() 65 | else: 66 | self.randomized_terrain() 67 | 68 | self.heightsamples = self.height_field_raw 69 | if self.type=="trimesh": 70 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh( self.height_field_raw, 71 | self.cfg.horizontal_scale, 72 | self.cfg.vertical_scale, 73 | self.cfg.slope_treshold) 74 | 75 | def randomized_terrain(self): 76 | for k in range(self.cfg.num_sub_terrains): 77 | # Env coordinates in the world 78 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 79 | 80 | choice = np.random.uniform(0, 1) 81 | difficulty = np.random.choice([0.5, 0.75, 0.9]) 82 | terrain = self.make_terrain(choice, difficulty) 83 | self.add_terrain_to_map(terrain, i, j) 84 | 85 | def curiculum(self): 86 | for j in range(self.cfg.num_cols): 87 | for i in range(self.cfg.num_rows): 88 | difficulty = i / self.cfg.num_rows 89 | choice = j / self.cfg.num_cols + 0.001 90 | 91 | terrain = self.make_terrain(choice, difficulty) 92 | self.add_terrain_to_map(terrain, i, j) 93 | 94 | def selected_terrain(self): 95 | terrain_type = self.cfg.terrain_kwargs.pop('type') 96 | for k in range(self.cfg.num_sub_terrains): 97 | # Env coordinates in the world 98 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 99 | 100 | terrain = terrain_utils.SubTerrain("terrain", 101 | width=self.width_per_env_pixels, 102 | length=self.width_per_env_pixels, 103 | vertical_scale=self.vertical_scale, 104 | horizontal_scale=self.horizontal_scale) 105 | 106 | eval(terrain_type)(terrain, **self.cfg.terrain_kwargs.terrain_kwargs) 107 | 
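            # eval() resolves the configured terrain-type string (e.g.
            # "terrain_utils.random_uniform_terrain") to the generator function
            # itself; getattr(terrain_utils, name) would be a stricter alternative.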
self.add_terrain_to_map(terrain, i, j) 108 | 109 | def make_terrain(self, choice, difficulty): 110 | terrain = terrain_utils.SubTerrain("terrain", 111 | width=self.width_per_env_pixels, 112 | length=self.width_per_env_pixels, 113 | vertical_scale=self.cfg.vertical_scale, 114 | horizontal_scale=self.cfg.horizontal_scale) 115 | slope = difficulty * 0.4 116 | step_height = 0.05 + 0.18 * difficulty 117 | discrete_obstacles_height = 0.05 + difficulty * 0.2 118 | stepping_stones_size = 1.5 * (1.05 - difficulty) 119 | stone_distance = 0.05 if difficulty == 0 else 0.1 120 | gap_size = 1. * difficulty 121 | pit_depth = 1. * difficulty 122 | if choice < self.proportions[0]: 123 | if choice < self.proportions[0] / 2: 124 | slope *= -1 125 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 126 | elif choice < self.proportions[1]: 127 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 128 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, step=0.005, downsampled_scale=0.2) 129 | elif choice < self.proportions[3]: 130 | if choice < self.proportions[2]: 131 | step_height *= -1 132 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.) 133 | elif choice < self.proportions[4]: 134 | num_rectangles = 20 135 | rectangle_min_size = 1. 136 | rectangle_max_size = 2. 137 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.) 138 | elif choice < self.proportions[5]: 139 | terrain_utils.stepping_stones_terrain(terrain, stone_size=stepping_stones_size, stone_distance=stone_distance, max_height=0., platform_size=4.) 140 | elif choice < self.proportions[6]: 141 | gap_terrain(terrain, gap_size=gap_size, platform_size=3.) 142 | else: 143 | pit_terrain(terrain, depth=pit_depth, platform_size=4.) 144 | 145 | return terrain 146 | 147 | def add_terrain_to_map(self, terrain, row, col): 148 | i = row 149 | j = col 150 | # map coordinate system 151 | start_x = self.border + i * self.length_per_env_pixels 152 | end_x = self.border + (i + 1) * self.length_per_env_pixels 153 | start_y = self.border + j * self.width_per_env_pixels 154 | end_y = self.border + (j + 1) * self.width_per_env_pixels 155 | self.height_field_raw[start_x:end_x, start_y:end_y] = terrain.height_field_raw 156 | 157 | env_origin_x = (i + 0.5) * self.env_length 158 | env_origin_y = (j + 0.5) * self.env_width 159 | x1 = int((self.env_length / 2. - 1) / terrain.horizontal_scale) 160 | x2 = int((self.env_length / 2. + 1) / terrain.horizontal_scale) 161 | y1 = int((self.env_width / 2. - 1) / terrain.horizontal_scale) 162 | y2 = int((self.env_width / 2. + 1) / terrain.horizontal_scale) 163 | env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2]) * terrain.vertical_scale 164 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 165 | 166 | 167 | def gap_terrain(terrain, gap_size, platform_size=1.): 168 | gap_size = int(gap_size / terrain.horizontal_scale) 169 | platform_size = int(platform_size / terrain.horizontal_scale) 170 | center_x = terrain.length // 2 171 | center_y = terrain.width // 2 172 | x1 = (terrain.length - platform_size) // 2 173 | x2 = x1 + gap_size 174 | y1 = (terrain.width - platform_size) // 2 175 | y2 = y1 + gap_size 176 | terrain.height_field_raw[center_x - x2:center_x + x2, center_y - y2:center_y + y2] = -1000 177 | terrain.height_field_raw[center_x - x1:center_x + x1, center_y - y1:center_y + y1] = 0 178 | 179 | 180 | def pit_terrain(terrain, depth, platform_size=1.): 181 | depth = int(depth / terrain.vertical_scale) 182 | platform_size = int(platform_size / terrain.horizontal_scale / 2) 183 | x1 = terrain.length // 2 - platform_size 184 | x2 = terrain.length // 2 + platform_size 185 | y1 = terrain.width // 2 - platform_size 186 | y2 = terrain.width // 2 + platform_size 187 | terrain.height_field_raw[x1:x2, y1:y2] = -depth 188 | 189 | class HumanoidTerrain(Terrain): 190 | def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None: 191 | super().__init__(cfg, num_robots) 192 | 193 | def randomized_terrain(self): 194 | for k in range(self.cfg.num_sub_terrains): 195 | # Env coordinates in the world 196 | (i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols)) 197 | 198 | choice = np.random.uniform(0, 1) 199 | difficulty = np.random.uniform(0, 1) 200 | terrain = self.make_terrain(choice, difficulty) 201 | self.add_terrain_to_map(terrain, i, j) 202 | 203 | def make_terrain(self, choice, difficulty): 204 | terrain = terrain_utils.SubTerrain("terrain", 205 | width=self.width_per_env_pixels, 206 | length=self.width_per_env_pixels, 207 | vertical_scale=self.cfg.vertical_scale, 208 | horizontal_scale=self.cfg.horizontal_scale) 209 | discrete_obstacles_height = difficulty * 0.04 210 | r_height = difficulty * 0.07 211 | h_slope = difficulty * 0.15 212 | if choice < self.proportions[0]: 213 | pass 214 | elif choice < self.proportions[1]: 215 | num_rectangles = 20 216 | rectangle_min_size = 1. 217 | rectangle_max_size = 2. 218 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.) 219 | elif choice < self.proportions[2]: 220 | terrain_utils.random_uniform_terrain(terrain, min_height=-r_height, max_height=r_height, step=0.005, downsampled_scale=0.2) 221 | elif choice < self.proportions[3]: 222 | terrain_utils.pyramid_sloped_terrain(terrain, slope=h_slope, platform_size=0.1) 223 | elif choice < self.proportions[4]: 224 | terrain_utils.pyramid_sloped_terrain(terrain, slope=-h_slope, platform_size=0.1) 225 | elif choice < self.proportions[5]: 226 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.4, step_height=discrete_obstacles_height, platform_size=1.) 227 | elif choice < self.proportions[6]: 228 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.4, step_height=-discrete_obstacles_height, platform_size=1.)
229 | else: 230 | pass 231 | return terrain 232 | -------------------------------------------------------------------------------- /images/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/images/demo.gif -------------------------------------------------------------------------------- /logs/XBot_ppo/exported/policies/policy_example.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/logs/XBot_ppo/exported/policies/policy_example.pt -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_linkage1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_linkage1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_linkage2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_linkage2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_motor1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_motor1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_pitch_motor2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_pitch_motor2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_ankle_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_ankle_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_arm_base_link.STL: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_arm_base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_arm_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_arm_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_elbow_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_elbow_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_elbow_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_elbow_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_foot_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_foot_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_rota_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_index_tip.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_index_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_mid_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_mid_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_mid_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_mid_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_pinky_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_pinky_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_pinky_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_pinky_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_ring_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_ring_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_link2.STL -------------------------------------------------------------------------------- 
/resources/robots/XBot/meshes/left_hand_ring_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_ring_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_rota_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_hand_thumb_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_hand_thumb_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_knee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_knee_linkage_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_linkage_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_knee_motor_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_knee_motor_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_leg_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_leg_roll_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_leg_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_leg_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_shoulder_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_shoulder_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_shoulder_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_shoulder_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_wrist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_wrist_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/left_wrist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/left_wrist_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/neck_base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/neck_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/neck_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/neck_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/realsense_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/realsense_link.STL -------------------------------------------------------------------------------- 
/resources/robots/XBot/meshes/right_ankle_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_linkage1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_linkage1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_linkage2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_linkage2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_motor1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_motor1_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_pitch_motor2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_pitch_motor2_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_ankle_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_ankle_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_arm_base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_arm_base_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_arm_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_arm_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_elbow_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_elbow_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_elbow_yaw_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_elbow_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_foot_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_foot_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_rota_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_index_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_index_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_mid_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_mid_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_link2.STL 
-------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_mid_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_mid_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_pinky_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_pinky_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_pinky_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_pinky_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ring_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ring_link2.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_ring_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_ring_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_bend_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_bend_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_rota_link1.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_link1.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_rota_link2.STL: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_link2.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_hand_thumb_rota_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_hand_thumb_rota_tip.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_knee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_knee_linkage_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_linkage_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_knee_motor_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_knee_motor_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_leg_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_leg_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_leg_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_leg_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_shoulder_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_shoulder_pitch_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_shoulder_roll_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_shoulder_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_wrist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_wrist_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/right_wrist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/right_wrist_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/waist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/waist_roll_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/meshes/waist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/meshes/waist_yaw_link.STL -------------------------------------------------------------------------------- /resources/robots/XBot/terrain/uneven.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboterax/humanoid-gym/ae46e201c85a2b17e7f2cea59a441dae7ea88a8f/resources/robots/XBot/terrain/uneven.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. 31 | 32 | from setuptools import find_packages, setup  # setuptools replaces the deprecated distutils.core 33 | 34 | 35 | setup( 36 | name='humanoid', 37 | version='1.0.0', 38 | author='Xinyang Gu, Yen-Jen Wang, Jianyu Chen', 39 | license="BSD-3-Clause", 40 | packages=find_packages(), 41 | author_email='zlw21gxy@gmail.com, wangyenjen@berkeley.edu, jianyuchen@tsinghua.edu.cn', 42 | description='Isaac Gym environments for humanoid robots', 43 | install_requires=['isaacgym', # Isaac Gym Preview 4 44 | 'wandb', 45 | 'DateTime', # used to sort log directories by month 46 | 'tensorboard', 47 | 'tqdm', 48 | 'numpy==1.23.5', 49 | 'opencv-python', 50 | 'mujoco==2.3.6', 51 | 'mujoco-python-viewer', 52 | 'matplotlib'] 53 | ) 54 | --------------------------------------------------------------------------------
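
Usage note: the pieces above are typically wired together by a small training entry point (humanoid/scripts/train.py in the tree listing). The sketch below is illustrative only and assumes the legged_gym-style API this codebase derives from: task_registry.make_env() and the args.task field are assumptions based on that lineage, while make_alg_runner() and the global task_registry appear verbatim in humanoid/utils/task_registry.py above.

# minimal training sketch (hypothetical; not a file from the repository above)
from humanoid.utils.task_registry import task_registry
from humanoid.utils.helpers import get_args

def train():
    args = get_args()  # parse command-line arguments (task name, RL device, etc.)
    # build the vectorized Isaac Gym environment for the requested task (assumed API)
    env, env_cfg = task_registry.make_env(name=args.task, args=args)
    # build the PPO runner; returns (runner, train_cfg) as documented in the docstring above
    runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args)
    # run the on-policy training loop for the configured number of iterations
    runner.learn(num_learning_iterations=train_cfg.runner.max_iterations)

if __name__ == '__main__':
    train()

One detail worth spelling out: Terrain's cumulative proportions act as thresholds on the uniform choice sample. For example, terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1] yields thresholds [0.2, 0.4, 0.8, 0.9, 1.0], so choice = 0.55 falls below the third threshold and selects the third terrain type; curriculum() exploits this by mapping each column index j to a fixed choice value, so each column of sub-terrains gets one terrain type at increasing difficulty along the rows.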