├── LICENSE ├── README.md ├── docs ├── changelog.md ├── deploy.md └── install.md ├── poster.gif └── simulation ├── .gitignore ├── README.md ├── legged_gym ├── .gitignore ├── LICENSE ├── legged_gym │ ├── .gitignore │ ├── __init__.py │ ├── envs │ │ ├── __init__.py │ │ ├── base │ │ │ ├── base_config.py │ │ │ ├── base_task.py │ │ │ ├── humanoid.py │ │ │ ├── humanoid_config.py │ │ │ ├── legged_robot.py │ │ │ └── legged_robot_config.py │ │ ├── g1rolltrack │ │ │ ├── g1waistroll_track.py │ │ │ └── g1waistroll_track_config.py │ │ ├── g1track │ │ │ ├── g1waist_track.py │ │ │ └── g1waist_track_config.py │ │ ├── g1waist │ │ │ ├── g1waist_up.py │ │ │ └── g1waist_up_config.py │ │ └── g1waistroll │ │ │ ├── g1waistroll_up.py │ │ │ └── g1waistroll_up_config.py │ ├── gym_utils │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── logger.py │ │ ├── math.py │ │ ├── motor_delay_fft.py │ │ ├── storage.py │ │ ├── task_registry.py │ │ └── terrain.py │ ├── scripts │ │ ├── eval.sh │ │ ├── eval_track.sh │ │ ├── facingdown_poses.npy │ │ ├── facingup_poses.npy │ │ ├── log.sh │ │ ├── log_traj.py │ │ ├── play.py │ │ ├── run.sh │ │ ├── run_track.sh │ │ ├── save_jit.py │ │ └── train.py │ └── tests │ │ ├── test_asset.py │ │ └── test_env.py ├── licenses │ ├── assets │ │ ├── ANYmal_b_license.txt │ │ ├── ANYmal_c_license.txt │ │ ├── a1_license.txt │ │ └── cassie_license.txt │ └── dependencies │ │ └── matplotlib_license.txt ├── requirements.txt ├── resources │ └── robots │ │ └── g1_modified │ │ ├── g1_23dof.urdf │ │ ├── g1_23dof.xml │ │ ├── g1_23dof_full.xml │ │ ├── g1_29dof.urdf │ │ ├── g1_29dof.xml │ │ ├── g1_29dof_fixedwrist_custom_collision.urdf │ │ ├── g1_29dof_fixedwrist_custom_collision_with_head.urdf │ │ ├── g1_29dof_fixedwrist_full_collision.urdf │ │ ├── g1_sim2sim.xml │ │ ├── meshes │ │ ├── head_link.STL │ │ ├── left_ankle_pitch_link.STL │ │ ├── left_ankle_roll_link.STL │ │ ├── left_elbow_link.STL │ │ ├── left_hand_index_0_link.STL │ │ ├── left_hand_index_1_link.STL │ │ ├── left_hand_middle_0_link.STL │ │ ├── left_hand_middle_1_link.STL │ │ ├── left_hand_palm_link.STL │ │ ├── left_hand_thumb_0_link.STL │ │ ├── left_hand_thumb_1_link.STL │ │ ├── left_hand_thumb_2_link.STL │ │ ├── left_hip_pitch_link.STL │ │ ├── left_hip_roll_link.STL │ │ ├── left_hip_yaw_link.STL │ │ ├── left_knee_link.STL │ │ ├── left_rubber_hand.STL │ │ ├── left_shoulder_pitch_link.STL │ │ ├── left_shoulder_roll_link.STL │ │ ├── left_shoulder_yaw_link.STL │ │ ├── left_wrist_pitch_link.STL │ │ ├── left_wrist_roll_link.STL │ │ ├── left_wrist_roll_rubber_hand.STL │ │ ├── left_wrist_yaw_link.STL │ │ ├── logo_link.STL │ │ ├── pelvis.STL │ │ ├── pelvis_contour_link.STL │ │ ├── right_ankle_pitch_link.STL │ │ ├── right_ankle_roll_link.STL │ │ ├── right_elbow_link.STL │ │ ├── right_hand_index_0_link.STL │ │ ├── right_hand_index_1_link.STL │ │ ├── right_hand_middle_0_link.STL │ │ ├── right_hand_middle_1_link.STL │ │ ├── right_hand_palm_link.STL │ │ ├── right_hand_thumb_0_link.STL │ │ ├── right_hand_thumb_1_link.STL │ │ ├── right_hand_thumb_2_link.STL │ │ ├── right_hip_pitch_link.STL │ │ ├── right_hip_roll_link.STL │ │ ├── right_hip_yaw_link.STL │ │ ├── right_knee_link.STL │ │ ├── right_rubber_hand.STL │ │ ├── right_shoulder_pitch_link.STL │ │ ├── right_shoulder_roll_link.STL │ │ ├── right_shoulder_yaw_link.STL │ │ ├── right_wrist_pitch_link.STL │ │ ├── right_wrist_roll_link.STL │ │ ├── right_wrist_roll_rubber_hand.STL │ │ ├── right_wrist_yaw_link.STL │ │ ├── torso_constraint_L_link.STL │ │ ├── torso_constraint_L_rod_link.STL │ │ ├── torso_constraint_R_link.STL │ │ 
├── torso_constraint_R_rod_link.STL │ │ ├── torso_link.STL │ │ ├── waist_constraint_L.STL │ │ ├── waist_constraint_R.STL │ │ ├── waist_roll_link.STL │ │ ├── waist_support_link.STL │ │ └── waist_yaw_link.STL │ │ └── shabi.txt └── setup.py └── rsl_rl ├── .gitignore ├── LICENSE ├── README.md ├── licenses └── dependencies │ ├── numpy_license.txt │ └── torch_license.txt ├── rsl_rl ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── ppo.py │ └── ppo_rma.py ├── env │ ├── __init__.py │ └── vec_env.py ├── modules │ ├── __init__.py │ ├── actor_critic.py │ ├── actor_critic_recurrent.py │ └── actor_critic_rma.py ├── runners │ ├── __init__.py │ ├── on_policy_runner.py │ └── runner.py ├── storage │ ├── __init__.py │ ├── replay_buffer.py │ └── rollout_storage.py └── utils │ ├── __init__.py │ ├── init.py │ ├── layers.py │ ├── math.py │ ├── running_mean_std.py │ ├── scale.py │ └── utils.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2025] [Xialin He, Runpei Dong, Zixuan Chen, Saurabh Gupta] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Learning Getting-Up Policies for Real-World Humanoid Robots

Xialin He*,1 | Runpei Dong*,1 | Zixuan Chen2 | Saurabh Gupta1

1University of Illinois Urbana-Champaign   2Simon Fraser University

* Equal Contribution

RSS 2025

30 | 31 | ## News 32 | - 🎉 Apr 2025: HumanUP has been accepted by RSS 2025 33 | 34 | ## HumanUP 35 | **[HumanUP](https://arxiv.org/abs/2502.12152)** is a reinforcement learning (RL) framework for training humanoid robots to get up from supine (i.e., lying face up) or prone (i.e., lying face down) poses. This codebase was initially built for the code release of the **[HumanUP](https://arxiv.org/abs/2502.12152)** paper and supports simulation training of the **Unitree G1** humanoid robot. The simulation training is based on **Isaac Gym**. 36 | 37 | ## Installation 38 | See [installation instructions](./docs/install.md). 39 | 40 | ## Getting Started 41 | See [usage instructions](./simulation/README.md). 42 | 43 | ## Change Logs 44 | See [changelogs](./docs/changelog.md). 45 | 46 | 47 | ## Acknowledgements 48 | + We would like to thank all the authors of this project; it could not have been finished without your efforts! 49 | + Our simulation environment implementation is based on [legged_gym](https://github.com/leggedrobotics/legged_gym), and the RL algorithm implementation is based on [rsl_rl](https://github.com/leggedrobotics/rsl_rl). 50 | + [Smooth-Humanoid-Locomotion](https://github.com/zixuan417/smooth-humanoid-locomotion) also provided many useful insights. 51 | 52 | ## Citation 53 | If you find this work useful, please consider citing: 54 | ``` 55 | @article{humanup25, 56 | title={Learning Getting-Up Policies for Real-World Humanoid Robots}, 57 | author={He, Xialin and Dong, Runpei and Chen, Zixuan and Gupta, Saurabh}, 58 | journal={arXiv preprint arXiv:2502.12152}, 59 | year={2025} 60 | } 61 | ``` 62 | 63 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog and Bugs -------------------------------------------------------------------------------- /docs/deploy.md: -------------------------------------------------------------------------------- 1 | # Deployment Instructions 2 | This document provides instructions on how to deploy a trained policy on the real G1 humanoid robot. 3 | 4 | ## ROS1 (Noetic) 5 | We use ROS1 to successfully deploy our policy on both Ubuntu 20.04 and 22.04. If you are using Ubuntu 20.04, please install ROS1 following the [official instructions](https://wiki.ros.org/noetic/Installation/Ubuntu). If you are using Ubuntu 22.04, please consider using [robostack](https://robostack.github.io/). Follow the [official instructions](https://robostack.github.io/GettingStarted.html) to install ROS1 with robostack. -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | This document provides instructions for installing the codebase. We recommend using [Anaconda](https://www.anaconda.com/) to simplify the process. 3 | 4 | ## Create a Conda Environment 5 | ```bash 6 | conda create -n humanup python=3.8 7 | conda activate humanup 8 | pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 9 | ``` 10 | ## IsaacGym 11 | Download [IsaacGym Preview 4.0](https://developer.nvidia.com/isaac-gym) from [Google Drive](https://drive.google.com/file/d/1YEsZPtmdzQbSePX0WMhdf0565XwBaIFi/view?usp=sharing), then install it by running: 12 | ```bash 13 | cd isaacgym/python && pip install -e .
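# Optional extra sanity check (not part of the original instructions): a quick
# headless import test, handy on machines without a display.
python -c "import isaacgym"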
14 | ``` 15 | **Note:** NVIDIA reserves all rights to [IsaacGym](https://developer.nvidia.com/isaac-gym). 16 | After installing IsaacGym, please make sure it is working by running: 17 | ```bash 18 | # this example can only be run with a monitor 19 | python examples/joint_monkey.py 20 | ``` 21 | 22 | ## RSL RL 23 | Install `rsl_rl` by running: 24 | ```bash 25 | cd ../../rsl_rl && pip install -e . 26 | ``` 27 | 28 | ## Legged Gym and Other Dependencies 29 | Install `legged_gym` and the other dependencies: 30 | ```bash 31 | cd ../legged_gym && pip install -e . 32 | pip install "numpy==1.23.0" pydelatin wandb tqdm opencv-python pymeshlab ipdb pyfqmr flask dill gdown hydra-core mujoco mujoco-python-viewer loguru 33 | pip install -r requirements.txt 34 | pip install imageio[ffmpeg] 35 | ``` 36 | If you cannot install `imageio[ffmpeg]`, please run 37 | ```bash 38 | pip install imageio imageio-ffmpeg 39 | ``` 40 | Next, please follow the usage instructions to test a training run. -------------------------------------------------------------------------------- /poster.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/poster.gif -------------------------------------------------------------------------------- /simulation/.gitignore: -------------------------------------------------------------------------------- 1 | isaacgym* 2 | .ipynb_checkpoints/ 3 | .vscode/ 4 | *img 5 | *__pycache__* 6 | fbx/ 7 | cmu_fbx_all/ 8 | *zip 9 | *ipynb 10 | npy 11 | pkl 12 | *.nfs* -------------------------------------------------------------------------------- /simulation/README.md: -------------------------------------------------------------------------------- 1 | # Usage Instructions 2 | ## Training && Playing Policies 3 | First, go to the scripts folder: 4 | ``` bash 5 | cd legged_gym/legged_gym/scripts 6 | ``` 7 | ### 1. Stage I Discovery Policy Training 8 | #### 1.1 Getting Up Policy 9 | - Training: 10 | ``` bash 11 | bash run.sh g1waist [your_exp_desc] [device] 12 | # e.g. bash run.sh g1waist stage1_get_up cuda:0 13 | ``` 14 | - Evaluation: 15 | ``` bash 16 | bash eval.sh g1waist [your_exp_desc] [checkpoint] 17 | # e.g. bash eval.sh g1waist stage1_get_up -1 18 | ``` 19 | 20 | #### 1.2 Rolling Over Policy 21 | - Training: 22 | ``` bash 23 | bash run.sh g1waistroll [your_exp_desc] [device] 24 | # e.g. bash run.sh g1waistroll stage1_roll_over cuda:0 25 | ``` 26 | - Evaluation: 27 | ``` bash 28 | bash eval.sh g1waistroll [your_exp_desc] [checkpoint] 29 | # e.g. bash eval.sh g1waistroll stage1_roll_over -1 30 | ``` 31 | 32 | For the main training args: 33 | + `--debug` disables wandb and sets the number of environments to 64, which is useful for debugging; 34 | + `--fix_action_std` fixes the action std, which is useful for stabilizing training; 35 | + `--resume` indicates whether to resume from the previous experiment; 36 | + `--resumeid` specifies the exptid to resume from (if resume is set to true); 37 | 38 | For the main evaluation args: 39 | + `--record_video` records video headlessly, which is useful for server users; 40 | + `--checkpoint [int]` specifies the checkpoint to load; the default is -1, which loads the latest one; 41 | + `--use_jit` plays the exported JIT model (see the sketch after this list); 42 | + `--teleop_mode` allows the user to control the robot with the keyboard; 43 | 44 |
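For reference, loading and querying an exported JIT policy in plain PyTorch looks roughly like the sketch below; the file path and observation size are placeholders (assumptions), not values taken from this repository.

```python
# Minimal sketch of driving an exported JIT policy; the path and the observation
# dimension below are placeholders (assumptions), not values from this repository.
import torch

policy = torch.jit.load("path/to/exported_policy.pt", map_location="cpu")
policy.eval()

obs = torch.zeros(1, 96)  # replace 96 with the policy's actual observation size
with torch.no_grad():
    actions = policy(obs)
print(actions.shape)
```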
45 | ### 2. Stage II Deployable Policy Training 46 | #### 2.1 Log the Stage I policy trajectory 47 | ```bash 48 | sh log.sh g1waist [your_exp_desc] [checkpoint] # getting up policy 49 | sh log.sh g1waistroll [your_exp_desc] [checkpoint] # rolling over policy 50 | ``` 51 | Then, please put all trajectories under `simulation/legged_gym/logs/env_logs`; the structure looks like: 52 | ```bash 53 | . 54 | └── env_logs 55 | ├── getup_traj 56 | │ ├── dof_pos_all.pkl 57 | │ └── head_height_all.pkl 58 | └── rollover_traj 59 | ├── dof_pos_all.pkl 60 | ├── head_height_all.pkl 61 | └── projected_gravity_all.pkl 62 | ``` 63 | 64 | To support further development on HumanUP, we provide our discovered trajectories on [Google Drive](https://drive.google.com/drive/folders/1kRSGkMDnqsX6OLr7-8OM5R6bF9mn84sK?usp=sharing). Feel free to download them to directly train the Stage II policy. 65 | 66 | #### 2.2 Getting Up Tracking 67 | - Training: 68 | ``` bash 69 | bash run_track.sh g1waist [your_exp_desc] [device] [traj_name] 70 | # e.g. bash run_track.sh g1waist stage2_get_up cuda:0 getup_traj 71 | ``` 72 | - Evaluation: 73 | ``` bash 74 | bash eval_track.sh g1waist [your_exp_desc] [checkpoint] [traj_name] 75 | # e.g. bash eval_track.sh g1waist stage2_get_up -1 getup_traj 76 | ``` 77 | 78 | #### 2.3 Rolling Over Tracking 79 | - Training: 80 | ``` bash 81 | bash run_track.sh g1waistroll [your_exp_desc] [device] [traj_name] 82 | # e.g. bash run_track.sh g1waistroll stage2_roll_over cuda:0 rollover_traj 83 | ``` 84 | - Evaluation: 85 | ``` bash 86 | bash eval_track.sh g1waistroll [your_exp_desc] [checkpoint] [traj_name] 87 | # e.g. bash eval_track.sh g1waistroll stage2_roll_over -1 rollover_traj 88 | ``` 89 | 90 | ## 3. Save JIT Model 91 | ```bash 92 | # bash to_jit.sh g1waist [your_exp_desc] # e.g. bash to_jit.sh g1waist pretrained_exp 93 | python save_jit.py --proj_name g1waist_track --exptid [your_exp_desc] --checkpoint [checkpoint] --robot g1 94 | # e.g. python save_jit.py --proj_name g1waist_track --exptid stage2_get_up --checkpoint -1 --robot g1 95 | ``` 96 | 97 | You can specify exactly which checkpoint to save by adding `--checkpoint [int]` to the command; the default is -1, which saves the latest one. 98 | 99 | You can play the JIT policy by adding `--use_jit` to the eval script. 100 | 101 | # Notes 102 | Here are some useful notes: 103 | 104 | ## Simulation Frequency 105 | The simulation frequency has a huge impact on the performance of the policy. Most existing codebases for humanoid or quadruped robots use a sim frequency of 200 Hz, which is enough for locomotion tasks like walking. For getting-up policy learning, we use a higher frequency of 1 kHz (`dt=0.001`). Although you can train a reasonable policy in simulation at 200 Hz, it will not work in the real world. 106 |
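To make the frequency bookkeeping concrete, the sketch below shows how the physics step `dt` and a control decimation factor combine into the policy rate. The class layout and the `decimation` value are illustrative assumptions in the style of legged_gym configs, not the exact fields or numbers used in this repository.

```python
# Illustrative sketch only: the field names and the decimation value are assumptions,
# not taken from this repository's config files.
class ExampleCfg:
    class sim:
        dt = 0.001        # 1 kHz physics step, as used for getting-up training
    class control:
        decimation = 20   # the policy acts once every `decimation` physics steps

cfg = ExampleCfg()
policy_dt = cfg.sim.dt * cfg.control.decimation
print(f"physics: {1 / cfg.sim.dt:.0f} Hz, policy: {1 / policy_dt:.0f} Hz")  # 1000 Hz, 50 Hz
```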
107 | ## Collision Mesh 108 | For the G1 humanoid robot, we customized the original G1 collision mesh into simplified and modified collision meshes to accelerate training and improve Sim2Real performance. 109 | - **[g1_29dof_fixedwrist_custom_collision.urdf](./legged_gym/resources/robots/g1_modified/g1_29dof_fixedwrist_custom_collision.urdf)**: 110 | Simplified collision mesh, 23 DoFs G1 with wrists' DoFs removed. 111 | - **[g1_29dof_fixedwrist_custom_collision_with_head.urdf](./legged_gym/resources/robots/g1_modified/g1_29dof_fixedwrist_custom_collision_with_head.urdf)**: 112 | Simplified collision mesh with the head (better for training rolling over), 23 DoFs G1 with wrists' DoFs removed. 113 | - **[g1_29dof_fixedwrist_full_collision.urdf](./legged_gym/resources/robots/g1_modified/g1_29dof_fixedwrist_full_collision.urdf)**: 114 | Full collision mesh, 23 DoFs G1 with wrists' DoFs removed. -------------------------------------------------------------------------------- /simulation/legged_gym/.gitignore: -------------------------------------------------------------------------------- 1 | # These are some examples of commonly ignored file patterns. 2 | # You should customize this list as applicable to your project. 3 | # Learn more about .gitignore: 4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore 5 | 6 | # Node artifact files 7 | node_modules/ 8 | dist/ 9 | 10 | wandb/ 11 | 12 | # Compiled Java class files 13 | *.class 14 | 15 | # Compiled Python bytecode 16 | *.py[cod] 17 | 18 | # Log files 19 | *.log 20 | 21 | # Package files 22 | *.jar 23 | 24 | # Maven 25 | target/ 26 | dist/ 27 | 28 | # JetBrains IDE 29 | .idea/ 30 | 31 | # Unit test reports 32 | TEST*.xml 33 | 34 | # Generated by MacOS 35 | .DS_Store 36 | 37 | # Generated by Windows 38 | Thumbs.db 39 | 40 | # Applications 41 | *.app 42 | *.exe 43 | *.war 44 | 45 | # Large media files 46 | *.mp4 47 | *.tiff 48 | *.avi 49 | *.flv 50 | *.mov 51 | *.wmv 52 | 53 | # VS Code 54 | .vscode 55 | # logs 56 | logs 57 | runs 58 | 59 | # other 60 | *.egg-info 61 | __pycache__ 62 | 63 | data 64 | evaluate 65 | debug -------------------------------------------------------------------------------- /simulation/legged_gym/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/assets for license information for assets included in this repository. 31 | See licenses/dependencies for license information of dependencies of this package.
32 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/.gitignore: -------------------------------------------------------------------------------- 1 | run/* 2 | a.py -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | 33 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 34 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'legged_gym', 'envs') 35 | POSE_DIR = os.path.abspath(os.path.join(LEGGED_GYM_ROOT_DIR, '../pose')) 36 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # This file was modified by HumanUP authors in 2024-2025 3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved. 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved. 33 | 34 | from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 35 | from .base.legged_robot import LeggedRobot 36 | 37 | from .base.humanoid import Humanoid 38 | 39 | # G1 with waist dof 40 | from .g1waist.g1waist_up_config import G1WaistHumanUPCfg, G1WaistHumanUPCfgPPO 41 | from .g1waist.g1waist_up import G1WaistHumanUP 42 | from .g1waistroll.g1waistroll_up_config import G1WaistRollHumanUPCfg, G1WaistRollHumanUPCfgPPO 43 | from .g1waistroll.g1waistroll_up import G1WaistRollHumanUP 44 | 45 | from .g1track.g1waist_track_config import G1WaistTrackCfg, G1WaistTrackCfgPPO 46 | from .g1track.g1waist_track import G1WaistTrack 47 | 48 | from .g1rolltrack.g1waistroll_track_config import G1WaistRollTrackCfg, G1WaistRollTrackCfgPPO 49 | from .g1rolltrack.g1waistroll_track import G1WaistRollTrack 50 | 51 | from legged_gym.gym_utils.task_registry import task_registry 52 | 53 | # ======================= environment registration ======================= 54 | 55 | task_registry.register("g1waist_up", G1WaistHumanUP, G1WaistHumanUPCfg(), G1WaistHumanUPCfgPPO()) 56 | 57 | task_registry.register("g1waist_track", G1WaistTrack, G1WaistTrackCfg(), G1WaistTrackCfgPPO()) 58 | 59 | task_registry.register("g1waistroll_up", G1WaistRollHumanUP, G1WaistRollHumanUPCfg(), G1WaistRollHumanUPCfgPPO()) 60 | 61 | task_registry.register("g1waistroll_track", G1WaistRollTrack, G1WaistRollTrackCfg(), G1WaistRollTrackCfgPPO()) -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/envs/base/base_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import inspect 32 | 33 | class BaseConfig: 34 | def __init__(self) -> None: 35 | """ Initializes all member classes recursively. Ignores all names starting with '__' (built-in methods).""" 36 | self.init_member_classes(self) 37 | 38 | @staticmethod 39 | def init_member_classes(obj): 40 | # iterate over all attribute names 41 | for key in dir(obj): 42 | # disregard builtin attributes 43 | # if key.startswith("__"): 44 | if key=="__class__": 45 | continue 46 | # get the corresponding attribute object 47 | var = getattr(obj, key) 48 | # check if the attribute is a class 49 | if inspect.isclass(var): 50 | # instantiate the class 51 | i_var = var() 52 | # set the attribute to the instance instead of the type 53 | setattr(obj, key, i_var) 54 | # recursively init members of the attribute 55 | BaseConfig.init_member_classes(i_var) 56 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/envs/base/base_task.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # This file was modified by HumanUP authors in 2024-2025 3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved. 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3.
Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved. 33 | 34 | import sys 35 | from isaacgym import gymapi 36 | from isaacgym import gymutil, gymtorch 37 | import numpy as np 38 | import torch 39 | import time 40 | 41 | # Base class for RL tasks 42 | class BaseTask(): 43 | 44 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): 45 | self.gym = gymapi.acquire_gym() 46 | 47 | self.sim_params = sim_params 48 | self.physics_engine = physics_engine 49 | self.sim_device = sim_device 50 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(self.sim_device) 51 | self.headless = headless 52 | 53 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX. 
54 | if sim_device_type=='cuda' and sim_params.use_gpu_pipeline: 55 | self.device = self.sim_device 56 | else: 57 | self.device = 'cpu' 58 | 59 | # graphics device for rendering, -1 for no rendering 60 | self.graphics_device_id = self.sim_device_id 61 | if self.headless == True: 62 | self.graphics_device_id = -1 63 | 64 | self.num_envs = cfg.env.num_envs 65 | self.num_obs = cfg.env.num_observations 66 | self.num_privileged_obs = cfg.env.num_privileged_obs 67 | self.num_actions = cfg.env.num_actions 68 | 69 | # optimization flags for pytorch JIT 70 | torch._C._jit_set_profiling_mode(False) 71 | torch._C._jit_set_profiling_executor(False) 72 | 73 | # allocate buffers 74 | self.obs_buf = torch.zeros(self.num_envs, self.num_obs, device=self.device, dtype=torch.float) 75 | self.rew_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) 76 | self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) 77 | self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) 78 | self.time_out_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) 79 | if self.num_privileged_obs is not None: 80 | self.privileged_obs_buf = torch.zeros(self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float) 81 | else: 82 | self.privileged_obs_buf = None 83 | # self.num_privileged_obs = self.num_obs 84 | 85 | self.extras = {} 86 | 87 | # create envs, sim and viewer 88 | self.create_sim() 89 | self.gym.prepare_sim(self.sim) 90 | 91 | # todo: read from config 92 | self.enable_viewer_sync = True 93 | self.viewer = None 94 | 95 | # if running with a viewer, set up keyboard shortcuts and camera 96 | if self.headless == False: 97 | # subscribe to keyboard shortcuts 98 | self.viewer = self.gym.create_viewer( 99 | self.sim, gymapi.CameraProperties()) 100 | self.gym.subscribe_viewer_keyboard_event( 101 | self.viewer, gymapi.KEY_ESCAPE, "QUIT") 102 | self.gym.subscribe_viewer_keyboard_event( 103 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync") 104 | self.gym.subscribe_viewer_keyboard_event( 105 | self.viewer, gymapi.KEY_F, "free_cam") 106 | for i in range(9): 107 | self.gym.subscribe_viewer_keyboard_event( 108 | self.viewer, getattr(gymapi, "KEY_"+str(i)), "lookat"+str(i)) 109 | self.gym.subscribe_viewer_keyboard_event( 110 | self.viewer, gymapi.KEY_LEFT_BRACKET, "prev_id") 111 | self.gym.subscribe_viewer_keyboard_event( 112 | self.viewer, gymapi.KEY_RIGHT_BRACKET, "next_id") 113 | self.gym.subscribe_viewer_keyboard_event( 114 | self.viewer, gymapi.KEY_SPACE, "pause") 115 | self.gym.subscribe_viewer_keyboard_event( 116 | self.viewer, gymapi.KEY_W, "vx_plus") 117 | self.gym.subscribe_viewer_keyboard_event( 118 | self.viewer, gymapi.KEY_S, "vx_minus") 119 | self.gym.subscribe_viewer_keyboard_event( 120 | self.viewer, gymapi.KEY_A, "left_turn") 121 | self.gym.subscribe_viewer_keyboard_event( 122 | self.viewer, gymapi.KEY_D, "right_turn") 123 | self.gym.subscribe_viewer_keyboard_event( 124 | self.viewer, gymapi.KEY_MINUS, "prev_motion") 125 | self.gym.subscribe_viewer_keyboard_event( 126 | self.viewer, gymapi.KEY_EQUAL, "next_motion") 127 | self.free_cam = False 128 | self.lookat_id = 0 129 | self.lookat_vec = torch.tensor([-0, 2, 1], requires_grad=False, device=self.device) 130 | self.button_pressed = False 131 | 132 | def get_observations(self): 133 | return self.obs_buf 134 | 135 | def get_privileged_observations(self): 136 | return self.privileged_obs_buf 137 | 138 | def reset_idx(self, env_ids): 139 | """Reset selected 
robots""" 140 | raise NotImplementedError 141 | 142 | def reset(self): 143 | """ Reset all robots""" 144 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 145 | obs, privileged_obs, _, _, _ = self.step(torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 146 | return obs, privileged_obs 147 | 148 | def step(self, actions): 149 | raise NotImplementedError 150 | 151 | def lookat(self, i): 152 | look_at_pos = self.root_states[i, :3].clone() 153 | cam_pos = look_at_pos + self.lookat_vec 154 | self.set_camera(cam_pos, look_at_pos) 155 | 156 | def render(self, sync_frame_time=True): 157 | if self.viewer: 158 | # check for window closed 159 | if self.gym.query_viewer_has_closed(self.viewer): 160 | sys.exit() 161 | 162 | # check for keyboard events 163 | for evt in self.gym.query_viewer_action_events(self.viewer): 164 | if evt.action == "QUIT" and evt.value > 0: 165 | sys.exit() 166 | elif evt.action == "toggle_viewer_sync" and evt.value > 0: 167 | self.enable_viewer_sync = not self.enable_viewer_sync 168 | 169 | # fetch results 170 | if self.device != 'cpu': 171 | self.gym.fetch_results(self.sim, True) 172 | 173 | # step graphics 174 | if self.enable_viewer_sync: 175 | self.gym.step_graphics(self.sim) 176 | self.gym.draw_viewer(self.viewer, self.sim, True) 177 | if sync_frame_time: 178 | self.gym.sync_frame_time(self.sim) 179 | else: 180 | self.gym.poll_viewer_events(self.viewer) 181 | 182 | # def render(self, sync_frame_time=True): 183 | # if self.viewer: 184 | # # check for window closed 185 | # if self.gym.query_viewer_has_closed(self.viewer): 186 | # sys.exit() 187 | # if not self.free_cam: 188 | # self.lookat(self.lookat_id) 189 | # # check for keyboard events 190 | # evt_count = 0 191 | # for evt in self.gym.query_viewer_action_events(self.viewer): 192 | # if evt.action == "QUIT" and evt.value > 0: 193 | # sys.exit() 194 | # elif evt.action == "toggle_viewer_sync" and evt.value > 0: 195 | # self.enable_viewer_sync = not self.enable_viewer_sync 196 | 197 | # if not self.free_cam: 198 | # for i in range(9): 199 | # if evt.action == "lookat" + str(i) and evt.value > 0: 200 | # self.lookat(i) 201 | # self.lookat_id = i 202 | # if evt.action == "prev_id" and evt.value > 0: 203 | # self.lookat_id = (self.lookat_id-1) % self.num_envs 204 | # self.lookat(self.lookat_id) 205 | # if evt.action == "next_id" and evt.value > 0: 206 | # self.lookat_id = (self.lookat_id+1) % self.num_envs 207 | # self.lookat(self.lookat_id) 208 | # if evt.action == "vx_plus" and evt.value > 0: 209 | # self.commands[self.lookat_id, 0] += 0.1 210 | # if evt.action == "vx_minus" and evt.value > 0: 211 | # self.commands[self.lookat_id, 0] -= 0.1 212 | # if evt.action == "left_turn" and evt.value > 0: 213 | # self.commands[self.lookat_id, 2] -= 0.05 214 | # if evt.action == "right_turn" and evt.value > 0: 215 | # self.commands[self.lookat_id, 2] += 0.05 216 | # if evt.action == "next_motion" and evt.value > 0: 217 | # self._motion_ids[self.lookat_id] = (self._motion_ids[self.lookat_id] + 1) % self._motion_lib.num_motions() 218 | # self.update_motion_ids([self.lookat_id]) 219 | # if evt.action == "prev_motion" and evt.value > 0: 220 | # self._motion_ids[self.lookat_id] = (self._motion_ids[self.lookat_id] - 1) % self._motion_lib.num_motions() 221 | # self.update_motion_ids([self.lookat_id]) 222 | # if evt.action == "free_cam" and evt.value > 0: 223 | # self.free_cam = not self.free_cam 224 | # if self.free_cam: 225 | # self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat) 
226 | 227 | # if evt.action == "pause" and evt.value > 0: 228 | # self.pause = True 229 | # while self.pause: 230 | # time.sleep(0.1) 231 | # self.gym.draw_viewer(self.viewer, self.sim, True) 232 | # for evt in self.gym.query_viewer_action_events(self.viewer): 233 | # if evt.action == "pause" and evt.value > 0: 234 | # self.pause = False 235 | # if self.gym.query_viewer_has_closed(self.viewer): 236 | # sys.exit() 237 | # if evt.value > 0: 238 | # evt_count += 1 239 | # self.button_pressed = True if evt_count > 0 else False 240 | 241 | # # fetch results 242 | # if self.device != 'cpu': 243 | # self.gym.fetch_results(self.sim, True) 244 | 245 | # self.gym.poll_viewer_events(self.viewer) 246 | # # step graphics 247 | # if self.enable_viewer_sync: 248 | # self.gym.step_graphics(self.sim) 249 | # self.gym.draw_viewer(self.viewer, self.sim, True) 250 | # if sync_frame_time: 251 | # self.gym.sync_frame_time(self.sim) 252 | # else: 253 | # self.gym.poll_viewer_events(self.viewer) 254 | 255 | # if not self.free_cam: 256 | # p = self.gym.get_viewer_camera_transform(self.viewer, None).p 257 | # cam_trans = torch.tensor([p.x, p.y, p.z], requires_grad=False, device=self.device) 258 | # look_at_pos = self.root_states[self.lookat_id, :3].clone() 259 | # self.lookat_vec = cam_trans - look_at_pos 260 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/gym_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict 32 | from .task_registry import task_registry 33 | from .logger import Logger 34 | from .math import * 35 | from .terrain import Terrain 36 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/gym_utils/logger.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | from collections import defaultdict 34 | from multiprocessing import Process, Value 35 | 36 | class Logger: 37 | def __init__(self, dt): 38 | self.state_log = defaultdict(list) 39 | self.rew_log = defaultdict(list) 40 | self.dt = dt 41 | self.num_episodes = 0 42 | self.plot_process = None 43 | 44 | def log_state(self, key, value): 45 | self.state_log[key].append(value) 46 | 47 | def log_states(self, dict): 48 | for key, value in dict.items(): 49 | self.log_state(key, value) 50 | 51 | def log_rewards(self, dict, num_episodes): 52 | for key, value in dict.items(): 53 | if 'rew' in key: 54 | self.rew_log[key].append(value.item() * num_episodes) 55 | self.num_episodes += num_episodes 56 | 57 | def reset(self): 58 | self.state_log.clear() 59 | self.rew_log.clear() 60 | 61 | def plot_states(self): 62 | self.plot_process = Process(target=self._plot) 63 | self.plot_process.start() 64 | 65 | def _plot(self): 66 | nb_rows = 3 67 | nb_cols = 3 68 | fig, axs = plt.subplots(nb_rows, nb_cols) 69 | for key, value in self.state_log.items(): 70 | time = np.linspace(0, len(value)*self.dt, len(value)) 71 | break 72 | log= self.state_log 73 | # plot joint targets and measured positions 74 | a = axs[1, 0] 75 | if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured') 76 | if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target') 77 | a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position') 78 | a.legend() 79 | # plot joint velocity 80 | a = axs[1, 1] 81 | if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured') 82 | if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target') 83 | a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity') 84 | a.legend() 85 | # plot base vel x 86 | a = axs[0, 0] 87 | if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured') 88 | if log["command_x"]: a.plot(time, log["command_x"], label='commanded') 89 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x') 90 | a.legend() 91 | # plot base vel y 92 | a = axs[0, 1] 93 | if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured') 94 | if log["command_y"]: a.plot(time, log["command_y"], label='commanded') 95 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y') 96 | a.legend() 97 | # plot base vel yaw 98 | a = axs[0, 2] 99 | if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured') 100 | if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') 101 | a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw') 102 | a.legend() 103 | # plot base vel z 104 | a = axs[1, 2] 105 | if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured') 106 | a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z') 107 | a.legend() 108 | # plot contact forces 109 | a = axs[2, 0] 110 | if log["contact_forces_z"]: 111 | forces = np.array(log["contact_forces_z"]) 112 | for i in range(forces.shape[1]): 113 | a.plot(time, forces[:, i], label=f'force {i}') 114 | a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') 115 | a.legend() 116 | # plot torque/vel curves 117 | a = axs[2, 1] 118 | if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') 119 | a.set(xlabel='Joint vel [rad/s]', ylabel='Joint 
Torque [Nm]', title='Torque/velocity curves') 120 | a.legend() 121 | # plot torques 122 | a = axs[2, 2] 123 | if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') 124 | a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque') 125 | a.legend() 126 | plt.show() 127 | 128 | def print_rewards(self): 129 | print("Average rewards per second:") 130 | for key, values in self.rew_log.items(): 131 | mean = np.sum(np.array(values)) / self.num_episodes 132 | print(f" - {key}: {mean}") 133 | print(f"Total number of episodes: {self.num_episodes}") 134 | 135 | def __del__(self): 136 | if self.plot_process is not None: 137 | self.plot_process.kill() -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/gym_utils/math.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | from torch import Tensor 33 | import numpy as np 34 | from isaacgym.torch_utils import quat_apply, normalize 35 | from typing import Tuple 36 | 37 | # @ torch.jit.script 38 | def quat_apply_yaw(quat, vec): 39 | quat_yaw = quat.clone().view(-1, 4) 40 | quat_yaw[:, :2] = 0. 41 | quat_yaw = normalize(quat_yaw) 42 | return quat_apply(quat_yaw, vec) 43 | 44 | # @ torch.jit.script 45 | def wrap_to_pi(angles): 46 | angles %= 2*np.pi 47 | angles -= 2*np.pi * (angles > np.pi) 48 | return angles 49 | 50 | # @ torch.jit.script 51 | def torch_rand_sqrt_float(lower, upper, shape, device): 52 | # type: (float, float, Tuple[int, int], str) -> Tensor 53 | r = 2*torch.rand(*shape, device=device) - 1 54 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r)) 55 | r = (r + 1.) / 2. 
56 | return (upper - lower) * r + lower 57 | 58 | # @ torch.jit.script 59 | def torch_rand_int(lower, upper, shape, device): 60 | # type: (float, float, Tuple[int, int], str) -> Tensor 61 | return ((upper - lower) * torch.rand(*shape, device=device).squeeze(1) + lower).long().float() 62 | 63 | def sample_unit_vector(n, dim, device): 64 | tensor = torch.randn(n, dim, device=device) 65 | unit_vector = tensor / torch.norm(tensor, dim=-1, keepdim=True) 66 | return unit_vector -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/gym_utils/motor_delay_fft.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class MotorDelay_80(nn.Module): 6 | def __init__(self, num_envs, num_actions, device="cuda:0"): 7 | super(MotorDelay_80, self).__init__() 8 | self.a = 1.2766 9 | self.b = 12.13208 10 | # self.alpha = 1.0 11 | self.alpha = torch.exp(torch.tensor([-1 / self.b]).to(device)) 12 | self.beta = self.a / self.b 13 | # self.y_pre = 0.0 14 | self.y_pre = torch.zeros(num_envs, num_actions, dtype = torch.float, device=device) 15 | 16 | 17 | def forward(self, x): 18 | if x.dim() ==1: 19 | x = x.unsqueeze(1) 20 | 21 | # if self.y_pre is None: 22 | # self.y_pre = torch.zeros(x.size(0), x.size(1), dtype = x.dtype, device=x.device) 23 | 24 | y = self.alpha * self.y_pre + self.beta * x 25 | self.y_pre = y 26 | return y 27 | 28 | def reset(self, env_idx): 29 | self.y_pre[env_idx] = 0 30 | 31 | 32 | class MotorDelay_130(nn.Module): 33 | def __init__(self, num_envs, num_actions, device="cuda:0"): 34 | super(MotorDelay_130, self).__init__() 35 | self.a = 0.91 36 | self.b = 11.28 37 | # self.alpha = 1.0 38 | self.alpha = torch.exp(torch.tensor([-1 / self.b]).to(device)) 39 | self.beta = self.a / self.b 40 | # self.y_pre = 0.0 41 | self.y_pre = torch.zeros(num_envs, num_actions, dtype = torch.float, device=device) 42 | 43 | 44 | def forward(self, x): 45 | if x.dim() ==1: 46 | x = x.unsqueeze(1) 47 | 48 | # if self.y_pre is None: 49 | # self.y_pre = torch.zeros(x.size(0), x.size(1), dtype = x.dtype, device=x.device) 50 | 51 | y = self.alpha * self.y_pre + self.beta * x 52 | self.y_pre = y 53 | return y 54 | 55 | def reset(self, env_idx): 56 | self.y_pre[env_idx] = 0 57 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/gym_utils/storage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler 3 | 4 | class ObsStorage: 5 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, action_shape, device): 6 | self.device = device 7 | 8 | # Core 9 | self.obs = torch.zeros(num_transitions_per_env, num_envs, *obs_shape).to(self.device) 10 | self.expert = torch.zeros(num_transitions_per_env, num_envs, *action_shape).to(self.device) 11 | self.device = device 12 | 13 | self.num_envs = num_envs 14 | self.num_transitions_per_env = num_transitions_per_env 15 | self.step = 0 16 | 17 | def add_obs(self, obs, expert_action): 18 | if self.step >= self.num_transitions_per_env: 19 | raise AssertionError("Rollout buffer overflow") 20 | self.obs[self.step].copy_(torch.from_numpy(obs).to(self.device)) 21 | self.expert[self.step].copy_(expert_action) 22 | self.step += 1 23 | 24 | def clear(self): 25 | self.step = 0 26 | 27 | def mini_batch_generator_shuffle(self, num_mini_batches): 28 | batch_size = 
self.num_envs * self.num_transitions_per_env 29 | mini_batch_size = batch_size // num_mini_batches 30 | 31 | for indices in BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=True): 32 | obs_batch = self.obs.view(-1, *self.obs.size()[2:])[indices] 33 | expert_action_batch = self.expert.view(-1, *self.expert.size()[2:])[indices] 34 | yield obs_batch, expert_action_batch 35 | 36 | def mini_batch_generator_inorder(self, num_mini_batches): 37 | batch_size = self.num_envs * self.num_transitions_per_env 38 | mini_batch_size = batch_size // num_mini_batches 39 | 40 | for batch_id in range(num_mini_batches): 41 | yield self.obs.view(-1, *self.obs.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 42 | self.expert.view(-1, *self.expert.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size] 43 | 44 | class RolloutStorage: 45 | def __init__(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, actions_shape, device): 46 | self.device = device 47 | 48 | # Core 49 | self.critic_obs = torch.zeros(num_transitions_per_env, num_envs, *critic_obs_shape).to(self.device) 50 | self.actor_obs = torch.zeros(num_transitions_per_env, num_envs, *actor_obs_shape).to(self.device) 51 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device) 52 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape).to(self.device) 53 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1).byte().to(self.device) 54 | 55 | # For PPO 56 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device) 57 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device) 58 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device) 59 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device) 60 | 61 | self.num_transitions_per_env = num_transitions_per_env 62 | self.num_envs = num_envs 63 | self.device = device 64 | 65 | self.step = 0 66 | 67 | def add_transitions(self, actor_obs, critic_obs, actions, rewards, dones, values, actions_log_prob): 68 | if self.step >= self.num_transitions_per_env: 69 | raise AssertionError("Rollout buffer overflow") 70 | self.critic_obs[self.step].copy_(torch.from_numpy(critic_obs).to(self.device)) 71 | self.actor_obs[self.step].copy_(torch.from_numpy(actor_obs).to(self.device)) 72 | self.actions[self.step].copy_(actions.to(self.device)) 73 | self.rewards[self.step].copy_(torch.from_numpy(rewards).view(-1, 1).to(self.device)) 74 | self.dones[self.step].copy_(torch.from_numpy(dones).view(-1, 1).to(self.device)) 75 | self.values[self.step].copy_(values.to(self.device)) 76 | self.actions_log_prob[self.step].copy_(actions_log_prob.view(-1, 1).to(self.device)) 77 | self.step += 1 78 | 79 | def clear(self): 80 | self.step = 0 81 | 82 | def compute_returns(self, last_values, gamma, lam): # GAE(gamma, lam) returns and advantages 83 | advantage = 0 84 | for step in reversed(range(self.num_transitions_per_env)): 85 | if step == self.num_transitions_per_env - 1: 86 | next_values = last_values 87 | # next_is_not_terminal = 1.0 - self.dones[step].float() 88 | else: 89 | next_values = self.values[step + 1] 90 | # next_is_not_terminal = 1.0 - self.dones[step+1].float() 91 | 92 | next_is_not_terminal = 1.0 - self.dones[step].float() 93 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 94 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 95 | self.returns[step] = advantage + 
self.values[step] 96 | 97 | # Compute and normalize the advantages 98 | self.advantages = self.returns - self.values 99 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 100 | 101 | def mini_batch_generator_shuffle(self, num_mini_batches): 102 | batch_size = self.num_envs * self.num_transitions_per_env 103 | mini_batch_size = batch_size // num_mini_batches 104 | 105 | for indices in BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=True): 106 | actor_obs_batch = self.actor_obs.view(-1, *self.actor_obs.size()[2:])[indices] 107 | critic_obs_batch = self.critic_obs.view(-1, *self.critic_obs.size()[2:])[indices] 108 | actions_batch = self.actions.view(-1, self.actions.size(-1))[indices] 109 | values_batch = self.values.view(-1, 1)[indices] 110 | returns_batch = self.returns.view(-1, 1)[indices] 111 | old_actions_log_prob_batch = self.actions_log_prob.view(-1, 1)[indices] 112 | advantages_batch = self.advantages.view(-1, 1)[indices] 113 | yield actor_obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch 114 | 115 | def mini_batch_generator_inorder(self, num_mini_batches): 116 | batch_size = self.num_envs * self.num_transitions_per_env 117 | mini_batch_size = batch_size // num_mini_batches 118 | 119 | for batch_id in range(num_mini_batches): 120 | yield self.actor_obs.view(-1, *self.actor_obs.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 121 | self.critic_obs.view(-1, *self.critic_obs.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 122 | self.actions.view(-1, self.actions.size(-1))[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 123 | self.values.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 124 | self.advantages.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 125 | self.returns.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \ 126 | self.actions_log_prob.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size] 127 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/gym_utils/task_registry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from copy import deepcopy 32 | import os 33 | from datetime import datetime 34 | from typing import Tuple 35 | import torch 36 | import numpy as np 37 | 38 | from rsl_rl.env import VecEnv 39 | from rsl_rl.runners import OnPolicyRunner 40 | 41 | from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 42 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params 43 | from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO 44 | 45 | 46 | class TaskRegistry(): 47 | def __init__(self): 48 | self.task_classes = {} 49 | self.env_cfgs = {} 50 | self.train_cfgs = {} 51 | 52 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO): 53 | self.task_classes[name] = task_class 54 | self.env_cfgs[name] = env_cfg 55 | self.train_cfgs[name] = train_cfg 56 | 57 | def get_task_class(self, name: str) -> VecEnv: 58 | return self.task_classes[name] 59 | 60 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]: 61 | train_cfg = self.train_cfgs[name] 62 | env_cfg = self.env_cfgs[name] 63 | # copy seed 64 | env_cfg.seed = train_cfg.seed 65 | return env_cfg, train_cfg 66 | 67 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]: 68 | """ Creates an environment either from a registered name or from the provided config file. 69 | 70 | Args: 71 | name (string): Name of a registered env. 72 | args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None. 73 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None. 
74 | 75 | Raises: 76 | ValueError: Error if no registered env corresponds to 'name' 77 | 78 | Returns: 79 | isaacgym.VecTaskPython: The created environment 80 | Dict: the corresponding config file 81 | """ 82 | # if no args passed get command line arguments 83 | if args is None: 84 | args = get_args() 85 | # check if there is a registered env with that name 86 | if name in self.task_classes: 87 | task_class = self.get_task_class(name) 88 | else: 89 | raise ValueError(f"Task with name: {name} was not registered") 90 | if env_cfg is None: 91 | # load config files 92 | env_cfg, _ = self.get_cfgs(name) 93 | # override cfg from args (if specified) 94 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args) 95 | set_seed(env_cfg.seed) 96 | # parse sim params (convert to dict first) 97 | sim_params = {"sim": class_to_dict(env_cfg.sim)} 98 | sim_params = parse_sim_params(args, sim_params) 99 | env = task_class( cfg=env_cfg, 100 | sim_params=sim_params, 101 | physics_engine=args.physics_engine, 102 | sim_device=args.sim_device, 103 | headless=args.headless) 104 | return env, env_cfg 105 | 106 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, init_wandb=True, log_root="default", **kwargs): 107 | """ Creates the training algorithm either from a registered name or from the provided config file. 108 | 109 | Args: 110 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm) 111 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None. 112 | args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None. 113 | train_cfg (Dict, optional): Training config file. If None 'name' will be used to get the config file. Defaults to None. 114 | log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example). 115 | Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<LEGGED_GYM_ROOT_DIR>/logs/<experiment_name>. 
116 | 117 | Raises: 118 | ValueError: Error if neither 'name' or 'train_cfg' are provided 119 | Warning: If both 'name' or 'train_cfg' are provided 'name' is ignored 120 | 121 | Returns: 122 | PPO: The created algorithm 123 | Dict: the corresponding config file 124 | """ 125 | # if no args passed get command line arguments 126 | if args is None: 127 | args = get_args() 128 | # if config files are passed use them, otherwise load from the name 129 | if train_cfg is None: 130 | if name is None: 131 | raise ValueError("Either 'name' or 'train_cfg' must be not None") 132 | # load config files 133 | _, train_cfg = self.get_cfgs(name) 134 | else: 135 | if name is not None: 136 | print(f"'train_cfg' provided -> Ignoring 'name={name}'") 137 | # override cfg from args (if specified) 138 | _, train_cfg = update_cfg_from_args(None, train_cfg, args) 139 | 140 | if log_root=="default": 141 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name) 142 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 143 | elif log_root is None: 144 | log_dir = None 145 | else: 146 | log_dir = log_root #os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name) 147 | 148 | train_cfg_dict = class_to_dict(train_cfg) 149 | runner_class = eval(train_cfg.runner.runner_class_name) 150 | runner = runner_class(env, 151 | train_cfg_dict, 152 | log_dir, 153 | init_wandb=init_wandb, 154 | device=args.rl_device, **kwargs) 155 | #save resume path before creating a new log_dir 156 | # return runner, train_cfg, None 157 | resume = train_cfg.runner.resume 158 | if args.resumeid: 159 | log_root = LEGGED_GYM_ROOT_DIR + f"/logs/{args.proj_name}/" + args.resumeid 160 | resume = True 161 | if resume: 162 | # load previously trained model 163 | print(log_root) 164 | print(train_cfg.runner.load_run) 165 | # load_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', "rough_a1", train_cfg.runner.load_run) 166 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint) 167 | runner.load(resume_path) 168 | # if not train_cfg.policy.continue_from_last_std: 169 | # runner.alg.actor_critic.reset_std(train_cfg.policy.init_noise_std, 19, device=runner.device) 170 | 171 | if "return_log_dir" in kwargs: 172 | return runner, train_cfg, os.path.dirname(resume_path) 173 | else: 174 | return runner, train_cfg 175 | 176 | # make global task registry 177 | task_registry = TaskRegistry() 178 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/eval.sh: -------------------------------------------------------------------------------- 1 | robot_name=${1} # Remove the space around the assignment operator 2 | task_name="${robot_name}_up" 3 | 4 | proj_name="${robot_name}_up" 5 | exptid=${2} 6 | checkpoint=${3} 7 | 8 | python play.py --task "${task_name}" \ 9 | --proj_name "${proj_name}" \ 10 | --exptid "${exptid}" \ 11 | --num_envs 1 \ 12 | --checkpoint "${checkpoint}" \ 13 | --record_video 14 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/eval_track.sh: -------------------------------------------------------------------------------- 1 | robot_name=${1} # Remove the space around the assignment operator 2 | task_name="${robot_name}_track" 3 | 4 | proj_name="${robot_name}_track" 5 | exptid=${2} 6 | checkpoint=${3} 7 | traj_name=${4} 8 | 9 | 
python play.py --task "${task_name}" \ 10 | --proj_name "${proj_name}" \ 11 | --exptid "${exptid}" \ 12 | --num_envs 1 \ 13 | --checkpoint "${checkpoint}" \ 14 | --traj_name "${traj_name}"\ 15 | --record_video 16 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/facingdown_poses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/legged_gym/scripts/facingdown_poses.npy -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/facingup_poses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/legged_gym/scripts/facingup_poses.npy -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/log.sh: -------------------------------------------------------------------------------- 1 | robot_name=${1} # Remove the space around the assignment operator 2 | task_name="${robot_name}_up" 3 | 4 | proj_name="${robot_name}_up" 5 | exptid=${2} 6 | checkpoint=${3} 7 | 8 | python log_traj.py --task "${task_name}" \ 9 | --proj_name "${proj_name}" \ 10 | --exptid "${exptid}" \ 11 | --num_envs 1 \ 12 | --checkpoint "${checkpoint}" \ 13 | --record_video 14 | # --sim_device cuda:7 \ 15 | # --checkpoint 20000 \ 16 | # --use_jit \ 17 | # --teleop_mode 18 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/log_traj.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # This file was modified by HumanUP authors in 2024-2025 3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved. 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved. 33 | 34 | from legged_gym import LEGGED_GYM_ROOT_DIR 35 | import os 36 | import code 37 | 38 | import isaacgym 39 | from legged_gym.envs import * 40 | from legged_gym.gym_utils import get_args, export_policy_as_jit, task_registry, Logger 41 | from isaacgym import gymtorch, gymapi, gymutil 42 | import numpy as np 43 | import torch 44 | import cv2 45 | from collections import deque 46 | import statistics 47 | import faulthandler 48 | from copy import deepcopy 49 | import matplotlib.pyplot as plt 50 | from time import time, sleep 51 | from PIL import Image 52 | from legged_gym.gym_utils.helpers import get_load_path as get_load_path_auto 53 | from tqdm import tqdm 54 | 55 | 56 | def get_load_path(root, load_run=-1, checkpoint=-1, model_name_include="jit"): 57 | if checkpoint == -1: 58 | models = [file for file in os.listdir(root) if model_name_include in file] 59 | models.sort(key=lambda m: "{0:0>15}".format(m)) 60 | model = models[-1] 61 | checkpoint = model.split("_")[-1].split(".")[0] 62 | else: 63 | model = None 64 | checkpoint = str(checkpoint) 65 | return model, checkpoint 66 | 67 | 68 | def set_play_cfg(env_cfg): 69 | env_cfg.env.num_envs = 2 # 2 if not args.num_envs else args.num_envs 70 | env_cfg.terrain.num_rows = 5 71 | env_cfg.terrain.num_cols = 5 72 | env_cfg.terrain.curriculum = False 73 | env_cfg.terrain.max_difficulty = False 74 | 75 | env_cfg.noise.add_noise = False 76 | env_cfg.domain_rand.randomize_friction = False 77 | env_cfg.domain_rand.push_robots = False 78 | env_cfg.domain_rand.push_interval_s = 5 79 | env_cfg.domain_rand.max_push_vel_xy = 2.5 80 | env_cfg.domain_rand.randomize_base_mass = False 81 | env_cfg.domain_rand.randomize_base_com = False 82 | env_cfg.domain_rand.action_delay = False 83 | 84 | 85 | def play(args): 86 | faulthandler.enable() 87 | exptid = args.exptid 88 | log_pth = "../../logs/{}/".format(args.proj_name) + args.exptid 89 | stand_flag = False 90 | if args.proj_name.strip() == 'g1waist_up' : 91 | stand_flag = True 92 | elif args.proj_name.strip() == 'g1waistroll_up': 93 | stand_flag = False 94 | else: 95 | print("Invalid project name") 96 | return 97 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 98 | 99 | set_play_cfg(env_cfg) 100 | 101 | env_cfg.env.record_video = args.record_video 102 | if_normalize = env_cfg.env.normalize_obs 103 | 104 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 105 | obs = env.get_observations() 106 | 107 | # load policy 108 | train_cfg.runner.resume = True 109 | ppo_runner, train_cfg, log_pth = task_registry.make_alg_runner( 110 | log_root=log_pth, 111 | env=env, 112 | name=args.task, 113 | args=args, 114 | train_cfg=train_cfg, 115 | return_log_dir=True, 116 | ) 117 | 118 | if args.use_jit: 119 | path = os.path.join(log_pth, "traced") 120 | model, checkpoint = get_load_path(root=path, checkpoint=args.checkpoint) 121 | path = os.path.join(path, model) 
122 | print("Loading jit for policy: ", path) 123 | policy_jit = torch.jit.load(path, map_location=env.device) 124 | else: 125 | policy = ppo_runner.get_inference_policy(device=env.device) 126 | if if_normalize: 127 | normalizer = ppo_runner.get_normalizer(device=env.device) 128 | 129 | actions = torch.zeros(env.num_envs, env.num_actions, device=env.device, requires_grad=False) 130 | 131 | if args.record_video: 132 | mp4_writers = [] 133 | import imageio 134 | 135 | env.enable_viewer_sync = False 136 | for i in range(env.num_envs): 137 | model, checkpoint = get_load_path(root=log_pth, checkpoint=args.checkpoint, model_name_include="model") 138 | video_name = args.proj_name + "-" + args.exptid + "-" + checkpoint + ".mp4" 139 | run_name = log_pth.split("/")[-1] 140 | path = f"../../logs/videos/{args.proj_name}/{run_name}" 141 | if not os.path.exists(path): 142 | os.makedirs(path) 143 | video_name = os.path.join(path, video_name) 144 | mp4_writer = imageio.get_writer(video_name, fps=50, codec="libx264") 145 | mp4_writers.append(mp4_writer) 146 | 147 | if args.record_log: 148 | import json 149 | 150 | run_name = log_pth.split("/")[-1] 151 | logs_dict = [] 152 | dict_name = args.proj_name + "-" + args.exptid + ".json" 153 | path = f"../../logs/env_logs/{run_name}" 154 | if not os.path.exists(path): 155 | os.makedirs(path) 156 | dict_name = os.path.join(path, dict_name) 157 | 158 | if not (args.record_video or args.record_log): 159 | traj_length = 100 * int(env.max_episode_length) 160 | else: 161 | traj_length = int(env.max_episode_length) 162 | 163 | env_id = env.lookat_id 164 | finish_cnt = 0 # break if finish_cnt > 30 165 | 166 | dof_pos_all = None 167 | head_height_all = None 168 | projected_gravity_all = None 169 | 170 | for i in tqdm(range(traj_length)): 171 | if args.use_jit: 172 | actions = policy_jit(obs.detach()) 173 | else: 174 | if if_normalize: 175 | normalized_obs = normalizer(obs.detach()) 176 | else: 177 | normalized_obs = obs.detach() 178 | actions = policy(normalized_obs, hist_encoding=False) 179 | 180 | obs, _, rews, dones, infos = env.step(actions.detach()) 181 | if dof_pos_all is None: 182 | dof_pos_all = env.dof_pos 183 | else: 184 | dof_pos_all = torch.cat((dof_pos_all, env.dof_pos), dim=0) 185 | if head_height_all is None: 186 | head_height_all = env.rigid_body_states[:, env.head_idx, 2].unsqueeze(0) 187 | else: 188 | head_height_all = torch.cat((head_height_all, env.rigid_body_states[:, env.head_idx, 2].unsqueeze(0)), dim=0) 189 | if projected_gravity_all is None: 190 | projected_gravity_all = env.projected_gravity 191 | else: 192 | projected_gravity_all = torch.cat((projected_gravity_all, env.projected_gravity), dim=0) 193 | if stand_flag: # g1waist_up 194 | if env.rigid_body_states[:, env.head_idx, 2] > 1.2: 195 | finish_cnt += 1 196 | else: # g1waistroll_up 197 | target_projected_gravity = torch.tensor([-1, 0, 0], device=env.device) 198 | gravity_error = 1 - torch.nn.functional.cosine_similarity(env.projected_gravity, target_projected_gravity, dim=-1) # [0, 2] 199 | if gravity_error < 0.1: 200 | finish_cnt += 1 201 | 202 | if args.record_video: 203 | imgs = env.render_record(mode="rgb_array") 204 | if imgs is not None: 205 | for i in range(env.num_envs): 206 | mp4_writers[i].append_data(imgs[i]) 207 | 208 | if args.record_log: 209 | log_dict = env.get_episode_log() 210 | logs_dict.append(log_dict) 211 | 212 | # Interaction 213 | if env.button_pressed: 214 | print(f"env_id: {env.lookat_id:<{5}}") 215 | 216 | if finish_cnt > 30: 217 | break 218 | 219 | if 
args.record_video: 220 | for mp4_writer in mp4_writers: 221 | mp4_writer.close() 222 | 223 | if args.record_log: 224 | with open(dict_name, "w") as f: 225 | json.dump(logs_dict, f) 226 | 227 | record_traj = True 228 | if record_traj: 229 | import pickle 230 | if not os.path.exists(f"../../logs/env_logs/{run_name}"): 231 | os.makedirs(f"../../logs/env_logs/{run_name}") 232 | with open(f"../../logs/env_logs/{run_name}/dof_pos_all.pkl", "wb") as f: 233 | pickle.dump(dof_pos_all, f) 234 | with open(f"../../logs/env_logs/{run_name}/head_height_all.pkl", "wb") as f: 235 | pickle.dump(head_height_all, f) 236 | with open(f"../../logs/env_logs/{run_name}/projected_gravity_all.pkl", "wb") as f: 237 | pickle.dump(projected_gravity_all, f) 238 | 239 | 240 | if __name__ == "__main__": 241 | args = get_args() 242 | play(args) 243 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/play.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from legged_gym import LEGGED_GYM_ROOT_DIR 32 | import os 33 | import code 34 | 35 | import isaacgym 36 | from legged_gym.envs import * 37 | from legged_gym.gym_utils import get_args, export_policy_as_jit, task_registry, Logger 38 | from isaacgym import gymtorch, gymapi, gymutil 39 | import numpy as np 40 | import torch 41 | import cv2 42 | from collections import deque 43 | import statistics 44 | import faulthandler 45 | from copy import deepcopy 46 | import matplotlib.pyplot as plt 47 | from time import time, sleep 48 | from PIL import Image 49 | from legged_gym.gym_utils.helpers import get_load_path as get_load_path_auto 50 | from tqdm import tqdm 51 | 52 | from isaacgym.torch_utils import * 53 | 54 | 55 | def get_load_path(root, load_run=-1, checkpoint=-1, model_name_include="jit"): 56 | if checkpoint == -1: 57 | models = [file for file in os.listdir(root) if model_name_include in file] 58 | models.sort(key=lambda m: "{0:0>15}".format(m)) 59 | model = models[-1] 60 | checkpoint = model.split("_")[-1].split(".")[0] 61 | else: 62 | model = None 63 | checkpoint = str(checkpoint) 64 | return model, checkpoint 65 | 66 | 67 | def set_play_cfg(env_cfg): 68 | env_cfg.env.num_envs = 2 # 2 if not args.num_envs else args.num_envs 69 | env_cfg.terrain.num_rows = 5 70 | env_cfg.terrain.num_cols = 5 71 | env_cfg.terrain.curriculum = False 72 | env_cfg.terrain.max_difficulty = False 73 | 74 | env_cfg.domain_rand.domain_rand_general = True 75 | env_cfg.noise.add_noise = False 76 | 77 | env_cfg.domain_rand.randomize_friction = True 78 | env_cfg.domain_rand.push_robots = False 79 | env_cfg.domain_rand.push_interval_s = 5 80 | env_cfg.domain_rand.max_push_vel_xy = 2.5 81 | env_cfg.domain_rand.randomize_base_mass = False 82 | env_cfg.domain_rand.randomize_base_com = False 83 | env_cfg.domain_rand.action_delay = False 84 | 85 | 86 | def play(args): 87 | faulthandler.enable() 88 | exptid = args.exptid 89 | log_pth = "../../logs/{}/".format(args.proj_name) + args.exptid 90 | 91 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 92 | 93 | set_play_cfg(env_cfg) 94 | 95 | env_cfg.env.record_video = args.record_video 96 | if_normalize = env_cfg.env.normalize_obs 97 | 98 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 99 | obs = env.get_observations() 100 | 101 | # load policy 102 | train_cfg.runner.resume = True 103 | ppo_runner, train_cfg, log_pth = task_registry.make_alg_runner( 104 | log_root=log_pth, 105 | env=env, 106 | name=args.task, 107 | args=args, 108 | train_cfg=train_cfg, 109 | return_log_dir=True, 110 | ) 111 | 112 | if args.use_jit: 113 | path = os.path.join(log_pth, "traced") 114 | model, checkpoint = get_load_path(root=path, checkpoint=args.checkpoint) 115 | path = os.path.join(path, model) 116 | print("Loading jit for policy: ", path) 117 | policy_jit = torch.jit.load(path, map_location=env.device) 118 | else: 119 | policy = ppo_runner.get_inference_policy(device=env.device) 120 | if if_normalize: 121 | normalizer = ppo_runner.get_normalizer(device=env.device) 122 | 123 | actions = torch.zeros(env.num_envs, env.num_actions, device=env.device, requires_grad=False) 124 | 125 | if args.record_video: 126 | mp4_writers = [] 127 | import imageio 128 | 129 | env.enable_viewer_sync = False 130 | for i in range(env.num_envs): 131 | model, checkpoint = get_load_path(root=log_pth, checkpoint=args.checkpoint, model_name_include="model") 132 | video_name = args.proj_name + "-" + args.exptid + "-" + 
checkpoint + ".mp4" 133 | run_name = log_pth.split("/")[-1] 134 | path = f"../../logs/videos/{args.proj_name}/{run_name}" 135 | if not os.path.exists(path): 136 | os.makedirs(path) 137 | video_name = os.path.join(path, video_name) 138 | mp4_writer = imageio.get_writer(video_name, fps=50, codec="libx264") 139 | mp4_writers.append(mp4_writer) 140 | 141 | if args.record_log: 142 | import json 143 | 144 | run_name = log_pth.split("/")[-1] 145 | logs_dict = [] 146 | dict_name = args.proj_name + "-" + args.exptid + ".json" 147 | path = f"../../logs/env_logs/{run_name}" 148 | if not os.path.exists(path): 149 | os.makedirs(path) 150 | dict_name = os.path.join(path, dict_name) 151 | 152 | if not (args.record_video or args.record_log): 153 | traj_length = 100 * int(env.max_episode_length) 154 | else: 155 | traj_length = int(env.max_episode_length) 156 | 157 | env_id = env.lookat_id 158 | 159 | all_projected_gravity = [] 160 | all_pitch = [] 161 | all_roll = [] 162 | all_yaw = [] 163 | 164 | for i in tqdm(range(traj_length)): 165 | if args.use_jit: 166 | actions = policy_jit(obs.detach()) 167 | else: 168 | if if_normalize: 169 | normalized_obs = normalizer(obs.detach()) 170 | else: 171 | normalized_obs = obs.detach() 172 | actions = policy(normalized_obs, hist_encoding=True) 173 | 174 | obs, _, rews, dones, infos = env.step(actions.detach()) 175 | 176 | all_roll.append(env.roll[-1].item()) 177 | all_pitch.append(env.pitch[-1].item()) 178 | all_yaw.append(env.yaw[-1].item()) 179 | # all_projected_gravity.append(env.projected_gravity[0, -1].item()) 180 | 181 | # torso gravity 182 | torso_quat = env.rigid_body_rot[:, env.torso_idx] 183 | torso_projected_gravity = quat_rotate_inverse(torso_quat, env.gravity_vec) 184 | all_projected_gravity.append(torso_projected_gravity[0, 2].item()) 185 | 186 | if args.record_video: 187 | imgs = env.render_record(mode="rgb_array") 188 | if imgs is not None: 189 | for i in range(env.num_envs): 190 | mp4_writers[i].append_data(imgs[i]) 191 | 192 | if args.record_log: 193 | log_dict = env.get_episode_log() 194 | logs_dict.append(log_dict) 195 | 196 | # Interaction 197 | if env.button_pressed: 198 | print(f"env_id: {env.lookat_id:<{5}}") 199 | 200 | 201 | if args.record_video: 202 | for mp4_writer in mp4_writers: 203 | mp4_writer.close() 204 | 205 | if args.record_log: 206 | with open(dict_name, "w") as f: 207 | json.dump(logs_dict, f) 208 | 209 | 210 | if __name__ == "__main__": 211 | args = get_args() 212 | play(args) 213 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/run.sh: -------------------------------------------------------------------------------- 1 | robot_name=${1} 2 | task_name="${robot_name}_up" 3 | 4 | proj_name="${robot_name}_up" 5 | exptid=${2} 6 | 7 | # Run the training script 8 | python train.py --task "${task_name}" \ 9 | --proj_name "${proj_name}" \ 10 | --exptid "${exptid}" \ 11 | --device "${3}" \ 12 | --num_envs 4096 \ 13 | --headless \ 14 | --fix_action_std \ 15 | # --debug 16 | # --resume \ 17 | # --resumeid XXX 18 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/run_track.sh: -------------------------------------------------------------------------------- 1 | robot_name=${1} # Remove the space around the assignment operator 2 | task_name="${robot_name}_track" 3 | 4 | proj_name="${robot_name}_track" 5 | exptid=${2} 6 | traj_name=${4} 7 | 8 | # Run the training script 9 | python train.py --task 
"${task_name}" \ 10 | --proj_name "${proj_name}" \ 11 | --exptid "${exptid}" \ 12 | --device "${3}" \ 13 | --num_envs 4096 \ 14 | --headless \ 15 | --fix_action_std \ 16 | --traj_name "${traj_name}"\ 17 | # --debug \ 18 | # --resume \ 19 | # --resumeid XXX 20 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/save_jit.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from statistics import mode 3 | sys.path.append("../../../rsl_rl") 4 | import torch 5 | import torch.nn as nn 6 | from rsl_rl.modules.actor_critic_rma import Actor, StateHistoryEncoder, get_activation 7 | import argparse 8 | import code 9 | import shutil 10 | 11 | def get_load_path(root, load_run=-1, checkpoint=-1, model_name_include="model"): 12 | if not os.path.isdir(root): # use first 4 chars to mactch the run name 13 | model_name_cand = os.path.basename(root) 14 | model_parent = os.path.dirname(root) 15 | model_names = os.listdir(model_parent) 16 | model_names = [name for name in model_names if os.path.isdir(os.path.join(model_parent, name))] 17 | for name in model_names: 18 | if len(name) >= 6: 19 | if name[:6] == model_name_cand: 20 | root = os.path.join(model_parent, name) 21 | if checkpoint==-1: 22 | models = [file for file in os.listdir(root) if model_name_include in file] 23 | models.sort(key=lambda m: '{0:0>15}'.format(m)) 24 | model = models[-1] 25 | checkpoint = model.split("_")[-1].split(".")[0] 26 | else: 27 | model = "model_{}.pt".format(checkpoint) 28 | 29 | load_path = os.path.join(root, model) 30 | return load_path, checkpoint 31 | 32 | class HardwareRefNN(nn.Module): 33 | def __init__(self, num_prop, 34 | num_priv_latent, 35 | num_hist, 36 | critic_obs_extra, 37 | num_actions, 38 | actor_hidden_dims=[512, 256, 128], 39 | activation='elu', 40 | priv_encoder_dims=[64, 20], 41 | ): 42 | super().__init__() 43 | 44 | self.num_prop = num_prop 45 | self.num_hist = num_hist 46 | self.num_actions = num_actions 47 | self.num_priv_latent = num_priv_latent 48 | num_obs = num_prop + num_hist*num_prop + num_priv_latent + critic_obs_extra 49 | self.num_obs = num_obs 50 | activation = get_activation(activation) 51 | 52 | num_priv_explicit = 0 53 | 54 | self.normalizer = None 55 | 56 | self.actor = Actor(num_prop, 57 | num_actions, 58 | actor_hidden_dims, 59 | priv_encoder_dims, num_priv_latent, num_hist, 60 | activation, tanh_encoder_output=False) 61 | 62 | def load_normalizer(self, normalizer): 63 | self.normalizer = normalizer 64 | self.normalizer.eval() 65 | 66 | def forward(self, obs): 67 | assert obs.shape[1] == self.num_obs, f"Expected {self.num_obs} but got {obs.shape[1]}" 68 | obs = self.normalizer(obs) 69 | return self.actor(obs, hist_encoding=True, eval=False) 70 | 71 | def play(args): 72 | load_run = "../../logs/{}/{}".format(args.proj_name, args.exptid) 73 | checkpoint = args.checkpoint 74 | critic_obs_extra = 0 75 | if args.robot == "g1": 76 | n_priv_latent = 4 + 1 + 23*2 + 3 77 | num_scan = 0 78 | num_actions = 23 79 | 80 | n_proprio = 3 + 2 + 3*num_actions 81 | else: 82 | raise ValueError(f"Robot {args.robot} not supported!") 83 | 84 | history_len = 10 85 | 86 | device = torch.device('cpu') 87 | policy = HardwareRefNN(n_proprio, 88 | n_priv_latent, history_len, critic_obs_extra, 89 | num_actions).to(device) 90 | load_path, checkpoint = get_load_path(root=load_run, checkpoint=checkpoint) 91 | load_run = os.path.dirname(load_path) 92 | print(f"Loading model from: {load_path}") 93 
| ac_state_dict = torch.load(load_path, map_location=device) 94 | policy.load_state_dict(ac_state_dict['model_state_dict'], strict=False) 95 | policy.load_normalizer(ac_state_dict['normalizer']) 96 | 97 | policy = policy.to(device)#.cpu() 98 | if not os.path.exists(os.path.join(load_run, "traced")): 99 | os.mkdir(os.path.join(load_run, "traced")) 100 | 101 | # Save the traced actor 102 | policy.eval() 103 | with torch.no_grad(): 104 | num_envs = 2 105 | 106 | obs_input = torch.ones(num_envs, n_proprio + n_priv_latent + history_len*n_proprio + critic_obs_extra, device=device) 107 | print("obs_input shape: ", obs_input.shape) 108 | 109 | traced_policy = torch.jit.trace(policy, obs_input) 110 | 111 | # traced_policy = torch.jit.script(policy) 112 | save_path = os.path.join(load_run, "traced", args.exptid + "-" + str(checkpoint) + "-jit.pt") 113 | traced_policy.save(save_path) 114 | print("Saved traced_actor at ", os.path.abspath(save_path)) 115 | print("Robot: ", args.robot) 116 | 117 | if __name__ == "__main__": 118 | parser = argparse.ArgumentParser() 119 | parser.add_argument('--proj_name', type=str) 120 | parser.add_argument('--exptid', type=str) 121 | parser.add_argument('--checkpoint', type=int, default=-1) 122 | parser.add_argument('--robot', type=str, default="g1") # options: gr1, h1, g1 123 | 124 | args = parser.parse_args() 125 | play(args) -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/scripts/train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | from datetime import datetime 33 | 34 | import isaacgym 35 | from legged_gym.envs import LEGGED_GYM_ENVS_DIR, LEGGED_GYM_ROOT_DIR 36 | from legged_gym.gym_utils import get_args, task_registry 37 | 38 | import torch 39 | import wandb 40 | 41 | 42 | def train(args): 43 | log_pth = LEGGED_GYM_ROOT_DIR + "/logs/{}/".format(args.proj_name) + args.exptid 44 | try: 45 | os.makedirs(log_pth) 46 | except: 47 | pass 48 | 49 | if args.debug: 50 | mode = "disabled" 51 | args.rows = 10 52 | args.cols = 5 53 | args.num_envs = 64 54 | else: 55 | mode = "online" 56 | 57 | if args.no_wandb: 58 | mode = "disabled" 59 | 60 | robot_type = args.task.split("_")[0] 61 | 62 | wandb.init(project=args.proj_name, name=args.exptid, mode=mode, dir="../../logs") 63 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/legged_robot_config.py", policy="now") 64 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/legged_robot.py", policy="now") 65 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/humanoid_config.py", policy="now") 66 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/humanoid.py", policy="now") 67 | wandb.save(LEGGED_GYM_ENVS_DIR + "/{}/{}.py".format(robot_type, args.task), policy="now") 68 | wandb.save(LEGGED_GYM_ENVS_DIR + "/{}/{}_config.py".format(robot_type, args.task), policy="now") 69 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1track/g1waist_track_config.py", policy="now") 70 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1track/g1waist_track.py", policy="now") 71 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1rolltrack/g1waistroll_track_config.py", policy="now") 72 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1rolltrack/g1waistroll_track.py", policy="now") 73 | 74 | env, _ = task_registry.make_env(name=args.task, args=args) 75 | 76 | ppo_runner, train_cfg = task_registry.make_alg_runner( 77 | log_root=log_pth, env=env, name=args.task, args=args 78 | ) 79 | ppo_runner.learn( 80 | num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True 81 | ) 82 | 83 | if __name__ == "__main__": 84 | args = get_args() 85 | train(args) 86 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/tests/test_asset.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # This file was modified by HumanUP authors in 2024-2025 3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved. 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 
20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved. 33 | 34 | import numpy as np 35 | from isaacgym import gymutil 36 | from isaacgym import gymapi 37 | 38 | from legged_gym import LEGGED_GYM_ROOT_DIR 39 | 40 | import torch 41 | 42 | 43 | gym = gymapi.acquire_gym() 44 | 45 | args = gymutil.parse_arguments( 46 | description="Script for testing the urdf or mjcf asset", 47 | custom_parameters=[ 48 | {"name": "--num_envs", "type": int, "default": 2, "help": "Number of environments to create"}, 49 | ]) 50 | 51 | sim_params = gymapi.SimParams() 52 | if args.physics_engine == gymapi.SIM_FLEX: 53 | sim_params.flex.shape_collision_margin = 0.25 54 | sim_params.flex.num_outer_iterations = 4 55 | sim_params.flex.num_inner_iterations = 10 56 | elif args.physics_engine == gymapi.SIM_PHYSX: 57 | sim_params.substeps = 1 58 | sim_params.physx.solver_type = 1 59 | sim_params.physx.num_position_iterations = 4 60 | sim_params.physx.num_velocity_iterations = 1 61 | sim_params.physx.num_threads = args.num_threads 62 | sim_params.physx.use_gpu = args.use_gpu 63 | 64 | sim_params.use_gpu_pipeline = False 65 | sim_params.up_axis = gymapi.UP_AXIS_Z 66 | sim_params.gravity.x = 0 67 | sim_params.gravity.y = 0 68 | sim_params.gravity.z = -9.81 69 | if args.use_gpu_pipeline: 70 | print("WARNING: Forcing CPU pipeline.") 71 | 72 | sim = gym.create_sim(args.compute_device_id, args.graphics_device_id, args.physics_engine, sim_params) 73 | if sim is None: 74 | print("*** Failed to create sim") 75 | quit() 76 | 77 | # add ground plane 78 | plane_params = gymapi.PlaneParams() 79 | plane_params.normal = gymapi.Vec3(0, 0, 1) 80 | gym.add_ground(sim, plane_params) 81 | 82 | # create viewer 83 | viewer = gym.create_viewer(sim, gymapi.CameraProperties()) 84 | if viewer is None: 85 | print("*** Failed to create viewer") 86 | quit() 87 | 88 | asset_root = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/g1_modified/' 89 | 90 | asset_file = "g1_29dof_fixedwrist_custom_collision_with_head.urdf" 91 | 92 | asset_options = gymapi.AssetOptions() 93 | asset_options.fix_base_link = True 94 | asset_options.use_mesh_materials = True 95 | asset_options.disable_gravity = True 96 | 97 | asset = gym.load_asset(sim, asset_root, asset_file, asset_options) 98 | 99 | num_envs = args.num_envs 100 | num_per_row = int(np.sqrt(num_envs)) 101 | env_spacing = 2.0 102 | env_lower = gymapi.Vec3(-env_spacing, 0.0, -env_spacing) 103 | env_upper = gymapi.Vec3(env_spacing, env_spacing, env_spacing) 104 | 105 | envs = [] 106 | 107 | # subscribe to spacebar event for reset 108 | gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_R, "reset") 109 | 110 | for i in range(num_envs): 111 | env = gym.create_env(sim, env_lower, 
env_upper, num_per_row) 112 | envs.append(env) 113 | 114 | # create ball pyramid 115 | pose = gymapi.Transform() 116 | pose.p = gymapi.Vec3(0, 0, 1.5) 117 | pose.r = gymapi.Quat(0, 0, 0, 1) 118 | humanoid_handle = gym.create_actor(env, asset, pose, "humanoid", i, 1) 119 | 120 | gym.viewer_camera_look_at(viewer, None, gymapi.Vec3(5, 5, 5), gymapi.Vec3(0, 0, 0)) 121 | 122 | 123 | initial_state = np.copy(gym.get_sim_rigid_body_states(sim, gymapi.STATE_ALL)) 124 | 125 | 126 | while not gym.query_viewer_has_closed(viewer): 127 | 128 | # Get input actions from the viewer and handle them appropriately 129 | for evt in gym.query_viewer_action_events(viewer): 130 | if evt.action == "reset" and evt.value > 0: 131 | gym.set_sim_rigid_body_states(sim, initial_state, gymapi.STATE_ALL) 132 | 133 | # step the physics 134 | gym.simulate(sim) 135 | gym.fetch_results(sim, True) 136 | 137 | # update the viewer 138 | gym.step_graphics(sim) 139 | gym.draw_viewer(viewer, sim, True) 140 | 141 | # Wait for dt to elapse in real time. 142 | # This synchronizes the physics simulation with the rendering rate. 143 | gym.sync_frame_time(sim) 144 | 145 | gym.destroy_viewer(viewer) 146 | gym.destroy_sim(sim) 147 | -------------------------------------------------------------------------------- /simulation/legged_gym/legged_gym/tests/test_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | import os 33 | from datetime import datetime 34 | 35 | import isaacgym 36 | from legged_gym.envs import * 37 | from legged_gym.gym_utils import get_args, task_registry, Logger 38 | 39 | import torch 40 | 41 | 42 | def test_env(args): 43 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task) 44 | 45 | env_cfg.env.num_envs = 10 46 | env_cfg.terrain.num_rows = 10 47 | env_cfg.terrain.num_cols = 10 48 | 49 | # prepare environment 50 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg) 51 | for i in range(int(10*env.max_episode_length)): 52 | actions = 0.*torch.ones(env.num_envs, env.num_actions, device=env.device) 53 | obs, _, rew, done, info = env.step(actions) 54 | print("Done") 55 | 56 | if __name__ == '__main__': 57 | args = get_args() 58 | test_env(args) 59 | -------------------------------------------------------------------------------- /simulation/legged_gym/licenses/assets/ANYmal_b_license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 ANYbotics, https://www.anybotics.com 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this 10 | list of conditions and the following disclaimer in the documentation and/or 11 | other materials provided with the distribution. 12 | 13 | 3. The name of ANYbotics and ANYmal may not be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /simulation/legged_gym/licenses/assets/ANYmal_c_license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2020, ANYbotics AG. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | 3. 
Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived 17 | from this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /simulation/legged_gym/licenses/assets/cassie_license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jenna Reher, jreher@caltech.edu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /simulation/legged_gym/licenses/dependencies/matplotlib_license.txt: -------------------------------------------------------------------------------- 1 | 1. This LICENSE AGREEMENT is between the Matplotlib Development Team ("MDT"), and the Individual or Organization ("Licensee") accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 2 | 3 | 2. Subject to the terms and conditions of this License Agreement, MDT hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 3.4.3 alone or in any derivative version, provided, however, that MDT's License Agreement and MDT's notice of copyright, i.e., "Copyright (c) 2012-2013 Matplotlib Development Team; All Rights Reserved" are retained in matplotlib 3.4.3 alone or in any derivative version prepared by Licensee. 4 | 5 | 3. 
In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 3.4.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 3.4.3. 6 | 7 | 4. MDT is making matplotlib 3.4.3 available to Licensee on an "AS IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 3.4.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 8 | 9 | 5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 3.4.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 3.4.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 10 | 11 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 12 | 13 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between MDT and Licensee. This License Agreement does not grant permission to use MDT trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 14 | 15 | 8. By copying, installing or otherwise using matplotlib 3.4.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. -------------------------------------------------------------------------------- /simulation/legged_gym/requirements.txt: -------------------------------------------------------------------------------- 1 | pymeshlab 2 | pydelatin 3 | tensorboard 4 | setuptools == 59.5.0 5 | opencv-python 6 | tqdm 7 | wandb -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/head_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/head_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_elbow_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_elbow_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_0_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_0_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_1_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_0_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_0_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_1_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_palm_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_palm_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_0_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_0_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_1_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_2_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_2_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_knee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_knee_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_rubber_hand.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_rubber_hand.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_yaw_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_rubber_hand.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_rubber_hand.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/logo_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/logo_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis_contour_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis_contour_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_pitch_link.STL 
-------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_elbow_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_elbow_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_0_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_0_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_1_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_0_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_0_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_1_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_palm_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_palm_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_0_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_0_link.STL -------------------------------------------------------------------------------- 
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_1_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_1_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_2_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_2_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_knee_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_knee_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_rubber_hand.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_rubber_hand.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_roll_link.STL: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_pitch_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_pitch_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_rubber_hand.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_rubber_hand.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_rod_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_rod_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_link.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_rod_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_rod_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/torso_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_L.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_L.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_R.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_R.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/waist_roll_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_roll_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/waist_support_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_support_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/resources/robots/g1_modified/meshes/waist_yaw_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_yaw_link.STL -------------------------------------------------------------------------------- /simulation/legged_gym/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from distutils.core import setup 3 | 4 | setup( 5 | name='legged_gym', 6 | version='1.0.0', 7 | author='Nikita Rudin', 8 | license="BSD-3-Clause", 9 | packages=find_packages(), 10 | 
author_email='rudinn@ethz.ch', 11 | description='Isaac Gym environments for Legged Robots', 12 | install_requires=['isaacgym', 13 | 'rsl-rl', 14 | 'matplotlib'] 15 | ) -------------------------------------------------------------------------------- /simulation/rsl_rl/.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | .idea 3 | 4 | # builds 5 | *.egg-info 6 | 7 | # cache 8 | __pycache__ 9 | .pytest_cache 10 | 11 | # vs code 12 | .vscode -------------------------------------------------------------------------------- /simulation/rsl_rl/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/dependencies for license information of dependencies of this package. -------------------------------------------------------------------------------- /simulation/rsl_rl/README.md: -------------------------------------------------------------------------------- 1 | # RSL RL 2 | Fast and simple implementation of RL algorithms, designed to run fully on GPU. 3 | This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac GYM. 4 | 5 | Only PPO is implemented for now. More algorithms will be added later. 6 | Contributions are welcome. 7 | 8 | ## Setup 9 | 10 | ``` 11 | git clone https://github.com/leggedrobotics/rsl_rl 12 | cd rsl_rl 13 | pip install -e . 14 | ``` 15 | 16 | **Maintainer**: Nikita Rudin 17 | **Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA 18 | **Contact**: rudinn@ethz.ch 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /simulation/rsl_rl/licenses/dependencies/numpy_license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2021, NumPy Developers. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /simulation/rsl_rl/licenses/dependencies/torch_license.txt: -------------------------------------------------------------------------------- 1 | From PyTorch: 2 | 3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 8 | Copyright (c) 2011-2013 NYU (Clement Farabet) 9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 12 | 13 | From Caffe2: 14 | 15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 16 | 17 | All contributions by Facebook: 18 | Copyright (c) 2016 Facebook Inc. 19 | 20 | All contributions by Google: 21 | Copyright (c) 2015 Google Inc. 22 | All rights reserved. 23 | 24 | All contributions by Yangqing Jia: 25 | Copyright (c) 2015 Yangqing Jia 26 | All rights reserved. 27 | 28 | All contributions by Kakao Brain: 29 | Copyright 2019-2020 Kakao Brain 30 | 31 | All contributions from Caffe: 32 | Copyright(c) 2013, 2014, 2015, the respective contributors 33 | All rights reserved. 34 | 35 | All other contributions: 36 | Copyright(c) 2015, 2016 the respective contributors 37 | All rights reserved. 38 | 39 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 40 | copyright over their contributions to Caffe2. The project versioning records 41 | all such contribution and copyright details. 
If a contributor wants to further 42 | mark their specific copyright on a particular contribution, they should 43 | indicate their copyright solely in the commit message of the change when it is 44 | committed. 45 | 46 | All rights reserved. 47 | 48 | Redistribution and use in source and binary forms, with or without 49 | modification, are permitted provided that the following conditions are met: 50 | 51 | 1. Redistributions of source code must retain the above copyright 52 | notice, this list of conditions and the following disclaimer. 53 | 54 | 2. Redistributions in binary form must reproduce the above copyright 55 | notice, this list of conditions and the following disclaimer in the 56 | documentation and/or other materials provided with the distribution. 57 | 58 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 59 | and IDIAP Research Institute nor the names of its contributors may be 60 | used to endorse or promote products derived from this software without 61 | specific prior written permission. 62 | 63 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 64 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 67 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 68 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 69 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 70 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 71 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 72 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 73 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .ppo_rma import PPORMA 32 | from .ppo import PPO -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .vec_env import VecEnv 32 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from abc import ABC, abstractmethod 32 | import torch 33 | from typing import Tuple, Union 34 | 35 | 36 | # minimal interface of the environment 37 | class VecEnv(ABC): 38 | num_envs: int 39 | num_obs: int 40 | num_privileged_obs: int 41 | num_actions: int 42 | max_episode_length: int 43 | privileged_obs_buf: torch.Tensor 44 | obs_buf: torch.Tensor 45 | rew_buf: torch.Tensor 46 | reset_buf: torch.Tensor 47 | episode_length_buf: torch.Tensor # current episode duration 48 | extras: dict 49 | device: torch.device 50 | 51 | @abstractmethod 52 | def step( 53 | self, actions: torch.Tensor 54 | ) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 55 | pass 56 | 57 | @abstractmethod 58 | def reset(self, env_ids: Union[list, torch.Tensor]): 59 | pass 60 | 61 | @abstractmethod 62 | def get_observations(self) -> torch.Tensor: 63 | pass 64 | 65 | @abstractmethod 66 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 67 | pass 68 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .actor_critic_rma import ActorCriticRMA 32 | from .actor_critic import ActorCritic 33 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/modules/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import code 34 | import torch 35 | import torch.nn as nn 36 | from torch.distributions import Normal 37 | from torch.nn.modules import rnn 38 | from torch.nn.modules.activation import ReLU 39 | 40 | 41 | class Actor(nn.Module): 42 | def __init__( 43 | self, 44 | num_prop, 45 | num_actions, 46 | actor_hidden_dims, 47 | activation, 48 | tanh_encoder_output=False, 49 | **kwargs 50 | ) -> None: 51 | super().__init__() 52 | # prop 53 | self.num_prop = num_prop 54 | self.num_actions = num_actions 55 | 56 | actor_layers = [] 57 | actor_layers.append(nn.Linear(num_prop, actor_hidden_dims[0])) 58 | actor_layers.append(activation) 59 | for l in range(len(actor_hidden_dims)): 60 | if l == len(actor_hidden_dims) - 1: 61 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 62 | else: 63 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 64 | actor_layers.append(activation) 65 | if tanh_encoder_output: 66 | actor_layers.append(nn.Tanh()) 67 | self.actor_backbone = nn.Sequential(*actor_layers) 68 | 69 | def forward(self, obs_all, hist_encoding=False, eval=False, scandots_latent=None): 70 | obs = obs_all 71 | obs_prop = obs[:, : self.num_prop] 72 | backbone_input = obs_prop 73 | backbone_output = self.actor_backbone(backbone_input) 74 | return backbone_output 75 | 76 | 77 | class ActorCritic(nn.Module): 78 | is_recurrent = False 79 | 80 | def __init__( 81 | self, 82 | num_prop, 83 | num_critic_obs, 84 | num_priv_latent, 85 | num_priv_explicit, 86 | num_hist, 87 | num_actions, 88 | actor_hidden_dims=[256, 256, 256], 89 | critic_hidden_dims=[256, 256, 256], 90 | activation="elu", 91 | init_noise_std=1.0, 92 | fix_action_std=False, 93 | **kwargs 94 | ): 95 | if kwargs: 96 | print( 97 | "ActorCritic.__init__ got unexpected arguments, which will be ignored: " 98 | + 
str([key for key in kwargs.keys()]) 99 | ) 100 | super().__init__() 101 | 102 | self.fix_action_std = fix_action_std 103 | 104 | self.kwargs = kwargs 105 | priv_encoder_dims = kwargs["priv_encoder_dims"] 106 | activation = get_activation(activation) 107 | 108 | self.actor = Actor( 109 | num_prop=num_prop, 110 | num_actions=num_actions, 111 | actor_hidden_dims=actor_hidden_dims, 112 | activation=activation, 113 | tanh_encoder_output=kwargs["tanh_encoder_output"], 114 | ) 115 | 116 | # Value function 117 | critic_layers = [] 118 | critic_layers.append(nn.Linear(num_critic_obs, critic_hidden_dims[0])) 119 | critic_layers.append(activation) 120 | for l in range(len(critic_hidden_dims)): 121 | if l == len(critic_hidden_dims) - 1: 122 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 123 | else: 124 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 125 | critic_layers.append(activation) 126 | self.critic = nn.Sequential(*critic_layers) 127 | 128 | # Action noise 129 | if self.fix_action_std: 130 | # action_std = torch.tensor([0.5, 0.25, 0.25, 0.25, 0.2, 0.2] * 2 + [0.2, 0.2, 0.2] + [0.3] * 8) 131 | action_std = torch.tensor( 132 | [0.3, 0.3, 0.3, 0.4, 0.2] * 2 + [0.25, 0.25, 0.25] + [0.5] * 8 133 | ) 134 | self.std = nn.Parameter(action_std, requires_grad=False) 135 | else: 136 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 137 | self.distribution = None 138 | # disable args validation for speedup 139 | Normal.set_default_validate_args = False 140 | 141 | @staticmethod 142 | # not used at the moment 143 | def init_weights(sequential, scales): 144 | [ 145 | torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) 146 | for idx, module in enumerate(mod for mod in sequential if isinstance(mod, nn.Linear)) 147 | ] 148 | 149 | def reset(self, dones=None): 150 | pass 151 | 152 | def forward(self): 153 | raise NotImplementedError 154 | 155 | @property 156 | def action_mean(self): 157 | return self.distribution.mean 158 | 159 | @property 160 | def action_std(self): 161 | return self.distribution.stddev 162 | 163 | @property 164 | def entropy(self): 165 | return self.distribution.entropy().sum(dim=-1) 166 | 167 | def update_distribution(self, observations): 168 | mean = self.actor(observations) 169 | # has_nan_mean = torch.isnan(mean).any().item() 170 | # if has_nan_mean: 171 | # print("mean has nan") 172 | # has_nan_obs = torch.isnan(observations).any().item() 173 | # if has_nan_obs: 174 | # print("has nan obs: ", has_nan_obs) 175 | # obs_array = observations.cpu().detach().numpy() 176 | # np.savetxt('nan_obs.txt', obs_array) 177 | # exit() 178 | self.distribution = Normal(mean, mean * 0.0 + self.std) 179 | 180 | def act(self, observations, **kwargs): 181 | self.update_distribution(observations) 182 | return self.distribution.sample() 183 | 184 | def get_actions_log_prob(self, actions): 185 | return self.distribution.log_prob(actions).sum(dim=-1) 186 | 187 | def act_inference(self, observations, eval=False, **kwargs): 188 | if not eval: 189 | actions_mean = self.actor(observations, eval) 190 | return actions_mean 191 | else: 192 | actions_mean = self.actor(observations, eval=True) 193 | return actions_mean 194 | 195 | def evaluate(self, critic_observations, **kwargs): 196 | value = self.critic(critic_observations) 197 | return value 198 | 199 | def reset_std(self, std, num_actions, device): 200 | new_std = std * torch.ones(num_actions, device=device) 201 | self.std.data = new_std.data 202 | 203 | 204 | def get_activation(act_name): 205 | 
if act_name == "elu": 206 | return nn.ELU() 207 | elif act_name == "selu": 208 | return nn.SELU() 209 | elif act_name == "relu": 210 | return nn.ReLU() 211 | elif act_name == "crelu": 212 | return nn.ReLU() 213 | elif act_name == "lrelu": 214 | return nn.LeakyReLU() 215 | elif act_name == "tanh": 216 | return nn.Tanh() 217 | elif act_name == "sigmoid": 218 | return nn.Sigmoid() 219 | else: 220 | print("invalid activation function!") 221 | return None 222 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | from .actor_critic import ActorCritic, get_activation 38 | from rsl_rl.utils import unpad_trajectories 39 | 40 | 41 | class ActorCriticRecurrent(ActorCritic): 42 | is_recurrent = True 43 | 44 | def __init__( 45 | self, 46 | num_actor_obs, 47 | num_critic_obs, 48 | num_actions, 49 | actor_hidden_dims=[256, 256, 256], 50 | critic_hidden_dims=[256, 256, 256], 51 | activation="elu", 52 | rnn_type="lstm", 53 | rnn_hidden_size=256, 54 | rnn_num_layers=1, 55 | init_noise_std=1.0, 56 | **kwargs, 57 | ): 58 | if kwargs: 59 | print( 60 | "ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " 61 | + str(kwargs.keys()), 62 | ) 63 | 64 | super().__init__( 65 | num_actor_obs=rnn_hidden_size, 66 | num_critic_obs=rnn_hidden_size, 67 | num_actions=num_actions, 68 | actor_hidden_dims=actor_hidden_dims, 69 | critic_hidden_dims=critic_hidden_dims, 70 | activation=activation, 71 | init_noise_std=init_noise_std, 72 | **kwargs, 73 | ) 74 | 75 | activation = get_activation(activation) 76 | 77 | self.memory_a = Memory( 78 | num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size 79 | ) 80 | self.memory_c = Memory( 81 | num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size 82 | ) 83 | 84 | print(f"Actor RNN: {self.memory_a}") 85 | print(f"Critic RNN: {self.memory_c}") 86 | 87 | def reset(self, dones=None): 88 | self.memory_a.reset(dones) 89 | self.memory_c.reset(dones) 90 | 91 | def act(self, observations, masks=None, hidden_states=None): 92 | input_a = self.memory_a(observations, masks, hidden_states) 93 | return super().act(input_a.squeeze(0)) 94 | 95 | def act_inference(self, observations, **kwargs): 96 | input_a = self.memory_a(observations, **kwargs) 97 | return super().act_inference(input_a.squeeze(0)) 98 | 99 | def evaluate(self, critic_observations, masks=None, hidden_states=None): 100 | input_c = self.memory_c(critic_observations, masks, hidden_states) 101 | return super().evaluate(input_c.squeeze(0)) 102 | 103 | def get_hidden_states(self): 104 | return self.memory_a.hidden_states, self.memory_c.hidden_states 105 | 106 | 107 | class Memory(torch.nn.Module): 108 | def __init__(self, input_size, type="lstm", num_layers=1, hidden_size=256): 109 | super().__init__() 110 | # RNN 111 | rnn_cls = nn.GRU if type.lower() == "gru" else nn.LSTM 112 | self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers) 113 | self.hidden_states = None 114 | 115 | def forward(self, input, masks=None, hidden_states=None): 116 | batch_mode = masks is not None 117 | if batch_mode: 118 | # batch mode (policy update): need saved hidden states 119 | if hidden_states is None: 120 | raise ValueError("Hidden states not passed to memory module during policy update") 121 | out, _ = self.rnn(input, hidden_states) 122 | out = unpad_trajectories(out, masks) 123 | else: 124 | # inference mode (collection): use hidden states of last step 125 | out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states) 126 | return out 127 | 128 | def reset(self, dones=None): 129 | # When the RNN is an LSTM, self.hidden_states_a is a list with hidden_state and cell_state 130 | for hidden_state in self.hidden_states: 131 | hidden_state[..., dones, :] = 0.0 132 | 
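Note on the Memory module above: it hides the two ways the RNN is driven, single-step rollout collection with hidden states cached on the module, and padded-trajectory replay during the policy update. The following is a minimal usage sketch, not part of the repository; the LSTM choice and all shapes (env count, observation size, hidden size, rollout length) are illustrative assumptions.

import torch
from rsl_rl.modules.actor_critic_recurrent import Memory

num_envs, obs_dim, hidden_size, steps = 4, 48, 256, 8
memory = Memory(obs_dim, type="lstm", num_layers=1, hidden_size=hidden_size)

# Collection mode: one observation per env; hidden states are cached internally.
obs = torch.randn(num_envs, obs_dim)
out = memory(obs)                                        # shape [1, num_envs, hidden_size]

# Update mode: padded trajectories plus the hidden states saved at rollout time.
padded_obs = torch.randn(steps, num_envs, obs_dim)
masks = torch.ones(steps, num_envs, dtype=torch.bool)    # all steps valid in this toy case
h0 = (torch.zeros(1, num_envs, hidden_size), torch.zeros(1, num_envs, hidden_size))
out = memory(padded_obs, masks=masks, hidden_states=h0)  # unpadded to [steps, num_envs, hidden_size]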
-------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/runners/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .on_policy_runner import OnPolicyRunner 32 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/runners/runner.py: -------------------------------------------------------------------------------- 1 | class Runner: 2 | def __init__(self): 3 | pass 4 | 5 | def get_inference_policy(self): 6 | pass 7 | 8 | def get_estimator_inference_policy(self): 9 | pass 10 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from .rollout_storage import RolloutStorage 5 | from .replay_buffer import ReplayBuffer -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/storage/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class ReplayBuffer: 6 | """Fixed-size buffer to store experience tuples.""" 7 | 8 | def __init__(self, obs_dim, buffer_size, device): 9 | """Initialize a ReplayBuffer object. 
10 | Arguments: 11 | buffer_size (int): maximum size of buffer 12 | """ 13 | self.amp_obs = torch.zeros(buffer_size, obs_dim).to(device) 14 | self.buffer_size = buffer_size 15 | self.device = device 16 | 17 | self.step = 0 18 | self.num_samples = 0 19 | 20 | def insert(self, amp_obs): 21 | """Add new states to memory.""" 22 | 23 | num_obs = amp_obs.shape[0] 24 | start_idx = self.step 25 | end_idx = self.step + num_obs 26 | if end_idx > self.buffer_size: 27 | self.amp_obs[self.step:self.buffer_size] = amp_obs[:self.buffer_size - self.step] 28 | self.amp_obs[:end_idx - self.buffer_size] = amp_obs[self.buffer_size - self.step:] # put the rest at the beginning 29 | else: 30 | self.amp_obs[start_idx:end_idx] = amp_obs 31 | 32 | self.num_samples = min(self.buffer_size, max(end_idx, self.num_samples)) 33 | self.step = (self.step + num_obs) % self.buffer_size 34 | 35 | def feed_forward_generator(self, num_mini_batch, mini_batch_size): 36 | for _ in range(num_mini_batch): 37 | sample_idxs = np.random.choice(self.num_samples, size=mini_batch_size) 38 | yield self.amp_obs[sample_idxs].to(self.device) 39 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/storage/rollout_storage.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | import numpy as np 33 | 34 | from rsl_rl.utils import split_and_pad_trajectories 35 | 36 | class RolloutStorage: 37 | class Transition: 38 | def __init__(self): 39 | self.observations = None 40 | self.critic_observations = None 41 | self.actions = None 42 | self.rewards = None 43 | self.dones = None 44 | self.values = None 45 | self.actions_log_prob = None 46 | self.action_mean = None 47 | self.action_sigma = None 48 | self.hidden_states = None 49 | def clear(self): 50 | self.__init__() 51 | 52 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'): 53 | 54 | self.device = device 55 | 56 | self.obs_shape = obs_shape 57 | self.privileged_obs_shape = privileged_obs_shape 58 | self.actions_shape = actions_shape 59 | 60 | # Core 61 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device) 62 | 63 | if privileged_obs_shape[0] is not None: 64 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device) 65 | else: 66 | self.privileged_observations = None 67 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 68 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 69 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 70 | 71 | # For PPO 72 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 73 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 74 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 75 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 76 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 77 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 78 | 79 | self.num_transitions_per_env = num_transitions_per_env 80 | self.num_envs = num_envs 81 | 82 | # rnn 83 | self.saved_hidden_states_a = None 84 | self.saved_hidden_states_c = None 85 | 86 | self.step = 0 87 | 88 | def add_transitions(self, transition: Transition): 89 | if self.step >= self.num_transitions_per_env: 90 | raise AssertionError("Rollout buffer overflow") 91 | self.observations[self.step].copy_(transition.observations) 92 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations) 93 | self.actions[self.step].copy_(transition.actions) 94 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 95 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 96 | self.values[self.step].copy_(transition.values) 97 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 98 | self.mu[self.step].copy_(transition.action_mean) 99 | self.sigma[self.step].copy_(transition.action_sigma) 100 | 101 | self._save_hidden_states(transition.hidden_states) 102 | self.step += 1 103 | 104 | def _save_hidden_states(self, hidden_states): 105 | if hidden_states is None or hidden_states==(None, None): 106 | return 107 | # make a tuple out of GRU hidden state sto match the LSTM format 108 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],) 109 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) 
else (hidden_states[1],) 110 | 111 | # initialize if needed 112 | if self.saved_hidden_states_a is None: 113 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))] 114 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))] 115 | # copy the states 116 | for i in range(len(hid_a)): 117 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i]) 118 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i]) 119 | 120 | 121 | def clear(self): 122 | self.step = 0 123 | 124 | def compute_returns(self, last_values, gamma, lam): 125 | advantage = 0 126 | for step in reversed(range(self.num_transitions_per_env)): 127 | if step == self.num_transitions_per_env - 1: 128 | next_values = last_values 129 | else: 130 | next_values = self.values[step + 1] 131 | next_is_not_terminal = 1.0 - self.dones[step].float() 132 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 133 | advantage = delta + next_is_not_terminal * gamma * lam * advantage # nan 2237 134 | self.returns[step] = advantage + self.values[step] 135 | 136 | # Compute and normalize the advantages 137 | if torch.isnan(self.returns).any(): 138 | from loguru import logger 139 | logger.error(f"RolloutStorage.compute_returns: returns contains NaNs") 140 | import ipdb; ipdb.set_trace() 141 | self.advantages = self.returns - self.values 142 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 143 | 144 | def get_statistics(self): 145 | done = self.dones 146 | done[-1] = 1 147 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 148 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 149 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 150 | return trajectory_lengths.float().mean(), self.rewards.mean() 151 | 152 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 153 | batch_size = self.num_envs * self.num_transitions_per_env 154 | mini_batch_size = batch_size // num_mini_batches 155 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 156 | 157 | observations = self.observations.flatten(0, 1) 158 | 159 | if self.privileged_observations is not None: 160 | critic_observations = self.privileged_observations.flatten(0, 1) 161 | else: 162 | critic_observations = observations 163 | 164 | actions = self.actions.flatten(0, 1) 165 | values = self.values.flatten(0, 1) 166 | returns = self.returns.flatten(0, 1) 167 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 168 | advantages = self.advantages.flatten(0, 1) 169 | old_mu = self.mu.flatten(0, 1) 170 | old_sigma = self.sigma.flatten(0, 1) 171 | 172 | for epoch in range(num_epochs): 173 | for i in range(num_mini_batches): 174 | 175 | start = i*mini_batch_size 176 | end = (i+1)*mini_batch_size 177 | batch_idx = indices[start:end] 178 | 179 | obs_batch = observations[batch_idx] 180 | critic_observations_batch = critic_observations[batch_idx] 181 | actions_batch = actions[batch_idx] 182 | target_values_batch = values[batch_idx] 183 | returns_batch = returns[batch_idx] 184 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 185 | advantages_batch = advantages[batch_idx] 186 | old_mu_batch = old_mu[batch_idx] 187 | old_sigma_batch = old_sigma[batch_idx] 188 | 189 | 190 | yield obs_batch, 
critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 191 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None 192 | 193 | # for RNNs only 194 | def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8): 195 | 196 | padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones) 197 | if self.privileged_observations is not None: 198 | padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones) 199 | else: 200 | padded_critic_obs_trajectories = padded_obs_trajectories 201 | 202 | mini_batch_size = self.num_envs // num_mini_batches 203 | for ep in range(num_epochs): 204 | first_traj = 0 205 | for i in range(num_mini_batches): 206 | start = i*mini_batch_size 207 | stop = (i+1)*mini_batch_size 208 | 209 | dones = self.dones.squeeze(-1) 210 | last_was_done = torch.zeros_like(dones, dtype=torch.bool) 211 | last_was_done[1:] = dones[:-1] 212 | last_was_done[0] = True 213 | trajectories_batch_size = torch.sum(last_was_done[:, start:stop]) 214 | last_traj = first_traj + trajectories_batch_size 215 | 216 | masks_batch = trajectory_masks[:, first_traj:last_traj] 217 | obs_batch = padded_obs_trajectories[:, first_traj:last_traj] 218 | critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj] 219 | 220 | actions_batch = self.actions[:, start:stop] 221 | old_mu_batch = self.mu[:, start:stop] 222 | old_sigma_batch = self.sigma[:, start:stop] 223 | returns_batch = self.returns[:, start:stop] 224 | advantages_batch = self.advantages[:, start:stop] 225 | values_batch = self.values[:, start:stop] 226 | old_actions_log_prob_batch = self.actions_log_prob[:, start:stop] 227 | 228 | # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim]) 229 | # then take only time steps after dones (flattens num envs and time dimensions), 230 | # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim] 231 | last_was_done = last_was_done.permute(1, 0) 232 | hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous() 233 | for saved_hidden_states in self.saved_hidden_states_a ] 234 | hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous() 235 | for saved_hidden_states in self.saved_hidden_states_c ] 236 | # remove the tuple for GRU 237 | hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch 238 | hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch 239 | 240 | yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \ 241 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch 242 | 243 | first_traj = last_traj -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1.
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .utils import split_and_pad_trajectories, unpad_trajectories -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/init.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def weight_init(m): 5 | """Custom weight initialization for TD-MPC2.""" 6 | if isinstance(m, nn.Linear): 7 | nn.init.trunc_normal_(m.weight, std=0.02) 8 | if m.bias is not None: 9 | nn.init.constant_(m.bias, 0) 10 | elif isinstance(m, nn.Embedding): 11 | nn.init.uniform_(m.weight, -0.02, 0.02) 12 | elif isinstance(m, nn.ParameterList): 13 | for i,p in enumerate(m): 14 | if p.dim() == 3: # Linear 15 | nn.init.trunc_normal_(p, std=0.02) # Weight 16 | nn.init.constant_(m[i+1], 0) # Bias 17 | 18 | 19 | def zero_(params): 20 | """Initialize parameters to zero.""" 21 | for p in params: 22 | p.data.fill_(0) 23 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from functorch import combine_state_for_ensemble 5 | 6 | 7 | class Ensemble(nn.Module): 8 | """ 9 | Vectorized ensemble of modules. 10 | """ 11 | 12 | def __init__(self, modules, **kwargs): 13 | super().__init__() 14 | modules = nn.ModuleList(modules) 15 | fn, params, _ = combine_state_for_ensemble(modules) 16 | self.vmap = torch.vmap(fn, in_dims=(0, 0, None), randomness='different', **kwargs) 17 | self.params = nn.ParameterList([nn.Parameter(p) for p in params]) 18 | self._repr = str(modules) 19 | 20 | def forward(self, *args, **kwargs): 21 | return self.vmap([p for p in self.params], (), *args, **kwargs) 22 | 23 | def __repr__(self): 24 | return 'Vectorized ' + self._repr 25 | 26 | 27 | class ShiftAug(nn.Module): 28 | """ 29 | Random shift image augmentation. 
30 | Adapted from https://github.com/facebookresearch/drqv2 31 | """ 32 | def __init__(self, pad=3): 33 | super().__init__() 34 | self.pad = pad 35 | 36 | def forward(self, x): 37 | x = x.float() 38 | n, _, h, w = x.size() 39 | assert h == w 40 | padding = tuple([self.pad] * 4) 41 | x = F.pad(x, padding, 'replicate') 42 | eps = 1.0 / (h + 2 * self.pad) 43 | arange = torch.linspace(-1.0 + eps, 1.0 - eps, h + 2 * self.pad, device=x.device, dtype=x.dtype)[:h] 44 | arange = arange.unsqueeze(0).repeat(h, 1).unsqueeze(2) 45 | base_grid = torch.cat([arange, arange.transpose(1, 0)], dim=2) 46 | base_grid = base_grid.unsqueeze(0).repeat(n, 1, 1, 1) 47 | shift = torch.randint(0, 2 * self.pad + 1, size=(n, 1, 1, 2), device=x.device, dtype=x.dtype) 48 | shift *= 2.0 / (h + 2 * self.pad) 49 | grid = base_grid + shift 50 | return F.grid_sample(x, grid, padding_mode='zeros', align_corners=False) 51 | 52 | 53 | class PixelPreprocess(nn.Module): 54 | """ 55 | Normalizes pixel observations to [-0.5, 0.5]. 56 | """ 57 | 58 | def __init__(self): 59 | super().__init__() 60 | 61 | def forward(self, x): 62 | return x.div_(255.).sub_(0.5) 63 | 64 | 65 | class SimNorm(nn.Module): 66 | """ 67 | Simplicial normalization. 68 | Adapted from https://arxiv.org/abs/2204.00616. 69 | """ 70 | 71 | def __init__(self, cfg): 72 | super().__init__() 73 | self.dim = cfg.simnorm_dim 74 | 75 | def forward(self, x): 76 | shp = x.shape 77 | x = x.view(*shp[:-1], -1, self.dim) 78 | x = F.softmax(x, dim=-1) 79 | return x.view(*shp) 80 | 81 | def __repr__(self): 82 | return f"SimNorm(dim={self.dim})" 83 | 84 | 85 | class NormedLinear(nn.Linear): 86 | """ 87 | Linear layer with LayerNorm, activation, and optionally dropout. 88 | """ 89 | 90 | def __init__(self, *args, dropout=0., act=nn.Mish(inplace=True), **kwargs): 91 | super().__init__(*args, **kwargs) 92 | self.ln = nn.LayerNorm(self.out_features) 93 | self.act = act 94 | self.dropout = nn.Dropout(dropout, inplace=True) if dropout else None 95 | 96 | def forward(self, x): 97 | x = super().forward(x) 98 | if self.dropout: 99 | x = self.dropout(x) 100 | return self.act(self.ln(x)) 101 | 102 | def __repr__(self): 103 | repr_dropout = f", dropout={self.dropout.p}" if self.dropout else "" 104 | return f"NormedLinear(in_features={self.in_features}, "\ 105 | f"out_features={self.out_features}, "\ 106 | f"bias={self.bias is not None}{repr_dropout}, "\ 107 | f"act={self.act.__class__.__name__})" 108 | 109 | 110 | def mlp(in_dim, mlp_dims, out_dim, act=None, dropout=0.): 111 | """ 112 | Basic building block of TD-MPC2. 113 | MLP with LayerNorm, Mish activations, and optionally dropout. 114 | """ 115 | if isinstance(mlp_dims, int): 116 | mlp_dims = [mlp_dims] 117 | dims = [in_dim] + mlp_dims + [out_dim] 118 | mlp = nn.ModuleList() 119 | for i in range(len(dims) - 2): 120 | mlp.append(NormedLinear(dims[i], dims[i+1], dropout=dropout*(i==0))) 121 | mlp.append(NormedLinear(dims[-2], dims[-1], act=act) if act else nn.Linear(dims[-2], dims[-1])) 122 | return nn.Sequential(*mlp) 123 | 124 | 125 | def conv(in_shape, num_channels, act=None): 126 | """ 127 | Basic convolutional encoder for TD-MPC2 with raw image observations. 128 | 4 layers of convolution with ReLU activations, followed by a linear layer. 
129 | """ 130 | assert in_shape[-1] == 64 # assumes rgb observations to be 64x64 131 | layers = [ 132 | ShiftAug(), PixelPreprocess(), 133 | nn.Conv2d(in_shape[0], num_channels, 7, stride=2), nn.ReLU(inplace=True), 134 | nn.Conv2d(num_channels, num_channels, 5, stride=2), nn.ReLU(inplace=True), 135 | nn.Conv2d(num_channels, num_channels, 3, stride=2), nn.ReLU(inplace=True), 136 | nn.Conv2d(num_channels, num_channels, 3, stride=1), nn.Flatten()] 137 | if act: 138 | layers.append(act) 139 | return nn.Sequential(*layers) 140 | 141 | 142 | def enc(cfg, out={}): 143 | """ 144 | Returns a dictionary of encoders for each observation in the dict. 145 | """ 146 | for k in cfg.obs_shape.keys(): 147 | if k == 'state': 148 | out[k] = mlp(cfg.obs_shape[k][0] + cfg.task_dim, max(cfg.num_enc_layers-1, 1)*[cfg.enc_dim], cfg.latent_dim, act=SimNorm(cfg)) 149 | elif k == 'rgb': 150 | out[k] = conv(cfg.obs_shape[k], cfg.num_channels, act=SimNorm(cfg)) 151 | else: 152 | raise NotImplementedError(f"Encoder for observation type {k} not implemented.") 153 | return nn.ModuleDict(out) 154 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/math.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def soft_ce(pred, target, cfg): 6 | """Computes the cross entropy loss between predictions and soft targets.""" 7 | pred = F.log_softmax(pred, dim=-1) 8 | target = two_hot(target, cfg) 9 | return -(target * pred).sum(-1, keepdim=True) 10 | 11 | 12 | @torch.jit.script 13 | def log_std(x, low, dif): 14 | return low + 0.5 * dif * (torch.tanh(x) + 1) 15 | 16 | 17 | @torch.jit.script 18 | def _gaussian_residual(eps, log_std): 19 | return -0.5 * eps.pow(2) - log_std 20 | 21 | 22 | @torch.jit.script 23 | def _gaussian_logprob(residual): 24 | return residual - 0.5 * torch.log(2 * torch.pi) 25 | 26 | 27 | def gaussian_logprob(eps, log_std, size=None): 28 | """Compute Gaussian log probability.""" 29 | residual = _gaussian_residual(eps, log_std).sum(-1, keepdim=True) 30 | if size is None: 31 | size = eps.size(-1) 32 | return _gaussian_logprob(residual) * size 33 | 34 | 35 | @torch.jit.script 36 | def _squash(pi): 37 | return torch.log(F.relu(1 - pi.pow(2)) + 1e-6) 38 | 39 | 40 | def squash(mu, pi, log_pi): 41 | """Apply squashing function.""" 42 | mu = torch.tanh(mu) 43 | pi = torch.tanh(pi) 44 | log_pi -= _squash(pi).sum(-1, keepdim=True) 45 | return mu, pi, log_pi 46 | 47 | 48 | @torch.jit.script 49 | def symlog(x): 50 | """ 51 | Symmetric logarithmic function. 52 | Adapted from https://github.com/danijar/dreamerv3. 53 | """ 54 | return torch.sign(x) * torch.log(1 + torch.abs(x)) 55 | 56 | 57 | @torch.jit.script 58 | def symexp(x): 59 | """ 60 | Symmetric exponential function. 61 | Adapted from https://github.com/danijar/dreamerv3. 
62 | """ 63 | return torch.sign(x) * (torch.exp(torch.abs(x)) - 1) 64 | 65 | 66 | def two_hot(x, cfg): 67 | """Converts a batch of scalars to soft two-hot encoded targets for discrete regression.""" 68 | if cfg.num_bins == 0: 69 | return x 70 | elif cfg.num_bins == 1: 71 | return symlog(x) 72 | x = torch.clamp(symlog(x), cfg.vmin, cfg.vmax).squeeze(1) 73 | bin_idx = torch.floor((x - cfg.vmin) / cfg.bin_size).long() 74 | bin_offset = ((x - cfg.vmin) / cfg.bin_size - bin_idx.float()).unsqueeze(-1) 75 | soft_two_hot = torch.zeros(x.size(0), cfg.num_bins, device=x.device) 76 | soft_two_hot.scatter_(1, bin_idx.unsqueeze(1), 1 - bin_offset) 77 | soft_two_hot.scatter_(1, (bin_idx.unsqueeze(1) + 1) % cfg.num_bins, bin_offset) 78 | return soft_two_hot 79 | 80 | 81 | DREG_BINS = None 82 | 83 | 84 | def two_hot_inv(x, cfg): 85 | """Converts a batch of soft two-hot encoded vectors to scalars.""" 86 | global DREG_BINS 87 | if cfg.num_bins == 0: 88 | return x 89 | elif cfg.num_bins == 1: 90 | return symexp(x) 91 | if DREG_BINS is None: 92 | DREG_BINS = torch.linspace(cfg.vmin, cfg.vmax, cfg.num_bins, device=x.device) 93 | x = F.softmax(x, dim=-1) 94 | x = torch.sum(x * DREG_BINS, dim=-1, keepdim=True) 95 | return symexp(x) 96 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/running_mean_std.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | ''' 5 | updates statistic from a full data 6 | ''' 7 | class RunningMeanStd(nn.Module): 8 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False): 9 | super(RunningMeanStd, self).__init__() 10 | print('RunningMeanStd: ', insize) 11 | self.insize = insize 12 | self.epsilon = epsilon 13 | 14 | self.norm_only = norm_only 15 | self.per_channel = per_channel 16 | if per_channel: 17 | if len(self.insize) == 3: 18 | self.axis = [0,2,3] 19 | if len(self.insize) == 2: 20 | self.axis = [0,2] 21 | if len(self.insize) == 1: 22 | self.axis = [0] 23 | in_size = self.insize[0] 24 | else: 25 | self.axis = [0] 26 | in_size = insize 27 | 28 | self.register_buffer("running_mean", torch.zeros(in_size, dtype = torch.float64)) 29 | self.register_buffer("running_var", torch.ones(in_size, dtype = torch.float64)) 30 | self.register_buffer("count", torch.ones((), dtype = torch.float64)) 31 | 32 | def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count): 33 | delta = batch_mean - mean 34 | tot_count = count + batch_count 35 | 36 | new_mean = mean + delta * batch_count / tot_count 37 | m_a = var * count 38 | m_b = batch_var * batch_count 39 | M2 = m_a + m_b + delta**2 * count * batch_count / tot_count 40 | new_var = M2 / tot_count 41 | new_count = tot_count 42 | return new_mean, new_var, new_count 43 | 44 | def forward(self, input, unnorm=False): 45 | if self.training: 46 | mean = input.mean(self.axis) # along channel axis 47 | var = input.var(self.axis) 48 | self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(self.running_mean, self.running_var, self.count, 49 | mean, var, input.size()[0] ) 50 | 51 | # change shape 52 | if self.per_channel: 53 | if len(self.insize) == 3: 54 | current_mean = self.running_mean.view([1, self.insize[0], 1, 1]).expand_as(input) 55 | current_var = self.running_var.view([1, self.insize[0], 1, 1]).expand_as(input) 56 | if len(self.insize) == 2: 57 | current_mean = self.running_mean.view([1, 
self.insize[0], 1]).expand_as(input) 58 | current_var = self.running_var.view([1, self.insize[0], 1]).expand_as(input) 59 | if len(self.insize) == 1: 60 | current_mean = self.running_mean.view([1, self.insize[0]]).expand_as(input) 61 | current_var = self.running_var.view([1, self.insize[0]]).expand_as(input) 62 | else: 63 | current_mean = self.running_mean 64 | current_var = self.running_var 65 | # get output 66 | 67 | 68 | if unnorm: 69 | y = torch.clamp(input, min=-5.0, max=5.0) 70 | y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float() 71 | else: 72 | if self.norm_only: 73 | y = input/ torch.sqrt(current_var.float() + self.epsilon) 74 | else: 75 | y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon) 76 | y = torch.clamp(y, min=-5.0, max=5.0) 77 | return y 78 | 79 | class RunningMeanStdObs(nn.Module): 80 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False): 81 | assert(insize is dict) 82 | super(RunningMeanStdObs, self).__init__() 83 | self.running_mean_std = nn.ModuleDict({ 84 | k : RunningMeanStd(v, epsilon, per_channel, norm_only) for k,v in insize.items() 85 | }) 86 | 87 | def forward(self, input, unnorm=False): 88 | res = {k : self.running_mean_std(v, unnorm) for k,v in input.items()} 89 | return res -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class RunningScale: 5 | """Running trimmed scale estimator.""" 6 | 7 | def __init__(self, cfg): 8 | self.cfg = cfg 9 | self._value = torch.ones(1, dtype=torch.float32, device=torch.device('cuda')) 10 | self._percentiles = torch.tensor([5, 95], dtype=torch.float32, device=torch.device('cuda')) 11 | 12 | def state_dict(self): 13 | return dict(value=self._value, percentiles=self._percentiles) 14 | 15 | def load_state_dict(self, state_dict): 16 | self._value.data.copy_(state_dict['value']) 17 | self._percentiles.data.copy_(state_dict['percentiles']) 18 | 19 | @property 20 | def value(self): 21 | return self._value.cpu().item() 22 | 23 | def _percentile(self, x): 24 | x_dtype, x_shape = x.dtype, x.shape 25 | x = x.view(x.shape[0], -1) 26 | in_sorted, _ = torch.sort(x, dim=0) 27 | positions = self._percentiles * (x.shape[0]-1) / 100 28 | floored = torch.floor(positions) 29 | ceiled = floored + 1 30 | ceiled[ceiled > x.shape[0] - 1] = x.shape[0] - 1 31 | weight_ceiled = positions-floored 32 | weight_floored = 1.0 - weight_ceiled 33 | d0 = in_sorted[floored.long(), :] * weight_floored[:, None] 34 | d1 = in_sorted[ceiled.long(), :] * weight_ceiled[:, None] 35 | return (d0+d1).view(-1, *x_shape[1:]).type(x_dtype) 36 | 37 | def update(self, x): 38 | percentiles = self._percentile(x.detach()) 39 | value = torch.clamp(percentiles[1] - percentiles[0], min=1.) 40 | self._value.data.lerp_(value, self.cfg.tau) 41 | 42 | def __call__(self, x, update=False): 43 | if update: 44 | self.update(x) 45 | return x * (1/self.value) 46 | 47 | def __repr__(self): 48 | return f'RunningScale(S: {self.value})' 49 | -------------------------------------------------------------------------------- /simulation/rsl_rl/rsl_rl/utils/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | import numpy as np 33 | from typing import Tuple 34 | 35 | 36 | def split_and_pad_trajectories(tensor, dones): 37 | """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
38 | Returns masks corresponding to valid parts of the trajectories 39 | Example: 40 | Input: [ [a1, a2, a3, a4 | a5, a6], 41 | [b1, b2 | b3, b4, b5 | b6] 42 | ] 43 | 44 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True], 45 | [a5, a6, 0, 0], | [True, True, False, False], 46 | [b1, b2, 0, 0], | [True, True, False, False], 47 | [b3, b4, b5, 0], | [True, True, True, False], 48 | [b6, 0, 0, 0] | [True, False, False, False], 49 | ] | ] 50 | 51 | Assumes that the input has the following dimension order: [time, number of envs, additional dimensions] 52 | """ 53 | dones = dones.clone() 54 | dones[-1] = 1 55 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping 56 | flat_dones = dones.transpose(1, 0).reshape(-1, 1) 57 | 58 | # Get length of trajectory by counting the number of successive not done elements 59 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0])) 60 | trajectory_lengths = done_indices[1:] - done_indices[:-1] 61 | trajectory_lengths_list = trajectory_lengths.tolist() 62 | # Extract the individual trajectories 63 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list) 64 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories) 65 | 66 | 67 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1) 68 | return padded_trajectories, trajectory_masks 69 | 70 | def unpad_trajectories(trajectories, masks): 71 | """ Does the inverse operation of split_and_pad_trajectories() 72 | """ 73 | # Need to transpose before and after the masking to have proper reshaping 74 | return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0) 75 | 76 | 77 | class RunningMeanStd(object): 78 | def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...]
= ()): 79 | """ 80 | Calulates the running mean and std of a data stream 81 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 82 | :param epsilon: helps with arithmetic issues 83 | :param shape: the shape of the data stream's output 84 | """ 85 | self.mean = np.zeros(shape, np.float64) 86 | self.var = np.ones(shape, np.float64) 87 | self.count = epsilon 88 | 89 | def update(self, arr: np.ndarray) -> None: 90 | batch_mean = np.mean(arr, axis=0) 91 | batch_var = np.var(arr, axis=0) 92 | batch_count = arr.shape[0] 93 | self.update_from_moments(batch_mean, batch_var, batch_count) 94 | 95 | def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: int) -> None: 96 | delta = batch_mean - self.mean 97 | tot_count = self.count + batch_count 98 | 99 | new_mean = self.mean + delta * batch_count / tot_count 100 | m_a = self.var * self.count 101 | m_b = batch_var * batch_count 102 | m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count) 103 | new_var = m_2 / (self.count + batch_count) 104 | 105 | new_count = batch_count + self.count 106 | 107 | self.mean = new_mean 108 | self.var = new_var 109 | self.count = new_count 110 | 111 | 112 | class Normalizer(RunningMeanStd): 113 | def __init__(self, input_dim, epsilon=1e-4, clip_obs=10.0): 114 | super().__init__(shape=input_dim) 115 | self.epsilon = epsilon 116 | self.clip_obs = clip_obs 117 | 118 | def normalize(self, input): 119 | return np.clip( 120 | (input - self.mean) / np.sqrt(self.var + self.epsilon), 121 | -self.clip_obs, self.clip_obs) 122 | 123 | def normalize_torch(self, input, device): 124 | mean_torch = torch.tensor( 125 | self.mean, device=device, dtype=torch.float32) 126 | std_torch = torch.sqrt(torch.tensor( 127 | self.var + self.epsilon, device=device, dtype=torch.float32)) 128 | return torch.clamp( 129 | (input - mean_torch) / std_torch, -self.clip_obs, self.clip_obs) 130 | 131 | def update_normalizer(self, rollouts, expert_loader): 132 | policy_data_generator = rollouts.feed_forward_generator_amp( 133 | None, mini_batch_size=expert_loader.batch_size) 134 | expert_data_generator = expert_loader.dataset.feed_forward_generator_amp( 135 | expert_loader.batch_size) 136 | 137 | for expert_batch, policy_batch in zip(expert_data_generator, policy_data_generator): 138 | self.update( 139 | torch.vstack(tuple(policy_batch) + tuple(expert_batch)).cpu().numpy()) -------------------------------------------------------------------------------- /simulation/rsl_rl/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='rsl_rl', 4 | version='1.0.2', 5 | author='Nikita Rudin', 6 | author_email='rudinn@ethz.ch', 7 | license="BSD-3-Clause", 8 | packages=find_packages(), 9 | description='Fast and simple RL algorithms implemented in pytorch', 10 | python_requires='>=3.6', 11 | install_requires=[ 12 | "torch>=1.4.0", 13 | "torchvision>=0.5.0", 14 | "numpy>=1.16.4" 15 | ], 16 | ) 17 | --------------------------------------------------------------------------------
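For reference, a self-contained round-trip check of the split_and_pad_trajectories / unpad_trajectories helpers defined in rsl_rl/utils/utils.py above, mirroring the docstring example. This is a sketch, not part of the repository: it assumes the rsl_rl package defined by the setup.py above is installed, and the rollout length, env count, observation size, and done pattern are made-up values for illustration.

import torch
from rsl_rl.utils import split_and_pad_trajectories, unpad_trajectories

steps, num_envs, obs_dim = 6, 2, 3
obs = torch.arange(steps * num_envs * obs_dim, dtype=torch.float32).reshape(steps, num_envs, obs_dim)
dones = torch.zeros(steps, num_envs, 1)
dones[1, 1] = 1.0   # env 1 finishes an episode after step 1; env 0 runs the full rollout

padded, masks = split_and_pad_trajectories(obs, dones)
# padded: [longest_traj_len, num_trajectories, obs_dim] (== steps here, since env 0 never terminates early);
# masks flags the valid, non-padded entries of each trajectory.
recovered = unpad_trajectories(padded, masks)
assert torch.equal(recovered, obs)   # the inverse operation restores the [steps, num_envs, obs_dim] layout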