├── LICENSE
├── README.md
├── docs
├── changelog.md
├── deploy.md
└── install.md
├── poster.gif
└── simulation
├── .gitignore
├── README.md
├── legged_gym
├── .gitignore
├── LICENSE
├── legged_gym
│ ├── .gitignore
│ ├── __init__.py
│ ├── envs
│ │ ├── __init__.py
│ │ ├── base
│ │ │ ├── base_config.py
│ │ │ ├── base_task.py
│ │ │ ├── humanoid.py
│ │ │ ├── humanoid_config.py
│ │ │ ├── legged_robot.py
│ │ │ └── legged_robot_config.py
│ │ ├── g1rolltrack
│ │ │ ├── g1waistroll_track.py
│ │ │ └── g1waistroll_track_config.py
│ │ ├── g1track
│ │ │ ├── g1waist_track.py
│ │ │ └── g1waist_track_config.py
│ │ ├── g1waist
│ │ │ ├── g1waist_up.py
│ │ │ └── g1waist_up_config.py
│ │ └── g1waistroll
│ │ │ ├── g1waistroll_up.py
│ │ │ └── g1waistroll_up_config.py
│ ├── gym_utils
│ │ ├── __init__.py
│ │ ├── helpers.py
│ │ ├── logger.py
│ │ ├── math.py
│ │ ├── motor_delay_fft.py
│ │ ├── storage.py
│ │ ├── task_registry.py
│ │ └── terrain.py
│ ├── scripts
│ │ ├── eval.sh
│ │ ├── eval_track.sh
│ │ ├── facingdown_poses.npy
│ │ ├── facingup_poses.npy
│ │ ├── log.sh
│ │ ├── log_traj.py
│ │ ├── play.py
│ │ ├── run.sh
│ │ ├── run_track.sh
│ │ ├── save_jit.py
│ │ └── train.py
│ └── tests
│ │ ├── test_asset.py
│ │ └── test_env.py
├── licenses
│ ├── assets
│ │ ├── ANYmal_b_license.txt
│ │ ├── ANYmal_c_license.txt
│ │ ├── a1_license.txt
│ │ └── cassie_license.txt
│ └── dependencies
│ │ └── matplotlib_license.txt
├── requirements.txt
├── resources
│ └── robots
│ │ └── g1_modified
│ │ ├── g1_23dof.urdf
│ │ ├── g1_23dof.xml
│ │ ├── g1_23dof_full.xml
│ │ ├── g1_29dof.urdf
│ │ ├── g1_29dof.xml
│ │ ├── g1_29dof_fixedwrist_custom_collision.urdf
│ │ ├── g1_29dof_fixedwrist_custom_collision_with_head.urdf
│ │ ├── g1_29dof_fixedwrist_full_collision.urdf
│ │ ├── g1_sim2sim.xml
│ │ ├── meshes
│ │ ├── head_link.STL
│ │ ├── left_ankle_pitch_link.STL
│ │ ├── left_ankle_roll_link.STL
│ │ ├── left_elbow_link.STL
│ │ ├── left_hand_index_0_link.STL
│ │ ├── left_hand_index_1_link.STL
│ │ ├── left_hand_middle_0_link.STL
│ │ ├── left_hand_middle_1_link.STL
│ │ ├── left_hand_palm_link.STL
│ │ ├── left_hand_thumb_0_link.STL
│ │ ├── left_hand_thumb_1_link.STL
│ │ ├── left_hand_thumb_2_link.STL
│ │ ├── left_hip_pitch_link.STL
│ │ ├── left_hip_roll_link.STL
│ │ ├── left_hip_yaw_link.STL
│ │ ├── left_knee_link.STL
│ │ ├── left_rubber_hand.STL
│ │ ├── left_shoulder_pitch_link.STL
│ │ ├── left_shoulder_roll_link.STL
│ │ ├── left_shoulder_yaw_link.STL
│ │ ├── left_wrist_pitch_link.STL
│ │ ├── left_wrist_roll_link.STL
│ │ ├── left_wrist_roll_rubber_hand.STL
│ │ ├── left_wrist_yaw_link.STL
│ │ ├── logo_link.STL
│ │ ├── pelvis.STL
│ │ ├── pelvis_contour_link.STL
│ │ ├── right_ankle_pitch_link.STL
│ │ ├── right_ankle_roll_link.STL
│ │ ├── right_elbow_link.STL
│ │ ├── right_hand_index_0_link.STL
│ │ ├── right_hand_index_1_link.STL
│ │ ├── right_hand_middle_0_link.STL
│ │ ├── right_hand_middle_1_link.STL
│ │ ├── right_hand_palm_link.STL
│ │ ├── right_hand_thumb_0_link.STL
│ │ ├── right_hand_thumb_1_link.STL
│ │ ├── right_hand_thumb_2_link.STL
│ │ ├── right_hip_pitch_link.STL
│ │ ├── right_hip_roll_link.STL
│ │ ├── right_hip_yaw_link.STL
│ │ ├── right_knee_link.STL
│ │ ├── right_rubber_hand.STL
│ │ ├── right_shoulder_pitch_link.STL
│ │ ├── right_shoulder_roll_link.STL
│ │ ├── right_shoulder_yaw_link.STL
│ │ ├── right_wrist_pitch_link.STL
│ │ ├── right_wrist_roll_link.STL
│ │ ├── right_wrist_roll_rubber_hand.STL
│ │ ├── right_wrist_yaw_link.STL
│ │ ├── torso_constraint_L_link.STL
│ │ ├── torso_constraint_L_rod_link.STL
│ │ ├── torso_constraint_R_link.STL
│ │ ├── torso_constraint_R_rod_link.STL
│ │ ├── torso_link.STL
│ │ ├── waist_constraint_L.STL
│ │ ├── waist_constraint_R.STL
│ │ ├── waist_roll_link.STL
│ │ ├── waist_support_link.STL
│ │ └── waist_yaw_link.STL
│ │ └── shabi.txt
└── setup.py
└── rsl_rl
├── .gitignore
├── LICENSE
├── README.md
├── licenses
└── dependencies
│ ├── numpy_license.txt
│ └── torch_license.txt
├── rsl_rl
├── __init__.py
├── algorithms
│ ├── __init__.py
│ ├── ppo.py
│ └── ppo_rma.py
├── env
│ ├── __init__.py
│ └── vec_env.py
├── modules
│ ├── __init__.py
│ ├── actor_critic.py
│ ├── actor_critic_recurrent.py
│ └── actor_critic_rma.py
├── runners
│ ├── __init__.py
│ ├── on_policy_runner.py
│ └── runner.py
├── storage
│ ├── __init__.py
│ ├── replay_buffer.py
│ └── rollout_storage.py
└── utils
│ ├── __init__.py
│ ├── init.py
│ ├── layers.py
│ ├── math.py
│ ├── running_mean_std.py
│ ├── scale.py
│ └── utils.py
└── setup.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [2025] [Xialin He, Runpei Dong, Zixuan Chen, Saurabh Gupta]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learning Getting-Up Policies for Real-World Humanoid Robots
2 |
3 | Xialin He*,1 | Runpei Dong*,1 | Zixuan Chen2 | Saurabh Gupta1
4 |
5 | 1University of Illinois Urbana-Champaign   2Simon Fraser University
6 |
7 | * Equal Contribution
8 |
9 | RSS 2025
10 |
31 | ## News
32 | - 🎉 Apr 2025: HumanUP has been accepted to RSS 2025.
33 |
34 | ## HumanUP
35 | **[HumanUP](https://arxiv.org/abs/2502.12152)** is a reinforcement learning framework for training humanoid robots to get up from supine (i.e., lying face up) or prone (i.e., lying face down) poses. This codebase was built as the code release for the **[HumanUP](https://arxiv.org/abs/2502.12152)** paper and supports simulation training of the **Unitree G1** humanoid robot. The simulation training is based on **Isaac Gym**.
36 |
37 | ## Installation
38 | See [installation instructions](./docs/install.md).
39 |
40 | ## Getting Started
41 | See [usage instructions](./simulation/README.md).
42 |
43 | ## Change Logs
44 | See [changelogs](./docs/changelog.md).
45 |
46 |
47 | ## Acknowledgements
48 | + We would like to thank all the authors of this project; it could not have been finished without your efforts!
49 | + Our simulation environment implementation is based on [legged_gym](https://github.com/leggedrobotics/legged_gym), and the RL algorithm implementation is based on [rsl_rl](https://github.com/leggedrobotics/rsl_rl).
50 | + [Smooth-Humanoid-Locomotion](https://github.com/zixuan417/smooth-humanoid-locomotion) also provided many insights.
51 |
52 | ## Citation
53 | If you find this work useful, please consider citing:
54 | ```
55 | @article{humanup25,
56 | title={Learning Getting-Up Policies for Real-World Humanoid Robots},
57 | author={He, Xialin and Dong, Runpei and Chen, Zixuan and Gupta, Saurabh},
58 | journal={arXiv preprint arXiv:2502.12152},
59 | year={2025}
60 | }
61 | ```
62 |
63 |
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog and Bugs
--------------------------------------------------------------------------------
/docs/deploy.md:
--------------------------------------------------------------------------------
1 | # Deployment Instructions
2 | This document provides instructions on how to deploy a trained policy on the real G1 humanoid robot.
3 |
4 | ## ROS1 (Noetic)
5 | We use ROS1 to deploy our policy and have done so successfully on both Ubuntu 20.04 and 22.04. On Ubuntu 20.04, please install ROS1 following the [official instructions](https://wiki.ros.org/noetic/Installation/Ubuntu). On Ubuntu 22.04, please consider using [robostack](https://robostack.github.io/) and follow its [official instructions](https://robostack.github.io/GettingStarted.html) to install ROS1.
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 | # Installation Instructions
2 | This document provides instructions for installing the codebase. We recommend using [Anaconda](https://www.anaconda.com/) to simplify the process.
3 |
4 | ## Create a Conda Environment
5 | ```bash
6 | conda create -n humanup python=3.8
7 | conda activate humanup
8 | pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
9 | ```
10 | ## IsaacGym
11 | Download [IsaacGym Preview 4.0](https://developer.nvidia.com/isaac-gym) from [Google Drive](https://drive.google.com/file/d/1YEsZPtmdzQbSePX0WMhdf0565XwBaIFi/view?usp=sharing), then install it by running
12 | ```bash
13 | cd isaacgym/python && pip install -e .
14 | ```
15 | **Note:** NVIDIA retains all rights to [IsaacGym](https://developer.nvidia.com/isaac-gym).
16 | After installing IsaacGym, please make sure it is working by running:
17 | ```bash
18 | # this example can only be run with a monitor
19 | python examples/joint_monkey.py
20 | ```
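If you are on a headless server without a monitor, a minimal import check is a useful alternative sanity test. This is only a sketch; the one hard requirement it illustrates is that `isaacgym` must be imported before `torch`:
```python
# Headless sanity check for the Isaac Gym install.
# Note: isaacgym must be imported before torch, otherwise Isaac Gym raises an ImportError.
import isaacgym
from isaacgym import gymapi
import torch

gym = gymapi.acquire_gym()  # acquire the Gym singleton
print("Isaac Gym imported OK; CUDA available:", torch.cuda.is_available())
```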
21 |
22 | ## RSL RL
23 | Install `rsl_rl` by running
24 | ```bash
25 | cd ../../rsl_rl && pip install -e .
26 | ```
27 |
28 | ## Legged Gym and Other Dependencies
29 | Install `legged_gym` and other dependencies
30 | ```bash
31 | cd ../legged_gym && pip install -e .
32 | pip install "numpy==1.23.0" pydelatin wandb tqdm opencv-python pymeshlab ipdb pyfqmr flask dill gdown hydra-core mujoco mujoco-python-viewer loguru
33 | pip install -r requirements.txt
34 | pip install imageio[ffmpeg]
35 | ```
36 | If you cannot install `imageio[ffmpeg]`, please run
37 | ```bash
38 | pip install imageio imageio-ffmpeg
39 | ```
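Once everything is installed, a quick smoke test can confirm that the packages import and the tasks register. This is only a sketch; it assumes (as in this repo's `envs/__init__.py`) that importing `legged_gym.envs` registers the `g1waist*` tasks, and the registry's internal attribute name may differ:
```python
# Smoke test after `pip install -e .` of rsl_rl and legged_gym.
import isaacgym                 # must precede torch / legged_gym imports
import legged_gym.envs          # importing the package registers the tasks
from legged_gym.gym_utils.task_registry import task_registry

# `task_classes` is the attribute name used by upstream legged_gym; adjust if this repo differs.
tasks = getattr(task_registry, "task_classes", {})
print("registered tasks:", list(tasks) or "unknown (attribute name may differ)")
```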
40 | Next, please follow the usage instructions to test a training run.
--------------------------------------------------------------------------------
/poster.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/poster.gif
--------------------------------------------------------------------------------
/simulation/.gitignore:
--------------------------------------------------------------------------------
1 | isaacgym*
2 | .ipynb_checkpoints/
3 | .vscode/
4 | *img
5 | *__pycache__*
6 | fbx/
7 | cmu_fbx_all/
8 | *zip
9 | *ipynb
10 | npy
11 | pkl
12 | *.nfs*
--------------------------------------------------------------------------------
/simulation/README.md:
--------------------------------------------------------------------------------
1 | # Usage Instructions
2 | ## Training and Playing Policies
3 | First, please go to the scripts folder:
4 | ``` bash
5 | cd legged_gym/legged_gym/scripts
6 | ```
7 | ### 1. Stage I Discovery Policy Training
8 | #### 1.1 Getting Up Policy
9 | - Training:
10 | ``` bash
11 | bash run.sh g1waist [your_exp_desc] [device]
12 | # e.g. bash run.sh g1waist stage1_get_up cuda:0
13 | ```
14 | - Evaluation:
15 | ``` bash
16 | bash eval.sh g1waist [your_exp_desc] [checkpoint]
17 | # e.g. bash eval.sh g1waist stage1_get_up -1
18 | ```
19 |
20 | #### 1.2 Rolling Over Policy
21 | - Training:
22 | ``` bash
23 | bash run.sh g1waistroll [your_exp_desc] [device]
24 | # bash run.sh g1waistroll stage1_roll_over cuda:0
25 | ```
26 | - Evaluation:
27 | ``` bash
28 | bash eval.sh g1waistroll [your_exp_desc] [checkpoint]
29 | # e.g. bash eval.sh g1waistroll stage1_roll_over -1
30 | ```
31 |
32 | For the main training args:
33 | + `--debug` disables wandb and sets the number of environments to 64, which is useful for debugging;
34 | + `--fix_action_std` fixes the action std, which is useful for stabilizing training;
35 | + `--resume` indicates whether to resume from the previous experiment;
36 | + `--resumeid` specifies the exptid to resume from (if `--resume` is set);
37 |
38 | For the main evaluation args:
39 | + `--record_video` records video headlessly, which is useful for server users;
40 | + `--checkpoint [int]` specifies the checkpoint to load; it defaults to -1, which loads the latest one;
41 | + `--use_jit` plays the policy with the exported JIT model;
42 | + `--teleop_mode` allows the user to control the robot with the keyboard;
43 |
44 |
45 | ### 2. Stage II Deployable Policy Training
46 | #### 2.1 Log the Stage I policy trajectory
47 | ```bash
48 | sh log.sh g1waistroll [your_exp_desc] [checkpoint] # rolling over policy
49 | sh log.sh g1waist [your_exp_desc] [checkpoint] # getting up policy
50 | ```
51 | Then, please put all trajectories under `simulation/legged_gym/logs/env_logs`; the structure looks like:
52 | ```bash
53 | .
54 | └── env_logs
55 | ├── getup_traj
56 | │ ├── dof_pos_all.pkl
57 | │ └── head_height_all.pkl
58 | └── rollover_traj
59 | ├── dof_pos_all.pkl
60 | ├── head_height_all.pkl
61 | └── projected_gravity_all.pkl
62 | ```
63 |
64 | To support further development on HumanUP, we provide our discovered trajectories on [Google Drive](https://drive.google.com/drive/folders/1kRSGkMDnqsX6OLr7-8OM5R6bF9mn84sK?usp=sharing). Feel free to download them to directly train the Stage II policy.
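If you want to inspect a logged trajectory before training Stage II, a minimal sketch like the following works. It assumes each `.pkl` file stores a single array-like object per rollout, as the file names suggest; the exact layout may differ, so adjust accordingly:
```python
import pickle

# Hypothetical path; point it at your logged or downloaded trajectories.
path = "simulation/legged_gym/logs/env_logs/getup_traj/dof_pos_all.pkl"
with open(path, "rb") as f:
    data = pickle.load(f)

print("type:", type(data))
if hasattr(data, "shape"):
    print("shape:", data.shape)   # e.g. (num_steps, num_dofs) if it is an array/tensor
elif hasattr(data, "__len__"):
    print("length:", len(data))
```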
65 |
66 | #### 2.2 Getting Up Tracking
67 | - Training:
68 | ``` bash
69 | bash run_track.sh g1waist [your_exp_desc] [device] [traj_name]
70 | # e.g. bash run_track.sh g1waist stage2_get_up cuda:0 getup_traj
71 | ```
72 | - Evaluation:
73 | ``` bash
74 | bash eval_track.sh g1waist [your_exp_desc] [checkpoint] [traj_name]
75 | # e.g. bash eval_track.sh g1waist stage2_get_up -1 getup_traj
76 | ```
77 |
78 | #### 2.3 Rolling Over Tracking
79 | - Training:
80 | ``` bash
81 | bash run_track.sh g1waistroll [your_exp_desc] [device] [traj_name]
82 | # bash run_track.sh g1waistroll stage2_roll_over cuda:0 rollover_traj
83 | ```
84 | - Evaluation:
85 | ``` bash
86 | bash eval_track.sh g1waistroll [your_exp_desc] [checkpoint] [traj_name]
87 | # e.g. bash eval_track.sh g1waistroll stage2_roll_over -1 rollover_traj
88 | ```
89 |
90 | ### 3. Save JIT Model
91 | ```bash
92 | # bash to_jit.sh g1waist [your_exp_desc] # e.g. bash to_jit.sh g1waist pretrained_exp
93 | python save_jit.py --proj_name g1waist_track --exptid [your_exp_desc] --checkpoint [checkpoint] --robot g1
94 | # e.g. python save_jit.py --proj_name g1waist_track --exptid stage2_get_up --checkpoint -1 --robot g1
95 | ```
96 |
97 | You can specify exactly which checkpoint to save by adding `--checkpoint [int]` to the command; it defaults to -1, which is the latest one.
98 |
99 | You can play the JIT policy by adding `--use_jit` to the eval script.
100 |
101 | # Notes
102 | Here are some useful notes:
103 |
104 | ## Simulation Frequency
105 | The simulation frequency has a huge impact on the performance of the policy. Most existing codebases for humanoid or quadruped robots use a sim frequency of 200 Hz, which is enough for locomotion tasks like walking. For getting-up policy learning, we use a higher frequency of 1 kHz (`dt=0.001`). Although you can train a reasonable policy in simulation at 200 Hz, it will not work in the real world.
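For reference, the timestep lives in the environment config. The snippet below is only a sketch of where such a setting would go, assuming the config classes keep the upstream `legged_gym` field names (`sim.dt`, `control.decimation`); see `g1waist_up_config.py` for the actual values used in this repo, and note that the `decimation` value here is hypothetical:
```python
from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg

class HighFreqCfg(LeggedRobotCfg):
    class sim(LeggedRobotCfg.sim):
        dt = 0.001            # 1 kHz physics step instead of the common 0.005 (200 Hz)

    class control(LeggedRobotCfg.control):
        decimation = 20       # hypothetical: one policy step per 20 physics steps -> 50 Hz actions
```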
106 |
107 | ## Collision Mesh
108 | For the G1 humanoid robot, we have customized the original G1 collision mesh into simplified and modified versions to accelerate training and improve Sim2Real performance.
109 | - **[g1_29dof_fixedwrist_custom_collision.urdf](./legged_gym/resources/robots/g1_modified/g1_29dof_fixedwrist_custom_collision.urdf)**:
110 | Simplified collision mesh; 23-DoF G1 with the wrist DoFs removed.
111 | - **[g1_29dof_fixedwrist_custom_collision_with_head.urdf](./legged_gym/resources/robots/g1_modified/g1_29dof_fixedwrist_custom_collision_with_head.urdf)**:
112 | Simplified collision mesh including the head (better for training rolling over); 23-DoF G1 with the wrist DoFs removed.
113 | - **[g1_29dof_fixedwrist_full_collision.urdf](./legged_gym/resources/robots/g1_modified/g1_29dof_fixedwrist_full_collision.urdf)**:
114 | Full collision mesh; 23-DoF G1 with the wrist DoFs removed.
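To get a feel for how much the simplified collision model trims, here is a quick sketch that counts `<collision>` elements in the URDFs (run it from `legged_gym/resources/robots/g1_modified/`; the file names below are taken from that folder, everything else is an assumption):
```python
import xml.etree.ElementTree as ET

def count_collisions(urdf_path: str) -> int:
    # Each <collision> element under a <link> contributes one collision shape.
    root = ET.parse(urdf_path).getroot()
    return sum(len(link.findall("collision")) for link in root.iter("link"))

for name in ("g1_29dof_fixedwrist_custom_collision.urdf",
             "g1_29dof_fixedwrist_full_collision.urdf"):
    print(name, count_collisions(name))
```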
--------------------------------------------------------------------------------
/simulation/legged_gym/.gitignore:
--------------------------------------------------------------------------------
1 | # These are some examples of commonly ignored file patterns.
2 | # You should customize this list as applicable to your project.
3 | # Learn more about .gitignore:
4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore
5 |
6 | # Node artifact files
7 | node_modules/
8 | dist/
9 |
10 | wandb/
11 |
12 | # Compiled Java class files
13 | *.class
14 |
15 | # Compiled Python bytecode
16 | *.py[cod]
17 |
18 | # Log files
19 | *.log
20 |
21 | # Package files
22 | *.jar
23 |
24 | # Maven
25 | target/
26 | dist/
27 |
28 | # JetBrains IDE
29 | .idea/
30 |
31 | # Unit test reports
32 | TEST*.xml
33 |
34 | # Generated by MacOS
35 | .DS_Store
36 |
37 | # Generated by Windows
38 | Thumbs.db
39 |
40 | # Applications
41 | *.app
42 | *.exe
43 | *.war
44 |
45 | # Large media files
46 | *.mp4
47 | *.tiff
48 | *.avi
49 | *.flv
50 | *.mov
51 | *.wmv
52 |
53 | # VS Code
54 | .vscode
55 | # logs
56 | logs
57 | runs
58 |
59 | # other
60 | *.egg-info
61 | __pycache__
62 |
63 | data
64 | evaluate
65 | debug
--------------------------------------------------------------------------------
/simulation/legged_gym/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin
2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software without
17 | specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | See licenses/assets for license information for assets included in this repository.
31 | See licenses/dependencies for license information of dependencies of this package.
32 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/.gitignore:
--------------------------------------------------------------------------------
1 | run/*
2 | a.py
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import os
32 |
33 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
34 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'legged_gym', 'envs')
35 | POSE_DIR = os.path.abspath(os.path.join(LEGGED_GYM_ROOT_DIR, '../pose'))
36 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/envs/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # This file was modified by HumanUP authors in 2024-2025
3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved.
5 | # SPDX-License-Identifier: BSD-3-Clause
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | #
32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved.
33 |
34 | from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
35 | from .base.legged_robot import LeggedRobot
36 |
37 | from .base.humanoid import Humanoid
38 |
39 | # G1 with waist dof
40 | from .g1waist.g1waist_up_config import G1WaistHumanUPCfg, G1WaistHumanUPCfgPPO
41 | from .g1waist.g1waist_up import G1WaistHumanUP
42 | from .g1waistroll.g1waistroll_up_config import G1WaistRollHumanUPCfg, G1WaistRollHumanUPCfgPPO
43 | from .g1waistroll.g1waistroll_up import G1WaistRollHumanUP
44 |
45 | from .g1track.g1waist_track_config import G1WaistTrackCfg, G1WaistTrackCfgPPO
46 | from .g1track.g1waist_track import G1WaistTrack
47 |
48 | from .g1rolltrack.g1waistroll_track_config import G1WaistRollTrackCfg, G1WaistRollTrackCfgPPO
49 | from .g1rolltrack.g1waistroll_track import G1WaistRollTrack
50 |
51 | from legged_gym.gym_utils.task_registry import task_registry
52 |
53 | # ======================= environment registration =======================
54 |
55 | task_registry.register("g1waist_up", G1WaistHumanUP, G1WaistHumanUPCfg(), G1WaistHumanUPCfgPPO())
56 |
57 | task_registry.register("g1waist_track", G1WaistTrack, G1WaistTrackCfg(), G1WaistTrackCfgPPO())
58 |
59 | task_registry.register("g1waistroll_up", G1WaistRollHumanUP, G1WaistRollHumanUPCfg(), G1WaistRollHumanUPCfgPPO())
60 |
61 | task_registry.register("g1waistroll_track", G1WaistRollTrack, G1WaistRollTrackCfg(), G1WaistRollTrackCfgPPO())
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/envs/base/base_config.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import inspect
32 |
33 | class BaseConfig:
34 | def __init__(self) -> None:
35 | """ Initializes all member classes recursively. Ignores all namse starting with '__' (buit-in methods)."""
36 | self.init_member_classes(self)
37 |
38 | @staticmethod
39 | def init_member_classes(obj):
40 | # iterate over all attribute names
41 | for key in dir(obj):
42 | # disregard builtin attributes
43 | # if key.startswith("__"):
44 | if key=="__class__":
45 | continue
46 | # get the corresponding attribute object
47 | var = getattr(obj, key)
48 | # check if the attribute is a class
49 | if inspect.isclass(var):
50 | # instantiate the class
51 | i_var = var()
52 | # set the attribute to the instance instead of the type
53 | setattr(obj, key, i_var)
54 | # recursively init members of the attribute
55 | BaseConfig.init_member_classes(i_var)
56 |
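# Usage sketch (hypothetical config, not part of this repo's code): because
# __init__ calls init_member_classes, nested class declarations are replaced
# by instances, so config fields read like attributes on an object:
#
#     class MyCfg(BaseConfig):
#         class env:
#             num_envs = 4096
#         class rewards:
#             class scales:
#                 tracking = 1.0
#
#     cfg = MyCfg()
#     cfg.env.num_envs             # -> 4096
#     cfg.rewards.scales.tracking  # -> 1.0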
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/envs/base/base_task.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # This file was modified by HumanUP authors in 2024-2025
3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved.
5 | # SPDX-License-Identifier: BSD-3-Clause
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | #
32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved.
33 |
34 | import sys
35 | from isaacgym import gymapi
36 | from isaacgym import gymutil, gymtorch
37 | import numpy as np
38 | import torch
39 | import time
40 |
41 | # Base class for RL tasks
42 | class BaseTask():
43 |
44 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
45 | self.gym = gymapi.acquire_gym()
46 |
47 | self.sim_params = sim_params
48 | self.physics_engine = physics_engine
49 | self.sim_device = sim_device
50 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(self.sim_device)
51 | self.headless = headless
52 |
53 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX.
54 | if sim_device_type=='cuda' and sim_params.use_gpu_pipeline:
55 | self.device = self.sim_device
56 | else:
57 | self.device = 'cpu'
58 |
59 | # graphics device for rendering, -1 for no rendering
60 | self.graphics_device_id = self.sim_device_id
61 | if self.headless == True:
62 | self.graphics_device_id = -1
63 |
64 | self.num_envs = cfg.env.num_envs
65 | self.num_obs = cfg.env.num_observations
66 | self.num_privileged_obs = cfg.env.num_privileged_obs
67 | self.num_actions = cfg.env.num_actions
68 |
69 | # optimization flags for pytorch JIT
70 | torch._C._jit_set_profiling_mode(False)
71 | torch._C._jit_set_profiling_executor(False)
72 |
73 | # allocate buffers
74 | self.obs_buf = torch.zeros(self.num_envs, self.num_obs, device=self.device, dtype=torch.float)
75 | self.rew_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.float)
76 | self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
77 | self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
78 | self.time_out_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
79 | if self.num_privileged_obs is not None:
80 | self.privileged_obs_buf = torch.zeros(self.num_envs, self.num_privileged_obs, device=self.device, dtype=torch.float)
81 | else:
82 | self.privileged_obs_buf = None
83 | # self.num_privileged_obs = self.num_obs
84 |
85 | self.extras = {}
86 |
87 | # create envs, sim and viewer
88 | self.create_sim()
89 | self.gym.prepare_sim(self.sim)
90 |
91 | # todo: read from config
92 | self.enable_viewer_sync = True
93 | self.viewer = None
94 |
95 | # if running with a viewer, set up keyboard shortcuts and camera
96 | if self.headless == False:
97 | # subscribe to keyboard shortcuts
98 | self.viewer = self.gym.create_viewer(
99 | self.sim, gymapi.CameraProperties())
100 | self.gym.subscribe_viewer_keyboard_event(
101 | self.viewer, gymapi.KEY_ESCAPE, "QUIT")
102 | self.gym.subscribe_viewer_keyboard_event(
103 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync")
104 | self.gym.subscribe_viewer_keyboard_event(
105 | self.viewer, gymapi.KEY_F, "free_cam")
106 | for i in range(9):
107 | self.gym.subscribe_viewer_keyboard_event(
108 | self.viewer, getattr(gymapi, "KEY_"+str(i)), "lookat"+str(i))
109 | self.gym.subscribe_viewer_keyboard_event(
110 | self.viewer, gymapi.KEY_LEFT_BRACKET, "prev_id")
111 | self.gym.subscribe_viewer_keyboard_event(
112 | self.viewer, gymapi.KEY_RIGHT_BRACKET, "next_id")
113 | self.gym.subscribe_viewer_keyboard_event(
114 | self.viewer, gymapi.KEY_SPACE, "pause")
115 | self.gym.subscribe_viewer_keyboard_event(
116 | self.viewer, gymapi.KEY_W, "vx_plus")
117 | self.gym.subscribe_viewer_keyboard_event(
118 | self.viewer, gymapi.KEY_S, "vx_minus")
119 | self.gym.subscribe_viewer_keyboard_event(
120 | self.viewer, gymapi.KEY_A, "left_turn")
121 | self.gym.subscribe_viewer_keyboard_event(
122 | self.viewer, gymapi.KEY_D, "right_turn")
123 | self.gym.subscribe_viewer_keyboard_event(
124 | self.viewer, gymapi.KEY_MINUS, "prev_motion")
125 | self.gym.subscribe_viewer_keyboard_event(
126 | self.viewer, gymapi.KEY_EQUAL, "next_motion")
127 | self.free_cam = False
128 | self.lookat_id = 0
129 | self.lookat_vec = torch.tensor([-0, 2, 1], requires_grad=False, device=self.device)
130 | self.button_pressed = False
131 |
132 | def get_observations(self):
133 | return self.obs_buf
134 |
135 | def get_privileged_observations(self):
136 | return self.privileged_obs_buf
137 |
138 | def reset_idx(self, env_ids):
139 | """Reset selected robots"""
140 | raise NotImplementedError
141 |
142 | def reset(self):
143 | """ Reset all robots"""
144 | self.reset_idx(torch.arange(self.num_envs, device=self.device))
145 | obs, privileged_obs, _, _, _ = self.step(torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False))
146 | return obs, privileged_obs
147 |
148 | def step(self, actions):
149 | raise NotImplementedError
150 |
151 | def lookat(self, i):
152 | look_at_pos = self.root_states[i, :3].clone()
153 | cam_pos = look_at_pos + self.lookat_vec
154 | self.set_camera(cam_pos, look_at_pos)
155 |
156 | def render(self, sync_frame_time=True):
157 | if self.viewer:
158 | # check for window closed
159 | if self.gym.query_viewer_has_closed(self.viewer):
160 | sys.exit()
161 |
162 | # check for keyboard events
163 | for evt in self.gym.query_viewer_action_events(self.viewer):
164 | if evt.action == "QUIT" and evt.value > 0:
165 | sys.exit()
166 | elif evt.action == "toggle_viewer_sync" and evt.value > 0:
167 | self.enable_viewer_sync = not self.enable_viewer_sync
168 |
169 | # fetch results
170 | if self.device != 'cpu':
171 | self.gym.fetch_results(self.sim, True)
172 |
173 | # step graphics
174 | if self.enable_viewer_sync:
175 | self.gym.step_graphics(self.sim)
176 | self.gym.draw_viewer(self.viewer, self.sim, True)
177 | if sync_frame_time:
178 | self.gym.sync_frame_time(self.sim)
179 | else:
180 | self.gym.poll_viewer_events(self.viewer)
181 |
182 | # def render(self, sync_frame_time=True):
183 | # if self.viewer:
184 | # # check for window closed
185 | # if self.gym.query_viewer_has_closed(self.viewer):
186 | # sys.exit()
187 | # if not self.free_cam:
188 | # self.lookat(self.lookat_id)
189 | # # check for keyboard events
190 | # evt_count = 0
191 | # for evt in self.gym.query_viewer_action_events(self.viewer):
192 | # if evt.action == "QUIT" and evt.value > 0:
193 | # sys.exit()
194 | # elif evt.action == "toggle_viewer_sync" and evt.value > 0:
195 | # self.enable_viewer_sync = not self.enable_viewer_sync
196 |
197 | # if not self.free_cam:
198 | # for i in range(9):
199 | # if evt.action == "lookat" + str(i) and evt.value > 0:
200 | # self.lookat(i)
201 | # self.lookat_id = i
202 | # if evt.action == "prev_id" and evt.value > 0:
203 | # self.lookat_id = (self.lookat_id-1) % self.num_envs
204 | # self.lookat(self.lookat_id)
205 | # if evt.action == "next_id" and evt.value > 0:
206 | # self.lookat_id = (self.lookat_id+1) % self.num_envs
207 | # self.lookat(self.lookat_id)
208 | # if evt.action == "vx_plus" and evt.value > 0:
209 | # self.commands[self.lookat_id, 0] += 0.1
210 | # if evt.action == "vx_minus" and evt.value > 0:
211 | # self.commands[self.lookat_id, 0] -= 0.1
212 | # if evt.action == "left_turn" and evt.value > 0:
213 | # self.commands[self.lookat_id, 2] -= 0.05
214 | # if evt.action == "right_turn" and evt.value > 0:
215 | # self.commands[self.lookat_id, 2] += 0.05
216 | # if evt.action == "next_motion" and evt.value > 0:
217 | # self._motion_ids[self.lookat_id] = (self._motion_ids[self.lookat_id] + 1) % self._motion_lib.num_motions()
218 | # self.update_motion_ids([self.lookat_id])
219 | # if evt.action == "prev_motion" and evt.value > 0:
220 | # self._motion_ids[self.lookat_id] = (self._motion_ids[self.lookat_id] - 1) % self._motion_lib.num_motions()
221 | # self.update_motion_ids([self.lookat_id])
222 | # if evt.action == "free_cam" and evt.value > 0:
223 | # self.free_cam = not self.free_cam
224 | # if self.free_cam:
225 | # self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat)
226 |
227 | # if evt.action == "pause" and evt.value > 0:
228 | # self.pause = True
229 | # while self.pause:
230 | # time.sleep(0.1)
231 | # self.gym.draw_viewer(self.viewer, self.sim, True)
232 | # for evt in self.gym.query_viewer_action_events(self.viewer):
233 | # if evt.action == "pause" and evt.value > 0:
234 | # self.pause = False
235 | # if self.gym.query_viewer_has_closed(self.viewer):
236 | # sys.exit()
237 | # if evt.value > 0:
238 | # evt_count += 1
239 | # self.button_pressed = True if evt_count > 0 else False
240 |
241 | # # fetch results
242 | # if self.device != 'cpu':
243 | # self.gym.fetch_results(self.sim, True)
244 |
245 | # self.gym.poll_viewer_events(self.viewer)
246 | # # step graphics
247 | # if self.enable_viewer_sync:
248 | # self.gym.step_graphics(self.sim)
249 | # self.gym.draw_viewer(self.viewer, self.sim, True)
250 | # if sync_frame_time:
251 | # self.gym.sync_frame_time(self.sim)
252 | # else:
253 | # self.gym.poll_viewer_events(self.viewer)
254 |
255 | # if not self.free_cam:
256 | # p = self.gym.get_viewer_camera_transform(self.viewer, None).p
257 | # cam_trans = torch.tensor([p.x, p.y, p.z], requires_grad=False, device=self.device)
258 | # look_at_pos = self.root_states[self.lookat_id, :3].clone()
259 | # self.lookat_vec = cam_trans - look_at_pos
260 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/gym_utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from .helpers import class_to_dict, get_load_path, get_args, export_policy_as_jit, set_seed, update_class_from_dict
32 | from .task_registry import task_registry
33 | from .logger import Logger
34 | from .math import *
35 | from .terrain import Terrain
36 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/gym_utils/logger.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import matplotlib.pyplot as plt
32 | import numpy as np
33 | from collections import defaultdict
34 | from multiprocessing import Process, Value
35 |
36 | class Logger:
37 | def __init__(self, dt):
38 | self.state_log = defaultdict(list)
39 | self.rew_log = defaultdict(list)
40 | self.dt = dt
41 | self.num_episodes = 0
42 | self.plot_process = None
43 |
44 | def log_state(self, key, value):
45 | self.state_log[key].append(value)
46 |
47 | def log_states(self, dict):
48 | for key, value in dict.items():
49 | self.log_state(key, value)
50 |
51 | def log_rewards(self, dict, num_episodes):
52 | for key, value in dict.items():
53 | if 'rew' in key:
54 | self.rew_log[key].append(value.item() * num_episodes)
55 | self.num_episodes += num_episodes
56 |
57 | def reset(self):
58 | self.state_log.clear()
59 | self.rew_log.clear()
60 |
61 | def plot_states(self):
62 | self.plot_process = Process(target=self._plot)
63 | self.plot_process.start()
64 |
65 | def _plot(self):
66 |         nb_rows = 3
67 |         nb_cols = 3
68 |         fig, axs = plt.subplots(nb_rows, nb_cols)
69 |         for key, value in self.state_log.items():
70 |             time = np.linspace(0, len(value)*self.dt, len(value))
71 |             break
72 |         log = self.state_log
73 |         # plot joint targets and measured positions
74 |         a = axs[1, 0]
75 |         if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured')
76 |         if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target')
77 |         a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position')
78 |         a.legend()
79 |         # plot joint velocity
80 |         a = axs[1, 1]
81 |         if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured')
82 |         if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target')
83 |         a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity')
84 |         a.legend()
85 |         # plot base vel x
86 |         a = axs[0, 0]
87 |         if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured')
88 |         if log["command_x"]: a.plot(time, log["command_x"], label='commanded')
89 |         a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x')
90 |         a.legend()
91 |         # plot base vel y
92 |         a = axs[0, 1]
93 |         if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured')
94 |         if log["command_y"]: a.plot(time, log["command_y"], label='commanded')
95 |         a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y')
96 |         a.legend()
97 |         # plot base vel yaw
98 |         a = axs[0, 2]
99 |         if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured')
100 |         if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded')
101 |         a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw')
102 |         a.legend()
103 |         # plot base vel z
104 |         a = axs[1, 2]
105 |         if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured')
106 |         a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z')
107 |         a.legend()
108 |         # plot contact forces
109 |         a = axs[2, 0]
110 |         if log["contact_forces_z"]:
111 |             forces = np.array(log["contact_forces_z"])
112 |             for i in range(forces.shape[1]):
113 |                 a.plot(time, forces[:, i], label=f'force {i}')
114 |         a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces')
115 |         a.legend()
116 |         # plot torque/vel curves
117 |         a = axs[2, 1]
118 |         if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
119 |         a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
120 |         a.legend()
121 |         # plot torques
122 |         a = axs[2, 2]
123 |         if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured')
124 |         a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque')
125 |         a.legend()
126 |         plt.show()
127 |
128 | def print_rewards(self):
129 |         print("Average rewards per second:")
130 |         for key, values in self.rew_log.items():
131 |             mean = np.sum(np.array(values)) / self.num_episodes
132 |             print(f" - {key}: {mean}")
133 |         print(f"Total number of episodes: {self.num_episodes}")
134 |
135 | def __del__(self):
136 |         if self.plot_process is not None:
137 |             self.plot_process.kill()
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/gym_utils/math.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import torch
32 | from torch import Tensor
33 | import numpy as np
34 | from isaacgym.torch_utils import quat_apply, normalize
35 | from typing import Tuple
36 |
37 | # @ torch.jit.script
38 | def quat_apply_yaw(quat, vec):
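    # Rotate vec by the yaw-only part of quat: zero the x/y (roll/pitch) components
    # of the (x, y, z, w) quaternion and renormalize before applying it.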
39 | quat_yaw = quat.clone().view(-1, 4)
40 | quat_yaw[:, :2] = 0.
41 | quat_yaw = normalize(quat_yaw)
42 | return quat_apply(quat_yaw, vec)
43 |
44 | # @ torch.jit.script
45 | def wrap_to_pi(angles):
46 | angles %= 2*np.pi
47 | angles -= 2*np.pi * (angles > np.pi)
48 | return angles
49 |
50 | # @ torch.jit.script
51 | def torch_rand_sqrt_float(lower, upper, shape, device):
52 | # type: (float, float, Tuple[int, int], str) -> Tensor
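    # Sample uniformly in [-1, 1], apply a signed square root (which pushes mass
    # toward the extremes), then rescale to [lower, upper].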
53 | r = 2*torch.rand(*shape, device=device) - 1
54 | r = torch.where(r<0., -torch.sqrt(-r), torch.sqrt(r))
55 | r = (r + 1.) / 2.
56 | return (upper - lower) * r + lower
57 |
58 | # @ torch.jit.script
59 | def torch_rand_int(lower, upper, shape, device):
60 | # type: (float, float, Tuple[int, int], str) -> Tensor
61 | return ((upper - lower) * torch.rand(*shape, device=device).squeeze(1) + lower).long().float()
62 |
63 | def sample_unit_vector(n, dim, device):
64 | tensor = torch.randn(n, dim, device=device)
65 | unit_vector = tensor / torch.norm(tensor, dim=-1, keepdim=True)
66 | return unit_vector
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/gym_utils/motor_delay_fft.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class MotorDelay_80(nn.Module):
6 | def __init__(self, num_envs, num_actions, device="cuda:0"):
7 | super(MotorDelay_80, self).__init__()
8 | self.a = 1.2766
9 | self.b = 12.13208
10 | # self.alpha = 1.0
11 | self.alpha = torch.exp(torch.tensor([-1 / self.b]).to(device))
12 | self.beta = self.a / self.b
13 | # self.y_pre = 0.0
14 | self.y_pre = torch.zeros(num_envs, num_actions, dtype = torch.float, device=device)
15 |
16 |
17 | def forward(self, x):
18 | if x.dim() ==1:
19 | x = x.unsqueeze(1)
20 |
21 | # if self.y_pre is None:
22 | # self.y_pre = torch.zeros(x.size(0), x.size(1), dtype = x.dtype, device=x.device)
23 |
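        # Discrete first-order lag (low-pass) filter modelling motor response:
        # y[k] = exp(-1/b) * y[k-1] + (a/b) * x[k]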
24 | y = self.alpha * self.y_pre + self.beta * x
25 | self.y_pre = y
26 | return y
27 |
28 | def reset(self, env_idx):
29 | self.y_pre[env_idx] = 0
30 |
31 |
32 | class MotorDelay_130(nn.Module):
33 | def __init__(self, num_envs, num_actions, device="cuda:0"):
34 | super(MotorDelay_130, self).__init__()
35 | self.a = 0.91
36 | self.b = 11.28
37 | # self.alpha = 1.0
38 | self.alpha = torch.exp(torch.tensor([-1 / self.b]).to(device))
39 | self.beta = self.a / self.b
40 | # self.y_pre = 0.0
41 | self.y_pre = torch.zeros(num_envs, num_actions, dtype = torch.float, device=device)
42 |
43 |
44 | def forward(self, x):
45 | if x.dim() ==1:
46 | x = x.unsqueeze(1)
47 |
48 | # if self.y_pre is None:
49 | # self.y_pre = torch.zeros(x.size(0), x.size(1), dtype = x.dtype, device=x.device)
50 |
51 | y = self.alpha * self.y_pre + self.beta * x
52 | self.y_pre = y
53 | return y
54 |
55 | def reset(self, env_idx):
56 | self.y_pre[env_idx] = 0
57 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/gym_utils/storage.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
3 |
4 | class ObsStorage:
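    # Fixed-size buffer of (observation, expert action) pairs, laid out as
    # [num_transitions_per_env, num_envs, ...] (e.g. for imitation/distillation-style updates).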
5 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, action_shape, device):
6 | self.device = device
7 |
8 | # Core
9 | self.obs = torch.zeros(num_transitions_per_env, num_envs, *obs_shape).to(self.device)
10 | self.expert = torch.zeros(num_transitions_per_env, num_envs, *action_shape).to(self.device)
11 | self.device = device
12 |
13 | self.num_envs = num_envs
14 | self.num_transitions_per_env = num_transitions_per_env
15 | self.step = 0
16 |
17 | def add_obs(self, obs, expert_action):
18 | if self.step >= self.num_transitions_per_env:
19 | raise AssertionError("Rollout buffer overflow")
20 | self.obs[self.step].copy_(torch.from_numpy(obs).to(self.device))
21 | self.expert[self.step].copy_(expert_action)
22 | self.step += 1
23 |
24 | def clear(self):
25 | self.step = 0
26 |
27 | def mini_batch_generator_shuffle(self, num_mini_batches):
28 | batch_size = self.num_envs * self.num_transitions_per_env
29 | mini_batch_size = batch_size // num_mini_batches
30 |
31 | for indices in BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=True):
32 | obs_batch = self.obs.view(-1, *self.obs.size()[2:])[indices]
33 | expert_action_batch = self.expert.view(-1, *self.expert.size()[2:])[indices]
34 |             yield obs_batch, expert_action_batch
35 |
36 | def mini_batch_generator_inorder(self, num_mini_batches):
37 | batch_size = self.num_envs * self.num_transitions_per_env
38 | mini_batch_size = batch_size // num_mini_batches
39 |
40 | for batch_id in range(num_mini_batches):
41 | yield self.obs.view(-1, *self.obs.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
42 | self.expert.view(-1, *self.expert.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size]
43 |
44 | class RolloutStorage:
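    # On-policy rollout buffer: stores per-step observations, actions, rewards, dones,
    # values and action log-probs, laid out as [num_transitions_per_env, num_envs, ...],
    # plus the returns/advantages computed from them for PPO updates.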
45 | def __init__(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, actions_shape, device):
46 | self.device = device
47 |
48 | # Core
49 |         self.critic_obs = torch.zeros(num_transitions_per_env, num_envs, *critic_obs_shape).to(self.device)
50 |         self.actor_obs = torch.zeros(num_transitions_per_env, num_envs, *actor_obs_shape).to(self.device)
51 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device)
52 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape).to(self.device)
53 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1).byte().to(self.device)
54 |
55 | # For PPO
56 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device)
57 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device)
58 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device)
59 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1).to(self.device)
60 |
61 | self.num_transitions_per_env = num_transitions_per_env
62 | self.num_envs = num_envs
63 | self.device = device
64 |
65 | self.step = 0
66 |
67 | def add_transitions(self, actor_obs, critic_obs, actions, rewards, dones, values, actions_log_prob):
68 | if self.step >= self.num_transitions_per_env:
69 | raise AssertionError("Rollout buffer overflow")
70 | self.critic_obs[self.step].copy_(torch.from_numpy(critic_obs).to(self.device))
71 | self.actor_obs[self.step].copy_(torch.from_numpy(actor_obs).to(self.device))
72 | self.actions[self.step].copy_(actions.to(self.device))
73 | self.rewards[self.step].copy_(torch.from_numpy(rewards).view(-1, 1).to(self.device))
74 | self.dones[self.step].copy_(torch.from_numpy(dones).view(-1, 1).to(self.device))
75 | self.values[self.step].copy_(values.to(self.device))
76 | self.actions_log_prob[self.step].copy_(actions_log_prob.view(-1, 1).to(self.device))
77 | self.step += 1
78 |
79 | def clear(self):
80 | self.step = 0
81 |
82 | def compute_returns(self, last_values, gamma, lam):
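        # Generalized Advantage Estimation (GAE), iterating backwards over the rollout:
        #   delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
        #   A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}
        # and returns_t = A_t + V(s_t).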
83 | advantage = 0
84 | for step in reversed(range(self.num_transitions_per_env)):
85 | if step == self.num_transitions_per_env - 1:
86 | next_values = last_values
87 | # next_is_not_terminal = 1.0 - self.dones[step].float()
88 | else:
89 | next_values = self.values[step + 1]
90 | # next_is_not_terminal = 1.0 - self.dones[step+1].float()
91 |
92 | next_is_not_terminal = 1.0 - self.dones[step].float()
93 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step]
94 | advantage = delta + next_is_not_terminal * gamma * lam * advantage
95 | self.returns[step] = advantage + self.values[step]
96 |
97 | # Compute and normalize the advantages
98 | self.advantages = self.returns - self.values
99 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8)
100 |
101 | def mini_batch_generator_shuffle(self, num_mini_batches):
102 | batch_size = self.num_envs * self.num_transitions_per_env
103 | mini_batch_size = batch_size // num_mini_batches
104 |
105 | for indices in BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=True):
106 | actor_obs_batch = self.actor_obs.view(-1, *self.actor_obs.size()[2:])[indices]
107 | critic_obs_batch = self.critic_obs.view(-1, *self.critic_obs.size()[2:])[indices]
108 | actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
109 | values_batch = self.values.view(-1, 1)[indices]
110 | returns_batch = self.returns.view(-1, 1)[indices]
111 | old_actions_log_prob_batch = self.actions_log_prob.view(-1, 1)[indices]
112 | advantages_batch = self.advantages.view(-1, 1)[indices]
113 | yield actor_obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch
114 |
115 | def mini_batch_generator_inorder(self, num_mini_batches):
116 | batch_size = self.num_envs * self.num_transitions_per_env
117 | mini_batch_size = batch_size // num_mini_batches
118 |
119 | for batch_id in range(num_mini_batches):
120 | yield self.actor_obs.view(-1, *self.actor_obs.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
121 | self.critic_obs.view(-1, *self.critic_obs.size()[2:])[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
122 | self.actions.view(-1, self.actions.size(-1))[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
123 | self.values.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
124 | self.advantages.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
125 | self.returns.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size], \
126 | self.actions_log_prob.view(-1, 1)[batch_id*mini_batch_size:(batch_id+1)*mini_batch_size]
127 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/gym_utils/task_registry.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from copy import deepcopy
32 | import os
33 | from datetime import datetime
34 | from typing import Tuple
35 | import torch
36 | import numpy as np
37 |
38 | from rsl_rl.env import VecEnv
39 | from rsl_rl.runners import OnPolicyRunner
40 |
41 | from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
42 | from .helpers import get_args, update_cfg_from_args, class_to_dict, get_load_path, set_seed, parse_sim_params
43 | from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
44 |
45 |
46 | class TaskRegistry():
47 | def __init__(self):
48 | self.task_classes = {}
49 | self.env_cfgs = {}
50 | self.train_cfgs = {}
51 |
52 | def register(self, name: str, task_class: VecEnv, env_cfg: LeggedRobotCfg, train_cfg: LeggedRobotCfgPPO):
53 | self.task_classes[name] = task_class
54 | self.env_cfgs[name] = env_cfg
55 | self.train_cfgs[name] = train_cfg
56 |
57 | def get_task_class(self, name: str) -> VecEnv:
58 | return self.task_classes[name]
59 |
60 | def get_cfgs(self, name) -> Tuple[LeggedRobotCfg, LeggedRobotCfgPPO]:
61 | train_cfg = self.train_cfgs[name]
62 | env_cfg = self.env_cfgs[name]
63 | # copy seed
64 | env_cfg.seed = train_cfg.seed
65 | return env_cfg, train_cfg
66 |
67 | def make_env(self, name, args=None, env_cfg=None) -> Tuple[VecEnv, LeggedRobotCfg]:
68 |         """ Creates an environment either from a registered name or from the provided config file.
69 |
70 | Args:
71 | name (string): Name of a registered env.
72 |             args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None.
73 | env_cfg (Dict, optional): Environment config file used to override the registered config. Defaults to None.
74 |
75 | Raises:
76 | ValueError: Error if no registered env corresponds to 'name'
77 |
78 | Returns:
79 | isaacgym.VecTaskPython: The created environment
80 | Dict: the corresponding config file
81 | """
82 | # if no args passed get command line arguments
83 | if args is None:
84 | args = get_args()
85 | # check if there is a registered env with that name
86 | if name in self.task_classes:
87 | task_class = self.get_task_class(name)
88 | else:
89 | raise ValueError(f"Task with name: {name} was not registered")
90 | if env_cfg is None:
91 | # load config files
92 | env_cfg, _ = self.get_cfgs(name)
93 | # override cfg from args (if specified)
94 | env_cfg, _ = update_cfg_from_args(env_cfg, None, args)
95 | set_seed(env_cfg.seed)
96 | # parse sim params (convert to dict first)
97 | sim_params = {"sim": class_to_dict(env_cfg.sim)}
98 | sim_params = parse_sim_params(args, sim_params)
99 | env = task_class( cfg=env_cfg,
100 | sim_params=sim_params,
101 | physics_engine=args.physics_engine,
102 | sim_device=args.sim_device,
103 | headless=args.headless)
104 | return env, env_cfg
105 |
106 | def make_alg_runner(self, env, name=None, args=None, train_cfg=None, init_wandb=True, log_root="default", **kwargs):
107 |         """ Creates the training algorithm either from a registered name or from the provided config file.
108 |
109 | Args:
110 | env (isaacgym.VecTaskPython): The environment to train (TODO: remove from within the algorithm)
111 | name (string, optional): Name of a registered env. If None, the config file will be used instead. Defaults to None.
112 |             args (Args, optional): Isaac Gym command line arguments. If None get_args() will be called. Defaults to None.
113 | train_cfg (Dict, optional): Training config file. If None 'name' will be used to get the config file. Defaults to None.
114 | log_root (str, optional): Logging directory for Tensorboard. Set to 'None' to avoid logging (at test time for example).
115 |                 Logs will be saved in <log_root>/<date_time>_<run_name>. Defaults to "default"=<path_to_LEGGED_GYM>/logs/<experiment_name>.
116 |
117 | Raises:
118 | ValueError: Error if neither 'name' or 'train_cfg' are provided
119 |             Warning: If both 'name' and 'train_cfg' are provided, 'name' is ignored
120 |
121 | Returns:
122 | PPO: The created algorithm
123 | Dict: the corresponding config file
124 | """
125 | # if no args passed get command line arguments
126 | if args is None:
127 | args = get_args()
128 | # if config files are passed use them, otherwise load from the name
129 | if train_cfg is None:
130 | if name is None:
131 | raise ValueError("Either 'name' or 'train_cfg' must be not None")
132 | # load config files
133 | _, train_cfg = self.get_cfgs(name)
134 | else:
135 | if name is not None:
136 | print(f"'train_cfg' provided -> Ignoring 'name={name}'")
137 | # override cfg from args (if specified)
138 | _, train_cfg = update_cfg_from_args(None, train_cfg, args)
139 |
140 | if log_root=="default":
141 | log_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', train_cfg.runner.experiment_name)
142 | log_dir = os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name)
143 | elif log_root is None:
144 | log_dir = None
145 | else:
146 | log_dir = log_root #os.path.join(log_root, datetime.now().strftime('%b%d_%H-%M-%S') + '_' + train_cfg.runner.run_name)
147 |
148 | train_cfg_dict = class_to_dict(train_cfg)
149 | runner_class = eval(train_cfg.runner.runner_class_name)
150 | runner = runner_class(env,
151 | train_cfg_dict,
152 | log_dir,
153 | init_wandb=init_wandb,
154 | device=args.rl_device, **kwargs)
155 | #save resume path before creating a new log_dir
156 | # return runner, train_cfg, None
157 | resume = train_cfg.runner.resume
158 | if args.resumeid:
159 | log_root = LEGGED_GYM_ROOT_DIR + f"/logs/{args.proj_name}/" + args.resumeid
160 | resume = True
161 | if resume:
162 | # load previously trained model
163 | print(log_root)
164 | print(train_cfg.runner.load_run)
165 | # load_root = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs', "rough_a1", train_cfg.runner.load_run)
166 | resume_path = get_load_path(log_root, load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint)
167 | runner.load(resume_path)
168 | # if not train_cfg.policy.continue_from_last_std:
169 | # runner.alg.actor_critic.reset_std(train_cfg.policy.init_noise_std, 19, device=runner.device)
170 |
171 | if "return_log_dir" in kwargs:
172 | return runner, train_cfg, os.path.dirname(resume_path)
173 | else:
174 | return runner, train_cfg
175 |
176 | # make global task registry
177 | task_registry = TaskRegistry()
178 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/eval.sh:
--------------------------------------------------------------------------------
1 | robot_name=${1} # Remove the space around the assignment operator
2 | task_name="${robot_name}_up"
3 |
4 | proj_name="${robot_name}_up"
5 | exptid=${2}
6 | checkpoint=${3}
7 |
8 | python play.py --task "${task_name}" \
9 | --proj_name "${proj_name}" \
10 | --exptid "${exptid}" \
11 | --num_envs 1 \
12 | --checkpoint "${checkpoint}" \
13 | --record_video
14 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/eval_track.sh:
--------------------------------------------------------------------------------
1 | robot_name=${1} # Remove the space around the assignment operator
2 | task_name="${robot_name}_track"
3 |
4 | proj_name="${robot_name}_track"
5 | exptid=${2}
6 | checkpoint=${3}
7 | traj_name=${4}
8 |
9 | python play.py --task "${task_name}" \
10 | --proj_name "${proj_name}" \
11 | --exptid "${exptid}" \
12 | --num_envs 1 \
13 | --checkpoint "${checkpoint}" \
14 | --traj_name "${traj_name}"\
15 | --record_video
16 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/facingdown_poses.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/legged_gym/scripts/facingdown_poses.npy
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/facingup_poses.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/legged_gym/scripts/facingup_poses.npy
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/log.sh:
--------------------------------------------------------------------------------
1 | robot_name=${1} # Remove the space around the assignment operator
2 | task_name="${robot_name}_up"
3 |
4 | proj_name="${robot_name}_up"
5 | exptid=${2}
6 | checkpoint=${3}
7 |
8 | python log_traj.py --task "${task_name}" \
9 | --proj_name "${proj_name}" \
10 | --exptid "${exptid}" \
11 | --num_envs 1 \
12 | --checkpoint "${checkpoint}" \
13 | --record_video
14 | # --sim_device cuda:7 \
15 | # --checkpoint 20000 \
16 | # --use_jit \
17 | # --teleop_mode
18 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/log_traj.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # This file was modified by HumanUP authors in 2024-2025
3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved.
5 | # SPDX-License-Identifier: BSD-3-Clause
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | #
32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved.
33 |
34 | from legged_gym import LEGGED_GYM_ROOT_DIR
35 | import os
36 | import code
37 |
38 | import isaacgym
39 | from legged_gym.envs import *
40 | from legged_gym.gym_utils import get_args, export_policy_as_jit, task_registry, Logger
41 | from isaacgym import gymtorch, gymapi, gymutil
42 | import numpy as np
43 | import torch
44 | import cv2
45 | from collections import deque
46 | import statistics
47 | import faulthandler
48 | from copy import deepcopy
49 | import matplotlib.pyplot as plt
50 | from time import time, sleep
51 | from PIL import Image
52 | from legged_gym.gym_utils.helpers import get_load_path as get_load_path_auto
53 | from tqdm import tqdm
54 |
55 |
56 | def get_load_path(root, load_run=-1, checkpoint=-1, model_name_include="jit"):
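    # With checkpoint == -1, pick the newest file in `root` whose name contains
    # `model_name_include` (zero-padded string sort); otherwise only the checkpoint
    # number is returned as a string and `model` is left as None.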
57 | if checkpoint == -1:
58 | models = [file for file in os.listdir(root) if model_name_include in file]
59 | models.sort(key=lambda m: "{0:0>15}".format(m))
60 | model = models[-1]
61 | checkpoint = model.split("_")[-1].split(".")[0]
62 | else:
63 | model = None
64 | checkpoint = str(checkpoint)
65 | return model, checkpoint
66 |
67 |
68 | def set_play_cfg(env_cfg):
69 | env_cfg.env.num_envs = 2 # 2 if not args.num_envs else args.num_envs
70 | env_cfg.terrain.num_rows = 5
71 | env_cfg.terrain.num_cols = 5
72 | env_cfg.terrain.curriculum = False
73 | env_cfg.terrain.max_difficulty = False
74 |
75 | env_cfg.noise.add_noise = False
76 | env_cfg.domain_rand.randomize_friction = False
77 | env_cfg.domain_rand.push_robots = False
78 | env_cfg.domain_rand.push_interval_s = 5
79 | env_cfg.domain_rand.max_push_vel_xy = 2.5
80 | env_cfg.domain_rand.randomize_base_mass = False
81 | env_cfg.domain_rand.randomize_base_com = False
82 | env_cfg.domain_rand.action_delay = False
83 |
84 |
85 | def play(args):
86 | faulthandler.enable()
87 | exptid = args.exptid
88 | log_pth = "../../logs/{}/".format(args.proj_name) + args.exptid
89 | stand_flag = False
90 | if args.proj_name.strip() == 'g1waist_up' :
91 | stand_flag = True
92 | elif args.proj_name.strip() == 'g1waistroll_up':
93 | stand_flag = False
94 | else:
95 | print("Invalid project name")
96 | return
97 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
98 |
99 | set_play_cfg(env_cfg)
100 |
101 | env_cfg.env.record_video = args.record_video
102 | if_normalize = env_cfg.env.normalize_obs
103 |
104 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
105 | obs = env.get_observations()
106 |
107 | # load policy
108 | train_cfg.runner.resume = True
109 | ppo_runner, train_cfg, log_pth = task_registry.make_alg_runner(
110 | log_root=log_pth,
111 | env=env,
112 | name=args.task,
113 | args=args,
114 | train_cfg=train_cfg,
115 | return_log_dir=True,
116 | )
117 |
118 | if args.use_jit:
119 | path = os.path.join(log_pth, "traced")
120 | model, checkpoint = get_load_path(root=path, checkpoint=args.checkpoint)
121 | path = os.path.join(path, model)
122 | print("Loading jit for policy: ", path)
123 | policy_jit = torch.jit.load(path, map_location=env.device)
124 | else:
125 | policy = ppo_runner.get_inference_policy(device=env.device)
126 | if if_normalize:
127 | normalizer = ppo_runner.get_normalizer(device=env.device)
128 |
129 | actions = torch.zeros(env.num_envs, env.num_actions, device=env.device, requires_grad=False)
130 |
131 | if args.record_video:
132 | mp4_writers = []
133 | import imageio
134 |
135 | env.enable_viewer_sync = False
136 | for i in range(env.num_envs):
137 | model, checkpoint = get_load_path(root=log_pth, checkpoint=args.checkpoint, model_name_include="model")
138 | video_name = args.proj_name + "-" + args.exptid + "-" + checkpoint + ".mp4"
139 | run_name = log_pth.split("/")[-1]
140 | path = f"../../logs/videos/{args.proj_name}/{run_name}"
141 | if not os.path.exists(path):
142 | os.makedirs(path)
143 | video_name = os.path.join(path, video_name)
144 | mp4_writer = imageio.get_writer(video_name, fps=50, codec="libx264")
145 | mp4_writers.append(mp4_writer)
146 |
147 | if args.record_log:
148 | import json
149 |
150 | run_name = log_pth.split("/")[-1]
151 | logs_dict = []
152 | dict_name = args.proj_name + "-" + args.exptid + ".json"
153 | path = f"../../logs/env_logs/{run_name}"
154 | if not os.path.exists(path):
155 | os.makedirs(path)
156 | dict_name = os.path.join(path, dict_name)
157 |
158 | if not (args.record_video or args.record_log):
159 | traj_length = 100 * int(env.max_episode_length)
160 | else:
161 | traj_length = int(env.max_episode_length)
162 |
163 | env_id = env.lookat_id
164 | finish_cnt = 0 # break if finish_cnt > 30
165 |
166 | dof_pos_all = None
167 | head_height_all = None
168 | projected_gravity_all = None
169 |
170 | for i in tqdm(range(traj_length)):
171 | if args.use_jit:
172 | actions = policy_jit(obs.detach())
173 | else:
174 | if if_normalize:
175 | normalized_obs = normalizer(obs.detach())
176 | else:
177 | normalized_obs = obs.detach()
178 | actions = policy(normalized_obs, hist_encoding=False)
179 |
180 | obs, _, rews, dones, infos = env.step(actions.detach())
181 | if dof_pos_all is None:
182 | dof_pos_all = env.dof_pos
183 | else:
184 | dof_pos_all = torch.cat((dof_pos_all, env.dof_pos), dim=0)
185 | if head_height_all is None:
186 | head_height_all = env.rigid_body_states[:, env.head_idx, 2].unsqueeze(0)
187 | else:
188 | head_height_all = torch.cat((head_height_all, env.rigid_body_states[:, env.head_idx, 2].unsqueeze(0)), dim=0)
189 | if projected_gravity_all is None:
190 | projected_gravity_all = env.projected_gravity
191 | else:
192 | projected_gravity_all = torch.cat((projected_gravity_all, env.projected_gravity), dim=0)
193 | if stand_flag: # g1waist_up
194 | if env.rigid_body_states[:, env.head_idx, 2] > 1.2:
195 | finish_cnt += 1
196 | else: # g1waistroll_up
197 |             target_projected_gravity = torch.tensor([-1.0, 0.0, 0.0], device=env.device)
198 | gravity_error = 1 - torch.nn.functional.cosine_similarity(env.projected_gravity, target_projected_gravity, dim=-1) # [0, 2]
199 | if gravity_error < 0.1:
200 | finish_cnt += 1
201 |
202 | if args.record_video:
203 | imgs = env.render_record(mode="rgb_array")
204 | if imgs is not None:
205 | for i in range(env.num_envs):
206 | mp4_writers[i].append_data(imgs[i])
207 |
208 | if args.record_log:
209 | log_dict = env.get_episode_log()
210 | logs_dict.append(log_dict)
211 |
212 | # Interaction
213 | if env.button_pressed:
214 | print(f"env_id: {env.lookat_id:<{5}}")
215 |
216 | if finish_cnt > 30:
217 | break
218 |
219 | if args.record_video:
220 | for mp4_writer in mp4_writers:
221 | mp4_writer.close()
222 |
223 | if args.record_log:
224 | with open(dict_name, "w") as f:
225 | json.dump(logs_dict, f)
226 |
227 | record_traj = True
228 | if record_traj:
229 | import pickle
230 | if not os.path.exists(f"../../logs/env_logs/{run_name}"):
231 | os.makedirs(f"../../logs/env_logs/{run_name}")
232 | with open(f"../../logs/env_logs/{run_name}/dof_pos_all.pkl", "wb") as f:
233 | pickle.dump(dof_pos_all, f)
234 | with open(f"../../logs/env_logs/{run_name}/head_height_all.pkl", "wb") as f:
235 | pickle.dump(head_height_all, f)
236 | with open(f"../../logs/env_logs/{run_name}/projected_gravity_all.pkl", "wb") as f:
237 | pickle.dump(projected_gravity_all, f)
238 |
239 |
240 | if __name__ == "__main__":
241 | args = get_args()
242 | play(args)
243 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/play.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from legged_gym import LEGGED_GYM_ROOT_DIR
32 | import os
33 | import code
34 |
35 | import isaacgym
36 | from legged_gym.envs import *
37 | from legged_gym.gym_utils import get_args, export_policy_as_jit, task_registry, Logger
38 | from isaacgym import gymtorch, gymapi, gymutil
39 | import numpy as np
40 | import torch
41 | import cv2
42 | from collections import deque
43 | import statistics
44 | import faulthandler
45 | from copy import deepcopy
46 | import matplotlib.pyplot as plt
47 | from time import time, sleep
48 | from PIL import Image
49 | from legged_gym.gym_utils.helpers import get_load_path as get_load_path_auto
50 | from tqdm import tqdm
51 |
52 | from isaacgym.torch_utils import *
53 |
54 |
55 | def get_load_path(root, load_run=-1, checkpoint=-1, model_name_include="jit"):
56 | if checkpoint == -1:
57 | models = [file for file in os.listdir(root) if model_name_include in file]
58 | models.sort(key=lambda m: "{0:0>15}".format(m))
59 | model = models[-1]
60 | checkpoint = model.split("_")[-1].split(".")[0]
61 | else:
62 | model = None
63 | checkpoint = str(checkpoint)
64 | return model, checkpoint
65 |
66 |
67 | def set_play_cfg(env_cfg):
68 | env_cfg.env.num_envs = 2 # 2 if not args.num_envs else args.num_envs
69 | env_cfg.terrain.num_rows = 5
70 | env_cfg.terrain.num_cols = 5
71 | env_cfg.terrain.curriculum = False
72 | env_cfg.terrain.max_difficulty = False
73 |
74 | env_cfg.domain_rand.domain_rand_general = True
75 | env_cfg.noise.add_noise = False
76 |
77 | env_cfg.domain_rand.randomize_friction = True
78 | env_cfg.domain_rand.push_robots = False
79 | env_cfg.domain_rand.push_interval_s = 5
80 | env_cfg.domain_rand.max_push_vel_xy = 2.5
81 | env_cfg.domain_rand.randomize_base_mass = False
82 | env_cfg.domain_rand.randomize_base_com = False
83 | env_cfg.domain_rand.action_delay = False
84 |
85 |
86 | def play(args):
87 | faulthandler.enable()
88 | exptid = args.exptid
89 | log_pth = "../../logs/{}/".format(args.proj_name) + args.exptid
90 |
91 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
92 |
93 | set_play_cfg(env_cfg)
94 |
95 | env_cfg.env.record_video = args.record_video
96 | if_normalize = env_cfg.env.normalize_obs
97 |
98 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
99 | obs = env.get_observations()
100 |
101 | # load policy
102 | train_cfg.runner.resume = True
103 | ppo_runner, train_cfg, log_pth = task_registry.make_alg_runner(
104 | log_root=log_pth,
105 | env=env,
106 | name=args.task,
107 | args=args,
108 | train_cfg=train_cfg,
109 | return_log_dir=True,
110 | )
111 |
112 | if args.use_jit:
113 | path = os.path.join(log_pth, "traced")
114 | model, checkpoint = get_load_path(root=path, checkpoint=args.checkpoint)
115 | path = os.path.join(path, model)
116 | print("Loading jit for policy: ", path)
117 | policy_jit = torch.jit.load(path, map_location=env.device)
118 | else:
119 | policy = ppo_runner.get_inference_policy(device=env.device)
120 | if if_normalize:
121 | normalizer = ppo_runner.get_normalizer(device=env.device)
122 |
123 | actions = torch.zeros(env.num_envs, env.num_actions, device=env.device, requires_grad=False)
124 |
125 | if args.record_video:
126 | mp4_writers = []
127 | import imageio
128 |
129 | env.enable_viewer_sync = False
130 | for i in range(env.num_envs):
131 | model, checkpoint = get_load_path(root=log_pth, checkpoint=args.checkpoint, model_name_include="model")
132 | video_name = args.proj_name + "-" + args.exptid + "-" + checkpoint + ".mp4"
133 | run_name = log_pth.split("/")[-1]
134 | path = f"../../logs/videos/{args.proj_name}/{run_name}"
135 | if not os.path.exists(path):
136 | os.makedirs(path)
137 | video_name = os.path.join(path, video_name)
138 | mp4_writer = imageio.get_writer(video_name, fps=50, codec="libx264")
139 | mp4_writers.append(mp4_writer)
140 |
141 | if args.record_log:
142 | import json
143 |
144 | run_name = log_pth.split("/")[-1]
145 | logs_dict = []
146 | dict_name = args.proj_name + "-" + args.exptid + ".json"
147 | path = f"../../logs/env_logs/{run_name}"
148 | if not os.path.exists(path):
149 | os.makedirs(path)
150 | dict_name = os.path.join(path, dict_name)
151 |
152 | if not (args.record_video or args.record_log):
153 | traj_length = 100 * int(env.max_episode_length)
154 | else:
155 | traj_length = int(env.max_episode_length)
156 |
157 | env_id = env.lookat_id
158 |
159 | all_projected_gravity = []
160 | all_pitch = []
161 | all_roll = []
162 | all_yaw = []
163 |
164 | for i in tqdm(range(traj_length)):
165 | if args.use_jit:
166 | actions = policy_jit(obs.detach())
167 | else:
168 | if if_normalize:
169 | normalized_obs = normalizer(obs.detach())
170 | else:
171 | normalized_obs = obs.detach()
172 | actions = policy(normalized_obs, hist_encoding=True)
173 |
174 | obs, _, rews, dones, infos = env.step(actions.detach())
175 |
176 | all_roll.append(env.roll[-1].item())
177 | all_pitch.append(env.pitch[-1].item())
178 | all_yaw.append(env.yaw[-1].item())
179 | # all_projected_gravity.append(env.projected_gravity[0, -1].item())
180 |
181 | # torso gravity
182 | torso_quat = env.rigid_body_rot[:, env.torso_idx]
183 | torso_projected_gravity = quat_rotate_inverse(torso_quat, env.gravity_vec)
184 | all_projected_gravity.append(torso_projected_gravity[0, 2].item())
185 |
186 | if args.record_video:
187 | imgs = env.render_record(mode="rgb_array")
188 | if imgs is not None:
189 | for i in range(env.num_envs):
190 | mp4_writers[i].append_data(imgs[i])
191 |
192 | if args.record_log:
193 | log_dict = env.get_episode_log()
194 | logs_dict.append(log_dict)
195 |
196 | # Interaction
197 | if env.button_pressed:
198 | print(f"env_id: {env.lookat_id:<{5}}")
199 |
200 |
201 | if args.record_video:
202 | for mp4_writer in mp4_writers:
203 | mp4_writer.close()
204 |
205 | if args.record_log:
206 | with open(dict_name, "w") as f:
207 | json.dump(logs_dict, f)
208 |
209 |
210 | if __name__ == "__main__":
211 | args = get_args()
212 | play(args)
213 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/run.sh:
--------------------------------------------------------------------------------
1 | robot_name=${1}
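# Assumed usage: bash run.sh <robot_name> <exptid> <device>
#   ${1} robot name (e.g. g1waist), ${2} experiment id, ${3} device passed to --device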
2 | task_name="${robot_name}_up"
3 |
4 | proj_name="${robot_name}_up"
5 | exptid=${2}
6 |
7 | # Run the training script
8 | python train.py --task "${task_name}" \
9 | --proj_name "${proj_name}" \
10 | --exptid "${exptid}" \
11 | --device "${3}" \
12 | --num_envs 4096 \
13 | --headless \
14 | --fix_action_std \
15 | # --debug
16 | # --resume \
17 | # --resumeid XXX
18 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/run_track.sh:
--------------------------------------------------------------------------------
1 | robot_name=${1} # Remove the space around the assignment operator
2 | task_name="${robot_name}_track"
3 |
4 | proj_name="${robot_name}_track"
5 | exptid=${2}
6 | traj_name=${4}
7 |
8 | # Run the training script
9 | python train.py --task "${task_name}" \
10 | --proj_name "${proj_name}" \
11 | --exptid "${exptid}" \
12 | --device "${3}" \
13 | --num_envs 4096 \
14 | --headless \
15 | --fix_action_std \
16 | --traj_name "${traj_name}"\
17 | # --debug \
18 | # --resume \
19 | # --resumeid XXX
20 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/save_jit.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | from statistics import mode
3 | sys.path.append("../../../rsl_rl")
4 | import torch
5 | import torch.nn as nn
6 | from rsl_rl.modules.actor_critic_rma import Actor, StateHistoryEncoder, get_activation
7 | import argparse
8 | import code
9 | import shutil
10 |
11 | def get_load_path(root, load_run=-1, checkpoint=-1, model_name_include="model"):
12 |     if not os.path.isdir(root): # use the first 6 chars to match the run name
13 | model_name_cand = os.path.basename(root)
14 | model_parent = os.path.dirname(root)
15 | model_names = os.listdir(model_parent)
16 | model_names = [name for name in model_names if os.path.isdir(os.path.join(model_parent, name))]
17 | for name in model_names:
18 | if len(name) >= 6:
19 | if name[:6] == model_name_cand:
20 | root = os.path.join(model_parent, name)
21 | if checkpoint==-1:
22 | models = [file for file in os.listdir(root) if model_name_include in file]
23 | models.sort(key=lambda m: '{0:0>15}'.format(m))
24 | model = models[-1]
25 | checkpoint = model.split("_")[-1].split(".")[0]
26 | else:
27 | model = "model_{}.pt".format(checkpoint)
28 |
29 | load_path = os.path.join(root, model)
30 | return load_path, checkpoint
31 |
32 | class HardwareRefNN(nn.Module):
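    # Wraps the trained Actor together with its observation normalizer in a single
    # module so it can be traced with torch.jit and deployed; forward expects the
    # stacked observation of size num_prop + num_hist*num_prop + num_priv_latent + critic_obs_extra.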
33 | def __init__(self, num_prop,
34 | num_priv_latent,
35 | num_hist,
36 | critic_obs_extra,
37 | num_actions,
38 | actor_hidden_dims=[512, 256, 128],
39 | activation='elu',
40 | priv_encoder_dims=[64, 20],
41 | ):
42 | super().__init__()
43 |
44 | self.num_prop = num_prop
45 | self.num_hist = num_hist
46 | self.num_actions = num_actions
47 | self.num_priv_latent = num_priv_latent
48 | num_obs = num_prop + num_hist*num_prop + num_priv_latent + critic_obs_extra
49 | self.num_obs = num_obs
50 | activation = get_activation(activation)
51 |
52 | num_priv_explicit = 0
53 |
54 | self.normalizer = None
55 |
56 | self.actor = Actor(num_prop,
57 | num_actions,
58 | actor_hidden_dims,
59 | priv_encoder_dims, num_priv_latent, num_hist,
60 | activation, tanh_encoder_output=False)
61 |
62 | def load_normalizer(self, normalizer):
63 | self.normalizer = normalizer
64 | self.normalizer.eval()
65 |
66 | def forward(self, obs):
67 | assert obs.shape[1] == self.num_obs, f"Expected {self.num_obs} but got {obs.shape[1]}"
68 | obs = self.normalizer(obs)
69 | return self.actor(obs, hist_encoding=True, eval=False)
70 |
71 | def play(args):
72 | load_run = "../../logs/{}/{}".format(args.proj_name, args.exptid)
73 | checkpoint = args.checkpoint
74 | critic_obs_extra = 0
75 | if args.robot == "g1":
76 | n_priv_latent = 4 + 1 + 23*2 + 3
77 | num_scan = 0
78 | num_actions = 23
79 |
80 | n_proprio = 3 + 2 + 3*num_actions
81 | else:
82 | raise ValueError(f"Robot {args.robot} not supported!")
83 |
84 | history_len = 10
85 |
86 | device = torch.device('cpu')
87 | policy = HardwareRefNN(n_proprio,
88 | n_priv_latent, history_len, critic_obs_extra,
89 | num_actions).to(device)
90 | load_path, checkpoint = get_load_path(root=load_run, checkpoint=checkpoint)
91 | load_run = os.path.dirname(load_path)
92 | print(f"Loading model from: {load_path}")
93 | ac_state_dict = torch.load(load_path, map_location=device)
94 | policy.load_state_dict(ac_state_dict['model_state_dict'], strict=False)
95 | policy.load_normalizer(ac_state_dict['normalizer'])
96 |
97 | policy = policy.to(device)#.cpu()
98 | if not os.path.exists(os.path.join(load_run, "traced")):
99 | os.mkdir(os.path.join(load_run, "traced"))
100 |
101 | # Save the traced actor
102 | policy.eval()
103 | with torch.no_grad():
104 | num_envs = 2
105 |
106 | obs_input = torch.ones(num_envs, n_proprio + n_priv_latent + history_len*n_proprio + critic_obs_extra, device=device)
107 | print("obs_input shape: ", obs_input.shape)
108 |
109 | traced_policy = torch.jit.trace(policy, obs_input)
110 |
111 | # traced_policy = torch.jit.script(policy)
112 | save_path = os.path.join(load_run, "traced", args.exptid + "-" + str(checkpoint) + "-jit.pt")
113 | traced_policy.save(save_path)
114 | print("Saved traced_actor at ", os.path.abspath(save_path))
115 | print("Robot: ", args.robot)
116 |
117 | if __name__ == "__main__":
118 | parser = argparse.ArgumentParser()
119 | parser.add_argument('--proj_name', type=str)
120 | parser.add_argument('--exptid', type=str)
121 | parser.add_argument('--checkpoint', type=int, default=-1)
122 | parser.add_argument('--robot', type=str, default="g1") # options: gr1, h1, g1
123 |
124 | args = parser.parse_args()
125 | play(args)
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/scripts/train.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import os
32 | from datetime import datetime
33 |
34 | import isaacgym
35 | from legged_gym.envs import LEGGED_GYM_ENVS_DIR, LEGGED_GYM_ROOT_DIR
36 | from legged_gym.gym_utils import get_args, task_registry
37 |
38 | import torch
39 | import wandb
40 |
41 |
42 | def train(args):
43 | log_pth = LEGGED_GYM_ROOT_DIR + "/logs/{}/".format(args.proj_name) + args.exptid
44 | try:
45 | os.makedirs(log_pth)
46 | except:
47 | pass
48 |
49 | if args.debug:
50 | mode = "disabled"
51 | args.rows = 10
52 | args.cols = 5
53 | args.num_envs = 64
54 | else:
55 | mode = "online"
56 |
57 | if args.no_wandb:
58 | mode = "disabled"
59 |
60 | robot_type = args.task.split("_")[0]
61 |
62 | wandb.init(project=args.proj_name, name=args.exptid, mode=mode, dir="../../logs")
63 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/legged_robot_config.py", policy="now")
64 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/legged_robot.py", policy="now")
65 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/humanoid_config.py", policy="now")
66 | wandb.save(LEGGED_GYM_ENVS_DIR + "/base/humanoid.py", policy="now")
67 | wandb.save(LEGGED_GYM_ENVS_DIR + "/{}/{}.py".format(robot_type, args.task), policy="now")
68 | wandb.save(LEGGED_GYM_ENVS_DIR + "/{}/{}_config.py".format(robot_type, args.task), policy="now")
69 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1track/g1waist_track_config.py", policy="now")
70 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1track/g1waist_track.py", policy="now")
71 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1rolltrack/g1waistroll_track_config.py", policy="now")
72 | wandb.save(LEGGED_GYM_ENVS_DIR + "/g1rolltrack/g1waistroll_track.py", policy="now")
73 |
74 | env, _ = task_registry.make_env(name=args.task, args=args)
75 |
76 | ppo_runner, train_cfg = task_registry.make_alg_runner(
77 | log_root=log_pth, env=env, name=args.task, args=args
78 | )
79 | ppo_runner.learn(
80 | num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True
81 | )
82 |
83 | if __name__ == "__main__":
84 | args = get_args()
85 | train(args)
86 |
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/tests/test_asset.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # This file was modified by HumanUP authors in 2024-2025
3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-FileCopyrightText: # Copyright (c) 2021 ETH Zurich, Nikita Rudin. All rights reserved.
5 | # SPDX-License-Identifier: BSD-3-Clause
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | #
32 | # Copyright (c) 2024-2025 RoboVision Lab, UIUC. All rights reserved.
33 |
34 | import numpy as np
35 | from isaacgym import gymutil
36 | from isaacgym import gymapi
37 |
38 | from legged_gym import LEGGED_GYM_ROOT_DIR
39 |
40 | import torch
41 |
42 |
43 | gym = gymapi.acquire_gym()
44 |
45 | args = gymutil.parse_arguments(
46 | description="Script for testing the urdf or mjcf asset",
47 | custom_parameters=[
48 | {"name": "--num_envs", "type": int, "default": 2, "help": "Number of environments to create"},
49 | ])
50 |
51 | sim_params = gymapi.SimParams()
52 | if args.physics_engine == gymapi.SIM_FLEX:
53 | sim_params.flex.shape_collision_margin = 0.25
54 | sim_params.flex.num_outer_iterations = 4
55 | sim_params.flex.num_inner_iterations = 10
56 | elif args.physics_engine == gymapi.SIM_PHYSX:
57 | sim_params.substeps = 1
58 | sim_params.physx.solver_type = 1
59 | sim_params.physx.num_position_iterations = 4
60 | sim_params.physx.num_velocity_iterations = 1
61 | sim_params.physx.num_threads = args.num_threads
62 | sim_params.physx.use_gpu = args.use_gpu
63 |
64 | sim_params.use_gpu_pipeline = False
65 | sim_params.up_axis = gymapi.UP_AXIS_Z
66 | sim_params.gravity.x = 0
67 | sim_params.gravity.y = 0
68 | sim_params.gravity.z = -9.81
69 | if args.use_gpu_pipeline:
70 | print("WARNING: Forcing CPU pipeline.")
71 |
72 | sim = gym.create_sim(args.compute_device_id, args.graphics_device_id, args.physics_engine, sim_params)
73 | if sim is None:
74 | print("*** Failed to create sim")
75 | quit()
76 |
77 | # add ground plane
78 | plane_params = gymapi.PlaneParams()
79 | plane_params.normal = gymapi.Vec3(0, 0, 1)
80 | gym.add_ground(sim, plane_params)
81 |
82 | # create viewer
83 | viewer = gym.create_viewer(sim, gymapi.CameraProperties())
84 | if viewer is None:
85 | print("*** Failed to create viewer")
86 | quit()
87 |
88 | asset_root = f'{LEGGED_GYM_ROOT_DIR}/resources/robots/g1_modified/'
89 |
90 | asset_file = "g1_29dof_fixedwrist_custom_collision_with_head.urdf"
91 |
92 | asset_options = gymapi.AssetOptions()
93 | asset_options.fix_base_link = True
94 | asset_options.use_mesh_materials = True
95 | asset_options.disable_gravity = True
96 |
97 | asset = gym.load_asset(sim, asset_root, asset_file, asset_options)
98 |
99 | num_envs = args.num_envs
100 | num_per_row = int(np.sqrt(num_envs))
101 | env_spacing = 2.0
102 | env_lower = gymapi.Vec3(-env_spacing, 0.0, -env_spacing)
103 | env_upper = gymapi.Vec3(env_spacing, env_spacing, env_spacing)
104 |
105 | envs = []
106 |
107 | # subscribe to spacebar event for reset
108 | gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_R, "reset")
109 |
110 | for i in range(num_envs):
111 | env = gym.create_env(sim, env_lower, env_upper, num_per_row)
112 | envs.append(env)
113 |
114 |     # create humanoid actor
115 | pose = gymapi.Transform()
116 | pose.p = gymapi.Vec3(0, 0, 1.5)
117 | pose.r = gymapi.Quat(0, 0, 0, 1)
118 | humanoid_handle = gym.create_actor(env, asset, pose, "humanoid", i, 1)
119 |
120 | gym.viewer_camera_look_at(viewer, None, gymapi.Vec3(5, 5, 5), gymapi.Vec3(0, 0, 0))
121 |
122 |
123 | initial_state = np.copy(gym.get_sim_rigid_body_states(sim, gymapi.STATE_ALL))
124 |
125 |
126 | while not gym.query_viewer_has_closed(viewer):
127 |
128 | # Get input actions from the viewer and handle them appropriately
129 | for evt in gym.query_viewer_action_events(viewer):
130 | if evt.action == "reset" and evt.value > 0:
131 | gym.set_sim_rigid_body_states(sim, initial_state, gymapi.STATE_ALL)
132 |
133 | # step the physics
134 | gym.simulate(sim)
135 | gym.fetch_results(sim, True)
136 |
137 | # update the viewer
138 | gym.step_graphics(sim)
139 | gym.draw_viewer(viewer, sim, True)
140 |
141 | # Wait for dt to elapse in real time.
142 | # This synchronizes the physics simulation with the rendering rate.
143 | gym.sync_frame_time(sim)
144 |
145 | gym.destroy_viewer(viewer)
146 | gym.destroy_sim(sim)
147 |
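test_asset.py only loads the URDF and renders it; it does not report what the parser actually produced. A small optional check, not part of the file above but using only standard Isaac Gym asset queries, could be appended right after the `gym.load_asset(...)` call to print the joint and body structure of the modified G1 model:

```python
# Hypothetical addition after `asset = gym.load_asset(...)` in test_asset.py:
# print what the URDF parser produced, using standard Isaac Gym asset queries.
num_dofs = gym.get_asset_dof_count(asset)
num_bodies = gym.get_asset_rigid_body_count(asset)
print(f"Loaded {asset_file}: {num_dofs} DOFs, {num_bodies} rigid bodies")
for dof_name in gym.get_asset_dof_names(asset):
    print("  dof:", dof_name)
for body_name in gym.get_asset_rigid_body_names(asset):
    print("  body:", body_name)
```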
--------------------------------------------------------------------------------
/simulation/legged_gym/legged_gym/tests/test_env.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import numpy as np
32 | import os
33 | from datetime import datetime
34 |
35 | import isaacgym
36 | from legged_gym.envs import *
37 | from legged_gym.gym_utils import get_args, task_registry, Logger
38 |
39 | import torch
40 |
41 |
42 | def test_env(args):
43 | env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
44 |
45 | env_cfg.env.num_envs = 10
46 | env_cfg.terrain.num_rows = 10
47 | env_cfg.terrain.num_cols = 10
48 |
49 | # prepare environment
50 | env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
51 | for i in range(int(10*env.max_episode_length)):
52 |         actions = torch.zeros(env.num_envs, env.num_actions, device=env.device)
53 | obs, _, rew, done, info = env.step(actions)
54 | print("Done")
55 |
56 | if __name__ == '__main__':
57 | args = get_args()
58 | test_env(args)
59 |
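The loop above steps every environment with zero actions, which verifies construction and stepping but leaves the joints at their default targets. A hedged variation, keeping the same `env.step` signature but using small random actions, exercises the actuators and contacts a little harder:

```python
# Hypothetical variant of the stepping loop in test_env.py: small random
# actions instead of zeros, so joint limits and contacts are actually hit.
for _ in range(int(10 * env.max_episode_length)):
    actions = 0.1 * torch.randn(env.num_envs, env.num_actions, device=env.device)
    obs, _, rew, done, info = env.step(actions)
```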
--------------------------------------------------------------------------------
/simulation/legged_gym/licenses/assets/ANYmal_b_license.txt:
--------------------------------------------------------------------------------
1 | Copyright 2019 ANYbotics, https://www.anybotics.com
2 |
3 | Redistribution and use in source and binary forms, with or without modification,
4 | are permitted provided that the following conditions are met:
5 |
6 | 1. Redistributions of source code must retain the above copyright notice, this
7 | list of conditions and the following disclaimer.
8 |
9 | 2. Redistributions in binary form must reproduce the above copyright notice, this
10 | list of conditions and the following disclaimer in the documentation and/or
11 | other materials provided with the distribution.
12 |
13 | 3. The name of ANYbotics and ANYmal may not be used to endorse or promote products
14 | derived from this software without specific prior written permission.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 | POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/simulation/legged_gym/licenses/assets/ANYmal_c_license.txt:
--------------------------------------------------------------------------------
1 | Copyright 2020, ANYbotics AG.
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions
5 | are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright
11 | notice, this list of conditions and the following disclaimer in
12 | the documentation and/or other materials provided with the
13 | distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its
16 | contributors may be used to endorse or promote products derived
17 | from this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/simulation/legged_gym/licenses/assets/cassie_license.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Jenna Reher, jreher@caltech.edu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/simulation/legged_gym/licenses/dependencies/matplotlib_license.txt:
--------------------------------------------------------------------------------
1 | 1. This LICENSE AGREEMENT is between the Matplotlib Development Team ("MDT"), and the Individual or Organization ("Licensee") accessing and otherwise using matplotlib software in source or binary form and its associated documentation.
2 |
3 | 2. Subject to the terms and conditions of this License Agreement, MDT hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 3.4.3 alone or in any derivative version, provided, however, that MDT's License Agreement and MDT's notice of copyright, i.e., "Copyright (c) 2012-2013 Matplotlib Development Team; All Rights Reserved" are retained in matplotlib 3.4.3 alone or in any derivative version prepared by Licensee.
4 |
5 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 3.4.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 3.4.3.
6 |
7 | 4. MDT is making matplotlib 3.4.3 available to Licensee on an "AS IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 3.4.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
8 |
9 | 5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 3.4.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 3.4.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
10 |
11 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
12 |
13 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between MDT and Licensee. This License Agreement does not grant permission to use MDT trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
14 |
15 | 8. By copying, installing or otherwise using matplotlib 3.4.3, Licensee agrees to be bound by the terms and conditions of this License Agreement.
--------------------------------------------------------------------------------
/simulation/legged_gym/requirements.txt:
--------------------------------------------------------------------------------
1 | pymeshlab
2 | pydelatin
3 | tensorboard
4 | setuptools == 59.5.0
5 | opencv-python
6 | tqdm
7 | wandb
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/head_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/head_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_ankle_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_elbow_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_elbow_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_0_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_0_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_index_1_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_0_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_0_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_middle_1_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_palm_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_palm_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_0_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_0_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_1_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_2_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hand_thumb_2_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_hip_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_knee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_knee_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_rubber_hand.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_rubber_hand.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_shoulder_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_rubber_hand.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_roll_rubber_hand.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/left_wrist_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/logo_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/logo_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis_contour_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/pelvis_contour_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_ankle_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_elbow_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_elbow_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_0_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_0_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_index_1_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_0_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_0_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_middle_1_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_palm_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_palm_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_0_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_0_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_1_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_1_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_2_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hand_thumb_2_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_hip_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_knee_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_knee_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_rubber_hand.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_rubber_hand.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_shoulder_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_pitch_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_pitch_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_rubber_hand.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_roll_rubber_hand.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/right_wrist_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_rod_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_L_rod_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_rod_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_constraint_R_rod_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/torso_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_L.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_L.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_R.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_constraint_R.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_roll_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_roll_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_support_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_support_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_yaw_link.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunpeiDong/HumanUP/7516e0f27e6f4d1e7365cf64ea577a78247bd8cb/simulation/legged_gym/resources/robots/g1_modified/meshes/waist_yaw_link.STL
--------------------------------------------------------------------------------
/simulation/legged_gym/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages
2 | from distutils.core import setup
3 |
4 | setup(
5 | name='legged_gym',
6 | version='1.0.0',
7 | author='Nikita Rudin',
8 | license="BSD-3-Clause",
9 | packages=find_packages(),
10 | author_email='rudinn@ethz.ch',
11 | description='Isaac Gym environments for Legged Robots',
12 | install_requires=['isaacgym',
13 | 'rsl-rl',
14 | 'matplotlib']
15 | )
--------------------------------------------------------------------------------
/simulation/rsl_rl/.gitignore:
--------------------------------------------------------------------------------
1 | # IDEs
2 | .idea
3 |
4 | # builds
5 | *.egg-info
6 |
7 | # cache
8 | __pycache__
9 | .pytest_cache
10 |
11 | # vs code
12 | .vscode
--------------------------------------------------------------------------------
/simulation/rsl_rl/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin
2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software without
17 | specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | See licenses/dependencies for license information of dependencies of this package.
--------------------------------------------------------------------------------
/simulation/rsl_rl/README.md:
--------------------------------------------------------------------------------
1 | # RSL RL
2 | Fast and simple implementation of RL algorithms, designed to run fully on GPU.
3 | This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac GYM.
4 |
5 | Upstream rsl_rl implements only PPO; this fork additionally exports a `PPORMA` variant (see `rsl_rl/algorithms/__init__.py`). More algorithms may be added later.
6 | Contributions are welcome.
7 |
8 | ## Setup
9 |
10 | ```
11 | git clone https://github.com/leggedrobotics/rsl_rl
12 | cd rsl_rl
13 | pip install -e .
14 | ```
15 |
16 | **Maintainer**: Nikita Rudin
17 | **Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA
18 | **Contact**: rudinn@ethz.ch
19 |
20 |
21 |
22 |
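A quick smoke test after the editable install is to import the symbols that this fork's package `__init__` files export (`PPO`/`PPORMA`, `ActorCritic`/`ActorCriticRMA`, `VecEnv`); if these imports succeed, `rsl_rl` is on the path:

```python
# Smoke test for the editable install; these names are exported by the
# __init__.py files of this fork's rsl_rl package.
from rsl_rl.algorithms import PPO, PPORMA
from rsl_rl.modules import ActorCritic, ActorCriticRMA
from rsl_rl.env import VecEnv

print(PPO.__name__, PPORMA.__name__, ActorCritic.__name__,
      ActorCriticRMA.__name__, VecEnv.__name__)
```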
--------------------------------------------------------------------------------
/simulation/rsl_rl/licenses/dependencies/numpy_license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2005-2021, NumPy Developers.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * Neither the name of the NumPy Developers nor the names of any
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/simulation/rsl_rl/licenses/dependencies/torch_license.txt:
--------------------------------------------------------------------------------
1 | From PyTorch:
2 |
3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke)
4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
8 | Copyright (c) 2011-2013 NYU (Clement Farabet)
9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
12 |
13 | From Caffe2:
14 |
15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved.
16 |
17 | All contributions by Facebook:
18 | Copyright (c) 2016 Facebook Inc.
19 |
20 | All contributions by Google:
21 | Copyright (c) 2015 Google Inc.
22 | All rights reserved.
23 |
24 | All contributions by Yangqing Jia:
25 | Copyright (c) 2015 Yangqing Jia
26 | All rights reserved.
27 |
28 | All contributions by Kakao Brain:
29 | Copyright 2019-2020 Kakao Brain
30 |
31 | All contributions from Caffe:
32 | Copyright(c) 2013, 2014, 2015, the respective contributors
33 | All rights reserved.
34 |
35 | All other contributions:
36 | Copyright(c) 2015, 2016 the respective contributors
37 | All rights reserved.
38 |
39 | Caffe2 uses a copyright model similar to Caffe: each contributor holds
40 | copyright over their contributions to Caffe2. The project versioning records
41 | all such contribution and copyright details. If a contributor wants to further
42 | mark their specific copyright on a particular contribution, they should
43 | indicate their copyright solely in the commit message of the change when it is
44 | committed.
45 |
46 | All rights reserved.
47 |
48 | Redistribution and use in source and binary forms, with or without
49 | modification, are permitted provided that the following conditions are met:
50 |
51 | 1. Redistributions of source code must retain the above copyright
52 | notice, this list of conditions and the following disclaimer.
53 |
54 | 2. Redistributions in binary form must reproduce the above copyright
55 | notice, this list of conditions and the following disclaimer in the
56 | documentation and/or other materials provided with the distribution.
57 |
58 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
59 | and IDIAP Research Institute nor the names of its contributors may be
60 | used to endorse or promote products derived from this software without
61 | specific prior written permission.
62 |
63 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
64 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
67 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
68 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
69 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
70 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
71 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
72 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
73 | POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/algorithms/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from .ppo_rma import PPORMA
32 | from .ppo import PPO
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/env/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from .vec_env import VecEnv
32 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/env/vec_env.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from abc import ABC, abstractmethod
32 | import torch
33 | from typing import Tuple, Union
34 |
35 |
36 | # minimal interface of the environment
37 | class VecEnv(ABC):
38 | num_envs: int
39 | num_obs: int
40 | num_privileged_obs: int
41 | num_actions: int
42 | max_episode_length: int
43 | privileged_obs_buf: torch.Tensor
44 | obs_buf: torch.Tensor
45 | rew_buf: torch.Tensor
46 | reset_buf: torch.Tensor
47 | episode_length_buf: torch.Tensor # current episode duration
48 | extras: dict
49 | device: torch.device
50 |
51 | @abstractmethod
52 | def step(
53 | self, actions: torch.Tensor
54 | ) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]:
55 | pass
56 |
57 | @abstractmethod
58 | def reset(self, env_ids: Union[list, torch.Tensor]):
59 | pass
60 |
61 | @abstractmethod
62 | def get_observations(self) -> torch.Tensor:
63 | pass
64 |
65 | @abstractmethod
66 | def get_privileged_observations(self) -> Union[torch.Tensor, None]:
67 | pass
68 |
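`VecEnv` is only an interface; the concrete environments live in legged_gym. As a reference for what a conforming environment must provide, here is a hedged, minimal toy implementation (purely illustrative, assuming `rsl_rl` is installed per its README; `DummyEnv` is not part of the repository) that fills in the declared buffers and the four abstract methods:

```python
import torch
from typing import Tuple, Union

from rsl_rl.env import VecEnv  # assumes rsl_rl is installed (pip install -e .)


class DummyEnv(VecEnv):
    """Toy VecEnv: random observations, zero reward, episodes never terminate.
    Useful only for checking that wiring against the interface works."""

    def __init__(self, num_envs: int = 4, num_obs: int = 8,
                 num_actions: int = 2, device: str = "cpu"):
        self.num_envs = num_envs
        self.num_obs = num_obs
        self.num_privileged_obs = 0
        self.num_actions = num_actions
        self.max_episode_length = 1000
        self.device = torch.device(device)
        self.obs_buf = torch.zeros(num_envs, num_obs, device=self.device)
        self.privileged_obs_buf = None
        self.rew_buf = torch.zeros(num_envs, device=self.device)
        self.reset_buf = torch.zeros(num_envs, dtype=torch.bool, device=self.device)
        self.episode_length_buf = torch.zeros(num_envs, dtype=torch.long, device=self.device)
        self.extras = {}

    def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None],
                                                    torch.Tensor, torch.Tensor, dict]:
        # Ignore the actions and return fresh random observations.
        self.episode_length_buf += 1
        self.obs_buf = torch.randn_like(self.obs_buf)
        return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras

    def reset(self, env_ids: Union[list, torch.Tensor]):
        self.episode_length_buf[env_ids] = 0

    def get_observations(self) -> torch.Tensor:
        return self.obs_buf

    def get_privileged_observations(self) -> Union[torch.Tensor, None]:
        return self.privileged_obs_buf
```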
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from .actor_critic_rma import ActorCriticRMA
32 | from .actor_critic import ActorCritic
33 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/modules/actor_critic.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import numpy as np
32 |
33 | import code
34 | import torch
35 | import torch.nn as nn
36 | from torch.distributions import Normal
37 | from torch.nn.modules import rnn
38 | from torch.nn.modules.activation import ReLU
39 |
40 |
41 | class Actor(nn.Module):
42 | def __init__(
43 | self,
44 | num_prop,
45 | num_actions,
46 | actor_hidden_dims,
47 | activation,
48 | tanh_encoder_output=False,
49 | **kwargs
50 | ) -> None:
51 | super().__init__()
52 | # prop
53 | self.num_prop = num_prop
54 | self.num_actions = num_actions
55 |
56 | actor_layers = []
57 | actor_layers.append(nn.Linear(num_prop, actor_hidden_dims[0]))
58 | actor_layers.append(activation)
59 | for l in range(len(actor_hidden_dims)):
60 | if l == len(actor_hidden_dims) - 1:
61 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions))
62 | else:
63 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1]))
64 | actor_layers.append(activation)
65 | if tanh_encoder_output:
66 | actor_layers.append(nn.Tanh())
67 | self.actor_backbone = nn.Sequential(*actor_layers)
68 |
69 | def forward(self, obs_all, hist_encoding=False, eval=False, scandots_latent=None):
70 | obs = obs_all
71 | obs_prop = obs[:, : self.num_prop]
72 | backbone_input = obs_prop
73 | backbone_output = self.actor_backbone(backbone_input)
74 | return backbone_output
75 |
76 |
77 | class ActorCritic(nn.Module):
78 | is_recurrent = False
79 |
80 | def __init__(
81 | self,
82 | num_prop,
83 | num_critic_obs,
84 | num_priv_latent,
85 | num_priv_explicit,
86 | num_hist,
87 | num_actions,
88 | actor_hidden_dims=[256, 256, 256],
89 | critic_hidden_dims=[256, 256, 256],
90 | activation="elu",
91 | init_noise_std=1.0,
92 | fix_action_std=False,
93 | **kwargs
94 | ):
95 | if kwargs:
96 | print(
97 | "ActorCritic.__init__ got unexpected arguments, which will be ignored: "
98 | + str([key for key in kwargs.keys()])
99 | )
100 | super().__init__()
101 |
102 | self.fix_action_std = fix_action_std
103 |
104 | self.kwargs = kwargs
105 | priv_encoder_dims = kwargs["priv_encoder_dims"]
106 | activation = get_activation(activation)
107 |
108 | self.actor = Actor(
109 | num_prop=num_prop,
110 | num_actions=num_actions,
111 | actor_hidden_dims=actor_hidden_dims,
112 | activation=activation,
113 | tanh_encoder_output=kwargs["tanh_encoder_output"],
114 | )
115 |
116 | # Value function
117 | critic_layers = []
118 | critic_layers.append(nn.Linear(num_critic_obs, critic_hidden_dims[0]))
119 | critic_layers.append(activation)
120 | for l in range(len(critic_hidden_dims)):
121 | if l == len(critic_hidden_dims) - 1:
122 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1))
123 | else:
124 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1]))
125 | critic_layers.append(activation)
126 | self.critic = nn.Sequential(*critic_layers)
127 |
128 | # Action noise
129 | if self.fix_action_std:
130 | # action_std = torch.tensor([0.5, 0.25, 0.25, 0.25, 0.2, 0.2] * 2 + [0.2, 0.2, 0.2] + [0.3] * 8)
131 | action_std = torch.tensor(
132 | [0.3, 0.3, 0.3, 0.4, 0.2] * 2 + [0.25, 0.25, 0.25] + [0.5] * 8
133 | )
134 | self.std = nn.Parameter(action_std, requires_grad=False)
135 | else:
136 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
137 | self.distribution = None
138 | # disable args validation for speedup
139 | Normal.set_default_validate_args = False
140 |
141 | @staticmethod
142 | # not used at the moment
143 | def init_weights(sequential, scales):
144 | [
145 | torch.nn.init.orthogonal_(module.weight, gain=scales[idx])
146 | for idx, module in enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))
147 | ]
148 |
149 | def reset(self, dones=None):
150 | pass
151 |
152 | def forward(self):
153 | raise NotImplementedError
154 |
155 | @property
156 | def action_mean(self):
157 | return self.distribution.mean
158 |
159 | @property
160 | def action_std(self):
161 | return self.distribution.stddev
162 |
163 | @property
164 | def entropy(self):
165 | return self.distribution.entropy().sum(dim=-1)
166 |
167 | def update_distribution(self, observations):
168 | mean = self.actor(observations)
169 | # has_nan_mean = torch.isnan(mean).any().item()
170 | # if has_nan_mean:
171 | # print("mean has nan")
172 | # has_nan_obs = torch.isnan(observations).any().item()
173 | # if has_nan_obs:
174 | # print("has nan obs: ", has_nan_obs)
175 | # obs_array = observations.cpu().detach().numpy()
176 | # np.savetxt('nan_obs.txt', obs_array)
177 | # exit()
178 | self.distribution = Normal(mean, mean * 0.0 + self.std)
179 |
180 | def act(self, observations, **kwargs):
181 | self.update_distribution(observations)
182 | return self.distribution.sample()
183 |
184 | def get_actions_log_prob(self, actions):
185 | return self.distribution.log_prob(actions).sum(dim=-1)
186 |
187 | def act_inference(self, observations, eval=False, **kwargs):
188 | if not eval:
189 | actions_mean = self.actor(observations, eval)
190 | return actions_mean
191 | else:
192 | actions_mean = self.actor(observations, eval=True)
193 | return actions_mean
194 |
195 | def evaluate(self, critic_observations, **kwargs):
196 | value = self.critic(critic_observations)
197 | return value
198 |
199 | def reset_std(self, std, num_actions, device):
200 | new_std = std * torch.ones(num_actions, device=device)
201 | self.std.data = new_std.data
202 |
203 |
204 | def get_activation(act_name):
205 | if act_name == "elu":
206 | return nn.ELU()
207 | elif act_name == "selu":
208 | return nn.SELU()
209 | elif act_name == "relu":
210 | return nn.ReLU()
211 | elif act_name == "crelu":
212 | return nn.ReLU()
213 | elif act_name == "lrelu":
214 | return nn.LeakyReLU()
215 | elif act_name == "tanh":
216 | return nn.Tanh()
217 | elif act_name == "sigmoid":
218 | return nn.Sigmoid()
219 | else:
220 | print("invalid activation function!")
221 | return None
222 |
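A hypothetical construction of this ActorCritic (not part of the repository). Note that `priv_encoder_dims` and `tanh_encoder_output` are read from `**kwargs`, so they must be passed explicitly; all dimensions below are made up.

```python
import torch

policy = ActorCritic(
    num_prop=48, num_critic_obs=64, num_priv_latent=0, num_priv_explicit=0,
    num_hist=0, num_actions=12,
    priv_encoder_dims=[], tanh_encoder_output=False,
)
obs = torch.randn(16, 48)                        # proprioceptive observations
actions = policy.act(obs)                        # stochastic sample, shape (16, 12)
log_prob = policy.get_actions_log_prob(actions)  # shape (16,)
mean_actions = policy.act_inference(obs)         # deterministic mean, shape (16, 12)
value = policy.evaluate(torch.randn(16, 64))     # critic value, shape (16, 1)
```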
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import numpy as np
32 |
33 | import torch
34 | import torch.nn as nn
35 | from torch.distributions import Normal
36 | from torch.nn.modules import rnn
37 | from .actor_critic import ActorCritic, get_activation
38 | from rsl_rl.utils import unpad_trajectories
39 |
40 |
41 | class ActorCriticRecurrent(ActorCritic):
42 | is_recurrent = True
43 |
44 | def __init__(
45 | self,
46 | num_actor_obs,
47 | num_critic_obs,
48 | num_actions,
49 | actor_hidden_dims=[256, 256, 256],
50 | critic_hidden_dims=[256, 256, 256],
51 | activation="elu",
52 | rnn_type="lstm",
53 | rnn_hidden_size=256,
54 | rnn_num_layers=1,
55 | init_noise_std=1.0,
56 | **kwargs,
57 | ):
58 | if kwargs:
59 | print(
60 | "ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: "
61 | + str(kwargs.keys()),
62 | )
63 |
64 | super().__init__(
65 | num_actor_obs=rnn_hidden_size,
66 | num_critic_obs=rnn_hidden_size,
67 | num_actions=num_actions,
68 | actor_hidden_dims=actor_hidden_dims,
69 | critic_hidden_dims=critic_hidden_dims,
70 | activation=activation,
71 | init_noise_std=init_noise_std,
72 | **kwargs,
73 | )
74 |
75 | activation = get_activation(activation)
76 |
77 | self.memory_a = Memory(
78 | num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size
79 | )
80 | self.memory_c = Memory(
81 | num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size
82 | )
83 |
84 | print(f"Actor RNN: {self.memory_a}")
85 | print(f"Critic RNN: {self.memory_c}")
86 |
87 | def reset(self, dones=None):
88 | self.memory_a.reset(dones)
89 | self.memory_c.reset(dones)
90 |
91 | def act(self, observations, masks=None, hidden_states=None):
92 | input_a = self.memory_a(observations, masks, hidden_states)
93 | return super().act(input_a.squeeze(0))
94 |
95 | def act_inference(self, observations, **kwargs):
96 | input_a = self.memory_a(observations, **kwargs)
97 | return super().act_inference(input_a.squeeze(0))
98 |
99 | def evaluate(self, critic_observations, masks=None, hidden_states=None):
100 | input_c = self.memory_c(critic_observations, masks, hidden_states)
101 | return super().evaluate(input_c.squeeze(0))
102 |
103 | def get_hidden_states(self):
104 | return self.memory_a.hidden_states, self.memory_c.hidden_states
105 |
106 |
107 | class Memory(torch.nn.Module):
108 | def __init__(self, input_size, type="lstm", num_layers=1, hidden_size=256):
109 | super().__init__()
110 | # RNN
111 | rnn_cls = nn.GRU if type.lower() == "gru" else nn.LSTM
112 | self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
113 | self.hidden_states = None
114 |
115 | def forward(self, input, masks=None, hidden_states=None):
116 | batch_mode = masks is not None
117 | if batch_mode:
118 | # batch mode (policy update): need saved hidden states
119 | if hidden_states is None:
120 | raise ValueError("Hidden states not passed to memory module during policy update")
121 | out, _ = self.rnn(input, hidden_states)
122 | out = unpad_trajectories(out, masks)
123 | else:
124 | # inference mode (collection): use hidden states of last step
125 | out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states)
126 | return out
127 |
128 | def reset(self, dones=None):
129 | # When the RNN is an LSTM, self.hidden_states_a is a list with hidden_state and cell_state
130 | for hidden_state in self.hidden_states:
131 | hidden_state[..., dones, :] = 0.0
132 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/runners/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from .on_policy_runner import OnPolicyRunner
32 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/runners/runner.py:
--------------------------------------------------------------------------------
1 | class Runner:
2 | def __init__(self):
3 | pass
4 |
5 | def get_inference_policy(self):
6 | pass
7 |
8 | def get_estimator_inference_policy(self):
9 | pass
10 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/storage/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 |
4 | from .rollout_storage import RolloutStorage
5 | from .replay_buffer import ReplayBuffer
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/storage/replay_buffer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 |
5 | class ReplayBuffer:
6 | """Fixed-size buffer to store experience tuples."""
7 |
8 | def __init__(self, obs_dim, buffer_size, device):
9 | """Initialize a ReplayBuffer object.
10 | Arguments:
11 | buffer_size (int): maximum size of buffer
12 | """
13 | self.amp_obs = torch.zeros(buffer_size, obs_dim).to(device)
14 | self.buffer_size = buffer_size
15 | self.device = device
16 |
17 | self.step = 0
18 | self.num_samples = 0
19 |
20 | def insert(self, amp_obs):
21 | """Add new states to memory."""
22 |
23 | num_obs = amp_obs.shape[0]
24 | start_idx = self.step
25 | end_idx = self.step + num_obs
26 | if end_idx > self.buffer_size:
27 | self.amp_obs[self.step:self.buffer_size] = amp_obs[:self.buffer_size - self.step]
28 | self.amp_obs[:end_idx - self.buffer_size] = amp_obs[self.buffer_size - self.step:] # put the rest at the beginning
29 | else:
30 | self.amp_obs[start_idx:end_idx] = amp_obs
31 |
32 | self.num_samples = min(self.buffer_size, max(end_idx, self.num_samples))
33 | self.step = (self.step + num_obs) % self.buffer_size
34 |
35 | def feed_forward_generator(self, num_mini_batch, mini_batch_size):
36 | for _ in range(num_mini_batch):
37 | sample_idxs = np.random.choice(self.num_samples, size=mini_batch_size)
38 | yield self.amp_obs[sample_idxs].to(self.device)
39 |
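A hypothetical smoke test for this buffer (not from the repository), assuming CPU tensors; the second insert deliberately wraps around the end of the ring buffer.

```python
import torch

buf = ReplayBuffer(obs_dim=8, buffer_size=32, device="cpu")
buf.insert(torch.randn(10, 8))   # partial fill: step -> 10, num_samples -> 10
buf.insert(torch.randn(30, 8))   # wraps: 22 samples written at the end, 8 at the start
for batch in buf.feed_forward_generator(num_mini_batch=2, mini_batch_size=4):
    assert batch.shape == (4, 8)
```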
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/storage/rollout_storage.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import torch
32 | import numpy as np
33 |
34 | from rsl_rl.utils import split_and_pad_trajectories
35 |
36 | class RolloutStorage:
37 | class Transition:
38 | def __init__(self):
39 | self.observations = None
40 | self.critic_observations = None
41 | self.actions = None
42 | self.rewards = None
43 | self.dones = None
44 | self.values = None
45 | self.actions_log_prob = None
46 | self.action_mean = None
47 | self.action_sigma = None
48 | self.hidden_states = None
49 | def clear(self):
50 | self.__init__()
51 |
52 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'):
53 |
54 | self.device = device
55 |
56 | self.obs_shape = obs_shape
57 | self.privileged_obs_shape = privileged_obs_shape
58 | self.actions_shape = actions_shape
59 |
60 | # Core
61 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device)
62 |
63 | if privileged_obs_shape[0] is not None:
64 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device)
65 | else:
66 | self.privileged_observations = None
67 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
68 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
69 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
70 |
71 | # For PPO
72 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
73 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
74 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
75 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
76 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
77 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
78 |
79 | self.num_transitions_per_env = num_transitions_per_env
80 | self.num_envs = num_envs
81 |
82 | # rnn
83 | self.saved_hidden_states_a = None
84 | self.saved_hidden_states_c = None
85 |
86 | self.step = 0
87 |
88 | def add_transitions(self, transition: Transition):
89 | if self.step >= self.num_transitions_per_env:
90 | raise AssertionError("Rollout buffer overflow")
91 | self.observations[self.step].copy_(transition.observations)
92 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations)
93 | self.actions[self.step].copy_(transition.actions)
94 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
95 | self.dones[self.step].copy_(transition.dones.view(-1, 1))
96 | self.values[self.step].copy_(transition.values)
97 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1))
98 | self.mu[self.step].copy_(transition.action_mean)
99 | self.sigma[self.step].copy_(transition.action_sigma)
100 |
101 | self._save_hidden_states(transition.hidden_states)
102 | self.step += 1
103 |
104 | def _save_hidden_states(self, hidden_states):
105 | if hidden_states is None or hidden_states==(None, None):
106 | return
107 | # make a tuple out of GRU hidden states to match the LSTM format
108 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],)
109 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],)
110 |
111 | # initialize if needed
112 | if self.saved_hidden_states_a is None:
113 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))]
114 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))]
115 | # copy the states
116 | for i in range(len(hid_a)):
117 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i])
118 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i])
119 |
120 |
121 | def clear(self):
122 | self.step = 0
123 |
124 | def compute_returns(self, last_values, gamma, lam):
125 | advantage = 0
126 | for step in reversed(range(self.num_transitions_per_env)):
127 | if step == self.num_transitions_per_env - 1:
128 | next_values = last_values
129 | else:
130 | next_values = self.values[step + 1]
131 | next_is_not_terminal = 1.0 - self.dones[step].float()
132 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step]
133 | advantage = delta + next_is_not_terminal * gamma * lam * advantage # nan 2237
134 | self.returns[step] = advantage + self.values[step]
135 |
136 | # Compute and normalize the advantages
137 | if torch.isnan(self.returns).any():
138 | from loguru import logger
139 | logger.error(f"RolloutStorage.compute_returns: returns contains NaNs")
140 | import ipdb; ipdb.set_trace()
141 | self.advantages = self.returns - self.values
142 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8)
143 |
144 | def get_statistics(self):
145 | done = self.dones
146 | done[-1] = 1
147 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
148 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0]))
149 | trajectory_lengths = (done_indices[1:] - done_indices[:-1])
150 | return trajectory_lengths.float().mean(), self.rewards.mean()
151 |
152 | def mini_batch_generator(self, num_mini_batches, num_epochs=8):
153 | batch_size = self.num_envs * self.num_transitions_per_env
154 | mini_batch_size = batch_size // num_mini_batches
155 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device)
156 |
157 | observations = self.observations.flatten(0, 1)
158 |
159 | if self.privileged_observations is not None:
160 | critic_observations = self.privileged_observations.flatten(0, 1)
161 | else:
162 | critic_observations = observations
163 |
164 | actions = self.actions.flatten(0, 1)
165 | values = self.values.flatten(0, 1)
166 | returns = self.returns.flatten(0, 1)
167 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1)
168 | advantages = self.advantages.flatten(0, 1)
169 | old_mu = self.mu.flatten(0, 1)
170 | old_sigma = self.sigma.flatten(0, 1)
171 |
172 | for epoch in range(num_epochs):
173 | for i in range(num_mini_batches):
174 |
175 | start = i*mini_batch_size
176 | end = (i+1)*mini_batch_size
177 | batch_idx = indices[start:end]
178 |
179 | obs_batch = observations[batch_idx]
180 | critic_observations_batch = critic_observations[batch_idx]
181 | actions_batch = actions[batch_idx]
182 | target_values_batch = values[batch_idx]
183 | returns_batch = returns[batch_idx]
184 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx]
185 | advantages_batch = advantages[batch_idx]
186 | old_mu_batch = old_mu[batch_idx]
187 | old_sigma_batch = old_sigma[batch_idx]
188 |
189 |
190 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \
191 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None
192 |
193 | # for RNNs only
194 | def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8):
195 |
196 | padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones)
197 | if self.privileged_observations is not None:
198 | padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones)
199 | else:
200 | padded_critic_obs_trajectories = padded_obs_trajectories
201 |
202 | mini_batch_size = self.num_envs // num_mini_batches
203 | for ep in range(num_epochs):
204 | first_traj = 0
205 | for i in range(num_mini_batches):
206 | start = i*mini_batch_size
207 | stop = (i+1)*mini_batch_size
208 |
209 | dones = self.dones.squeeze(-1)
210 | last_was_done = torch.zeros_like(dones, dtype=torch.bool)
211 | last_was_done[1:] = dones[:-1]
212 | last_was_done[0] = True
213 | trajectories_batch_size = torch.sum(last_was_done[:, start:stop])
214 | last_traj = first_traj + trajectories_batch_size
215 |
216 | masks_batch = trajectory_masks[:, first_traj:last_traj]
217 | obs_batch = padded_obs_trajectories[:, first_traj:last_traj]
218 | critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj]
219 |
220 | actions_batch = self.actions[:, start:stop]
221 | old_mu_batch = self.mu[:, start:stop]
222 | old_sigma_batch = self.sigma[:, start:stop]
223 | returns_batch = self.returns[:, start:stop]
224 | advantages_batch = self.advantages[:, start:stop]
225 | values_batch = self.values[:, start:stop]
226 | old_actions_log_prob_batch = self.actions_log_prob[:, start:stop]
227 |
228 | # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim])
229 | # then take only time steps after dones (flattens num envs and time dimensions),
230 | # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim]
231 | last_was_done = last_was_done.permute(1, 0)
232 | hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
233 | for saved_hidden_states in self.saved_hidden_states_a ]
234 | hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
235 | for saved_hidden_states in self.saved_hidden_states_c ]
236 | # remove the tuple for GRU
237 | hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch
238 | hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch
239 |
240 | yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \
241 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch
242 |
243 | first_traj = last_traj
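A hypothetical fill-and-iterate sketch for the non-recurrent path (not from the repository); all shapes are invented and no real policy is involved.

```python
import torch

storage = RolloutStorage(num_envs=4, num_transitions_per_env=8,
                         obs_shape=(48,), privileged_obs_shape=(None,),
                         actions_shape=(12,), device="cpu")
t = RolloutStorage.Transition()
for _ in range(8):
    t.observations = torch.randn(4, 48)
    t.actions = torch.randn(4, 12)
    t.rewards = torch.zeros(4)
    t.dones = torch.zeros(4)
    t.values = torch.zeros(4, 1)
    t.actions_log_prob = torch.zeros(4)
    t.action_mean = torch.zeros(4, 12)
    t.action_sigma = torch.ones(4, 12)
    storage.add_transitions(t)

storage.compute_returns(last_values=torch.zeros(4, 1), gamma=0.99, lam=0.95)
for obs_b, critic_obs_b, actions_b, *rest in storage.mini_batch_generator(num_mini_batches=2, num_epochs=1):
    pass  # each mini-batch would be fed to the PPO update
```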
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | from .utils import split_and_pad_trajectories, unpad_trajectories
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/init.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | def weight_init(m):
5 | """Custom weight initialization for TD-MPC2."""
6 | if isinstance(m, nn.Linear):
7 | nn.init.trunc_normal_(m.weight, std=0.02)
8 | if m.bias is not None:
9 | nn.init.constant_(m.bias, 0)
10 | elif isinstance(m, nn.Embedding):
11 | nn.init.uniform_(m.weight, -0.02, 0.02)
12 | elif isinstance(m, nn.ParameterList):
13 | for i,p in enumerate(m):
14 | if p.dim() == 3: # Linear
15 | nn.init.trunc_normal_(p, std=0.02) # Weight
16 | nn.init.constant_(m[i+1], 0) # Bias
17 |
18 |
19 | def zero_(params):
20 | """Initialize parameters to zero."""
21 | for p in params:
22 | p.data.fill_(0)
23 |
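A hypothetical application of weight_init to a small network (not from the repository):

```python
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 32), nn.ReLU(), nn.Linear(32, 4))
net.apply(weight_init)   # truncated-normal weights, zero biases for every nn.Linear
```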
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from functorch import combine_state_for_ensemble
5 |
6 |
7 | class Ensemble(nn.Module):
8 | """
9 | Vectorized ensemble of modules.
10 | """
11 |
12 | def __init__(self, modules, **kwargs):
13 | super().__init__()
14 | modules = nn.ModuleList(modules)
15 | fn, params, _ = combine_state_for_ensemble(modules)
16 | self.vmap = torch.vmap(fn, in_dims=(0, 0, None), randomness='different', **kwargs)
17 | self.params = nn.ParameterList([nn.Parameter(p) for p in params])
18 | self._repr = str(modules)
19 |
20 | def forward(self, *args, **kwargs):
21 | return self.vmap([p for p in self.params], (), *args, **kwargs)
22 |
23 | def __repr__(self):
24 | return 'Vectorized ' + self._repr
25 |
26 |
27 | class ShiftAug(nn.Module):
28 | """
29 | Random shift image augmentation.
30 | Adapted from https://github.com/facebookresearch/drqv2
31 | """
32 | def __init__(self, pad=3):
33 | super().__init__()
34 | self.pad = pad
35 |
36 | def forward(self, x):
37 | x = x.float()
38 | n, _, h, w = x.size()
39 | assert h == w
40 | padding = tuple([self.pad] * 4)
41 | x = F.pad(x, padding, 'replicate')
42 | eps = 1.0 / (h + 2 * self.pad)
43 | arange = torch.linspace(-1.0 + eps, 1.0 - eps, h + 2 * self.pad, device=x.device, dtype=x.dtype)[:h]
44 | arange = arange.unsqueeze(0).repeat(h, 1).unsqueeze(2)
45 | base_grid = torch.cat([arange, arange.transpose(1, 0)], dim=2)
46 | base_grid = base_grid.unsqueeze(0).repeat(n, 1, 1, 1)
47 | shift = torch.randint(0, 2 * self.pad + 1, size=(n, 1, 1, 2), device=x.device, dtype=x.dtype)
48 | shift *= 2.0 / (h + 2 * self.pad)
49 | grid = base_grid + shift
50 | return F.grid_sample(x, grid, padding_mode='zeros', align_corners=False)
51 |
52 |
53 | class PixelPreprocess(nn.Module):
54 | """
55 | Normalizes pixel observations to [-0.5, 0.5].
56 | """
57 |
58 | def __init__(self):
59 | super().__init__()
60 |
61 | def forward(self, x):
62 | return x.div_(255.).sub_(0.5)
63 |
64 |
65 | class SimNorm(nn.Module):
66 | """
67 | Simplicial normalization.
68 | Adapted from https://arxiv.org/abs/2204.00616.
69 | """
70 |
71 | def __init__(self, cfg):
72 | super().__init__()
73 | self.dim = cfg.simnorm_dim
74 |
75 | def forward(self, x):
76 | shp = x.shape
77 | x = x.view(*shp[:-1], -1, self.dim)
78 | x = F.softmax(x, dim=-1)
79 | return x.view(*shp)
80 |
81 | def __repr__(self):
82 | return f"SimNorm(dim={self.dim})"
83 |
84 |
85 | class NormedLinear(nn.Linear):
86 | """
87 | Linear layer with LayerNorm, activation, and optionally dropout.
88 | """
89 |
90 | def __init__(self, *args, dropout=0., act=nn.Mish(inplace=True), **kwargs):
91 | super().__init__(*args, **kwargs)
92 | self.ln = nn.LayerNorm(self.out_features)
93 | self.act = act
94 | self.dropout = nn.Dropout(dropout, inplace=True) if dropout else None
95 |
96 | def forward(self, x):
97 | x = super().forward(x)
98 | if self.dropout:
99 | x = self.dropout(x)
100 | return self.act(self.ln(x))
101 |
102 | def __repr__(self):
103 | repr_dropout = f", dropout={self.dropout.p}" if self.dropout else ""
104 | return f"NormedLinear(in_features={self.in_features}, "\
105 | f"out_features={self.out_features}, "\
106 | f"bias={self.bias is not None}{repr_dropout}, "\
107 | f"act={self.act.__class__.__name__})"
108 |
109 |
110 | def mlp(in_dim, mlp_dims, out_dim, act=None, dropout=0.):
111 | """
112 | Basic building block of TD-MPC2.
113 | MLP with LayerNorm, Mish activations, and optionally dropout.
114 | """
115 | if isinstance(mlp_dims, int):
116 | mlp_dims = [mlp_dims]
117 | dims = [in_dim] + mlp_dims + [out_dim]
118 | mlp = nn.ModuleList()
119 | for i in range(len(dims) - 2):
120 | mlp.append(NormedLinear(dims[i], dims[i+1], dropout=dropout*(i==0)))
121 | mlp.append(NormedLinear(dims[-2], dims[-1], act=act) if act else nn.Linear(dims[-2], dims[-1]))
122 | return nn.Sequential(*mlp)
123 |
124 |
125 | def conv(in_shape, num_channels, act=None):
126 | """
127 | Basic convolutional encoder for TD-MPC2 with raw image observations.
128 | 4 layers of convolution with ReLU activations, followed by a linear layer.
129 | """
130 | assert in_shape[-1] == 64 # assumes rgb observations to be 64x64
131 | layers = [
132 | ShiftAug(), PixelPreprocess(),
133 | nn.Conv2d(in_shape[0], num_channels, 7, stride=2), nn.ReLU(inplace=True),
134 | nn.Conv2d(num_channels, num_channels, 5, stride=2), nn.ReLU(inplace=True),
135 | nn.Conv2d(num_channels, num_channels, 3, stride=2), nn.ReLU(inplace=True),
136 | nn.Conv2d(num_channels, num_channels, 3, stride=1), nn.Flatten()]
137 | if act:
138 | layers.append(act)
139 | return nn.Sequential(*layers)
140 |
141 |
142 | def enc(cfg, out={}):
143 | """
144 | Returns a dictionary of encoders for each observation in the dict.
145 | """
146 | for k in cfg.obs_shape.keys():
147 | if k == 'state':
148 | out[k] = mlp(cfg.obs_shape[k][0] + cfg.task_dim, max(cfg.num_enc_layers-1, 1)*[cfg.enc_dim], cfg.latent_dim, act=SimNorm(cfg))
149 | elif k == 'rgb':
150 | out[k] = conv(cfg.obs_shape[k], cfg.num_channels, act=SimNorm(cfg))
151 | else:
152 | raise NotImplementedError(f"Encoder for observation type {k} not implemented.")
153 | return nn.ModuleDict(out)
154 |
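A hypothetical use of the mlp builder above (not from the repository); the hidden layers are NormedLinear blocks and the output head is a plain nn.Linear because act is left as None.

```python
import torch

net = mlp(in_dim=32, mlp_dims=[128, 128], out_dim=16)
y = net(torch.randn(4, 32))   # y.shape == (4, 16)
```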
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/math.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 |
5 | def soft_ce(pred, target, cfg):
6 | """Computes the cross entropy loss between predictions and soft targets."""
7 | pred = F.log_softmax(pred, dim=-1)
8 | target = two_hot(target, cfg)
9 | return -(target * pred).sum(-1, keepdim=True)
10 |
11 |
12 | @torch.jit.script
13 | def log_std(x, low, dif):
14 | return low + 0.5 * dif * (torch.tanh(x) + 1)
15 |
16 |
17 | @torch.jit.script
18 | def _gaussian_residual(eps, log_std):
19 | return -0.5 * eps.pow(2) - log_std
20 |
21 |
22 | @torch.jit.script
23 | def _gaussian_logprob(residual):
24 | return residual - 0.5 * torch.log(2 * torch.pi)
25 |
26 |
27 | def gaussian_logprob(eps, log_std, size=None):
28 | """Compute Gaussian log probability."""
29 | residual = _gaussian_residual(eps, log_std).sum(-1, keepdim=True)
30 | if size is None:
31 | size = eps.size(-1)
32 | return _gaussian_logprob(residual) * size
33 |
34 |
35 | @torch.jit.script
36 | def _squash(pi):
37 | return torch.log(F.relu(1 - pi.pow(2)) + 1e-6)
38 |
39 |
40 | def squash(mu, pi, log_pi):
41 | """Apply squashing function."""
42 | mu = torch.tanh(mu)
43 | pi = torch.tanh(pi)
44 | log_pi -= _squash(pi).sum(-1, keepdim=True)
45 | return mu, pi, log_pi
46 |
47 |
48 | @torch.jit.script
49 | def symlog(x):
50 | """
51 | Symmetric logarithmic function.
52 | Adapted from https://github.com/danijar/dreamerv3.
53 | """
54 | return torch.sign(x) * torch.log(1 + torch.abs(x))
55 |
56 |
57 | @torch.jit.script
58 | def symexp(x):
59 | """
60 | Symmetric exponential function.
61 | Adapted from https://github.com/danijar/dreamerv3.
62 | """
63 | return torch.sign(x) * (torch.exp(torch.abs(x)) - 1)
64 |
65 |
66 | def two_hot(x, cfg):
67 | """Converts a batch of scalars to soft two-hot encoded targets for discrete regression."""
68 | if cfg.num_bins == 0:
69 | return x
70 | elif cfg.num_bins == 1:
71 | return symlog(x)
72 | x = torch.clamp(symlog(x), cfg.vmin, cfg.vmax).squeeze(1)
73 | bin_idx = torch.floor((x - cfg.vmin) / cfg.bin_size).long()
74 | bin_offset = ((x - cfg.vmin) / cfg.bin_size - bin_idx.float()).unsqueeze(-1)
75 | soft_two_hot = torch.zeros(x.size(0), cfg.num_bins, device=x.device)
76 | soft_two_hot.scatter_(1, bin_idx.unsqueeze(1), 1 - bin_offset)
77 | soft_two_hot.scatter_(1, (bin_idx.unsqueeze(1) + 1) % cfg.num_bins, bin_offset)
78 | return soft_two_hot
79 |
80 |
81 | DREG_BINS = None
82 |
83 |
84 | def two_hot_inv(x, cfg):
85 | """Converts a batch of soft two-hot encoded vectors to scalars."""
86 | global DREG_BINS
87 | if cfg.num_bins == 0:
88 | return x
89 | elif cfg.num_bins == 1:
90 | return symexp(x)
91 | if DREG_BINS is None:
92 | DREG_BINS = torch.linspace(cfg.vmin, cfg.vmax, cfg.num_bins, device=x.device)
93 | x = F.softmax(x, dim=-1)
94 | x = torch.sum(x * DREG_BINS, dim=-1, keepdim=True)
95 | return symexp(x)
96 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/running_mean_std.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | '''
5 | updates statistics from full batches of data
6 | '''
7 | class RunningMeanStd(nn.Module):
8 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False):
9 | super(RunningMeanStd, self).__init__()
10 | print('RunningMeanStd: ', insize)
11 | self.insize = insize
12 | self.epsilon = epsilon
13 |
14 | self.norm_only = norm_only
15 | self.per_channel = per_channel
16 | if per_channel:
17 | if len(self.insize) == 3:
18 | self.axis = [0,2,3]
19 | if len(self.insize) == 2:
20 | self.axis = [0,2]
21 | if len(self.insize) == 1:
22 | self.axis = [0]
23 | in_size = self.insize[0]
24 | else:
25 | self.axis = [0]
26 | in_size = insize
27 |
28 | self.register_buffer("running_mean", torch.zeros(in_size, dtype = torch.float64))
29 | self.register_buffer("running_var", torch.ones(in_size, dtype = torch.float64))
30 | self.register_buffer("count", torch.ones((), dtype = torch.float64))
31 |
32 | def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count):
33 | delta = batch_mean - mean
34 | tot_count = count + batch_count
35 |
36 | new_mean = mean + delta * batch_count / tot_count
37 | m_a = var * count
38 | m_b = batch_var * batch_count
39 | M2 = m_a + m_b + delta**2 * count * batch_count / tot_count
40 | new_var = M2 / tot_count
41 | new_count = tot_count
42 | return new_mean, new_var, new_count
43 |
44 | def forward(self, input, unnorm=False):
45 | if self.training:
46 | mean = input.mean(self.axis) # along channel axis
47 | var = input.var(self.axis)
48 | self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(self.running_mean, self.running_var, self.count,
49 | mean, var, input.size()[0] )
50 |
51 | # change shape
52 | if self.per_channel:
53 | if len(self.insize) == 3:
54 | current_mean = self.running_mean.view([1, self.insize[0], 1, 1]).expand_as(input)
55 | current_var = self.running_var.view([1, self.insize[0], 1, 1]).expand_as(input)
56 | if len(self.insize) == 2:
57 | current_mean = self.running_mean.view([1, self.insize[0], 1]).expand_as(input)
58 | current_var = self.running_var.view([1, self.insize[0], 1]).expand_as(input)
59 | if len(self.insize) == 1:
60 | current_mean = self.running_mean.view([1, self.insize[0]]).expand_as(input)
61 | current_var = self.running_var.view([1, self.insize[0]]).expand_as(input)
62 | else:
63 | current_mean = self.running_mean
64 | current_var = self.running_var
65 | # get output
66 |
67 |
68 | if unnorm:
69 | y = torch.clamp(input, min=-5.0, max=5.0)
70 | y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float()
71 | else:
72 | if self.norm_only:
73 | y = input/ torch.sqrt(current_var.float() + self.epsilon)
74 | else:
75 | y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon)
76 | y = torch.clamp(y, min=-5.0, max=5.0)
77 | return y
78 |
79 | class RunningMeanStdObs(nn.Module):
80 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False):
81 | assert isinstance(insize, dict)
82 | super(RunningMeanStdObs, self).__init__()
83 | self.running_mean_std = nn.ModuleDict({
84 | k : RunningMeanStd(v, epsilon, per_channel, norm_only) for k,v in insize.items()
85 | })
86 |
87 | def forward(self, input, unnorm=False):
88 | res = {k : self.running_mean_std[k](v, unnorm) for k,v in input.items()}
89 | return res
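A hypothetical normalize/denormalize round trip for RunningMeanStd (not from the repository), assuming a flat observation of size 4.

```python
import torch

norm = RunningMeanStd(insize=4)
norm.train()
batch = torch.randn(256, 4) * 3.0 + 1.0
z = norm(batch)                   # updates the running statistics, returns clamped z-scores
norm.eval()
restored = norm(z, unnorm=True)   # approximately undoes the normalization (up to clamping)
```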
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/scale.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class RunningScale:
5 | """Running trimmed scale estimator."""
6 |
7 | def __init__(self, cfg):
8 | self.cfg = cfg
9 | self._value = torch.ones(1, dtype=torch.float32, device=torch.device('cuda'))
10 | self._percentiles = torch.tensor([5, 95], dtype=torch.float32, device=torch.device('cuda'))
11 |
12 | def state_dict(self):
13 | return dict(value=self._value, percentiles=self._percentiles)
14 |
15 | def load_state_dict(self, state_dict):
16 | self._value.data.copy_(state_dict['value'])
17 | self._percentiles.data.copy_(state_dict['percentiles'])
18 |
19 | @property
20 | def value(self):
21 | return self._value.cpu().item()
22 |
23 | def _percentile(self, x):
24 | x_dtype, x_shape = x.dtype, x.shape
25 | x = x.view(x.shape[0], -1)
26 | in_sorted, _ = torch.sort(x, dim=0)
27 | positions = self._percentiles * (x.shape[0]-1) / 100
28 | floored = torch.floor(positions)
29 | ceiled = floored + 1
30 | ceiled[ceiled > x.shape[0] - 1] = x.shape[0] - 1
31 | weight_ceiled = positions-floored
32 | weight_floored = 1.0 - weight_ceiled
33 | d0 = in_sorted[floored.long(), :] * weight_floored[:, None]
34 | d1 = in_sorted[ceiled.long(), :] * weight_ceiled[:, None]
35 | return (d0+d1).view(-1, *x_shape[1:]).type(x_dtype)
36 |
37 | def update(self, x):
38 | percentiles = self._percentile(x.detach())
39 | value = torch.clamp(percentiles[1] - percentiles[0], min=1.)
40 | self._value.data.lerp_(value, self.cfg.tau)
41 |
42 | def __call__(self, x, update=False):
43 | if update:
44 | self.update(x)
45 | return x * (1/self.value)
46 |
47 | def __repr__(self):
48 | return f'RunningScale(S: {self.value})'
49 |
--------------------------------------------------------------------------------
/simulation/rsl_rl/rsl_rl/utils/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 |
31 | import torch
32 | import numpy as np
33 | from typing import Tuple
34 |
35 |
36 | def split_and_pad_trajectories(tensor, dones):
37 | """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
38 | Returns masks corresponding to valid parts of the trajectories
39 | Example:
40 | Input: [ [a1, a2, a3, a4 | a5, a6],
41 | [b1, b2 | b3, b4, b5 | b6]
42 | ]
43 |
44 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
45 | [a5, a6, 0, 0], | [True, True, False, False],
46 | [b1, b2, 0, 0], | [True, True, False, False],
47 | [b3, b4, b5, 0], | [True, True, True, False],
48 | [b6, 0, 0, 0] | [True, False, False, False],
49 | ] | ]
50 |
51 | Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
52 | """
53 | dones = dones.clone()
54 | dones[-1] = 1
55 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
56 | flat_dones = dones.transpose(1, 0).reshape(-1, 1)
57 |
58 | # Get length of trajectory by counting the number of successive not done elements
59 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
60 | trajectory_lengths = done_indices[1:] - done_indices[:-1]
61 | trajectory_lengths_list = trajectory_lengths.tolist()
62 | # Extract the individual trajectories
63 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list)
64 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
65 |
66 |
67 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
68 | return padded_trajectories, trajectory_masks
69 |
70 | def unpad_trajectories(trajectories, masks):
71 | """ Does the inverse operation of split_and_pad_trajectories()
72 | """
73 | # Need to transpose before and after the masking to have proper reshaping
74 | return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0)
75 |
76 |
77 | class RunningMeanStd(object):
78 | def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] = ()):
79 | """
80 | Calculates the running mean and std of a data stream
81 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
82 | :param epsilon: helps with arithmetic issues
83 | :param shape: the shape of the data stream's output
84 | """
85 | self.mean = np.zeros(shape, np.float64)
86 | self.var = np.ones(shape, np.float64)
87 | self.count = epsilon
88 |
89 | def update(self, arr: np.ndarray) -> None:
90 | batch_mean = np.mean(arr, axis=0)
91 | batch_var = np.var(arr, axis=0)
92 | batch_count = arr.shape[0]
93 | self.update_from_moments(batch_mean, batch_var, batch_count)
94 |
95 | def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: int) -> None:
96 | delta = batch_mean - self.mean
97 | tot_count = self.count + batch_count
98 |
99 | new_mean = self.mean + delta * batch_count / tot_count
100 | m_a = self.var * self.count
101 | m_b = batch_var * batch_count
102 | m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
103 | new_var = m_2 / (self.count + batch_count)
104 |
105 | new_count = batch_count + self.count
106 |
107 | self.mean = new_mean
108 | self.var = new_var
109 | self.count = new_count
110 |
111 |
112 | class Normalizer(RunningMeanStd):
113 | def __init__(self, input_dim, epsilon=1e-4, clip_obs=10.0):
114 | super().__init__(shape=input_dim)
115 | self.epsilon = epsilon
116 | self.clip_obs = clip_obs
117 |
118 | def normalize(self, input):
119 | return np.clip(
120 | (input - self.mean) / np.sqrt(self.var + self.epsilon),
121 | -self.clip_obs, self.clip_obs)
122 |
123 | def normalize_torch(self, input, device):
124 | mean_torch = torch.tensor(
125 | self.mean, device=device, dtype=torch.float32)
126 | std_torch = torch.sqrt(torch.tensor(
127 | self.var + self.epsilon, device=device, dtype=torch.float32))
128 | return torch.clamp(
129 | (input - mean_torch) / std_torch, -self.clip_obs, self.clip_obs)
130 |
131 | def update_normalizer(self, rollouts, expert_loader):
132 | policy_data_generator = rollouts.feed_forward_generator_amp(
133 | None, mini_batch_size=expert_loader.batch_size)
134 | expert_data_generator = expert_loader.dataset.feed_forward_generator_amp(
135 | expert_loader.batch_size)
136 |
137 | for expert_batch, policy_batch in zip(expert_data_generator, policy_data_generator):
138 | self.update(
139 | torch.vstack(tuple(policy_batch) + tuple(expert_batch)).cpu().numpy())
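A hypothetical round-trip check of the padding helpers above (not from the repository); one environment keeps a full-length trajectory so the padded length equals the rollout horizon.

```python
import torch

T, N, D = 6, 2, 3                       # time steps, environments, feature dim
obs = torch.randn(T, N, D)
dones = torch.zeros(T, N, 1)
dones[3, 0] = 1                         # env 0 terminates an episode at t=3; env 1 never does

padded, masks = split_and_pad_trajectories(obs, dones)
recovered = unpad_trajectories(padded, masks)
assert torch.allclose(recovered, obs)   # the [time, envs, ...] layout is restored exactly
```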
--------------------------------------------------------------------------------
/simulation/rsl_rl/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='rsl_rl',
4 | version='1.0.2',
5 | author='Nikita Rudin',
6 | author_email='rudinn@ethz.ch',
7 | license="BSD-3-Clause",
8 | packages=find_packages(),
9 | description='Fast and simple RL algorithms implemented in pytorch',
10 | python_requires='>=3.6',
11 | install_requires=[
12 | "torch>=1.4.0",
13 | "torchvision>=0.5.0",
14 | "numpy>=1.16.4"
15 | ],
16 | )
17 |
--------------------------------------------------------------------------------