├── .gitignore ├── .vscode └── launch.json ├── LICENSE ├── LIPM ├── LIPM_3D.py ├── demo_LIPM_3D_vt.py └── demo_LIPM_3D_vt_analysis.py ├── README.md ├── gym ├── __init__.py ├── envs │ ├── __init__.py │ ├── base │ │ ├── base_config.py │ │ ├── base_task.py │ │ ├── fixed_robot.py │ │ ├── fixed_robot_config.py │ │ ├── legged_robot.py │ │ └── legged_robot_config.py │ ├── cartpole │ │ ├── cartpole.py │ │ └── cartpole_config.py │ ├── humanoid │ │ ├── humanoid_controller.py │ │ ├── humanoid_controller_config.py │ │ ├── humanoid_utils.py │ │ ├── humanoid_vanilla.py │ │ ├── humanoid_vanilla_config.py │ │ └── jacobian.py │ └── pendulum │ │ ├── pendulum.py │ │ └── pendulum_config.py ├── scripts │ ├── play.py │ ├── plotting │ │ ├── __init__.py │ │ ├── live_plotting.py │ │ └── npz_plotter.py │ ├── sweep.py │ ├── sweep_configs │ │ ├── sweep_config_cartpole.json │ │ ├── sweep_config_example.json │ │ ├── sweep_config_humanoid_controller.json │ │ ├── sweep_config_pendulum.json │ │ └── sweep_config_test_sweeps.json │ ├── test.py │ └── train.py ├── tests │ └── test_env.py └── utils │ ├── __init__.py │ ├── augmentor.py │ ├── gait_scheduler.py │ ├── helpers.py │ ├── interfaces │ ├── GamepadInterface.py │ ├── KeyboardInterface.py │ └── interfaces.py │ ├── logger.py │ ├── logging_and_saving │ ├── AnalysisRecorder.py │ ├── VisualizationRecorder.py │ ├── __init__.py │ ├── local_code_save_helper.py │ └── wandb_singleton.py │ ├── math.py │ ├── task_registry.py │ ├── terrain.py │ └── wandb_helper.py ├── learning ├── __init__.py ├── algorithms │ ├── __init__.py │ └── ppo.py ├── env │ ├── __init__.py │ └── vec_env.py ├── modules │ ├── __init__.py │ ├── actor.py │ ├── actor_critic.py │ ├── critic.py │ └── utils │ │ ├── __init__.py │ │ ├── neural_net.py │ │ └── running_mean_std.py ├── runners │ ├── __init__.py │ └── on_policy_runner.py ├── storage │ ├── __init__.py │ ├── base_storage.py │ └── rollout_storage.py └── utils │ ├── __init__.py │ ├── logging.py │ └── utils.py ├── requirements.txt ├── resources └── robots │ ├── cartpole │ └── urdf │ │ └── cartpole.urdf │ ├── humanoid │ ├── trajectories │ │ ├── SH_standing_roll_2021_11_1_OUTPUT_1.csv │ │ ├── SH_standing_roll_2021_11_1_OUTPUT_2.csv │ │ ├── SH_walking_roll_2021_11_1_OUTPUT.csv │ │ ├── across_back10 │ │ │ ├── JSM_across_back_1_RESAMPLED10.csv │ │ │ ├── JSM_across_back_2_RESAMPLED10.csv │ │ │ ├── JSM_across_back_3_RESAMPLED10.csv │ │ │ ├── SH_across_back_1_RESAMPLED10.csv │ │ │ ├── SH_across_back_2_RESAMPLED10.csv │ │ │ └── SH_across_back_3_RESAMPLED10.csv │ │ ├── humanoid3d_backflip.csv │ │ ├── humanoid3d_cartwheel.csv │ │ ├── humanoid3d_crawl.csv │ │ ├── humanoid3d_dance_a.csv │ │ ├── humanoid3d_dance_b.csv │ │ ├── humanoid3d_dance_spin.csv │ │ ├── humanoid3d_getup_facedown.csv │ │ ├── humanoid3d_getup_faceup.csv │ │ ├── humanoid3d_jog.csv │ │ ├── humanoid3d_jump.csv │ │ ├── humanoid3d_kick.csv │ │ ├── humanoid3d_punch.csv │ │ ├── humanoid3d_roll.csv │ │ ├── humanoid3d_run.csv │ │ ├── humanoid3d_sideflip.csv │ │ ├── humanoid3d_spinkick.csv │ │ ├── humanoid3d_stealthy_walk.csv │ │ ├── humanoid3d_walk.csv │ │ └── humanoid3d_zombie_walk.csv │ └── urdf │ │ ├── README.md │ │ ├── humanoid_fixed_arms_sf_update.urdf │ │ └── meshes_v3 │ │ ├── back.stl │ │ ├── left_foot.stl │ │ ├── left_forearm.stl │ │ ├── left_hip_abad.stl │ │ ├── left_hip_yaw.stl │ │ ├── left_leg_lower.stl │ │ ├── left_leg_upper.stl │ │ ├── left_shoulder1.stl │ │ ├── left_shoulder2.stl │ │ ├── left_shoulder3.stl │ │ └── torso.stl │ ├── pendulum │ └── urdf │ │ └── pendulum.urdf │ └── rom │ └── urdf │ ├── 
humanoid_RED.urdf │ ├── humanoid_compass.urdf │ ├── humanoid_fixed_arms.urdf │ ├── humanoid_fixed_arms_full.urdf │ ├── humanoid_full.urdf │ ├── humanoid_kneed.urdf │ ├── meshes_v3 │ ├── back.stl │ ├── left_foot.stl │ ├── left_forearm.stl │ ├── left_hip_abad.stl │ ├── left_hip_yaw.stl │ ├── left_leg_lower.stl │ ├── left_leg_upper.stl │ ├── left_shoulder1.stl │ ├── left_shoulder2.stl │ ├── left_shoulder3.stl │ └── torso.stl │ ├── point_mass.urdf │ ├── slip.urdf │ ├── srbm.urdf │ ├── srbm_biped.urdf │ └── srbm_biped_revolute.urdf └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | gym/scripts/wandb/ 3 | .idea/ 4 | gpuGym.egg-info/ 5 | __pycache__ 6 | wandb/ 7 | .vscode/ 8 | gym/wandb 9 | *.DS_Store 10 | *.zip 11 | 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | 64 | # Jupyter Notebook 65 | .ipynb_checkpoints 66 | 67 | # IPython 68 | profile_default/ 69 | ipython_config.py 70 | 71 | 72 | # Environments 73 | .env 74 | .venv 75 | venv/ 76 | ENV/ 77 | env.bak/ 78 | venv.bak/ 79 | 80 | # Personal 81 | user/ 82 | analysis/ 83 | models/ 84 | *.mp4 85 | *.pdf 86 | *.png 87 | *.gif 88 | *.jpg -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "justMyCode": true, 14 | // "args" : ["--task", "humanoid_vanilla", "--num_envs=1", "--disable_wandb", "--disable_local_saving", "--sim_device=cpu", "--rl_device=cpu"] 15 | // "args" : ["--task", "humanoid_vanilla", "--num_envs=4096", "--disable_wandb", "--disable_local_saving", "--headless"] 16 | "args" : ["--task", "humanoid_controller", "--num_envs=1", "--sim_device=cpu", "--rl_device=cpu"] 17 | // "args" : ["--task", "humanoid_controller", "--num_envs=1", "--disable_wandb", "--disable_local_saving", "--sim_device=cpu", "--rl_device=cpu"] 18 | // "args" : ["--task", "humanoid_controller", "--num_envs=4096", "--disable_wandb", "--disable_local_saving", "--headless"] 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ho Jae Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LIPM/LIPM_3D.py: -------------------------------------------------------------------------------- 1 | # -*-coding:UTF-8 -*- 2 | import numpy as np 3 | 4 | # Definition of 3D Linear Inverted Pendulum 5 | class LIPM3D: 6 | def __init__(self, 7 | dt=0.001, 8 | T=1.0, 9 | T_d = 0.4, 10 | s_d = 0.5, 11 | w_d = 0.4, 12 | support_leg='left_leg'): 13 | self.dt = dt 14 | self.t = 0 15 | self.T = T # step time 16 | self.T_d = T_d # desired step duration 17 | self.s_d = s_d # desired step length 18 | self.w_d = w_d # desired step width 19 | 20 | self.eICP_x = 0 # ICP location x 21 | self.eICP_y = 0 # ICP location y 22 | self.u_x = 0 # step location x 23 | self.u_y = 0 # step location y 24 | 25 | # COM initial state 26 | self.x_0 = 0 27 | self.vx_0 = 0 28 | self.y_0 = 0 29 | self.vy_0 = 0 30 | 31 | # COM real-time state 32 | self.x_t = 0 33 | self.vx_t = 0 34 | self.y_t = 0 35 | self.vy_t = 0 36 | 37 | self.support_leg = support_leg 38 | self.support_foot_pos = [0.0, 0.0, 0.0] 39 | self.left_foot_pos = [0.0, 0.0, 0.0] 40 | self.right_foot_pos = [0.0, 0.0, 0.0] 41 | self.COM_pos = [0.0, 0.0, 0.0] 42 | 43 | def initializeModel(self, COM_pos, left_foot_pos, right_foot_pos): 44 | self.COM_pos = COM_pos 45 | 46 | if self.support_leg == 'left_leg': 47 | self.left_foot_pos = left_foot_pos 48 | self.right_foot_pos = left_foot_pos 49 | self.support_foot_pos = left_foot_pos 50 | elif self.support_leg == 'right_leg': 51 | self.left_foot_pos = right_foot_pos 52 | self.right_foot_pos = right_foot_pos 53 | self.support_foot_pos = right_foot_pos 54 | 55 | self.zc = self.COM_pos[2] 56 | self.w_0 = np.sqrt(9.81 / self.zc) 57 | 58 | def step(self): 59 | self.t += self.dt 60 | t = self.t 61 | 62 | self.x_t = self.x_0*np.cosh(t*self.w_0) + self.vx_0*np.sinh(t*self.w_0)/self.w_0 63 | self.vx_t = self.x_0*self.w_0*np.sinh(t*self.w_0) + self.vx_0*np.cosh(t*self.w_0) 64 | 65 | self.y_t = self.y_0*np.cosh(t*self.w_0) + self.vy_0*np.sinh(t*self.w_0)/self.w_0 66 | self.vy_t = self.y_0*self.w_0*np.sinh(t*self.w_0) + self.vy_0*np.cosh(t*self.w_0) 67 | 68 | def calculateXfVf(self): 69 | x_f = self.x_0*np.cosh(self.T*self.w_0) + self.vx_0*np.sinh(self.T*self.w_0)/self.w_0 70 | vx_f = self.x_0*self.w_0*np.sinh(self.T*self.w_0) + self.vx_0*np.cosh(self.T*self.w_0) 71 | 72 | y_f = self.y_0*np.cosh(self.T*self.w_0) + self.vy_0*np.sinh(self.T*self.w_0)/self.w_0 73 | vy_f = self.y_0*self.w_0*np.sinh(self.T*self.w_0) + self.vy_0*np.cosh(self.T*self.w_0) 74 | 75 | return x_f, vx_f, y_f, vy_f 76 | 77 | def calculateFootLocationForNextStepXcoMWorld(self, theta=0.): 78 | x_f, vx_f, y_f, vy_f = self.calculateXfVf() 79 | x_f_world = x_f + self.support_foot_pos[0] 80 | y_f_world = y_f + self.support_foot_pos[1] 81 | self.eICP_x = x_f_world + vx_f/self.w_0 82 | self.eICP_y = y_f_world + vy_f/self.w_0 83 | b_x = self.s_d / (np.exp(self.w_0*self.T_d) - 1) 84 | b_y = self.w_d / (np.exp(self.w_0*self.T_d) + 1) 85 | 86 | original_offset_x = -b_x 87 | original_offset_y = -b_y if self.support_leg == "left_leg" else b_y 88 | offset_x = np.cos(theta) * original_offset_x - np.sin(theta) * original_offset_y 89 | offset_y = np.sin(theta) * original_offset_x + np.cos(theta) * original_offset_y 90 | 91 | self.u_x = self.eICP_x + offset_x 92 | self.u_y = self.eICP_y + offset_y 93 | 94 | def calculateFootLocationForNextStepXcoMBase(self, theta=0.): 95 | x_f, vx_f, y_f, vy_f = self.calculateXfVf() 96 | x_f_world = x_f + self.support_foot_pos[0] 97 | y_f_world = y_f + 
self.support_foot_pos[1] 98 | self.eICP_x = x_f_world + vx_f/self.w_0 99 | self.eICP_y = y_f_world + vy_f/self.w_0 100 | b_x = self.s_d / (np.exp(self.w_0*self.T_d) - 1) 101 | b_y = self.w_d / (np.exp(self.w_0*self.T_d) + 1) 102 | 103 | original_offset_x = -b_x 104 | original_offset_y = -b_y if self.support_leg == "left_leg" else b_y 105 | offset_x = np.cos(theta) * original_offset_x - np.sin(theta) * original_offset_y 106 | offset_y = np.sin(theta) * original_offset_x - np.cos(theta) * original_offset_y 107 | 108 | self.u_x = self.eICP_x + offset_x 109 | self.u_y = self.eICP_y + offset_y 110 | 111 | def switchSupportLeg(self): 112 | if self.support_leg == 'left_leg': 113 | print('\n---- switch the support leg to the right leg') 114 | self.support_leg = 'right_leg' 115 | COM_pos_x = self.x_t + self.left_foot_pos[0] 116 | COM_pos_y = self.y_t + self.left_foot_pos[1] 117 | self.x_0 = COM_pos_x - self.right_foot_pos[0] 118 | self.y_0 = COM_pos_y - self.right_foot_pos[1] 119 | self.support_foot_pos = self.right_foot_pos 120 | elif self.support_leg == 'right_leg': 121 | print('\n---- switch the support leg to the left leg') 122 | self.support_leg = 'left_leg' 123 | COM_pos_x = self.x_t + self.right_foot_pos[0] 124 | COM_pos_y = self.y_t + self.right_foot_pos[1] 125 | self.x_0 = COM_pos_x - self.left_foot_pos[0] 126 | self.y_0 = COM_pos_y - self.left_foot_pos[1] 127 | self.support_foot_pos = self.left_foot_pos 128 | 129 | self.t = 0 130 | self.vx_0 = self.vx_t 131 | self.vy_0 = self.vy_t 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ModelBasedFootstepPlanning-IROS2024 2 | This repository contains the open-source code for the following paper, presented at the 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS). 3 | 4 | #### Title: Integrating Model-Based Footstep Planning with Model-Free Reinforcement Learning for Dynamic Legged Locomotion
5 | Paper Link: [https://arxiv.org/abs/2408.02662](https://arxiv.org/abs/2408.02662)
6 | Video Link: [https://youtu.be/Z0E9AKt6RFo](https://youtu.be/Z0E9AKt6RFo) 7 | 8 | ### Installation ### 9 | 1. Create a new Python 3.8 virtual environment using Anaconda 10 | 2. Clone this repo 11 | 3. Install the humanoidGym requirements: 12 | ```bash 13 | pip install -r requirements.txt 14 | ``` 15 | 4. Install Isaac Gym 16 | - Download and install Isaac Gym Preview 4 (Preview 3 should still work) from https://developer.nvidia.com/isaac-gym 17 | - Extract the zip package 18 | - Copy the `isaacgym` folder, and place it in a new location 19 | - Install the `isaacgym/python` requirements 20 | ```bash 21 | cd <isaacgym_location>/python 22 | pip install -e . 23 | ``` 24 | 5. Install humanoidGym 25 | - Go back to the humanoidGym repo and install it. 26 | ```bash 27 | pip install -e . 28 | ``` 29 | --- 30 | ## User Manual ## 31 | ## 1. Linear Inverted Pendulum Model (LIPM) Animation 32 | All the LIP model-related code is in the `LIPM` folder. 33 | This code is modified from [BipedalWalkingRobots](https://github.com/chauby/BipedalWalkingRobots) for the Center of Mass (CoM) velocity tracking task. 34 | 35 | By running the code below, you should be able to get the following videos: 36 | ```bash 37 | python LIPM/demo_LIPM_3D_vt.py 38 | ``` 39 | 
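The demo wraps the `LIPM3D` class from `LIPM/LIPM_3D.py` in a plotting loop, but the model can also be stepped directly. Below is a minimal sketch (not a script in this repo; the initial values are illustrative) that rolls out the CoM analytically and places each next footstep with the XCoM rule:

```python
# Minimal usage sketch for LIPM3D (illustrative values; run from the repo root).
from LIPM.LIPM_3D import LIPM3D

lipm = LIPM3D(dt=0.001, T=0.4, T_d=0.4, s_d=0.5, w_d=0.4, support_leg='left_leg')
lipm.initializeModel(COM_pos=[0.0, 0.05, 0.8],
                     left_foot_pos=[0.0, 0.1, 0.0],
                     right_foot_pos=[0.0, -0.1, 0.0])
lipm.vx_0 = 0.3  # give the CoM an initial forward velocity

for _ in range(4):                 # take four steps
    while lipm.t < lipm.T:         # closed-form CoM rollout over one step
        lipm.step()
    # XCoM-based placement of the next footstep (world frame)
    lipm.calculateFootLocationForNextStepXcoMWorld(theta=0.)
    if lipm.support_leg == 'left_leg':
        lipm.right_foot_pos = [lipm.u_x, lipm.u_y, 0.0]
    else:
        lipm.left_foot_pos = [lipm.u_x, lipm.u_y, 0.0]
    lipm.switchSupportLeg()        # resets t and re-centers the CoM state
```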
40 | 41 |
42 | 43 | By running the code below, you should be able to get the following videos and images: 44 | ```bash 45 | python LIPM/demo_LIPM_3D_vt_analysis.py 46 | ``` 47 | 48 |
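For reference, both demos roll out the closed-form LIPM solution implemented in `LIPM3D.step()`. With natural frequency

```math
\omega_0 = \sqrt{g/z_c}, \qquad
x(t) = x_0\cosh(\omega_0 t) + \frac{\dot{x}_0}{\omega_0}\sinh(\omega_0 t), \qquad
\dot{x}(t) = x_0\,\omega_0\sinh(\omega_0 t) + \dot{x}_0\cosh(\omega_0 t)
```

(and identically for `y`), the next footstep is placed at the end-of-step instantaneous capture point `eICP = x_f + vx_f/w_0`, shifted by the nominal offsets `b_x = s_d/(exp(w_0*T_d) - 1)` and `b_y = ±w_d/(exp(w_0*T_d) + 1)`; see `calculateFootLocationForNextStepXcoMWorld` in `LIPM/LIPM_3D.py`.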
49 | 50 | 51 | 52 |
53 | 54 | ## 2. MIT Humanoid in IsaacGym 55 | ### Train ### 56 | ```bash 57 | python gym/scripts/train.py --task=humanoid_controller 58 | ``` 59 | - To run on CPU add the following arguments: `--sim_device=cpu`, `--rl_device=cpu` (sim on CPU and rl on GPU is possible). 60 | - To run headless (no rendering) add `--headless`. 61 | - **Important**: To improve performance, once the training starts press `v` to stop the rendering. You can then enable it later to check the progress. 62 | - The trained policy is saved in `gym/logs/<experiment_name>/<date_time>_<run_name>/model_<iteration>.pt`, where `<experiment_name>` and `<run_name>` are defined in the train config. 63 | - The following command line arguments override the values set in the config files: 64 | - --task TASK: Task name. 65 | - --resume: Resume training from a checkpoint. 66 | - --experiment_name EXPERIMENT_NAME: Name of the experiment to run or load. 67 | - --run_name RUN_NAME: Name of the run. 68 | - --load_run LOAD_RUN: Name of the run to load when resume=True. If -1: will load the last run. 69 | - --checkpoint CHECKPOINT: Saved model checkpoint number. If -1: will load the last checkpoint. 70 | - --num_envs NUM_ENVS: Number of environments to create. 71 | - --seed SEED: Random seed. 72 | - --max_iterations MAX_ITERATIONS: Maximum number of training iterations. 73 | - --original_cfg: Use the configs stored in the saved files associated with the loaded policy instead of the current ones in envs. 74 | 75 | ### Play (a trained policy) ### 76 | ```bash 77 | python gym/scripts/play.py --task=humanoid_controller 78 | ``` 79 | - By default the loaded policy is the last model of the last run of the experiment folder. 80 | - Other runs/model iterations can be selected by setting `--load_run` and `--checkpoint`, as shown in the example below. 81 | - You would need around 3,000 iterations of training to obtain a well-behaved policy. 82 | 83 | 
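For example, to replay a specific checkpoint of a named run (placeholder name in angle brackets, iteration number illustrative):

```bash
python gym/scripts/play.py --task=humanoid_controller --load_run=<run_name> --checkpoint=2500
```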
84 | 85 |
86 | 87 | ## 3. Deploy the policy to robot hardware 88 | This repository does not include a code stack for deploying a policy to MIT Humanoid hardware. 89 | Please check [Cheetah-Software](https://github.com/mit-biomimetics/Cheetah-Software) for our lab's hardware code stack. 90 | 91 | To deploy the trained policy, you would need to set `EXPORT_POLICY=TRUE` in the `gym/scripts/play.py` script. 92 | This produces a `policy.onnx` file that you can run from C++ code. 93 | 94 | --- 95 | ### System Info ### 96 | Operating System: Ubuntu 22.04
97 | GPU: GeForce RTX 3090 98 | 99 | --- 100 | ### Troubleshooting ### 101 | 1. If you get the following error: `ImportError: libpython3.8m.so.1.0: cannot open shared object file: No such file or directory`, do: `export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH` 102 | 103 | --- 104 | ### Acknowledgement ### 105 | We would appreciate it if you cite this work in academic publications: 106 | ``` 107 | @inproceedings{lee2024integrating, 108 | title={Integrating model-based footstep planning with model-free reinforcement learning for dynamic legged locomotion}, 109 | author={Lee, Ho Jae and Hong, Seungwoo and Kim, Sangbae}, 110 | booktitle={2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 111 | pages={11248--11255}, 112 | year={2024}, 113 | organization={IEEE} 114 | } 115 | ``` 116 | -------------------------------------------------------------------------------- /gym/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import os 32 | 33 | LEGGED_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 34 | LEGGED_GYM_ENVS_DIR = os.path.join(LEGGED_GYM_ROOT_DIR, 'gym', 'envs') -------------------------------------------------------------------------------- /gym/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. 
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR 32 | from .base.legged_robot import LeggedRobot 33 | 34 | from .pendulum.pendulum import Pendulum 35 | from .pendulum.pendulum_config import PendulumCfg, PendulumRunnerCfg 36 | 37 | from .cartpole.cartpole import Cartpole 38 | from .cartpole.cartpole_config import CartpoleCfg, CartpoleRunnerCfg 39 | 40 | from .humanoid.humanoid_vanilla import HumanoidVanilla 41 | from .humanoid.humanoid_vanilla_config import HumanoidVanillaCfg, HumanoidVanillaRunnerCfg 42 | from .humanoid.humanoid_controller import HumanoidController 43 | from .humanoid.humanoid_controller_config import HumanoidControllerCfg, HumanoidControllerRunnerCfg 44 | 45 | from gym.utils.task_registry import task_registry 46 | 47 | task_registry.register("pendulum", Pendulum, PendulumCfg, PendulumRunnerCfg) 48 | task_registry.register("cartpole", Cartpole, CartpoleCfg, CartpoleRunnerCfg) 49 | 50 | task_registry.register("humanoid_vanilla", HumanoidVanilla, HumanoidVanillaCfg, HumanoidVanillaRunnerCfg) 51 | task_registry.register("humanoid_controller", HumanoidController, HumanoidControllerCfg, HumanoidControllerRunnerCfg) 52 | 53 | 54 | -------------------------------------------------------------------------------- /gym/envs/base/base_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. 
Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import inspect 32 | 33 | class BaseConfig: 34 | def __init__(self) -> None: 35 | """ Initializes all member classes recursively. Ignores all names starting with '__' (built-in methods).""" 36 | self.init_member_classes(self) 37 | 38 | @staticmethod 39 | def init_member_classes(obj): 40 | # iterate over all attribute names 41 | for key in dir(obj): 42 | # disregard builtin attributes 43 | # if key.startswith("__"): 44 | if key=="__class__": 45 | continue 46 | # get the corresponding attribute object 47 | var = getattr(obj, key) 48 | # check if the attribute is a class 49 | if inspect.isclass(var): 50 | # instantiate the class 51 | i_var = var() 52 | # set the attribute to the instance instead of the type 53 | setattr(obj, key, i_var) 54 | # recursively init members of the attribute 55 | BaseConfig.init_member_classes(i_var) -------------------------------------------------------------------------------- /gym/envs/base/fixed_robot_config.py: -------------------------------------------------------------------------------- 1 | 2 | from .base_config import BaseConfig 3 | 4 | class FixedRobotCfg(BaseConfig): 5 | class env: 6 | num_envs = 1096 7 | num_observations = 7 8 | num_critic_obs = num_observations # if not None a privileged_obs_buf will be returned by step() (critic obs for asymmetric training). None is returned otherwise 9 | num_actions = 1 10 | num_actuators = 1 11 | env_spacing = 4. # not used with heightfields/trimeshes 12 | root_height = 2. 13 | send_timeouts = True # send time out information to the algorithm 14 | episode_length_s = 20 # episode length in seconds 15 | 16 | class terrain: 17 | mesh_type = 'none' 18 | horizontal_scale = 0.1 # [m] 19 | vertical_scale = 0.005 # [m] 20 | border_size = 25 # [m] 21 | static_friction = 1.0 22 | dynamic_friction = 1.0 23 | restitution = 0. 24 | 25 | 26 | class init_state: 27 | 28 | reset_mode = "reset_to_basic" 29 | # reset_mode chooses how the initial conditions are chosen. 
30 | # "reset_to_basic" = a single position 31 | # "reset_to_range" = uniformly random from a range defined below 32 | 33 | # * target state when action = 0, also reset positions for basic mode 34 | default_joint_angles = {"joint_a": 0., 35 | "joint_b": 0.} 36 | 37 | # * initial conditions for reset_to_range 38 | dof_pos_range = {'joint_a': [-1., 1.], 39 | 'joint_b': [-1., 1.]} 40 | dof_vel_range = {'joint_a': [-1., 1.], 41 | 'joint_b': [-1., 1.]} 42 | 43 | 44 | 45 | class control: 46 | control_type = 'P' # P: position, V: velocity, T: torques 47 | # PD Drive parameters: 48 | stiffness = {'joint_a': 10.0} # [N*m/rad] 49 | damping = {'joint_a': 0.5} # [N*m*s/rad] 50 | # action scale: target angle = actionScale * action + defaultAngle 51 | action_scale = 0.5 52 | # decimation: Number of control action updates @ sim DT per policy DT 53 | # decimation = 4 54 | exp_avg_decay = None 55 | 56 | actuated_joints_mask = [] # for each dof: 1 if actuated, 0 if passive 57 | # Empty implies no change in the _compute_torques step 58 | ctrl_frequency = 100 59 | desired_sim_frequency = 100 60 | 61 | class asset: 62 | file = "" 63 | penalize_contacts_on = [] 64 | terminate_after_contacts_on = [] 65 | disable_gravity = False 66 | disable_actions = False 67 | disable_motors = False 68 | collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true"> 69 | fix_base_link = True # fix the base of the robot 70 | default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) 71 | self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter 72 | replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation 73 | flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up 74 | 75 | density = 0.001 76 | angular_damping = 0. 77 | linear_damping = 0. 78 | max_angular_velocity = 1000. 79 | max_linear_velocity = 1000. 80 | armature = 0. 81 | thickness = 0.01 82 | 83 | class domain_rand: 84 | randomize_friction = False 85 | friction_range = [0.5, 1.25] 86 | push_robots = False 87 | push_interval_s = 15 88 | max_push_vel_xy = 1. 89 | 90 | class rewards: 91 | class weights: 92 | torques = -0.00001 93 | dof_vel = -0. 94 | collision = -1. 95 | action_rate = -0.01 96 | dof_pos_limits = -1. 97 | class termination_weights: 98 | termination = 0. 99 | 100 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 101 | soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized 102 | soft_dof_vel_limit = 1. 103 | soft_torque_limit = 1. # ! may want to turn this off 104 | 105 | class normalization: 106 | clip_actions = 1000. 107 | class obs_scales: 108 | dof_pos = 1. 109 | dof_vel = 1. 110 | 111 | class noise: 112 | add_noise = True 113 | noise_level = 1.0 # scales other values 114 | class noise_scales: 115 | noise = 0.1 # implement as needed, also in your robot class 116 | 117 | # viewer camera: 118 | class viewer: 119 | ref_env = 0 120 | pos = [10, 0, 6] # [m] 121 | lookat = [11., 5, 3.] # [m] 122 | record = False 123 | 124 | class sim: 125 | dt = 0.001 126 | substeps = 1 127 | gravity = [0., 0. 
, -9.81] # [m/s^2] 128 | up_axis = 1 # 0 is y, 1 is z 129 | 130 | class physx: 131 | num_threads = 10 132 | solver_type = 1 # 0: pgs, 1: tgs 133 | num_position_iterations = 4 134 | num_velocity_iterations = 0 135 | contact_offset = 0.01 # [m] 136 | rest_offset = 0.0 # [m] 137 | bounce_threshold_velocity = 0.5 #0.5 [m/s] 138 | max_depenetration_velocity = 10.0 139 | max_gpu_contact_pairs = 2**23 #2**24 -> needed for 8000 envs and more 140 | default_buffer_size_multiplier = 5 141 | contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2) 142 | 143 | class FixedRobotRunnerCfg(BaseConfig): 144 | seed = -1 145 | runner_class_name = 'OnPolicyRunner' 146 | 147 | class logging: 148 | enable_local_saving = True 149 | 150 | class policy: 151 | init_noise_std = 1.0 152 | num_layers = 2 153 | num_units = 64 154 | actor_hidden_dims = [num_units] * num_layers 155 | critic_hidden_dims = [num_units] * num_layers 156 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 157 | 158 | actor_obs = ["observation_a", 159 | "observation_b", 160 | "these_need_to_be_atributes_(states)_of_the_robot_env"] 161 | 162 | critic_obs = ["observation_x", 163 | "observation_y", 164 | "critic_obs_can_be_the_same_or_different_than_actor_obs"] 165 | class noise: 166 | noise = 0.1 # implement as needed, also in your robot class 167 | class rewards: 168 | class weights: 169 | torques = 0. 170 | dof_vel = 0. 171 | collision = 0. 172 | action_rate = 0. 173 | action_rate2 = 0. 174 | dof_pos_limits = 1. 175 | class termination_weight: 176 | termination = 0.0 177 | 178 | class algorithm: 179 | # training params 180 | value_loss_coef = 1.0 181 | use_clipped_value_loss = True 182 | clip_param = 0.2 183 | entropy_coef = 0.01 184 | num_learning_epochs = 5 185 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 186 | learning_rate = 1.e-3 #5.e-4 187 | schedule = 'adaptive' # could be adaptive, fixed 188 | gamma = 0.99 189 | lam = 0.95 190 | desired_kl = 0.01 191 | max_grad_norm = 1. 192 | 193 | class runner: 194 | policy_class_name = 'ActorCritic' 195 | algorithm_class_name = 'PPO' 196 | num_steps_per_env = 24 # per iteration 197 | max_iterations = 1500 # number of policy updates 198 | SE_learner = None 199 | 200 | # logging 201 | save_interval = 50 # check for potential saves every this many iterations 202 | run_name = '' 203 | experiment_name = 'fixed_robot' 204 | 205 | # load and resume 206 | resume = False 207 | load_run = -1 # -1 = last run 208 | checkpoint = -1 # -1 = last saved model 209 | resume_path = None # updated from load_run and chkpt 210 | -------------------------------------------------------------------------------- /gym/envs/cartpole/cartpole.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2021, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. 
Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | from time import time 30 | import numpy as np 31 | import os 32 | 33 | from isaacgym.torch_utils import * 34 | from isaacgym import gymtorch, gymapi, gymutil 35 | 36 | import torch 37 | from gym.envs.base.fixed_robot import FixedRobot 38 | 39 | class Cartpole(FixedRobot): 40 | 41 | def _post_physics_step_callback(self): 42 | super()._post_physics_step_callback() 43 | self.pole_is_upright = torch.cos(self.dof_pos[:,1]) > 0.9 44 | 45 | def compute_observations(self): 46 | self.obs_buf = torch.cat(( 47 | self.dof_pos, # [2] "slider_to_cart , cart_to_pole" pos 48 | self.dof_vel, # [2] "slider_to_cart , cart_to_pole" vel 49 | ), dim=-1) 50 | 51 | if self.cfg.env.num_critic_obs: 52 | self.critic_obs_buf = self.obs_buf 53 | 54 | if self.add_noise: 55 | self.obs_buf += (2*torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec 56 | 57 | def _get_noise_scale_vec(self): 58 | noise_vec = torch.zeros_like(self.obs_buf[0]) 59 | self.add_noise = self.cfg.noise.add_noise 60 | noise_scales = self.cfg.noise.noise_scales 61 | noise_level = self.cfg.noise.noise_level 62 | noise_vec[:2] = noise_scales.dof_pos * self.obs_scales.dof_pos 63 | noise_vec[2:] = noise_scales.dof_vel * self.obs_scales.dof_vel 64 | 65 | noise_vec = noise_vec * noise_level 66 | return noise_vec 67 | 68 | def _compute_torques(self, actions): 69 | return torch.clip(actions, -self.torque_limits, self.torque_limits) 70 | 71 | def _reward_cart_vel(self): 72 | return -self.dof_vel[:, 0].square() * self.pole_is_upright 73 | 74 | def _reward_pole_vel(self): 75 | return -self.dof_vel[:, 1].square() * self.pole_is_upright 76 | 77 | def _reward_upright_pole(self): 78 | return torch.exp(-torch.square(self.dof_pos[:,1]) / 0.25) -------------------------------------------------------------------------------- /gym/envs/cartpole/cartpole_config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from gym.envs.base.fixed_robot_config import FixedRobotCfg, FixedRobotRunnerCfg 3 | 4 | class CartpoleCfg(FixedRobotCfg): 5 | class env(FixedRobotCfg.env): 6 | num_envs = 4096 # 1096 7 | num_observations = 4 8 | num_actions = 1 # 1 for the cart force 9 | episode_length_s = 5 # 100 10 | num_critic_obs = num_observations 11 | 12 | class init_state(FixedRobotCfg.init_state): 13 | 14 | default_joint_angles = {"slider_to_cart": 0., 15 | "cart_to_pole": 0.} 16 | 17 | # reset_mode chooses how the initial conditions are chosen. 
18 | # "reset_to_basic" = a single position 19 | # "reset_to_range" = uniformly random from a range defined below 20 | reset_mode = "reset_to_range" 21 | 22 | # * initial conditions for reset_to_range 23 | dof_pos_range = {'slider_to_cart': [-2.5, 2.5], 24 | 'cart_to_pole': [-torch.pi, torch.pi]} 25 | dof_vel_range = {'slider_to_cart': [-1., 1.], 26 | 'cart_to_pole': [-1., 1.]} 27 | 28 | class control(FixedRobotCfg.control): 29 | # PD Drive parameters: 30 | stiffness = {'slider_to_cart': 10.} # [N*m/rad] 31 | damping = {'slider_to_cart': 0.5} # [N*m*s/rad] 32 | 33 | # for each dof: 1 if actuated, 0 if passive 34 | # Empty implies no change in the _compute_torques step 35 | actuated_joints_mask = [1, # slider_to_cart 36 | 0] # cart_to_pole 37 | 38 | # decimation: Number of control action updates @ sim DT per policy DT 39 | decimation = 2 40 | # ctrl_frequency = 250 41 | # desired_sim_frequency = 500 42 | 43 | class asset(FixedRobotCfg.asset): 44 | # Things that differ 45 | file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/cartpole/urdf/cartpole.urdf" 46 | flip_visual_attachments = False 47 | 48 | # Toggles to keep 49 | disable_gravity = False 50 | disable_motors = False # all torques set to 0 51 | 52 | class rewards(FixedRobotCfg.rewards): 53 | class weights: 54 | cart_vel = 0.05 55 | pole_vel = 0.05 56 | torques = 0.1 57 | upright_pole = 30 58 | termination = 0. 59 | 60 | class normalization(FixedRobotCfg.normalization): 61 | class obs_scales: 62 | dof_pos = 1. #[1/3., 1/torch.pi] 63 | dof_vel = 1. #[1/20., 1/(4*torch.pi)] 64 | 65 | class noise(FixedRobotCfg.noise): 66 | add_noise = True 67 | noise_level = 1.0 # scales other values 68 | 69 | class noise_scales: 70 | dof_pos = 0.001 71 | dof_vel = 0.01 72 | 73 | class sim(FixedRobotCfg.sim): 74 | dt = 0.002 75 | 76 | class CartpoleRunnerCfg(FixedRobotRunnerCfg): 77 | # We need random experiments to run 78 | seed = -1 79 | runner_class_name = 'OnPolicyRunner' 80 | 81 | class policy(FixedRobotRunnerCfg.policy): 82 | init_noise_std = 1.0 83 | num_layers = 2 84 | num_units = 64 85 | actor_hidden_dims = [num_units] * num_layers 86 | critic_hidden_dims = [num_units] * num_layers 87 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 88 | 89 | class algorithm(FixedRobotRunnerCfg.algorithm): 90 | # training params 91 | value_loss_coef = 1.0 92 | use_clipped_value_loss = True 93 | clip_param = 0.2 94 | entropy_coef = 0.01 95 | num_learning_epochs = 5 96 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 97 | learning_rate = 1.e-3 # 5.e-4 98 | schedule = 'adaptive' # could be adaptive, fixed 99 | gamma = 0.998 100 | lam = 0.95 101 | desired_kl = 0.01 102 | max_grad_norm = 1. 
103 | 104 | class runner(FixedRobotRunnerCfg.runner): 105 | policy_class_name = 'ActorCritic' 106 | algorithm_class_name = 'PPO' 107 | num_steps_per_env = 96 # per iteration 108 | max_iterations = 500 # number of policy updates 109 | 110 | # logging 111 | save_interval = 100 # check for potential saves every this many iterations 112 | run_name = '' 113 | experiment_name = 'cartpole' 114 | 115 | # load and resume 116 | resume = False 117 | load_run = -1 # -1 = last run 118 | checkpoint = -1 # -1 = last saved model 119 | resume_path = None # updated from load_run and chkpt 120 | -------------------------------------------------------------------------------- /gym/envs/humanoid/jacobian.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def apply_coupling(q, qd, q_des, qd_des, kp, kd, tau_ff): 5 | # Create a Jacobian matrix and move it to the same device as input tensors 6 | J = torch.eye(q.shape[-1]).to(q.device) 7 | J[4, 3] = 1 8 | J[9, 8] = 1 9 | 10 | # Perform transformations using Jacobian 11 | q = torch.matmul(q, J.T) 12 | qd = torch.matmul(qd, J.T) 13 | q_des = torch.matmul(q_des, J.T) 14 | qd_des = torch.matmul(qd_des, J.T) 15 | 16 | # Inverse of the transpose of Jacobian 17 | J_inv_T = torch.inverse(J.T) 18 | 19 | # Compute feed-forward torques 20 | tau_ff = torch.matmul(J_inv_T, tau_ff.T).T 21 | 22 | # Compute kp and kd terms 23 | kp = torch.diagonal( 24 | torch.matmul( 25 | torch.matmul(J_inv_T, torch.diag_embed(kp, dim1=-2, dim2=-1)), 26 | J_inv_T.T 27 | ), 28 | dim1=-2, dim2=-1 29 | ) 30 | 31 | kd = torch.diagonal( 32 | torch.matmul( 33 | torch.matmul(J_inv_T, torch.diag_embed(kd, dim1=-2, dim2=-1)), 34 | J_inv_T.T 35 | ), 36 | dim1=-2, dim2=-1 37 | ) 38 | 39 | # Compute torques 40 | torques = kp*(q_des - q) + kd*(qd_des - qd) + tau_ff 41 | torques = torch.matmul(torques, J) 42 | 43 | return torques -------------------------------------------------------------------------------- /gym/envs/pendulum/pendulum.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2021, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | from time import time 29 | import numpy as np 30 | import os 31 | 32 | from isaacgym.torch_utils import * 33 | from isaacgym import gymtorch, gymapi, gymutil 34 | 35 | import torch 36 | from gym.envs.base.fixed_robot import FixedRobot 37 | 38 | class Pendulum(FixedRobot): 39 | 40 | def _post_physics_step_callback(self): 41 | super()._post_physics_step_callback() 42 | self.pendulum_is_upright = torch.cos(self.dof_pos[:,0]) > 0.9 43 | 44 | def compute_observations(self): 45 | self.obs_buf = torch.cat(( 46 | self.dof_pos, # [1] "actuator" pos 47 | self.dof_vel, # [1] "actuator" vel 48 | ), dim=-1) 49 | 50 | if self.cfg.env.num_critic_obs: 51 | self.critic_obs_buf = self.obs_buf 52 | 53 | if self.add_noise: 54 | self.obs_buf += (2*torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec 55 | 56 | def _get_noise_scale_vec(self): 57 | noise_vec = torch.zeros_like(self.obs_buf[0]) 58 | self.add_noise = self.cfg.noise.add_noise 59 | noise_scales = self.cfg.noise.noise_scales 60 | noise_level = self.cfg.noise.noise_level 61 | noise_vec[:1] = noise_scales.dof_pos * self.obs_scales.dof_pos 62 | noise_vec[1:] = noise_scales.dof_vel * self.obs_scales.dof_vel 63 | 64 | noise_vec = noise_vec * noise_level 65 | return noise_vec 66 | 67 | def _compute_torques(self, actions): 68 | return torch.clip(actions, -self.torque_limits, self.torque_limits) 69 | 70 | def _reward_pendulum_vel(self): 71 | return -self.dof_vel[:, 0].square() * self.pendulum_is_upright 72 | 73 | def _reward_upright_pendulum(self): 74 | return torch.exp(-torch.square(self.dof_pos[:,0]) / 0.25) -------------------------------------------------------------------------------- /gym/envs/pendulum/pendulum_config.py: -------------------------------------------------------------------------------- 1 | from gym import LEGGED_GYM_ENVS_DIR 2 | from gym.envs.base.fixed_robot_config import FixedRobotCfg, FixedRobotRunnerCfg 3 | import torch 4 | 5 | class PendulumCfg(FixedRobotCfg): 6 | 7 | class env(FixedRobotCfg.env): 8 | num_envs = 4096 # 1096 9 | num_observations = 2 10 | num_actions = 1 11 | episode_length_s = 5 12 | num_critic_obs = num_observations 13 | 14 | class init_state(FixedRobotCfg.init_state): 15 | 16 | default_joint_angles = {"actuator": 0.} 17 | 18 | # * default setup chooses how the initial conditions are chosen. 
19 | # * "reset_to_basic" = a single position 20 | # * "reset_to_range" = uniformly random from a range defined below 21 | reset_mode = "reset_to_range" 22 | # reset_mode = "reset_to_basic" 23 | 24 | # * initial conditions for reset_to_range 25 | dof_pos_range = {'actuator': [-torch.pi, torch.pi]} 26 | dof_vel_range = {'actuator': [-1., 1.]} 27 | 28 | class control(FixedRobotCfg.control): 29 | # * PD Drive parameters: 30 | stiffness = {'actuator': 10.} # [N*m/rad] 31 | damping = {'actuator': 0.5} # [N*m*s/rad] 32 | 33 | actuated_joints_mask = [1] # actuator 34 | 35 | 36 | decimation = 2 37 | # ctrl_frequency = 250 38 | # desired_sim_frequency = 500 39 | 40 | class asset(FixedRobotCfg.asset): 41 | # Things that differ 42 | file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/pendulum/urdf/pendulum.urdf" 43 | flip_visual_attachments = False 44 | 45 | # Toggles to keep 46 | disable_gravity = False 47 | disable_motors = False # all torques set to 0 48 | 49 | class rewards(FixedRobotCfg.rewards): 50 | tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma) 51 | soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized 52 | soft_dof_vel_limit = 1. 53 | soft_torque_limit = 1. # ! may want to turn this off 54 | 55 | class weights: 56 | pendulum_vel = 0.05 57 | torques = 0.1 58 | upright_pendulum = 30 59 | class termination_weights: 60 | termination = 0. 61 | 62 | class normalization(FixedRobotCfg.normalization): 63 | class obs_scales: 64 | dof_pos = 1. #[1/3., 1/torch.pi] 65 | dof_vel = 1. #[1/20., 1/(4*torch.pi)] 66 | 67 | class noise(FixedRobotCfg.noise): 68 | add_noise = True 69 | noise_level = 1.0 # scales other values 70 | 71 | class noise_scales: 72 | dof_pos = 0.001 73 | dof_vel = 0.01 74 | 75 | class sim(FixedRobotCfg.sim): 76 | dt = 0.002 77 | 78 | class PendulumRunnerCfg(FixedRobotRunnerCfg): 79 | # We need random experiments to run 80 | seed = 1 81 | runner_class_name = 'OnPolicyRunner' 82 | 83 | class policy(FixedRobotRunnerCfg.policy): 84 | init_noise_std = 1.0 85 | log_std_bounds = [-20., 2.] 86 | num_layers = 2 87 | num_units = 64 88 | actor_hidden_dims = [num_units] * num_layers 89 | critic_hidden_dims = [num_units] * num_layers 90 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 91 | actions_limits = [-1.5, 1.5] 92 | 93 | class algorithm(FixedRobotRunnerCfg.algorithm): 94 | class PPO: 95 | # training params 96 | value_loss_coef = 1.0 97 | use_clipped_value_loss = True 98 | clip_param = 0.2 99 | entropy_coef = 0.01 100 | num_learning_epochs = 5 101 | num_mini_batches = 4 # * mini batch size = num_envs*nsteps / nminibatches 102 | learning_rate = 1.e-3 103 | schedule = 'adaptive' # could be adaptive, fixed 104 | gamma = 0.998 105 | lam = 0.95 106 | desired_kl = 0.01 107 | max_grad_norm = 1. 108 | class SAC: 109 | # algorithm training hyperparameters 110 | actor_learning_rate = 1e-3 111 | critic_learning_rate = 1e-3 112 | entropy_learning_rate = 1e-3 113 | initial_entropy_value = 0.2 114 | gamma = 0.99 # discount factor 115 | polyak = 0.005 # soft update hyperparameter 116 | num_learning_epochs = 5 117 | num_mini_batches = 4 118 | mini_batch_size = 1024 #1024 119 | target_entropy = None 120 | max_grad_norm = 1. 
121 | target_update_period = 1 122 | replay_buffer_size_per_env = 500 # ratio of new data = num_steps_per_env / replay_buffer_size_per_env 123 | custom_initialization = True # xavier initialization 124 | initial_random_exploration = True 125 | 126 | class runner(FixedRobotRunnerCfg.runner): 127 | policy_class_name = 'ActorCritic' 128 | # algorithm_class_name = 'PPO' 129 | algorithm_class_name = 'SAC' 130 | num_steps_per_env = 32 # per iteration 131 | max_iterations = 500 # number of policy updates 132 | 133 | # * logging 134 | # * check for potential saves every this many iterations 135 | save_interval = 100 136 | run_name = '' 137 | experiment_name = 'pendulum' 138 | 139 | # * load and resume 140 | resume = False 141 | load_run = -1 # -1 = last run 142 | checkpoint = -1 # -1 = last saved model 143 | resume_path = None # updated from load_run and chkpt 144 | -------------------------------------------------------------------------------- /gym/scripts/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | from .live_plotting import LivePlotter -------------------------------------------------------------------------------- /gym/scripts/sweep.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import wandb 4 | 5 | from gym import LEGGED_GYM_ROOT_DIR 6 | from gym.scripts.train import train, setup 7 | from gym.utils import get_args, task_registry 8 | from gym.utils.logging_and_saving import wandb_singleton 9 | 10 | # torch needs to be imported after isaacgym imports in local source 11 | from torch.multiprocessing import Process 12 | from torch.multiprocessing import set_start_method 13 | 14 | 15 | def load_sweep_config(file_name): 16 | return json.load(open(os.path.join( 17 | LEGGED_GYM_ROOT_DIR, 'gym', 'scripts', 18 | 'sweep_configs', file_name))) 19 | 20 | 21 | def train_with_sweep_cfg(): 22 | train_cfg, policy_runner = setup() 23 | train(train_cfg=train_cfg, policy_runner=policy_runner) 24 | 25 | 26 | def sweep_wandb_mp(): 27 | ''' start a new process for each train function ''' 28 | 29 | p = Process(target=train_with_sweep_cfg, args=()) 30 | p.start() 31 | p.join() 32 | p.kill() 33 | 34 | 35 | def start_sweeps(args): 36 | # * required for multiprocessing CUDA workloads 37 | set_start_method('spawn') 38 | 39 | # * load sweep_config from JSON file 40 | if args.wandb_sweep_config is not None: 41 | sweep_config = load_sweep_config(args.wandb_sweep_config) 42 | else: 43 | sweep_config = load_sweep_config('sweep_config_cartpole.json') 44 | # * set sweep_id if you have a previous id to use 45 | sweep_id = None 46 | if args.wandb_sweep_id is not None: 47 | sweep_id = args.wandb_sweep_id 48 | 49 | wandb_helper = wandb_singleton.WandbSingleton() 50 | train_cfg = task_registry.train_cfgs[args.task] 51 | wandb_helper.set_wandb_values(train_cfg, args) 52 | 53 | if wandb_helper.is_wandb_enabled(): 54 | if sweep_id is None: 55 | sweep_id = wandb.sweep( 56 | sweep_config, 57 | entity=wandb_helper.get_entity_name(), 58 | project=wandb_helper.get_project_name()) 59 | wandb.agent( 60 | sweep_id, 61 | sweep_wandb_mp, 62 | entity=wandb_helper.get_entity_name(), 63 | project=wandb_helper.get_project_name(), 64 | count=sweep_config['run_cap']) 65 | else: 66 | print('ERROR: No WandB project and entity provided for sweeping') 67 | 68 | 69 | if __name__ == '__main__': 70 | args = get_args() 71 | start_sweeps(args) 72 | -------------------------------------------------------------------------------- 
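A typical sweep launch looks like the following sketch (an assumption, not a documented interface: it presumes `--wandb_sweep_config` and `--wandb_sweep_id` are exposed by `get_args`, since `sweep.py` reads both from `args`):

```bash
# Start a new W&B sweep from one of the JSON configs in gym/scripts/sweep_configs/
python gym/scripts/sweep.py --task=cartpole --wandb_sweep_config=sweep_config_cartpole.json

# Attach to an existing sweep instead of creating a new one
python gym/scripts/sweep.py --task=cartpole --wandb_sweep_id=<sweep_id>
```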
/gym/scripts/sweep_configs/sweep_config_cartpole.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "random", 3 | "metric": { 4 | "name": "Train/mean_reward", 5 | "goal": "maximize" 6 | }, 7 | "run_cap": 20, 8 | "parameters": { 9 | "train_cfg.algorithm.SAC.actor_learning_rate":{ 10 | "min":1e-5, 11 | "max":1e-3 12 | }, 13 | "train_cfg.algorithm.SAC.critic_learning_rate":{ 14 | "min":1e-5, 15 | "max":1e-3 16 | }, 17 | "train_cfg.algorithm.SAC.entropy_learning_rate":{ 18 | "min":1e-5, 19 | "max":1e-3 20 | }, 21 | "train_cfg.algorithm.SAC.initial_entropy_value":{ 22 | "min":0.1, 23 | "max":0.3 24 | }, 25 | "train_cfg.algorithm.SAC.polyak":{ 26 | "min":0.001, 27 | "max":0.1 28 | }, 29 | "train_cfg.algorithm.SAC.mini_batch_size":{ 30 | "min":512, 31 | "max":4096 32 | }, 33 | "train_cfg.algorithm.SAC.target_update_period":{ 34 | "min":1, 35 | "max":32 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /gym/scripts/sweep_configs/sweep_config_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "random", 3 | "metric": { 4 | "name": "Train/mean_reward", 5 | "goal": "maximize" 6 | }, 7 | "run_cap": 10, 8 | "parameters": { 9 | "train_cfg.runner.max_iterations": { 10 | "values": [10] 11 | }, 12 | "env_cfg.env.episode_length_s": { 13 | "min": 2, 14 | "max": 10 15 | }, 16 | "train_cfg.policy.reward.weights.lin_vel_z":{ 17 | "min":0.1, 18 | "max":1.0 19 | }, 20 | "env_cfg.env.num_envs": { 21 | "values": [64] 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /gym/scripts/sweep_configs/sweep_config_humanoid_controller.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "random", 3 | "metric": { 4 | "name": "Train/mean_reward", 5 | "goal": "maximize" 6 | }, 7 | "run_cap": 20, 8 | "parameters": { 9 | "train_cfg.algorithm.SAC.actor_learning_rate":{ 10 | "min":1e-5, 11 | "max":1e-3 12 | }, 13 | "train_cfg.algorithm.SAC.critic_learning_rate":{ 14 | "min":1e-5, 15 | "max":1e-3 16 | }, 17 | "train_cfg.algorithm.SAC.entropy_learning_rate":{ 18 | "min":1e-5, 19 | "max":1e-3 20 | }, 21 | "train_cfg.algorithm.SAC.initial_entropy_value":{ 22 | "min":0.1, 23 | "max":0.3 24 | }, 25 | "train_cfg.algorithm.SAC.polyak":{ 26 | "min":0.001, 27 | "max":0.1 28 | }, 29 | "train_cfg.algorithm.SAC.mini_batch_size":{ 30 | "min":512, 31 | "max":4096 32 | }, 33 | "train_cfg.algorithm.SAC.target_update_period":{ 34 | "min":1, 35 | "max":32 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /gym/scripts/sweep_configs/sweep_config_pendulum.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "random", 3 | "metric": { 4 | "name": "Train/mean_reward", 5 | "goal": "maximize" 6 | }, 7 | "run_cap": 100, 8 | "parameters": { 9 | "train_cfg.algorithm.SAC.actor_learning_rate":{ 10 | "min":1e-5, 11 | "max":1e-3 12 | }, 13 | "train_cfg.algorithm.SAC.critic_learning_rate":{ 14 | "min":1e-5, 15 | "max":1e-3 16 | }, 17 | "train_cfg.algorithm.SAC.entropy_learning_rate":{ 18 | "min":1e-5, 19 | "max":1e-3 20 | }, 21 | "train_cfg.algorithm.SAC.initial_entropy_value":{ 22 | "min":0.1, 23 | "max":0.3 24 | }, 25 | "train_cfg.algorithm.SAC.polyak":{ 26 | "min":0.001, 27 | "max":0.1 28 | }, 29 | "train_cfg.algorithm.SAC.mini_batch_size":{ 30 | "min":512, 31 | "max":4096 
32 | }, 33 | "train_cfg.algorithm.SAC.target_update_period":{ 34 | "min":1, 35 | "max":32 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /gym/scripts/sweep_configs/sweep_config_test_sweeps.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "random", 3 | "metric": { 4 | "name": "Train/mean_reward", 5 | "goal": "maximize" 6 | }, 7 | "run_cap": 10, 8 | "parameters": { 9 | "train_cfg.runner.max_iterations":{ 10 | "values": [100] 11 | }, 12 | "train_cfg.policy.reward.weights.tracking_lin_vel":{ 13 | "min":1.0, 14 | "max":100.0 15 | }, 16 | "env_cfg.seed": { 17 | "values": [150] 18 | } 19 | } 20 | } -------------------------------------------------------------------------------- /gym/scripts/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import LEGGED_GYM_ROOT_DIR 3 | import pandas as pd 4 | import matplotlib as mpl 5 | import matplotlib.pyplot as plt 6 | 7 | def load_data(file_path): 8 | return np.load(file_path) 9 | 10 | def calculate_mean_std(data): 11 | mean_std_info = {} 12 | for key in data.files: 13 | mean_std_info[key] = [(np.mean(data[key][:,:,i]), np.std(data[key][:,:,i])) for i in range(data[key].shape[2])] 14 | return mean_std_info 15 | 16 | def print_mean_std(mean_std_info): 17 | for key, stats in mean_std_info.items(): 18 | print(f"\n{key.upper()}:") 19 | for i, (mean, std) in enumerate(stats): 20 | print(f" Dimension {i+1} - Mean: {mean:.4f}, Std: {std:.4f}") 21 | 22 | def load_csv_data(file_path): 23 | df = pd.read_csv(file_path) 24 | return df 25 | 26 | def plot_reward_graph(): 27 | methods = ['random', 'fixed', 'linear'] 28 | # methods = ['random'] 29 | colors = {'random': 'r', 'fixed': 'g', 'linear': 'b'} 30 | labels = {'random': 'random', 'fixed': 'CP', 'linear': 'CP+CR'} 31 | 32 | for method in methods: 33 | file_path = f'/home/hjlee/git/legacygym/analysis/rewards/{method}_frt.csv' 34 | df = load_csv_data(file_path) 35 | df_np = df.to_numpy() 36 | # data = np.vstack((df.random1, df.random2, df.random3, df.random4, df.random5)) 37 | data = df_np[:, [1,4,7,10,13]] 38 | step = df.Step 39 | # mean = np.nanmean(data, axis=0) 40 | # std = np.nanstd(data, axis=0) 41 | mean = data.mean(axis=1) 42 | std = data.std(axis=1) 43 | plt.plot(step, mean, color=colors[method], label=labels[method]) 44 | plt.fill_between(step, mean+std, mean-std, facecolor=colors[method], alpha=0.3) 45 | plt.xlabel('Iterations', fontsize=12) 46 | plt.ylabel('Reward', fontsize=12) 47 | plt.legend(loc='upper right', fontsize=12, markerscale=5) 48 | 49 | plt.show() 50 | 51 | if __name__ == "__main__": 52 | # file_path = f'{LEGGED_GYM_ROOT_DIR}/analysis/logs/play_log_10000.npz' # Replace with your file path 53 | # data = load_data(file_path) 54 | # mean_std_info = calculate_mean_std(data) 55 | # print_mean_std(mean_std_info) 56 | # plot_reward_graph() 57 | # fig, ax = plt.subplots(figsize=(6, 1)) 58 | # fig.subplots_adjust(bottom=0.5) 59 | 60 | # cmap = mpl.cm.viridis 61 | # bounds = np.arange(5) 62 | # norm = mpl.colors.BoundaryNorm(bounds, cmap.N) 63 | 64 | # fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), 65 | # cax=ax, orientation='horizontal', 66 | # label="Discrete intervals with extend='both' keyword", 67 | # ticks=np.arange(4)) 68 | # df = pd.DataFrame({"x" : np.linspace(0,1,20), 69 | # "y" : np.linspace(0,1,20), 70 | # "cluster" : np.tile(np.arange(4),5)}) 71 | # cmap = 
mpl.colors.ListedColormap(["navy", "crimson", "limegreen", "gold"]) 72 | # norm = mpl.colors.BoundaryNorm(np.arange(-0.5,4), cmap.N) 73 | # fig, ax = plt.subplots() 74 | # scatter = ax.scatter(x='x', y='y', c='cluster', marker='+', data=df, 75 | # cmap=cmap, norm=norm, s=100, edgecolor ='none', alpha=0.70) 76 | 77 | # cbar = fig.colorbar(scatter, ticks=np.arange(4)) 78 | # cbar.ax.set_yticklabels(['a', 'b', 'c', 'd']) 79 | 80 | 81 | # plt.show() 82 | # import matplotlib.pyplot as plt 83 | 84 | # x = np.linspace(0, 2*np.pi, 64) 85 | # y = np.cos(x) 86 | 87 | # plt.figure() 88 | # plt.plot(x,y) 89 | 90 | # n = 20 91 | # colors = plt.cm.jet(np.linspace(0,1,n)) 92 | 93 | # for i in range(n): 94 | # plt.plot(x, i*y, color=colors[i]) 95 | # plt.show() 96 | n_lines = 5 97 | x = np.linspace(0, 10, 100) 98 | y = np.sin(x[:, None] + np.pi * np.linspace(0, 1, n_lines)) 99 | c = np.arange(1, n_lines + 1) 100 | 101 | norm = mpl.colors.Normalize(vmin=c.min(), vmax=c.max()) 102 | cmap = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.viridis) 103 | cmap.set_array([]) 104 | 105 | fig, ax = plt.subplots(dpi=100) 106 | for i, yi in enumerate(y.T): 107 | ax.plot(x, yi, c=cmap.to_rgba(i + 1)) 108 | cbar = fig.colorbar(cmap, ticks=c) 109 | cbar.ax.set_yticklabels(['a', 'b', 'c', 'd', 'e']) 110 | plt.show() 111 | -------------------------------------------------------------------------------- /gym/scripts/train.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
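# Usage sketch (hypothetical invocations; the accepted flags are defined by
# get_args() in gym/utils/helpers.py and the task names by the task registry,
# so the exact spellings below are assumptions):
#
#   python gym/scripts/train.py --task=humanoid_controller
#   python gym/scripts/train.py --task=cartpole --disable_wandb
#
# The task string selects a registered env/config pair; all other settings
# (network sizes, learning rates, iteration counts) come from that task's
# config classes and can be overridden by a WandB sweep.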
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import numpy as np
32 | import os
33 | from datetime import datetime
34 | 
35 | import isaacgym  # must be imported before torch
36 | from gym.envs import *
37 | from gym.utils import get_args, task_registry
38 | from gym import LEGGED_GYM_ROOT_DIR
39 | import torch
40 | from gym.utils.logging_and_saving import local_code_save_helper, wandb_singleton
41 | 
44 | def train(args):
45 |     # * Setup environment and policy_runner
46 |     env, env_cfg = task_registry.make_env(name=args.task, args=args)
47 | 
48 |     policy_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args)
49 | 
50 |     # * Setup wandb (the WandbSingleton wraps all direct wandb calls)
51 |     wandb_helper = wandb_singleton.WandbSingleton()
52 |     wandb_helper.setup_wandb(env_cfg=env_cfg, train_cfg=train_cfg, args=args, log_dir=policy_runner.log_dir)
53 |     local_code_save_helper.log_and_save(
54 |         env, env_cfg, train_cfg, policy_runner)
55 |     wandb_helper.attach_runner(policy_runner=policy_runner)
56 | 
57 |     # * Train
58 |     policy_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations,
59 |                         init_at_random_ep_len=True)
60 | 
61 |     # * Close wandb
62 |     wandb_helper.close_wandb()
63 | 
64 | if __name__ == '__main__':
65 |     args = get_args()
66 |     train(args)
67 | 
-------------------------------------------------------------------------------- /gym/tests/test_env.py: --------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import numpy as np
32 | import os
33 | from datetime import datetime
34 | 
35 | import isaacgym  # must be imported before torch
36 | from gym.envs import *
37 | from gym.utils import get_args, task_registry
38 | 
39 | import torch
40 | 
41 | 
42 | def test_env(args):
43 |     env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
44 |     # override some parameters for testing
45 |     env_cfg.env.num_envs = min(env_cfg.env.num_envs, 10)
46 | 
47 |     # prepare environment and step it with zero actions
48 |     env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
49 |     for i in range(int(10*env.max_episode_length)):
50 |         actions = torch.zeros(env.num_envs, env.num_actions, device=env.device)
51 |         obs, _, rew, done, info = env.step(actions)
52 |     print("Done")
53 | 
54 | if __name__ == '__main__':
55 |     args = get_args()
56 |     test_env(args)
57 | 
-------------------------------------------------------------------------------- /gym/utils/__init__.py: --------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .helpers import class_to_dict, get_load_path, get_args, set_seed, update_class_from_dict
32 | from .task_registry import task_registry
33 | from .logger import Logger
34 | from .math import *
35 | from .terrain import Terrain
36 | from .interfaces.interfaces import BaseKeyboardInterface, KeyboardInterface, GamepadInterface, \
37 |     VanillaKeyboardInterface, ControllerKeyboardInterface, XCoMKeyboardInterface
38 | from .logging_and_saving.VisualizationRecorder import VisualizationRecorder
39 | from .logging_and_saving.AnalysisRecorder import ScreenShotter, AnalysisRecorder, CSVLogger, DictLogger, SuccessRater
40 | 
-------------------------------------------------------------------------------- /gym/utils/gait_scheduler.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class gait_scheduler():
5 |     def __init__(self, cfg_gait, num_envs, device, dt):
6 |         self.num_envs = num_envs
7 |         self.device = device
8 | 
9 |         self.phase_offsets = cfg_gait.phase_offsets
10 |         self.switchingPhaseNominal = cfg_gait.switchingPhaseNominal
11 |         self.nom_gait_period = cfg_gait.nom_gait_period
12 |         self.num_legs = len(self.phase_offsets)
13 |         self.dt = dt
14 | 
15 |         self._init_gait_scheduler()
16 | 
17 |     def _init_gait_scheduler(self):
18 | 
19 |         # * init buffer for the main phase variable
20 |         self.phase = torch.zeros(self.num_envs, 1, dtype=torch.float,
21 |                                  device=self.device, requires_grad=False)
22 | 
23 |         # * init buffers for the per-leg phase variables: one column per leg,
24 |         # * each starting at its configured phase offset
25 |         self.LegPhase = torch.tensor([], device=self.device, requires_grad=False)
26 |         self.LegPhaseStance = torch.tensor([], device=self.device, requires_grad=False)
27 |         self.LegPhaseSwing = torch.tensor([], device=self.device, requires_grad=False)
28 | 
29 |         for i in range(self.num_legs):
30 |             self.LegPhase = torch.hstack((self.LegPhase, self.phase_offsets[i]*torch.ones(self.num_envs, 1, dtype=torch.float,
31 |                                           device=self.device, requires_grad=False)))
32 |             self.LegPhaseStance = torch.hstack((self.LegPhaseStance, self.phase_offsets[i]*torch.ones(self.num_envs, 1, dtype=torch.float,
33 |                                                 device=self.device, requires_grad=False)))
34 |             self.LegPhaseSwing = torch.hstack((self.LegPhaseSwing, self.phase_offsets[i]*torch.ones(self.num_envs, 1, dtype=torch.float,
35 |                                                device=self.device, requires_grad=False)))
36 | 
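    # Worked example of the phase bookkeeping below (numbers assumed purely
    # for illustration): with nom_gait_period = 0.8 s and dt = 0.002 s, each
    # call to increment_phase() advances every phase variable by
    # dphase = 0.002 / 0.8 = 0.0025. With switchingPhaseNominal = 0.5, a leg
    # whose phase is 0.3 is in stance, with stance progress 0.3 / 0.5 = 0.6
    # and swing progress 0; a leg at phase 0.6 is in swing, with stance
    # progress saturated at 1 and swing progress (0.6 - 0.5) / (1 - 0.5) = 0.2.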
37 |     def increment_phase(self):
38 |         """ Advance the main gait phase by one control step, then update each
39 |         leg's phase together with its normalized stance and swing progress.
40 |         """
41 |         # increment the main phase variable
42 |         dphase = self.dt / self.nom_gait_period
43 |         self.phase = torch.fmod(self.phase + dphase, 1)
44 | 
45 |         for foot in range(self.num_legs):
46 |             self.LegPhase[:, foot] = torch.fmod(self.LegPhase[:, foot] + dphase, 1)
47 | 
48 |             in_stance_bool = (self.LegPhase[:, foot] <= self.switchingPhaseNominal)
49 |             in_swing_bool = torch.logical_not(in_stance_bool)
50 | 
51 |             # stance progress ramps 0 -> 1 during stance, saturates at 1 in swing
52 |             self.LegPhaseStance[:, foot] = self.LegPhase[:, foot] / self.switchingPhaseNominal*in_stance_bool +\
53 |                 in_swing_bool*1
54 | 
55 |             # swing progress is 0 during stance and ramps 0 -> 1 during swing
56 |             self.LegPhaseSwing[:, foot] = 0 * in_stance_bool \
57 |                 + in_swing_bool*(self.LegPhase[:, foot] - self.switchingPhaseNominal)\
58 |                 / (1.0 - self.switchingPhaseNominal)
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     # script for testing the gait scheduler
63 |     import math as mt
64 |     import matplotlib.pyplot as plt
65 |     import numpy as np
66 | 
67 |     # gait config class needed to initialize the gait scheduler
68 |     class gait():
69 |         nom_gait_period = 0.8
70 |         # phase offset for each leg; the length of this list also determines
71 |         # the number of legs used by the gait schedule
72 |         phase_offsets = [0, 0.5, 0.1]
73 |         switchingPhaseNominal = 0.5  # phase at which a leg switches from stance to swing
74 | 
75 |     # environment step dt
76 |     dt = 0.002
77 |     cfg_gait = gait()
78 | 
79 |     num_env = 2
80 |     GS = gait_scheduler(cfg_gait, num_env, 'cpu', dt)
81 | 
82 |     Ttot = 2
83 |     N_iter = mt.floor(Ttot/dt)
84 |     numlegs = len(cfg_gait.phase_offsets)
85 |     main_phase = np.zeros([num_env, N_iter])
86 |     leg_phase = np.zeros([num_env, numlegs, N_iter])
87 |     stance_phase = np.zeros([num_env, numlegs, N_iter])
88 |     swing_phase = np.zeros([num_env, numlegs, N_iter])
89 | 
90 |     time = (np.arange(0, N_iter)*dt)
91 | 
92 |     for i in range(N_iter):
93 |         GS.increment_phase()
94 | 
95 |         main_phase[:, i] = GS.phase.detach().cpu().numpy().ravel()
96 |         leg_phase[:, :, i] = GS.LegPhase.detach().cpu().numpy()
97 |         stance_phase[:, :, i] = GS.LegPhaseStance.detach().cpu().numpy()
98 |         swing_phase[:, :, i] = GS.LegPhaseSwing.detach().cpu().numpy()
99 | 
100 |     nrows = 5
101 |     ncols = 1
102 |     idx = 1
103 | 
104 |     plt.figure()
105 |     plt.subplot(nrows, ncols, idx); idx += 1
106 |     plt.plot(time, main_phase.T)
107 |     plt.title('main phase')
108 | 
109 |     plt.subplot(nrows, ncols, idx); idx += 1
110 |     for i in range(numlegs):
111 |         plt.plot(time, leg_phase[:, i, :].T)
112 |     plt.title('leg phase')
113 |     #plt.legend(["leg0","leg1"])
114 | 
115 |     plt.subplot(nrows, ncols, idx); idx += 1
116 |     for i in range(numlegs):
117 |         plt.plot(time, stance_phase[:, i, :].T)
118 |     plt.title('stance phase')
119 |     plt.legend(["leg0", "leg1"])
120 | 
121 |     plt.subplot(nrows, ncols, idx); idx += 1
122 |     for i in range(numlegs):
123 |         plt.plot(time, swing_phase[:, i, :].T)
124 |     plt.title('swing phase')
125 |     plt.legend(["leg0", "leg1"])
126 |     plt.show()
-------------------------------------------------------------------------------- /gym/utils/interfaces/GamepadInterface.py: --------------------------------------------------------------------------------
1 | from isaacgym import gymapi
2 | import os
3 | os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
4 | import pygame
5 | 
6 | class GamepadInterface():
7 |     def __init__(self, env):
8 |         pygame.init()
9 |         pygame.joystick.init()
10 |         print("__________________________________________________________")
11 |         if pygame.joystick.get_count() == 1:
12 |             print("Using gamepad interface, overriding default command settings")
13 |             print("left joystick: forward, strafe left, "
14 |                   "backward, strafe right")
15 |             print("right joystick (left/right): yaw left/right")
16 |             print("back button: reset environments")
17 |             print("start button: quit")
18 |             self.joystick = pygame.joystick.Joystick(0)
19 | 
20 |             env.commands[:] = 0.
21 |             # never resample
22 |             env.cfg.commands.resampling_time = env.max_episode_length + 1
23 |             self.max_vel_backward = env.cfg.commands.ranges.lin_vel_x[0]
24 |             self.max_vel_forward = env.cfg.commands.ranges.lin_vel_x[1]
25 |             self.max_vel_sideways = env.cfg.commands.ranges.lin_vel_y
26 |             self.max_yaw_vel = env.cfg.commands.ranges.yaw_vel
27 |             # set to 0
28 |             env.command_ranges["lin_vel_x"] = [0., 0.]
29 |             env.command_ranges["lin_vel_y"] = 0.
30 |             env.command_ranges["yaw_vel"] = 0.
31 |         elif pygame.joystick.get_count() > 1:
32 |             print("WARNING: you have more than one gamepad plugged in. "
33 |                   "Please unplug one.")
34 |         else:
35 |             print("WARNING: failed to initialize gamepad.")
36 |         print("__________________________________________________________")
37 | 
38 |     def update(self, env):
39 |         for event in pygame.event.get():
40 |             if event.type == pygame.JOYBUTTONDOWN:
41 |                 if event.button == 7:  # press start
42 |                     exit()
43 |                 if event.button == 6:  # press back
44 |                     env.episode_length_buf[:] = env.max_episode_length+1
45 |             if event.type == pygame.JOYAXISMOTION:
46 |                 # left joystick
47 |                 if event.axis == 1:  # up-down
48 |                     if event.value >= 0:
49 |                         env.commands[:, 0] = self.max_vel_backward*event.value
50 |                     else:
51 |                         env.commands[:, 0] = -self.max_vel_forward*event.value
52 |                 if event.axis == 0:  # left-right
53 |                     env.commands[:, 1] = -self.max_vel_sideways*event.value
54 |                 # right joystick
55 |                 if event.axis == 3:  # left-right
56 |                     env.commands[:, 2] = -self.max_yaw_vel*event.value
-------------------------------------------------------------------------------- /gym/utils/interfaces/interfaces.py: --------------------------------------------------------------------------------
1 | from .KeyboardInterface import BaseKeyboardInterface, KeyboardInterface, \
2 |     VanillaKeyboardInterface, ControllerKeyboardInterface, XCoMKeyboardInterface
3 | from .GamepadInterface import GamepadInterface
-------------------------------------------------------------------------------- /gym/utils/logger.py: --------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import matplotlib.pyplot as plt
32 | import numpy as np
33 | from collections import defaultdict
34 | from multiprocessing import Process
35 | 
36 | class Logger:
37 |     def __init__(self, dt):
38 |         self.state_log = defaultdict(list)
39 |         self.rew_log = defaultdict(list)
40 |         self.dt = dt
41 |         self.num_episodes = 0
42 |         self.plot_process = None
43 | 
44 |     def log_state(self, key, value):
45 |         self.state_log[key].append(value)
46 | 
47 |     def log_states(self, state_dict):
48 |         for key, value in state_dict.items():
49 |             self.log_state(key, value)
50 | 
51 |     def log_rewards(self, reward_dict, num_episodes):
52 |         for key, value in reward_dict.items():
53 |             if 'rew' in key:
54 |                 self.rew_log[key].append(value.item() * num_episodes)
55 |         self.num_episodes += num_episodes
56 | 
57 |     def reset(self):
58 |         self.state_log.clear()
59 |         self.rew_log.clear()
60 | 
61 |     def plot_states(self):
62 |         self.plot_process = Process(target=self._plot)
63 |         self.plot_process.start()
64 | 
65 |     def _plot(self):
66 |         nb_rows = 3
67 |         nb_cols = 3
68 |         fig, axs = plt.subplots(nb_rows, nb_cols)
69 |         for key, value in self.state_log.items():  # infer the time axis from the first logged signal
70 |             time = np.linspace(0, len(value)*self.dt, len(value))
71 |             break
72 |         log = self.state_log
73 |         # plot joint targets and measured positions
74 |         a = axs[1, 0]
75 |         if log["dof_pos"]: a.plot(time, log["dof_pos"], label='measured')
76 |         if log["dof_pos_target"]: a.plot(time, log["dof_pos_target"], label='target')
77 |         a.set(xlabel='time [s]', ylabel='Position [rad]', title='DOF Position')
78 |         a.legend()
79 |         # plot joint velocity
80 |         a = axs[1, 1]
81 |         if log["dof_vel"]: a.plot(time, log["dof_vel"], label='measured')
82 |         if log["dof_vel_target"]: a.plot(time, log["dof_vel_target"], label='target')
83 |         a.set(xlabel='time [s]', ylabel='Velocity [rad/s]', title='Joint Velocity')
84 |         a.legend()
85 |         # plot base vel x
86 |         a = axs[0, 0]
87 |         if log["base_vel_x"]: a.plot(time, log["base_vel_x"], label='measured')
88 |         if log["command_x"]: a.plot(time, log["command_x"], label='commanded')
89 |         a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity x')
90 |         a.legend()
91 |         # plot base vel y
92 |         a = axs[0, 1]
93 |         if log["base_vel_y"]: a.plot(time, log["base_vel_y"], label='measured')
94 |         if log["command_y"]: a.plot(time, log["command_y"], label='commanded')
95 |         a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity y')
96 |         a.legend()
97 |         # plot base vel yaw
98 |         a = axs[0, 2]
99 |         if log["base_vel_yaw"]: a.plot(time, log["base_vel_yaw"], label='measured')
100 |         if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded')
101 |         a.set(xlabel='time [s]', ylabel='base ang vel [rad/s]', title='Base velocity yaw')
102 |         a.legend()
103 |         # plot base vel z
104 |         a = axs[1, 2]
105 |         if log["base_vel_z"]: a.plot(time, log["base_vel_z"], label='measured')
106 |         a.set(xlabel='time [s]', ylabel='base lin vel [m/s]', title='Base velocity z')
107 |         a.legend()
108 |         # plot contact forces
109 |         a = axs[2, 0]
110 |         if log["contact_forces_z"]:
111 |             forces = np.array(log["contact_forces_z"])
112 |             for i in range(forces.shape[1]):
113 |                 a.plot(time, forces[:, i], label=f'force {i}')
114 |         a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces')
115 |         a.legend()
116 |         # plot torque/vel curves
117 |         a = axs[2, 1]
118 |         if log["dof_vel"] != [] and log["dof_torque"] != []: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
119 |         a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
120 |         a.legend()
121 |         # plot torques
122 |         a = axs[2, 2]
123 |         if log["dof_torque"] != []: a.plot(time, log["dof_torque"], label='measured')
124 |         a.set(xlabel='time [s]', ylabel='Joint Torque [Nm]', title='Torque')
125 |         a.legend()
126 |         plt.show()
127 | 
128 |     def print_rewards(self):
129 |         print("Average rewards per second:")
130 |         for key, values in self.rew_log.items():
131 |             mean = np.sum(np.array(values)) / self.num_episodes
132 |             print(f"  - {key}: {mean}")
133 |         print(f"Total number of episodes: {self.num_episodes}")
134 | 
135 |     def __del__(self):
136 |         if self.plot_process is not None:
137 |             self.plot_process.kill()
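# Minimal usage sketch (signal names and dt assumed for illustration):
#
#   logger = Logger(dt=0.02)
#   for _ in range(100):
#       logger.log_states({"base_vel_x": 0.5, "command_x": 1.0})
#   logger.plot_states()  # spawns a separate process that runs _plot()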
-------------------------------------------------------------------------------- /gym/utils/logging_and_saving/VisualizationRecorder.py: --------------------------------------------------------------------------------
1 | from subprocess import Popen, PIPE
2 | import subprocess
3 | import os
4 | import mss
5 | from gym import LEGGED_GYM_ROOT_DIR
6 | 
7 | 
8 | class VisualizationRecorder():
9 |     def __init__(self, env, experiment_name, log_dir, framerate=50):
10 |         self.env = env
11 |         self.frames = []  # per-instance frame buffer (was a shared class attribute)
12 |         self.framerate = max(50., framerate)  # enforce a minimum of 50 fps
13 |         self.experiment_name = experiment_name
14 |         self.run_name = os.path.basename(log_dir)
15 |         self.target_window = self.getWindowGeometry()
16 |         ctrl_frequency = int(1 / round(self.env.dt, 3))
17 |         self.frame_sampling_rate = max(1, int(ctrl_frequency / self.framerate))
18 |         self.sampling_frequency = ctrl_frequency / self.frame_sampling_rate
19 |         self.playback_speed = self.sampling_frequency / self.framerate
20 | 
21 |     def update(self, sim_iter):
22 |         self.captureFrame(sim_iter)
23 |         if self.env.record_done:
24 |             self.save()
25 |             exit()
26 | 
27 |     def getWindowGeometry(self):
28 |         try:
29 |             output = subprocess.check_output(['wmctrl', '-lG']).decode('utf-8')
30 |             lines = output.splitlines()
31 |             for line in lines:
32 |                 if line.split(None, 4)[-1].split(" ")[-2:] == ['Isaac', 'Gym']:
33 |                     x, y, width, height = line.split()[2:6]
34 |                     monitor = {'top': int(y), 'left': int(x),
35 |                                'width': int(width), 'height': int(height)}
36 |                     return monitor
37 |             return None
38 |         except Exception as e:
39 |             print(f"An error occurred: {e}")
40 |             return None
41 | 
42 |     def captureFrame(self, sim_iter):
43 |         with mss.mss() as sct:
44 |             if sim_iter % self.frame_sampling_rate == 0:
45 |                 # Capture the screenshot
46 |                 try:
47 |                     screenshot = sct.grab(self.target_window)
48 |                     self.frames.append(screenshot)
49 |                 except Exception:
50 |                     print("Could not grab the Isaac Gym window. Please install "
51 |                           "wmctrl (sudo apt-get install wmctrl) so the window "
52 |                           "can be located for real-time recording.")
53 |                     exit()
54 | 
55 |     def save(self):
56 |         print("Converting recorded frames to video...")
57 |         folderpath = os.path.join(LEGGED_GYM_ROOT_DIR, 'logs',
58 |                                   self.experiment_name, 'videos')
59 |         filepath = os.path.join(folderpath, self.run_name + ".mp4")
60 |         os.makedirs(folderpath, exist_ok=True)
61 | 
62 |         # Use FFmpeg directly to pipe frames
63 |         ffmpeg_cmd = [
64 |             'ffmpeg',
65 |             '-y',  # Overwrite output file
66 |             '-f', 'rawvideo',
67 |             '-vcodec', 'rawvideo',
68 |             '-s', f"{self.frames[0].size.width}x{self.frames[0].size.height}",
69 |             '-pix_fmt', 'rgb24',
70 |             '-r', str(self.framerate),  # Frame rate
71 |             '-i', '-',  # Read from pipe
72 |             '-an',  # No audio
73 |             '-c:v', 'libx264',
74 |             '-vf', f"setpts=PTS/{self.playback_speed}",
75 |             '-pix_fmt', 'yuv420p',
76 |             '-crf', '18',  # High quality
77 |             '-preset', 'slow',  # Slow compression for better quality
78 |             '-profile:v', 'high',  # High profile
79 |             filepath
80 |         ]
81 |         with Popen(ffmpeg_cmd, stdin=PIPE) as ffmpeg_proc:
82 |             for screenshot in self.frames:
83 |                 ffmpeg_proc.stdin.write(screenshot.rgb)
84 |         os.system("xdg-open " + filepath)
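# Worked example of the rate bookkeeping in __init__ (dt assumed purely for
# illustration): with an environment step of dt = 0.004 s the control
# frequency is int(1 / 0.004) = 250 Hz. At the default 50 fps,
# frame_sampling_rate = max(1, int(250 / 50)) = 5, so every 5th simulation
# step is captured; sampling_frequency = 250 / 5 = 50 Hz and
# playback_speed = 50 / 50 = 1.0, i.e. the video plays back in real time.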
-------------------------------------------------------------------------------- /gym/utils/logging_and_saving/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/gym/utils/logging_and_saving/__init__.py
-------------------------------------------------------------------------------- /gym/utils/logging_and_saving/local_code_save_helper.py: --------------------------------------------------------------------------------
1 | import os
2 | from gym import LEGGED_GYM_ROOT_DIR
3 | 
4 | 
5 | def log_and_save(env, env_cfg, train_cfg, runner):
6 |     """Configure local and cloud code logging"""
7 | 
8 |     # setup local code saving if enabled
9 |     if check_local_saving_flag(train_cfg):
10 |         save_paths = get_local_save_paths(env, env_cfg)
11 |         runner.logger.configure_local_files(save_paths)
12 | 
13 | 
14 | def check_local_saving_flag(train_cfg):
15 |     """Check if enable_local_saving is set to true in the training_config"""
16 | 
17 |     if hasattr(train_cfg, 'logging') and \
18 |             hasattr(train_cfg.logging, 'enable_local_saving'):
19 |         enable_local_saving = train_cfg.logging.enable_local_saving
20 |     else:
21 |         enable_local_saving = False
22 |     return enable_local_saving
23 | 
24 | 
25 | def get_local_save_paths(env, env_cfg):
26 |     """Create a save_paths object for saving code locally"""
27 | 
28 |     learning_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'learning')
29 |     learning_target = os.path.join('learning')
30 | 
31 |     gym_dir = os.path.join(LEGGED_GYM_ROOT_DIR, 'gym')
32 |     gym_target = os.path.join('gym')
33 | 
34 |     # list of things to copy: source paths need the full path,
35 |     # targets are relative to log_dir
36 |     save_paths = [
37 |         {'type': 'dir', 'source_dir': learning_dir,
38 |          'target_dir': learning_target,
39 |          'include_patterns': ('*.py', '*.json')},
40 |         {'type': 'dir', 'source_dir': gym_dir,
41 |          'target_dir': gym_target,
42 |          'include_patterns': ('*.py', '*.json')}
43 |     ]
44 | 
45 |     return save_paths
-------------------------------------------------------------------------------- /gym/utils/logging_and_saving/wandb_singleton.py: --------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import wandb
4 | from gym import LEGGED_GYM_ROOT_DIR
5 | 
6 | 
7 | class WandbSingleton(object):
8 |     def __new__(cls):
9 |         if not hasattr(cls, 'instance'):
10 |             cls.instance = super(WandbSingleton, cls).__new__(cls)
11 |             cls.entity_name = None
12 |             cls.project_name = None
13 |             cls.experiment_name = ''
14 |             cls.enabled = False
15 |             cls.parameters_dict = None
16 | 
17 |         return cls.instance
18 | 
19 |     def set_wandb_values(self, train_cfg, args):
20 |         # build the path for the wandb_config.json file
21 |         wandb_config_path = os.path.join(
22 |             LEGGED_GYM_ROOT_DIR, 'user', 'wandb_config.json')
23 | 
24 |         # load entity and project name from JSON if it exists
25 |         if os.path.exists(wandb_config_path):
26 |             json_data = json.load(open(wandb_config_path))
27 |             print('Loaded WandB entity and project from JSON.')
28 |             self.entity_name = json_data['entity']
29 |             # self.project_name = json_data['project']
30 |             self.project_name = train_cfg.runner.experiment_name
31 |         # override entity and project by commandline args if provided
32 |         if args.wandb_entity is not None:
33 |             print('Received WandB entity from arguments.')
34 |             self.entity_name = args.wandb_entity
35 |         if args.wandb_project is not None:
36 |             print('Received WandB project from arguments.')
37 |             self.project_name = args.wandb_project
38 |         if args.task is not None:
39 |             self.experiment_name = f'{args.task}'
40 | 
41 |         # assume WandB is off if entity or project is None (or it was
42 |         # disabled explicitly) and short-circuit
43 |         if (self.entity_name is None or self.project_name is None
44 |                 or args.disable_wandb):
45 |             self.enabled = False
46 |         else:
47 |             print(f'Setting WandB project name: {self.project_name}\n' +
48 |                   f'Setting WandB entity name: {self.entity_name}\n')
49 |             self.enabled = True
50 | 
51 |     def set_wandb_sweep_cfg_values(self, env_cfg, train_cfg):
52 |         if not self.is_wandb_enabled():
53 |             return
54 | 
55 |         # * update the config settings based off the sweep_dict
56 |         for key, value in self.parameters_dict.items():
57 |             print('Setting: ' + key + ' = ' + str(value))
58 |             locs = key.split('.')
59 | 
60 |             if locs[0] == 'train_cfg':
61 |                 attr = train_cfg
62 |             elif locs[0] == 'env_cfg':
63 |                 attr = env_cfg
64 |             else:
65 |                 print('Unrecognized cfg: ' + locs[0])
66 |                 continue  # skip unknown roots instead of aborting the update
67 | 
68 |             for loc in locs[1:-1]:
69 |                 attr = getattr(attr, loc)
70 | 
71 |             setattr(attr, locs[-1], value)
72 | 
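    # Worked example of the attribute walk above (sweep entry assumed for
    # illustration): a sweep parameter keyed "train_cfg.algorithm.SAC.polyak"
    # with value 0.01 splits into ['train_cfg', 'algorithm', 'SAC', 'polyak'];
    # the loop walks getattr(train_cfg, 'algorithm'), then getattr(..., 'SAC'),
    # and finally calls setattr(<SAC cfg>, 'polyak', 0.01). This is how the
    # JSON files under gym/scripts/sweep_configs/ address nested config fields.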
73 |     def is_wandb_enabled(self):
74 |         return self.enabled
75 | 
76 |     def get_entity_name(self):
77 |         return self.entity_name
78 | 
79 |     def get_project_name(self):
80 |         return self.project_name
81 | 
82 |     def setup_wandb(self, env_cfg, train_cfg, args, log_dir, is_sweep=False):
83 |         self.set_wandb_values(train_cfg, args)
84 | 
85 |         # short-circuit if the values say WandB is turned off
86 |         if not self.is_wandb_enabled():
87 |             print('WARNING: WandB is disabled and will not save or log.')
88 |             return
89 | 
90 |         wandb.config = {}
91 | 
92 |         if is_sweep:
93 |             wandb.init(dir=os.path.join(LEGGED_GYM_ROOT_DIR, 'logs'),
94 |                        config=wandb.config,
95 |                        name=os.path.basename(log_dir))
96 |         else:
97 |             wandb.init(project=self.project_name,
98 |                        entity=self.entity_name,
99 |                        dir=os.path.join(LEGGED_GYM_ROOT_DIR, 'logs'),
100 |                        config=wandb.config,
101 |                        name=os.path.basename(log_dir))
102 | 
103 |         wandb.run.log_code(
104 |             os.path.join(LEGGED_GYM_ROOT_DIR, 'gym'))
105 | 
106 |         self.parameters_dict = wandb.config
107 |         self.set_wandb_sweep_cfg_values(env_cfg=env_cfg, train_cfg=train_cfg)
108 | 
109 |     def attach_runner(self, policy_runner):
110 |         if not self.is_wandb_enabled():
111 |             return
112 |         policy_runner.attach_to_wandb(wandb)
113 | 
114 |     def close_wandb(self):
115 |         if self.enabled:
116 |             wandb.finish()
-------------------------------------------------------------------------------- /gym/utils/math.py: --------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import torch
32 | from torch import Tensor
33 | import numpy as np
34 | from isaacgym.torch_utils import quat_apply, normalize, torch_rand_float
35 | from typing import Tuple
36 | 
37 | # @ torch.jit.script
38 | def quat_apply_yaw(quat, vec):
39 |     quat_yaw = quat.clone().view(-1, 4)
40 |     quat_yaw[:, :2] = 0.
41 |     quat_yaw = normalize(quat_yaw)
42 |     return quat_apply(quat_yaw, vec)
43 | 
44 | # @ torch.jit.script
45 | def wrap_to_pi(angles):
46 |     angles %= 2*torch.pi
47 |     angles -= 2*torch.pi * (angles > torch.pi)
48 |     return angles
49 | 
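# Quick check of wrap_to_pi (values assumed): an input of 3*pi/2 is unchanged
# by the modulo, exceeds pi, and so maps to 3*pi/2 - 2*pi = -pi/2; an input of
# -pi/4 maps to 7*pi/4 under the modulo and then back to -pi/4. Results lie in
# (-pi, pi]. Note that %= modifies the input tensor in place.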
50 | # @ torch.jit.script
51 | def torch_rand_sqrt_float(lower, upper, shape, device):
52 |     # type: (float, float, Tuple[int, int], str) -> Tensor
53 |     r = 2*torch.rand(*shape, device=device) - 1
54 |     r = torch.where(r < 0., -torch.sqrt(-r), torch.sqrt(r))
55 |     r = (r + 1.) / 2.
56 |     return (upper - lower) * r + lower
57 | 
58 | # @ torch.jit.script
59 | def random_sample(env_ids, low, high, device):
60 |     """
61 |     Generate random samples for each entry of env_ids
62 |     """
63 |     rand_pos = torch_rand_float(0, 1, (len(env_ids), len(low)),
64 |                                 device=device)
65 |     diff_pos = (high - low).repeat(len(env_ids), 1)
66 |     random_dof_pos = rand_pos*diff_pos + low.repeat(len(env_ids), 1)
67 |     return random_dof_pos
68 | 
69 | # @ torch.jit.script
70 | def exp_avg_filter(x, avg, alpha=0.8):
71 |     """
72 |     Simple exponential average filter
73 |     """
74 |     avg = alpha*x + (1-alpha)*avg
75 |     return avg
-------------------------------------------------------------------------------- /gym/utils/wandb_helper.py: --------------------------------------------------------------------------------
1 | 
2 | def recursive_value_find(cfg, location):
3 |     if len(location) == 1:
4 |         return getattr(cfg, location[0])
5 | 
6 |     if hasattr(cfg, location[0]):
7 |         return recursive_value_find(getattr(cfg, location[0]), location[1:])
8 |     else:
9 |         raise Exception(f"I couldn't find the value {location[0]} that you specified")
10 | 
11 | 
12 | def craft_log_config(env_cfg, train_cfg, wandb_cfg, what_to_log):
13 |     for log_key in what_to_log:
14 |         location = what_to_log[log_key]
15 |         if location[0] == 'train_cfg':
16 |             wandb_cfg[log_key] = recursive_value_find(train_cfg, location[1:])
17 |         elif location[0] == 'env_cfg':
18 |             wandb_cfg[log_key] = recursive_value_find(env_cfg, location[1:])
19 |         else:
20 |             raise Exception(f"You didn't specify a valid cfg file in location: {location}")
-------------------------------------------------------------------------------- /learning/__init__.py: --------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin -------------------------------------------------------------------------------- /learning/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .ppo import PPO -------------------------------------------------------------------------------- /learning/algorithms/ppo.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import torch
32 | import torch.nn as nn
33 | import torch.optim as optim
34 | 
35 | from learning.modules import ActorCritic
36 | from learning.storage import RolloutStorage
37 | 
38 | class PPO:
39 |     actor_critic: ActorCritic
40 |     def __init__(self,
41 |                  actor_critic,
42 |                  num_learning_epochs=1,
43 |                  num_mini_batches=1,
44 |                  clip_param=0.2,
45 |                  gamma=0.998,
46 |                  lam=0.95,
47 |                  value_loss_coef=1.0,
48 |                  entropy_coef=0.0,
49 |                  learning_rate=1e-3,
50 |                  max_grad_norm=1.0,
51 |                  use_clipped_value_loss=True,
52 |                  schedule="fixed",
53 |                  desired_kl=0.01,
54 |                  device='cpu',
55 |                  ):
56 | 
57 |         self.device = device
58 | 
59 |         self.desired_kl = desired_kl
60 |         self.schedule = schedule
61 |         self.learning_rate = learning_rate
62 | 
63 |         # PPO components
64 |         self.actor_critic = actor_critic
65 |         self.actor_critic.to(self.device)
66 |         self.storage = None  # initialized later
67 |         self.optimizer = optim.Adam(self.actor_critic.parameters(),
68 |                                     lr=learning_rate)
69 |         self.transition = RolloutStorage.Transition()
70 | 
71 |         # PPO parameters
72 |         self.clip_param = clip_param
73 |         self.num_learning_epochs = num_learning_epochs
74 |         self.num_mini_batches = num_mini_batches
75 |         self.value_loss_coef = value_loss_coef
76 |         self.entropy_coef = entropy_coef
77 |         self.gamma = gamma
78 |         self.lam = lam
79 |         self.max_grad_norm = max_grad_norm
80 |         self.use_clipped_value_loss = use_clipped_value_loss
81 | 
82 |     def init_storage(self, num_envs, num_transitions_per_env, num_obs, num_critic_obs, num_actions):
83 |         self.storage = RolloutStorage(num_envs, num_transitions_per_env, num_obs, num_critic_obs, num_actions, self.device)
84 | 
85 |     def test_mode(self):
86 |         self.actor_critic.test()
87 | 
88 |     def train_mode(self):
89 |         self.actor_critic.train()
90 | 
91 |     def act(self, obs, critic_obs):
92 |         # Compute the actions and values
93 |         self.transition.actions = self.actor_critic.act(obs).detach()
94 |         self.transition.values = self.actor_critic.evaluate(critic_obs).detach()
95 |         self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach()
96 |         self.transition.action_mean = self.actor_critic.action_mean.detach()
97 |         self.transition.action_sigma = self.actor_critic.action_std.detach()
98 |         # need to record obs and critic_obs before env.step()
99 |         self.transition.observations = obs
100 |         self.transition.critic_observations = critic_obs
101 |         return self.transition.actions
102 | 
103 |     def process_env_step(self, rewards, dones, timed_out=None):
104 |         self.transition.rewards = rewards.clone()
105 |         self.transition.dones = dones
106 | 
107 |         # Bootstrapping on time outs: fold the discounted value of the
108 |         # cut-off state back into the final reward (see note below)
109 |         if timed_out is not None:
110 |             self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * timed_out.unsqueeze(1), 1)
111 | 
112 |         # Record the transition
113 |         self.storage.add_transitions(self.transition)
114 |         self.transition.clear()
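    # Note on the time-out bootstrap above: an episode cut off by the time
    # limit is not terminal in the MDP sense, so the discounted value of the
    # final state is folded back into its reward. Illustrative numbers: with
    # gamma = 0.998, a final reward of 1.0 and V(s_T) = 50.0, the stored
    # reward becomes 1.0 + 0.998 * 50.0 = 50.9.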
115 | 
116 |     def compute_returns(self, last_critic_obs):
117 |         last_values = self.actor_critic.evaluate(last_critic_obs).detach()
118 |         self.storage.compute_returns(last_values, self.gamma, self.lam)
119 | 
120 |     def update(self):
121 |         mean_value_loss = 0
122 |         mean_surrogate_loss = 0
123 |         generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
124 |         for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
125 |             old_mu_batch, old_sigma_batch in generator:
126 | 
127 |             self.actor_critic.act(obs_batch)
128 |             actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
129 |             value_batch = self.actor_critic.evaluate(critic_obs_batch)
130 | 
131 |             mu_batch = self.actor_critic.action_mean
132 |             sigma_batch = self.actor_critic.action_std
133 |             entropy_batch = self.actor_critic.entropy
134 | 
135 |             # KL-based adaptive learning rate
136 |             if self.desired_kl is not None and self.schedule == 'adaptive':
137 |                 with torch.inference_mode():
138 |                     kl = torch.sum(
139 |                         torch.log(sigma_batch / old_sigma_batch + 1.e-5) + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, dim=-1)
140 |                     kl_mean = torch.mean(kl)
141 | 
142 |                     if kl_mean > self.desired_kl * 2.0:
143 |                         self.learning_rate = max(1e-5, self.learning_rate / 1.5)
144 |                     elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0:
145 |                         self.learning_rate = min(1e-2, self.learning_rate * 1.5)
146 | 
147 |                     for param_group in self.optimizer.param_groups:
148 |                         param_group['lr'] = self.learning_rate
149 | 
150 |             # Surrogate loss
151 |             ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
152 |             surrogate = -torch.squeeze(advantages_batch) * ratio
153 |             surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param,
154 |                                                                                1.0 + self.clip_param)
155 |             surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
156 | 
157 |             # Value function loss
158 |             if self.use_clipped_value_loss:
159 |                 value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param,
160 |                                                                                                 self.clip_param)
161 |                 value_losses = (value_batch - returns_batch).pow(2)
162 |                 value_losses_clipped = (value_clipped - returns_batch).pow(2)
163 |                 value_loss = torch.max(value_losses, value_losses_clipped).mean()
164 |             else:
165 |                 value_loss = (returns_batch - value_batch).pow(2).mean()
166 | 
167 |             loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean()
168 | 
169 |             # Gradient step
170 |             self.optimizer.zero_grad()
171 |             loss.backward()
172 |             nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
173 |             self.optimizer.step()
174 | 
175 |             mean_value_loss += value_loss.item()
176 |             mean_surrogate_loss += surrogate_loss.item()
177 | 
178 |         num_updates = self.num_learning_epochs * self.num_mini_batches
179 |         mean_value_loss /= num_updates
180 |         mean_surrogate_loss /= num_updates
181 |         self.storage.clear()
182 | 
183 |         return mean_value_loss, mean_surrogate_loss
184 | 
-------------------------------------------------------------------------------- /learning/env/__init__.py: --------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .vec_env import VecEnv -------------------------------------------------------------------------------- /learning/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from abc import ABC, abstractmethod
32 | import torch
33 | from typing import Tuple, Union
34 | 
35 | # minimal interface of the environment
36 | class VecEnv(ABC):
37 |     num_envs: int
38 |     num_actuators: int
39 |     num_obs: int
40 |     num_critic_obs: int
41 |     num_actions: int
42 |     max_episode_length: int
43 |     obs_buf: torch.Tensor
44 |     critic_obs_buf: torch.Tensor
45 |     rew_buf: torch.Tensor
46 |     reset_buf: torch.Tensor
47 |     episode_length_buf: torch.Tensor  # current episode duration
48 |     extras: dict
49 |     device: torch.device
50 |     @abstractmethod
51 |     def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]:
52 |         pass
53 |     @abstractmethod
54 |     def reset(self, env_ids: Union[list, torch.Tensor]):
55 |         pass
56 |     @abstractmethod
57 |     def get_observations(self) -> torch.Tensor:
58 |         pass
59 |     @abstractmethod
60 |     def get_critic_observations(self) -> Union[torch.Tensor, None]:
61 |         pass
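VecEnv is the only contract the learning code relies on, so any simulator can be plugged in by populating these buffers and implementing the four abstract methods. A minimal dummy implementation, for illustration only (the class name and toy reward are hypothetical, not part of the repo):

# Hypothetical minimal environment satisfying the VecEnv contract above;
# illustrative sketch only, not part of the repo.
import torch
from learning.env import VecEnv

class DummyEnv(VecEnv):
    def __init__(self, num_envs=4, num_obs=3, num_actions=2, device='cpu'):
        self.num_envs = num_envs
        self.num_obs = num_obs
        self.num_critic_obs = num_obs
        self.num_actions = num_actions
        self.num_actuators = num_actions
        self.max_episode_length = 100
        self.device = torch.device(device)
        self.obs_buf = torch.zeros(num_envs, num_obs, device=self.device)
        self.critic_obs_buf = self.obs_buf
        self.rew_buf = torch.zeros(num_envs, device=self.device)
        self.reset_buf = torch.zeros(num_envs, device=self.device)
        self.episode_length_buf = torch.zeros(num_envs, device=self.device)
        self.extras = {}

    def step(self, actions):
        self.episode_length_buf += 1
        self.obs_buf = torch.randn_like(self.obs_buf)
        self.rew_buf = -actions.pow(2).sum(dim=-1)  # toy quadratic action cost
        self.reset_buf = (self.episode_length_buf >= self.max_episode_length).float()
        return self.obs_buf, self.critic_obs_buf, self.rew_buf, self.reset_buf, self.extras

    def reset(self, env_ids):
        self.episode_length_buf[env_ids] = 0

    def get_observations(self):
        return self.obs_buf

    def get_critic_observations(self):
        return self.critic_obs_buf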
--------------------------------------------------------------------------------
/learning/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .actor_critic import ActorCritic
32 | from .actor import Actor
33 | from .critic import Critic
--------------------------------------------------------------------------------
/learning/modules/actor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | 
4 | import torch
5 | import torch.nn as nn
6 | from torch.distributions import Normal
7 | from .utils import create_MLP, weights_init_
8 | from .utils import RunningMeanStd
9 | 
10 | class Actor(nn.Module):
11 |     def __init__(self,
12 |                  num_obs,
13 |                  num_actions,
14 |                  hidden_dims,
15 |                  activation="elu",
16 |                  init_noise_std=1.0,
17 |                  normalize_obs=False,
18 |                  log_std_bounds=None,
19 |                  actions_limits=None,
20 |                  custom_initialization=False,
21 |                  **kwargs):
22 | 
23 |         if kwargs:
24 |             print("Actor.__init__ got unexpected arguments, "
25 |                   "which will be ignored: "
26 |                   + str([key for key in kwargs.keys()]))
27 |         super().__init__()
28 | 
29 |         self._normalize_obs = normalize_obs
30 |         if self._normalize_obs:
31 |             self.obs_rms = RunningMeanStd(num_obs)
32 | 
33 |         self.mean_NN = create_MLP(num_obs, num_actions, hidden_dims, activation)
34 |         self.log_std_NN = None
35 | 
36 |         # Action noise
37 |         if log_std_bounds is not None:
38 |             self.log_std_min, self.log_std_max = log_std_bounds
39 |             self.log_std_NN = create_MLP(num_obs, num_actions, hidden_dims, activation)
40 |         else:
41 |             self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
42 | 
43 |         self.distribution = None
44 | 
45 |         if actions_limits is not None:
46 |             self.actions_min, self.actions_max = actions_limits
47 |             self.actions_range_center = (self.actions_max + self.actions_min) / 2
48 |             self.actions_range_radius = (self.actions_max - self.actions_min) / 2
49 | 
50 |         # disable args validation for speedup
51 |         Normal.set_default_validate_args(False)
52 |         if custom_initialization:
53 |             self.apply(weights_init_)
54 | 
55 |     @property
56 |     def action_mean(self):
57 |         return self.distribution.mean
58 | 
59 |     @property
60 |     def action_std(self):
61 |         return self.distribution.stddev
62 | 
63 |     @property
64 |     def entropy(self):
65 |         return self.distribution.entropy().sum(dim=-1)
66 | 
67 |     def update_distribution(self, observations):
68 |         if self._normalize_obs:
69 |             observations = self.norm_obs(observations)
70 |         mean = self.mean_NN(observations)
71 |         if self.log_std_NN is None:
72 |             self.distribution = Normal(mean, mean*0. + self.std)
73 |         else:  # TODO: Implement s.t. mean & log_std share parameters and only the last layer is different!
74 |             log_std = self.log_std_NN(observations)
75 |             log_std = torch.clamp(log_std, min=self.log_std_min, max=self.log_std_max)
76 |             self.std = torch.exp(log_std)
77 |             self.distribution = Normal(mean, mean*0. + self.std)
78 | 
79 |     def act(self, observations):
80 |         self.update_distribution(observations)
81 |         return self.distribution.sample()
82 | 
83 |     def ract(self, observations):
84 |         """ Sample with reparametrization trick """
85 |         self.update_distribution(observations)
86 |         return self.distribution.rsample()
87 | 
88 |     def get_actions_log_prob(self, actions):
89 |         return self.distribution.log_prob(actions).sum(dim=-1)
90 | 
91 |     def get_scaled_ractions_and_log_prob(self, observations, only_actions=False):
92 |         """ Get scaled actions using the reparametrization trick and their log probability.
93 |             Implemented solely for SAC """
94 |         self.update_distribution(observations)
95 |         actions = self.distribution.rsample()
96 |         actions_normalized = torch.tanh(actions)
97 |         actions_scaled = (self.actions_range_center + self.actions_range_radius * actions_normalized)
98 | 
99 |         if only_actions:
100 |             return actions_scaled
101 |         else:
102 |             actions_log_prob = self.distribution.log_prob(actions).sum(dim=-1) - \
103 |                 torch.log(1.0 - actions_normalized.pow(2) + 1e-6).sum(-1)
104 |             return actions_scaled, actions_log_prob
105 | 
106 |     def act_inference(self, observations):
107 |         if self._normalize_obs:
108 |             observations = self.norm_obs(observations)
109 |         actions_mean = self.mean_NN(observations)
110 |         return actions_mean
111 | 
112 |     def norm_obs(self, observation):
113 |         with torch.no_grad():
114 |             return self.obs_rms(observation)
115 | 
116 |     def export(self, path):
117 |         os.makedirs(path, exist_ok=True)
118 |         path_TS = os.path.join(path, 'policy.pt')  # TorchScript path
119 |         path_onnx = os.path.join(path, 'policy.onnx')  # ONNX path
120 | 
121 |         if self._normalize_obs:
122 |             class NormalizedActor(nn.Module):
123 |                 def __init__(self, actor, obs_rms):
124 |                     super().__init__()
125 |                     self.actor = actor
126 |                     self.obs_rms = obs_rms
127 |                 def forward(self, obs):
128 |                     obs = self.obs_rms(obs)
129 |                     return self.actor(obs)
130 |             model = NormalizedActor(copy.deepcopy(self.mean_NN), copy.deepcopy(self.obs_rms)).to('cpu')
131 | 
132 |         else:
133 |             model = copy.deepcopy(self.mean_NN).to('cpu')
134 | 
135 |         dummy_input = torch.rand(self.mean_NN[0].in_features,)
136 |         model_traced = torch.jit.trace(model, dummy_input)
137 |         torch.jit.save(model_traced, path_TS)
138 |         torch.onnx.export(model_traced, dummy_input, path_onnx)
139 | 
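The log-probability correction in get_scaled_ractions_and_log_prob() above is the change-of-variables term for the tanh squashing: for a = tanh(u) with u drawn from the Gaussian, log p(a) = log p(u) - sum log(1 - tanh(u)^2), which the code stabilizes with a 1e-6 epsilon. A standalone sketch of the same computation (illustrative only, not part of the repo):

# Standalone sketch (not from the repo): the tanh change-of-variables
# correction behind get_scaled_ractions_and_log_prob().
import torch
from torch.distributions import Normal

dist = Normal(torch.zeros(2), torch.ones(2))
u = dist.rsample()                      # pre-squash sample (reparametrized)
a = torch.tanh(u)                       # squashed into (-1, 1)
log_prob = dist.log_prob(u).sum(-1) - torch.log(1.0 - a.pow(2) + 1e-6).sum(-1)
print(a, log_prob)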
--------------------------------------------------------------------------------
/learning/modules/actor_critic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.distributions import Normal
4 | from torch.nn.modules import rnn
5 | from .utils import create_MLP
6 | from .actor import Actor
7 | from .critic import Critic
8 | 
9 | class ActorCritic(nn.Module):
10 |     def __init__(self, num_actor_obs,
11 |                  num_critic_obs,
12 |                  num_actions,
13 |                  actor_hidden_dims=[256, 256, 256],
14 |                  critic_hidden_dims=[256, 256, 256],
15 |                  activation="elu",
16 |                  init_noise_std=1.0,
17 |                  normalize_obs=False,
18 |                  **kwargs):
19 | 
20 |         if kwargs:
21 |             print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()]))
22 |         super(ActorCritic, self).__init__()
23 | 
24 |         self.actor = Actor(num_actor_obs,
25 |                            num_actions,
26 |                            actor_hidden_dims,
27 |                            activation,
28 |                            init_noise_std,
29 |                            normalize_obs)
30 | 
31 |         self.critic = Critic(num_critic_obs,
32 |                              critic_hidden_dims,
33 |                              activation,
34 |                              normalize_obs)
35 | 
36 |         print(f"Actor MLP: {self.actor.mean_NN}")
37 |         print(f"Critic MLP: {self.critic.NN}")
38 | 
39 |     @property
40 |     def action_mean(self):
41 |         return self.actor.action_mean
42 | 
43 |     @property
44 |     def action_std(self):
45 |         return self.actor.action_std
46 | 
47 |     @property
48 |     def entropy(self):
49 |         return self.actor.entropy
50 | 
51 |     @property
52 |     def std(self):
53 |         return self.actor.std
54 | 
55 |     def update_distribution(self, observations):
56 |         self.actor.update_distribution(observations)
57 | 
58 |     def act(self, observations, **kwargs):
59 |         return self.actor.act(observations)
60 | 
61 |     def get_actions_log_prob(self, actions):
62 |         return self.actor.get_actions_log_prob(actions)
63 | 
64 |     def act_inference(self, observations):
65 |         return self.actor.act_inference(observations)
66 | 
67 |     def evaluate(self, critic_observations, actions=None, **kwargs):
68 |         return self.critic.evaluate(critic_observations, actions)
69 | 
70 |     def export_policy(self, path):
71 |         self.actor.export(path)
--------------------------------------------------------------------------------
/learning/modules/critic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from .utils import create_MLP, weights_init_
4 | from .utils import RunningMeanStd
5 | 
6 | class Critic(nn.Module):
7 |     def __init__(self,
8 |                  num_obs,
9 |                  hidden_dims,
10 |                  activation="elu",
11 |                  normalize_obs=False,
12 |                  custom_initialization=False,
13 |                  **kwargs):
14 | 
15 |         if kwargs:
16 |             print("Critic.__init__ got unexpected arguments, "
17 |                   "which will be ignored: "
18 |                   + str([key for key in kwargs.keys()]))
19 |         super().__init__()
20 | 
21 |         self.NN = create_MLP(num_obs, 1, hidden_dims, activation)
22 | 
23 |         self._normalize_obs = normalize_obs
24 |         if self._normalize_obs:
25 |             self.obs_rms = RunningMeanStd(num_obs)
26 | 
27 |         if custom_initialization:
28 |             self.apply(weights_init_)
29 | 
30 |     def evaluate(self, critic_observations, actions=None):
31 |         if actions is None:
32 |             if self._normalize_obs:
33 |                 critic_observations = self.norm_obs(critic_observations)
34 |             # Critic is a V(s)-function estimator
35 |             return self.NN(critic_observations)
36 |         else:
37 |             # Critic is a Q(s,a)-function estimator
38 |             concat_input = torch.cat((critic_observations, actions), dim=1)
39 |             return self.NN(concat_input)
40 | 
41 |     def norm_obs(self, observation):
42 |         with torch.no_grad():
43 |             return self.obs_rms(observation)
44 | 
45 |     def freeze_parameters(self):
46 |         for parameters in self.NN.parameters():
47 |             parameters.requires_grad = False
48 | 
49 |     def update_parameters(self, src_model: 'Critic', polyak: float):
50 |         with torch.inference_mode():
51 |             for parameters, src_parameters in zip(self.NN.parameters(), src_model.NN.parameters()):
52 |                 parameters.data.mul_(1 - polyak)
53 |                 parameters.data.add_(polyak * src_parameters.data)
54 | 
--------------------------------------------------------------------------------
/learning/modules/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # from .neural_net import get_activation
2 | from .neural_net import create_MLP, weights_init_
3 | from .running_mean_std import RunningMeanStd
4 | 
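Together, the helpers exported here cover network construction and input normalization for the Actor and Critic above. A usage sketch, assuming the package is importable as learning.modules.utils (illustrative only, not part of the repo):

# Illustrative only: composing the helpers exported above.
import torch
from learning.modules.utils import create_MLP, RunningMeanStd

mlp = create_MLP(4, 2, [32, 32], "elu")   # 4 inputs -> two hidden layers -> 2 outputs
rms = RunningMeanStd(4)                   # running mean/std over 4 input features

obs = torch.randn(8, 4)                   # batch of 8 observations
normed = rms(obs)                         # updates running stats in training mode
actions = mlp(normed)
print(actions.shape)                      # torch.Size([8, 2])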
--------------------------------------------------------------------------------
/learning/modules/utils/neural_net.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch.distributions import Normal
3 | import torch
4 | 
5 | # Initialize Policy weights
6 | def weights_init_(m):
7 |     if isinstance(m, nn.Linear):
8 |         torch.nn.init.xavier_uniform_(m.weight, gain=1)
9 |         # torch.nn.init.constant_(m.bias, 0)
10 | 
11 | def create_MLP(num_inputs, num_outputs, hidden_dims, activation,
12 |                dropouts=None):
13 | 
14 |     activation = get_activation(activation)
15 | 
16 |     if dropouts is None:
17 |         dropouts = [0]*len(hidden_dims)
18 | 
19 |     layers = []
20 |     if not hidden_dims:  # handle no hidden layers
21 |         add_layer(layers, num_inputs, num_outputs)
22 |     else:
23 |         add_layer(layers, num_inputs, hidden_dims[0], activation, dropouts[0])
24 |         for i in range(len(hidden_dims)):
25 |             if i == len(hidden_dims) - 1:
26 |                 add_layer(layers, hidden_dims[i], num_outputs)
27 |             else:
28 |                 add_layer(layers, hidden_dims[i], hidden_dims[i+1],
29 |                           activation, dropouts[i+1])
30 |     return nn.Sequential(*layers)
31 | 
32 | 
33 | def get_activation(act_name):
34 |     if act_name == "elu":
35 |         return nn.ELU()
36 |     elif act_name == "selu":
37 |         return nn.SELU()
38 |     elif act_name == "relu":
39 |         return nn.ReLU()
40 |     elif act_name == "crelu":
41 |         return nn.ReLU()
42 |     elif act_name == "lrelu":
43 |         return nn.LeakyReLU()
44 |     elif act_name == "tanh":
45 |         return nn.Tanh()
46 |     elif act_name == "sigmoid":
47 |         return nn.Sigmoid()
48 |     else:
49 |         raise ValueError(f"Invalid activation function: {act_name}")
50 | 
51 | 
52 | 
53 | def add_layer(layer_list, num_inputs, num_outputs, activation=None, dropout=0):
54 |     layer_list.append(nn.Linear(num_inputs, num_outputs))
55 |     if dropout > 0:
56 |         layer_list.append(nn.Dropout(p=dropout))
57 |     if activation is not None:
58 |         layer_list.append(activation)
--------------------------------------------------------------------------------
/learning/modules/utils/running_mean_std.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | 
5 | def get_mean_var_with_masks(values, masks):
6 |     sum_mask = masks.sum()
7 |     values_mask = values * masks
8 |     values_mean = values_mask.sum() / sum_mask
9 |     min_sqr = ((((values_mask)**2)/sum_mask).sum() - ((values_mask/sum_mask).sum())**2)
10 |     values_var = min_sqr * sum_mask / (sum_mask-1)
11 |     return values_mean, values_var
12 | 
13 | class RunningMeanStd(nn.Module):
14 |     def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False):
15 |         super(RunningMeanStd, self).__init__()
16 |         self.insize = insize
17 |         self.epsilon = epsilon
18 | 
19 |         self.norm_only = norm_only
20 |         self.per_channel = per_channel
21 |         if per_channel:
22 |             if len(self.insize) == 3:
23 |                 self.axis = [0,2,3]
24 |             if len(self.insize) == 2:
25 |                 self.axis = [0,2]
26 |             if len(self.insize) == 1:
27 |                 self.axis = [0]
28 |             in_size = self.insize[0]
29 |         else:
30 |             self.axis = [0]
31 |             in_size = insize
32 | 
33 |         self.register_buffer("running_mean", torch.zeros(in_size, dtype = torch.float64))
34 |         self.register_buffer("running_var", torch.ones(in_size, dtype = torch.float64))
35 |         self.register_buffer("count", torch.ones((), dtype = torch.float64))
36 | 
37 |     def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count):
38 |         delta = batch_mean - mean
39 |         tot_count = count + batch_count
40 | 
41 |         new_mean = mean + delta * batch_count / tot_count
42 |         m_a = var * count
43 |         m_b = batch_var * batch_count
44 |         M2 = m_a + m_b + delta**2 * count * batch_count / tot_count
45 |         new_var = M2 / tot_count
46 |         new_count = tot_count
47 |         return new_mean, new_var, new_count
48 | 
49 |     def forward(self, input, denorm=False, mask=None):
50 |         if self.training:
51 |             if mask is not None:
52 | 
mean, var = get_mean_var_with_masks(input, mask) 53 | else: 54 | mean = input.mean(self.axis) # along channel axis 55 | var = input.var(self.axis) if input.shape[0] != 1 else input.var(self.axis, unbiased=False) 56 | self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(self.running_mean, self.running_var, self.count, 57 | mean, var, input.size()[0] ) 58 | 59 | # change shape 60 | if self.per_channel: 61 | if len(self.insize) == 3: 62 | current_mean = self.running_mean.view([1, self.insize[0], 1, 1]).expand_as(input) 63 | current_var = self.running_var.view([1, self.insize[0], 1, 1]).expand_as(input) 64 | if len(self.insize) == 2: 65 | current_mean = self.running_mean.view([1, self.insize[0], 1]).expand_as(input) 66 | current_var = self.running_var.view([1, self.insize[0], 1]).expand_as(input) 67 | if len(self.insize) == 1: 68 | current_mean = self.running_mean.view([1, self.insize[0]]).expand_as(input) 69 | current_var = self.running_var.view([1, self.insize[0]]).expand_as(input) 70 | else: 71 | current_mean = self.running_mean 72 | current_var = self.running_var 73 | # get output 74 | 75 | 76 | if denorm: 77 | y = torch.clamp(input, min=-5.0, max=5.0) 78 | y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float() 79 | else: 80 | if self.norm_only: 81 | y = input/ torch.sqrt(current_var.float() + self.epsilon) 82 | else: 83 | y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon) 84 | y = torch.clamp(y, min=-5.0, max=5.0) 85 | return y -------------------------------------------------------------------------------- /learning/runners/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | from .on_policy_runner import OnPolicyRunner
--------------------------------------------------------------------------------
/learning/storage/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | 
4 | from .rollout_storage import RolloutStorage
--------------------------------------------------------------------------------
/learning/storage/base_storage.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | 
4 | """ Storage for the different training loops.
5 | Design consideration: change the original "has-a" relationship of long-term storage -> rollout -> transition to a parallel one.
6 | Transition stores all the data needed for every step of all agents; it is cleared/updated every step and passed to the rollout and long-term storage.
7 | Rollout stores data for every iteration and generates mini-batches for learning.
8 | Long-term storage keeps selected data from transitions and may also generate batches.
9 | """
10 | 
11 | class BaseStorage:
12 |     """ This class is a skeleton for an arbitrary storage type. """
13 | 
14 |     class Transition:
15 |         """ Transition storage class.
16 |             i.e. store data for each STEP of ALL agents
17 |         """
18 |         def __init__(self):
19 |             """ Define all the data you need to store in __init__
20 |             """
21 |             raise NotImplementedError
22 | 
23 |         def clear(self):
24 |             self.__init__()
25 | 
26 |     def __init__(self, max_storage, device='cpu'):
27 |         self.device = device
28 |         self.max_storage = max_storage
29 |         # fill_count keeps track of how much storage is filled with actual data
30 |         # anything after the fill_count is stale and should be ignored
31 |         self.fill_count = 0
32 | 
33 |     def add_transitions(self, transition: Transition):
34 |         """ Add the current transition to long-term storage.
35 |             Store variables according to the __init__
36 |         """
37 |         self.fill_count += 1
38 |         raise NotImplementedError
39 | 
40 |     def clear(self):
41 |         self.fill_count = 0
42 | 
43 | 
44 |     def mini_batch_generator(self):
45 |         """ Generate mini batches for learning
46 |         """
47 |         raise NotImplementedError
48 | 
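A concrete storage only has to define its Transition fields, copy them in add_transitions(), and yield mini-batches. A hypothetical minimal subclass that stores rewards only (illustrative sketch, not part of the repo; the import path assumes the package layout shown in the tree):

# Hypothetical minimal subclass of the BaseStorage skeleton above.
import torch
from learning.storage.base_storage import BaseStorage

class RewardStorage(BaseStorage):
    class Transition:
        def __init__(self):
            self.rewards = None
        def clear(self):
            self.__init__()

    def __init__(self, max_storage, num_envs, device='cpu'):
        super().__init__(max_storage, device)
        self.rewards = torch.zeros(max_storage, num_envs, device=device)

    def add_transitions(self, transition):
        if self.fill_count >= self.max_storage:
            raise AssertionError("Storage overflow")
        self.rewards[self.fill_count].copy_(transition.rewards)
        self.fill_count += 1

    def mini_batch_generator(self, num_mini_batches):
        flat = self.rewards[:self.fill_count].flatten()
        batch = flat.numel() // num_mini_batches
        for i in range(num_mini_batches):
            yield flat[i*batch:(i+1)*batch]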
--------------------------------------------------------------------------------
/learning/storage/rollout_storage.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import torch
32 | from .base_storage import BaseStorage
33 | 
34 | class RolloutStorage(BaseStorage):
35 |     """ A standard rollout storage, implemented for PPO.
36 |     """
37 |     class Transition:
38 |         def __init__(self):
39 |             self.observations = None
40 |             self.critic_observations = None
41 |             self.actions = None
42 |             self.rewards = None
43 |             self.dones = None
44 |             self.values = None
45 |             self.actions_log_prob = None
46 |             self.action_mean = None
47 |             self.action_sigma = None
48 | 
49 |         def clear(self):
50 |             self.__init__()
51 | 
52 |     def __init__(self, num_envs, num_transitions_per_env, num_obs, num_critic_obs, num_actions, device='cpu'):
53 | 
54 |         self.device = device
55 | 
56 |         self.num_obs = num_obs
57 |         self.num_critic_obs = num_critic_obs
58 |         self.num_actions = num_actions
59 | 
60 |         # Core
61 |         self.observations = torch.zeros(num_transitions_per_env, num_envs, num_obs, device=self.device)
62 |         if num_critic_obs is not None:
63 |             self.critic_observations = torch.zeros(num_transitions_per_env, num_envs, num_critic_obs, device=self.device)
64 |         else:
65 |             self.critic_observations = None
66 | 
67 |         self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
68 |         self.actions = torch.zeros(num_transitions_per_env, num_envs, num_actions, device=self.device)
69 |         self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
70 | 
71 |         # For PPO
72 |         self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
73 |         self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
74 |         self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
75 |         self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
76 |         self.mu = torch.zeros(num_transitions_per_env, num_envs, num_actions, device=self.device)
77 |         self.sigma = torch.zeros(num_transitions_per_env, num_envs, num_actions, device=self.device)
78 | 
79 |         self.num_transitions_per_env = num_transitions_per_env
80 |         self.num_envs = num_envs
81 | 
82 |         self.fill_count = 0
83 | 
84 |     def add_transitions(self, transition: Transition):
85 |         if self.fill_count >= self.num_transitions_per_env:
86 |             raise AssertionError("Rollout buffer overflow")
87 |         self.observations[self.fill_count].copy_(transition.observations)
88 |         if self.critic_observations is not None:
89 |             self.critic_observations[self.fill_count].copy_(transition.critic_observations)
90 |         self.actions[self.fill_count].copy_(transition.actions)
91 |         self.rewards[self.fill_count].copy_(transition.rewards.view(-1, 1))
92 | 
self.dones[self.fill_count].copy_(transition.dones.view(-1, 1)) 93 | self.values[self.fill_count].copy_(transition.values) 94 | self.actions_log_prob[self.fill_count].copy_(transition.actions_log_prob.view(-1, 1)) 95 | self.mu[self.fill_count].copy_(transition.action_mean) 96 | self.sigma[self.fill_count].copy_(transition.action_sigma) 97 | self.fill_count += 1 98 | 99 | def clear(self): 100 | self.fill_count = 0 101 | 102 | def compute_returns(self, last_values, gamma, lam): 103 | advantage = 0 104 | for step in reversed(range(self.num_transitions_per_env)): 105 | if step == self.num_transitions_per_env - 1: 106 | next_values = last_values 107 | else: 108 | next_values = self.values[step + 1] 109 | next_is_not_terminal = 1.0 - self.dones[step].float() 110 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 111 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 112 | self.returns[step] = advantage + self.values[step] 113 | 114 | # Compute and normalize the advantages 115 | self.advantages = self.returns - self.values 116 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 117 | 118 | def get_statistics(self): 119 | done = self.dones 120 | done[-1] = 1 121 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 122 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 123 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 124 | return trajectory_lengths.float().mean(), self.rewards.mean() 125 | 126 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 127 | batch_size = self.num_envs * self.num_transitions_per_env 128 | mini_batch_size = batch_size // num_mini_batches 129 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 130 | 131 | observations = self.observations.flatten(0, 1) 132 | if self.critic_observations is not None: 133 | critic_observations = self.critic_observations.flatten(0, 1) 134 | else: 135 | critic_observations = observations 136 | 137 | actions = self.actions.flatten(0, 1) 138 | values = self.values.flatten(0, 1) 139 | returns = self.returns.flatten(0, 1) 140 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 141 | advantages = self.advantages.flatten(0, 1) 142 | old_mu = self.mu.flatten(0, 1) 143 | old_sigma = self.sigma.flatten(0, 1) 144 | 145 | for epoch in range(num_epochs): 146 | for i in range(num_mini_batches): 147 | 148 | start = i*mini_batch_size 149 | end = (i+1)*mini_batch_size 150 | batch_idx = indices[start:end] 151 | 152 | obs_batch = observations[batch_idx] 153 | critic_observations_batch = critic_observations[batch_idx] 154 | actions_batch = actions[batch_idx] 155 | target_values_batch = values[batch_idx] 156 | returns_batch = returns[batch_idx] 157 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 158 | advantages_batch = advantages[batch_idx] 159 | old_mu_batch = old_mu[batch_idx] 160 | old_sigma_batch = old_sigma[batch_idx] 161 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 162 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch 163 | -------------------------------------------------------------------------------- /learning/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .utils import split_and_pad_trajectories, unpad_trajectories 32 | from .logging import Logger 33 | -------------------------------------------------------------------------------- /learning/utils/logging.py: -------------------------------------------------------------------------------- 1 | import wandb 2 | import torch 3 | from collections import deque 4 | from statistics import mean 5 | import os 6 | import shutil 7 | import fnmatch 8 | 9 | class Logger: 10 | def __init__(self, log_dir, max_episode_length_s, device): 11 | self.log_dir = log_dir 12 | self.device = device 13 | self.avg_window = 100 14 | self.log = {} 15 | self.loss_dict = {} 16 | self.it = 0 17 | self.tot_iter = 0 18 | self.learning_iter = 0 19 | self.mean_episode_length = 0. 20 | self.total_mean_reward = 0. 21 | self.max_episode_length_s = max_episode_length_s 22 | 23 | def make_log_dir(self): 24 | os.makedirs(self.log_dir, exist_ok=True) 25 | 26 | def initialize_buffers(self, num_envs, reward_keys): 27 | self.current_episode_return = \ 28 | {name: torch.zeros(num_envs, dtype=torch.float, device=self.device, 29 | requires_grad=False) for name in reward_keys} 30 | self.current_episode_length = torch.zeros(num_envs, 31 | dtype=torch.float, device=self.device) 32 | self.avg_return_buffer = {name: deque(maxlen=self.avg_window) 33 | for name in reward_keys} 34 | self.avg_length_buffer = deque(maxlen=self.avg_window) 35 | self.mean_rewards = {"Episode/"+name: 0. 
                              for name in reward_keys}
36 | 
37 |     def log_to_wandb(self):
38 |         wandb.log(self.log)
39 | 
40 |     def add_log(self, log_dict):
41 |         self.log.update(log_dict)
42 | 
43 |     def add_loss(self, loss_dict):
44 |         self.loss_dict.update(loss_dict)
45 | 
46 |     def update_iterations(self, it, tot_iter, learning_iter):
47 |         self.it = it
48 |         self.tot_iter = tot_iter
49 |         self.learning_iter = learning_iter
50 | 
51 |     def log_current_reward(self, name, reward):
52 |         if name in self.current_episode_return.keys():
53 |             self.current_episode_return[name] += reward
54 | 
55 |     def update_episode_buffer(self, dones):
56 |         self.current_episode_length += 1
57 |         terminated_ids = torch.where(dones)[0]
58 |         for name in self.current_episode_return.keys():
59 |             self.avg_return_buffer[name].extend(self.current_episode_return[name]
60 |                                                 [terminated_ids].cpu().numpy().tolist())
61 |             self.current_episode_return[name][terminated_ids] = 0.
62 | 
63 |         self.avg_length_buffer.extend(self.current_episode_length[terminated_ids]
64 |                                       .cpu().numpy().tolist())
65 |         self.current_episode_length[terminated_ids] = 0
66 |         if len(self.avg_length_buffer) > 0:
67 |             self.calculate_reward_avg()
68 | 
69 |     def calculate_reward_avg(self):
70 |         self.mean_episode_length = mean(self.avg_length_buffer)
71 |         self.mean_rewards = {"Episode/"+name: mean(self.avg_return_buffer[name]) / self.max_episode_length_s
72 |                              for name in self.current_episode_return.keys()}
73 |         self.total_mean_reward = sum(list(self.mean_rewards.values()))
74 | 
75 |     def print_to_terminal(self):
76 |         width = 80
77 |         pad = 50
78 |         header = f" \033[1m Learning iteration {self.it}/{self.tot_iter} \033[0m "
79 | 
80 |         log_string = (f"""{'#' * width}\n"""
81 |                       f"""{header.center(width, ' ')}\n\n"""
82 |                       f"""{'Computation:':>{pad}} {self.log['Perf/total_fps']:.0f} steps/s (collection: {self.log[
83 |                           'Perf/collection_time']:.3f}s, learning {self.log['Perf/learning_time']:.3f}s)\n""")
84 | 
85 |         for key, value in self.loss_dict.items():
86 |             log_string += f"""{f'{key}:':>{pad}} {value:.4f}\n"""
87 | 
88 |         log_string += (f"""{'Mean action noise std:':>{pad}} {self.log['Policy/mean_noise_std']:.2f}\n"""
89 |                        f"""{'Mean reward:':>{pad}} {self.total_mean_reward:.2f}\n"""
90 |                        f"""{'Mean episode length:':>{pad}} {self.log['Train/mean_episode_length']:.2f}\n""")
91 | 
92 |         for key, value in self.mean_rewards.items():
93 |             log_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n"""
94 | 
95 |         log_string += (f"""{'-' * width}\n"""
96 |                        f"""{'Total timesteps:':>{pad}} {self.log['Train/total_timesteps']}\n"""
97 |                        f"""{'Iteration time:':>{pad}} {self.log['Train/iteration_time']:.2f}s\n"""
98 |                        f"""{'Total time:':>{pad}} {self.log['Train/time']:.2f}s\n"""
99 |                        f"""{'ETA:':>{pad}} {self.log['Train/time'] / (self.it + 1) * (
100 |                            self.learning_iter - self.it):.1f}s\n""")
101 | 
102 |         print(log_string)
103 | 
104 |     def configure_local_files(self, save_paths):
105 |         # create ignore patterns dynamically based on include patterns
106 |         def create_ignored_pattern_except(*patterns):
107 |             def _ignore_patterns(path, names):
108 |                 keep = set(name for pattern in patterns for name in
109 |                            fnmatch.filter(names, pattern))
110 |                 ignore = set(name for name in names if name not in keep and
111 |                              not os.path.isdir(os.path.join(path, name)))
112 |                 return ignore
113 |             return _ignore_patterns
114 | 
115 |         def remove_empty_folders(path, removeRoot=True):
116 |             if not os.path.isdir(path):
117 |                 return
118 |             # remove empty subfolders
119 |             files = os.listdir(path)
120 |             if len(files):
121 |                 for f in files:
122 |                     fullpath = os.path.join(path, f)
123 |                     if os.path.isdir(fullpath):
124 |                         remove_empty_folders(fullpath)
125 |             # if folder empty, delete it
126 |             files = os.listdir(path)
127 |             if len(files) == 0 and removeRoot:
128 |                 os.rmdir(path)
129 | 
130 |         # copy the relevant source files to the local logs for records
131 |         save_dir = self.log_dir+'/files/'
132 |         for save_path in save_paths:
133 |             if save_path['type'] == 'file':
134 |                 os.makedirs(save_dir+save_path['target_dir'],
135 |                             exist_ok=True)
136 |                 shutil.copy2(save_path['source_file'],
137 |                              save_dir+save_path['target_dir'])
138 |             elif save_path['type'] == 'dir':
139 |                 shutil.copytree(
140 |                     save_path['source_dir'],
141 |                     save_dir+save_path['target_dir'],
142 |                     ignore=create_ignored_pattern_except(
143 |                         *save_path['include_patterns']))
144 |             else:
145 |                 print('WARNING: uncaught save path type:', save_path['type'])
146 | 
147 |         remove_empty_folders(save_dir)
148 | 
149 | 
--------------------------------------------------------------------------------
/learning/utils/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | #    list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | #    this list of conditions and the following disclaimer in the documentation
12 | #    and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | #    contributors may be used to endorse or promote products derived from
16 | #    this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | #
29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin
30 | 
31 | import torch
32 | 
33 | def split_and_pad_trajectories(tensor, dones):
34 |     """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory.
35 |     Returns masks corresponding to valid parts of the trajectories
36 |     Example:
37 |         Input: [ [a1, a2, a3, a4 | a5, a6],
38 |                  [b1, b2 | b3, b4, b5 | b6]
39 |                ]
40 | 
41 |         Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
42 |                  [a5, a6, 0, 0],   |   [True, True, False, False],
43 |                  [b1, b2, 0, 0],   |   [True, True, False, False],
44 |                  [b3, b4, b5, 0],  |   [True, True, True, False],
45 |                  [b6, 0, 0, 0]     |   [True, False, False, False],
46 |                ]                   | ]
47 | 
48 |     Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
49 |     """
50 |     dones = dones.clone()
51 |     dones[-1] = 1
52 |     # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
53 |     flat_dones = dones.transpose(1, 0).reshape(-1, 1)
54 | 
55 |     # Get length of trajectory by counting the number of successive not done elements
56 |     done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
57 |     trajectory_lengths = done_indices[1:] - done_indices[:-1]
58 |     trajectory_lengths_list = trajectory_lengths.tolist()
59 |     # Extract the individual trajectories
60 |     trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
61 |     padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
62 | 
63 | 
64 |     trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
65 |     return padded_trajectories, trajectory_masks
66 | 
67 | def unpad_trajectories(trajectories, masks):
68 |     """ Does the inverse operation of split_and_pad_trajectories()
69 |     """
70 |     # Need to transpose before and after the masking to have proper reshaping
71 |     return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0)
72 | 
73 | def remove_zero_weighted_rewards(reward_weights):
74 |     for name in list(reward_weights.keys()):
75 |         if reward_weights[name] == 0:
76 |             reward_weights.pop(name)
77 | 
78 | def set_discount_from_horizon(dt, horizon):
79 |     """ Calculate a discount-factor from the desired discount horizon,
80 |         and the time-step (dt). 
81 | """ 82 | 83 | if horizon == 0: 84 | discount_factor = 0 85 | else: 86 | assert(horizon >= dt), "Invalid discounting horizon" 87 | discrete_time_horizon = int(horizon/dt) 88 | discount_factor = 1 - 1/discrete_time_horizon 89 | 90 | return discount_factor 91 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.23.2 2 | pandas==1.4.3 3 | matplotlib==3.5.3 4 | wandb==0.13.2 5 | tensorboard== 2.10.0 6 | --find-links https://download.pytorch.org/whl/cu113/torch_stable.html 7 | torch==1.10.0+cu113 8 | torchvision==0.11.1+cu113 9 | torchaudio==0.10.0+cu113 10 | pygame==2.1.2 11 | seaborn==0.13.0 12 | colorama==0.4.6 13 | pytest==7.2.2 14 | mss==9.0.1 15 | opencv-python==4.10.0.84 -------------------------------------------------------------------------------- /resources/robots/cartpole/urdf/cartpole.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /resources/robots/humanoid/trajectories/across_back10/JSM_across_back_2_RESAMPLED10.csv: -------------------------------------------------------------------------------- 1 | t,x,y,z,qw,qx,qy,qz,x_v,y_v,z_v,wx,wy,wz,left_shoulder_pitch,left_shoulder_abad,left_shoulder_yaw,left_elbow,left_hip_yaw,left_hip_abad,left_hip_pitch,left_knee,left_ankle,right_shoulder_pitch,right_shoulder_abad,right_shoulder_yaw,right_elbow,right_hip_yaw,right_hip_abad,right_hip_pitch,right_knee,right_ankle,left_shoulder_pitch_v,left_shoulder_abad_v,left_shoulder_yaw_v,left_elbow_v,left_hip_yaw_v,left_hip_abad_v,left_hip_pitch_v,left_knee_v,left_ankle_v,right_shoulder_pitch_v,right_shoulder_abad_v,right_shoulder_yaw_v,right_elbow_v,right_hip_yaw_v,right_hip_abad_v,right_hip_pitch_v,right_knee_v,right_ankle_v 2 | 0.0333333333333333,0.00354918271180132,-0.00370237297433314,0.63150572940349,0.000727730073746514,3.09510717995233e-05,3.70773776939164e-05,-0.000684160974466314,0.10647548135404,-0.111071189229994,-0.0255907587654669,0.609427650881926,0.166970937751622,-0.555124276502081,-0.265703676745309,0.511982138375704,0.323884009239767,-0.725273339441861,0.0813739177668785,-0.0960650845274529,-0.247341548083835,0.461083031103893,-0.223567592955909,-0.368090564104484,0.402412845631065,0.0923501487182662,-0.721851297928429,-0.0972801894261345,0.00693192718761122,-0.461859729538142,0.622761342058431,-0.0888699483597608,-1.135805314879,0.470394734749948,1.01382487699044,0.272197858277129,-0.51125906432984,0.0186316293976234,0.118169000583841,-0.986389726880965,0.0664294866445739,-1.09171740520441,-0.216683942103736,1.08293550599903,-0.0785106016429116,0.552808813963048,-0.0362440734438649,-1.90251593250219,2.56533239700649,-0.0933918738534396 3 | 
0.325925925925926,0.00943471560204676,-0.0362127128679891,0.628243930958931,0.000714414422792258,0.000190982340085039,0.000189765450747164,-0.000645576868944859,-0.0534750582117514,-0.154366705764466,-0.000409927059564036,2.60710890782136,0.123252853790648,0.468119866618267,-0.657837721107021,0.460554667924731,0.641759470744567,-0.681605983255238,0.0992938566902886,-0.118168870198248,-0.675713703781917,0.512758049156707,-0.268165549880329,-0.601019892566515,0.440761193348335,0.225215463173313,-0.726235717825689,-0.0493013976886132,-0.0116513500491829,-1.08866050510855,0.790276108083509,-0.0378725451431615,-1.62599019808851,-0.318065163319108,1.21343813017392,0.0215798151996134,0.595448881080995,-0.00741752593089727,-2.51290060279248,0.0818030440464706,-0.0756018007201356,-1.03287706655812,0.262219250369876,0.0873441333589309,-0.136447658317884,0.0693073580519648,0.0390808644994672,-2.6668935517332,-0.19981078165457,0.394961367204383 4 | 0.618518518518519,-0.0153510521514624,-0.202893566146282,0.619280092612614,0.00056922821015623,0.000519522618238167,0.000449897456866988,-0.000450421323698029,-0.0971667729270459,-1.0232513040014,-0.0553908538779602,3.38113592371256,0.0839998257193044,0.554660666924939,-1.43310628450855,0.416538531075136,0.754559367858916,-0.581782043220164,0.199922991504443,-0.068178020434887,-1.31578566855007,0.388944580154651,-0.254179851205448,-1.2935439051021,0.57518591383646,0.37610554163793,-0.750468338592011,-0.0755541684066681,0.0967974484678918,-1.81288697618211,0.874189104809367,-0.21276208156816,-2.75435506607755,-0.111179354012085,-0.841089614570879,-0.187734263271986,-0.583114587735357,-1.6353133284984,-1.45640382374667,0.386981781285853,0.831043137827918,-2.4973308680364,0.830066723682846,0.0917143206282015,-0.998335817766259,-0.597968412171985,0.473999726831478,-1.79631716870495,0.314248675142801,-0.713130285427985 5 | 0.911111111111111,-0.0102614831182494,-0.698393220021218,0.579455104963329,0.000227086628924475,0.000759812410220982,0.000587003560941196,-0.000160722391696538,0.00619822876945052,-1.88018981157433,-0.45071195864059,3.43530990508385,0.0137535094142924,-0.00508248129827252,-1.22661423398932,0.714160282521603,0.498693703590002,-1.46614077695387,-0.0539030826772567,-0.258483640039531,-1.35499563740403,1.77740044108005,-0.256869443697185,-0.896391187390265,0.958450142050945,0.0435076618996299,-1.79162368458784,-0.31446999609848,0.279067711923039,-1.71050307322787,0.254817574590477,0.224815622864672,1.68088327379503,1.41208493504151,-0.344180188521357,-2.43316757560461,-1.13029149146763,0.80984002669033,-0.113796355908478,3.59004916449903,0.0747035778371865,3.05118196619667,0.0721415438117087,-1.87155637058498,-2.19394722146477,0.66492357793728,-2.80812256238011,0.0214035619579703,2.23647034834028,-1.1512846812396 6 | 
1.2037037037037,-0.00297133514295495,-1.27152790767083,0.230664880951479,0.000341686665822823,-0.000696704152956942,-0.000547900487892652,-0.00030966543690712,0.129629250790804,-1.94484958182256,-1.6140461123262,8.80654229432827,0.127687319036773,-0.483808272685812,-1.0915535093149,0.913958279533818,0.396263936298268,-1.88706301935962,0.309926713959171,-0.674189428771552,-2.46048386488181,2.34141329391502,-0.270787210546123,-0.326331027116467,1.20164223493541,-0.478970136398665,-1.85718533495925,-0.101502969915126,-0.231018284905726,-2.27788964952447,0.89358909087951,0.195381684264482,2.02277640023322,2.1300549121468,-4.10002103384059,-0.25339125932423,6.06624037056904,-2.59946928369852,-7.04369748142634,1.13973769810897,-0.242391782785088,5.3346753314698,0.411667589238191,-8.24516384911339,2.35327106752337,1.82020142313356,-1.04935678211399,-5.04929057536772,1.49831549623406,-0.173604572513758 7 | 1.4962962962963,-0.0194119410013563,-1.51567545690552,0.0877988483126207,0.000786894694285873,-0.000268263312066235,-0.000152597420537993,-0.000533240773287345,-0.0160683050023145,-0.550316706953605,-0.0324825397864257,6.42407609823732,0.107510307235266,0.818633755784445,-0.831828222136308,0.545011761638977,0.918639423204378,-0.857032415283208,0.212030925693702,-0.557550738739666,-2.05724766274523,2.26041869672087,-0.157244062255038,-0.754261663555617,0.65809482028152,0.55255046804953,-0.77721872323633,-0.334972281453967,-0.056236098098497,-1.87680796855147,1.44515858818605,0.0326783045246127,-2.34898129050594,1.25608479843573,1.91015686729678,-0.237501429927807,1.71256334746208,0.580292745541573,-1.98253080153166,0.814010030220192,0.86045152915906,-4.82871468509705,2.85013176141728,4.57057053261642,0.617832444166748,-1.0764903851861,-0.933314258956475,-1.8334714610382,0.963286911266528,-0.395007490104168 8 | 1.78888888888889,-0.020978667908291,-1.7574097589585,0.175486373915923,0.000858785616258343,9.81270935753278e-05,0.000118932709298324,-0.000488535023664143,-0.0412731985785331,-0.750373527825093,0.499855913510603,0.846551423833888,0.2052362476959,-0.209651266642569,-1.29601422119959,0.536382638254696,1.22784746276462,-0.784531032054496,0.744712184653301,-0.810553016002501,-2.68504009663019,2.7909536783617,-0.515854230832225,-1.60332000173767,0.907658142421314,1.13053530216871,-0.556561542368247,-0.333366644253434,-0.130277920213708,-2.6293703112246,2.15587481040801,-0.0060403855696261,0.0634737598086654,-0.599252097385301,-1.58283562117904,-0.656661048371624,-3.99382380184585,1.01380223802861,-57.7414870235785,1.22596413287555,-1.56816330945846,0.856848176451966,0.356675340629912,-0.85477401255284,0.0272336767830796,0.686607184936237,0.729766716736141,1.10931120303104,1.43836671950842,-1.60594382025771 9 | 
2.08148148148148,-0.0178256141600203,-1.90395275294312,0.427612405357731,0.000818878212153078,0.00016942300510534,0.000185206577863673,-0.000516152980302424,0.0305187779279579,-0.563173010901533,0.923260517135427,0.0754253719523236,-0.206297268364605,-0.37127673870126,-0.789442595216104,0.678145413948129,0.910878903765511,-0.953851617661909,-0.0453377051664248,-0.118509198862829,-1.53218670970716,2.08041370677768,-0.32449420208267,-0.416139452696563,0.908711307557685,0.273615669058725,-0.731312336253164,-0.183482128987846,-0.107435745548639,-1.84646574109787,1.80454266249297,-0.203614744926246,3.08428857509492,0.39918956396668,-1.28268365485225,1.21019994431072,-0.22482410246367,1.15731434089376,3.65935525178234,-3.8362549995838,0.402992312029373,5.28699974564936,-1.3302022484774,-2.64517328761203,-1.09545852745409,0.110817876928653,-0.0844631261050796,3.60129481688215,-2.51945778103333,0.07805595705687 10 | 2.37407407407407,0.0181761416148608,-2.10927897609035,0.635981469864297,0.000781613436680282,0.000128785803283484,9.75332394526174e-05,-0.000602455003117602,0.1166311870053,-0.722867456781041,0.4580868304442,-0.708266375474628,-0.431308675661729,0.147197674999281,0.250773454615328,0.492585690791276,0.159246134712923,-0.527959297751971,0.0262498237712537,-0.0613497150378762,-0.891121814303864,1.25863433049925,0.127508313695309,0.247360144008581,0.397273825704209,-0.274114318825031,-0.537197102633823,-0.189681228387591,0.014315858145139,-0.70402513491918,0.907890565622656,0.0140502154105548,1.89531266164579,-1.30196956815217,-1.88016866679755,1.1201700665079,0.973116654965756,0.113937452545544,0.725094635720375,-3.51352775949637,-0.900671039594835,1.70304581628829,-1.05677353982178,-0.795594265897178,0.284411188753667,-0.548079572259383,0.646835029781712,3.48365716408955,-2.97736454579556,1.50429983078867 11 | 2.66666666666667,0.0690436911333674,-2.33596070508655,0.629277771580235,0.000733192015413533,4.82332491249929e-05,-7.47092906723732e-06,-0.000678267799199963,0.240360056131662,-0.848312528888222,-0.452370404757813,-1.20946600017996,0.0546828125436039,-1.11749196217809,0.471427391772861,0.22714948721837,-0.35964124102532,-0.226810363859273,0.14986562852569,-0.00426203706219823,-0.495042868804047,0.442797704074437,0.0295413491466684,0.297203672366716,0.349273220620926,-0.383458704525184,-0.461183401607477,-0.165714591512051,0.153844611430694,-0.066090326069088,0.798210440147729,0.251166292584229,0.0801571671862167,-0.187524344923441,-0.860457655484648,0.23925131466722,-0.373727079298949,-0.273713938715149,0.944212400561573,2.1286225526697,1.52022686129723,-0.237799032128446,0.185469132772456,-0.955797426566357,-1.01313668823746,-0.860613047981475,1.02378321150211,0.276295505955367,3.58957193545599,0.217943190768124 12 | -------------------------------------------------------------------------------- /resources/robots/humanoid/trajectories/across_back10/SH_across_back_1_RESAMPLED10.csv: -------------------------------------------------------------------------------- 1 | 
t,x,y,z,qw,qx,qy,qz,x_v,y_v,z_v,wx,wy,wz,left_shoulder_pitch,left_shoulder_abad,left_shoulder_yaw,left_elbow,left_hip_yaw,left_hip_abad,left_hip_pitch,left_knee,left_ankle,right_shoulder_pitch,right_shoulder_abad,right_shoulder_yaw,right_elbow,right_hip_yaw,right_hip_abad,right_hip_pitch,right_knee,right_ankle,left_shoulder_pitch_v,left_shoulder_abad_v,left_shoulder_yaw_v,left_elbow_v,left_hip_yaw_v,left_hip_abad_v,left_hip_pitch_v,left_knee_v,left_ankle_v,right_shoulder_pitch_v,right_shoulder_abad_v,right_shoulder_yaw_v,right_elbow_v,right_hip_yaw_v,right_hip_abad_v,right_hip_pitch_v,right_knee_v,right_ankle_v 2 | 0.0333333333333333,-4.04721829284767e-05,8.22363410112302e-05,0.626053727417537,0.000754152226980274,8.20561007058022e-05,0.000162019099754411,-0.000631087177965472,-0.0012141654878543,0.00246709023033691,-0.0117969574182708,1.04272218377711,0.11012538612112,-0.0696392350526964,-0.740985691974209,0.629428838072515,0.589248253465709,-1.10737229320326,-0.195378478368753,-0.0052224578258778,-0.526678595187992,0.511594834644776,-0.234635753560767,-0.300189208615733,0.663096341670034,-0.137507427234445,-1.20031224879716,0.0324335375913101,-0.194694789692121,-0.866250258840062,0.637972561476923,-0.0575169140252416,-1.23463646587603,-0.646137438003617,0.976032431697037,-0.0311176522506917,0.550779383047893,-0.294216066454316,-1.06572241608155,0.0792125403282185,-0.0574636353425018,-1.53535876834042,0.279633784292657,1.64733118830625,-0.215743037627396,0.154558573043892,-0.207297203763778,-1.06301424653595,-1.164274909957,-0.406726200352843 3 | 0.307407407407407,-0.0153635998506287,-0.02579985705509,0.604697804008391,0.000714176175901488,0.00026467039364223,0.000328360907509078,-0.000558348741325908,-0.0942164314658974,-0.202176183155941,-0.161072672147913,2.61288800360824,0.155650885324578,-0.106633353550787,-1.00202751935197,0.407838582979317,0.815422228000229,-0.966025279978501,-0.0258630543825349,-0.0948338727076122,-1.13732466215384,0.771972968087791,-0.363317897550496,-0.730675900472712,0.562099918679976,0.281647800825095,-0.92802047967837,0.0962096769522819,-0.160291931686005,-1.43409413839295,0.654821720012625,-0.0612509013721962,-1.07123796795028,-0.47329820862844,0.493082913523613,0.198605345339021,0.31272347448505,-0.375535959756949,-3.24918528770819,1.86658486117541,-0.87169439708382,-1.64439817783549,-0.604410832102051,1.07313608418085,1.04946715132513,0.417491858046726,0.410328109382459,-3.26725519219419,1.61447213325581,-1.20162880011652 4 | 0.581481481481481,-0.0312540561326991,-0.144232563281361,0.522608137246472,0.000554505177197118,0.000480867261622497,0.000517735985266634,-0.000438996010902308,-0.0443497719023693,-0.698318030150199,-0.292471808620672,2.64543219619694,-0.313109405418135,-0.490357113259268,-1.3391780030271,0.435273644039666,0.818090432145413,-0.9365702840659,0.00117155484459281,-0.0299470347695167,-1.93249892508394,1.39729876216088,-0.476571148177938,-1.02828730297401,0.390870061700184,0.284932076732871,-0.960188446275734,0.24125495455082,-0.104111376587514,-2.3263347737339,1.31354357003981,-0.452831458115639,-0.812208975970346,1.00780761241795,-0.598931954305003,-0.066735289888451,-0.452290287701393,0.54212907314688,-1.3341189815898,0.800286443972528,-0.522697589637656,0.215058340078123,-0.529712846365853,-0.732240948417818,-2.0344900331611,0.98286569788564,-0.0775056840141324,-2.2768548267962,1.77498238915251,-1.57504343740366 5 | 
0.855555555555556,-0.0597950000847167,-0.497194141161464,0.511593207994637,0.000281545004045535,0.000710252351163749,0.000619504365895841,-0.000177671767388023,-0.122389255487908,-1.74319393348028,0.0205675754682466,3.56005195198993,0.125764207421065,0.753169963671663,-1.1059942908618,0.599682130099077,0.480336736757254,-1.4445754590712,-0.0236763549805573,-0.144641045358488,-2.23870737784403,1.70879000847086,0.100311976012723,-0.78583448936815,0.492784840813527,0.192659887046152,-1.66960011481203,0.152105844543098,-0.0319387142145234,-2.30279684543681,0.907027474823464,-0.455240480891728,-0.370694823255392,-0.691603873167138,0.120088256756089,0.27380615263248,0.127074157958493,0.730111565553926,-1.34910759511716,6.03697920315103,1.0162731742699,-0.0479071751851917,0.881661885956438,0.259179501226224,-0.0519342294866246,-1.5382854758839,0.772847442276506,2.42844272991989,-5.60838936838935,3.06173161106878 6 | 1.12962962962963,-0.121911157094954,-1.00237665951169,0.270086800443797,0.000218897722750509,-0.00074135078381478,-0.000543558994315735,-0.000324313716578174,-0.216158864255392,-1.80288336488012,-1.29427465944606,8.17058850144098,0.772718434810345,-1.04791099896317,-1.71052938877976,0.671137004514495,0.848900568544356,-1.61241271558421,-0.193243774922072,0.133114069588277,-2.4890172581665,2.45400641011598,0.693113588945128,-1.2573010620938,0.759703758783096,0.331198731868747,-1.90925107347204,0.0495702969657639,-0.0701138117865184,-2.34203326500317,1.40128138757939,-0.0291019459452437,-5.50882279923999,2.02960620073639,1.6200121611712,-3.6769435319982,0.855814618615655,-0.775828649287965,-3.42038341266191,0.865038177898534,2.42786152943499,-4.27207100745116,1.31012369606733,1.46203980822844,-3.61271184479773,0.21341139928015,0.324837366520215,-4.56756897565987,5.060347379935,-0.904414387905598 7 | 1.4037037037037,-0.160590935865561,-1.26676312106328,0.0915577175839584,0.000613889457101654,-0.000412724254586726,-0.000222855079476889,-0.0006339881894246,-0.155466962775186,-0.626906525909487,-0.116616427752251,5.69232621029223,0.278835630582218,-1.06854669604973,-0.886919340833104,0.517509749558452,0.869104603308107,-0.85176283569987,-0.00747369732879253,0.0106865714099503,-1.98071476653611,2.53797062336079,0.0230619722506466,-0.518322730648631,0.223512238423772,0.302878654302435,-0.403774583308514,0.0666677026256757,-0.0215272241772063,-1.78735543350679,1.75192857239704,-0.0154249820625941,-3.2580224019205,0.0633283407411258,2.13525608708664,2.03935570620807,-0.349665880650671,1.44222161877056,-0.190447868066427,-1.00099021464313,-5.37969086807858,-1.94412630552979,-0.966243990826009,3.06879815681722,3.11013093848079,1.25091350496651,-0.464957853143224,-2.0758637005664,1.12805917829305,1.06750492432257 8 | 
1.67777777777778,-0.162777714340738,-1.42309969575848,0.184290690136979,0.000750211628663528,0.000103669323411176,7.37326160554051e-05,-0.000648690551651036,-0.137842035552124,-0.56896070699874,0.53464345674473,1.90436404438144,0.245385529652602,-0.151050466205398,-2.08544044945785,1.06461315661703,1.22073590802532,-1.15250001128635,0.561850149124282,-0.324794213824648,-2.52786822379288,2.80259418849113,-0.462076828920121,-1.416961619856,0.556610453330483,0.521667381232373,-0.593897998799329,-0.176766326915684,0.0891517346279999,-2.46351270830266,1.96232438118141,-0.00968237444398894,-2.54926469099117,1.18413145402481,0.654499183822943,-0.428380213230313,-1.37233653892695,0.226350037598578,2.40285042440939,0.467108575616952,-2.7226608380696,-0.478472483737537,1.58908814135142,-0.194062892357031,-2.10384468285489,0.165153104284929,-0.656817960229312,0.0568183971167198,0.643389861787144,-0.80512623654933 9 | 1.95185185185185,-0.178535956030607,-1.52552220168101,0.398666110691641,0.00066003723968354,0.000180096267989845,0.000218547706888724,-0.000695766056646494,-0.0900942769628923,-0.406243626711328,0.816397073416661,0.832434503334294,0.447771645498686,-0.667921080450004,-0.661337269921148,1.01335506776785,0.32989540571443,-1.44888493796681,0.0980006581041009,0.111750334468269,-1.76749256419036,2.2312956340379,-0.494005574664826,-0.300881991115307,0.659536064184807,0.0294776648739521,-1.09282570719775,0.129555093454608,-0.0671355721014318,-2.00274747939815,1.63736461329405,-0.152613258268687,3.28947894486691,-2.6106970260593,-0.368402621348127,2.91616747139297,-1.17034716344434,1.60862275350866,2.52552293830836,-3.10927938139146,0.182388083871969,4.92205076523867,-1.23200871985676,0.0532880192589255,0.149750720377684,1.17601502642729,-0.970038446831226,2.34944409264704,-2.19722969221587,-0.539573183719154 10 | 2.22592592592593,-0.207443182499235,-1.68412184216525,0.600342245296796,0.000674369800017444,0.000127025068152278,0.000199583405873568,-0.000699413714178339,-0.0714911080208342,-0.740468364876425,0.643397934167155,-1.07932493067418,-0.289665829053403,0.388341854470287,-0.158422024759643,0.51580409476355,0.493935495303056,-0.682043621651194,-0.0652778668788577,0.150641222037234,-0.727804782961314,1.06834920606132,-0.2338921835318,0.454379866333589,0.205063760669529,0.348467504043846,-0.548630576508572,0.219120018273648,-0.217127919301649,-1.27397495981141,1.1028749442406,0.257957482385646,1.26485004419508,-1.30902947897543,1.18456839740041,3.14527453608953,-1.48755625453614,-0.686965168394003,4.35334873990501,-5.13163909183638,2.98561929810828,1.23683394717441,-0.959892268776987,1.68869265340022,1.99430986479788,-0.504718667504597,-0.46585807029113,2.11445803901926,-1.40377059497875,2.12896166082751 11 | 
2.5,-0.225376493181148,-1.89889972347476,0.63559061333912,0.000686811783749034,9.22957093500218e-05,7.95287986100156e-05,-0.000716551635215979,-0.0829952204359821,-0.689865756612698,-0.0847035615221026,-0.520211035929865,-0.519298177704005,-1.08210862844226,0.19929392370951,0.481062945074285,0.486699713389305,-0.633414518458369,-0.107535312968427,0.128480111492327,-0.227252182424697,0.834619032305571,0.576669195755459,0.4639200861269,0.232316669041093,0.380633996620174,-0.508274302070861,0.0770362825190305,-0.223484732324177,-0.557388358464813,0.297960801502561,0.0667696586666233,0.956232721229631,-0.565676155266994,-3.61609612347368,-0.0147040630320705,2.39636566556254,-0.638386244908611,-1.07224125502454,4.09532936765932,0.513909229237637,-2.36196758478714,1.24580211034854,-0.703008225295251,1.01944686645553,-1.59919923233593,-0.857999582103644,1.11664091835387,1.64733361154889,-0.525534907486553 12 | -------------------------------------------------------------------------------- /resources/robots/humanoid/trajectories/across_back10/SH_across_back_2_RESAMPLED10.csv: -------------------------------------------------------------------------------- 1 | t,x,y,z,qw,qx,qy,qz,x_v,y_v,z_v,wx,wy,wz,left_shoulder_pitch,left_shoulder_abad,left_shoulder_yaw,left_elbow,left_hip_yaw,left_hip_abad,left_hip_pitch,left_knee,left_ankle,right_shoulder_pitch,right_shoulder_abad,right_shoulder_yaw,right_elbow,right_hip_yaw,right_hip_abad,right_hip_pitch,right_knee,right_ankle,left_shoulder_pitch_v,left_shoulder_abad_v,left_shoulder_yaw_v,left_elbow_v,left_hip_yaw_v,left_hip_abad_v,left_hip_pitch_v,left_knee_v,left_ankle_v,right_shoulder_pitch_v,right_shoulder_abad_v,right_shoulder_yaw_v,right_elbow_v,right_hip_yaw_v,right_hip_abad_v,right_hip_pitch_v,right_knee_v,right_ankle_v 2 | 0.0333333333333333,0.0031489474948443,-0.000232259492348385,0.632840233399579,0.000792790232428042,-3.52814002424425e-05,4.74557489456265e-05,-0.000606619173827891,0.094468424845329,-0.00696778477045154,-0.0195235285486106,-0.134630530533629,0.283988027310949,0.355311408420671,0.144725317393875,0.459974023545951,0.428501912069592,-0.445475929304811,-0.30672639038243,0.0935011445278275,-0.167975798459084,0.306213428840069,-0.150720128090617,0.135854829116403,0.234149505007327,0.147445597237119,-0.486224505357343,0.0153064217271197,-0.168909576475646,-0.306178347517632,0.551285904708532,-0.107771379538011,-0.284953132250441,0.26167803079205,0.91618709773999,0.125465490560672,-0.201928230248201,0.186467349941724,-0.0114538196255026,0.0167177168844174,-0.124085546504824,0.110207294110542,-0.163773658450823,-1.77718990150432,-0.450978500049684,-0.55741041455776,-0.0268138575437532,-0.653344766356495,1.43995042142922,0.107517846876863 3 | 
0.344444444444444,0.0173404318229341,-0.0197763546487861,0.633252342464982,0.000815756883003977,-5.1912687207e-05,7.77625083276646e-05,-0.000570781706228723,0.00950717513519421,-0.121945380119808,-0.00449367288425704,0.155879502742011,0.212633908265769,0.23920763448569,-0.440207295399675,0.655358923948215,0.526757807363814,-0.8528710422807,-0.367632348397765,0.166600733764603,-0.195030570334953,0.300953109704199,-0.172313647859993,0.0411233444780315,0.431412507671461,-0.375269721952487,-0.963734629493148,-0.0437582508913524,-0.22106863285618,-0.548920544589091,0.587858355005997,-0.119614774135308,-3.17931079815999,0.0787347899607354,1.70736118391356,-1.08024671421723,0.309311203755619,0.134091418459766,-0.133334443455306,0.0506131291789757,-0.0874430055462511,-0.888257954535377,1.19268407055162,-0.525390832667108,-1.19513244955446,0.0928586218887409,-0.128628718292386,-0.552126075655301,-1.38149832306033,0.377711355067705 4 | 0.655555555555556,0.0212502553548155,-0.0866803627839168,0.616444456762991,0.000805809192538436,2.45345419815762e-05,0.0001448319604686,-0.000573547477044472,0.00720202546240627,-0.286024427100518,-0.0938715460415973,1.37524897306938,-0.0266111742266471,-0.364159992540469,-0.99266724659556,0.458666710932941,0.902761042922865,-0.861665205272987,-0.257140407577685,0.102763510293451,-0.348889633457941,0.488077434703122,-0.335579799698162,-0.425530474452725,0.636843591585496,-0.0016386678004502,-0.905870476344571,0.0342499308222738,-0.216976891138697,-0.823059406048688,0.429164930340083,-0.0792578782542634,-0.858301524089862,-0.511946825450334,0.861844740475204,0.464157469556302,0.394191621738012,-0.521359809943273,-1.07888926832759,0.95719575163728,-0.819010198213768,-1.75988205048963,0.00468689427812646,1.97192535177317,0.774313149901164,0.481766935198311,0.232895318363952,-1.52461942350307,0.449120153282256,0.268971768089524 5 | 0.966666666666667,0.00878018457896629,-0.210215746061222,0.571527094845331,0.000693862232875129,0.000315839955267086,0.000394017137543054,-0.000513372009140354,-0.0523544399849702,-0.599760567122742,-0.167828475766082,3.3611245974453,0.0470135159142393,-0.665630901991363,-1.30997293661343,0.365046613143306,1.0250180882496,-0.805532961767195,-0.104531471396103,-0.0685616341157624,-1.17446108971194,1.00356975225375,-0.396467882345769,-0.957331979161504,0.496708733294482,0.380761192640495,-0.726762402953394,0.168061911123319,-0.0917241785044793,-1.76970020094541,0.92333888719802,-0.150335017860649,-1.08340489744669,0.00648709774451695,0.481863087470873,-0.00385428570855995,0.261342063853268,-0.275261393028586,-2.96640012422815,1.27974513603781,0.706764721220227,-1.52457657494122,-0.740667537881438,0.47833387855941,-0.0912443951607589,0.484932563262949,0.444511293323402,-3.38296600375733,1.86923224108409,-1.44180300684978 6 | 
1.27777777777778,-0.00841441312538945,-0.574345693703951,0.525567533071113,0.000389725547852961,0.000657961457424527,0.000594444047362854,-0.000246742803065563,-0.0745342373539237,-1.61668722632835,-0.0914153062691769,3.75519394481911,-0.00886643803142489,0.221346690787052,-1.19858564391946,0.552740628996775,0.626380608822534,-1.35018937464527,-0.0909551821422285,-0.213317051757275,-1.79947912949532,1.37890669625647,0.158420447312797,-0.804321701615837,0.423668756450695,0.0381221979231612,-1.52396814732859,0.0349757892952249,0.235083539952867,-2.27581498708515,1.0786288584852,-0.539226946354741,1.84357996928072,0.12596176045144,-2.77895107101293,-2.75630932318015,-0.517582474797513,0.797603843720615,-3.09989985965642,7.77762106074665,-1.94644772172026,1.71591876361886,-0.410156274577313,-1.41892262954438,-3.22008237025717,-0.543832025471998,-0.136527648937186,0.0990876704781041,-1.95959943148792,1.08321646742927 7 | 1.58888888888889,-0.0608063671946022,-1.20238123485542,0.282976761144993,0.000145555843741017,-0.000741982231918937,-0.000568035524685268,-0.000318181676197376,-0.115029205018485,-2.05324857788137,-1.59606271326832,8.00860350041902,0.961125318880153,-1.23339006120949,-1.78647136805791,0.529580521209731,0.992163831678126,-1.38280060350093,-0.510534643137037,0.300119643360967,-2.44252592273354,2.49014959209805,0.84154542741414,-1.11724326731791,0.712479714012571,0.286921869104161,-1.74189142387173,0.027569610731494,0.0301498047631709,-2.19411045054816,1.20504003404307,0.0705197199005124,-5.06784989279767,0.143820459205396,3.36126148313147,-1.68168980230357,2.65857130770239,-1.9258893847467,-2.40387902339626,0.138121996340908,3.11164143361087,-4.28052046474591,1.34125945225503,2.01431914047081,-3.02518898241933,1.11748630910236,-1.3623229317383,-4.35232551456458,5.34994172749281,-1.40653422112056 8 | 1.9,-0.0932861364830928,-1.53292511638515,0.0994976658295706,0.000697179196491953,-0.000347423836458522,-0.000130268608647503,-0.000613406827024546,-0.100423974216357,-0.674936280220919,0.187178025479332,6.61595486551465,-0.621671085792143,0.211239370267797,-0.995125609438714,0.559138514198893,0.895958343760747,-0.878229759731434,0.155876404084546,-0.184568217914232,-2.07727541080089,2.60136603194945,0.202810510296512,-0.567486658319332,0.258709871293564,0.347551474119849,-0.423045777652509,0.0678157461611609,0.0420300288972125,-1.67507549224182,1.49818770492407,0.027271372405739,-3.56787937738389,0.478634359181271,0.413302229894622,0.527387909272172,5.71389439260991,-6.45943960244285,-5.28495477693402,2.27369467640133,0.142144379261829,-2.99192417362524,0.912258213966469,2.29329318450473,0.810638736983241,-0.429835463962045,0.504996511694238,-2.36771158374473,0.488145185063358,1.92908744966174 9 | 
2.21111111111111,-0.107492541434443,-1.70892875199432,0.304335215063385,0.000751281759324431,0.000168526564294033,0.000208183377865307,-0.000603103856685227,-0.0496007713784422,-0.610639071559866,0.988505761188707,1.16134198008683,0.436413945958141,-1.03339085046013,-1.25057483658259,1.14452454327155,0.539360756890373,-0.983535867680287,0.146860800721199,-0.0175610368307394,-2.15834657521702,2.61355030156917,-0.350687140065648,-1.09395769946661,0.683733168058945,0.449515765507975,-1.33351633081896,-0.0948445549602246,0.077187901175185,-2.29451682742041,1.61984034013722,0.0989121622399532,6.16609156200345,0.272663106519389,-4.44700901237404,-1.74152166229447,-1.31904816907612,1.81575597649547,3.6685420460351,-2.74195034817374,0.905336751565535,5.25605858819883,1.29752229700682,-1.41028624158052,-4.00770480312032,1.8292030207402,-1.2036814576081,1.59644813606355,-0.383245856631378,-0.69003854235038 10 | 2.52222222222222,-0.1007718362492,-1.96390238666547,0.577856473485,0.000686346095054649,0.000171789217012646,0.000254373385952307,-0.000659269145311447,0.13467463073591,-0.910594916362939,0.737161750592047,-0.568726691006033,-0.310505016935532,-0.789648669225962,-0.140425410799357,0.650578269018317,0.573279034961245,-0.87631064474113,0.0153010712606294,0.117081040064642,-1.00093821835797,1.40579654216984,0.134090681605182,0.39407549393936,0.377543658896098,0.0218364697114844,-0.854626644858674,0.190696752266413,-0.139562359072823,-1.32012612639459,1.04800100861128,-0.0536368301560122,3.31429601005236,-1.79951065717539,-1.225119899897,1.54419612104329,-0.159168723926328,-0.0934750349256822,1.55655364996609,-2.29612771777004,6.66552725859507,1.45341964368253,-1.14259128613276,-0.509986362108436,3.89606179145202,0.320612506159443,-0.291669064921251,3.81205080963511,-2.71200753912774,1.46466129782021 11 | 2.83333333333333,-0.0626296588872092,-2.25280852531858,0.652838833686091,0.000682285464449998,9.40041832986888e-05,0.0001402978551625,-0.000711313060726007,0.161535747860526,-0.987274166953758,-0.242030843590428,-0.928115914005527,-0.142947067564532,-0.453267332785872,0.424162130466821,0.493618135295905,0.446688335704911,-0.625063260162925,-0.183985748201592,0.0312111020890553,-0.806939129679057,0.199872890056765,0.0283498113575916,0.522307759781321,0.386342028188774,0.237641424820263,-0.490333528330073,0.148665003858209,-0.113694151476131,-0.173495551259841,0.309497456713892,0.247134992259696,1.2074546728885,0.151909603518753,-0.567184066206556,0.176129196975638,-1.11234499926742,0.219218719648552,1.03795175121287,-4.60121991082262,-0.272184988968133,-0.0100994630296114,0.899948283324046,2.15992563473675,-2.54824843806571,-0.548282990126086,0.258396005690625,3.45406257978416,-1.52900646416462,-0.449928855811907 12 | -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/README.md: -------------------------------------------------------------------------------- 1 | # MIT Humanoid URDFs 2 | 3 | ### Naming convention: 4 | F: Full collision boxes (each link gets a collision box) 5 | R: Reduced collision boxes (only feet, body, and hands get a collision box) 6 | sf: single-foot (one collision box for the entire foot) 7 | ht: heel-toe (two collision boxes for each foot) 8 | 9 | ### Notes 10 | - colors don't seem to get rendered 11 | - visualization is _a lot_ faster with just the collision boxes. 
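A note on the trajectory CSVs reproduced above: each row of the across_back10 files is one resampled frame, laid out as time t, floating-base position (x, y, z), base orientation quaternion (qw, qx, qy, qz), base linear and angular velocity (x_v, y_v, z_v, wx, wy, wz), then 18 joint positions followed by the same 18 names with a _v suffix for joint velocities. A minimal loading sketch, assuming only the header shown in SH_across_back_1_RESAMPLED10.csv — this is not the repo's own loader, just one way to consume the format (pandas is already in setup.py's install_requires):

# Minimal sketch: load one resampled trajectory and split it into
# base state and joint state. The column grouping is inferred from
# the CSV header above, not taken from this repo's loading code.
import pandas as pd

df = pd.read_csv("resources/robots/humanoid/trajectories/"
                 "across_back10/SH_across_back_1_RESAMPLED10.csv")

t = df["t"].to_numpy()                               # frame timestamps (s)
base_pos = df[["x", "y", "z"]].to_numpy()            # floating-base position
base_quat = df[["qw", "qx", "qy", "qz"]].to_numpy()  # base orientation quaternion
base_vel = df[["x_v", "y_v", "z_v", "wx", "wy", "wz"]].to_numpy()  # base twist

# Joint columns start after the 14 base-state columns; velocity columns
# repeat the position names with a "_v" suffix.
joint_names = [c for c in df.columns[14:] if not c.endswith("_v")]
q = df[joint_names].to_numpy()                        # (frames, 18) joint positions
qd = df[[c + "_v" for c in joint_names]].to_numpy()   # (frames, 18) joint velocities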
-------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/back.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/back.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_foot.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_foot.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_forearm.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_hip_abad.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_hip_abad.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_hip_yaw.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_hip_yaw.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_leg_lower.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_leg_lower.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_leg_upper.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_leg_upper.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_shoulder1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_shoulder1.stl -------------------------------------------------------------------------------- /resources/robots/humanoid/urdf/meshes_v3/left_shoulder2.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_shoulder2.stl
--------------------------------------------------------------------------------
/resources/robots/humanoid/urdf/meshes_v3/left_shoulder3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/left_shoulder3.stl
--------------------------------------------------------------------------------
/resources/robots/humanoid/urdf/meshes_v3/torso.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/humanoid/urdf/meshes_v3/torso.stl
--------------------------------------------------------------------------------
/resources/robots/pendulum/urdf/pendulum.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this text export.]
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/back.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/back.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_foot.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_foot.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_forearm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_forearm.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_hip_abad.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_hip_abad.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_hip_yaw.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_hip_yaw.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_leg_lower.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_leg_lower.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_leg_upper.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_leg_upper.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_shoulder1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_shoulder1.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_shoulder2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_shoulder2.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/left_shoulder3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/left_shoulder3.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/meshes_v3/torso.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hojae-io/ModelBasedFootstepPlanning-IROS2024/62824f93e65af36e67fb6c2b59cab543ca75a3d5/resources/robots/rom/urdf/meshes_v3/torso.stl
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/point_mass.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this text export.]
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/slip.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this text export.]
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/srbm.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this text export.]
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/srbm_biped.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this text export.]
--------------------------------------------------------------------------------
/resources/robots/rom/urdf/srbm_biped_revolute.urdf:
--------------------------------------------------------------------------------
[URDF XML content not preserved in this text export.]
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

setup(
    name='gpuGym',
    version='1.0.1',
    author='Biomimetic Robotics Lab',
    license="BSD-3-Clause",
    packages=find_packages(),
    description='Isaac Gym environments for Legged Robots',
    install_requires=['isaacgym',
                      'matplotlib',
                      'pandas',
                      'tensorboard',
                      'setuptools==59.5.0',
                      'torch>=1.4.0',
                      'torchvision>=0.5.0',
                      'numpy>=1.16.4']
)
--------------------------------------------------------------------------------
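setup.py registers the repo as the gpuGym package. As a hedged usage sketch (an editable install is the common workflow for Isaac Gym environment repos, not something setup.py itself prescribes): after running pip install -e . from the repo root, the gym and learning trees resolve as top-level imports.

# Import smoke test after an editable install (pip install -e .).
# Assumption: isaacgym is installed separately, since it appears in
# install_requires but is distributed by NVIDIA rather than on PyPI.
# Note that this repo's top-level package is named "gym", so it will
# shadow OpenAI Gym if both are installed in the same environment.
import gym
import learning

print(gym.__file__)       # should resolve inside the repo checkout
print(learning.__file__)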