├── rsl_rl ├── .gitignore ├── rsl_rl │ ├── storage │ │ ├── __init__.py │ │ └── rollout_storage.py │ ├── __init__.py │ ├── algorithms │ │ ├── __init__.py │ │ └── ppo.py │ ├── env │ │ ├── __init__.py │ │ └── vec_env.py │ ├── runners │ │ ├── __init__.py │ │ └── on_policy_runner.py │ ├── utils │ │ ├── __init__.py │ │ └── utils.py │ └── modules │ │ ├── __init__.py │ │ ├── actor_critic_recurrent.py │ │ └── actor_critic.py ├── setup.py ├── README.md ├── licenses │ └── dependencies │ │ ├── numpy_license.txt │ │ └── torch_license.txt └── LICENSE ├── README.md ├── .gitignore ├── eval_backflip.py ├── utils.py ├── train_backflip.py ├── train_walk.py ├── reward_wrapper.py └── locomotion_env.py /rsl_rl/.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | .idea 3 | 4 | # builds 5 | *.egg-info 6 | 7 | # cache 8 | __pycache__ 9 | .pytest_cache 10 | 11 | # vs code 12 | .vscode -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from .rollout_storage import RolloutStorage -------------------------------------------------------------------------------- /rsl_rl/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='rsl_rl', 4 | version='1.0.2', 5 | author='Nikita Rudin', 6 | author_email='rudinn@ethz.ch', 7 | license="BSD-3-Clause", 8 | packages=find_packages(), 9 | description='Fast and simple RL algorithms implemented in pytorch', 10 | python_requires='>=3.6', 11 | install_requires=[ 12 | "torch>=1.4.0", 13 | "torchvision>=0.5.0", 14 | "numpy>=1.16.4" 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /rsl_rl/README.md: -------------------------------------------------------------------------------- 1 | # RSL RL 2 | Fast and simple implementation of RL algorithms, designed to run fully on GPU. 3 | This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac GYM. 4 | 5 | Only PPO is implemented for now. More algorithms will be added later. 6 | Contributions are welcome. 7 | 8 | ## Setup 9 | 10 | ``` 11 | git clone https://github.com/leggedrobotics/rsl_rl 12 | cd rsl_rl 13 | pip install -e . 14 | ``` 15 | 16 | ### Useful Links ### 17 | Example use case: https://github.com/leggedrobotics/legged_gym 18 | Project website: https://leggedrobotics.github.io/legged_gym/ 19 | Paper: https://arxiv.org/abs/2109.11978 20 | 21 | **Maintainer**: Nikita Rudin 22 | **Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA 23 | **Contact**: rudinn@ethz.ch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Genesis-backflip 2 | 3 | A functional and streamlined codebase for locomotion training in Genesis, featuring: 4 | - Effective domain randomizations and useful rewards 5 | - Fundamental rewards for executing a complete backflip 6 | 7 | We use the same domain randomizations as in this repository in both single and double backflips. During deployment, we slightly changed Kps used on the real robot to do a clean backflip. 8 | 9 | ## Usage 10 | 11 | 1. 
Install the modified version of rsl_rl 12 | 13 | ``` 14 | cd rsl_rl && pip install -e . 15 | ``` 16 | 17 | 2. Train the Unitree Go2 for a backflip 18 | 19 | This codebase includes only the fundamental rewards required for learning a backflip. You can observe how Go2 learns to perform a full backflip by running: 20 | ``` 21 | python train_backflip.py -e EXP_NAME 22 | ``` 23 | 24 | 3. Evaluate existing checkpoints 25 | 26 | Run 27 | ``` 28 | python eval_backflip.py -e EXP_NAME --ckpt NUM_CKPT 29 | ``` 30 | 31 | 4. Generate a deployable checkpoint 32 | 33 | To produce a deployable checkpoint comparable to the provided example in Genesis, additional rewards must be incorporated to regularize motion and minimize torque peaks. 34 | 35 | ## Acknowledgement 36 | 37 | This codebase was inspired by [legged_gym](https://github.com/leggedrobotics/legged_gym) and its reward design principles. -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/numpy_license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2021, NumPy Developers. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /rsl_rl/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 
14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/dependencies for license information of dependencies of this package. -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .ppo import PPO -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .vec_env import VecEnv -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/runners/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .on_policy_runner import OnPolicyRunner -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .utils import split_and_pad_trajectories, unpad_trajectories -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .actor_critic import ActorCritic, ActorCriticNostd 32 | from .actor_critic_recurrent import ActorCriticRecurrent -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | tmp/ 3 | MUJOCO_LOG.TXT 4 | imgui.ini 5 | .DS_Store 6 | sftp-config.json 7 | *.mp4 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | data/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | # lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # debug 40 | examples/debug/* 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | *.py,cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | doc/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 107 | __pypackages__/ 108 | 109 | # Celery stuff 110 | celerybeat-schedule 111 | celerybeat.pid 112 | 113 | # SageMath parsed files 114 | *.sage.py 115 | 116 | # Environments 117 | .env 118 | .venv 119 | env/ 120 | venv/ 121 | ENV/ 122 | env.bak/ 123 | venv.bak/ 124 | .idea/ 125 | .vscode/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # mkdocs documentation 135 | /site 136 | 137 | # mypy 138 | .mypy_cache/ 139 | .dmypy.json 140 | dmypy.json 141 | 142 | # Pyre type checker 143 | .pyre/ 144 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. 
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from abc import ABC, abstractmethod 32 | import torch 33 | from typing import Tuple, Union 34 | 35 | # minimal interface of the environment 36 | class VecEnv(ABC): 37 | num_envs: int 38 | num_obs: int 39 | num_privileged_obs: int 40 | num_actions: int 41 | max_episode_length: int 42 | privileged_obs_buf: torch.Tensor 43 | obs_buf: torch.Tensor 44 | rew_buf: torch.Tensor 45 | reset_buf: torch.Tensor 46 | episode_length_buf: torch.Tensor # current episode duration 47 | extras: dict 48 | device: torch.device 49 | @abstractmethod 50 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 51 | pass 52 | @abstractmethod 53 | def reset(self, env_ids: Union[list, torch.Tensor]): 54 | pass 55 | @abstractmethod 56 | def get_observations(self) -> torch.Tensor: 57 | pass 58 | @abstractmethod 59 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 60 | pass -------------------------------------------------------------------------------- /eval_backflip.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import pickle 5 | 6 | import numpy as np 7 | import torch 8 | from reward_wrapper import Backflip 9 | from rsl_rl.runners import OnPolicyRunner 10 | 11 | import genesis as gs 12 | 13 | def export_policy_as_jit(actor_critic, path, name): 14 | os.makedirs(path, exist_ok=True) 15 | path = os.path.join(path, f'{name}.pt') 16 | model = copy.deepcopy(actor_critic.actor).to('cpu') 17 | traced_script_module = torch.jit.script(model) 18 | traced_script_module.save(path) 19 | 20 | def main(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('-e', '--exp_name', type=str, default='backflip') 23 | parser.add_argument('-v', '--vis', action='store_true', default=True) 24 | parser.add_argument('-c', '--cpu', action='store_true', default=False) 25 | parser.add_argument('-r', '--record', action='store_true', default=False) 26 | parser.add_argument('--ckpt', type=int, default=1000) 
27 | args = parser.parse_args() 28 | 29 | gs.init(backend=gs.cpu if args.cpu else gs.gpu) 30 | 31 | env_cfg, obs_cfg, reward_cfg, command_cfg = pickle.load( 32 | open(f'logs/{args.exp_name}/cfgs.pkl', 'rb') 33 | ) 34 | reward_cfg['reward_scales'] = {"feet_distance": 1} 35 | 36 | env = Backflip( 37 | num_envs=1, 38 | env_cfg=env_cfg, 39 | obs_cfg=obs_cfg, 40 | reward_cfg=reward_cfg, 41 | command_cfg=command_cfg, 42 | show_viewer=args.vis, 43 | eval=True, 44 | debug=True, 45 | ) 46 | 47 | log_dir = f'logs/{args.exp_name}' 48 | jit_ckpt_path = os.path.join(log_dir, 'exported', args.exp_name + f'_ckpt{args.ckpt}.pt') 49 | if os.path.exists(jit_ckpt_path): 50 | policy = torch.jit.load(jit_ckpt_path) 51 | policy.to(device='cuda:0') 52 | else: 53 | args.max_iterations = 1 54 | from train_backflip import get_train_cfg 55 | runner = OnPolicyRunner(env, get_train_cfg(args), log_dir, device='cuda:0') 56 | 57 | resume_path = os.path.join(log_dir, f'model_{args.ckpt}.pt') 58 | runner.load(resume_path) 59 | path = os.path.join(log_dir, 'exported') 60 | export_policy_as_jit(runner.alg.actor_critic, path, args.exp_name + f'_ckpt{args.ckpt}') 61 | 62 | policy = runner.get_inference_policy(device='cuda:0') 63 | 64 | env.reset() 65 | obs = env.get_observations() 66 | 67 | with torch.no_grad(): 68 | stop = False 69 | n_frames = 0 70 | if args.record: 71 | env.start_recording(record_internal=False) 72 | while not stop: 73 | actions = policy(obs) 74 | obs, _, rews, dones, infos = env.step(actions) 75 | n_frames += 1 76 | if args.record: 77 | if n_frames == 100: 78 | env.stop_recording("backflip.mp4") 79 | exit() 80 | 81 | if __name__ == '__main__': 82 | main() -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/torch_license.txt: -------------------------------------------------------------------------------- 1 | From PyTorch: 2 | 3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 8 | Copyright (c) 2011-2013 NYU (Clement Farabet) 9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 12 | 13 | From Caffe2: 14 | 15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 16 | 17 | All contributions by Facebook: 18 | Copyright (c) 2016 Facebook Inc. 19 | 20 | All contributions by Google: 21 | Copyright (c) 2015 Google Inc. 22 | All rights reserved. 23 | 24 | All contributions by Yangqing Jia: 25 | Copyright (c) 2015 Yangqing Jia 26 | All rights reserved. 27 | 28 | All contributions by Kakao Brain: 29 | Copyright 2019-2020 Kakao Brain 30 | 31 | All contributions from Caffe: 32 | Copyright(c) 2013, 2014, 2015, the respective contributors 33 | All rights reserved. 34 | 35 | All other contributions: 36 | Copyright(c) 2015, 2016 the respective contributors 37 | All rights reserved. 38 | 39 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 40 | copyright over their contributions to Caffe2. The project versioning records 41 | all such contribution and copyright details. 
If a contributor wants to further 42 | mark their specific copyright on a particular contribution, they should 43 | indicate their copyright solely in the commit message of the change when it is 44 | committed. 45 | 46 | All rights reserved. 47 | 48 | Redistribution and use in source and binary forms, with or without 49 | modification, are permitted provided that the following conditions are met: 50 | 51 | 1. Redistributions of source code must retain the above copyright 52 | notice, this list of conditions and the following disclaimer. 53 | 54 | 2. Redistributions in binary form must reproduce the above copyright 55 | notice, this list of conditions and the following disclaimer in the 56 | documentation and/or other materials provided with the distribution. 57 | 58 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 59 | and IDIAP Research Institute nor the names of its contributors may be 60 | used to endorse or promote products derived from this software without 61 | specific prior written permission. 62 | 63 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 64 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 67 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 68 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 69 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 70 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 71 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 72 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 73 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | 33 | def split_and_pad_trajectories(tensor, dones): 34 | """ Splits trajectories at done indices. Then concatenates them and padds with zeros up to the length og the longest trajectory. 35 | Returns masks corresponding to valid parts of the trajectories 36 | Example: 37 | Input: [ [a1, a2, a3, a4 | a5, a6], 38 | [b1, b2 | b3, b4, b5 | b6] 39 | ] 40 | 41 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True], 42 | [a5, a6, 0, 0], | [True, True, False, False], 43 | [b1, b2, 0, 0], | [True, True, False, False], 44 | [b3, b4, b5, 0], | [True, True, True, False], 45 | [b6, 0, 0, 0] | [True, False, False, False], 46 | ] | ] 47 | 48 | Assumes that the inputy has the following dimension order: [time, number of envs, aditional dimensions] 49 | """ 50 | dones = dones.clone() 51 | dones[-1] = 1 52 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping 53 | flat_dones = dones.transpose(1, 0).reshape(-1, 1) 54 | 55 | # Get length of trajectory by counting the number of successive not done elements 56 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0])) 57 | trajectory_lengths = done_indices[1:] - done_indices[:-1] 58 | trajectory_lengths_list = trajectory_lengths.tolist() 59 | # Extract the individual trajectories 60 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list) 61 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories) 62 | 63 | 64 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1) 65 | return padded_trajectories, trajectory_masks 66 | 67 | def unpad_trajectories(trajectories, masks): 68 | """ Does the inverse operation of split_and_pad_trajectories() 69 | """ 70 | # Need to transpose before and after the masking to have proper reshaping 71 | return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def wrap_to_pi(angles): 6 | angles %= 2 * np.pi 7 | angles -= 2 * np.pi * (angles > np.pi) 8 | return angles 9 | 10 | 11 | def gs_rand_float(lower, upper, shape, device): 12 | return (upper - lower) * torch.rand(size=shape, device=device) + lower 13 | 14 | 15 | def gs_inv_quat(quat): 16 | qw, qx, qy, qz = quat.unbind(-1) 17 | inv_quat = torch.stack([1.0 * qw, -qx, -qy, -qz], dim=-1) 18 | return inv_quat 19 | 20 | 21 | def gs_transform_by_quat(pos, quat): 22 | qw, qx, qy, qz = quat.unbind(-1) 23 | 24 | rot_matrix = torch.stack( 25 | [ 26 | 1.0 - 2 * qy**2 - 2 * qz**2, 27 | 2 * qx * qy - 2 * qz * qw, 28 | 2 * qx * qz + 2 * qy * qw, 29 | 2 * qx * qy + 2 * qz * qw, 30 | 1 
- 2 * qx**2 - 2 * qz**2, 31 | 2 * qy * qz - 2 * qx * qw, 32 | 2 * qx * qz - 2 * qy * qw, 33 | 2 * qy * qz + 2 * qx * qw, 34 | 1 - 2 * qx**2 - 2 * qy**2, 35 | ], 36 | dim=-1, 37 | ).reshape(*quat.shape[:-1], 3, 3) 38 | rotated_pos = torch.matmul(rot_matrix, pos.unsqueeze(-1)).squeeze(-1) 39 | 40 | return rotated_pos 41 | 42 | 43 | def gs_quat2euler(quat): # xyz 44 | # Extract quaternion components 45 | qw, qx, qy, qz = quat.unbind(-1) 46 | 47 | # Roll (x-axis rotation) 48 | sinr_cosp = 2 * (qw * qx + qy * qz) 49 | cosr_cosp = 1 - 2 * (qx * qx + qy * qy) 50 | roll = torch.atan2(sinr_cosp, cosr_cosp) 51 | 52 | # Pitch (y-axis rotation) 53 | sinp = 2 * (qw * qy - qz * qx) 54 | pitch = torch.where( 55 | torch.abs(sinp) >= 1, 56 | torch.sign(sinp) * torch.tensor(torch.pi / 2), 57 | torch.asin(sinp), 58 | ) 59 | 60 | # Yaw (z-axis rotation) 61 | siny_cosp = 2 * (qw * qz + qx * qy) 62 | cosy_cosp = 1 - 2 * (qy * qy + qz * qz) 63 | yaw = torch.atan2(siny_cosp, cosy_cosp) 64 | 65 | return torch.stack([roll, pitch, yaw], dim=-1) 66 | 67 | 68 | def gs_euler2quat(xyz): # xyz 69 | 70 | roll, pitch, yaw = xyz.unbind(-1) 71 | 72 | cosr = (roll * 0.5).cos() 73 | sinr = (roll * 0.5).sin() 74 | cosp = (pitch * 0.5).cos() 75 | sinp = (pitch * 0.5).sin() 76 | cosy = (yaw * 0.5).cos() 77 | siny = (yaw * 0.5).sin() 78 | 79 | qw = cosr * cosp * cosy + sinr * sinp * siny 80 | qx = sinr * cosp * cosy - cosr * sinp * siny 81 | qy = cosr * sinp * cosy + sinr * cosp * siny 82 | qz = cosr * cosp * siny - sinr * sinp * cosy 83 | 84 | return torch.stack([qw, qx, qy, qz], dim=-1) 85 | 86 | 87 | def gs_quat_from_angle_axis(angle, axis): 88 | theta = (angle / 2).unsqueeze(-1) 89 | xyz = normalize(axis) * theta.sin() 90 | w = theta.cos() 91 | return normalize(torch.cat([w, xyz], dim=-1)) 92 | 93 | 94 | def normalize(x, eps: float = 1e-9): 95 | return x / x.norm(p=2, dim=-1).clamp(min=eps, max=None).unsqueeze(-1) 96 | 97 | 98 | def gs_quat_mul(a, b): 99 | assert a.shape == b.shape 100 | shape = a.shape 101 | a = a.reshape(-1, 4) 102 | b = b.reshape(-1, 4) 103 | 104 | w1, x1, y1, z1 = a[:, 0], a[:, 1], a[:, 2], a[:, 3] 105 | w2, x2, y2, z2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3] 106 | ww = (z1 + x1) * (x2 + y2) 107 | yy = (w1 - y1) * (w2 + z2) 108 | zz = (w1 + y1) * (w2 - z2) 109 | xx = ww + yy + zz 110 | qq = 0.5 * (xx + (z1 - x1) * (x2 - y2)) 111 | w = qq - ww + (z1 - y1) * (y2 - z2) 112 | x = qq - xx + (x1 + w1) * (x2 + w2) 113 | y = qq - yy + (w1 - x1) * (y2 + z2) 114 | z = qq - zz + (z1 + y1) * (w2 - x2) 115 | 116 | quat = torch.stack([w, x, y, z], dim=-1).view(shape) 117 | 118 | return quat 119 | 120 | 121 | def gs_quat_apply(a, b): 122 | shape = b.shape 123 | a = a.reshape(-1, 4) 124 | b = b.reshape(-1, 3) 125 | xyz = a[:, 1:] 126 | t = xyz.cross(b, dim=-1) * 2 127 | return (b + a[:, :1] * t + xyz.cross(t, dim=-1)).view(shape) 128 | 129 | 130 | def gs_quat_apply_yaw(quat, vec): 131 | quat_yaw = quat.clone().view(-1, 4) 132 | quat_yaw[:, 1:3] = 0. 133 | quat_yaw = normalize(quat_yaw) 134 | return gs_quat_apply(quat_yaw, vec) 135 | 136 | 137 | def gs_quat_conjugate(a): 138 | shape = a.shape 139 | a = a.reshape(-1, 4) 140 | return torch.cat((a[:, :1], -a[:, 1:], ), dim=-1).view(shape) 141 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic_recurrent.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | from .actor_critic import ActorCritic, get_activation 38 | from rsl_rl.utils import unpad_trajectories 39 | 40 | class ActorCriticRecurrent(ActorCritic): 41 | is_recurrent = True 42 | def __init__(self, num_actor_obs, 43 | num_critic_obs, 44 | num_actions, 45 | actor_hidden_dims=[256, 256, 256], 46 | critic_hidden_dims=[256, 256, 256], 47 | activation='elu', 48 | rnn_type='lstm', 49 | rnn_hidden_size=256, 50 | rnn_num_layers=1, 51 | init_noise_std=1.0, 52 | **kwargs): 53 | if kwargs: 54 | print("ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),) 55 | 56 | super().__init__(num_actor_obs=rnn_hidden_size, 57 | num_critic_obs=rnn_hidden_size, 58 | num_actions=num_actions, 59 | actor_hidden_dims=actor_hidden_dims, 60 | critic_hidden_dims=critic_hidden_dims, 61 | activation=activation, 62 | init_noise_std=init_noise_std) 63 | 64 | activation = get_activation(activation) 65 | 66 | self.memory_a = Memory(num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 67 | self.memory_c = Memory(num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 68 | 69 | print(f"Actor RNN: {self.memory_a}") 70 | print(f"Critic RNN: {self.memory_c}") 71 | 72 | def reset(self, dones=None): 73 | self.memory_a.reset(dones) 74 | self.memory_c.reset(dones) 75 | 76 | def act(self, observations, masks=None, hidden_states=None): 77 | input_a = self.memory_a(observations, masks, hidden_states) 78 | return super().act(input_a.squeeze(0)) 79 | 80 | def act_inference(self, observations): 81 | input_a = self.memory_a(observations) 82 | return super().act_inference(input_a.squeeze(0)) 83 | 84 | 
def evaluate(self, critic_observations, masks=None, hidden_states=None): 85 | input_c = self.memory_c(critic_observations, masks, hidden_states) 86 | return super().evaluate(input_c.squeeze(0)) 87 | 88 | def get_hidden_states(self): 89 | return self.memory_a.hidden_states, self.memory_c.hidden_states 90 | 91 | 92 | class Memory(torch.nn.Module): 93 | def __init__(self, input_size, type='lstm', num_layers=1, hidden_size=256): 94 | super().__init__() 95 | # RNN 96 | rnn_cls = nn.GRU if type.lower() == 'gru' else nn.LSTM 97 | self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers) 98 | self.hidden_states = None 99 | 100 | def forward(self, input, masks=None, hidden_states=None): 101 | batch_mode = masks is not None 102 | if batch_mode: 103 | # batch mode (policy update): need saved hidden states 104 | if hidden_states is None: 105 | raise ValueError("Hidden states not passed to memory module during policy update") 106 | out, _ = self.rnn(input, hidden_states) 107 | out = unpad_trajectories(out, masks) 108 | else: 109 | # inference mode (collection): use hidden states of last step 110 | out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states) 111 | return out 112 | 113 | def reset(self, dones=None): 114 | # When the RNN is an LSTM, self.hidden_states_a is a list with hidden_state and cell_state 115 | for hidden_state in self.hidden_states: 116 | hidden_state[..., dones, :] = 0.0 -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | 38 | class ActorCritic(nn.Module): 39 | is_recurrent = False 40 | def __init__(self, num_actor_obs, 41 | num_critic_obs, 42 | num_actions, 43 | actor_hidden_dims=[256, 256, 256], 44 | critic_hidden_dims=[256, 256, 256], 45 | activation='elu', 46 | init_noise_std=1.0, 47 | **kwargs): 48 | if kwargs: 49 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()])) 50 | super(ActorCritic, self).__init__() 51 | 52 | activation = get_activation(activation) 53 | 54 | mlp_input_dim_a = num_actor_obs 55 | mlp_input_dim_c = num_critic_obs 56 | 57 | # Policy 58 | actor_layers = [] 59 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 60 | actor_layers.append(activation) 61 | for l in range(len(actor_hidden_dims)): 62 | if l == len(actor_hidden_dims) - 1: 63 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 64 | else: 65 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 66 | actor_layers.append(activation) 67 | self.actor = nn.Sequential(*actor_layers) 68 | 69 | # Value function 70 | critic_layers = [] 71 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 72 | critic_layers.append(activation) 73 | for l in range(len(critic_hidden_dims)): 74 | if l == len(critic_hidden_dims) - 1: 75 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 76 | else: 77 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 78 | critic_layers.append(activation) 79 | self.critic = nn.Sequential(*critic_layers) 80 | 81 | print(f"Actor MLP: {self.actor}") 82 | print(f"Critic MLP: {self.critic}") 83 | 84 | # Action noise 85 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 86 | self.distribution = None 87 | # disable args validation for speedup 88 | Normal.set_default_validate_args = False 89 | 90 | # seems that we get better performance without init 91 | # self.init_memory_weights(self.memory_a, 0.001, 0.) 92 | # self.init_memory_weights(self.memory_c, 0.001, 0.) 93 | 94 | @staticmethod 95 | # not used at the moment 96 | def init_weights(sequential, scales): 97 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 98 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 99 | 100 | 101 | def reset(self, dones=None): 102 | pass 103 | 104 | def forward(self): 105 | raise NotImplementedError 106 | 107 | @property 108 | def action_mean(self): 109 | return self.distribution.mean 110 | 111 | @property 112 | def action_std(self): 113 | return self.distribution.stddev 114 | 115 | @property 116 | def entropy(self): 117 | return self.distribution.entropy().sum(dim=-1) 118 | 119 | def update_distribution(self, observations): 120 | mean = self.actor(observations) 121 | self.distribution = Normal(mean, mean*0. 
+ self.std) 122 | 123 | def act(self, observations, **kwargs): 124 | self.update_distribution(observations) 125 | return self.distribution.sample() 126 | 127 | def get_actions_log_prob(self, actions): 128 | return self.distribution.log_prob(actions).sum(dim=-1) 129 | 130 | def act_inference(self, observations): 131 | actions_mean = self.actor(observations) 132 | return actions_mean 133 | 134 | def evaluate(self, critic_observations, **kwargs): 135 | value = self.critic(critic_observations) 136 | return value 137 | 138 | class ActorCriticNostd(ActorCritic): 139 | def __init__(self, num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims=[256, 256, 256], critic_hidden_dims=[256, 256, 256], activation='elu', init_noise_std=1, **kwargs): 140 | super().__init__(num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims, critic_hidden_dims, activation, init_noise_std, **kwargs) 141 | self.action = None 142 | 143 | @property 144 | def action_mean(self): 145 | return self.action 146 | 147 | def update_distribution(self, observations): 148 | self.actor.train() 149 | self.action = self.actor(observations) 150 | 151 | def act(self, observations, **kwargs): 152 | self.update_distribution(observations) 153 | return self.action_mean 154 | 155 | def get_activation(act_name): 156 | if act_name == "elu": 157 | return nn.ELU() 158 | elif act_name == "selu": 159 | return nn.SELU() 160 | elif act_name == "relu": 161 | return nn.ReLU() 162 | elif act_name == "crelu": 163 | return nn.ReLU() 164 | elif act_name == "lrelu": 165 | return nn.LeakyReLU() 166 | elif act_name == "tanh": 167 | return nn.Tanh() 168 | elif act_name == "sigmoid": 169 | return nn.Sigmoid() 170 | else: 171 | print("invalid activation function!") 172 | return None 173 | -------------------------------------------------------------------------------- /train_backflip.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import pickle 5 | import shutil 6 | 7 | import numpy as np 8 | import torch 9 | import wandb 10 | from reward_wrapper import Backflip 11 | from locomotion_env import LocoEnv 12 | from rsl_rl.runners import OnPolicyRunner 13 | 14 | import genesis as gs 15 | 16 | 17 | def get_train_cfg(args): 18 | 19 | train_cfg_dict = { 20 | 'algorithm': { 21 | 'clip_param': 0.2, 22 | 'desired_kl': 0.01, 23 | 'entropy_coef': 0.01, 24 | 'gamma': 0.99, 25 | 'lam': 0.95, 26 | 'learning_rate': 0.001, 27 | 'max_grad_norm': 1.0, 28 | 'num_learning_epochs': 5, 29 | 'num_mini_batches': 4, 30 | 'schedule': 'adaptive', 31 | 'use_clipped_value_loss': True, 32 | 'value_loss_coef': 1.0, 33 | }, 34 | 'init_member_classes': {}, 35 | 'policy': { 36 | 'activation': 'elu', 37 | 'actor_hidden_dims': [512, 256, 128], 38 | 'critic_hidden_dims': [512, 256, 128], 39 | 'init_noise_std': 1.0, 40 | }, 41 | 'runner': { 42 | 'algorithm_class_name': 'PPO', 43 | 'checkpoint': -1, 44 | 'experiment_name': args.exp_name, 45 | 'load_run': -1, 46 | 'log_interval': 1, 47 | 'max_iterations': args.max_iterations, 48 | 'num_steps_per_env': 24, 49 | 'policy_class_name': 'ActorCritic', 50 | 'record_interval': 50, 51 | 'resume': False, 52 | 'resume_path': None, 53 | 'run_name': '', 54 | 'runner_class_name': 'runner_class_name', 55 | 'save_interval': 100, 56 | }, 57 | 'runner_class_name': 'OnPolicyRunner', 58 | 'seed': 1, 59 | } 60 | 61 | return train_cfg_dict 62 | 63 | 64 | def get_cfgs(): 65 | env_cfg = { 66 | 'urdf_path': 'urdf/go2/urdf/go2.urdf', 67 | 'links_to_keep': ['FL_foot', 'FR_foot', 
'RL_foot', 'RR_foot',], 68 | 'num_actions': 12, 69 | 'num_dofs': 12, 70 | # joint/link names 71 | 'default_joint_angles': { # [rad] 72 | 'FL_hip_joint': 0.0, 73 | 'FR_hip_joint': 0.0, 74 | 'RL_hip_joint': 0.0, 75 | 'RR_hip_joint': 0.0, 76 | 'FL_thigh_joint': 0.8, 77 | 'FR_thigh_joint': 0.8, 78 | 'RL_thigh_joint': 1.0, 79 | 'RR_thigh_joint': 1.0, 80 | 'FL_calf_joint': -1.5, 81 | 'FR_calf_joint': -1.5, 82 | 'RL_calf_joint': -1.5, 83 | 'RR_calf_joint': -1.5, 84 | }, 85 | 'dof_names': [ 86 | 'FR_hip_joint', 87 | 'FR_thigh_joint', 88 | 'FR_calf_joint', 89 | 'FL_hip_joint', 90 | 'FL_thigh_joint', 91 | 'FL_calf_joint', 92 | 'RR_hip_joint', 93 | 'RR_thigh_joint', 94 | 'RR_calf_joint', 95 | 'RL_hip_joint', 96 | 'RL_thigh_joint', 97 | 'RL_calf_joint', 98 | ], 99 | 'termination_contact_link_names': ['base'], 100 | 'penalized_contact_link_names': ['base', 'thigh', 'calf'], 101 | 'feet_link_names': ['foot'], 102 | 'base_link_name': ['base'], 103 | # PD 104 | 'PD_stiffness': {'joint': 70.0}, 105 | 'PD_damping': {'joint': 3.0}, 106 | 'use_implicit_controller': False, 107 | # termination 108 | 'termination_if_roll_greater_than': 0.4, 109 | 'termination_if_pitch_greater_than': 0.4, 110 | 'termination_if_height_lower_than': 0.2, 111 | # base pose 112 | 'base_init_pos': [0.0, 0.0, 0.36], 113 | 'base_init_quat': [1.0, 0.0, 0.0, 0.0], 114 | # random push 115 | 'push_interval_s': -1, 116 | 'max_push_vel_xy': 1.0, 117 | # time (second) 118 | 'episode_length_s': 2.0, 119 | 'resampling_time_s': 4.0, 120 | 'command_type': 'ang_vel_yaw', # 'ang_vel_yaw' or 'heading' 121 | 'action_scale': 0.5, 122 | 'action_latency': 0.02, 123 | 'clip_actions': 100.0, 124 | 'send_timeouts': True, 125 | 'control_freq': 50, 126 | 'decimation': 4, 127 | 'feet_geom_offset': 1, 128 | 'use_terrain': False, 129 | # domain randomization 130 | 'randomize_friction': True, 131 | 'friction_range': [0.2, 1.5], 132 | 'randomize_base_mass': True, 133 | 'added_mass_range': [-1., 3.], 134 | 'randomize_com_displacement': True, 135 | 'com_displacement_range': [-0.01, 0.01], 136 | 'randomize_motor_strength': False, 137 | 'motor_strength_range': [0.9, 1.1], 138 | 'randomize_motor_offset': True, 139 | 'motor_offset_range': [-0.02, 0.02], 140 | 'randomize_kp_scale': True, 141 | 'kp_scale_range': [0.8, 1.2], 142 | 'randomize_kd_scale': True, 143 | 'kd_scale_range': [0.8, 1.2], 144 | # coupling 145 | 'coupling': False, 146 | } 147 | obs_cfg = { 148 | 'num_obs': 60, 149 | 'num_history_obs': 1, 150 | 'obs_noise': { 151 | 'ang_vel': 0.1, 152 | 'gravity': 0.02, 153 | 'dof_pos': 0.01, 154 | 'dof_vel': 0.5, 155 | }, 156 | 'obs_scales': { 157 | 'lin_vel': 2.0, 158 | 'ang_vel': 0.25, 159 | 'dof_pos': 1.0, 160 | 'dof_vel': 0.05, 161 | }, 162 | 'num_priv_obs': 64, 163 | } 164 | reward_cfg = { 165 | 'soft_dof_pos_limit': 0.9, 166 | 'reward_scales': { 167 | 'ang_vel_y': 5.0, 168 | 'ang_vel_z': -1.0, 169 | 'lin_vel_z': 20.0, 170 | 'orientation_control': -1.0, 171 | 'feet_height_before_backflip': -30.0, 172 | 'height_control': -10.0, 173 | 'actions_symmetry': -0.1, 174 | 'gravity_y': -10.0, 175 | 'feet_distance': -1.0, 176 | 'action_rate': -0.001, 177 | }, 178 | } 179 | command_cfg = { 180 | 'num_commands': 4, 181 | 'lin_vel_x_range': [-0.0, 0.0], 182 | 'lin_vel_y_range': [-0.0, 0.0], 183 | 'ang_vel_range': [-0.0, 0.0], 184 | } 185 | 186 | return env_cfg, obs_cfg, reward_cfg, command_cfg 187 | 188 | 189 | def main(): 190 | parser = argparse.ArgumentParser() 191 | parser.add_argument('-e', '--exp_name', type=str, default='backflip') 192 | parser.add_argument('-v', '--vis', 
action='store_true', default=False) 193 | parser.add_argument('-c', '--cpu', action='store_true', default=False) 194 | parser.add_argument('-B', '--num_envs', type=int, default=10000) 195 | parser.add_argument('--max_iterations', type=int, default=1000) 196 | parser.add_argument('--resume', type=str, default=None) 197 | parser.add_argument('-o', '--offline', action='store_true', default=False) 198 | 199 | parser.add_argument('--eval', action='store_true', default=False) 200 | parser.add_argument('--debug', action='store_true', default=False) 201 | parser.add_argument('--ckpt', type=int, default=1000) 202 | args = parser.parse_args() 203 | 204 | if args.debug: 205 | args.vis = True 206 | args.offline = True 207 | args.num_envs = 1 208 | 209 | gs.init( 210 | backend=gs.cpu if args.cpu else gs.gpu, 211 | logging_level='warning', 212 | ) 213 | 214 | log_dir = f'logs/{args.exp_name}' 215 | env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs() 216 | 217 | if os.path.exists(log_dir): 218 | shutil.rmtree(log_dir) 219 | os.makedirs(log_dir, exist_ok=True) 220 | 221 | env = Backflip( 222 | num_envs=args.num_envs, 223 | env_cfg=env_cfg, 224 | obs_cfg=obs_cfg, 225 | reward_cfg=reward_cfg, 226 | command_cfg=command_cfg, 227 | show_viewer=args.vis, 228 | eval=args.eval, 229 | debug=args.debug, 230 | ) 231 | 232 | runner = OnPolicyRunner(env, get_train_cfg(args), log_dir, device='cuda:0') 233 | 234 | if args.resume is not None: 235 | resume_dir = f'logs/{args.resume}' 236 | resume_path = os.path.join(resume_dir, f'model_{args.ckpt}.pt') 237 | print('==> resume training from', resume_path) 238 | runner.load(resume_path) 239 | 240 | wandb.init(project='genesis', name=args.exp_name, dir=log_dir, mode='offline' if args.offline else 'online') 241 | 242 | pickle.dump( 243 | [env_cfg, obs_cfg, reward_cfg, command_cfg], 244 | open(f'{log_dir}/cfgs.pkl', 'wb'), 245 | ) 246 | 247 | runner.learn(num_learning_iterations=args.max_iterations, init_at_random_ep_len=True) 248 | 249 | 250 | if __name__ == '__main__': 251 | main() 252 | 253 | 254 | ''' 255 | # training 256 | python train_backflip.py -e EXP_NAME 257 | 258 | # evaluation 259 | python eval_backflip.py -e EXP_NAME --ckpt NUM_CKPT 260 | ''' -------------------------------------------------------------------------------- /train_walk.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import pickle 5 | import shutil 6 | 7 | import numpy as np 8 | import torch 9 | import wandb 10 | from reward_wrapper import Go2 11 | from locomotion_env import LocoEnv 12 | from rsl_rl.runners import OnPolicyRunner 13 | 14 | import genesis as gs 15 | 16 | 17 | def get_train_cfg(args): 18 | 19 | train_cfg_dict = { 20 | 'algorithm': { 21 | 'clip_param': 0.2, 22 | 'desired_kl': 0.01, 23 | 'entropy_coef': 0.01, 24 | 'gamma': 0.99, 25 | 'lam': 0.95, 26 | 'learning_rate': 0.001, 27 | 'max_grad_norm': 1.0, 28 | 'num_learning_epochs': 5, 29 | 'num_mini_batches': 4, 30 | 'schedule': 'adaptive', 31 | 'use_clipped_value_loss': True, 32 | 'value_loss_coef': 1.0, 33 | }, 34 | 'init_member_classes': {}, 35 | 'policy': { 36 | 'activation': 'elu', 37 | 'actor_hidden_dims': [512, 256, 128], 38 | 'critic_hidden_dims': [512, 256, 128], 39 | 'init_noise_std': 1.0, 40 | }, 41 | 'runner': { 42 | 'algorithm_class_name': 'PPO', 43 | 'checkpoint': -1, 44 | 'experiment_name': args.exp_name, 45 | 'load_run': -1, 46 | 'log_interval': 1, 47 | 'max_iterations': args.max_iterations, 48 | 'num_steps_per_env': 24, 49 | 
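# NOTE on batch sizing (derived from the values in this file): with the default num_envs=10000
# (the -B argument further down) and num_steps_per_env=24, each PPO iteration collects
# 10000 * 24 = 240,000 transitions; the 'algorithm' settings above then split these into
# num_mini_batches=4 minibatches of 60,000 samples each, replayed for num_learning_epochs=5 epochs.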
'policy_class_name': 'ActorCritic', 50 | 'record_interval': 50, 51 | 'resume': False, 52 | 'resume_path': None, 53 | 'run_name': '', 54 | 'runner_class_name': 'runner_class_name', 55 | 'save_interval': 100, 56 | }, 57 | 'runner_class_name': 'OnPolicyRunner', 58 | 'seed': 1, 59 | } 60 | 61 | return train_cfg_dict 62 | 63 | 64 | def get_cfgs(): 65 | env_cfg = { 66 | 'urdf_path': 'urdf/go2/urdf/go2.urdf', 67 | 'links_to_keep': ['FL_foot', 'FR_foot', 'RL_foot', 'RR_foot',], 68 | 'num_actions': 12, 69 | 'num_dofs': 12, 70 | # joint/link names 71 | 'default_joint_angles': { # [rad] 72 | 'FL_hip_joint': 0.0, 73 | 'FR_hip_joint': 0.0, 74 | 'RL_hip_joint': 0.0, 75 | 'RR_hip_joint': 0.0, 76 | 'FL_thigh_joint': 0.8, 77 | 'FR_thigh_joint': 0.8, 78 | 'RL_thigh_joint': 1.0, 79 | 'RR_thigh_joint': 1.0, 80 | 'FL_calf_joint': -1.5, 81 | 'FR_calf_joint': -1.5, 82 | 'RL_calf_joint': -1.5, 83 | 'RR_calf_joint': -1.5, 84 | }, 85 | 'dof_names': [ 86 | 'FR_hip_joint', 87 | 'FR_thigh_joint', 88 | 'FR_calf_joint', 89 | 'FL_hip_joint', 90 | 'FL_thigh_joint', 91 | 'FL_calf_joint', 92 | 'RR_hip_joint', 93 | 'RR_thigh_joint', 94 | 'RR_calf_joint', 95 | 'RL_hip_joint', 96 | 'RL_thigh_joint', 97 | 'RL_calf_joint', 98 | ], 99 | 'termination_contact_link_names': ['base'], 100 | 'penalized_contact_link_names': ['base', 'thigh', 'calf'], 101 | 'feet_link_names': ['foot'], 102 | 'base_link_name': ['base'], 103 | # PD 104 | 'PD_stiffness': {'joint': 30.0}, 105 | 'PD_damping': {'joint': 1.5}, 106 | 'use_implicit_controller': False, 107 | # termination 108 | 'termination_if_roll_greater_than': 0.4, 109 | 'termination_if_pitch_greater_than': 0.4, 110 | 'termination_if_height_lower_than': 0.0, 111 | # base pose 112 | 'base_init_pos': [0.0, 0.0, 0.42], 113 | 'base_init_quat': [1.0, 0.0, 0.0, 0.0], 114 | # random push 115 | 'push_interval_s': -1, 116 | 'max_push_vel_xy': 1.0, 117 | # time (second) 118 | 'episode_length_s': 20.0, 119 | 'resampling_time_s': 4.0, 120 | 'command_type': 'ang_vel_yaw', # 'ang_vel_yaw' or 'heading' 121 | 'action_scale': 0.25, 122 | 'action_latency': 0.02, 123 | 'clip_actions': 100.0, 124 | 'send_timeouts': True, 125 | 'control_freq': 50, 126 | 'decimation': 4, 127 | 'feet_geom_offset': 1, 128 | 'use_terrain': False, 129 | # domain randomization 130 | 'randomize_friction': True, 131 | 'friction_range': [0.2, 1.5], 132 | 'randomize_base_mass': True, 133 | 'added_mass_range': [-1., 3.], 134 | 'randomize_com_displacement': True, 135 | 'com_displacement_range': [-0.01, 0.01], 136 | 'randomize_motor_strength': False, 137 | 'motor_strength_range': [0.9, 1.1], 138 | 'randomize_motor_offset': True, 139 | 'motor_offset_range': [-0.02, 0.02], 140 | 'randomize_kp_scale': True, 141 | 'kp_scale_range': [0.8, 1.2], 142 | 'randomize_kd_scale': True, 143 | 'kd_scale_range': [0.8, 1.2], 144 | # coupling 145 | 'coupling': False, 146 | } 147 | obs_cfg = { 148 | 'num_obs': 9 + 3 * env_cfg['num_dofs'], 149 | 'num_history_obs': 1, 150 | 'obs_noise': { 151 | 'ang_vel': 0.1, 152 | 'gravity': 0.02, 153 | 'dof_pos': 0.01, 154 | 'dof_vel': 0.5, 155 | }, 156 | 'obs_scales': { 157 | 'lin_vel': 2.0, 158 | 'ang_vel': 0.25, 159 | 'dof_pos': 1.0, 160 | 'dof_vel': 0.05, 161 | }, 162 | 'num_priv_obs': 12 + 4 * env_cfg['num_dofs'], 163 | } 164 | reward_cfg = { 165 | 'tracking_sigma': 0.25, 166 | 'soft_dof_pos_limit': 0.9, 167 | 'base_height_target': 0.3, 168 | 'reward_scales': { 169 | 'tracking_lin_vel': 1.0, 170 | 'tracking_ang_vel': 0.5, 171 | 'lin_vel_z': -2.0, 172 | 'ang_vel_xy': -0.05, 173 | 'orientation': -10., 174 | 'base_height': 
-50., 175 | 'torques': -0.0002, 176 | 'collision': -1., 177 | 'dof_vel': -0., 178 | 'dof_acc': -2.5e-7, 179 | 'feet_air_time': 1.0, 180 | 'collision': -1., 181 | 'action_rate': -0.01, 182 | }, 183 | } 184 | command_cfg = { 185 | 'num_commands': 4, 186 | 'lin_vel_x_range': [-1.0, 1.0], 187 | 'lin_vel_y_range': [-1.0, 1.0], 188 | 'ang_vel_range': [-1.0, 1.0], 189 | } 190 | 191 | return env_cfg, obs_cfg, reward_cfg, command_cfg 192 | 193 | 194 | def main(): 195 | parser = argparse.ArgumentParser() 196 | parser.add_argument('-e', '--exp_name', type=str, default='Go2') 197 | parser.add_argument('-v', '--vis', action='store_true', default=False) 198 | parser.add_argument('-c', '--cpu', action='store_true', default=False) 199 | parser.add_argument('-B', '--num_envs', type=int, default=10000) 200 | parser.add_argument('--max_iterations', type=int, default=1000) 201 | parser.add_argument('--resume', type=str, default=None) 202 | parser.add_argument('-o', '--offline', action='store_true', default=False) 203 | 204 | parser.add_argument('--eval', action='store_true', default=False) 205 | parser.add_argument('--debug', action='store_true', default=False) 206 | parser.add_argument('--ckpt', type=int, default=1000) 207 | args = parser.parse_args() 208 | 209 | if args.debug: 210 | args.vis = True 211 | args.offline = True 212 | args.num_envs = 1 213 | 214 | gs.init( 215 | backend=gs.cpu if args.cpu else gs.gpu, 216 | logging_level='warning', 217 | ) 218 | 219 | log_dir = f'logs/{args.exp_name}' 220 | env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs() 221 | 222 | if os.path.exists(log_dir): 223 | shutil.rmtree(log_dir) 224 | os.makedirs(log_dir, exist_ok=True) 225 | 226 | env = Go2( 227 | num_envs=args.num_envs, 228 | env_cfg=env_cfg, 229 | obs_cfg=obs_cfg, 230 | reward_cfg=reward_cfg, 231 | command_cfg=command_cfg, 232 | show_viewer=args.vis, 233 | eval=args.eval, 234 | debug=args.debug, 235 | ) 236 | 237 | runner = OnPolicyRunner(env, get_train_cfg(args), log_dir, device='cuda:0') 238 | 239 | if args.resume is not None: 240 | resume_dir = f'logs/{args.resume}' 241 | resume_path = os.path.join(resume_dir, f'model_{args.ckpt}.pt') 242 | print('==> resume training from', resume_path) 243 | runner.load(resume_path) 244 | 245 | wandb.init(project='genesis', name=args.exp_name, dir=log_dir, mode='offline' if args.offline else 'online') 246 | 247 | pickle.dump( 248 | [env_cfg, obs_cfg, reward_cfg, command_cfg], 249 | open(f'{log_dir}/cfgs.pkl', 'wb'), 250 | ) 251 | 252 | runner.learn(num_learning_iterations=args.max_iterations, init_at_random_ep_len=True) 253 | 254 | 255 | if __name__ == '__main__': 256 | main() 257 | 258 | 259 | ''' 260 | # training 261 | python train_backflip.py -e EXP_NAME 262 | 263 | # evaluation 264 | python eval_backflip.py -e EXP_NAME --ckpt NUM_CKPT 265 | ''' -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/algorithms/ppo.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | import torch.nn as nn 33 | import torch.optim as optim 34 | 35 | from rsl_rl.modules import ActorCritic 36 | from rsl_rl.storage import RolloutStorage 37 | 38 | class PPO: 39 | actor_critic: ActorCritic 40 | def __init__(self, 41 | actor_critic, 42 | num_learning_epochs=1, 43 | num_mini_batches=1, 44 | clip_param=0.2, 45 | gamma=0.998, 46 | lam=0.95, 47 | value_loss_coef=1.0, 48 | entropy_coef=0.0, 49 | learning_rate=1e-3, 50 | max_grad_norm=1.0, 51 | use_clipped_value_loss=True, 52 | schedule="fixed", 53 | desired_kl=0.01, 54 | device='cpu', 55 | ): 56 | 57 | self.device = device 58 | 59 | self.desired_kl = desired_kl 60 | self.kl_mean = 0 61 | self.schedule = schedule 62 | self.learning_rate = learning_rate 63 | 64 | # PPO components 65 | self.actor_critic = actor_critic 66 | self.actor_critic.to(self.device) 67 | self.storage = None # initialized later 68 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate) 69 | self.transition = RolloutStorage.Transition() 70 | 71 | # PPO parameters 72 | self.clip_param = clip_param 73 | self.num_learning_epochs = num_learning_epochs 74 | self.num_mini_batches = num_mini_batches 75 | self.value_loss_coef = value_loss_coef 76 | self.entropy_coef = entropy_coef 77 | self.gamma = gamma 78 | self.lam = lam 79 | self.max_grad_norm = max_grad_norm 80 | self.use_clipped_value_loss = use_clipped_value_loss 81 | 82 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape): 83 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device) 84 | 85 | def test_mode(self): 86 | self.actor_critic.test() 87 | 88 | def train_mode(self): 89 | self.actor_critic.train() 90 | 91 | def act(self, obs, critic_obs): 92 | if self.actor_critic.is_recurrent: 93 | self.transition.hidden_states = self.actor_critic.get_hidden_states() 94 | # Compute the actions and values 95 | self.transition.actions = self.actor_critic.act(obs).detach() 96 | self.transition.values = self.actor_critic.evaluate(critic_obs).detach() 97 | self.transition.actions_log_prob = 
self.actor_critic.get_actions_log_prob(self.transition.actions).detach() 98 | self.transition.action_mean = self.actor_critic.action_mean.detach() 99 | self.transition.action_sigma = self.actor_critic.action_std.detach() 100 | # need to record obs and critic_obs before env.step() 101 | self.transition.observations = obs 102 | self.transition.critic_observations = critic_obs 103 | return self.transition.actions 104 | 105 | def process_env_step(self, rewards, dones, infos): 106 | self.transition.rewards = rewards.clone() 107 | self.transition.dones = dones 108 | # Bootstrapping on time outs 109 | if 'time_outs' in infos: 110 | self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1) 111 | 112 | # Record the transition 113 | self.storage.add_transitions(self.transition) 114 | self.transition.clear() 115 | self.actor_critic.reset(dones) 116 | 117 | def compute_returns(self, last_critic_obs): 118 | last_values= self.actor_critic.evaluate(last_critic_obs).detach() 119 | self.storage.compute_returns(last_values, self.gamma, self.lam) 120 | 121 | def update(self): 122 | mean_value_loss = 0 123 | mean_surrogate_loss = 0 124 | if self.actor_critic.is_recurrent: 125 | generator = self.storage.reccurent_mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 126 | else: 127 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 128 | for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \ 129 | old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in generator: 130 | 131 | 132 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0]) 133 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 134 | value_batch = self.actor_critic.evaluate(critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1]) 135 | mu_batch = self.actor_critic.action_mean 136 | sigma_batch = self.actor_critic.action_std 137 | entropy_batch = self.actor_critic.entropy 138 | 139 | with torch.inference_mode(): 140 | kl = torch.sum(torch.log(sigma_batch / old_sigma_batch + 1.e-5) 141 | + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, axis=-1) 142 | kl_mean = torch.mean(kl) 143 | self.kl_mean = kl_mean 144 | 145 | # KL 146 | if self.desired_kl != None and self.schedule == 'adaptive': 147 | 148 | if kl_mean > self.desired_kl * 2.0: 149 | self.learning_rate = max(1e-4, self.learning_rate / 1.5) 150 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 151 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 152 | 153 | for param_group in self.optimizer.param_groups: 154 | param_group['lr'] = self.learning_rate 155 | 156 | 157 | # Surrogate loss 158 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 159 | surrogate = -torch.squeeze(advantages_batch) * ratio 160 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param, 161 | 1.0 + self.clip_param) 162 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 163 | 164 | # Value function loss 165 | if self.use_clipped_value_loss: 166 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param, 167 | self.clip_param) 168 | value_losses = (value_batch - returns_batch).pow(2) 169 | value_losses_clipped = 
(value_clipped - returns_batch).pow(2) 170 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 171 | else: 172 | value_loss = (returns_batch - value_batch).pow(2).mean() 173 | 174 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean() 175 | 176 | # Gradient step 177 | self.optimizer.zero_grad() 178 | loss.backward() 179 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 180 | self.optimizer.step() 181 | 182 | mean_value_loss += value_loss.item() 183 | mean_surrogate_loss += surrogate_loss.item() 184 | 185 | num_updates = self.num_learning_epochs * self.num_mini_batches 186 | mean_value_loss /= num_updates 187 | mean_surrogate_loss /= num_updates 188 | self.storage.clear() 189 | 190 | return mean_value_loss, mean_surrogate_loss 191 | -------------------------------------------------------------------------------- /reward_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import genesis as gs 5 | from locomotion_env import * 6 | 7 | class Go2(LocoEnv): 8 | 9 | def _reward_tracking_lin_vel(self): 10 | # Tracking of linear velocity commands (xy axes) 11 | lin_vel_error = torch.sum( 12 | torch.square( 13 | self.commands[:, :2] - self.base_lin_vel[:, :2] 14 | ), 15 | dim=1, 16 | ) 17 | return torch.exp(-lin_vel_error / self.reward_cfg['tracking_sigma']) 18 | 19 | def _reward_tracking_ang_vel(self): 20 | # Tracking of angular velocity commands (yaw) 21 | ang_vel_error = torch.square( 22 | self.commands[:, 2] - self.base_ang_vel[:, 2] 23 | ) 24 | return torch.exp(-ang_vel_error / self.reward_cfg['tracking_sigma']) 25 | 26 | def _reward_lin_vel_z(self): 27 | # Penalize z axis base linear velocity 28 | return torch.square(self.base_lin_vel[:, 2]) 29 | 30 | def _reward_ang_vel_xy(self): 31 | # Penalize xy axes base angular velocity 32 | return torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) 33 | 34 | def _reward_orientation(self): 35 | # Penalize non flat base orientation 36 | return torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1) 37 | 38 | def _reward_torques(self): 39 | # Penalize torques 40 | return torch.sum(torch.square(self.torques), dim=1) 41 | 42 | def _reward_dof_vel(self): 43 | # Penalize dof velocities 44 | return torch.sum(torch.square(self.dof_vel), dim=1) 45 | 46 | def _reward_dof_acc(self): 47 | # Penalize dof accelerations 48 | return torch.sum( 49 | torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1 50 | ) 51 | 52 | def _reward_action_rate(self): 53 | # Penalize changes in actions 54 | return torch.sum(torch.square(self.last_actions - self.actions), dim=1) 55 | 56 | def _reward_base_height(self): 57 | # Penalize base height away from target 58 | base_height = self.base_pos[:, 2] 59 | base_height_target = self.reward_cfg['base_height_target'] 60 | return torch.square(base_height - base_height_target) 61 | 62 | def _reward_collision(self): 63 | # Penalize collisions on selected bodies 64 | return torch.sum( 65 | 1.0 66 | * ( 67 | torch.norm( 68 | self.link_contact_forces[:, self.penalized_contact_link_indices, :], 69 | dim=-1, 70 | ) 71 | > 0.1 72 | ), 73 | dim=1, 74 | ) 75 | 76 | def _reward_termination(self): 77 | # Terminal reward / penalty 78 | return self.reset_buf * ~self.time_out_buf 79 | 80 | def _reward_dof_pos_limits(self): 81 | # Penalize dof positions too close to the limit 82 | out_of_limits = -(self.dof_pos - self.dof_pos_limits[:, 0]).clip(max=0.0) # 
lower limit 83 | out_of_limits += (self.dof_pos - self.dof_pos_limits[:, 1]).clip(min=0.0) # upper limit 84 | return torch.sum(out_of_limits, dim=1) 85 | 86 | def _reward_feet_air_time(self): 87 | # Reward long steps 88 | contact = self.link_contact_forces[:, self.feet_link_indices, 2] > 1. 89 | contact_filt = torch.logical_or(contact, self.last_contacts) 90 | self.last_contacts = contact 91 | first_contact = (self.feet_air_time > 0.) * contact_filt 92 | self.feet_air_time += self.dt 93 | rew_airTime = torch.sum((self.feet_air_time - 0.5) * first_contact, dim=1) # reward only on first contact with the ground 94 | rew_airTime *= torch.norm(self.commands[:, :2], dim=1) > 0.1 #no reward for zero command 95 | self.feet_air_time *= ~contact_filt 96 | return rew_airTime 97 | 98 | class Backflip(Go2): 99 | 100 | def reset_idx(self, envs_idx): 101 | if len(envs_idx) == 0: 102 | return 103 | 104 | # reset dofs 105 | self.dof_pos[envs_idx] = self.default_dof_pos 106 | self.dof_vel[envs_idx] = 0.0 107 | self.robot.set_dofs_position( 108 | position=self.dof_pos[envs_idx], 109 | dofs_idx_local=self.motor_dofs, 110 | zero_velocity=True, 111 | envs_idx=envs_idx, 112 | ) 113 | 114 | # reset root states - position 115 | self.base_pos[envs_idx] = self.base_init_pos 116 | self.base_pos[envs_idx, 2] = 0.32 117 | self.base_quat[envs_idx] = self.base_init_quat.reshape(1, -1) 118 | self.robot.set_pos( 119 | self.base_pos[envs_idx], zero_velocity=False, envs_idx=envs_idx 120 | ) 121 | self.robot.set_quat( 122 | self.base_quat[envs_idx], zero_velocity=False, envs_idx=envs_idx 123 | ) 124 | self.robot.zero_all_dofs_velocity(envs_idx) 125 | 126 | # update projected gravity 127 | inv_base_quat = gs_inv_quat(self.base_quat) 128 | self.projected_gravity = gs_transform_by_quat( 129 | self.global_gravity, inv_base_quat 130 | ) 131 | 132 | # reset root states - velocity 133 | self.base_lin_vel[envs_idx] = 0 134 | self.base_ang_vel[envs_idx] = 0 135 | base_vel = torch.concat( 136 | [self.base_lin_vel[envs_idx], self.base_ang_vel[envs_idx]], dim=1 137 | ) 138 | self.robot.set_dofs_velocity( 139 | velocity=base_vel, dofs_idx_local=[0, 1, 2, 3, 4, 5], envs_idx=envs_idx 140 | ) 141 | 142 | self._resample_commands(envs_idx) 143 | 144 | # reset buffers 145 | self.obs_history_buf[envs_idx] = 0.0 146 | self.actions[envs_idx] = 0.0 147 | self.last_actions[envs_idx] = 0.0 148 | self.last_last_actions[envs_idx] = 0.0 149 | self.last_dof_vel[envs_idx] = 0.0 150 | self.feet_air_time[envs_idx] = 0.0 151 | self.feet_max_height[envs_idx] = 0.0 152 | self.episode_length_buf[envs_idx] = 0 153 | self.reset_buf[envs_idx] = 1 154 | 155 | # fill extras 156 | self.extras['episode'] = {} 157 | for key in self.episode_sums.keys(): 158 | self.extras['episode']['rew_' + key] = ( 159 | torch.mean(self.episode_sums[key][envs_idx]).item() 160 | / self.max_episode_length_s 161 | ) 162 | self.episode_sums[key][envs_idx] = 0.0 163 | # send timeout info to the algorithm 164 | if self.env_cfg['send_timeouts']: 165 | self.extras['time_outs'] = self.time_out_buf 166 | 167 | def compute_observations(self): 168 | 169 | phase = torch.pi * self.episode_length_buf[:, None] * self.dt / 2 170 | self.obs_buf = torch.cat( 171 | [ 172 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 173 | self.projected_gravity, # 3 174 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], # 10 175 | self.dof_vel * self.obs_scales['dof_vel'], # 10 176 | self.actions, # 10 177 | self.last_actions, # 10 178 | torch.sin(phase), 179 | torch.cos(phase), 180 | 
torch.sin(phase / 2), 181 | torch.cos(phase / 2), 182 | torch.sin(phase / 4), 183 | torch.cos(phase / 4), 184 | ], 185 | axis=-1, 186 | ) 187 | 188 | self.obs_history_buf = torch.cat( 189 | [self.obs_history_buf[:, self.num_single_obs:], self.obs_buf.detach()], dim=1 190 | ) 191 | 192 | if self.num_privileged_obs is not None: 193 | self.privileged_obs_buf = torch.cat( 194 | [ 195 | self.base_pos[:, 2:3], # 1 196 | self.base_lin_vel * self.obs_scales['lin_vel'], # 3 197 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 198 | self.projected_gravity, # 3 199 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], # 10 200 | self.dof_vel * self.obs_scales['dof_vel'], # 10 201 | self.actions, # 10 202 | self.last_actions, # 10 203 | torch.sin(phase), 204 | torch.cos(phase), 205 | torch.sin(phase / 2), 206 | torch.cos(phase / 2), 207 | torch.sin(phase / 4), 208 | torch.cos(phase / 4), 209 | ], 210 | axis=-1, 211 | ) 212 | 213 | def check_termination(self): 214 | self.reset_buf = ( 215 | self.episode_length_buf > self.max_episode_length 216 | ) 217 | 218 | def _reward_orientation_control(self): 219 | # Penalize non flat base orientation 220 | current_time = self.episode_length_buf * self.dt 221 | phase = (current_time - 0.5).clamp(min=0, max=0.5) 222 | quat_pitch = gs_quat_from_angle_axis(4 * phase * torch.pi, 223 | torch.tensor([0, 1, 0], device=self.device, dtype=torch.float)) 224 | 225 | desired_base_quat = gs_quat_mul(quat_pitch, self.base_init_quat.reshape(1, -1).repeat(self.num_envs, 1)) 226 | inv_desired_base_quat = gs_inv_quat(desired_base_quat) 227 | desired_projected_gravity = gs_transform_by_quat(self.global_gravity, inv_desired_base_quat) 228 | 229 | orientation_diff = torch.sum(torch.square(self.projected_gravity - desired_projected_gravity), dim=1) 230 | 231 | return orientation_diff 232 | 233 | def _reward_ang_vel_y(self): 234 | current_time = self.episode_length_buf * self.dt 235 | ang_vel = -self.base_ang_vel[:, 1].clamp(max=7.2, min=-7.2) 236 | return ang_vel * torch.logical_and(current_time > 0.5, current_time < 1.0) 237 | 238 | def _reward_ang_vel_z(self): 239 | return torch.abs(self.base_ang_vel[:, 2]) 240 | 241 | def _reward_lin_vel_z(self): 242 | current_time = self.episode_length_buf * self.dt 243 | lin_vel = self.robot.get_vel()[:, 2].clamp(max=3) 244 | return lin_vel * torch.logical_and(current_time > 0.5, current_time < 0.75) 245 | 246 | def _reward_height_control(self): 247 | # Penalize non flat base orientation 248 | current_time = self.episode_length_buf * self.dt 249 | target_height = 0.3 250 | height_diff = torch.square(target_height - self.base_pos[:, 2]) * torch.logical_or(current_time < 0.4, current_time > 1.4) 251 | return height_diff 252 | 253 | def _reward_actions_symmetry(self): 254 | actions_diff = torch.square(self.actions[:, 0] + self.actions[:, 3]) 255 | actions_diff += torch.square(self.actions[:, 1:3] - self.actions[:, 4:6]).sum(dim=-1) 256 | actions_diff += torch.square(self.actions[:, 6] + self.actions[:, 9]) 257 | actions_diff += torch.square(self.actions[:, 7:9] - self.actions[:, 10:12]).sum(dim=-1) 258 | return actions_diff 259 | 260 | def _reward_gravity_y(self): 261 | return torch.square(self.projected_gravity[:, 1]) 262 | 263 | def _reward_feet_distance(self): 264 | current_time = self.episode_length_buf * self.dt 265 | cur_footsteps_translated = self.foot_positions - self.base_pos.unsqueeze(1) 266 | footsteps_in_body_frame = torch.zeros(self.num_envs, 4, 3, device=self.device) 267 | for i in range(4): 268 | 
footsteps_in_body_frame[:, i, :] = gs_quat_apply(gs_quat_conjugate(self.base_quat), 269 | cur_footsteps_translated[:, i, :]) 270 | 271 | stance_width = 0.3 * torch.zeros([self.num_envs, 1,], device=self.device) 272 | desired_ys = torch.cat([stance_width / 2, -stance_width / 2, stance_width / 2, -stance_width / 2], dim=1) 273 | stance_diff = torch.square(desired_ys - footsteps_in_body_frame[:, :, 1]).sum(dim=1) 274 | 275 | return stance_diff 276 | 277 | def _reward_feet_height_before_backflip(self): 278 | current_time = self.episode_length_buf * self.dt 279 | foot_height = (self.foot_positions[:, :, 2]).view(self.num_envs, -1) - 0.02 280 | return foot_height.clamp(min=0).sum(dim=1) * (current_time < 0.5) 281 | 282 | def _reward_collision(self): 283 | # Penalize collisions on selected bodies 284 | current_time = self.episode_length_buf * self.dt 285 | return (1.0 * (torch.norm(self.link_contact_forces[:, self.penalized_contact_link_indices, :], dim=-1) > 0.1)).sum(dim=1) 286 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/storage/rollout_storage.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | import numpy as np 33 | 34 | from rsl_rl.utils import split_and_pad_trajectories 35 | 36 | class RolloutStorage: 37 | class Transition: 38 | def __init__(self): 39 | self.observations = None 40 | self.critic_observations = None 41 | self.actions = None 42 | self.rewards = None 43 | self.dones = None 44 | self.values = None 45 | self.actions_log_prob = None 46 | self.action_mean = None 47 | self.action_sigma = None 48 | self.hidden_states = None 49 | 50 | def clear(self): 51 | self.__init__() 52 | 53 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'): 54 | 55 | self.device = device 56 | 57 | self.obs_shape = obs_shape 58 | self.privileged_obs_shape = privileged_obs_shape 59 | self.actions_shape = actions_shape 60 | 61 | # Core 62 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device) 63 | if privileged_obs_shape[0] is not None: 64 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device) 65 | else: 66 | self.privileged_observations = None 67 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 68 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 69 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 70 | 71 | # For PPO 72 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 73 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 74 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 75 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 76 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 77 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 78 | 79 | self.num_transitions_per_env = num_transitions_per_env 80 | self.num_envs = num_envs 81 | 82 | # rnn 83 | self.saved_hidden_states_a = None 84 | self.saved_hidden_states_c = None 85 | 86 | self.step = 0 87 | 88 | def add_transitions(self, transition: Transition): 89 | if self.step >= self.num_transitions_per_env: 90 | raise AssertionError("Rollout buffer overflow") 91 | self.observations[self.step].copy_(transition.observations) 92 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations) 93 | self.actions[self.step].copy_(transition.actions) 94 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 95 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 96 | self.values[self.step].copy_(transition.values) 97 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 98 | self.mu[self.step].copy_(transition.action_mean) 99 | self.sigma[self.step].copy_(transition.action_sigma) 100 | self._save_hidden_states(transition.hidden_states) 101 | self.step += 1 102 | 103 | def _save_hidden_states(self, hidden_states): 104 | if hidden_states is None or hidden_states==(None, None): 105 | return 106 | # make a tuple out of GRU hidden state sto match the LSTM format 107 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],) 108 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else 
(hidden_states[1],) 109 | 110 | # initialize if needed 111 | if self.saved_hidden_states_a is None: 112 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))] 113 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))] 114 | # copy the states 115 | for i in range(len(hid_a)): 116 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i]) 117 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i]) 118 | 119 | 120 | def clear(self): 121 | self.step = 0 122 | 123 | def compute_returns(self, last_values, gamma, lam): 124 | advantage = 0 125 | for step in reversed(range(self.num_transitions_per_env)): 126 | if step == self.num_transitions_per_env - 1: 127 | next_values = last_values 128 | else: 129 | next_values = self.values[step + 1] 130 | next_is_not_terminal = 1.0 - self.dones[step].float() 131 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 132 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 133 | self.returns[step] = advantage + self.values[step] 134 | 135 | # Compute and normalize the advantages 136 | self.advantages = self.returns - self.values 137 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 138 | 139 | def get_statistics(self): 140 | done = self.dones 141 | done[-1] = 1 142 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 143 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 144 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 145 | return trajectory_lengths.float().mean(), self.rewards.mean() 146 | 147 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 148 | batch_size = self.num_envs * self.num_transitions_per_env 149 | mini_batch_size = batch_size // num_mini_batches 150 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 151 | 152 | observations = self.observations.flatten(0, 1) 153 | if self.privileged_observations is not None: 154 | critic_observations = self.privileged_observations.flatten(0, 1) 155 | else: 156 | critic_observations = observations 157 | 158 | actions = self.actions.flatten(0, 1) 159 | values = self.values.flatten(0, 1) 160 | returns = self.returns.flatten(0, 1) 161 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 162 | advantages = self.advantages.flatten(0, 1) 163 | old_mu = self.mu.flatten(0, 1) 164 | old_sigma = self.sigma.flatten(0, 1) 165 | 166 | for epoch in range(num_epochs): 167 | for i in range(num_mini_batches): 168 | 169 | start = i*mini_batch_size 170 | end = (i+1)*mini_batch_size 171 | batch_idx = indices[start:end] 172 | 173 | obs_batch = observations[batch_idx] 174 | critic_observations_batch = critic_observations[batch_idx] 175 | actions_batch = actions[batch_idx] 176 | target_values_batch = values[batch_idx] 177 | returns_batch = returns[batch_idx] 178 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 179 | advantages_batch = advantages[batch_idx] 180 | old_mu_batch = old_mu[batch_idx] 181 | old_sigma_batch = old_sigma[batch_idx] 182 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 183 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None 184 | 185 | # for RNNs only 186 | def 
reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8): 187 | 188 | padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones) 189 | if self.privileged_observations is not None: 190 | padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones) 191 | else: 192 | padded_critic_obs_trajectories = padded_obs_trajectories 193 | 194 | mini_batch_size = self.num_envs // num_mini_batches 195 | for ep in range(num_epochs): 196 | first_traj = 0 197 | for i in range(num_mini_batches): 198 | start = i*mini_batch_size 199 | stop = (i+1)*mini_batch_size 200 | 201 | dones = self.dones.squeeze(-1) 202 | last_was_done = torch.zeros_like(dones, dtype=torch.bool) 203 | last_was_done[1:] = dones[:-1] 204 | last_was_done[0] = True 205 | trajectories_batch_size = torch.sum(last_was_done[:, start:stop]) 206 | last_traj = first_traj + trajectories_batch_size 207 | 208 | masks_batch = trajectory_masks[:, first_traj:last_traj] 209 | obs_batch = padded_obs_trajectories[:, first_traj:last_traj] 210 | critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj] 211 | 212 | actions_batch = self.actions[:, start:stop] 213 | old_mu_batch = self.mu[:, start:stop] 214 | old_sigma_batch = self.sigma[:, start:stop] 215 | returns_batch = self.returns[:, start:stop] 216 | advantages_batch = self.advantages[:, start:stop] 217 | values_batch = self.values[:, start:stop] 218 | old_actions_log_prob_batch = self.actions_log_prob[:, start:stop] 219 | 220 | # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim]) 221 | # then take only time steps after dones (flattens num envs and time dimensions), 222 | # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim] 223 | last_was_done = last_was_done.permute(1, 0) 224 | hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous() 225 | for saved_hidden_states in self.saved_hidden_states_a ] 226 | hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous() 227 | for saved_hidden_states in self.saved_hidden_states_c ] 228 | # remove the tuple for GRU 229 | hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch 230 | hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch 231 | 232 | yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \ 233 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch 234 | 235 | first_traj = last_traj 236 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/runners/on_policy_runner.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2.
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import time 32 | import os 33 | from collections import deque 34 | import statistics 35 | 36 | import torch 37 | import wandb 38 | import numpy as np 39 | 40 | from rsl_rl.algorithms import PPO 41 | from rsl_rl.modules import ActorCritic, ActorCriticRecurrent 42 | from rsl_rl.env import VecEnv 43 | 44 | 45 | class OnPolicyRunner: 46 | 47 | def __init__(self, 48 | env: VecEnv, 49 | train_cfg, 50 | log_dir=None, 51 | device='cpu'): 52 | 53 | self.cfg=train_cfg["runner"] 54 | self.alg_cfg = train_cfg["algorithm"] 55 | self.policy_cfg = train_cfg["policy"] 56 | self.device = device 57 | self.env = env 58 | if self.env.num_privileged_obs is not None: 59 | num_critic_obs = self.env.num_privileged_obs 60 | else: 61 | num_critic_obs = self.env.num_obs 62 | actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic 63 | actor_critic: ActorCritic = actor_critic_class( self.env.num_obs, 64 | num_critic_obs, 65 | self.env.num_actions, 66 | **self.policy_cfg).to(self.device) 67 | alg_class = eval(self.cfg["algorithm_class_name"]) # PPO 68 | self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg) 69 | self.num_steps_per_env = self.cfg["num_steps_per_env"] 70 | if "log_interval" in self.cfg.keys(): 71 | self.log_interval = self.cfg["log_interval"] 72 | else: 73 | self.log_interval = -1 74 | self.save_interval = self.cfg["save_interval"] 75 | if "record_interval" in self.cfg.keys(): 76 | self.record_interval = self.cfg["record_interval"] 77 | else: 78 | self.record_interval = -1 79 | if self.record_interval > 0: 80 | self.record_video = True 81 | else: 82 | self.record_video = False 83 | 84 | # init storage and model 85 | self.alg.init_storage(self.env.num_envs, self.num_steps_per_env, [self.env.num_obs], [self.env.num_privileged_obs], [self.env.num_actions]) 86 | 87 | # Log 88 | self.log_dir = log_dir 89 | self.tot_timesteps = 0 90 | self.tot_time = 0 91 | self.current_learning_iteration = 0 92 | 93 | _, _ = self.env.reset() 94 | 95 | def learn(self, num_learning_iterations, init_at_random_ep_len=False): 96 | if init_at_random_ep_len: 97 | self.env.episode_length_buf = torch.randint_like(self.env.episode_length_buf, high=int(self.env.max_episode_length)) 98 | 
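# Rollout bookkeeping notes: when init_at_random_ep_len=True (as passed by train_backflip.py and
# train_walk.py), the episode_length_buf randomization above staggers environment resets so that
# terminations are spread across a rollout instead of happening in lockstep. The critic consumes
# the environment's privileged observations when they are available and falls back to the policy
# observations otherwise (asymmetric actor-critic).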
obs = self.env.get_observations() 99 | privileged_obs = self.env.get_privileged_observations() 100 | critic_obs = privileged_obs if privileged_obs is not None else obs 101 | obs, critic_obs = obs.to(self.device), critic_obs.to(self.device) 102 | self.alg.actor_critic.train() # switch to train mode (for dropout for example) 103 | 104 | ep_infos = [] 105 | rewbuffer = deque(maxlen=100) 106 | lenbuffer = deque(maxlen=100) 107 | cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device) 108 | cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device) 109 | 110 | tot_iter = self.current_learning_iteration + num_learning_iterations 111 | for it in range(self.current_learning_iteration, tot_iter): 112 | start = time.time() 113 | # Rollout 114 | with torch.inference_mode(): 115 | for i in range(self.num_steps_per_env): 116 | actions = self.alg.act(obs, critic_obs) 117 | obs, privileged_obs, rewards, dones, infos = self.env.step(actions) 118 | critic_obs = privileged_obs if privileged_obs is not None else obs 119 | obs, critic_obs, rewards, dones = obs.to(self.device), critic_obs.to(self.device), rewards.to(self.device), dones.to(self.device) 120 | self.alg.process_env_step(rewards, dones, infos) 121 | 122 | if self.log_dir is not None: 123 | # Book keeping 124 | if 'episode' in infos: 125 | ep_infos.append(infos['episode']) 126 | cur_reward_sum += rewards 127 | cur_episode_length += 1 128 | new_ids = (dones > 0).nonzero(as_tuple=False) 129 | rewbuffer.extend(cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist()) 130 | lenbuffer.extend(cur_episode_length[new_ids][:, 0].cpu().numpy().tolist()) 131 | cur_reward_sum[new_ids] = 0 132 | cur_episode_length[new_ids] = 0 133 | 134 | if len(lenbuffer) > 0: 135 | mean_episode_length = statistics.mean(lenbuffer) 136 | else: 137 | mean_episode_length = 0 138 | 139 | stop = time.time() 140 | collection_time = stop - start 141 | 142 | # Learning step 143 | start = stop 144 | self.alg.compute_returns(critic_obs) 145 | 146 | mean_value_loss, mean_surrogate_loss = self.alg.update() 147 | stop = time.time() 148 | learn_time = stop - start 149 | 150 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs 151 | self.tot_time += collection_time + learn_time 152 | 153 | if self.log_dir is not None and it % self.log_interval == 0: 154 | self.log(locals()) 155 | if self.record_video: 156 | self.log_video(it) 157 | if it < 2500: 158 | if it % self.save_interval == 0: 159 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it))) 160 | if it % self.record_interval == 0 and self.record_interval > 0: 161 | self.start_recording() 162 | elif it < 5000: 163 | if it % (2*self.save_interval) == 0: 164 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it))) 165 | if it % (2*self.record_interval) == 0 and self.record_interval > 0: 166 | self.start_recording() 167 | else: 168 | if it % (5*self.save_interval) == 0: 169 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it))) 170 | if it % (3*self.record_interval) == 0 and self.record_interval > 0: 171 | self.start_recording() 172 | ep_infos.clear() 173 | 174 | self.current_learning_iteration += num_learning_iterations 175 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(self.current_learning_iteration))) 176 | 177 | def log(self, locs, width=80, pad=35): 178 | iteration_time = locs['collection_time'] + locs['learn_time'] 179 | 180 | ep_string = f'' 181 | wandb_dict = {} 182 | 183 | if len(locs['rewbuffer']) > 0: 184 | 
mean_episode_length = statistics.mean(locs['lenbuffer']) 185 | else: 186 | mean_episode_length = 1 187 | 188 | if locs['ep_infos']: 189 | for key in locs['ep_infos'][0]: 190 | infotensor = torch.tensor([], device=self.device) 191 | for ep_info in locs['ep_infos']: 192 | # handle scalar and zero dimensional tensor infos 193 | if not isinstance(ep_info[key], torch.Tensor): 194 | ep_info[key] = torch.Tensor([ep_info[key]]) 195 | if len(ep_info[key].shape) == 0: 196 | ep_info[key] = ep_info[key].unsqueeze(0) 197 | infotensor = torch.cat((infotensor, ep_info[key].to(self.device))) 198 | value = torch.mean(infotensor) 199 | if "tracking" in key: 200 | wandb_dict['Tracking/' + key] = value 201 | elif key[:4] == "rew_": 202 | wandb_dict['Reward/' + key[4:]] = value 203 | elif "command" in key: 204 | continue 205 | wandb_dict['Command/' + key] = value 206 | else: 207 | wandb_dict['Episode/' + key] = value 208 | # wandb_dict['Step/' + key] = value / mean_episode_length 209 | # ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n""" 210 | mean_std = self.alg.actor_critic.std.mean() 211 | fps = int(self.num_steps_per_env * self.env.num_envs / (locs['collection_time'] + locs['learn_time'])) 212 | 213 | wandb_dict['Loss/value_func'] = locs['mean_value_loss'] 214 | wandb_dict['Loss/surrogate'] = locs['mean_surrogate_loss'] 215 | # wandb_dict['Loss/entropy_coef'] = locs['entropy_coef'] 216 | wandb_dict['Loss/learning_rate'] = self.alg.learning_rate 217 | wandb_dict['Loss/kl_mean'] = self.alg.kl_mean 218 | wandb_dict['Loss/mean_noise_std'] = mean_std.item() 219 | 220 | wandb_dict['Perf/total_fps'] = fps 221 | wandb_dict['Perf/collection time'] = locs['collection_time'] 222 | wandb_dict['Perf/learning_time'] = locs['learn_time'] 223 | 224 | if len(locs['rewbuffer']) > 0: 225 | wandb_dict['Train/mean_reward'] = statistics.mean(locs['rewbuffer']) 226 | wandb_dict['Train/mean_episode_length'] = statistics.mean(locs['lenbuffer']) 227 | 228 | wandb.log(wandb_dict, step=locs['it']) 229 | 230 | str = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m " 231 | 232 | if len(locs['rewbuffer']) > 0: 233 | log_string = (f"""{'#' * width}\n""" 234 | f"""{str.center(width, ' ')}\n\n""" 235 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 236 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 237 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 238 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 239 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""" 240 | f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n""" 241 | f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n""") 242 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 243 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 244 | else: 245 | log_string = (f"""{'#' * width}\n""" 246 | f"""{str.center(width, ' ')}\n\n""" 247 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 248 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 249 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 250 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 251 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""") 252 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 253 | # f"""{'Mean episode 
length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 254 | 255 | log_string += ep_string 256 | log_string += (f"""{'-' * width}\n""" 257 | f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n""" 258 | f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n""" 259 | f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n""" 260 | f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * ( 261 | locs['num_learning_iterations'] - locs['it']):.1f}s\n""") 262 | print(log_string) 263 | 264 | def save(self, path, infos=None): 265 | torch.save({ 266 | 'model_state_dict': self.alg.actor_critic.state_dict(), 267 | 'optimizer_state_dict': self.alg.optimizer.state_dict(), 268 | 'iter': self.current_learning_iteration, 269 | 'infos': infos, 270 | }, path) 271 | 272 | def load(self, path, load_optimizer=True): 273 | loaded_dict = torch.load(path) 274 | self.alg.actor_critic.load_state_dict(loaded_dict['model_state_dict']) 275 | if load_optimizer: 276 | self.alg.optimizer.load_state_dict(loaded_dict['optimizer_state_dict']) 277 | self.current_learning_iteration = loaded_dict['iter'] 278 | return loaded_dict['infos'] 279 | 280 | def get_inference_policy(self, device=None): 281 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 282 | if device is not None: 283 | self.alg.actor_critic.to(device) 284 | return self.alg.actor_critic.act_inference 285 | 286 | def start_recording(self): 287 | self.env.start_recording() 288 | 289 | def log_video(self, it): 290 | frames = self.env.get_recorded_frames() 291 | if frames is None: 292 | return 293 | else: 294 | video_array = np.concatenate([np.expand_dims(frame, axis=0) for frame in frames ], axis=0).swapaxes(1, 3).swapaxes(2, 3) 295 | wandb.log({"video": wandb.Video(video_array, fps=int(1/self.env.dt))}, step=it) -------------------------------------------------------------------------------- /locomotion_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import genesis as gs 5 | from genesis.engine.solvers.rigid.rigid_solver_decomp import RigidSolver 6 | from utils import * 7 | 8 | class LocoEnv: 9 | def __init__( 10 | self, 11 | num_envs, 12 | env_cfg, 13 | obs_cfg, 14 | reward_cfg, 15 | command_cfg, 16 | show_viewer, 17 | eval, 18 | debug, 19 | device='cuda', 20 | ) -> None: 21 | self.num_envs = 1 if num_envs == 0 else num_envs 22 | self.num_build_envs = num_envs 23 | self.num_single_obs = obs_cfg['num_obs'] 24 | self.num_obs = self.num_single_obs * obs_cfg['num_history_obs'] 25 | self.num_privileged_obs = obs_cfg['num_priv_obs'] 26 | self.num_actions = env_cfg['num_actions'] 27 | self.num_commands = command_cfg['num_commands'] 28 | 29 | self.headless = not show_viewer 30 | self.eval = eval 31 | self.debug = debug 32 | 33 | self.dt = 1 / env_cfg['control_freq'] 34 | if env_cfg['use_implicit_controller']: 35 | sim_dt = self.dt 36 | sim_substeps = env_cfg['decimation'] 37 | else: 38 | sim_dt = self.dt / env_cfg['decimation'] 39 | sim_substeps = 1 40 | self.max_episode_length_s = env_cfg['episode_length_s'] 41 | self.max_episode_length = np.ceil(self.max_episode_length_s / self.dt) 42 | 43 | self.obs_cfg = obs_cfg 44 | self.obs_scales = obs_cfg['obs_scales'] 45 | self.reward_cfg = reward_cfg 46 | self.reward_scales = reward_cfg['reward_scales'] 47 | self.env_cfg = env_cfg 48 | self.command_cfg = command_cfg 49 | 50 | self.command_type = env_cfg['command_type'] 51 | assert self.command_type in ['heading', 'ang_vel_yaw'] 52 | 53 | 
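# Timing notes (values taken from the configs in train_backflip.py / train_walk.py):
#   policy dt = 1 / control_freq = 1 / 50 = 0.02 s
#   with use_implicit_controller=False: sim_dt = dt / decimation = 0.02 / 4 = 0.005 s, substeps = 1
#   max_episode_length = ceil(episode_length_s / dt) -> ceil(2.0 / 0.02) = 100 steps (backflip),
#                                                       ceil(20.0 / 0.02) = 1000 steps (walking)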
self.action_latency = env_cfg['action_latency'] 54 | assert self.action_latency in [0, 0.02] 55 | 56 | self.num_dof = env_cfg['num_dofs'] 57 | if not torch.cuda.is_available(): 58 | self.device = torch.device('cpu') 59 | else: 60 | assert device in ['cpu', 'cuda'] 61 | self.device = torch.device(device) 62 | 63 | # create scene 64 | self.scene = gs.Scene( 65 | sim_options=gs.options.SimOptions( 66 | dt=sim_dt, 67 | substeps=sim_substeps, 68 | ), 69 | viewer_options=gs.options.ViewerOptions( 70 | max_FPS=int(1 / self.dt * self.env_cfg['decimation']), 71 | camera_pos=(2.0, 0.0, 2.5), 72 | camera_lookat=(0.0, 0.0, 0.5), 73 | camera_fov=40, 74 | ), 75 | vis_options=gs.options.VisOptions( 76 | n_rendered_envs=1, 77 | ), 78 | rigid_options=gs.options.RigidOptions( 79 | dt=sim_dt, 80 | constraint_solver=gs.constraint_solver.Newton, 81 | enable_collision=True, 82 | enable_self_collision=True, 83 | enable_joint_limit=True, 84 | ), 85 | show_viewer=show_viewer, 86 | show_FPS=False, 87 | ) 88 | 89 | for solver in self.scene.sim.solvers: 90 | if not isinstance(solver, RigidSolver): 91 | continue 92 | self.rigid_solver = solver 93 | 94 | # add entities 95 | if self.env_cfg['use_terrain']: 96 | self.terrain_cfg = self.env_cfg['terrain_cfg'] 97 | self.terrain = self.scene.add_entity( 98 | gs.morphs.Terrain( 99 | n_subterrains=self.terrain_cfg['n_subterrains'], 100 | horizontal_scale=self.terrain_cfg['horizontal_scale'], 101 | vertical_scale=self.terrain_cfg['vertical_scale'], 102 | subterrain_size=self.terrain_cfg['subterrain_size'], 103 | subterrain_types=self.terrain_cfg['subterrain_types'], 104 | ), 105 | ) 106 | terrain_margin_x = self.terrain_cfg['n_subterrains'][0] * self.terrain_cfg['subterrain_size'][0] 107 | terrain_margin_y = self.terrain_cfg['n_subterrains'][1] * self.terrain_cfg['subterrain_size'][1] 108 | self.terrain_margin = torch.tensor( 109 | [terrain_margin_x, terrain_margin_y], device=self.device, dtype=gs.tc_float 110 | ) 111 | height_field = self.terrain.geoms[0].metadata["height_field"] 112 | self.height_field = torch.tensor( 113 | height_field, device=self.device, dtype=gs.tc_float 114 | ) * self.terrain_cfg['vertical_scale'] 115 | else: 116 | self.scene.add_entity( 117 | gs.morphs.URDF(file='urdf/plane/plane.urdf', fixed=True), 118 | ) 119 | self.base_init_pos = torch.tensor( 120 | self.env_cfg['base_init_pos'], device=self.device 121 | ) 122 | self.base_init_quat = torch.tensor( 123 | self.env_cfg['base_init_quat'], device=self.device 124 | ) 125 | 126 | self.robot = self.scene.add_entity( 127 | gs.morphs.URDF( 128 | file=self.env_cfg['urdf_path'], 129 | merge_fixed_links=True, 130 | links_to_keep=self.env_cfg['links_to_keep'], 131 | pos=self.base_init_pos.cpu().numpy(), 132 | quat=self.base_init_quat.cpu().numpy(), 133 | ), 134 | visualize_contact=self.debug, 135 | ) 136 | 137 | if gs.platform != 'macOS': 138 | self._set_camera() 139 | 140 | # build 141 | self.scene.build(n_envs=num_envs) 142 | 143 | self._init_buffers() 144 | self._prepare_reward_function() 145 | 146 | # domain randomization 147 | self._randomize_controls() 148 | self._randomize_rigids() 149 | 150 | def _prepare_reward_function(self): 151 | # remove zero scales + multiply non-zero ones by dt 152 | for key in list(self.reward_scales.keys()): 153 | scale = self.reward_scales[key] 154 | if scale == 0: 155 | self.reward_scales.pop(key) 156 | else: 157 | self.reward_scales[key] *= self.dt 158 | 159 | # prepare list of functions 160 | self.reward_functions = [] 161 | self.reward_names = [] 162 | for name, scale in 
self.reward_scales.items(): 163 | if name == 'termination': 164 | continue 165 | self.reward_names.append(name) 166 | name = '_reward_' + name 167 | self.reward_functions.append(getattr(self, name)) 168 | 169 | # reward episode sums 170 | self.episode_sums = { 171 | name: torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) 172 | for name in self.reward_scales.keys() 173 | } 174 | 175 | def _init_buffers(self): 176 | self.base_euler = torch.zeros( 177 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 178 | ) 179 | self.base_lin_vel = torch.zeros( 180 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 181 | ) 182 | self.base_ang_vel = torch.zeros( 183 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 184 | ) 185 | self.projected_gravity = torch.zeros( 186 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 187 | ) 188 | self.global_gravity = torch.tensor( 189 | np.array([0.0, 0.0, -1.0]), device=self.device, dtype=gs.tc_float 190 | ) 191 | self.forward_vec = torch.zeros( 192 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 193 | ) 194 | self.forward_vec[:, 0] = 1.0 195 | 196 | self.obs_buf = torch.zeros( 197 | (self.num_envs, self.num_single_obs), device=self.device, dtype=gs.tc_float 198 | ) 199 | self.obs_history_buf = torch.zeros( 200 | (self.num_envs, self.num_obs), device=self.device, dtype=gs.tc_float 201 | ) 202 | self.obs_noise = torch.zeros( 203 | (self.num_envs, self.num_single_obs), device=self.device, dtype=gs.tc_float 204 | ) 205 | self._prepare_obs_noise() 206 | self.privileged_obs_buf = ( 207 | None 208 | if self.num_privileged_obs is None 209 | else torch.zeros( 210 | (self.num_envs, self.num_privileged_obs), 211 | device=self.device, 212 | dtype=gs.tc_float, 213 | ) 214 | ) 215 | self.rew_buf = torch.zeros( 216 | (self.num_envs,), device=self.device, dtype=gs.tc_float 217 | ) 218 | self.rew_buf_pos = torch.zeros( 219 | (self.num_envs,), device=self.device, dtype=gs.tc_float 220 | ) 221 | self.rew_buf_neg = torch.zeros( 222 | (self.num_envs,), device=self.device, dtype=gs.tc_float 223 | ) 224 | self.reset_buf = torch.ones( 225 | (self.num_envs,), device=self.device, dtype=gs.tc_int 226 | ) 227 | self.episode_length_buf = torch.zeros( 228 | (self.num_envs,), device=self.device, dtype=gs.tc_int 229 | ) 230 | self.time_out_buf = torch.zeros( 231 | (self.num_envs,), device=self.device, dtype=gs.tc_int 232 | ) 233 | 234 | # commands 235 | self.commands = torch.zeros( 236 | (self.num_envs, self.num_commands), device=self.device, dtype=gs.tc_float 237 | ) 238 | self.commands_scale = torch.tensor( 239 | [ 240 | self.obs_scales['lin_vel'], 241 | self.obs_scales['lin_vel'], 242 | self.obs_scales['ang_vel'], 243 | ], 244 | device=self.device, 245 | dtype=gs.tc_float, 246 | ) 247 | self.stand_still = torch.zeros( 248 | (self.num_envs,), device=self.device, dtype=gs.tc_int 249 | ) 250 | 251 | # names to indices 252 | self.motor_dofs = [ 253 | self.robot.get_joint(name).dof_idx_local 254 | for name in self.env_cfg['dof_names'] 255 | ] 256 | 257 | def find_link_indices(names): 258 | link_indices = list() 259 | for link in self.robot.links: 260 | flag = False 261 | for name in names: 262 | if name in link.name: 263 | flag = True 264 | if flag: 265 | link_indices.append(link.idx - self.robot.link_start) 266 | return link_indices 267 | 268 | self.termination_contact_link_indices = find_link_indices( 269 | self.env_cfg['termination_contact_link_names'] 270 | ) 271 | self.penalized_contact_link_indices = find_link_indices( 272 | 
self.env_cfg['penalized_contact_link_names'] 273 | ) 274 | self.feet_link_indices = find_link_indices( 275 | self.env_cfg['feet_link_names'] 276 | ) 277 | assert len(self.termination_contact_link_indices) > 0 278 | assert len(self.penalized_contact_link_indices) > 0 279 | assert len(self.feet_link_indices) > 0 280 | self.feet_link_indices_world_frame = [i+1 for i in self.feet_link_indices] 281 | 282 | # actions 283 | self.actions = torch.zeros( 284 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 285 | ) 286 | self.last_actions = torch.zeros( 287 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 288 | ) 289 | self.last_last_actions = torch.zeros( 290 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 291 | ) 292 | self.dof_pos = torch.zeros( 293 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 294 | ) 295 | self.dof_vel = torch.zeros( 296 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 297 | ) 298 | self.last_dof_vel = torch.zeros( 299 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 300 | ) 301 | self.root_vel = torch.zeros( 302 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 303 | ) 304 | self.last_root_vel = torch.zeros( 305 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 306 | ) 307 | self.base_pos = torch.zeros( 308 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 309 | ) 310 | self.base_quat = torch.zeros( 311 | (self.num_envs, 4), device=self.device, dtype=gs.tc_float 312 | ) 313 | self.link_contact_forces = torch.zeros( 314 | (self.num_envs, self.robot.n_links, 3), device=self.device, dtype=gs.tc_float 315 | ) 316 | 317 | self.feet_air_time = torch.zeros( 318 | (self.num_envs, len(self.feet_link_indices)), 319 | device=self.device, 320 | dtype=gs.tc_float, 321 | ) 322 | self.feet_max_height = torch.zeros( 323 | (self.num_envs, len(self.feet_link_indices)), 324 | device=self.device, 325 | dtype=gs.tc_float, 326 | ) 327 | 328 | self.last_contacts = torch.zeros( 329 | (self.num_envs, len(self.feet_link_indices)), 330 | device=self.device, 331 | dtype=gs.tc_int, 332 | ) 333 | 334 | # extras 335 | self.continuous_push = torch.zeros( 336 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 337 | ) 338 | self.env_identities = torch.arange( 339 | self.num_envs, 340 | device=self.device, 341 | dtype=gs.tc_int, 342 | ) 343 | self.common_step_counter = 0 344 | self.extras = {} 345 | 346 | self.terrain_heights = torch.zeros( 347 | (self.num_envs,), 348 | device=self.device, 349 | dtype=gs.tc_float, 350 | ) 351 | 352 | # PD control 353 | stiffness = self.env_cfg['PD_stiffness'] 354 | damping = self.env_cfg['PD_damping'] 355 | 356 | self.p_gains, self.d_gains = [], [] 357 | for dof_name in self.env_cfg['dof_names']: 358 | for key in stiffness.keys(): 359 | if key in dof_name: 360 | self.p_gains.append(stiffness[key]) 361 | self.d_gains.append(damping[key]) 362 | self.p_gains = torch.tensor(self.p_gains, device=self.device) 363 | self.d_gains = torch.tensor(self.d_gains, device=self.device) 364 | self.batched_p_gains = self.p_gains[None, :].repeat(self.num_envs, 1) 365 | self.batched_d_gains = self.d_gains[None, :].repeat(self.num_envs, 1) 366 | 367 | self.robot.set_dofs_kp(self.p_gains, self.motor_dofs) 368 | self.robot.set_dofs_kv(self.d_gains, self.motor_dofs) 369 | 370 | default_joint_angles = self.env_cfg['default_joint_angles'] 371 | self.default_dof_pos = torch.tensor( 372 | [default_joint_angles[name] for name in 
self.env_cfg['dof_names']], 373 | device=self.device, 374 | ) 375 | 376 | self.dof_pos_limits = torch.stack(self.robot.get_dofs_limit(self.motor_dofs), dim=1) 377 | self.torque_limits = self.robot.get_dofs_force_range(self.motor_dofs)[1] 378 | for i in range(self.dof_pos_limits.shape[0]): 379 | # soft limits 380 | m = (self.dof_pos_limits[i, 0] + self.dof_pos_limits[i, 1]) / 2 381 | r = self.dof_pos_limits[i, 1] - self.dof_pos_limits[i, 0] 382 | self.dof_pos_limits[i, 0] = ( 383 | m - 0.5 * r * self.reward_cfg['soft_dof_pos_limit'] 384 | ) 385 | self.dof_pos_limits[i, 1] = ( 386 | m + 0.5 * r * self.reward_cfg['soft_dof_pos_limit'] 387 | ) 388 | 389 | self.motor_strengths = gs.ones((self.num_envs, self.num_dof), dtype=float) 390 | self.motor_offsets = gs.zeros((self.num_envs, self.num_dof), dtype=float) 391 | 392 | # gait control 393 | self.foot_positions = torch.ones( 394 | self.num_envs, len(self.feet_link_indices), 3, device=self.device, dtype=gs.tc_float, 395 | ) 396 | self.foot_quaternions = torch.ones( 397 | self.num_envs, len(self.feet_link_indices), 4, device=self.device, dtype=gs.tc_float, 398 | ) 399 | self.foot_velocities = torch.ones( 400 | self.num_envs, len(self.feet_link_indices), 3, device=self.device, dtype=gs.tc_float, 401 | ) 402 | 403 | self.base_link_index = 1 404 | 405 | self.com = torch.zeros( 406 | self.num_envs, 3, device=self.device, dtype=gs.tc_float, 407 | ) 408 | 409 | def _update_buffers(self): 410 | 411 | # update buffers 412 | # [:] is for non-parallelized scene 413 | self.base_pos[:] = self.robot.get_pos() 414 | self.base_quat[:] = self.robot.get_quat() 415 | base_quat_rel = gs_quat_mul(self.base_quat, gs_inv_quat(self.base_init_quat.reshape(1, -1).repeat(self.num_envs, 1))) 416 | self.base_euler = gs_quat2euler(base_quat_rel) 417 | 418 | inv_quat_yaw = gs_quat_from_angle_axis(-self.base_euler[:, 2], 419 | torch.tensor([0, 0, 1], device=self.device, dtype=torch.float)) 420 | 421 | inv_base_quat = gs_inv_quat(self.base_quat) 422 | self.base_lin_vel[:] = gs_transform_by_quat(self.robot.get_vel(), inv_quat_yaw) 423 | self.base_ang_vel[:] = gs_transform_by_quat(self.robot.get_ang(), inv_base_quat) 424 | self.projected_gravity = gs_transform_by_quat( 425 | self.global_gravity, inv_base_quat 426 | ) 427 | 428 | self.dof_pos[:] = self.robot.get_dofs_position(self.motor_dofs) 429 | self.dof_vel[:] = self.robot.get_dofs_velocity(self.motor_dofs) 430 | self.link_contact_forces[:] = torch.tensor( 431 | self.robot.get_links_net_contact_force(), 432 | device=self.device, 433 | dtype=gs.tc_float, 434 | ) 435 | self.com[:] = self.rigid_solver.get_links_COM([self.base_link_index,]).squeeze(dim=1) 436 | 437 | self.foot_positions[:] = self.rigid_solver.get_links_pos(self.feet_link_indices_world_frame) 438 | self.foot_quaternions[:] = self.rigid_solver.get_links_quat(self.feet_link_indices_world_frame) 439 | self.foot_velocities[:] = self.rigid_solver.get_links_vel(self.feet_link_indices_world_frame) 440 | 441 | if self.env_cfg['use_terrain']: 442 | clipped_base_pos = self.base_pos[:, :2].clamp(min=torch.zeros(2, device=self.device), max=self.terrain_margin) 443 | height_field_ids = (clipped_base_pos / self.terrain_cfg['horizontal_scale'] - 0.5).floor().int() 444 | height_field_ids.clamp(min=0) 445 | # print(self.height_field[height_field_ids[:, 0], height_field_ids[:, 1]]) 446 | self.terrain_heights = self.height_field[height_field_ids[:, 0], height_field_ids[:, 1]] 447 | 448 | def _compute_torques(self, actions): 449 | # control_type = 'P' 450 | actions_scaled = actions * 
self.env_cfg['action_scale'] 451 | torques = ( 452 | self.batched_p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos + self.motor_offsets) 453 | - self.batched_d_gains * self.dof_vel 454 | ) 455 | return torques * self.motor_strengths 456 | 457 | def _compute_target_dof_pos(self, actions): 458 | # control_type = 'P' 459 | actions_scaled = actions * self.env_cfg['action_scale'] 460 | target_dof_pos = actions_scaled + self.default_dof_pos 461 | 462 | return target_dof_pos 463 | 464 | def check_termination(self): 465 | self.reset_buf = torch.any( 466 | torch.norm( 467 | self.link_contact_forces[:, self.termination_contact_link_indices, :], 468 | dim=-1, 469 | ) 470 | > 1.0, 471 | dim=1, 472 | ) 473 | self.time_out_buf = ( 474 | self.episode_length_buf > self.max_episode_length 475 | ) # no terminal reward for time-outs 476 | self.reset_buf |= torch.logical_or( 477 | torch.abs(self.base_euler[:, 1]) 478 | > self.env_cfg['termination_if_pitch_greater_than'], 479 | torch.abs(self.base_euler[:, 0]) 480 | > self.env_cfg['termination_if_roll_greater_than'], 481 | ) 482 | if self.env_cfg['use_terrain']: 483 | self.reset_buf |= torch.logical_or( 484 | self.base_pos[:, 0] > self.terrain_margin[0], 485 | self.base_pos[:, 1] > self.terrain_margin[1], 486 | ) 487 | self.reset_buf |= torch.logical_or( 488 | self.base_pos[:, 0] < 1, 489 | self.base_pos[:, 1] < 1, 490 | ) 491 | self.reset_buf |= self.base_pos[:, 2] < self.env_cfg['termination_if_height_lower_than'] 492 | self.reset_buf |= self.time_out_buf 493 | 494 | def compute_reward(self): 495 | self.rew_buf[:] = 0. 496 | for i in range(len(self.reward_functions)): 497 | name = self.reward_names[i] 498 | rew = self.reward_functions[i]() * self.reward_scales[name] 499 | self.rew_buf += rew 500 | self.episode_sums[name] += rew 501 | # add termination reward after clipping 502 | if 'termination' in self.reward_scales: 503 | rew = self._reward_termination() * self.reward_scales['termination'] 504 | self.rew_buf += rew 505 | self.episode_sums['termination'] += rew 506 | 507 | def get_observations(self): 508 | return self.obs_history_buf 509 | 510 | def get_privileged_observations(self): 511 | return self.privileged_obs_buf 512 | 513 | def post_physics_step(self): 514 | self.episode_length_buf += 1 515 | self.common_step_counter += 1 516 | 517 | self._update_buffers() 518 | 519 | resampling_time_s = self.env_cfg['resampling_time_s'] 520 | envs_idx = ( 521 | (self.episode_length_buf % int(resampling_time_s / self.dt) == 0) 522 | .nonzero(as_tuple=False) 523 | .flatten() 524 | ) 525 | self._resample_commands(envs_idx) 526 | self._randomize_rigids(envs_idx) 527 | self._randomize_controls(envs_idx) 528 | if self.command_type == 'heading': 529 | forward = gs_transform_by_quat(self.forward_vec, self.base_quat) 530 | heading = torch.atan2(forward[:, 1], forward[:, 0]) 531 | self.commands[:, 2] = torch.clip( 532 | 0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0 533 | ) 534 | 535 | # random push 536 | push_interval_s = self.env_cfg['push_interval_s'] 537 | if push_interval_s > 0 and not (self.debug or self.eval): 538 | max_push_vel_xy = self.env_cfg['max_push_vel_xy'] 539 | dofs_vel = self.robot.get_dofs_velocity() # (num_envs, num_dof) [0:3] ~ base_link_vel 540 | push_vel = gs_rand_float(-max_push_vel_xy, max_push_vel_xy, (self.num_envs, 2), self.device) 541 | push_vel[((self.common_step_counter + self.env_identities) % int(push_interval_s / self.dt) != 0)] = 0 542 | dofs_vel[:, :2] += push_vel 543 | self.robot.set_dofs_velocity(dofs_vel) 544 
| 545 | self.check_termination() 546 | self.compute_reward() 547 | 548 | envs_idx = self.reset_buf.nonzero(as_tuple=False).flatten() 549 | if self.num_build_envs > 0: 550 | self.reset_idx(envs_idx) 551 | # self.rigid_solver.forward_kinematics() # no need currently 552 | self.compute_observations() 553 | 554 | if gs.platform != 'macOS': 555 | self._render_headless() 556 | if not self.headless and self.debug: 557 | self._draw_debug_vis() 558 | 559 | self.last_actions[:] = self.actions[:] 560 | self.last_last_actions[:] = self.last_actions[:] 561 | self.last_dof_vel[:] = self.dof_vel[:] 562 | self.last_root_vel[:] = self.robot.get_vel() 563 | 564 | def compute_observations(self): 565 | self.obs_buf = torch.cat( 566 | [ 567 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 568 | self.projected_gravity, # 3 569 | self.commands[:, :3] * self.commands_scale, # 3 570 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], 571 | self.dof_vel * self.obs_scales['dof_vel'], 572 | self.actions, 573 | ], 574 | axis=-1, 575 | ) 576 | 577 | # add noise 578 | if not self.eval: 579 | self.obs_buf += gs_rand_float( 580 | -1.0, 1.0, (self.num_single_obs,), self.device 581 | ) * self.obs_noise 582 | 583 | clip_obs = 100.0 584 | self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) 585 | 586 | self.obs_history_buf = torch.cat( 587 | [self.obs_history_buf[:, self.num_single_obs:], self.obs_buf.detach()], dim=1 588 | ) 589 | 590 | if self.num_privileged_obs is not None: 591 | self.privileged_obs_buf = torch.cat( 592 | [ 593 | self.base_lin_vel * self.obs_scales['lin_vel'], # 3 594 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 595 | self.projected_gravity, # 3 596 | self.commands[:, :3] * self.commands_scale, # 3 597 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], 598 | self.dof_vel * self.obs_scales['dof_vel'], 599 | self.actions, 600 | self.last_actions, 601 | ], 602 | axis=-1, 603 | ) 604 | self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) 605 | 606 | def _prepare_obs_noise(self): 607 | self.obs_noise[:3] = self.obs_cfg['obs_noise']['ang_vel'] 608 | self.obs_noise[3:6] = self.obs_cfg['obs_noise']['gravity'] 609 | self.obs_noise[21:33] = self.obs_cfg['obs_noise']['dof_pos'] 610 | self.obs_noise[33:45] = self.obs_cfg['obs_noise']['dof_vel'] 611 | 612 | def _resample_commands(self, envs_idx): 613 | # resample commands 614 | 615 | # lin_vel 616 | self.commands[envs_idx, 0] = gs_rand_float( 617 | *self.command_cfg['lin_vel_x_range'], (len(envs_idx),), self.device 618 | ) 619 | self.commands[envs_idx, 1] = gs_rand_float( 620 | *self.command_cfg['lin_vel_y_range'], (len(envs_idx),), self.device 621 | ) 622 | self.commands[envs_idx, :2] *= ( 623 | torch.norm(self.commands[envs_idx, :2], dim=1) > 0.2 624 | ).unsqueeze(1) 625 | 626 | # ang_vel 627 | if self.command_type == 'heading': 628 | self.commands[envs_idx, 3] = gs_rand_float( 629 | -3.14, 3.14, (len(envs_idx),), self.device 630 | ) 631 | elif self.command_type == 'ang_vel_yaw': 632 | self.commands[envs_idx, 2] = gs_rand_float( 633 | *self.command_cfg['ang_vel_range'], (len(envs_idx),), self.device 634 | ) 635 | self.commands[envs_idx, 2] *= torch.abs(self.commands[envs_idx, 2]) > 0.2 636 | 637 | def reset_idx(self, envs_idx): 638 | if len(envs_idx) == 0: 639 | return 640 | 641 | # reset dofs 642 | self.dof_pos[envs_idx] = ( 643 | self.default_dof_pos 644 | ) + gs_rand_float(-0.3, 0.3, (len(envs_idx), self.num_dof), self.device) 645 | self.dof_vel[envs_idx] = 0.0 646 | 
self.robot.set_dofs_position( 647 | position=self.dof_pos[envs_idx], 648 | dofs_idx_local=self.motor_dofs, 649 | zero_velocity=True, 650 | envs_idx=envs_idx, 651 | ) 652 | 653 | # reset root states - position 654 | self.base_pos[envs_idx] = self.base_init_pos 655 | self.base_pos[envs_idx, :2] += gs_rand_float( 656 | -1.0, 1.0, (len(envs_idx), 2), self.device 657 | ) 658 | self.base_quat[envs_idx] = self.base_init_quat.reshape(1, -1) 659 | base_euler = gs_rand_float( 660 | -0.1, 0.1, (len(envs_idx), 3), self.device 661 | ) 662 | base_euler[:, 2] = gs_rand_float(0.0, 3.14, (len(envs_idx),), self.device) 663 | self.base_quat[envs_idx] = gs_quat_mul( 664 | gs_euler2quat(base_euler), 665 | self.base_quat[envs_idx], 666 | ) 667 | self.robot.set_pos( 668 | self.base_pos[envs_idx], zero_velocity=False, envs_idx=envs_idx 669 | ) 670 | self.robot.set_quat( 671 | self.base_quat[envs_idx], zero_velocity=False, envs_idx=envs_idx 672 | ) 673 | self.robot.zero_all_dofs_velocity(envs_idx) 674 | 675 | # update projected gravity 676 | inv_base_quat = gs_inv_quat(self.base_quat) 677 | self.projected_gravity = gs_transform_by_quat( 678 | self.global_gravity, inv_base_quat 679 | ) 680 | 681 | # reset root states - velocity 682 | self.base_lin_vel[envs_idx] = ( 683 | 0 # gs_rand_float(-0.5, 0.5, (len(envs_idx), 3), self.device) 684 | ) 685 | self.base_ang_vel[envs_idx] = ( 686 | 0.0 # gs_rand_float(-0.5, 0.5, (len(envs_idx), 3), self.device) 687 | ) 688 | base_vel = torch.concat( 689 | [self.base_lin_vel[envs_idx], self.base_ang_vel[envs_idx]], dim=1 690 | ) 691 | self.robot.set_dofs_velocity( 692 | velocity=base_vel, dofs_idx_local=[0, 1, 2, 3, 4, 5], envs_idx=envs_idx 693 | ) 694 | 695 | self._resample_commands(envs_idx) 696 | 697 | # reset buffers 698 | self.obs_history_buf[envs_idx] = 0.0 699 | self.actions[envs_idx] = 0.0 700 | self.last_actions[envs_idx] = 0.0 701 | self.last_last_actions[envs_idx] = 0.0 702 | self.last_dof_vel[envs_idx] = 0.0 703 | self.feet_air_time[envs_idx] = 0.0 704 | self.feet_max_height[envs_idx] = 0.0 705 | self.episode_length_buf[envs_idx] = 0 706 | self.reset_buf[envs_idx] = 1 707 | 708 | # fill extras 709 | self.extras['episode'] = {} 710 | for key in self.episode_sums.keys(): 711 | self.extras['episode']['rew_' + key] = ( 712 | torch.mean(self.episode_sums[key][envs_idx]).item() 713 | / self.max_episode_length_s 714 | ) 715 | self.episode_sums[key][envs_idx] = 0.0 716 | # send timeout info to the algorithm 717 | if self.env_cfg['send_timeouts']: 718 | self.extras['time_outs'] = self.time_out_buf 719 | 720 | def reset(self): 721 | self.reset_buf[:] = True 722 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 723 | return None, None 724 | 725 | def step(self, actions): 726 | clip_actions = self.env_cfg['clip_actions'] 727 | self.actions = torch.clip(actions, -clip_actions, clip_actions) 728 | exec_actions = self.last_actions if self.action_latency > 0 else self.actions 729 | dof_pos_list = [] 730 | dof_vel_list = [] 731 | if self.env_cfg['use_implicit_controller']: 732 | target_dof_pos = self._compute_target_dof_pos(exec_actions) 733 | self.robot.control_dofs_position(target_dof_pos, self.motor_dofs) 734 | self.scene.step() 735 | else: 736 | for i in range(self.env_cfg['decimation']): 737 | self.torques = self._compute_torques(exec_actions) 738 | if self.num_build_envs == 0: 739 | torques = self.torques.squeeze() 740 | self.robot.control_dofs_force(torques, self.motor_dofs) 741 | else: 742 | self.robot.control_dofs_force(self.torques, self.motor_dofs) 743 | 
self.scene.step() 744 | self.dof_pos[:] = self.robot.get_dofs_position(self.motor_dofs) 745 | self.dof_vel[:] = self.robot.get_dofs_velocity(self.motor_dofs) 746 | 747 | if i == 0 or i == 2: 748 | dof_pos_list.append(self.robot.get_dofs_position().detach().cpu()) 749 | dof_vel_list.append(self.robot.get_dofs_velocity().detach().cpu()) 750 | 751 | self.dof_pos_list = dof_pos_list 752 | self.dof_vel_list = dof_vel_list 753 | 754 | self.post_physics_step() 755 | 756 | return ( 757 | self.obs_history_buf, 758 | self.privileged_obs_buf, 759 | self.rew_buf, 760 | self.reset_buf, 761 | self.extras, 762 | ) 763 | 764 | # ------------ domain randomization---------------- 765 | 766 | def _randomize_rigids(self, env_ids=None): 767 | 768 | if self.eval: 769 | return 770 | 771 | if env_ids == None: 772 | env_ids = torch.arange(0, self.num_envs) 773 | elif len(env_ids) == 0: 774 | return 775 | 776 | if self.env_cfg['randomize_friction']: 777 | self._randomize_link_friction(env_ids) 778 | if self.env_cfg['randomize_base_mass']: 779 | self._randomize_base_mass(env_ids) 780 | if self.env_cfg['randomize_com_displacement']: 781 | self._randomize_com_displacement(env_ids) 782 | 783 | def _randomize_controls(self, env_ids=None): 784 | 785 | if self.eval: 786 | return 787 | 788 | if env_ids == None: 789 | env_ids = torch.arange(0, self.num_envs) 790 | elif len(env_ids) == 0: 791 | return 792 | 793 | if self.env_cfg['randomize_motor_strength']: 794 | self._randomize_motor_strength(env_ids) 795 | if self.env_cfg['randomize_motor_offset']: 796 | self._randomize_motor_offset(env_ids) 797 | if self.env_cfg['randomize_kp_scale']: 798 | self._randomize_kp(env_ids) 799 | if self.env_cfg['randomize_kd_scale']: 800 | self._randomize_kd(env_ids) 801 | 802 | def _randomize_link_friction(self, env_ids): 803 | 804 | min_friction, max_friction = self.env_cfg['friction_range'] 805 | 806 | solver = self.rigid_solver 807 | 808 | ratios = gs.rand((len(env_ids), 1), dtype=float).repeat(1, solver.n_geoms) \ 809 | * (max_friction - min_friction) + min_friction 810 | solver.set_geoms_friction_ratio(ratios, torch.arange(0, solver.n_geoms), env_ids) 811 | 812 | def _randomize_base_mass(self, env_ids): 813 | 814 | min_mass, max_mass = self.env_cfg['added_mass_range'] 815 | base_link_id = 1 816 | 817 | added_mass = gs.rand((len(env_ids), 1), dtype=float) \ 818 | * (max_mass - min_mass) + min_mass 819 | 820 | self.rigid_solver.set_links_mass_shift(added_mass, [base_link_id,], env_ids) 821 | 822 | def _randomize_com_displacement(self, env_ids): 823 | 824 | min_displacement, max_displacement = self.env_cfg['com_displacement_range'] 825 | base_link_id = 1 826 | 827 | com_displacement = gs.rand((len(env_ids), 1, 3), dtype=float) \ 828 | * (max_displacement - min_displacement) + min_displacement 829 | # com_displacement[:, :, 0] -= 0.02 830 | 831 | self.rigid_solver.set_links_COM_shift(com_displacement, [base_link_id,], env_ids) 832 | 833 | def _randomize_motor_strength(self, env_ids): 834 | 835 | min_strength, max_strength = self.env_cfg['motor_strength_range'] 836 | self.motor_strengths[env_ids, :] = gs.rand((len(env_ids), 1), dtype=float) \ 837 | * (max_strength - min_strength) + min_strength 838 | 839 | def _randomize_motor_offset(self, env_ids): 840 | 841 | min_offset, max_offset = self.env_cfg['motor_offset_range'] 842 | self.motor_offsets[env_ids, :] = gs.rand((len(env_ids), self.num_dof), dtype=float) \ 843 | * (max_offset - min_offset) + min_offset 844 | 845 | def _randomize_kp(self, env_ids): 846 | 847 | min_scale, max_scale = 
self.env_cfg['kp_scale_range'] 848 | kp_scales = gs.rand((len(env_ids), self.num_dof), dtype=float) \ 849 | * (max_scale - min_scale) + min_scale 850 | self.batched_p_gains[env_ids, :] = kp_scales * self.p_gains[None, :] 851 | 852 | def _randomize_kd(self, env_ids): 853 | 854 | min_scale, max_scale = self.env_cfg['kd_scale_range'] 855 | kd_scales = gs.rand((len(env_ids), self.num_dof), dtype=float) \ 856 | * (max_scale - min_scale) + min_scale 857 | self.batched_d_gains[env_ids, :] = kd_scales * self.d_gains[None, :] 858 | 859 | def _draw_debug_vis(self): 860 | ''' Draws visualizations for dubugging (slows down simulation a lot). 861 | Default behaviour: draws height measurement points 862 | ''' 863 | self.scene.clear_debug_objects() 864 | 865 | foot_poss = self.foot_positions[0].reshape(-1, 3) 866 | # self.scene.draw_debug_spheres(poss=foot_poss, radius=0.03, color=(1, 0, 0, 0.7)) 867 | 868 | foot_poss = foot_poss.cpu() 869 | self.scene.draw_debug_line(foot_poss[0], foot_poss[3], radius=0.002, color=(1, 0, 0, 0.7)) 870 | self.scene.draw_debug_line(foot_poss[1], foot_poss[2], radius=0.002, color=(1, 0, 0, 0.7)) 871 | 872 | com = self.com[0] 873 | # self.scene.draw_debug_sphere(pos=com, radius=0.1, color=(0, 0, 1, 0.7)) 874 | 875 | com[2] = 0.02 + self.terrain_heights[0] 876 | self.scene.draw_debug_sphere(pos=com, radius=0.02, color=(0, 0, 1, 0.7)) 877 | 878 | def _set_camera(self): 879 | ''' Set camera position and direction 880 | ''' 881 | self._floating_camera = self.scene.add_camera( 882 | pos=np.array([0, -1, 1]), 883 | lookat=np.array([0, 0, 0]), 884 | # res=(720, 480), 885 | fov=40, 886 | GUI=False, 887 | ) 888 | 889 | self._recording = False 890 | self._recorded_frames = [] 891 | 892 | def _render_headless(self): 893 | if self._recording and len(self._recorded_frames) < 150: 894 | robot_pos = np.array(self.base_pos[0].cpu()) 895 | self._floating_camera.set_pose(pos=robot_pos + np.array([-1, -1, 0.5]), lookat=robot_pos + np.array([0, 0, -0.1])) 896 | # import time 897 | # start = time.time() 898 | frame, _, _, _ = self._floating_camera.render() 899 | # end = time.time() 900 | # print(end-start) 901 | self._recorded_frames.append(frame) 902 | # from PIL import Image 903 | # img = Image.fromarray(np.uint8(frame)) 904 | # img.save('./test.png') 905 | # print('save') 906 | 907 | def get_recorded_frames(self): 908 | if len(self._recorded_frames) == 150: 909 | frames = self._recorded_frames 910 | self._recorded_frames = [] 911 | self._recording = False 912 | return frames 913 | else: 914 | return None 915 | 916 | def start_recording(self, record_internal=True): 917 | self._recorded_frames = [] 918 | self._recording = True 919 | if record_internal: 920 | self._record_frames = True 921 | else: 922 | self._floating_camera.start_recording() 923 | 924 | def stop_recording(self, save_path=None): 925 | self._recorded_frames = [] 926 | self._recording = False 927 | if save_path is not None: 928 | print("fps", int(1 / self.dt)) 929 | self._floating_camera.stop_recording(save_path, fps = int(1 / self.dt)) 930 | --------------------------------------------------------------------------------
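
The PD actuation path in `_compute_torques` folds the randomized gains, motor offsets, and motor strengths into a single torque expression. The sketch below restates that mapping as a standalone function with toy shapes; the tensor sizes, the default `action_scale` value, and the helper name `compute_torques` are illustrative assumptions, not part of the repository.

```
import torch

def compute_torques(actions, dof_pos, dof_vel, default_dof_pos,
                    p_gains, d_gains, motor_offsets, motor_strengths,
                    action_scale=0.25):
    # PD law: drive joints toward (scaled action + default pose + offset),
    # damp with joint velocity, then scale by the per-motor strength factor.
    actions_scaled = actions * action_scale
    torques = p_gains * (actions_scaled + default_dof_pos - dof_pos + motor_offsets) \
              - d_gains * dof_vel
    return torques * motor_strengths

# toy usage: 2 envs, 12 DoFs
num_envs, num_dof = 2, 12
torques = compute_torques(
    actions=torch.zeros(num_envs, num_dof),
    dof_pos=torch.zeros(num_envs, num_dof),
    dof_vel=torch.zeros(num_envs, num_dof),
    default_dof_pos=torch.zeros(num_dof),
    p_gains=torch.full((num_envs, num_dof), 20.0),
    d_gains=torch.full((num_envs, num_dof), 0.5),
    motor_offsets=torch.zeros(num_envs, num_dof),
    motor_strengths=torch.ones(num_envs, num_dof),
)
print(torques.shape)  # torch.Size([2, 12])
```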
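
`compute_observations` keeps a flat per-env history buffer and rolls it by slicing off the oldest frame before concatenating the newest one. A minimal sketch of that rolling update, assuming a toy history length and observation size:

```
import torch

def push_obs_history(obs_history, obs, num_single_obs):
    # Drop the oldest frame (first num_single_obs columns) and append the newest.
    return torch.cat([obs_history[:, num_single_obs:], obs], dim=1)

num_envs, num_single_obs, history_len = 2, 4, 3
history = torch.zeros(num_envs, num_single_obs * history_len)
for step in range(5):
    obs = torch.full((num_envs, num_single_obs), float(step))
    history = push_obs_history(history, obs, num_single_obs)
print(history[0])  # blocks for steps 2, 3, 4 -- the newest frame sits at the end
```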
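
`_randomize_kp` and `_randomize_kd` draw one uniform scale per environment and DoF and multiply it onto the nominal gain vector. A small sketch of the same idea, using plain `torch.rand` in place of `gs.rand`; the range values stand in for `env_cfg['kp_scale_range']` and are not taken from the repository's configs.

```
import torch

def randomize_kp(p_gains, kp_scale_range, num_envs):
    # One uniform scale per (env, dof), multiplied onto the nominal Kp vector.
    lo, hi = kp_scale_range
    scales = torch.rand(num_envs, p_gains.shape[0]) * (hi - lo) + lo
    return scales * p_gains[None, :]

p_gains = torch.full((12,), 20.0)
batched_p_gains = randomize_kp(p_gains, kp_scale_range=(0.9, 1.1), num_envs=8)
print(batched_p_gains.shape)  # torch.Size([8, 12])
```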