├── rsl_rl ├── .gitignore ├── rsl_rl │ ├── storage │ │ ├── __init__.py │ │ └── rollout_storage.py │ ├── __init__.py │ ├── algorithms │ │ ├── __init__.py │ │ └── ppo.py │ ├── env │ │ ├── __init__.py │ │ └── vec_env.py │ ├── runners │ │ ├── __init__.py │ │ └── on_policy_runner.py │ ├── utils │ │ ├── __init__.py │ │ └── utils.py │ └── modules │ │ ├── __init__.py │ │ ├── actor_critic_recurrent.py │ │ └── actor_critic.py ├── setup.py ├── README.md ├── licenses │ └── dependencies │ │ ├── numpy_license.txt │ │ └── torch_license.txt └── LICENSE ├── README.md ├── .gitignore ├── eval_backflip.py ├── utils.py ├── train_backflip.py ├── train_walk.py ├── reward_wrapper.py └── locomotion_env.py /rsl_rl/.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | .idea 3 | 4 | # builds 5 | *.egg-info 6 | 7 | # cache 8 | __pycache__ 9 | .pytest_cache 10 | 11 | # vs code 12 | .vscode -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 ETH Zurich, NVIDIA CORPORATION 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | from .rollout_storage import RolloutStorage -------------------------------------------------------------------------------- /rsl_rl/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='rsl_rl', 4 | version='1.0.2', 5 | author='Nikita Rudin', 6 | author_email='rudinn@ethz.ch', 7 | license="BSD-3-Clause", 8 | packages=find_packages(), 9 | description='Fast and simple RL algorithms implemented in pytorch', 10 | python_requires='>=3.6', 11 | install_requires=[ 12 | "torch>=1.4.0", 13 | "torchvision>=0.5.0", 14 | "numpy>=1.16.4" 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /rsl_rl/README.md: -------------------------------------------------------------------------------- 1 | # RSL RL 2 | Fast and simple implementation of RL algorithms, designed to run fully on GPU. 3 | This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac GYM. 4 | 5 | Only PPO is implemented for now. More algorithms will be added later. 6 | Contributions are welcome. 7 | 8 | ## Setup 9 | 10 | ``` 11 | git clone https://github.com/leggedrobotics/rsl_rl 12 | cd rsl_rl 13 | pip install -e . 14 | ``` 15 | 16 | ### Useful Links ### 17 | Example use case: https://github.com/leggedrobotics/legged_gym 18 | Project website: https://leggedrobotics.github.io/legged_gym/ 19 | Paper: https://arxiv.org/abs/2109.11978 20 | 21 | **Maintainer**: Nikita Rudin 22 | **Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA 23 | **Contact**: rudinn@ethz.ch 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Genesis-backflip 2 | 3 | A functional and streamlined codebase for locomotion training in Genesis, featuring: 4 | - Effective domain randomizations and useful rewards 5 | - Fundamental rewards for executing a complete backflip 6 | 7 | We use the same domain randomizations as in this repository in both single and double backflips. During deployment, we slightly changed Kps used on the real robot to do a clean backflip. 8 | 9 | ## Usage 10 | 11 | 1. 
Install the modified version of rsl_rl 12 | 13 | ``` 14 | cd rsl_rl && pip install -e . 15 | ``` 16 | 17 | 2. Train the Unitree Go2 for a backflip 18 | 19 | This codebase includes only the fundamental rewards required for learning a backflip. You can observe how Go2 learns to perform a full backflip by running: 20 | ``` 21 | python train_backflip.py -e EXP_NAME 22 | ``` 23 | 24 | 3. Evaluate existing checkpoints 25 | 26 | Run 27 | ``` 28 | python eval_backflip.py -e EXP_NAME --ckpt NUM_CKPT 29 | ``` 30 | 31 | 4. Generate a deployable checkpoint 32 | 33 | To produce a deployable checkpoint comparable to the provided example in Genesis, additional rewards must be incorporated to regularize motion and minimize torque peaks. 34 | 35 | ## Acknowledgement 36 | 37 | This codebase was inspired by [legged_gym](https://github.com/leggedrobotics/legged_gym) and its reward design principles. -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/numpy_license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2021, NumPy Developers. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /rsl_rl/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 
14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/dependencies for license information of dependencies of this package. -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .ppo import PPO -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/env/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .vec_env import VecEnv -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/runners/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .on_policy_runner import OnPolicyRunner -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .utils import split_and_pad_trajectories, unpad_trajectories -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from .actor_critic import ActorCritic, ActorCriticNostd 32 | from .actor_critic_recurrent import ActorCriticRecurrent -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | tmp/ 3 | MUJOCO_LOG.TXT 4 | imgui.ini 5 | .DS_Store 6 | sftp-config.json 7 | *.mp4 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | data/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | # lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # debug 40 | examples/debug/* 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | *.py,cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | doc/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 107 | __pypackages__/ 108 | 109 | # Celery stuff 110 | celerybeat-schedule 111 | celerybeat.pid 112 | 113 | # SageMath parsed files 114 | *.sage.py 115 | 116 | # Environments 117 | .env 118 | .venv 119 | env/ 120 | venv/ 121 | ENV/ 122 | env.bak/ 123 | venv.bak/ 124 | .idea/ 125 | .vscode/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # mkdocs documentation 135 | /site 136 | 137 | # mypy 138 | .mypy_cache/ 139 | .dmypy.json 140 | dmypy.json 141 | 142 | # Pyre type checker 143 | .pyre/ 144 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. 
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | from abc import ABC, abstractmethod 32 | import torch 33 | from typing import Tuple, Union 34 | 35 | # minimal interface of the environment 36 | class VecEnv(ABC): 37 | num_envs: int 38 | num_obs: int 39 | num_privileged_obs: int 40 | num_actions: int 41 | max_episode_length: int 42 | privileged_obs_buf: torch.Tensor 43 | obs_buf: torch.Tensor 44 | rew_buf: torch.Tensor 45 | reset_buf: torch.Tensor 46 | episode_length_buf: torch.Tensor # current episode duration 47 | extras: dict 48 | device: torch.device 49 | @abstractmethod 50 | def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 51 | pass 52 | @abstractmethod 53 | def reset(self, env_ids: Union[list, torch.Tensor]): 54 | pass 55 | @abstractmethod 56 | def get_observations(self) -> torch.Tensor: 57 | pass 58 | @abstractmethod 59 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 60 | pass -------------------------------------------------------------------------------- /eval_backflip.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import pickle 5 | 6 | import numpy as np 7 | import torch 8 | from reward_wrapper import Backflip 9 | from rsl_rl.runners import OnPolicyRunner 10 | 11 | import genesis as gs 12 | 13 | def export_policy_as_jit(actor_critic, path, name): 14 | os.makedirs(path, exist_ok=True) 15 | path = os.path.join(path, f'{name}.pt') 16 | model = copy.deepcopy(actor_critic.actor).to('cpu') 17 | traced_script_module = torch.jit.script(model) 18 | traced_script_module.save(path) 19 | 20 | def main(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('-e', '--exp_name', type=str, default='backflip') 23 | parser.add_argument('-v', '--vis', action='store_true', default=True) 24 | parser.add_argument('-c', '--cpu', action='store_true', default=False) 25 | parser.add_argument('-r', '--record', action='store_true', default=False) 26 | parser.add_argument('--ckpt', type=int, default=1000) 
27 | args = parser.parse_args() 28 | 29 | gs.init(backend=gs.cpu if args.cpu else gs.gpu) 30 | 31 | env_cfg, obs_cfg, reward_cfg, command_cfg = pickle.load( 32 | open(f'logs/{args.exp_name}/cfgs.pkl', 'rb') 33 | ) 34 | reward_cfg['reward_scales'] = {"feet_distance": 1} 35 | 36 | env = Backflip( 37 | num_envs=1, 38 | env_cfg=env_cfg, 39 | obs_cfg=obs_cfg, 40 | reward_cfg=reward_cfg, 41 | command_cfg=command_cfg, 42 | show_viewer=args.vis, 43 | eval=True, 44 | debug=True, 45 | ) 46 | 47 | log_dir = f'logs/{args.exp_name}' 48 | jit_ckpt_path = os.path.join(log_dir, 'exported', args.exp_name + f'_ckpt{args.ckpt}.pt') 49 | if os.path.exists(jit_ckpt_path): 50 | policy = torch.jit.load(jit_ckpt_path) 51 | policy.to(device='cuda:0') 52 | else: 53 | args.max_iterations = 1 54 | from train_backflip import get_train_cfg 55 | runner = OnPolicyRunner(env, get_train_cfg(args), log_dir, device='cuda:0') 56 | 57 | resume_path = os.path.join(log_dir, f'model_{args.ckpt}.pt') 58 | runner.load(resume_path) 59 | path = os.path.join(log_dir, 'exported') 60 | export_policy_as_jit(runner.alg.actor_critic, path, args.exp_name + f'_ckpt{args.ckpt}') 61 | 62 | policy = runner.get_inference_policy(device='cuda:0') 63 | 64 | env.reset() 65 | obs = env.get_observations() 66 | 67 | with torch.no_grad(): 68 | stop = False 69 | n_frames = 0 70 | if args.record: 71 | env.start_recording(record_internal=False) 72 | while not stop: 73 | actions = policy(obs) 74 | obs, _, rews, dones, infos = env.step(actions) 75 | n_frames += 1 76 | if args.record: 77 | if n_frames == 100: 78 | env.stop_recording("backflip.mp4") 79 | exit() 80 | 81 | if __name__ == '__main__': 82 | main() -------------------------------------------------------------------------------- /rsl_rl/licenses/dependencies/torch_license.txt: -------------------------------------------------------------------------------- 1 | From PyTorch: 2 | 3 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 4 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 5 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 6 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 7 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 8 | Copyright (c) 2011-2013 NYU (Clement Farabet) 9 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 10 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 11 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 12 | 13 | From Caffe2: 14 | 15 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 16 | 17 | All contributions by Facebook: 18 | Copyright (c) 2016 Facebook Inc. 19 | 20 | All contributions by Google: 21 | Copyright (c) 2015 Google Inc. 22 | All rights reserved. 23 | 24 | All contributions by Yangqing Jia: 25 | Copyright (c) 2015 Yangqing Jia 26 | All rights reserved. 27 | 28 | All contributions by Kakao Brain: 29 | Copyright 2019-2020 Kakao Brain 30 | 31 | All contributions from Caffe: 32 | Copyright(c) 2013, 2014, 2015, the respective contributors 33 | All rights reserved. 34 | 35 | All other contributions: 36 | Copyright(c) 2015, 2016 the respective contributors 37 | All rights reserved. 38 | 39 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 40 | copyright over their contributions to Caffe2. The project versioning records 41 | all such contribution and copyright details. 
If a contributor wants to further 42 | mark their specific copyright on a particular contribution, they should 43 | indicate their copyright solely in the commit message of the change when it is 44 | committed. 45 | 46 | All rights reserved. 47 | 48 | Redistribution and use in source and binary forms, with or without 49 | modification, are permitted provided that the following conditions are met: 50 | 51 | 1. Redistributions of source code must retain the above copyright 52 | notice, this list of conditions and the following disclaimer. 53 | 54 | 2. Redistributions in binary form must reproduce the above copyright 55 | notice, this list of conditions and the following disclaimer in the 56 | documentation and/or other materials provided with the distribution. 57 | 58 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 59 | and IDIAP Research Institute nor the names of its contributors may be 60 | used to endorse or promote products derived from this software without 61 | specific prior written permission. 62 | 63 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 64 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 67 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 68 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 69 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 70 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 71 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 72 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 73 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/utils/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | 33 | def split_and_pad_trajectories(tensor, dones): 34 | """ Splits trajectories at done indices. Then concatenates them and padds with zeros up to the length og the longest trajectory. 35 | Returns masks corresponding to valid parts of the trajectories 36 | Example: 37 | Input: [ [a1, a2, a3, a4 | a5, a6], 38 | [b1, b2 | b3, b4, b5 | b6] 39 | ] 40 | 41 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True], 42 | [a5, a6, 0, 0], | [True, True, False, False], 43 | [b1, b2, 0, 0], | [True, True, False, False], 44 | [b3, b4, b5, 0], | [True, True, True, False], 45 | [b6, 0, 0, 0] | [True, False, False, False], 46 | ] | ] 47 | 48 | Assumes that the inputy has the following dimension order: [time, number of envs, aditional dimensions] 49 | """ 50 | dones = dones.clone() 51 | dones[-1] = 1 52 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping 53 | flat_dones = dones.transpose(1, 0).reshape(-1, 1) 54 | 55 | # Get length of trajectory by counting the number of successive not done elements 56 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0])) 57 | trajectory_lengths = done_indices[1:] - done_indices[:-1] 58 | trajectory_lengths_list = trajectory_lengths.tolist() 59 | # Extract the individual trajectories 60 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list) 61 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories) 62 | 63 | 64 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1) 65 | return padded_trajectories, trajectory_masks 66 | 67 | def unpad_trajectories(trajectories, masks): 68 | """ Does the inverse operation of split_and_pad_trajectories() 69 | """ 70 | # Need to transpose before and after the masking to have proper reshaping 71 | return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def wrap_to_pi(angles): 6 | angles %= 2 * np.pi 7 | angles -= 2 * np.pi * (angles > np.pi) 8 | return angles 9 | 10 | 11 | def gs_rand_float(lower, upper, shape, device): 12 | return (upper - lower) * torch.rand(size=shape, device=device) + lower 13 | 14 | 15 | def gs_inv_quat(quat): 16 | qw, qx, qy, qz = quat.unbind(-1) 17 | inv_quat = torch.stack([1.0 * qw, -qx, -qy, -qz], dim=-1) 18 | return inv_quat 19 | 20 | 21 | def gs_transform_by_quat(pos, quat): 22 | qw, qx, qy, qz = quat.unbind(-1) 23 | 24 | rot_matrix = torch.stack( 25 | [ 26 | 1.0 - 2 * qy**2 - 2 * qz**2, 27 | 2 * qx * qy - 2 * qz * qw, 28 | 2 * qx * qz + 2 * qy * qw, 29 | 2 * qx * qy + 2 * qz * qw, 30 | 1 
- 2 * qx**2 - 2 * qz**2, 31 | 2 * qy * qz - 2 * qx * qw, 32 | 2 * qx * qz - 2 * qy * qw, 33 | 2 * qy * qz + 2 * qx * qw, 34 | 1 - 2 * qx**2 - 2 * qy**2, 35 | ], 36 | dim=-1, 37 | ).reshape(*quat.shape[:-1], 3, 3) 38 | rotated_pos = torch.matmul(rot_matrix, pos.unsqueeze(-1)).squeeze(-1) 39 | 40 | return rotated_pos 41 | 42 | 43 | def gs_quat2euler(quat): # xyz 44 | # Extract quaternion components 45 | qw, qx, qy, qz = quat.unbind(-1) 46 | 47 | # Roll (x-axis rotation) 48 | sinr_cosp = 2 * (qw * qx + qy * qz) 49 | cosr_cosp = 1 - 2 * (qx * qx + qy * qy) 50 | roll = torch.atan2(sinr_cosp, cosr_cosp) 51 | 52 | # Pitch (y-axis rotation) 53 | sinp = 2 * (qw * qy - qz * qx) 54 | pitch = torch.where( 55 | torch.abs(sinp) >= 1, 56 | torch.sign(sinp) * torch.tensor(torch.pi / 2), 57 | torch.asin(sinp), 58 | ) 59 | 60 | # Yaw (z-axis rotation) 61 | siny_cosp = 2 * (qw * qz + qx * qy) 62 | cosy_cosp = 1 - 2 * (qy * qy + qz * qz) 63 | yaw = torch.atan2(siny_cosp, cosy_cosp) 64 | 65 | return torch.stack([roll, pitch, yaw], dim=-1) 66 | 67 | 68 | def gs_euler2quat(xyz): # xyz 69 | 70 | roll, pitch, yaw = xyz.unbind(-1) 71 | 72 | cosr = (roll * 0.5).cos() 73 | sinr = (roll * 0.5).sin() 74 | cosp = (pitch * 0.5).cos() 75 | sinp = (pitch * 0.5).sin() 76 | cosy = (yaw * 0.5).cos() 77 | siny = (yaw * 0.5).sin() 78 | 79 | qw = cosr * cosp * cosy + sinr * sinp * siny 80 | qx = sinr * cosp * cosy - cosr * sinp * siny 81 | qy = cosr * sinp * cosy + sinr * cosp * siny 82 | qz = cosr * cosp * siny - sinr * sinp * cosy 83 | 84 | return torch.stack([qw, qx, qy, qz], dim=-1) 85 | 86 | 87 | def gs_quat_from_angle_axis(angle, axis): 88 | theta = (angle / 2).unsqueeze(-1) 89 | xyz = normalize(axis) * theta.sin() 90 | w = theta.cos() 91 | return normalize(torch.cat([w, xyz], dim=-1)) 92 | 93 | 94 | def normalize(x, eps: float = 1e-9): 95 | return x / x.norm(p=2, dim=-1).clamp(min=eps, max=None).unsqueeze(-1) 96 | 97 | 98 | def gs_quat_mul(a, b): 99 | assert a.shape == b.shape 100 | shape = a.shape 101 | a = a.reshape(-1, 4) 102 | b = b.reshape(-1, 4) 103 | 104 | w1, x1, y1, z1 = a[:, 0], a[:, 1], a[:, 2], a[:, 3] 105 | w2, x2, y2, z2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3] 106 | ww = (z1 + x1) * (x2 + y2) 107 | yy = (w1 - y1) * (w2 + z2) 108 | zz = (w1 + y1) * (w2 - z2) 109 | xx = ww + yy + zz 110 | qq = 0.5 * (xx + (z1 - x1) * (x2 - y2)) 111 | w = qq - ww + (z1 - y1) * (y2 - z2) 112 | x = qq - xx + (x1 + w1) * (x2 + w2) 113 | y = qq - yy + (w1 - x1) * (y2 + z2) 114 | z = qq - zz + (z1 + y1) * (w2 - x2) 115 | 116 | quat = torch.stack([w, x, y, z], dim=-1).view(shape) 117 | 118 | return quat 119 | 120 | 121 | def gs_quat_apply(a, b): 122 | shape = b.shape 123 | a = a.reshape(-1, 4) 124 | b = b.reshape(-1, 3) 125 | xyz = a[:, 1:] 126 | t = xyz.cross(b, dim=-1) * 2 127 | return (b + a[:, :1] * t + xyz.cross(t, dim=-1)).view(shape) 128 | 129 | 130 | def gs_quat_apply_yaw(quat, vec): 131 | quat_yaw = quat.clone().view(-1, 4) 132 | quat_yaw[:, 1:3] = 0. 133 | quat_yaw = normalize(quat_yaw) 134 | return gs_quat_apply(quat_yaw, vec) 135 | 136 | 137 | def gs_quat_conjugate(a): 138 | shape = a.shape 139 | a = a.reshape(-1, 4) 140 | return torch.cat((a[:, :1], -a[:, 1:], ), dim=-1).view(shape) 141 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic_recurrent.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | from .actor_critic import ActorCritic, get_activation 38 | from rsl_rl.utils import unpad_trajectories 39 | 40 | class ActorCriticRecurrent(ActorCritic): 41 | is_recurrent = True 42 | def __init__(self, num_actor_obs, 43 | num_critic_obs, 44 | num_actions, 45 | actor_hidden_dims=[256, 256, 256], 46 | critic_hidden_dims=[256, 256, 256], 47 | activation='elu', 48 | rnn_type='lstm', 49 | rnn_hidden_size=256, 50 | rnn_num_layers=1, 51 | init_noise_std=1.0, 52 | **kwargs): 53 | if kwargs: 54 | print("ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),) 55 | 56 | super().__init__(num_actor_obs=rnn_hidden_size, 57 | num_critic_obs=rnn_hidden_size, 58 | num_actions=num_actions, 59 | actor_hidden_dims=actor_hidden_dims, 60 | critic_hidden_dims=critic_hidden_dims, 61 | activation=activation, 62 | init_noise_std=init_noise_std) 63 | 64 | activation = get_activation(activation) 65 | 66 | self.memory_a = Memory(num_actor_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 67 | self.memory_c = Memory(num_critic_obs, type=rnn_type, num_layers=rnn_num_layers, hidden_size=rnn_hidden_size) 68 | 69 | print(f"Actor RNN: {self.memory_a}") 70 | print(f"Critic RNN: {self.memory_c}") 71 | 72 | def reset(self, dones=None): 73 | self.memory_a.reset(dones) 74 | self.memory_c.reset(dones) 75 | 76 | def act(self, observations, masks=None, hidden_states=None): 77 | input_a = self.memory_a(observations, masks, hidden_states) 78 | return super().act(input_a.squeeze(0)) 79 | 80 | def act_inference(self, observations): 81 | input_a = self.memory_a(observations) 82 | return super().act_inference(input_a.squeeze(0)) 83 | 84 | 
def evaluate(self, critic_observations, masks=None, hidden_states=None): 85 | input_c = self.memory_c(critic_observations, masks, hidden_states) 86 | return super().evaluate(input_c.squeeze(0)) 87 | 88 | def get_hidden_states(self): 89 | return self.memory_a.hidden_states, self.memory_c.hidden_states 90 | 91 | 92 | class Memory(torch.nn.Module): 93 | def __init__(self, input_size, type='lstm', num_layers=1, hidden_size=256): 94 | super().__init__() 95 | # RNN 96 | rnn_cls = nn.GRU if type.lower() == 'gru' else nn.LSTM 97 | self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers) 98 | self.hidden_states = None 99 | 100 | def forward(self, input, masks=None, hidden_states=None): 101 | batch_mode = masks is not None 102 | if batch_mode: 103 | # batch mode (policy update): need saved hidden states 104 | if hidden_states is None: 105 | raise ValueError("Hidden states not passed to memory module during policy update") 106 | out, _ = self.rnn(input, hidden_states) 107 | out = unpad_trajectories(out, masks) 108 | else: 109 | # inference mode (collection): use hidden states of last step 110 | out, self.hidden_states = self.rnn(input.unsqueeze(0), self.hidden_states) 111 | return out 112 | 113 | def reset(self, dones=None): 114 | # When the RNN is an LSTM, self.hidden_states_a is a list with hidden_state and cell_state 115 | for hidden_state in self.hidden_states: 116 | hidden_state[..., dones, :] = 0.0 -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/modules/actor_critic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import numpy as np 32 | 33 | import torch 34 | import torch.nn as nn 35 | from torch.distributions import Normal 36 | from torch.nn.modules import rnn 37 | 38 | class ActorCritic(nn.Module): 39 | is_recurrent = False 40 | def __init__(self, num_actor_obs, 41 | num_critic_obs, 42 | num_actions, 43 | actor_hidden_dims=[256, 256, 256], 44 | critic_hidden_dims=[256, 256, 256], 45 | activation='elu', 46 | init_noise_std=1.0, 47 | **kwargs): 48 | if kwargs: 49 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()])) 50 | super(ActorCritic, self).__init__() 51 | 52 | activation = get_activation(activation) 53 | 54 | mlp_input_dim_a = num_actor_obs 55 | mlp_input_dim_c = num_critic_obs 56 | 57 | # Policy 58 | actor_layers = [] 59 | actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) 60 | actor_layers.append(activation) 61 | for l in range(len(actor_hidden_dims)): 62 | if l == len(actor_hidden_dims) - 1: 63 | actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions)) 64 | else: 65 | actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1])) 66 | actor_layers.append(activation) 67 | self.actor = nn.Sequential(*actor_layers) 68 | 69 | # Value function 70 | critic_layers = [] 71 | critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) 72 | critic_layers.append(activation) 73 | for l in range(len(critic_hidden_dims)): 74 | if l == len(critic_hidden_dims) - 1: 75 | critic_layers.append(nn.Linear(critic_hidden_dims[l], 1)) 76 | else: 77 | critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1])) 78 | critic_layers.append(activation) 79 | self.critic = nn.Sequential(*critic_layers) 80 | 81 | print(f"Actor MLP: {self.actor}") 82 | print(f"Critic MLP: {self.critic}") 83 | 84 | # Action noise 85 | self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) 86 | self.distribution = None 87 | # disable args validation for speedup 88 | Normal.set_default_validate_args = False 89 | 90 | # seems that we get better performance without init 91 | # self.init_memory_weights(self.memory_a, 0.001, 0.) 92 | # self.init_memory_weights(self.memory_c, 0.001, 0.) 93 | 94 | @staticmethod 95 | # not used at the moment 96 | def init_weights(sequential, scales): 97 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 98 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 99 | 100 | 101 | def reset(self, dones=None): 102 | pass 103 | 104 | def forward(self): 105 | raise NotImplementedError 106 | 107 | @property 108 | def action_mean(self): 109 | return self.distribution.mean 110 | 111 | @property 112 | def action_std(self): 113 | return self.distribution.stddev 114 | 115 | @property 116 | def entropy(self): 117 | return self.distribution.entropy().sum(dim=-1) 118 | 119 | def update_distribution(self, observations): 120 | mean = self.actor(observations) 121 | self.distribution = Normal(mean, mean*0. 
+ self.std) 122 | 123 | def act(self, observations, **kwargs): 124 | self.update_distribution(observations) 125 | return self.distribution.sample() 126 | 127 | def get_actions_log_prob(self, actions): 128 | return self.distribution.log_prob(actions).sum(dim=-1) 129 | 130 | def act_inference(self, observations): 131 | actions_mean = self.actor(observations) 132 | return actions_mean 133 | 134 | def evaluate(self, critic_observations, **kwargs): 135 | value = self.critic(critic_observations) 136 | return value 137 | 138 | class ActorCriticNostd(ActorCritic): 139 | def __init__(self, num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims=[256, 256, 256], critic_hidden_dims=[256, 256, 256], activation='elu', init_noise_std=1, **kwargs): 140 | super().__init__(num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims, critic_hidden_dims, activation, init_noise_std, **kwargs) 141 | self.action = None 142 | 143 | @property 144 | def action_mean(self): 145 | return self.action 146 | 147 | def update_distribution(self, observations): 148 | self.actor.train() 149 | self.action = self.actor(observations) 150 | 151 | def act(self, observations, **kwargs): 152 | self.update_distribution(observations) 153 | return self.action_mean 154 | 155 | def get_activation(act_name): 156 | if act_name == "elu": 157 | return nn.ELU() 158 | elif act_name == "selu": 159 | return nn.SELU() 160 | elif act_name == "relu": 161 | return nn.ReLU() 162 | elif act_name == "crelu": 163 | return nn.ReLU() 164 | elif act_name == "lrelu": 165 | return nn.LeakyReLU() 166 | elif act_name == "tanh": 167 | return nn.Tanh() 168 | elif act_name == "sigmoid": 169 | return nn.Sigmoid() 170 | else: 171 | print("invalid activation function!") 172 | return None 173 | -------------------------------------------------------------------------------- /train_backflip.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import pickle 5 | import shutil 6 | 7 | import numpy as np 8 | import torch 9 | import wandb 10 | from reward_wrapper import Backflip 11 | from locomotion_env import LocoEnv 12 | from rsl_rl.runners import OnPolicyRunner 13 | 14 | import genesis as gs 15 | 16 | 17 | def get_train_cfg(args): 18 | 19 | train_cfg_dict = { 20 | 'algorithm': { 21 | 'clip_param': 0.2, 22 | 'desired_kl': 0.01, 23 | 'entropy_coef': 0.01, 24 | 'gamma': 0.99, 25 | 'lam': 0.95, 26 | 'learning_rate': 0.001, 27 | 'max_grad_norm': 1.0, 28 | 'num_learning_epochs': 5, 29 | 'num_mini_batches': 4, 30 | 'schedule': 'adaptive', 31 | 'use_clipped_value_loss': True, 32 | 'value_loss_coef': 1.0, 33 | }, 34 | 'init_member_classes': {}, 35 | 'policy': { 36 | 'activation': 'elu', 37 | 'actor_hidden_dims': [512, 256, 128], 38 | 'critic_hidden_dims': [512, 256, 128], 39 | 'init_noise_std': 1.0, 40 | }, 41 | 'runner': { 42 | 'algorithm_class_name': 'PPO', 43 | 'checkpoint': -1, 44 | 'experiment_name': args.exp_name, 45 | 'load_run': -1, 46 | 'log_interval': 1, 47 | 'max_iterations': args.max_iterations, 48 | 'num_steps_per_env': 24, 49 | 'policy_class_name': 'ActorCritic', 50 | 'record_interval': 50, 51 | 'resume': False, 52 | 'resume_path': None, 53 | 'run_name': '', 54 | 'runner_class_name': 'runner_class_name', 55 | 'save_interval': 100, 56 | }, 57 | 'runner_class_name': 'OnPolicyRunner', 58 | 'seed': 1, 59 | } 60 | 61 | return train_cfg_dict 62 | 63 | 64 | def get_cfgs(): 65 | env_cfg = { 66 | 'urdf_path': 'urdf/go2/urdf/go2.urdf', 67 | 'links_to_keep': ['FL_foot', 'FR_foot', 
'RL_foot', 'RR_foot',], 68 | 'num_actions': 12, 69 | 'num_dofs': 12, 70 | # joint/link names 71 | 'default_joint_angles': { # [rad] 72 | 'FL_hip_joint': 0.0, 73 | 'FR_hip_joint': 0.0, 74 | 'RL_hip_joint': 0.0, 75 | 'RR_hip_joint': 0.0, 76 | 'FL_thigh_joint': 0.8, 77 | 'FR_thigh_joint': 0.8, 78 | 'RL_thigh_joint': 1.0, 79 | 'RR_thigh_joint': 1.0, 80 | 'FL_calf_joint': -1.5, 81 | 'FR_calf_joint': -1.5, 82 | 'RL_calf_joint': -1.5, 83 | 'RR_calf_joint': -1.5, 84 | }, 85 | 'dof_names': [ 86 | 'FR_hip_joint', 87 | 'FR_thigh_joint', 88 | 'FR_calf_joint', 89 | 'FL_hip_joint', 90 | 'FL_thigh_joint', 91 | 'FL_calf_joint', 92 | 'RR_hip_joint', 93 | 'RR_thigh_joint', 94 | 'RR_calf_joint', 95 | 'RL_hip_joint', 96 | 'RL_thigh_joint', 97 | 'RL_calf_joint', 98 | ], 99 | 'termination_contact_link_names': ['base'], 100 | 'penalized_contact_link_names': ['base', 'thigh', 'calf'], 101 | 'feet_link_names': ['foot'], 102 | 'base_link_name': ['base'], 103 | # PD 104 | 'PD_stiffness': {'joint': 70.0}, 105 | 'PD_damping': {'joint': 3.0}, 106 | 'use_implicit_controller': False, 107 | # termination 108 | 'termination_if_roll_greater_than': 0.4, 109 | 'termination_if_pitch_greater_than': 0.4, 110 | 'termination_if_height_lower_than': 0.2, 111 | # base pose 112 | 'base_init_pos': [0.0, 0.0, 0.36], 113 | 'base_init_quat': [1.0, 0.0, 0.0, 0.0], 114 | # random push 115 | 'push_interval_s': -1, 116 | 'max_push_vel_xy': 1.0, 117 | # time (second) 118 | 'episode_length_s': 2.0, 119 | 'resampling_time_s': 4.0, 120 | 'command_type': 'ang_vel_yaw', # 'ang_vel_yaw' or 'heading' 121 | 'action_scale': 0.5, 122 | 'action_latency': 0.02, 123 | 'clip_actions': 100.0, 124 | 'send_timeouts': True, 125 | 'control_freq': 50, 126 | 'decimation': 4, 127 | 'feet_geom_offset': 1, 128 | 'use_terrain': False, 129 | # domain randomization 130 | 'randomize_friction': True, 131 | 'friction_range': [0.2, 1.5], 132 | 'randomize_base_mass': True, 133 | 'added_mass_range': [-1., 3.], 134 | 'randomize_com_displacement': True, 135 | 'com_displacement_range': [-0.01, 0.01], 136 | 'randomize_motor_strength': False, 137 | 'motor_strength_range': [0.9, 1.1], 138 | 'randomize_motor_offset': True, 139 | 'motor_offset_range': [-0.02, 0.02], 140 | 'randomize_kp_scale': True, 141 | 'kp_scale_range': [0.8, 1.2], 142 | 'randomize_kd_scale': True, 143 | 'kd_scale_range': [0.8, 1.2], 144 | # coupling 145 | 'coupling': False, 146 | } 147 | obs_cfg = { 148 | 'num_obs': 60, 149 | 'num_history_obs': 1, 150 | 'obs_noise': { 151 | 'ang_vel': 0.1, 152 | 'gravity': 0.02, 153 | 'dof_pos': 0.01, 154 | 'dof_vel': 0.5, 155 | }, 156 | 'obs_scales': { 157 | 'lin_vel': 2.0, 158 | 'ang_vel': 0.25, 159 | 'dof_pos': 1.0, 160 | 'dof_vel': 0.05, 161 | }, 162 | 'num_priv_obs': 64, 163 | } 164 | reward_cfg = { 165 | 'soft_dof_pos_limit': 0.9, 166 | 'reward_scales': { 167 | 'ang_vel_y': 5.0, 168 | 'ang_vel_z': -1.0, 169 | 'lin_vel_z': 20.0, 170 | 'orientation_control': -1.0, 171 | 'feet_height_before_backflip': -30.0, 172 | 'height_control': -10.0, 173 | 'actions_symmetry': -0.1, 174 | 'gravity_y': -10.0, 175 | 'feet_distance': -1.0, 176 | 'action_rate': -0.001, 177 | }, 178 | } 179 | command_cfg = { 180 | 'num_commands': 4, 181 | 'lin_vel_x_range': [-0.0, 0.0], 182 | 'lin_vel_y_range': [-0.0, 0.0], 183 | 'ang_vel_range': [-0.0, 0.0], 184 | } 185 | 186 | return env_cfg, obs_cfg, reward_cfg, command_cfg 187 | 188 | 189 | def main(): 190 | parser = argparse.ArgumentParser() 191 | parser.add_argument('-e', '--exp_name', type=str, default='backflip') 192 | parser.add_argument('-v', '--vis', 
action='store_true', default=False) 193 | parser.add_argument('-c', '--cpu', action='store_true', default=False) 194 | parser.add_argument('-B', '--num_envs', type=int, default=10000) 195 | parser.add_argument('--max_iterations', type=int, default=1000) 196 | parser.add_argument('--resume', type=str, default=None) 197 | parser.add_argument('-o', '--offline', action='store_true', default=False) 198 | 199 | parser.add_argument('--eval', action='store_true', default=False) 200 | parser.add_argument('--debug', action='store_true', default=False) 201 | parser.add_argument('--ckpt', type=int, default=1000) 202 | args = parser.parse_args() 203 | 204 | if args.debug: 205 | args.vis = True 206 | args.offline = True 207 | args.num_envs = 1 208 | 209 | gs.init( 210 | backend=gs.cpu if args.cpu else gs.gpu, 211 | logging_level='warning', 212 | ) 213 | 214 | log_dir = f'logs/{args.exp_name}' 215 | env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs() 216 | 217 | if os.path.exists(log_dir): 218 | shutil.rmtree(log_dir) 219 | os.makedirs(log_dir, exist_ok=True) 220 | 221 | env = Backflip( 222 | num_envs=args.num_envs, 223 | env_cfg=env_cfg, 224 | obs_cfg=obs_cfg, 225 | reward_cfg=reward_cfg, 226 | command_cfg=command_cfg, 227 | show_viewer=args.vis, 228 | eval=args.eval, 229 | debug=args.debug, 230 | ) 231 | 232 | runner = OnPolicyRunner(env, get_train_cfg(args), log_dir, device='cuda:0') 233 | 234 | if args.resume is not None: 235 | resume_dir = f'logs/{args.resume}' 236 | resume_path = os.path.join(resume_dir, f'model_{args.ckpt}.pt') 237 | print('==> resume training from', resume_path) 238 | runner.load(resume_path) 239 | 240 | wandb.init(project='genesis', name=args.exp_name, dir=log_dir, mode='offline' if args.offline else 'online') 241 | 242 | pickle.dump( 243 | [env_cfg, obs_cfg, reward_cfg, command_cfg], 244 | open(f'{log_dir}/cfgs.pkl', 'wb'), 245 | ) 246 | 247 | runner.learn(num_learning_iterations=args.max_iterations, init_at_random_ep_len=True) 248 | 249 | 250 | if __name__ == '__main__': 251 | main() 252 | 253 | 254 | ''' 255 | # training 256 | python train_backflip.py -e EXP_NAME 257 | 258 | # evaluation 259 | python eval_backflip.py -e EXP_NAME --ckpt NUM_CKPT 260 | ''' -------------------------------------------------------------------------------- /train_walk.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import pickle 5 | import shutil 6 | 7 | import numpy as np 8 | import torch 9 | import wandb 10 | from reward_wrapper import Go2 11 | from locomotion_env import LocoEnv 12 | from rsl_rl.runners import OnPolicyRunner 13 | 14 | import genesis as gs 15 | 16 | 17 | def get_train_cfg(args): 18 | 19 | train_cfg_dict = { 20 | 'algorithm': { 21 | 'clip_param': 0.2, 22 | 'desired_kl': 0.01, 23 | 'entropy_coef': 0.01, 24 | 'gamma': 0.99, 25 | 'lam': 0.95, 26 | 'learning_rate': 0.001, 27 | 'max_grad_norm': 1.0, 28 | 'num_learning_epochs': 5, 29 | 'num_mini_batches': 4, 30 | 'schedule': 'adaptive', 31 | 'use_clipped_value_loss': True, 32 | 'value_loss_coef': 1.0, 33 | }, 34 | 'init_member_classes': {}, 35 | 'policy': { 36 | 'activation': 'elu', 37 | 'actor_hidden_dims': [512, 256, 128], 38 | 'critic_hidden_dims': [512, 256, 128], 39 | 'init_noise_std': 1.0, 40 | }, 41 | 'runner': { 42 | 'algorithm_class_name': 'PPO', 43 | 'checkpoint': -1, 44 | 'experiment_name': args.exp_name, 45 | 'load_run': -1, 46 | 'log_interval': 1, 47 | 'max_iterations': args.max_iterations, 48 | 'num_steps_per_env': 24, 49 | 
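# NOTE on batch sizing (derived from the values in this file): with the default num_envs=10000
# (the -B argument further down) and num_steps_per_env=24, each PPO iteration collects
# 10000 * 24 = 240,000 transitions; the 'algorithm' settings above then split these into
# num_mini_batches=4 minibatches of 60,000 samples each, replayed for num_learning_epochs=5 epochs.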
'policy_class_name': 'ActorCritic', 50 | 'record_interval': 50, 51 | 'resume': False, 52 | 'resume_path': None, 53 | 'run_name': '', 54 | 'runner_class_name': 'runner_class_name', 55 | 'save_interval': 100, 56 | }, 57 | 'runner_class_name': 'OnPolicyRunner', 58 | 'seed': 1, 59 | } 60 | 61 | return train_cfg_dict 62 | 63 | 64 | def get_cfgs(): 65 | env_cfg = { 66 | 'urdf_path': 'urdf/go2/urdf/go2.urdf', 67 | 'links_to_keep': ['FL_foot', 'FR_foot', 'RL_foot', 'RR_foot',], 68 | 'num_actions': 12, 69 | 'num_dofs': 12, 70 | # joint/link names 71 | 'default_joint_angles': { # [rad] 72 | 'FL_hip_joint': 0.0, 73 | 'FR_hip_joint': 0.0, 74 | 'RL_hip_joint': 0.0, 75 | 'RR_hip_joint': 0.0, 76 | 'FL_thigh_joint': 0.8, 77 | 'FR_thigh_joint': 0.8, 78 | 'RL_thigh_joint': 1.0, 79 | 'RR_thigh_joint': 1.0, 80 | 'FL_calf_joint': -1.5, 81 | 'FR_calf_joint': -1.5, 82 | 'RL_calf_joint': -1.5, 83 | 'RR_calf_joint': -1.5, 84 | }, 85 | 'dof_names': [ 86 | 'FR_hip_joint', 87 | 'FR_thigh_joint', 88 | 'FR_calf_joint', 89 | 'FL_hip_joint', 90 | 'FL_thigh_joint', 91 | 'FL_calf_joint', 92 | 'RR_hip_joint', 93 | 'RR_thigh_joint', 94 | 'RR_calf_joint', 95 | 'RL_hip_joint', 96 | 'RL_thigh_joint', 97 | 'RL_calf_joint', 98 | ], 99 | 'termination_contact_link_names': ['base'], 100 | 'penalized_contact_link_names': ['base', 'thigh', 'calf'], 101 | 'feet_link_names': ['foot'], 102 | 'base_link_name': ['base'], 103 | # PD 104 | 'PD_stiffness': {'joint': 30.0}, 105 | 'PD_damping': {'joint': 1.5}, 106 | 'use_implicit_controller': False, 107 | # termination 108 | 'termination_if_roll_greater_than': 0.4, 109 | 'termination_if_pitch_greater_than': 0.4, 110 | 'termination_if_height_lower_than': 0.0, 111 | # base pose 112 | 'base_init_pos': [0.0, 0.0, 0.42], 113 | 'base_init_quat': [1.0, 0.0, 0.0, 0.0], 114 | # random push 115 | 'push_interval_s': -1, 116 | 'max_push_vel_xy': 1.0, 117 | # time (second) 118 | 'episode_length_s': 20.0, 119 | 'resampling_time_s': 4.0, 120 | 'command_type': 'ang_vel_yaw', # 'ang_vel_yaw' or 'heading' 121 | 'action_scale': 0.25, 122 | 'action_latency': 0.02, 123 | 'clip_actions': 100.0, 124 | 'send_timeouts': True, 125 | 'control_freq': 50, 126 | 'decimation': 4, 127 | 'feet_geom_offset': 1, 128 | 'use_terrain': False, 129 | # domain randomization 130 | 'randomize_friction': True, 131 | 'friction_range': [0.2, 1.5], 132 | 'randomize_base_mass': True, 133 | 'added_mass_range': [-1., 3.], 134 | 'randomize_com_displacement': True, 135 | 'com_displacement_range': [-0.01, 0.01], 136 | 'randomize_motor_strength': False, 137 | 'motor_strength_range': [0.9, 1.1], 138 | 'randomize_motor_offset': True, 139 | 'motor_offset_range': [-0.02, 0.02], 140 | 'randomize_kp_scale': True, 141 | 'kp_scale_range': [0.8, 1.2], 142 | 'randomize_kd_scale': True, 143 | 'kd_scale_range': [0.8, 1.2], 144 | # coupling 145 | 'coupling': False, 146 | } 147 | obs_cfg = { 148 | 'num_obs': 9 + 3 * env_cfg['num_dofs'], 149 | 'num_history_obs': 1, 150 | 'obs_noise': { 151 | 'ang_vel': 0.1, 152 | 'gravity': 0.02, 153 | 'dof_pos': 0.01, 154 | 'dof_vel': 0.5, 155 | }, 156 | 'obs_scales': { 157 | 'lin_vel': 2.0, 158 | 'ang_vel': 0.25, 159 | 'dof_pos': 1.0, 160 | 'dof_vel': 0.05, 161 | }, 162 | 'num_priv_obs': 12 + 4 * env_cfg['num_dofs'], 163 | } 164 | reward_cfg = { 165 | 'tracking_sigma': 0.25, 166 | 'soft_dof_pos_limit': 0.9, 167 | 'base_height_target': 0.3, 168 | 'reward_scales': { 169 | 'tracking_lin_vel': 1.0, 170 | 'tracking_ang_vel': 0.5, 171 | 'lin_vel_z': -2.0, 172 | 'ang_vel_xy': -0.05, 173 | 'orientation': -10., 174 | 'base_height': 
-50., 175 | 'torques': -0.0002, 176 | 'collision': -1., 177 | 'dof_vel': -0., 178 | 'dof_acc': -2.5e-7, 179 | 'feet_air_time': 1.0, 180 | 'collision': -1., 181 | 'action_rate': -0.01, 182 | }, 183 | } 184 | command_cfg = { 185 | 'num_commands': 4, 186 | 'lin_vel_x_range': [-1.0, 1.0], 187 | 'lin_vel_y_range': [-1.0, 1.0], 188 | 'ang_vel_range': [-1.0, 1.0], 189 | } 190 | 191 | return env_cfg, obs_cfg, reward_cfg, command_cfg 192 | 193 | 194 | def main(): 195 | parser = argparse.ArgumentParser() 196 | parser.add_argument('-e', '--exp_name', type=str, default='Go2') 197 | parser.add_argument('-v', '--vis', action='store_true', default=False) 198 | parser.add_argument('-c', '--cpu', action='store_true', default=False) 199 | parser.add_argument('-B', '--num_envs', type=int, default=10000) 200 | parser.add_argument('--max_iterations', type=int, default=1000) 201 | parser.add_argument('--resume', type=str, default=None) 202 | parser.add_argument('-o', '--offline', action='store_true', default=False) 203 | 204 | parser.add_argument('--eval', action='store_true', default=False) 205 | parser.add_argument('--debug', action='store_true', default=False) 206 | parser.add_argument('--ckpt', type=int, default=1000) 207 | args = parser.parse_args() 208 | 209 | if args.debug: 210 | args.vis = True 211 | args.offline = True 212 | args.num_envs = 1 213 | 214 | gs.init( 215 | backend=gs.cpu if args.cpu else gs.gpu, 216 | logging_level='warning', 217 | ) 218 | 219 | log_dir = f'logs/{args.exp_name}' 220 | env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs() 221 | 222 | if os.path.exists(log_dir): 223 | shutil.rmtree(log_dir) 224 | os.makedirs(log_dir, exist_ok=True) 225 | 226 | env = Go2( 227 | num_envs=args.num_envs, 228 | env_cfg=env_cfg, 229 | obs_cfg=obs_cfg, 230 | reward_cfg=reward_cfg, 231 | command_cfg=command_cfg, 232 | show_viewer=args.vis, 233 | eval=args.eval, 234 | debug=args.debug, 235 | ) 236 | 237 | runner = OnPolicyRunner(env, get_train_cfg(args), log_dir, device='cuda:0') 238 | 239 | if args.resume is not None: 240 | resume_dir = f'logs/{args.resume}' 241 | resume_path = os.path.join(resume_dir, f'model_{args.ckpt}.pt') 242 | print('==> resume training from', resume_path) 243 | runner.load(resume_path) 244 | 245 | wandb.init(project='genesis', name=args.exp_name, dir=log_dir, mode='offline' if args.offline else 'online') 246 | 247 | pickle.dump( 248 | [env_cfg, obs_cfg, reward_cfg, command_cfg], 249 | open(f'{log_dir}/cfgs.pkl', 'wb'), 250 | ) 251 | 252 | runner.learn(num_learning_iterations=args.max_iterations, init_at_random_ep_len=True) 253 | 254 | 255 | if __name__ == '__main__': 256 | main() 257 | 258 | 259 | ''' 260 | # training 261 | python train_backflip.py -e EXP_NAME 262 | 263 | # evaluation 264 | python eval_backflip.py -e EXP_NAME --ckpt NUM_CKPT 265 | ''' -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/algorithms/ppo.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | import torch.nn as nn 33 | import torch.optim as optim 34 | 35 | from rsl_rl.modules import ActorCritic 36 | from rsl_rl.storage import RolloutStorage 37 | 38 | class PPO: 39 | actor_critic: ActorCritic 40 | def __init__(self, 41 | actor_critic, 42 | num_learning_epochs=1, 43 | num_mini_batches=1, 44 | clip_param=0.2, 45 | gamma=0.998, 46 | lam=0.95, 47 | value_loss_coef=1.0, 48 | entropy_coef=0.0, 49 | learning_rate=1e-3, 50 | max_grad_norm=1.0, 51 | use_clipped_value_loss=True, 52 | schedule="fixed", 53 | desired_kl=0.01, 54 | device='cpu', 55 | ): 56 | 57 | self.device = device 58 | 59 | self.desired_kl = desired_kl 60 | self.kl_mean = 0 61 | self.schedule = schedule 62 | self.learning_rate = learning_rate 63 | 64 | # PPO components 65 | self.actor_critic = actor_critic 66 | self.actor_critic.to(self.device) 67 | self.storage = None # initialized later 68 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate) 69 | self.transition = RolloutStorage.Transition() 70 | 71 | # PPO parameters 72 | self.clip_param = clip_param 73 | self.num_learning_epochs = num_learning_epochs 74 | self.num_mini_batches = num_mini_batches 75 | self.value_loss_coef = value_loss_coef 76 | self.entropy_coef = entropy_coef 77 | self.gamma = gamma 78 | self.lam = lam 79 | self.max_grad_norm = max_grad_norm 80 | self.use_clipped_value_loss = use_clipped_value_loss 81 | 82 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape): 83 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device) 84 | 85 | def test_mode(self): 86 | self.actor_critic.test() 87 | 88 | def train_mode(self): 89 | self.actor_critic.train() 90 | 91 | def act(self, obs, critic_obs): 92 | if self.actor_critic.is_recurrent: 93 | self.transition.hidden_states = self.actor_critic.get_hidden_states() 94 | # Compute the actions and values 95 | self.transition.actions = self.actor_critic.act(obs).detach() 96 | self.transition.values = self.actor_critic.evaluate(critic_obs).detach() 97 | self.transition.actions_log_prob = 
self.actor_critic.get_actions_log_prob(self.transition.actions).detach() 98 | self.transition.action_mean = self.actor_critic.action_mean.detach() 99 | self.transition.action_sigma = self.actor_critic.action_std.detach() 100 | # need to record obs and critic_obs before env.step() 101 | self.transition.observations = obs 102 | self.transition.critic_observations = critic_obs 103 | return self.transition.actions 104 | 105 | def process_env_step(self, rewards, dones, infos): 106 | self.transition.rewards = rewards.clone() 107 | self.transition.dones = dones 108 | # Bootstrapping on time outs 109 | if 'time_outs' in infos: 110 | self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1) 111 | 112 | # Record the transition 113 | self.storage.add_transitions(self.transition) 114 | self.transition.clear() 115 | self.actor_critic.reset(dones) 116 | 117 | def compute_returns(self, last_critic_obs): 118 | last_values= self.actor_critic.evaluate(last_critic_obs).detach() 119 | self.storage.compute_returns(last_values, self.gamma, self.lam) 120 | 121 | def update(self): 122 | mean_value_loss = 0 123 | mean_surrogate_loss = 0 124 | if self.actor_critic.is_recurrent: 125 | generator = self.storage.reccurent_mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 126 | else: 127 | generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs) 128 | for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \ 129 | old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in generator: 130 | 131 | 132 | self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0]) 133 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 134 | value_batch = self.actor_critic.evaluate(critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1]) 135 | mu_batch = self.actor_critic.action_mean 136 | sigma_batch = self.actor_critic.action_std 137 | entropy_batch = self.actor_critic.entropy 138 | 139 | with torch.inference_mode(): 140 | kl = torch.sum(torch.log(sigma_batch / old_sigma_batch + 1.e-5) 141 | + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, axis=-1) 142 | kl_mean = torch.mean(kl) 143 | self.kl_mean = kl_mean 144 | 145 | # KL 146 | if self.desired_kl != None and self.schedule == 'adaptive': 147 | 148 | if kl_mean > self.desired_kl * 2.0: 149 | self.learning_rate = max(1e-4, self.learning_rate / 1.5) 150 | elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0: 151 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 152 | 153 | for param_group in self.optimizer.param_groups: 154 | param_group['lr'] = self.learning_rate 155 | 156 | 157 | # Surrogate loss 158 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch)) 159 | surrogate = -torch.squeeze(advantages_batch) * ratio 160 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param, 161 | 1.0 + self.clip_param) 162 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 163 | 164 | # Value function loss 165 | if self.use_clipped_value_loss: 166 | value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param, 167 | self.clip_param) 168 | value_losses = (value_batch - returns_batch).pow(2) 169 | value_losses_clipped = 
(value_clipped - returns_batch).pow(2) 170 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 171 | else: 172 | value_loss = (returns_batch - value_batch).pow(2).mean() 173 | 174 | loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean() 175 | 176 | # Gradient step 177 | self.optimizer.zero_grad() 178 | loss.backward() 179 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm) 180 | self.optimizer.step() 181 | 182 | mean_value_loss += value_loss.item() 183 | mean_surrogate_loss += surrogate_loss.item() 184 | 185 | num_updates = self.num_learning_epochs * self.num_mini_batches 186 | mean_value_loss /= num_updates 187 | mean_surrogate_loss /= num_updates 188 | self.storage.clear() 189 | 190 | return mean_value_loss, mean_surrogate_loss 191 | -------------------------------------------------------------------------------- /reward_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import genesis as gs 5 | from locomotion_env import * 6 | 7 | class Go2(LocoEnv): 8 | 9 | def _reward_tracking_lin_vel(self): 10 | # Tracking of linear velocity commands (xy axes) 11 | lin_vel_error = torch.sum( 12 | torch.square( 13 | self.commands[:, :2] - self.base_lin_vel[:, :2] 14 | ), 15 | dim=1, 16 | ) 17 | return torch.exp(-lin_vel_error / self.reward_cfg['tracking_sigma']) 18 | 19 | def _reward_tracking_ang_vel(self): 20 | # Tracking of angular velocity commands (yaw) 21 | ang_vel_error = torch.square( 22 | self.commands[:, 2] - self.base_ang_vel[:, 2] 23 | ) 24 | return torch.exp(-ang_vel_error / self.reward_cfg['tracking_sigma']) 25 | 26 | def _reward_lin_vel_z(self): 27 | # Penalize z axis base linear velocity 28 | return torch.square(self.base_lin_vel[:, 2]) 29 | 30 | def _reward_ang_vel_xy(self): 31 | # Penalize xy axes base angular velocity 32 | return torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) 33 | 34 | def _reward_orientation(self): 35 | # Penalize non flat base orientation 36 | return torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1) 37 | 38 | def _reward_torques(self): 39 | # Penalize torques 40 | return torch.sum(torch.square(self.torques), dim=1) 41 | 42 | def _reward_dof_vel(self): 43 | # Penalize dof velocities 44 | return torch.sum(torch.square(self.dof_vel), dim=1) 45 | 46 | def _reward_dof_acc(self): 47 | # Penalize dof accelerations 48 | return torch.sum( 49 | torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1 50 | ) 51 | 52 | def _reward_action_rate(self): 53 | # Penalize changes in actions 54 | return torch.sum(torch.square(self.last_actions - self.actions), dim=1) 55 | 56 | def _reward_base_height(self): 57 | # Penalize base height away from target 58 | base_height = self.base_pos[:, 2] 59 | base_height_target = self.reward_cfg['base_height_target'] 60 | return torch.square(base_height - base_height_target) 61 | 62 | def _reward_collision(self): 63 | # Penalize collisions on selected bodies 64 | return torch.sum( 65 | 1.0 66 | * ( 67 | torch.norm( 68 | self.link_contact_forces[:, self.penalized_contact_link_indices, :], 69 | dim=-1, 70 | ) 71 | > 0.1 72 | ), 73 | dim=1, 74 | ) 75 | 76 | def _reward_termination(self): 77 | # Terminal reward / penalty 78 | return self.reset_buf * ~self.time_out_buf 79 | 80 | def _reward_dof_pos_limits(self): 81 | # Penalize dof positions too close to the limit 82 | out_of_limits = -(self.dof_pos - self.dof_pos_limits[:, 0]).clip(max=0.0) # 
lower limit 83 | out_of_limits += (self.dof_pos - self.dof_pos_limits[:, 1]).clip(min=0.0) # upper limit 84 | return torch.sum(out_of_limits, dim=1) 85 | 86 | def _reward_feet_air_time(self): 87 | # Reward long steps 88 | contact = self.link_contact_forces[:, self.feet_link_indices, 2] > 1. 89 | contact_filt = torch.logical_or(contact, self.last_contacts) 90 | self.last_contacts = contact 91 | first_contact = (self.feet_air_time > 0.) * contact_filt 92 | self.feet_air_time += self.dt 93 | rew_airTime = torch.sum((self.feet_air_time - 0.5) * first_contact, dim=1) # reward only on first contact with the ground 94 | rew_airTime *= torch.norm(self.commands[:, :2], dim=1) > 0.1 #no reward for zero command 95 | self.feet_air_time *= ~contact_filt 96 | return rew_airTime 97 | 98 | class Backflip(Go2): 99 | 100 | def reset_idx(self, envs_idx): 101 | if len(envs_idx) == 0: 102 | return 103 | 104 | # reset dofs 105 | self.dof_pos[envs_idx] = self.default_dof_pos 106 | self.dof_vel[envs_idx] = 0.0 107 | self.robot.set_dofs_position( 108 | position=self.dof_pos[envs_idx], 109 | dofs_idx_local=self.motor_dofs, 110 | zero_velocity=True, 111 | envs_idx=envs_idx, 112 | ) 113 | 114 | # reset root states - position 115 | self.base_pos[envs_idx] = self.base_init_pos 116 | self.base_pos[envs_idx, 2] = 0.32 117 | self.base_quat[envs_idx] = self.base_init_quat.reshape(1, -1) 118 | self.robot.set_pos( 119 | self.base_pos[envs_idx], zero_velocity=False, envs_idx=envs_idx 120 | ) 121 | self.robot.set_quat( 122 | self.base_quat[envs_idx], zero_velocity=False, envs_idx=envs_idx 123 | ) 124 | self.robot.zero_all_dofs_velocity(envs_idx) 125 | 126 | # update projected gravity 127 | inv_base_quat = gs_inv_quat(self.base_quat) 128 | self.projected_gravity = gs_transform_by_quat( 129 | self.global_gravity, inv_base_quat 130 | ) 131 | 132 | # reset root states - velocity 133 | self.base_lin_vel[envs_idx] = 0 134 | self.base_ang_vel[envs_idx] = 0 135 | base_vel = torch.concat( 136 | [self.base_lin_vel[envs_idx], self.base_ang_vel[envs_idx]], dim=1 137 | ) 138 | self.robot.set_dofs_velocity( 139 | velocity=base_vel, dofs_idx_local=[0, 1, 2, 3, 4, 5], envs_idx=envs_idx 140 | ) 141 | 142 | self._resample_commands(envs_idx) 143 | 144 | # reset buffers 145 | self.obs_history_buf[envs_idx] = 0.0 146 | self.actions[envs_idx] = 0.0 147 | self.last_actions[envs_idx] = 0.0 148 | self.last_last_actions[envs_idx] = 0.0 149 | self.last_dof_vel[envs_idx] = 0.0 150 | self.feet_air_time[envs_idx] = 0.0 151 | self.feet_max_height[envs_idx] = 0.0 152 | self.episode_length_buf[envs_idx] = 0 153 | self.reset_buf[envs_idx] = 1 154 | 155 | # fill extras 156 | self.extras['episode'] = {} 157 | for key in self.episode_sums.keys(): 158 | self.extras['episode']['rew_' + key] = ( 159 | torch.mean(self.episode_sums[key][envs_idx]).item() 160 | / self.max_episode_length_s 161 | ) 162 | self.episode_sums[key][envs_idx] = 0.0 163 | # send timeout info to the algorithm 164 | if self.env_cfg['send_timeouts']: 165 | self.extras['time_outs'] = self.time_out_buf 166 | 167 | def compute_observations(self): 168 | 169 | phase = torch.pi * self.episode_length_buf[:, None] * self.dt / 2 170 | self.obs_buf = torch.cat( 171 | [ 172 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 173 | self.projected_gravity, # 3 174 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], # 10 175 | self.dof_vel * self.obs_scales['dof_vel'], # 10 176 | self.actions, # 10 177 | self.last_actions, # 10 178 | torch.sin(phase), 179 | torch.cos(phase), 180 | 
torch.sin(phase / 2), 181 | torch.cos(phase / 2), 182 | torch.sin(phase / 4), 183 | torch.cos(phase / 4), 184 | ], 185 | axis=-1, 186 | ) 187 | 188 | self.obs_history_buf = torch.cat( 189 | [self.obs_history_buf[:, self.num_single_obs:], self.obs_buf.detach()], dim=1 190 | ) 191 | 192 | if self.num_privileged_obs is not None: 193 | self.privileged_obs_buf = torch.cat( 194 | [ 195 | self.base_pos[:, 2:3], # 1 196 | self.base_lin_vel * self.obs_scales['lin_vel'], # 3 197 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 198 | self.projected_gravity, # 3 199 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], # 10 200 | self.dof_vel * self.obs_scales['dof_vel'], # 10 201 | self.actions, # 10 202 | self.last_actions, # 10 203 | torch.sin(phase), 204 | torch.cos(phase), 205 | torch.sin(phase / 2), 206 | torch.cos(phase / 2), 207 | torch.sin(phase / 4), 208 | torch.cos(phase / 4), 209 | ], 210 | axis=-1, 211 | ) 212 | 213 | def check_termination(self): 214 | self.reset_buf = ( 215 | self.episode_length_buf > self.max_episode_length 216 | ) 217 | 218 | def _reward_orientation_control(self): 219 | # Penalize non flat base orientation 220 | current_time = self.episode_length_buf * self.dt 221 | phase = (current_time - 0.5).clamp(min=0, max=0.5) 222 | quat_pitch = gs_quat_from_angle_axis(4 * phase * torch.pi, 223 | torch.tensor([0, 1, 0], device=self.device, dtype=torch.float)) 224 | 225 | desired_base_quat = gs_quat_mul(quat_pitch, self.base_init_quat.reshape(1, -1).repeat(self.num_envs, 1)) 226 | inv_desired_base_quat = gs_inv_quat(desired_base_quat) 227 | desired_projected_gravity = gs_transform_by_quat(self.global_gravity, inv_desired_base_quat) 228 | 229 | orientation_diff = torch.sum(torch.square(self.projected_gravity - desired_projected_gravity), dim=1) 230 | 231 | return orientation_diff 232 | 233 | def _reward_ang_vel_y(self): 234 | current_time = self.episode_length_buf * self.dt 235 | ang_vel = -self.base_ang_vel[:, 1].clamp(max=7.2, min=-7.2) 236 | return ang_vel * torch.logical_and(current_time > 0.5, current_time < 1.0) 237 | 238 | def _reward_ang_vel_z(self): 239 | return torch.abs(self.base_ang_vel[:, 2]) 240 | 241 | def _reward_lin_vel_z(self): 242 | current_time = self.episode_length_buf * self.dt 243 | lin_vel = self.robot.get_vel()[:, 2].clamp(max=3) 244 | return lin_vel * torch.logical_and(current_time > 0.5, current_time < 0.75) 245 | 246 | def _reward_height_control(self): 247 | # Penalize non flat base orientation 248 | current_time = self.episode_length_buf * self.dt 249 | target_height = 0.3 250 | height_diff = torch.square(target_height - self.base_pos[:, 2]) * torch.logical_or(current_time < 0.4, current_time > 1.4) 251 | return height_diff 252 | 253 | def _reward_actions_symmetry(self): 254 | actions_diff = torch.square(self.actions[:, 0] + self.actions[:, 3]) 255 | actions_diff += torch.square(self.actions[:, 1:3] - self.actions[:, 4:6]).sum(dim=-1) 256 | actions_diff += torch.square(self.actions[:, 6] + self.actions[:, 9]) 257 | actions_diff += torch.square(self.actions[:, 7:9] - self.actions[:, 10:12]).sum(dim=-1) 258 | return actions_diff 259 | 260 | def _reward_gravity_y(self): 261 | return torch.square(self.projected_gravity[:, 1]) 262 | 263 | def _reward_feet_distance(self): 264 | current_time = self.episode_length_buf * self.dt 265 | cur_footsteps_translated = self.foot_positions - self.base_pos.unsqueeze(1) 266 | footsteps_in_body_frame = torch.zeros(self.num_envs, 4, 3, device=self.device) 267 | for i in range(4): 268 | 
footsteps_in_body_frame[:, i, :] = gs_quat_apply(gs_quat_conjugate(self.base_quat), 269 | cur_footsteps_translated[:, i, :]) 270 | 271 | stance_width = 0.3 * torch.zeros([self.num_envs, 1,], device=self.device) 272 | desired_ys = torch.cat([stance_width / 2, -stance_width / 2, stance_width / 2, -stance_width / 2], dim=1) 273 | stance_diff = torch.square(desired_ys - footsteps_in_body_frame[:, :, 1]).sum(dim=1) 274 | 275 | return stance_diff 276 | 277 | def _reward_feet_height_before_backflip(self): 278 | current_time = self.episode_length_buf * self.dt 279 | foot_height = (self.foot_positions[:, :, 2]).view(self.num_envs, -1) - 0.02 280 | return foot_height.clamp(min=0).sum(dim=1) * (current_time < 0.5) 281 | 282 | def _reward_collision(self): 283 | # Penalize collisions on selected bodies 284 | current_time = self.episode_length_buf * self.dt 285 | return (1.0 * (torch.norm(self.link_contact_forces[:, self.penalized_contact_link_indices, :], dim=-1) > 0.1)).sum(dim=1) 286 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/storage/rollout_storage.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import torch 32 | import numpy as np 33 | 34 | from rsl_rl.utils import split_and_pad_trajectories 35 | 36 | class RolloutStorage: 37 | class Transition: 38 | def __init__(self): 39 | self.observations = None 40 | self.critic_observations = None 41 | self.actions = None 42 | self.rewards = None 43 | self.dones = None 44 | self.values = None 45 | self.actions_log_prob = None 46 | self.action_mean = None 47 | self.action_sigma = None 48 | self.hidden_states = None 49 | 50 | def clear(self): 51 | self.__init__() 52 | 53 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'): 54 | 55 | self.device = device 56 | 57 | self.obs_shape = obs_shape 58 | self.privileged_obs_shape = privileged_obs_shape 59 | self.actions_shape = actions_shape 60 | 61 | # Core 62 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device) 63 | if privileged_obs_shape[0] is not None: 64 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device) 65 | else: 66 | self.privileged_observations = None 67 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 68 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 69 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 70 | 71 | # For PPO 72 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 73 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 74 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 75 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 76 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 77 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 78 | 79 | self.num_transitions_per_env = num_transitions_per_env 80 | self.num_envs = num_envs 81 | 82 | # rnn 83 | self.saved_hidden_states_a = None 84 | self.saved_hidden_states_c = None 85 | 86 | self.step = 0 87 | 88 | def add_transitions(self, transition: Transition): 89 | if self.step >= self.num_transitions_per_env: 90 | raise AssertionError("Rollout buffer overflow") 91 | self.observations[self.step].copy_(transition.observations) 92 | if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations) 93 | self.actions[self.step].copy_(transition.actions) 94 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 95 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 96 | self.values[self.step].copy_(transition.values) 97 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 98 | self.mu[self.step].copy_(transition.action_mean) 99 | self.sigma[self.step].copy_(transition.action_sigma) 100 | self._save_hidden_states(transition.hidden_states) 101 | self.step += 1 102 | 103 | def _save_hidden_states(self, hidden_states): 104 | if hidden_states is None or hidden_states==(None, None): 105 | return 106 | # make a tuple out of GRU hidden state sto match the LSTM format 107 | hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],) 108 | hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else 
(hidden_states[1],) 109 | 110 | # initialize if needed 111 | if self.saved_hidden_states_a is None: 112 | self.saved_hidden_states_a = [torch.zeros(self.observations.shape[0], *hid_a[i].shape, device=self.device) for i in range(len(hid_a))] 113 | self.saved_hidden_states_c = [torch.zeros(self.observations.shape[0], *hid_c[i].shape, device=self.device) for i in range(len(hid_c))] 114 | # copy the states 115 | for i in range(len(hid_a)): 116 | self.saved_hidden_states_a[i][self.step].copy_(hid_a[i]) 117 | self.saved_hidden_states_c[i][self.step].copy_(hid_c[i]) 118 | 119 | 120 | def clear(self): 121 | self.step = 0 122 | 123 | def compute_returns(self, last_values, gamma, lam): 124 | advantage = 0 125 | for step in reversed(range(self.num_transitions_per_env)): 126 | if step == self.num_transitions_per_env - 1: 127 | next_values = last_values 128 | else: 129 | next_values = self.values[step + 1] 130 | next_is_not_terminal = 1.0 - self.dones[step].float() 131 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 132 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 133 | self.returns[step] = advantage + self.values[step] 134 | 135 | # Compute and normalize the advantages 136 | self.advantages = self.returns - self.values 137 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 138 | 139 | def get_statistics(self): 140 | done = self.dones 141 | done[-1] = 1 142 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 143 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 144 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 145 | return trajectory_lengths.float().mean(), self.rewards.mean() 146 | 147 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 148 | batch_size = self.num_envs * self.num_transitions_per_env 149 | mini_batch_size = batch_size // num_mini_batches 150 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 151 | 152 | observations = self.observations.flatten(0, 1) 153 | if self.privileged_observations is not None: 154 | critic_observations = self.privileged_observations.flatten(0, 1) 155 | else: 156 | critic_observations = observations 157 | 158 | actions = self.actions.flatten(0, 1) 159 | values = self.values.flatten(0, 1) 160 | returns = self.returns.flatten(0, 1) 161 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 162 | advantages = self.advantages.flatten(0, 1) 163 | old_mu = self.mu.flatten(0, 1) 164 | old_sigma = self.sigma.flatten(0, 1) 165 | 166 | for epoch in range(num_epochs): 167 | for i in range(num_mini_batches): 168 | 169 | start = i*mini_batch_size 170 | end = (i+1)*mini_batch_size 171 | batch_idx = indices[start:end] 172 | 173 | obs_batch = observations[batch_idx] 174 | critic_observations_batch = critic_observations[batch_idx] 175 | actions_batch = actions[batch_idx] 176 | target_values_batch = values[batch_idx] 177 | returns_batch = returns[batch_idx] 178 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 179 | advantages_batch = advantages[batch_idx] 180 | old_mu_batch = old_mu[batch_idx] 181 | old_sigma_batch = old_sigma[batch_idx] 182 | yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 183 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None 184 | 185 | # for RNNs only 186 | def 
reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8): 187 | 188 | padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones) 189 | if self.privileged_observations is not None: 190 | padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones) 191 | else: 192 | padded_critic_obs_trajectories = padded_obs_trajectories 193 | 194 | mini_batch_size = self.num_envs // num_mini_batches 195 | for ep in range(num_epochs): 196 | first_traj = 0 197 | for i in range(num_mini_batches): 198 | start = i*mini_batch_size 199 | stop = (i+1)*mini_batch_size 200 | 201 | dones = self.dones.squeeze(-1) 202 | last_was_done = torch.zeros_like(dones, dtype=torch.bool) 203 | last_was_done[1:] = dones[:-1] 204 | last_was_done[0] = True 205 | trajectories_batch_size = torch.sum(last_was_done[:, start:stop]) 206 | last_traj = first_traj + trajectories_batch_size 207 | 208 | masks_batch = trajectory_masks[:, first_traj:last_traj] 209 | obs_batch = padded_obs_trajectories[:, first_traj:last_traj] 210 | critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj] 211 | 212 | actions_batch = self.actions[:, start:stop] 213 | old_mu_batch = self.mu[:, start:stop] 214 | old_sigma_batch = self.sigma[:, start:stop] 215 | returns_batch = self.returns[:, start:stop] 216 | advantages_batch = self.advantages[:, start:stop] 217 | values_batch = self.values[:, start:stop] 218 | old_actions_log_prob_batch = self.actions_log_prob[:, start:stop] 219 | 220 | # reshape to [num_envs, time, num layers, hidden dim] (original shape: [time, num_layers, num_envs, hidden_dim]) 221 | # then take only time steps after dones (flattens num envs and time dimensions), 222 | # take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim] 223 | last_was_done = last_was_done.permute(1, 0) 224 | hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous() 225 | for saved_hidden_states in self.saved_hidden_states_a ] 226 | hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous() 227 | for saved_hidden_states in self.saved_hidden_states_c ] 228 | # remove the tuple for GRU 229 | hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch 230 | hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_c_batch 231 | 232 | yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \ 233 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch 234 | 235 | first_traj = last_traj 236 | -------------------------------------------------------------------------------- /rsl_rl/rsl_rl/runners/on_policy_runner.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2.
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # 29 | # Copyright (c) 2021 ETH Zurich, Nikita Rudin 30 | 31 | import time 32 | import os 33 | from collections import deque 34 | import statistics 35 | 36 | import torch 37 | import wandb 38 | import numpy as np 39 | 40 | from rsl_rl.algorithms import PPO 41 | from rsl_rl.modules import ActorCritic, ActorCriticRecurrent 42 | from rsl_rl.env import VecEnv 43 | 44 | 45 | class OnPolicyRunner: 46 | 47 | def __init__(self, 48 | env: VecEnv, 49 | train_cfg, 50 | log_dir=None, 51 | device='cpu'): 52 | 53 | self.cfg=train_cfg["runner"] 54 | self.alg_cfg = train_cfg["algorithm"] 55 | self.policy_cfg = train_cfg["policy"] 56 | self.device = device 57 | self.env = env 58 | if self.env.num_privileged_obs is not None: 59 | num_critic_obs = self.env.num_privileged_obs 60 | else: 61 | num_critic_obs = self.env.num_obs 62 | actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic 63 | actor_critic: ActorCritic = actor_critic_class( self.env.num_obs, 64 | num_critic_obs, 65 | self.env.num_actions, 66 | **self.policy_cfg).to(self.device) 67 | alg_class = eval(self.cfg["algorithm_class_name"]) # PPO 68 | self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg) 69 | self.num_steps_per_env = self.cfg["num_steps_per_env"] 70 | if "log_interval" in self.cfg.keys(): 71 | self.log_interval = self.cfg["log_interval"] 72 | else: 73 | self.log_interval = -1 74 | self.save_interval = self.cfg["save_interval"] 75 | if "record_interval" in self.cfg.keys(): 76 | self.record_interval = self.cfg["record_interval"] 77 | else: 78 | self.record_interval = -1 79 | if self.record_interval > 0: 80 | self.record_video = True 81 | else: 82 | self.record_video = False 83 | 84 | # init storage and model 85 | self.alg.init_storage(self.env.num_envs, self.num_steps_per_env, [self.env.num_obs], [self.env.num_privileged_obs], [self.env.num_actions]) 86 | 87 | # Log 88 | self.log_dir = log_dir 89 | self.tot_timesteps = 0 90 | self.tot_time = 0 91 | self.current_learning_iteration = 0 92 | 93 | _, _ = self.env.reset() 94 | 95 | def learn(self, num_learning_iterations, init_at_random_ep_len=False): 96 | if init_at_random_ep_len: 97 | self.env.episode_length_buf = torch.randint_like(self.env.episode_length_buf, high=int(self.env.max_episode_length)) 98 | 
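# Rollout bookkeeping notes: when init_at_random_ep_len=True (as passed by train_backflip.py and
# train_walk.py), the episode_length_buf randomization above staggers environment resets so that
# terminations are spread across a rollout instead of happening in lockstep. The critic consumes
# the environment's privileged observations when they are available and falls back to the policy
# observations otherwise (asymmetric actor-critic).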
obs = self.env.get_observations() 99 | privileged_obs = self.env.get_privileged_observations() 100 | critic_obs = privileged_obs if privileged_obs is not None else obs 101 | obs, critic_obs = obs.to(self.device), critic_obs.to(self.device) 102 | self.alg.actor_critic.train() # switch to train mode (for dropout for example) 103 | 104 | ep_infos = [] 105 | rewbuffer = deque(maxlen=100) 106 | lenbuffer = deque(maxlen=100) 107 | cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device) 108 | cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device) 109 | 110 | tot_iter = self.current_learning_iteration + num_learning_iterations 111 | for it in range(self.current_learning_iteration, tot_iter): 112 | start = time.time() 113 | # Rollout 114 | with torch.inference_mode(): 115 | for i in range(self.num_steps_per_env): 116 | actions = self.alg.act(obs, critic_obs) 117 | obs, privileged_obs, rewards, dones, infos = self.env.step(actions) 118 | critic_obs = privileged_obs if privileged_obs is not None else obs 119 | obs, critic_obs, rewards, dones = obs.to(self.device), critic_obs.to(self.device), rewards.to(self.device), dones.to(self.device) 120 | self.alg.process_env_step(rewards, dones, infos) 121 | 122 | if self.log_dir is not None: 123 | # Book keeping 124 | if 'episode' in infos: 125 | ep_infos.append(infos['episode']) 126 | cur_reward_sum += rewards 127 | cur_episode_length += 1 128 | new_ids = (dones > 0).nonzero(as_tuple=False) 129 | rewbuffer.extend(cur_reward_sum[new_ids][:, 0].cpu().numpy().tolist()) 130 | lenbuffer.extend(cur_episode_length[new_ids][:, 0].cpu().numpy().tolist()) 131 | cur_reward_sum[new_ids] = 0 132 | cur_episode_length[new_ids] = 0 133 | 134 | if len(lenbuffer) > 0: 135 | mean_episode_length = statistics.mean(lenbuffer) 136 | else: 137 | mean_episode_length = 0 138 | 139 | stop = time.time() 140 | collection_time = stop - start 141 | 142 | # Learning step 143 | start = stop 144 | self.alg.compute_returns(critic_obs) 145 | 146 | mean_value_loss, mean_surrogate_loss = self.alg.update() 147 | stop = time.time() 148 | learn_time = stop - start 149 | 150 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs 151 | self.tot_time += collection_time + learn_time 152 | 153 | if self.log_dir is not None and it % self.log_interval == 0: 154 | self.log(locals()) 155 | if self.record_video: 156 | self.log_video(it) 157 | if it < 2500: 158 | if it % self.save_interval == 0: 159 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it))) 160 | if it % self.record_interval == 0 and self.record_interval > 0: 161 | self.start_recording() 162 | elif it < 5000: 163 | if it % (2*self.save_interval) == 0: 164 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it))) 165 | if it % (2*self.record_interval) == 0 and self.record_interval > 0: 166 | self.start_recording() 167 | else: 168 | if it % (5*self.save_interval) == 0: 169 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it))) 170 | if it % (3*self.record_interval) == 0 and self.record_interval > 0: 171 | self.start_recording() 172 | ep_infos.clear() 173 | 174 | self.current_learning_iteration += num_learning_iterations 175 | self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(self.current_learning_iteration))) 176 | 177 | def log(self, locs, width=80, pad=35): 178 | iteration_time = locs['collection_time'] + locs['learn_time'] 179 | 180 | ep_string = f'' 181 | wandb_dict = {} 182 | 183 | if len(locs['rewbuffer']) > 0: 184 | 
mean_episode_length = statistics.mean(locs['lenbuffer']) 185 | else: 186 | mean_episode_length = 1 187 | 188 | if locs['ep_infos']: 189 | for key in locs['ep_infos'][0]: 190 | infotensor = torch.tensor([], device=self.device) 191 | for ep_info in locs['ep_infos']: 192 | # handle scalar and zero dimensional tensor infos 193 | if not isinstance(ep_info[key], torch.Tensor): 194 | ep_info[key] = torch.Tensor([ep_info[key]]) 195 | if len(ep_info[key].shape) == 0: 196 | ep_info[key] = ep_info[key].unsqueeze(0) 197 | infotensor = torch.cat((infotensor, ep_info[key].to(self.device))) 198 | value = torch.mean(infotensor) 199 | if "tracking" in key: 200 | wandb_dict['Tracking/' + key] = value 201 | elif key[:4] == "rew_": 202 | wandb_dict['Reward/' + key[4:]] = value 203 | elif "command" in key: 204 | continue 205 | wandb_dict['Command/' + key] = value 206 | else: 207 | wandb_dict['Episode/' + key] = value 208 | # wandb_dict['Step/' + key] = value / mean_episode_length 209 | # ep_string += f"""{f'Mean episode {key}:':>{pad}} {value:.4f}\n""" 210 | mean_std = self.alg.actor_critic.std.mean() 211 | fps = int(self.num_steps_per_env * self.env.num_envs / (locs['collection_time'] + locs['learn_time'])) 212 | 213 | wandb_dict['Loss/value_func'] = locs['mean_value_loss'] 214 | wandb_dict['Loss/surrogate'] = locs['mean_surrogate_loss'] 215 | # wandb_dict['Loss/entropy_coef'] = locs['entropy_coef'] 216 | wandb_dict['Loss/learning_rate'] = self.alg.learning_rate 217 | wandb_dict['Loss/kl_mean'] = self.alg.kl_mean 218 | wandb_dict['Loss/mean_noise_std'] = mean_std.item() 219 | 220 | wandb_dict['Perf/total_fps'] = fps 221 | wandb_dict['Perf/collection time'] = locs['collection_time'] 222 | wandb_dict['Perf/learning_time'] = locs['learn_time'] 223 | 224 | if len(locs['rewbuffer']) > 0: 225 | wandb_dict['Train/mean_reward'] = statistics.mean(locs['rewbuffer']) 226 | wandb_dict['Train/mean_episode_length'] = statistics.mean(locs['lenbuffer']) 227 | 228 | wandb.log(wandb_dict, step=locs['it']) 229 | 230 | str = f" \033[1m Learning iteration {locs['it']}/{self.current_learning_iteration + locs['num_learning_iterations']} \033[0m " 231 | 232 | if len(locs['rewbuffer']) > 0: 233 | log_string = (f"""{'#' * width}\n""" 234 | f"""{str.center(width, ' ')}\n\n""" 235 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 236 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 237 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 238 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 239 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""" 240 | f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n""" 241 | f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n""") 242 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 243 | # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 244 | else: 245 | log_string = (f"""{'#' * width}\n""" 246 | f"""{str.center(width, ' ')}\n\n""" 247 | f"""{'Computation:':>{pad}} {fps:.0f} steps/s (collection: {locs[ 248 | 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n""" 249 | f"""{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n""" 250 | f"""{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n""" 251 | f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n""") 252 | # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n""" 253 | # f"""{'Mean episode 
length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n""") 254 | 255 | log_string += ep_string 256 | log_string += (f"""{'-' * width}\n""" 257 | f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n""" 258 | f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n""" 259 | f"""{'Total time:':>{pad}} {self.tot_time:.2f}s\n""" 260 | f"""{'ETA:':>{pad}} {self.tot_time / (locs['it'] + 1) * ( 261 | locs['num_learning_iterations'] - locs['it']):.1f}s\n""") 262 | print(log_string) 263 | 264 | def save(self, path, infos=None): 265 | torch.save({ 266 | 'model_state_dict': self.alg.actor_critic.state_dict(), 267 | 'optimizer_state_dict': self.alg.optimizer.state_dict(), 268 | 'iter': self.current_learning_iteration, 269 | 'infos': infos, 270 | }, path) 271 | 272 | def load(self, path, load_optimizer=True): 273 | loaded_dict = torch.load(path) 274 | self.alg.actor_critic.load_state_dict(loaded_dict['model_state_dict']) 275 | if load_optimizer: 276 | self.alg.optimizer.load_state_dict(loaded_dict['optimizer_state_dict']) 277 | self.current_learning_iteration = loaded_dict['iter'] 278 | return loaded_dict['infos'] 279 | 280 | def get_inference_policy(self, device=None): 281 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 282 | if device is not None: 283 | self.alg.actor_critic.to(device) 284 | return self.alg.actor_critic.act_inference 285 | 286 | def start_recording(self): 287 | self.env.start_recording() 288 | 289 | def log_video(self, it): 290 | frames = self.env.get_recorded_frames() 291 | if frames is None: 292 | return 293 | else: 294 | video_array = np.concatenate([np.expand_dims(frame, axis=0) for frame in frames ], axis=0).swapaxes(1, 3).swapaxes(2, 3) 295 | wandb.log({"video": wandb.Video(video_array, fps=int(1/self.env.dt))}, step=it) -------------------------------------------------------------------------------- /locomotion_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import genesis as gs 5 | from genesis.engine.solvers.rigid.rigid_solver_decomp import RigidSolver 6 | from utils import * 7 | 8 | class LocoEnv: 9 | def __init__( 10 | self, 11 | num_envs, 12 | env_cfg, 13 | obs_cfg, 14 | reward_cfg, 15 | command_cfg, 16 | show_viewer, 17 | eval, 18 | debug, 19 | device='cuda', 20 | ) -> None: 21 | self.num_envs = 1 if num_envs == 0 else num_envs 22 | self.num_build_envs = num_envs 23 | self.num_single_obs = obs_cfg['num_obs'] 24 | self.num_obs = self.num_single_obs * obs_cfg['num_history_obs'] 25 | self.num_privileged_obs = obs_cfg['num_priv_obs'] 26 | self.num_actions = env_cfg['num_actions'] 27 | self.num_commands = command_cfg['num_commands'] 28 | 29 | self.headless = not show_viewer 30 | self.eval = eval 31 | self.debug = debug 32 | 33 | self.dt = 1 / env_cfg['control_freq'] 34 | if env_cfg['use_implicit_controller']: 35 | sim_dt = self.dt 36 | sim_substeps = env_cfg['decimation'] 37 | else: 38 | sim_dt = self.dt / env_cfg['decimation'] 39 | sim_substeps = 1 40 | self.max_episode_length_s = env_cfg['episode_length_s'] 41 | self.max_episode_length = np.ceil(self.max_episode_length_s / self.dt) 42 | 43 | self.obs_cfg = obs_cfg 44 | self.obs_scales = obs_cfg['obs_scales'] 45 | self.reward_cfg = reward_cfg 46 | self.reward_scales = reward_cfg['reward_scales'] 47 | self.env_cfg = env_cfg 48 | self.command_cfg = command_cfg 49 | 50 | self.command_type = env_cfg['command_type'] 51 | assert self.command_type in ['heading', 'ang_vel_yaw'] 52 | 53 | 
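# Timing notes (values taken from the configs in train_backflip.py / train_walk.py):
#   policy dt = 1 / control_freq = 1 / 50 = 0.02 s
#   with use_implicit_controller=False: sim_dt = dt / decimation = 0.02 / 4 = 0.005 s, substeps = 1
#   max_episode_length = ceil(episode_length_s / dt) -> ceil(2.0 / 0.02) = 100 steps (backflip),
#                                                       ceil(20.0 / 0.02) = 1000 steps (walking)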
self.action_latency = env_cfg['action_latency'] 54 | assert self.action_latency in [0, 0.02] 55 | 56 | self.num_dof = env_cfg['num_dofs'] 57 | if not torch.cuda.is_available(): 58 | self.device = torch.device('cpu') 59 | else: 60 | assert device in ['cpu', 'cuda'] 61 | self.device = torch.device(device) 62 | 63 | # create scene 64 | self.scene = gs.Scene( 65 | sim_options=gs.options.SimOptions( 66 | dt=sim_dt, 67 | substeps=sim_substeps, 68 | ), 69 | viewer_options=gs.options.ViewerOptions( 70 | max_FPS=int(1 / self.dt * self.env_cfg['decimation']), 71 | camera_pos=(2.0, 0.0, 2.5), 72 | camera_lookat=(0.0, 0.0, 0.5), 73 | camera_fov=40, 74 | ), 75 | vis_options=gs.options.VisOptions( 76 | n_rendered_envs=1, 77 | ), 78 | rigid_options=gs.options.RigidOptions( 79 | dt=sim_dt, 80 | constraint_solver=gs.constraint_solver.Newton, 81 | enable_collision=True, 82 | enable_self_collision=True, 83 | enable_joint_limit=True, 84 | ), 85 | show_viewer=show_viewer, 86 | show_FPS=False, 87 | ) 88 | 89 | for solver in self.scene.sim.solvers: 90 | if not isinstance(solver, RigidSolver): 91 | continue 92 | self.rigid_solver = solver 93 | 94 | # add entities 95 | if self.env_cfg['use_terrain']: 96 | self.terrain_cfg = self.env_cfg['terrain_cfg'] 97 | self.terrain = self.scene.add_entity( 98 | gs.morphs.Terrain( 99 | n_subterrains=self.terrain_cfg['n_subterrains'], 100 | horizontal_scale=self.terrain_cfg['horizontal_scale'], 101 | vertical_scale=self.terrain_cfg['vertical_scale'], 102 | subterrain_size=self.terrain_cfg['subterrain_size'], 103 | subterrain_types=self.terrain_cfg['subterrain_types'], 104 | ), 105 | ) 106 | terrain_margin_x = self.terrain_cfg['n_subterrains'][0] * self.terrain_cfg['subterrain_size'][0] 107 | terrain_margin_y = self.terrain_cfg['n_subterrains'][1] * self.terrain_cfg['subterrain_size'][1] 108 | self.terrain_margin = torch.tensor( 109 | [terrain_margin_x, terrain_margin_y], device=self.device, dtype=gs.tc_float 110 | ) 111 | height_field = self.terrain.geoms[0].metadata["height_field"] 112 | self.height_field = torch.tensor( 113 | height_field, device=self.device, dtype=gs.tc_float 114 | ) * self.terrain_cfg['vertical_scale'] 115 | else: 116 | self.scene.add_entity( 117 | gs.morphs.URDF(file='urdf/plane/plane.urdf', fixed=True), 118 | ) 119 | self.base_init_pos = torch.tensor( 120 | self.env_cfg['base_init_pos'], device=self.device 121 | ) 122 | self.base_init_quat = torch.tensor( 123 | self.env_cfg['base_init_quat'], device=self.device 124 | ) 125 | 126 | self.robot = self.scene.add_entity( 127 | gs.morphs.URDF( 128 | file=self.env_cfg['urdf_path'], 129 | merge_fixed_links=True, 130 | links_to_keep=self.env_cfg['links_to_keep'], 131 | pos=self.base_init_pos.cpu().numpy(), 132 | quat=self.base_init_quat.cpu().numpy(), 133 | ), 134 | visualize_contact=self.debug, 135 | ) 136 | 137 | if gs.platform != 'macOS': 138 | self._set_camera() 139 | 140 | # build 141 | self.scene.build(n_envs=num_envs) 142 | 143 | self._init_buffers() 144 | self._prepare_reward_function() 145 | 146 | # domain randomization 147 | self._randomize_controls() 148 | self._randomize_rigids() 149 | 150 | def _prepare_reward_function(self): 151 | # remove zero scales + multiply non-zero ones by dt 152 | for key in list(self.reward_scales.keys()): 153 | scale = self.reward_scales[key] 154 | if scale == 0: 155 | self.reward_scales.pop(key) 156 | else: 157 | self.reward_scales[key] *= self.dt 158 | 159 | # prepare list of functions 160 | self.reward_functions = [] 161 | self.reward_names = [] 162 | for name, scale in 
self.reward_scales.items(): 163 | if name == 'termination': 164 | continue 165 | self.reward_names.append(name) 166 | name = '_reward_' + name 167 | self.reward_functions.append(getattr(self, name)) 168 | 169 | # reward episode sums 170 | self.episode_sums = { 171 | name: torch.zeros((self.num_envs,), device=self.device, dtype=gs.tc_float) 172 | for name in self.reward_scales.keys() 173 | } 174 | 175 | def _init_buffers(self): 176 | self.base_euler = torch.zeros( 177 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 178 | ) 179 | self.base_lin_vel = torch.zeros( 180 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 181 | ) 182 | self.base_ang_vel = torch.zeros( 183 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 184 | ) 185 | self.projected_gravity = torch.zeros( 186 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 187 | ) 188 | self.global_gravity = torch.tensor( 189 | np.array([0.0, 0.0, -1.0]), device=self.device, dtype=gs.tc_float 190 | ) 191 | self.forward_vec = torch.zeros( 192 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 193 | ) 194 | self.forward_vec[:, 0] = 1.0 195 | 196 | self.obs_buf = torch.zeros( 197 | (self.num_envs, self.num_single_obs), device=self.device, dtype=gs.tc_float 198 | ) 199 | self.obs_history_buf = torch.zeros( 200 | (self.num_envs, self.num_obs), device=self.device, dtype=gs.tc_float 201 | ) 202 | self.obs_noise = torch.zeros( 203 | (self.num_envs, self.num_single_obs), device=self.device, dtype=gs.tc_float 204 | ) 205 | self._prepare_obs_noise() 206 | self.privileged_obs_buf = ( 207 | None 208 | if self.num_privileged_obs is None 209 | else torch.zeros( 210 | (self.num_envs, self.num_privileged_obs), 211 | device=self.device, 212 | dtype=gs.tc_float, 213 | ) 214 | ) 215 | self.rew_buf = torch.zeros( 216 | (self.num_envs,), device=self.device, dtype=gs.tc_float 217 | ) 218 | self.rew_buf_pos = torch.zeros( 219 | (self.num_envs,), device=self.device, dtype=gs.tc_float 220 | ) 221 | self.rew_buf_neg = torch.zeros( 222 | (self.num_envs,), device=self.device, dtype=gs.tc_float 223 | ) 224 | self.reset_buf = torch.ones( 225 | (self.num_envs,), device=self.device, dtype=gs.tc_int 226 | ) 227 | self.episode_length_buf = torch.zeros( 228 | (self.num_envs,), device=self.device, dtype=gs.tc_int 229 | ) 230 | self.time_out_buf = torch.zeros( 231 | (self.num_envs,), device=self.device, dtype=gs.tc_int 232 | ) 233 | 234 | # commands 235 | self.commands = torch.zeros( 236 | (self.num_envs, self.num_commands), device=self.device, dtype=gs.tc_float 237 | ) 238 | self.commands_scale = torch.tensor( 239 | [ 240 | self.obs_scales['lin_vel'], 241 | self.obs_scales['lin_vel'], 242 | self.obs_scales['ang_vel'], 243 | ], 244 | device=self.device, 245 | dtype=gs.tc_float, 246 | ) 247 | self.stand_still = torch.zeros( 248 | (self.num_envs,), device=self.device, dtype=gs.tc_int 249 | ) 250 | 251 | # names to indices 252 | self.motor_dofs = [ 253 | self.robot.get_joint(name).dof_idx_local 254 | for name in self.env_cfg['dof_names'] 255 | ] 256 | 257 | def find_link_indices(names): 258 | link_indices = list() 259 | for link in self.robot.links: 260 | flag = False 261 | for name in names: 262 | if name in link.name: 263 | flag = True 264 | if flag: 265 | link_indices.append(link.idx - self.robot.link_start) 266 | return link_indices 267 | 268 | self.termination_contact_link_indices = find_link_indices( 269 | self.env_cfg['termination_contact_link_names'] 270 | ) 271 | self.penalized_contact_link_indices = find_link_indices( 272 | 
self.env_cfg['penalized_contact_link_names'] 273 | ) 274 | self.feet_link_indices = find_link_indices( 275 | self.env_cfg['feet_link_names'] 276 | ) 277 | assert len(self.termination_contact_link_indices) > 0 278 | assert len(self.penalized_contact_link_indices) > 0 279 | assert len(self.feet_link_indices) > 0 280 | self.feet_link_indices_world_frame = [i+1 for i in self.feet_link_indices] 281 | 282 | # actions 283 | self.actions = torch.zeros( 284 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 285 | ) 286 | self.last_actions = torch.zeros( 287 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 288 | ) 289 | self.last_last_actions = torch.zeros( 290 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 291 | ) 292 | self.dof_pos = torch.zeros( 293 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 294 | ) 295 | self.dof_vel = torch.zeros( 296 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 297 | ) 298 | self.last_dof_vel = torch.zeros( 299 | (self.num_envs, self.num_dof), device=self.device, dtype=gs.tc_float 300 | ) 301 | self.root_vel = torch.zeros( 302 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 303 | ) 304 | self.last_root_vel = torch.zeros( 305 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 306 | ) 307 | self.base_pos = torch.zeros( 308 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 309 | ) 310 | self.base_quat = torch.zeros( 311 | (self.num_envs, 4), device=self.device, dtype=gs.tc_float 312 | ) 313 | self.link_contact_forces = torch.zeros( 314 | (self.num_envs, self.robot.n_links, 3), device=self.device, dtype=gs.tc_float 315 | ) 316 | 317 | self.feet_air_time = torch.zeros( 318 | (self.num_envs, len(self.feet_link_indices)), 319 | device=self.device, 320 | dtype=gs.tc_float, 321 | ) 322 | self.feet_max_height = torch.zeros( 323 | (self.num_envs, len(self.feet_link_indices)), 324 | device=self.device, 325 | dtype=gs.tc_float, 326 | ) 327 | 328 | self.last_contacts = torch.zeros( 329 | (self.num_envs, len(self.feet_link_indices)), 330 | device=self.device, 331 | dtype=gs.tc_int, 332 | ) 333 | 334 | # extras 335 | self.continuous_push = torch.zeros( 336 | (self.num_envs, 3), device=self.device, dtype=gs.tc_float 337 | ) 338 | self.env_identities = torch.arange( 339 | self.num_envs, 340 | device=self.device, 341 | dtype=gs.tc_int, 342 | ) 343 | self.common_step_counter = 0 344 | self.extras = {} 345 | 346 | self.terrain_heights = torch.zeros( 347 | (self.num_envs,), 348 | device=self.device, 349 | dtype=gs.tc_float, 350 | ) 351 | 352 | # PD control 353 | stiffness = self.env_cfg['PD_stiffness'] 354 | damping = self.env_cfg['PD_damping'] 355 | 356 | self.p_gains, self.d_gains = [], [] 357 | for dof_name in self.env_cfg['dof_names']: 358 | for key in stiffness.keys(): 359 | if key in dof_name: 360 | self.p_gains.append(stiffness[key]) 361 | self.d_gains.append(damping[key]) 362 | self.p_gains = torch.tensor(self.p_gains, device=self.device) 363 | self.d_gains = torch.tensor(self.d_gains, device=self.device) 364 | self.batched_p_gains = self.p_gains[None, :].repeat(self.num_envs, 1) 365 | self.batched_d_gains = self.d_gains[None, :].repeat(self.num_envs, 1) 366 | 367 | self.robot.set_dofs_kp(self.p_gains, self.motor_dofs) 368 | self.robot.set_dofs_kv(self.d_gains, self.motor_dofs) 369 | 370 | default_joint_angles = self.env_cfg['default_joint_angles'] 371 | self.default_dof_pos = torch.tensor( 372 | [default_joint_angles[name] for name in 
self.env_cfg['dof_names']], 373 | device=self.device, 374 | ) 375 | 376 | self.dof_pos_limits = torch.stack(self.robot.get_dofs_limit(self.motor_dofs), dim=1) 377 | self.torque_limits = self.robot.get_dofs_force_range(self.motor_dofs)[1] 378 | for i in range(self.dof_pos_limits.shape[0]): 379 | # soft limits 380 | m = (self.dof_pos_limits[i, 0] + self.dof_pos_limits[i, 1]) / 2 381 | r = self.dof_pos_limits[i, 1] - self.dof_pos_limits[i, 0] 382 | self.dof_pos_limits[i, 0] = ( 383 | m - 0.5 * r * self.reward_cfg['soft_dof_pos_limit'] 384 | ) 385 | self.dof_pos_limits[i, 1] = ( 386 | m + 0.5 * r * self.reward_cfg['soft_dof_pos_limit'] 387 | ) 388 | 389 | self.motor_strengths = gs.ones((self.num_envs, self.num_dof), dtype=float) 390 | self.motor_offsets = gs.zeros((self.num_envs, self.num_dof), dtype=float) 391 | 392 | # gait control 393 | self.foot_positions = torch.ones( 394 | self.num_envs, len(self.feet_link_indices), 3, device=self.device, dtype=gs.tc_float, 395 | ) 396 | self.foot_quaternions = torch.ones( 397 | self.num_envs, len(self.feet_link_indices), 4, device=self.device, dtype=gs.tc_float, 398 | ) 399 | self.foot_velocities = torch.ones( 400 | self.num_envs, len(self.feet_link_indices), 3, device=self.device, dtype=gs.tc_float, 401 | ) 402 | 403 | self.base_link_index = 1 404 | 405 | self.com = torch.zeros( 406 | self.num_envs, 3, device=self.device, dtype=gs.tc_float, 407 | ) 408 | 409 | def _update_buffers(self): 410 | 411 | # update buffers 412 | # [:] is for non-parallelized scene 413 | self.base_pos[:] = self.robot.get_pos() 414 | self.base_quat[:] = self.robot.get_quat() 415 | base_quat_rel = gs_quat_mul(self.base_quat, gs_inv_quat(self.base_init_quat.reshape(1, -1).repeat(self.num_envs, 1))) 416 | self.base_euler = gs_quat2euler(base_quat_rel) 417 | 418 | inv_quat_yaw = gs_quat_from_angle_axis(-self.base_euler[:, 2], 419 | torch.tensor([0, 0, 1], device=self.device, dtype=torch.float)) 420 | 421 | inv_base_quat = gs_inv_quat(self.base_quat) 422 | self.base_lin_vel[:] = gs_transform_by_quat(self.robot.get_vel(), inv_quat_yaw) 423 | self.base_ang_vel[:] = gs_transform_by_quat(self.robot.get_ang(), inv_base_quat) 424 | self.projected_gravity = gs_transform_by_quat( 425 | self.global_gravity, inv_base_quat 426 | ) 427 | 428 | self.dof_pos[:] = self.robot.get_dofs_position(self.motor_dofs) 429 | self.dof_vel[:] = self.robot.get_dofs_velocity(self.motor_dofs) 430 | self.link_contact_forces[:] = torch.tensor( 431 | self.robot.get_links_net_contact_force(), 432 | device=self.device, 433 | dtype=gs.tc_float, 434 | ) 435 | self.com[:] = self.rigid_solver.get_links_COM([self.base_link_index,]).squeeze(dim=1) 436 | 437 | self.foot_positions[:] = self.rigid_solver.get_links_pos(self.feet_link_indices_world_frame) 438 | self.foot_quaternions[:] = self.rigid_solver.get_links_quat(self.feet_link_indices_world_frame) 439 | self.foot_velocities[:] = self.rigid_solver.get_links_vel(self.feet_link_indices_world_frame) 440 | 441 | if self.env_cfg['use_terrain']: 442 | clipped_base_pos = self.base_pos[:, :2].clamp(min=torch.zeros(2, device=self.device), max=self.terrain_margin) 443 | height_field_ids = (clipped_base_pos / self.terrain_cfg['horizontal_scale'] - 0.5).floor().int() 444 | height_field_ids.clamp(min=0) 445 | # print(self.height_field[height_field_ids[:, 0], height_field_ids[:, 1]]) 446 | self.terrain_heights = self.height_field[height_field_ids[:, 0], height_field_ids[:, 1]] 447 | 448 | def _compute_torques(self, actions): 449 | # control_type = 'P' 450 | actions_scaled = actions * 
self.env_cfg['action_scale'] 451 | torques = ( 452 | self.batched_p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos + self.motor_offsets) 453 | - self.batched_d_gains * self.dof_vel 454 | ) 455 | return torques * self.motor_strengths 456 | 457 | def _compute_target_dof_pos(self, actions): 458 | # control_type = 'P' 459 | actions_scaled = actions * self.env_cfg['action_scale'] 460 | target_dof_pos = actions_scaled + self.default_dof_pos 461 | 462 | return target_dof_pos 463 | 464 | def check_termination(self): 465 | self.reset_buf = torch.any( 466 | torch.norm( 467 | self.link_contact_forces[:, self.termination_contact_link_indices, :], 468 | dim=-1, 469 | ) 470 | > 1.0, 471 | dim=1, 472 | ) 473 | self.time_out_buf = ( 474 | self.episode_length_buf > self.max_episode_length 475 | ) # no terminal reward for time-outs 476 | self.reset_buf |= torch.logical_or( 477 | torch.abs(self.base_euler[:, 1]) 478 | > self.env_cfg['termination_if_pitch_greater_than'], 479 | torch.abs(self.base_euler[:, 0]) 480 | > self.env_cfg['termination_if_roll_greater_than'], 481 | ) 482 | if self.env_cfg['use_terrain']: 483 | self.reset_buf |= torch.logical_or( 484 | self.base_pos[:, 0] > self.terrain_margin[0], 485 | self.base_pos[:, 1] > self.terrain_margin[1], 486 | ) 487 | self.reset_buf |= torch.logical_or( 488 | self.base_pos[:, 0] < 1, 489 | self.base_pos[:, 1] < 1, 490 | ) 491 | self.reset_buf |= self.base_pos[:, 2] < self.env_cfg['termination_if_height_lower_than'] 492 | self.reset_buf |= self.time_out_buf 493 | 494 | def compute_reward(self): 495 | self.rew_buf[:] = 0. 496 | for i in range(len(self.reward_functions)): 497 | name = self.reward_names[i] 498 | rew = self.reward_functions[i]() * self.reward_scales[name] 499 | self.rew_buf += rew 500 | self.episode_sums[name] += rew 501 | # add termination reward after clipping 502 | if 'termination' in self.reward_scales: 503 | rew = self._reward_termination() * self.reward_scales['termination'] 504 | self.rew_buf += rew 505 | self.episode_sums['termination'] += rew 506 | 507 | def get_observations(self): 508 | return self.obs_history_buf 509 | 510 | def get_privileged_observations(self): 511 | return self.privileged_obs_buf 512 | 513 | def post_physics_step(self): 514 | self.episode_length_buf += 1 515 | self.common_step_counter += 1 516 | 517 | self._update_buffers() 518 | 519 | resampling_time_s = self.env_cfg['resampling_time_s'] 520 | envs_idx = ( 521 | (self.episode_length_buf % int(resampling_time_s / self.dt) == 0) 522 | .nonzero(as_tuple=False) 523 | .flatten() 524 | ) 525 | self._resample_commands(envs_idx) 526 | self._randomize_rigids(envs_idx) 527 | self._randomize_controls(envs_idx) 528 | if self.command_type == 'heading': 529 | forward = gs_transform_by_quat(self.forward_vec, self.base_quat) 530 | heading = torch.atan2(forward[:, 1], forward[:, 0]) 531 | self.commands[:, 2] = torch.clip( 532 | 0.5 * wrap_to_pi(self.commands[:, 3] - heading), -1.0, 1.0 533 | ) 534 | 535 | # random push 536 | push_interval_s = self.env_cfg['push_interval_s'] 537 | if push_interval_s > 0 and not (self.debug or self.eval): 538 | max_push_vel_xy = self.env_cfg['max_push_vel_xy'] 539 | dofs_vel = self.robot.get_dofs_velocity() # (num_envs, num_dof) [0:3] ~ base_link_vel 540 | push_vel = gs_rand_float(-max_push_vel_xy, max_push_vel_xy, (self.num_envs, 2), self.device) 541 | push_vel[((self.common_step_counter + self.env_identities) % int(push_interval_s / self.dt) != 0)] = 0 542 | dofs_vel[:, :2] += push_vel 543 | self.robot.set_dofs_velocity(dofs_vel) 544 
| 545 | self.check_termination() 546 | self.compute_reward() 547 | 548 | envs_idx = self.reset_buf.nonzero(as_tuple=False).flatten() 549 | if self.num_build_envs > 0: 550 | self.reset_idx(envs_idx) 551 | # self.rigid_solver.forward_kinematics() # no need currently 552 | self.compute_observations() 553 | 554 | if gs.platform != 'macOS': 555 | self._render_headless() 556 | if not self.headless and self.debug: 557 | self._draw_debug_vis() 558 | 559 | self.last_actions[:] = self.actions[:] 560 | self.last_last_actions[:] = self.last_actions[:] 561 | self.last_dof_vel[:] = self.dof_vel[:] 562 | self.last_root_vel[:] = self.robot.get_vel() 563 | 564 | def compute_observations(self): 565 | self.obs_buf = torch.cat( 566 | [ 567 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 568 | self.projected_gravity, # 3 569 | self.commands[:, :3] * self.commands_scale, # 3 570 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], 571 | self.dof_vel * self.obs_scales['dof_vel'], 572 | self.actions, 573 | ], 574 | axis=-1, 575 | ) 576 | 577 | # add noise 578 | if not self.eval: 579 | self.obs_buf += gs_rand_float( 580 | -1.0, 1.0, (self.num_single_obs,), self.device 581 | ) * self.obs_noise 582 | 583 | clip_obs = 100.0 584 | self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) 585 | 586 | self.obs_history_buf = torch.cat( 587 | [self.obs_history_buf[:, self.num_single_obs:], self.obs_buf.detach()], dim=1 588 | ) 589 | 590 | if self.num_privileged_obs is not None: 591 | self.privileged_obs_buf = torch.cat( 592 | [ 593 | self.base_lin_vel * self.obs_scales['lin_vel'], # 3 594 | self.base_ang_vel * self.obs_scales['ang_vel'], # 3 595 | self.projected_gravity, # 3 596 | self.commands[:, :3] * self.commands_scale, # 3 597 | (self.dof_pos - self.default_dof_pos) * self.obs_scales['dof_pos'], 598 | self.dof_vel * self.obs_scales['dof_vel'], 599 | self.actions, 600 | self.last_actions, 601 | ], 602 | axis=-1, 603 | ) 604 | self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) 605 | 606 | def _prepare_obs_noise(self): 607 | self.obs_noise[:3] = self.obs_cfg['obs_noise']['ang_vel'] 608 | self.obs_noise[3:6] = self.obs_cfg['obs_noise']['gravity'] 609 | self.obs_noise[21:33] = self.obs_cfg['obs_noise']['dof_pos'] 610 | self.obs_noise[33:45] = self.obs_cfg['obs_noise']['dof_vel'] 611 | 612 | def _resample_commands(self, envs_idx): 613 | # resample commands 614 | 615 | # lin_vel 616 | self.commands[envs_idx, 0] = gs_rand_float( 617 | *self.command_cfg['lin_vel_x_range'], (len(envs_idx),), self.device 618 | ) 619 | self.commands[envs_idx, 1] = gs_rand_float( 620 | *self.command_cfg['lin_vel_y_range'], (len(envs_idx),), self.device 621 | ) 622 | self.commands[envs_idx, :2] *= ( 623 | torch.norm(self.commands[envs_idx, :2], dim=1) > 0.2 624 | ).unsqueeze(1) 625 | 626 | # ang_vel 627 | if self.command_type == 'heading': 628 | self.commands[envs_idx, 3] = gs_rand_float( 629 | -3.14, 3.14, (len(envs_idx),), self.device 630 | ) 631 | elif self.command_type == 'ang_vel_yaw': 632 | self.commands[envs_idx, 2] = gs_rand_float( 633 | *self.command_cfg['ang_vel_range'], (len(envs_idx),), self.device 634 | ) 635 | self.commands[envs_idx, 2] *= torch.abs(self.commands[envs_idx, 2]) > 0.2 636 | 637 | def reset_idx(self, envs_idx): 638 | if len(envs_idx) == 0: 639 | return 640 | 641 | # reset dofs 642 | self.dof_pos[envs_idx] = ( 643 | self.default_dof_pos 644 | ) + gs_rand_float(-0.3, 0.3, (len(envs_idx), self.num_dof), self.device) 645 | self.dof_vel[envs_idx] = 0.0 646 | 
self.robot.set_dofs_position( 647 | position=self.dof_pos[envs_idx], 648 | dofs_idx_local=self.motor_dofs, 649 | zero_velocity=True, 650 | envs_idx=envs_idx, 651 | ) 652 | 653 | # reset root states - position 654 | self.base_pos[envs_idx] = self.base_init_pos 655 | self.base_pos[envs_idx, :2] += gs_rand_float( 656 | -1.0, 1.0, (len(envs_idx), 2), self.device 657 | ) 658 | self.base_quat[envs_idx] = self.base_init_quat.reshape(1, -1) 659 | base_euler = gs_rand_float( 660 | -0.1, 0.1, (len(envs_idx), 3), self.device 661 | ) 662 | base_euler[:, 2] = gs_rand_float(0.0, 3.14, (len(envs_idx),), self.device) 663 | self.base_quat[envs_idx] = gs_quat_mul( 664 | gs_euler2quat(base_euler), 665 | self.base_quat[envs_idx], 666 | ) 667 | self.robot.set_pos( 668 | self.base_pos[envs_idx], zero_velocity=False, envs_idx=envs_idx 669 | ) 670 | self.robot.set_quat( 671 | self.base_quat[envs_idx], zero_velocity=False, envs_idx=envs_idx 672 | ) 673 | self.robot.zero_all_dofs_velocity(envs_idx) 674 | 675 | # update projected gravity 676 | inv_base_quat = gs_inv_quat(self.base_quat) 677 | self.projected_gravity = gs_transform_by_quat( 678 | self.global_gravity, inv_base_quat 679 | ) 680 | 681 | # reset root states - velocity 682 | self.base_lin_vel[envs_idx] = ( 683 | 0 # gs_rand_float(-0.5, 0.5, (len(envs_idx), 3), self.device) 684 | ) 685 | self.base_ang_vel[envs_idx] = ( 686 | 0.0 # gs_rand_float(-0.5, 0.5, (len(envs_idx), 3), self.device) 687 | ) 688 | base_vel = torch.concat( 689 | [self.base_lin_vel[envs_idx], self.base_ang_vel[envs_idx]], dim=1 690 | ) 691 | self.robot.set_dofs_velocity( 692 | velocity=base_vel, dofs_idx_local=[0, 1, 2, 3, 4, 5], envs_idx=envs_idx 693 | ) 694 | 695 | self._resample_commands(envs_idx) 696 | 697 | # reset buffers 698 | self.obs_history_buf[envs_idx] = 0.0 699 | self.actions[envs_idx] = 0.0 700 | self.last_actions[envs_idx] = 0.0 701 | self.last_last_actions[envs_idx] = 0.0 702 | self.last_dof_vel[envs_idx] = 0.0 703 | self.feet_air_time[envs_idx] = 0.0 704 | self.feet_max_height[envs_idx] = 0.0 705 | self.episode_length_buf[envs_idx] = 0 706 | self.reset_buf[envs_idx] = 1 707 | 708 | # fill extras 709 | self.extras['episode'] = {} 710 | for key in self.episode_sums.keys(): 711 | self.extras['episode']['rew_' + key] = ( 712 | torch.mean(self.episode_sums[key][envs_idx]).item() 713 | / self.max_episode_length_s 714 | ) 715 | self.episode_sums[key][envs_idx] = 0.0 716 | # send timeout info to the algorithm 717 | if self.env_cfg['send_timeouts']: 718 | self.extras['time_outs'] = self.time_out_buf 719 | 720 | def reset(self): 721 | self.reset_buf[:] = True 722 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 723 | return None, None 724 | 725 | def step(self, actions): 726 | clip_actions = self.env_cfg['clip_actions'] 727 | self.actions = torch.clip(actions, -clip_actions, clip_actions) 728 | exec_actions = self.last_actions if self.action_latency > 0 else self.actions 729 | dof_pos_list = [] 730 | dof_vel_list = [] 731 | if self.env_cfg['use_implicit_controller']: 732 | target_dof_pos = self._compute_target_dof_pos(exec_actions) 733 | self.robot.control_dofs_position(target_dof_pos, self.motor_dofs) 734 | self.scene.step() 735 | else: 736 | for i in range(self.env_cfg['decimation']): 737 | self.torques = self._compute_torques(exec_actions) 738 | if self.num_build_envs == 0: 739 | torques = self.torques.squeeze() 740 | self.robot.control_dofs_force(torques, self.motor_dofs) 741 | else: 742 | self.robot.control_dofs_force(self.torques, self.motor_dofs) 743 | 
self.scene.step() 744 | self.dof_pos[:] = self.robot.get_dofs_position(self.motor_dofs) 745 | self.dof_vel[:] = self.robot.get_dofs_velocity(self.motor_dofs) 746 | 747 | if i == 0 or i == 2: 748 | dof_pos_list.append(self.robot.get_dofs_position().detach().cpu()) 749 | dof_vel_list.append(self.robot.get_dofs_velocity().detach().cpu()) 750 | 751 | self.dof_pos_list = dof_pos_list 752 | self.dof_vel_list = dof_vel_list 753 | 754 | self.post_physics_step() 755 | 756 | return ( 757 | self.obs_history_buf, 758 | self.privileged_obs_buf, 759 | self.rew_buf, 760 | self.reset_buf, 761 | self.extras, 762 | ) 763 | 764 | # ------------ domain randomization---------------- 765 | 766 | def _randomize_rigids(self, env_ids=None): 767 | 768 | if self.eval: 769 | return 770 | 771 | if env_ids == None: 772 | env_ids = torch.arange(0, self.num_envs) 773 | elif len(env_ids) == 0: 774 | return 775 | 776 | if self.env_cfg['randomize_friction']: 777 | self._randomize_link_friction(env_ids) 778 | if self.env_cfg['randomize_base_mass']: 779 | self._randomize_base_mass(env_ids) 780 | if self.env_cfg['randomize_com_displacement']: 781 | self._randomize_com_displacement(env_ids) 782 | 783 | def _randomize_controls(self, env_ids=None): 784 | 785 | if self.eval: 786 | return 787 | 788 | if env_ids == None: 789 | env_ids = torch.arange(0, self.num_envs) 790 | elif len(env_ids) == 0: 791 | return 792 | 793 | if self.env_cfg['randomize_motor_strength']: 794 | self._randomize_motor_strength(env_ids) 795 | if self.env_cfg['randomize_motor_offset']: 796 | self._randomize_motor_offset(env_ids) 797 | if self.env_cfg['randomize_kp_scale']: 798 | self._randomize_kp(env_ids) 799 | if self.env_cfg['randomize_kd_scale']: 800 | self._randomize_kd(env_ids) 801 | 802 | def _randomize_link_friction(self, env_ids): 803 | 804 | min_friction, max_friction = self.env_cfg['friction_range'] 805 | 806 | solver = self.rigid_solver 807 | 808 | ratios = gs.rand((len(env_ids), 1), dtype=float).repeat(1, solver.n_geoms) \ 809 | * (max_friction - min_friction) + min_friction 810 | solver.set_geoms_friction_ratio(ratios, torch.arange(0, solver.n_geoms), env_ids) 811 | 812 | def _randomize_base_mass(self, env_ids): 813 | 814 | min_mass, max_mass = self.env_cfg['added_mass_range'] 815 | base_link_id = 1 816 | 817 | added_mass = gs.rand((len(env_ids), 1), dtype=float) \ 818 | * (max_mass - min_mass) + min_mass 819 | 820 | self.rigid_solver.set_links_mass_shift(added_mass, [base_link_id,], env_ids) 821 | 822 | def _randomize_com_displacement(self, env_ids): 823 | 824 | min_displacement, max_displacement = self.env_cfg['com_displacement_range'] 825 | base_link_id = 1 826 | 827 | com_displacement = gs.rand((len(env_ids), 1, 3), dtype=float) \ 828 | * (max_displacement - min_displacement) + min_displacement 829 | # com_displacement[:, :, 0] -= 0.02 830 | 831 | self.rigid_solver.set_links_COM_shift(com_displacement, [base_link_id,], env_ids) 832 | 833 | def _randomize_motor_strength(self, env_ids): 834 | 835 | min_strength, max_strength = self.env_cfg['motor_strength_range'] 836 | self.motor_strengths[env_ids, :] = gs.rand((len(env_ids), 1), dtype=float) \ 837 | * (max_strength - min_strength) + min_strength 838 | 839 | def _randomize_motor_offset(self, env_ids): 840 | 841 | min_offset, max_offset = self.env_cfg['motor_offset_range'] 842 | self.motor_offsets[env_ids, :] = gs.rand((len(env_ids), self.num_dof), dtype=float) \ 843 | * (max_offset - min_offset) + min_offset 844 | 845 | def _randomize_kp(self, env_ids): 846 | 847 | min_scale, max_scale = 
self.env_cfg['kp_scale_range'] 848 | kp_scales = gs.rand((len(env_ids), self.num_dof), dtype=float) \ 849 | * (max_scale - min_scale) + min_scale 850 | self.batched_p_gains[env_ids, :] = kp_scales * self.p_gains[None, :] 851 | 852 | def _randomize_kd(self, env_ids): 853 | 854 | min_scale, max_scale = self.env_cfg['kd_scale_range'] 855 | kd_scales = gs.rand((len(env_ids), self.num_dof), dtype=float) \ 856 | * (max_scale - min_scale) + min_scale 857 | self.batched_d_gains[env_ids, :] = kd_scales * self.d_gains[None, :] 858 | 859 | def _draw_debug_vis(self): 860 | ''' Draws visualizations for dubugging (slows down simulation a lot). 861 | Default behaviour: draws height measurement points 862 | ''' 863 | self.scene.clear_debug_objects() 864 | 865 | foot_poss = self.foot_positions[0].reshape(-1, 3) 866 | # self.scene.draw_debug_spheres(poss=foot_poss, radius=0.03, color=(1, 0, 0, 0.7)) 867 | 868 | foot_poss = foot_poss.cpu() 869 | self.scene.draw_debug_line(foot_poss[0], foot_poss[3], radius=0.002, color=(1, 0, 0, 0.7)) 870 | self.scene.draw_debug_line(foot_poss[1], foot_poss[2], radius=0.002, color=(1, 0, 0, 0.7)) 871 | 872 | com = self.com[0] 873 | # self.scene.draw_debug_sphere(pos=com, radius=0.1, color=(0, 0, 1, 0.7)) 874 | 875 | com[2] = 0.02 + self.terrain_heights[0] 876 | self.scene.draw_debug_sphere(pos=com, radius=0.02, color=(0, 0, 1, 0.7)) 877 | 878 | def _set_camera(self): 879 | ''' Set camera position and direction 880 | ''' 881 | self._floating_camera = self.scene.add_camera( 882 | pos=np.array([0, -1, 1]), 883 | lookat=np.array([0, 0, 0]), 884 | # res=(720, 480), 885 | fov=40, 886 | GUI=False, 887 | ) 888 | 889 | self._recording = False 890 | self._recorded_frames = [] 891 | 892 | def _render_headless(self): 893 | if self._recording and len(self._recorded_frames) < 150: 894 | robot_pos = np.array(self.base_pos[0].cpu()) 895 | self._floating_camera.set_pose(pos=robot_pos + np.array([-1, -1, 0.5]), lookat=robot_pos + np.array([0, 0, -0.1])) 896 | # import time 897 | # start = time.time() 898 | frame, _, _, _ = self._floating_camera.render() 899 | # end = time.time() 900 | # print(end-start) 901 | self._recorded_frames.append(frame) 902 | # from PIL import Image 903 | # img = Image.fromarray(np.uint8(frame)) 904 | # img.save('./test.png') 905 | # print('save') 906 | 907 | def get_recorded_frames(self): 908 | if len(self._recorded_frames) == 150: 909 | frames = self._recorded_frames 910 | self._recorded_frames = [] 911 | self._recording = False 912 | return frames 913 | else: 914 | return None 915 | 916 | def start_recording(self, record_internal=True): 917 | self._recorded_frames = [] 918 | self._recording = True 919 | if record_internal: 920 | self._record_frames = True 921 | else: 922 | self._floating_camera.start_recording() 923 | 924 | def stop_recording(self, save_path=None): 925 | self._recorded_frames = [] 926 | self._recording = False 927 | if save_path is not None: 928 | print("fps", int(1 / self.dt)) 929 | self._floating_camera.stop_recording(save_path, fps = int(1 / self.dt)) 930 | --------------------------------------------------------------------------------
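
The PD actuation path in `_compute_torques` folds the randomized gains, motor offsets, and motor strengths into a single torque expression. The sketch below restates that mapping as a standalone function with toy shapes; the tensor sizes, the default `action_scale` value, and the helper name `compute_torques` are illustrative assumptions, not part of the repository.

```
import torch

def compute_torques(actions, dof_pos, dof_vel, default_dof_pos,
                    p_gains, d_gains, motor_offsets, motor_strengths,
                    action_scale=0.25):
    # PD law: drive joints toward (scaled action + default pose + offset),
    # damp with joint velocity, then scale by the per-motor strength factor.
    actions_scaled = actions * action_scale
    torques = p_gains * (actions_scaled + default_dof_pos - dof_pos + motor_offsets) \
              - d_gains * dof_vel
    return torques * motor_strengths

# toy usage: 2 envs, 12 DoFs
num_envs, num_dof = 2, 12
torques = compute_torques(
    actions=torch.zeros(num_envs, num_dof),
    dof_pos=torch.zeros(num_envs, num_dof),
    dof_vel=torch.zeros(num_envs, num_dof),
    default_dof_pos=torch.zeros(num_dof),
    p_gains=torch.full((num_envs, num_dof), 20.0),
    d_gains=torch.full((num_envs, num_dof), 0.5),
    motor_offsets=torch.zeros(num_envs, num_dof),
    motor_strengths=torch.ones(num_envs, num_dof),
)
print(torques.shape)  # torch.Size([2, 12])
```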
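
`compute_observations` keeps a flat per-env history buffer and rolls it by slicing off the oldest frame before concatenating the newest one. A minimal sketch of that rolling update, assuming a toy history length and observation size:

```
import torch

def push_obs_history(obs_history, obs, num_single_obs):
    # Drop the oldest frame (first num_single_obs columns) and append the newest.
    return torch.cat([obs_history[:, num_single_obs:], obs], dim=1)

num_envs, num_single_obs, history_len = 2, 4, 3
history = torch.zeros(num_envs, num_single_obs * history_len)
for step in range(5):
    obs = torch.full((num_envs, num_single_obs), float(step))
    history = push_obs_history(history, obs, num_single_obs)
print(history[0])  # blocks for steps 2, 3, 4 -- the newest frame sits at the end
```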
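
`_randomize_kp` and `_randomize_kd` draw one uniform scale per environment and DoF and multiply it onto the nominal gain vector. A small sketch of the same idea, using plain `torch.rand` in place of `gs.rand`; the range values stand in for `env_cfg['kp_scale_range']` and are not taken from the repository's configs.

```
import torch

def randomize_kp(p_gains, kp_scale_range, num_envs):
    # One uniform scale per (env, dof), multiplied onto the nominal Kp vector.
    lo, hi = kp_scale_range
    scales = torch.rand(num_envs, p_gains.shape[0]) * (hi - lo) + lo
    return scales * p_gains[None, :]

p_gains = torch.full((12,), 20.0)
batched_p_gains = randomize_kp(p_gains, kp_scale_range=(0.9, 1.1), num_envs=8)
print(batched_p_gains.shape)  # torch.Size([8, 12])
```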