├── LICENSE ├── LICENSES ├── legged_gym │ └── LICENSE └── rsl_rl │ └── LICENSE ├── README.md ├── dribblebot ├── __init__.py ├── assets │ ├── __init__.py │ ├── asset.py │ └── ball.py ├── envs │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── base_task.py │ │ ├── curriculum.py │ │ ├── legged_robot.py │ │ └── legged_robot_config.py │ ├── go1 │ │ ├── __init__.py │ │ ├── go1_config.py │ │ └── velocity_tracking │ │ │ └── __init__.py │ └── wrappers │ │ └── history_wrapper.py ├── rewards │ ├── rewards.py │ └── soccer_rewards.py ├── robots │ ├── go1.py │ └── robot.py ├── sensors │ ├── __init__.py │ ├── action_sensor.py │ ├── attached_camera_sensor.py │ ├── body_velocity_sensor.py │ ├── clock_sensor.py │ ├── floating_camera_sensor.py │ ├── friction_sensor.py │ ├── heightmap_sensor.py │ ├── joint_position_sensor.py │ ├── joint_velocity_sensor.py │ ├── last_action_sensor.py │ ├── object_sensor.py │ ├── object_velocity_sensor.py │ ├── orientation_sensor.py │ ├── rc_sensor.py │ ├── restitution_sensor.py │ ├── sensor.py │ ├── timing_sensor.py │ └── yaw_sensor.py ├── terrains │ ├── __init__.py │ ├── box_terrain.py │ ├── ground_plane_terrain.py │ ├── heightfield_terrain.py │ ├── terrain.py │ ├── tm_box_terrain.py │ └── trimesh_terrain.py └── utils │ ├── __init__.py │ ├── logger.py │ ├── math_utils.py │ └── terrain.py ├── dribblebot_learn ├── __init__.py ├── env │ ├── __init__.py │ └── vec_env.py ├── eval_metrics │ ├── __init__.py │ ├── domain_randomization.py │ └── metrics.py ├── ppo_cse │ ├── __init__.py │ ├── actor_critic.py │ ├── metrics_caches.py │ ├── ppo.py │ └── rollout_storage.py └── utils │ ├── __init__.py │ └── utils.py ├── resources ├── actuator_nets │ └── unitree_go1.pt ├── objects │ └── ball.urdf ├── robots │ └── go1 │ │ ├── meshes │ │ ├── calf.stl │ │ ├── hip.stl │ │ ├── thigh.stl │ │ ├── thigh_mirror.stl │ │ └── trunk.stl │ │ ├── urdf │ │ └── go1.urdf │ │ └── xml │ │ └── go1.xml └── textures │ ├── background_texture_metal_rust.jpg │ ├── brick_texture.jpg │ ├── grass.jpg │ ├── ice_texture.jpg │ ├── icy_pond.jpg │ ├── metal_wall_iron_fence.jpg │ ├── particle_board_paint_aged.jpg │ ├── pebble_stone_texture_nature.jpg │ ├── snow.jpg │ ├── texture_background_wall_paint_2.jpg │ ├── texture_background_wall_paint_3.jpg │ ├── texture_stone_stone_texture_0.jpg │ ├── texture_wood_brown_1033760.jpg │ ├── tiled_brick_texture.jpg │ ├── tiled_grass.jpg │ ├── tiled_ice_texture.jpg │ ├── tiled_particle_board_paint_aged.jpg │ ├── tiled_pebble_stone_texture_nature.jpg │ ├── tiled_snow.jpg │ ├── tiled_texture_stone_stone_texture_0.jpg │ ├── tiled_texture_wood_brown_1033760.jpg │ └── tiling_util.py ├── runs └── improbableailab │ └── dribbling │ └── bvggoq26 │ └── dribbling_pretrained │ ├── ac_weights.pt │ ├── adaptation_module.jit │ ├── body.jit │ └── config.yaml ├── scripts ├── __init__.py ├── actuator_net │ ├── __init__.py │ ├── eval.py │ ├── train.py │ └── utils.py ├── config.yaml ├── loading_utils.py ├── play_dribbling_custom.py ├── play_dribbling_pretrained.py ├── prepare_policy.py └── train_dribbling.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023s MIT Improbable AI Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | 24 | See licenses/legged_gym and licenses/rsl_rl for additional license information for some files in this package. 25 | Files associated with these additional licenses indicate so in the header. -------------------------------------------------------------------------------- /LICENSES/legged_gym/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/assets for license information for assets included in this repository. 31 | See licenses/dependencies for license information of dependencies of this package. 32 | -------------------------------------------------------------------------------- /LICENSES/rsl_rl/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin 2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. 
Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | See licenses/dependencies for license information of dependencies of this package. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go1 Soccer (Dribblebot) Starter Kit 2 | 3 | # Table of contents 4 | 1. [Overview](#overview) 5 | 2. [System Requirements](#requirements) 6 | 3. [Training a Model](#simulation) 7 | 1. [Installation](#installation) 8 | 2. [Environment and Model Configuration](#configuration) 9 | 3. [Training, Logging and Evaluation](#training) 10 | 4. [Deploying a Model (Coming Soon)](#realworld) 11 | 12 | ## Overview 13 | 14 | This repository provides an implementation of the paper: 15 | 16 | 17 | 18 | 19 | DribbleBot: Dynamic Legged Manipulation in the Wild 20 | 21 |
22 | Yandong Ji*, Gabriel B. Margolis* and Pulkit Agrawal 23 |
24 | International Conference on Robotics and Automation (ICRA), 2023 25 |
26 | paper / 27 | bibtex / 28 | project page 29 |
30 | 31 | 32 |
33 | 34 | This training code, environment and documentation build on [Walk these Ways: Tuning Robot Control for Generalization with Multiplicity of Behavior](https://github.com/Improbable-AI/walk-these-ways) by Gabriel Margolis and Pulkit Agrawal, Improbable AI Lab, MIT (Paper: https://arxiv.org/pdf/2212.03238.pdf) and the Isaac Gym simulator from 35 | NVIDIA (Paper: https://arxiv.org/abs/2108.10470). All redistributed code retains its 36 | original [license](LICENSES/legged_gym/LICENSE). 37 | 38 | Our initial release provides the following features: 39 | * Train reinforcement learning policies for the Go1 robot to dribble a soccer ball in simulation, using PPO, Isaac Gym, and domain randomization, following random ball velocity commands in the global frame. 40 | * Evaluate a pre-trained soccer policy in simulation. 41 | 42 | ## System Requirements 43 | 44 | **Simulated Training and Evaluation**: Isaac Gym requires an NVIDIA GPU. To train in the default configuration, we recommend a GPU with at least 10GB of VRAM. The code can run on a smaller GPU if you decrease the number of parallel environments (`Cfg.env.num_envs`). However, training will be slower with fewer environments. 45 | 46 | ## Training a Model 47 | 48 | ### Installation using Conda 49 | 50 | #### Create a new conda environment with Python (3.8 suggested) 51 | ```bash 52 | conda create -n dribblebot python==3.8 53 | conda activate dribblebot 54 | ``` 55 | #### Install PyTorch 1.10 with CUDA 11.3: 56 | 57 | ```bash 58 | pip3 install torch==1.10.0+cu113 torchvision==0.11.1+cu113 torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html 59 | ``` 60 | 61 | #### Install Isaac Gym 62 | 63 | 1. Download and install Isaac Gym Preview 4 from https://developer.nvidia.com/isaac-gym 64 | 2. Unzip the file: 65 | ```bash 66 | tar -xf IsaacGym_Preview_4_Package.tar.gz 67 | ``` 68 | 69 | 3. Now install the Python package: 70 | ```bash 71 | cd isaacgym/python && pip install -e . 72 | ``` 73 | 4. Verify the installation by running an example: 74 | 75 | ```bash 76 | python examples/1080_balls_of_solitude.py 77 | ``` 78 | 5. For troubleshooting, check the docs at `isaacgym/docs/index.html` 79 | 80 | #### Install the `dribblebot` package 81 | 82 | In the root of this repository, run `pip install -e .` 83 | 84 | ### Evaluate the pre-trained policy 85 | 86 | If everything is installed correctly, you should be able to run the evaluation script with: 87 | 88 | ```bash 89 | python scripts/play_dribbling_pretrained.py 90 | ``` 91 | 92 | You should see a robot dribbling a yellow soccer ball, following random global velocity commands. 93 | 94 | ### Environment and Model Configuration 95 | 96 | 97 | **CODE STRUCTURE** The main environment for simulating a legged robot is 98 | in [legged_robot.py](dribblebot/envs/base/legged_robot.py). The default configuration parameters including reward 99 | weightings are defined in [legged_robot_config.py::Cfg](dribblebot/envs/base/legged_robot_config.py). 
100 | 101 | There are three main scripts in the [scripts](scripts/) directory: 102 | 103 | ```bash 104 | scripts 105 | ├── __init__.py 106 | ├── play_dribbling_custom.py 107 | ├── play_dribbling_pretrained.py 108 | └── train_dribbling.py 109 | ``` 110 | 111 | ### Training, Logging and Evaluation 112 | 113 | To train the Go1 controller from [Dribblebot](https://gmargo11.github.io/dribblebot/), run: 114 | 115 | ```bash 116 | python scripts/train_dribbling.py 117 | ``` 118 | 119 | After initializing the simulator, the script will print out a list of metrics every ten training iterations. 120 | 121 | Training with the default configuration requires about 12GB of GPU memory. If you have less memory available, you can 122 | still train by reducing the number of parallel environments used in simulation (the default is `Cfg.env.num_envs = 1000`). 123 | 124 | To visualize training progress, first set up Weights and Biases (wandb): 125 | 126 | #### Set Up Weights and Biases (wandb): 127 | 128 | Weights and Biases is the service that provides a dashboard where you can see the progress log of your training runs, including statistics and videos. 129 | 130 | First, follow the instructions here to create your wandb account: https://docs.wandb.ai/quickstart 131 | 132 | Make sure to perform the `wandb.login()` step from your local computer. 133 | 134 | Finally, use a web browser to go to the wandb dashboard (defaults to `localhost:3001`). 135 | 136 | To evaluate a pre-trained policy, run `play_dribbling_pretrained.py`. We provide a pretrained agent checkpoint in the [./runs/dribbling](runs/dribbling) directory. 137 | 138 | ## Deploying a Model (Coming Soon) 139 | 140 | We are working on a modular version of the vision processing code so that DribbleBot can be easily deployed on the Go1. It will be added in a future release. 
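
As a supplement to the configuration notes above, the sketch below shows how the pieces of this repository can be combined programmatically: load the Go1 defaults, override a configuration field such as `Cfg.env.num_envs`, and build the wrapped environment. This is a rough illustration of the pattern only, not a substitute for `scripts/train_dribbling.py`, which configures many more parameters (sensors, rewards, ball and terrain settings); the device string and the `num_envs` value used here are assumptions.

```python
import isaacgym  # Isaac Gym must be imported before torch
import torch

from dribblebot.envs.base.legged_robot_config import Cfg
from dribblebot.envs.go1.go1_config import config_go1
from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv
from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper

# Load the Go1 defaults, then override fields before the environment is created.
config_go1(Cfg)
Cfg.env.num_envs = 500  # fewer parallel environments -> lower GPU memory use (illustrative value)

env = VelocityTrackingEasyEnv(sim_device="cuda:0", headless=True, cfg=Cfg)
env = HistoryWrapper(env)

# Step once with zero actions; HistoryWrapper returns observations as a dict
# with 'obs', 'privileged_obs', and 'obs_history' entries.
obs = env.reset()
zero_actions = torch.zeros(env.num_envs, env.num_actions, device=env.device)
obs, rew, done, info = env.step(zero_actions)
```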
-------------------------------------------------------------------------------- /dribblebot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | 5 | MINI_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 6 | MINI_GYM_ENVS_DIR = os.path.join(MINI_GYM_ROOT_DIR, 'dribblebot', 'envs') 7 | -------------------------------------------------------------------------------- /dribblebot/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/assets/__init__.py -------------------------------------------------------------------------------- /dribblebot/assets/asset.py: -------------------------------------------------------------------------------- 1 | class Asset: 2 | """ 3 | Asset class 4 | Wraps reset, domain randomization, actuation, and observation for a 5 | non-robot asset 6 | """ 7 | def __init__(self, env): 8 | self.env = env 9 | self.num_dof = 0 # default 10 | self.num_bodies = 1 # default 11 | self.num_actuated_dof = 0 # default 12 | 13 | def initialize_asset(self): 14 | raise NotImplementedError 15 | 16 | def reset(self): 17 | raise NotImplementedError 18 | 19 | def randomize(self): 20 | raise NotImplementedError 21 | 22 | def get_force_feedback(self): 23 | raise NotImplementedError 24 | 25 | def get_observation(self): 26 | raise NotImplementedError 27 | 28 | def get_num_bodies(self): 29 | return self.num_bodies 30 | 31 | def get_num_dof(self): 32 | return self.num_dof 33 | 34 | def get_num_actuated_dof(self): 35 | return self.num_actuated_dof 36 | -------------------------------------------------------------------------------- /dribblebot/assets/ball.py: -------------------------------------------------------------------------------- 1 | from dribblebot.assets.asset import Asset 2 | from isaacgym.torch_utils import * 3 | from isaacgym import gymapi 4 | 5 | 6 | class Ball(Asset): 7 | def __init__(self, env): 8 | super().__init__(env) 9 | # self.reset() 10 | 11 | def initialize(self): 12 | ball_radius = self.env.cfg.ball.radius 13 | asset_options = gymapi.AssetOptions() 14 | asset = self.env.gym.create_sphere(self.env.sim, ball_radius, 15 | asset_options) 16 | rigid_shape_props = self.env.gym.get_asset_rigid_shape_properties( 17 | asset) 18 | 19 | self.num_bodies = self.env.gym.get_asset_rigid_body_count(asset) 20 | 21 | return asset, rigid_shape_props 22 | 23 | def get_force_feedback(self): 24 | return None 25 | 26 | def get_observation(self): 27 | robot_object_vec = self.env.root_states[self.env.object_actor_idxs, 28 | 0:3] - self.env.base_pos 29 | return robot_object_vec 30 | 31 | def get_local_pos(self): 32 | robot_object_vec = self.env.root_states[self.env.object_actor_idxs, 33 | 0:3] - self.env.base_pos 34 | return robot_object_vec 35 | 36 | def get_lin_vel(self): 37 | return self.env.root_states[self.env.object_actor_idxs, 7:10] 38 | 39 | def get_ang_vel(self): 40 | return quat_rotate_inverse(self.env.base_quat, 41 | self.env.root_states[ 42 | self.env.object_actor_idxs, 10:13]) 43 | -------------------------------------------------------------------------------- /dribblebot/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/envs/__init__.py 
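
The `Asset` interface above (dribblebot/assets/asset.py) is the contract that `Ball` implements. As a rough, hypothetical illustration of how another rigid object could be added with the same pattern, a box-shaped asset might look like the following; the `Box` class and its `size` parameter are assumptions for illustration and are not part of this repository:

```python
from isaacgym import gymapi

from dribblebot.assets.asset import Asset


class Box(Asset):
    """Hypothetical rigid-box asset following the same interface as Ball."""

    def __init__(self, env, size=0.1):
        super().__init__(env)
        self.size = size  # edge length in meters (illustrative value)

    def initialize(self):
        # Create a box primitive instead of a sphere, mirroring Ball.initialize.
        asset_options = gymapi.AssetOptions()
        asset = self.env.gym.create_box(self.env.sim, self.size, self.size,
                                        self.size, asset_options)
        rigid_shape_props = self.env.gym.get_asset_rigid_shape_properties(asset)
        self.num_bodies = self.env.gym.get_asset_rigid_body_count(asset)
        return asset, rigid_shape_props

    def get_force_feedback(self):
        return None

    def get_observation(self):
        # Vector from the robot base to the object, as in Ball.get_observation.
        return self.env.root_states[self.env.object_actor_idxs, 0:3] - self.env.base_pos
```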
-------------------------------------------------------------------------------- /dribblebot/envs/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/envs/base/__init__.py -------------------------------------------------------------------------------- /dribblebot/envs/base/base_task.py: -------------------------------------------------------------------------------- 1 | # License: see [LICENSE, LICENSES/legged_gym/LICENSE] 2 | 3 | import sys 4 | 5 | import gym 6 | import torch 7 | from isaacgym import gymapi, gymutil 8 | 9 | from gym import spaces 10 | import numpy as np 11 | 12 | 13 | # Base class for RL tasks 14 | class BaseTask(gym.Env): 15 | 16 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): 17 | self.gym = gymapi.acquire_gym() 18 | 19 | if isinstance(physics_engine, str) and physics_engine == "SIM_PHYSX": 20 | physics_engine = gymapi.SIM_PHYSX 21 | 22 | self.sim_params = sim_params 23 | self.physics_engine = physics_engine 24 | self.sim_device = sim_device 25 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(self.sim_device) 26 | self.headless = headless 27 | 28 | 29 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX. 30 | if sim_device_type == 'cuda' and sim_params.use_gpu_pipeline: 31 | self.device = self.sim_device 32 | else: 33 | self.device = 'cpu' 34 | 35 | # graphics device for rendering, -1 for no rendering 36 | self.graphics_device_id = self.sim_device_id 37 | if self.headless == True: 38 | self.graphics_device_id = self.sim_device_id 39 | 40 | self.num_obs = cfg.env.num_observations 41 | self.num_privileged_obs = cfg.env.num_privileged_obs 42 | self.num_actions = cfg.env.num_actions 43 | 44 | 45 | self.num_train_envs = cfg.env.num_envs 46 | self.num_envs = cfg.env.num_envs 47 | 48 | # optimization flags for pytorch JIT 49 | torch._C._jit_set_profiling_mode(False) 50 | torch._C._jit_set_profiling_executor(False) 51 | 52 | # allocate buffers 53 | self.obs_buf = torch.zeros(self.num_envs, self.num_obs, device=self.device, dtype=torch.float) 54 | self.rew_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) 55 | self.rew_buf_pos = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) 56 | self.rew_buf_neg = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) 57 | self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) 58 | self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) 59 | self.time_out_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) 60 | self.privileged_obs_buf = torch.zeros(self.num_envs, self.num_privileged_obs, device=self.device, 61 | dtype=torch.float) 62 | # self.num_privileged_obs = self.num_obs 63 | 64 | self.extras = {} 65 | 66 | # create envs, sim and viewer 67 | self.create_sim() 68 | self.gym.prepare_sim(self.sim) 69 | 70 | # todo: read from config 71 | self.enable_viewer_sync = True 72 | self.viewer = None 73 | 74 | # if running with a viewer, set up keyboard shortcuts and camera 75 | if self.headless == False: 76 | # subscribe to keyboard shortcuts 77 | self.viewer = self.gym.create_viewer( 78 | self.sim, gymapi.CameraProperties()) 79 | self.gym.subscribe_viewer_keyboard_event( 80 | self.viewer, gymapi.KEY_ESCAPE, "QUIT") 81 | 
self.gym.subscribe_viewer_keyboard_event( 82 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync") 83 | 84 | def get_observations(self): 85 | return self.obs_buf 86 | 87 | def get_privileged_observations(self): 88 | return self.privileged_obs_buf 89 | 90 | def reset_idx(self, env_ids): 91 | """Reset selected robots""" 92 | raise NotImplementedError 93 | 94 | def reset(self): 95 | """ Reset all robots""" 96 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 97 | obs, privileged_obs, _, _, _ = self.step( 98 | torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 99 | return obs, privileged_obs 100 | 101 | def step(self, actions): 102 | raise NotImplementedError 103 | 104 | def render_gui(self, sync_frame_time=True): 105 | if self.viewer: 106 | # check for window closed 107 | if self.gym.query_viewer_has_closed(self.viewer): 108 | sys.exit() 109 | 110 | # check for keyboard events 111 | for evt in self.gym.query_viewer_action_events(self.viewer): 112 | if evt.action == "QUIT" and evt.value > 0: 113 | sys.exit() 114 | elif evt.action == "toggle_viewer_sync" and evt.value > 0: 115 | self.enable_viewer_sync = not self.enable_viewer_sync 116 | 117 | # fetch results 118 | if self.device != 'cpu': 119 | self.gym.fetch_results(self.sim, True) 120 | 121 | # step graphics 122 | if self.enable_viewer_sync: 123 | self.gym.step_graphics(self.sim) 124 | self.gym.draw_viewer(self.viewer, self.sim, True) 125 | if sync_frame_time: 126 | self.gym.sync_frame_time(self.sim) 127 | else: 128 | self.gym.poll_viewer_events(self.viewer) 129 | 130 | def close(self): 131 | if self.headless == False: 132 | self.gym.destroy_viewer(self.viewer) 133 | self.gym.destroy_sim(self.sim) 134 | -------------------------------------------------------------------------------- /dribblebot/envs/base/curriculum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from matplotlib import pyplot as plt 4 | 5 | 6 | def is_met(scale, l2_err, threshold): 7 | return (l2_err / scale) < threshold 8 | 9 | 10 | def key_is_met(metric_cache, config, ep_len, target_key, env_id, threshold): 11 | # metric_cache[target_key][env_id] / ep_len 12 | scale = 1 13 | l2_err = 0 14 | return is_met(scale, l2_err, threshold) 15 | 16 | 17 | class Curriculum: 18 | def set_to(self, low, high, value=1.0): 19 | inds = np.logical_and( 20 | self.grid >= low[:, None], 21 | self.grid <= high[:, None] 22 | ).all(axis=0) 23 | 24 | assert len(inds) != 0, "You are intializing your distribution with an empty domain!" 
25 | 26 | self.weights[inds] = value 27 | 28 | # print("SUM", self.weights.sum(), self.weights[inds], value) 29 | 30 | def __init__(self, seed, **key_ranges): 31 | # print("SET RNG") 32 | self.rng = np.random.RandomState(seed) 33 | 34 | self.cfg = cfg = {} 35 | self.indices = indices = {} 36 | for key, v_range in key_ranges.items(): 37 | bin_size = (v_range[1] - v_range[0]) / v_range[2] 38 | cfg[key] = np.linspace(v_range[0] + bin_size / 2, v_range[1] - bin_size / 2, v_range[2]) 39 | indices[key] = np.linspace(0, v_range[2]-1, v_range[2]) 40 | 41 | self.lows = np.array([range[0] for range in key_ranges.values()]) 42 | self.highs = np.array([range[1] for range in key_ranges.values()]) 43 | 44 | # self.bin_sizes = {key: arr[1] - arr[0] for key, arr in cfg.items()} 45 | self.bin_sizes = {key: (v_range[1] - v_range[0]) / v_range[2] for key, v_range in key_ranges.items()} 46 | 47 | self._raw_grid = np.stack(np.meshgrid(*cfg.values(), indexing='ij')) 48 | self._idx_grid = np.stack(np.meshgrid(*indices.values(), indexing='ij')) 49 | self.keys = [*key_ranges.keys()] 50 | self.grid = self._raw_grid.reshape([len(self.keys), -1]) 51 | self.idx_grid = self._idx_grid.reshape([len(self.keys), -1]) 52 | # self.grid = np.stack([params.flatten() for params in raw_grid]) 53 | 54 | self._l = l = len(self.grid[0]) 55 | self.ls = {key: len(self.cfg[key]) for key in self.cfg.keys()} 56 | 57 | self.weights = np.zeros(l) 58 | self.indices = np.arange(l) 59 | 60 | def __len__(self): 61 | return self._l 62 | 63 | def __getitem__(self, *keys): 64 | pass 65 | 66 | def update(self, **kwargs): 67 | # bump the envelop if 68 | pass 69 | 70 | def sample_bins(self, batch_size, low=None, high=None): 71 | """default to uniform""" 72 | if low is not None and high is not None: # if bounds given 73 | valid_inds = np.logical_and( 74 | self.grid >= low[:, None], 75 | self.grid <= high[:, None] 76 | ).all(axis=0) 77 | temp_weights = np.zeros_like(self.weights) 78 | temp_weights[valid_inds] = self.weights[valid_inds] 79 | inds = self.rng.choice(self.indices, batch_size, p=temp_weights / temp_weights.sum()) 80 | else: # if no bounds given 81 | # print(self.weights, self.weights.sum()) 82 | inds = self.rng.choice(self.indices, batch_size, p=self.weights / self.weights.sum()) 83 | 84 | return self.grid.T[inds], inds 85 | 86 | def sample_uniform_from_cell(self, centroids): 87 | bin_sizes = np.array([*self.bin_sizes.values()]) 88 | low, high = centroids + bin_sizes / 2, centroids - bin_sizes / 2 89 | rand = self.rng.uniform(low, high) 90 | 91 | return rand#.clip(self.lows, self.highs) 92 | 93 | def sample(self, batch_size, low=None, high=None): 94 | cgf_centroid, inds = self.sample_bins(batch_size, low=low, high=high) 95 | return np.stack([self.sample_uniform_from_cell(v_range) for v_range in cgf_centroid]), inds 96 | 97 | 98 | class SumCurriculum(Curriculum): 99 | def __init__(self, seed, **kwargs): 100 | super().__init__(seed, **kwargs) 101 | 102 | self.success = np.zeros(len(self)) 103 | self.trials = np.zeros(len(self)) 104 | 105 | def update(self, bin_inds, l1_error, threshold): 106 | is_success = l1_error < threshold 107 | self.success[bin_inds[is_success]] += 1 108 | self.trials[bin_inds] += 1 109 | 110 | def success_rates(self, *keys): 111 | s_rate = self.success / (self.trials + 1e-6) 112 | s_rate = s_rate.reshape(list(self.ls.values())) 113 | marginals = tuple(i for i, key in enumerate(self.keys) if key not in keys) 114 | if marginals: 115 | return s_rate.mean(axis=marginals) 116 | return s_rate 117 | 118 | 119 | class 
RewardThresholdCurriculum(Curriculum): 120 | def __init__(self, seed, **kwargs): 121 | super().__init__(seed, **kwargs) 122 | 123 | self.episode_reward_lin = np.zeros(len(self)) 124 | self.episode_reward_ang = np.zeros(len(self)) 125 | self.episode_lin_vel_raw = np.zeros(len(self)) 126 | self.episode_ang_vel_raw = np.zeros(len(self)) 127 | self.episode_duration = np.zeros(len(self)) 128 | 129 | def get_local_bins(self, bin_inds, ranges=0.1): 130 | if isinstance(ranges, float): 131 | ranges = np.ones(self.grid.shape[0]) * ranges 132 | bin_inds = bin_inds.reshape(-1) 133 | 134 | adjacent_inds = np.logical_and( 135 | self.grid[:, None, :].repeat(bin_inds.shape[0], axis=1) >= self.grid[:, bin_inds, None] - ranges.reshape(-1, 1, 1), 136 | self.grid[:, None, :].repeat(bin_inds.shape[0], axis=1) <= self.grid[:, bin_inds, None] + ranges.reshape(-1, 1, 1) 137 | ).all(axis=0) 138 | 139 | return adjacent_inds 140 | 141 | def update(self, bin_inds, task_rewards, success_thresholds, local_range=0.5): 142 | 143 | is_success = 1. 144 | for task_reward, success_threshold in zip(task_rewards, success_thresholds): 145 | is_success = is_success * (task_reward > success_threshold).cpu() 146 | if len(success_thresholds) == 0: 147 | is_success = np.array([False] * len(bin_inds)) 148 | else: 149 | is_success = np.array(is_success.bool()) 150 | 151 | self.weights[bin_inds[is_success]] = np.clip(self.weights[bin_inds[is_success]] + 0.2, 0, 1) 152 | adjacents = self.get_local_bins(bin_inds[is_success], ranges=local_range) 153 | for adjacent in adjacents: 154 | adjacent_inds = np.array(adjacent.nonzero()[0]) 155 | self.weights[adjacent_inds] = np.clip(self.weights[adjacent_inds] + 0.2, 0, 1) 156 | 157 | def log(self, bin_inds, lin_vel_raw=None, ang_vel_raw=None, episode_duration=None): 158 | self.episode_lin_vel_raw[bin_inds] = lin_vel_raw.cpu().numpy() 159 | self.episode_ang_vel_raw[bin_inds] = ang_vel_raw.cpu().numpy() 160 | self.episode_duration[bin_inds] = episode_duration.cpu().numpy() 161 | 162 | if __name__ == '__main__': 163 | r = RewardThresholdCurriculum(100, x=(-1, 1, 5), y=(-1, 1, 2), z=(-1, 1, 11)) 164 | 165 | assert r._raw_grid.shape == (3, 5, 2, 11), "grid shape is wrong: {}".format(r.grid.shape) 166 | 167 | low, high = np.array([-1.0, -0.6, -1.0]), np.array([1.0, 0.6, 1.0]) 168 | 169 | adjacents = r.get_local_bins(np.array([10, ]), range=0.5) 170 | for adjacent in adjacents: 171 | adjacent_inds = np.array(adjacent.nonzero()[0]) 172 | print(adjacent_inds) 173 | r.update(bin_inds=adjacent_inds, lin_vel_rewards=np.ones_like(adjacent_inds), 174 | ang_vel_rewards=np.ones_like(adjacent_inds), lin_vel_threshold=0.0, ang_vel_threshold=0.0, 175 | local_range=0.5) 176 | 177 | samples, bins = r.sample(10_000) 178 | 179 | plt.scatter(*samples.T[:2]) 180 | plt.show() 181 | -------------------------------------------------------------------------------- /dribblebot/envs/go1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/envs/go1/__init__.py -------------------------------------------------------------------------------- /dribblebot/envs/go1/go1_config.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from params_proto import Meta 4 | 5 | from dribblebot.envs.base.legged_robot_config import Cfg 6 | 7 | 8 | def config_go1(Cnfg: Union[Cfg, Meta]): 9 | _ = Cnfg.init_state 10 | 11 | _.pos = 
[0.0, 0.0, 0.34] # x,y,z [m] 12 | _.default_joint_angles = { # = target angles [rad] when action = 0.0 13 | 'FL_hip_joint': 0.1, # [rad] 14 | 'RL_hip_joint': 0.1, # [rad] 15 | 'FR_hip_joint': -0.1, # [rad] 16 | 'RR_hip_joint': -0.1, # [rad] 17 | 18 | 'FL_thigh_joint': 0.8, # [rad] 19 | 'RL_thigh_joint': 1., # [rad] 20 | 'FR_thigh_joint': 0.8, # [rad] 21 | 'RR_thigh_joint': 1., # [rad] 22 | 23 | 'FL_calf_joint': -1.5, # [rad] 24 | 'RL_calf_joint': -1.5, # [rad] 25 | 'FR_calf_joint': -1.5, # [rad] 26 | 'RR_calf_joint': -1.5 # [rad] 27 | } 28 | 29 | _ = Cnfg.control 30 | _.control_type = 'P' 31 | _.stiffness = {'joint': 20.} # [N*m/rad] 32 | _.damping = {'joint': 0.5} # [N*m*s/rad] 33 | # action scale: target angle = actionScale * action + defaultAngle 34 | _.action_scale = 0.25 35 | _.hip_scale_reduction = 0.5 36 | # decimation: Number of control action updates @ sim DT per policy DT 37 | _.decimation = 4 38 | 39 | _ = Cnfg.asset 40 | _.file = '{MINI_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf' 41 | _.foot_name = "foot" 42 | _.penalize_contacts_on = ["thigh", "calf"] 43 | _.terminate_after_contacts_on = ["base"] 44 | _.self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter 45 | _.flip_visual_attachments = False 46 | _.fix_base_link = False 47 | 48 | _ = Cnfg.rewards 49 | _.soft_dof_pos_limit = 0.9 50 | _.base_height_target = 0.34 51 | 52 | _ = Cnfg.reward_scales 53 | _.torques = -0.0001 54 | _.action_rate = -0.01 55 | _.dof_pos_limits = -10.0 56 | _.orientation = -5. 57 | _.base_height = -30. 58 | 59 | _ = Cnfg.terrain 60 | _.mesh_type = 'trimesh' 61 | _.measure_heights = False 62 | _.terrain_noise_magnitude = 0.0 63 | _.teleport_robots = True 64 | _.border_size = 50 65 | 66 | _.terrain_proportions = [0, 0, 0, 0, 0, 0, 0, 0, 1.0] 67 | _.curriculum = False 68 | 69 | _ = Cnfg.env 70 | _.num_observations = 42 71 | _.num_envs = 4000 72 | 73 | _ = Cnfg.commands 74 | _.lin_vel_x = [-1.0, 1.0] 75 | _.lin_vel_y = [-1.0, 1.0] 76 | 77 | _ = Cnfg.commands 78 | _.heading_command = False 79 | _.resampling_time = 10.0 80 | _.command_curriculum = True 81 | _.num_lin_vel_bins = 30 82 | _.num_ang_vel_bins = 30 83 | _.lin_vel_x = [-0.6, 0.6] 84 | _.lin_vel_y = [-0.6, 0.6] 85 | _.ang_vel_yaw = [-1, 1] 86 | 87 | _ = Cnfg.domain_rand 88 | _.randomize_base_mass = True 89 | _.added_mass_range = [-1, 3] 90 | _.push_robots = False 91 | _.max_push_vel_xy = 0.5 92 | _.randomize_friction = True 93 | _.friction_range = [0.05, 4.5] 94 | _.randomize_restitution = True 95 | _.restitution_range = [0.0, 1.0] 96 | _.restitution = 0.5 # default terrain restitution 97 | _.randomize_com_displacement = True 98 | _.com_displacement_range = [-0.1, 0.1] 99 | _.randomize_motor_strength = True 100 | _.motor_strength_range = [0.9, 1.1] 101 | _.randomize_Kp_factor = False 102 | _.Kp_factor_range = [0.8, 1.3] 103 | _.randomize_Kd_factor = False 104 | _.Kd_factor_range = [0.5, 1.5] 105 | _.rand_interval_s = 6 106 | -------------------------------------------------------------------------------- /dribblebot/envs/go1/velocity_tracking/__init__.py: -------------------------------------------------------------------------------- 1 | from isaacgym import gymutil, gymapi 2 | import torch 3 | from params_proto import Meta 4 | from typing import Union 5 | 6 | from dribblebot.envs.base.legged_robot import LeggedRobot 7 | from dribblebot.envs.base.legged_robot_config import Cfg 8 | 9 | 10 | class VelocityTrackingEasyEnv(LeggedRobot): 11 | def __init__(self, sim_device, headless, num_envs=None, prone=False, deploy=False, 12 | cfg: Cfg 
= None, eval_cfg: Cfg = None, initial_dynamics_dict=None, physics_engine="SIM_PHYSX"): 13 | 14 | if num_envs is not None: 15 | cfg.env.num_envs = num_envs 16 | 17 | sim_params = gymapi.SimParams() 18 | gymutil.parse_sim_config(vars(cfg.sim), sim_params) 19 | super().__init__(cfg, sim_params, physics_engine, sim_device, headless, eval_cfg, initial_dynamics_dict) 20 | 21 | 22 | def step(self, actions): 23 | self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras = super().step(actions) 24 | 25 | self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 26 | 0:3] 27 | 28 | self.extras.update({ 29 | "privileged_obs": self.privileged_obs_buf, 30 | "joint_pos": self.dof_pos.cpu().numpy(), 31 | "joint_vel": self.dof_vel.cpu().numpy(), 32 | "joint_pos_target": self.joint_pos_target.cpu().detach().numpy(), 33 | "joint_vel_target": torch.zeros(12), 34 | "body_linear_vel": self.base_lin_vel.cpu().detach().numpy(), 35 | "body_angular_vel": self.base_ang_vel.cpu().detach().numpy(), 36 | "body_linear_vel_cmd": self.commands.cpu().numpy()[:, 0:2], 37 | "body_angular_vel_cmd": self.commands.cpu().numpy()[:, 2:], 38 | "contact_states": (self.contact_forces[:, self.feet_indices, 2] > 1.).detach().cpu().numpy().copy(), 39 | "foot_positions": (self.foot_positions).detach().cpu().numpy().copy(), 40 | "body_pos": self.root_states[:, 0:3].detach().cpu().numpy(), 41 | "torques": self.torques.detach().cpu().numpy() 42 | }) 43 | 44 | return self.obs_buf, self.rew_buf, self.reset_buf, self.extras 45 | 46 | def reset(self): 47 | self.reset_idx(torch.arange(self.num_envs, device=self.device)) 48 | obs, _, _, _ = self.step(torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False)) 49 | return obs 50 | 51 | -------------------------------------------------------------------------------- /dribblebot/envs/wrappers/history_wrapper.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | assert isaacgym 3 | import torch 4 | import gym 5 | 6 | class HistoryWrapper(gym.Wrapper): 7 | def __init__(self, env): 8 | super().__init__(env) 9 | self.env = env 10 | 11 | self.obs_history_length = self.env.cfg.env.num_observation_history 12 | 13 | self.num_obs_history = self.obs_history_length * self.num_obs 14 | self.obs_history = torch.zeros(self.env.num_envs, self.num_obs_history, dtype=torch.float, 15 | device=self.env.device, requires_grad=False) 16 | self.num_privileged_obs = self.num_privileged_obs 17 | 18 | self.reward_container.load_env(self) 19 | 20 | def step(self, action): 21 | # privileged information and observation history are stored in info 22 | obs, rew, done, info = self.env.step(action) 23 | privileged_obs = info["privileged_obs"] 24 | 25 | self.obs_history = torch.cat((self.obs_history[:, self.env.num_obs:], obs), dim=-1) 26 | # print("obs: ", obs) 27 | return {'obs': obs, 'privileged_obs': privileged_obs, 'obs_history': self.obs_history}, rew, done, info 28 | 29 | def get_observations(self): 30 | obs = self.env.get_observations() 31 | privileged_obs = self.env.get_privileged_observations() 32 | self.obs_history = torch.cat((self.obs_history[:, self.env.num_obs:], obs), dim=-1) 33 | return {'obs': obs, 'privileged_obs': privileged_obs, 'obs_history': self.obs_history} 34 | 35 | def reset_idx(self, env_ids): # it might be a problem that this isn't getting called!! 
36 | ret = super().reset_idx(env_ids) 37 | self.obs_history[env_ids, :] = 0 38 | return ret 39 | 40 | def reset(self): 41 | ret = super().reset() 42 | privileged_obs = self.env.get_privileged_observations() 43 | self.obs_history[:, :] = 0 44 | return {"obs": ret, "privileged_obs": privileged_obs, "obs_history": self.obs_history} 45 | 46 | 47 | if __name__ == "__main__": 48 | from tqdm import trange 49 | import matplotlib.pyplot as plt 50 | 51 | import ml_logger as logger 52 | 53 | from dribblebot_learn.ppo import Runner 54 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper 55 | from dribblebot_learn.ppo.actor_critic import AC_Args 56 | 57 | from dribblebot.envs.base.legged_robot_config import Cfg 58 | from dribblebot.envs.mini_cheetah.mini_cheetah_config import config_mini_cheetah 59 | config_mini_cheetah(Cfg) 60 | 61 | test_env = gym.make("VelocityTrackingEasyEnv-v0", cfg=Cfg) 62 | env = HistoryWrapper(test_env) 63 | 64 | env.reset() 65 | action = torch.zeros(test_env.num_envs, 12) 66 | for i in trange(3): 67 | obs, rew, done, info = env.step(action) 68 | print(obs.keys()) 69 | print(f"obs: {obs['obs']}") 70 | print(f"privileged obs: {obs['privileged_obs']}") 71 | print(f"obs_history: {obs['obs_history']}") 72 | 73 | img = env.render('rgb_array') 74 | plt.imshow(img) 75 | plt.show() 76 | -------------------------------------------------------------------------------- /dribblebot/rewards/rewards.py: -------------------------------------------------------------------------------- 1 | class Rewards: 2 | def __init__(self, env): 3 | self.env = env 4 | 5 | def get_reward(self, env_ids = None): 6 | self.rew_buf[:] = 0. 7 | self.rew_buf_pos[:] = 0. 8 | self.rew_buf_neg[:] = 0. 9 | for i in range(len(self.reward_functions)): 10 | name = self.reward_names[i] 11 | rew = self.reward_functions[i]() * self.reward_scales[name] 12 | self.rew_buf += rew 13 | if torch.sum(rew) >= 0: 14 | self.rew_buf_pos += rew 15 | elif torch.sum(rew) <= 0: 16 | self.rew_buf_neg += rew 17 | self.episode_sums[name] += rew 18 | if name in ['tracking_contacts_shaped_force', 'tracking_contacts_shaped_vel']: 19 | self.command_sums[name] += self.reward_scales[name] + rew 20 | else: 21 | self.command_sums[name] += rew 22 | if self.cfg.rewards.only_positive_rewards: 23 | self.rew_buf[:] = torch.clip(self.rew_buf[:], min=0.) 
24 | elif self.cfg.rewards.only_positive_rewards_ji22_style: #TODO: update 25 | self.rew_buf[:] = self.rew_buf_pos[:] * torch.exp(self.rew_buf_neg[:] / self.cfg.rewards.sigma_rew_neg) 26 | self.episode_sums["total"] += self.rew_buf 27 | # add termination reward after clipping 28 | if "termination" in self.reward_scales: 29 | rew = self.reward_container._reward_termination() * self.reward_scales["termination"] 30 | self.rew_buf += rew 31 | self.episode_sums["termination"] += rew 32 | self.command_sums["termination"] += rew 33 | 34 | self.command_sums["lin_vel_raw"] += self.base_lin_vel[:, 0] 35 | self.command_sums["ang_vel_raw"] += self.base_ang_vel[:, 2] 36 | self.command_sums["lin_vel_residual"] += (self.base_lin_vel[:, 0] - self.commands[:, 0]) ** 2 37 | self.command_sums["ang_vel_residual"] += (self.base_ang_vel[:, 2] - self.commands[:, 2]) ** 2 38 | self.command_sums["ep_timesteps"] += 1 39 | 40 | 41 | -------------------------------------------------------------------------------- /dribblebot/rewards/soccer_rewards.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift 4 | from isaacgym.torch_utils import * 5 | from .rewards import Rewards 6 | 7 | class SoccerRewards(Rewards): 8 | def __init__(self, env): 9 | self.env = env 10 | 11 | def load_env(self, env): 12 | self.env = env 13 | 14 | def _reward_orientation(self): 15 | # Penalize non flat base orientation 16 | return torch.sum(torch.square(self.env.projected_gravity[:, :2]), dim=1) 17 | 18 | def _reward_torques(self): 19 | # Penalize torques 20 | return torch.sum(torch.square(self.env.torques), dim=1) 21 | 22 | def _reward_dof_vel(self): 23 | # Penalize dof velocities 24 | # k_qd = -6e-4 25 | return torch.sum(torch.square(self.env.dof_vel), dim=1) 26 | 27 | def _reward_dof_acc(self): 28 | # Penalize dof accelerations 29 | return torch.sum(torch.square((self.env.last_dof_vel - self.env.dof_vel) / self.env.dt), dim=1) 30 | 31 | def _reward_collision(self): 32 | # Penalize collisions on selected bodies 33 | return torch.sum(1. * (torch.norm(self.env.contact_forces[:, self.env.penalised_contact_indices, :], dim=-1) > 0.1), 34 | dim=1) 35 | 36 | def _reward_action_rate(self): 37 | # Penalize changes in actions 38 | return torch.sum(torch.square(self.env.last_actions - self.env.actions), dim=1) 39 | 40 | def _reward_tracking_contacts_shaped_force(self): 41 | foot_forces = torch.norm(self.env.contact_forces[:, self.env.feet_indices, :], dim=-1) 42 | desired_contact = self.env.desired_contact_states 43 | 44 | reward = 0 45 | for i in range(4): 46 | reward += - (1 - desired_contact[:, i]) * ( 47 | 1 - torch.exp(-1 * foot_forces[:, i] ** 2 / self.env.cfg.rewards.gait_force_sigma)) 48 | return reward / 4 49 | 50 | def _reward_tracking_contacts_shaped_vel(self): 51 | foot_velocities = torch.norm(self.env.foot_velocities, dim=2).view(self.env.num_envs, -1) 52 | desired_contact = self.env.desired_contact_states 53 | reward = 0 54 | for i in range(4): 55 | reward += - (desired_contact[:, i] * ( 56 | 1 - torch.exp(-1 * foot_velocities[:, i] ** 2 / self.env.cfg.rewards.gait_vel_sigma))) 57 | return reward / 4 58 | 59 | def _reward_dof_pos_limits(self): 60 | # Penalize dof positions too close to the limit 61 | out_of_limits = -(self.env.dof_pos - self.env.dof_pos_limits[:, 0]).clip(max=0.) # lower limit 62 | out_of_limits += (self.env.dof_pos - self.env.dof_pos_limits[:, 1]).clip(min=0.) 
63 | return torch.sum(out_of_limits, dim=1) 64 | 65 | def _reward_dof_pos(self): 66 | # Penalize dof positions 67 | # k_q = -0.75 68 | return torch.sum(torch.square(self.env.dof_pos - self.env.default_dof_pos), dim=1) 69 | 70 | def _reward_action_smoothness_1(self): 71 | # Penalize changes in actions 72 | # k_s1 =-2.5 73 | diff = torch.square(self.env.joint_pos_target - self.env.last_joint_pos_target) 74 | diff = diff * (self.env.last_actions[:,:12] != 0) # ignore first step 75 | return torch.sum(diff, dim=1) 76 | 77 | def _reward_action_smoothness_2(self): 78 | # Penalize changes in actions 79 | # k_s2 = -1.2 80 | diff = torch.square(self.env.joint_pos_target - 2 * self.env.last_joint_pos_target + self.env.last_last_joint_pos_target) 81 | diff = diff * (self.env.last_actions[:,:12] != 0) # ignore first step 82 | diff = diff * (self.env.last_last_actions[:,:12] != 0) # ignore second step 83 | return torch.sum(diff, dim=1) 84 | 85 | # encourage robot velocity align vector from robot body to ball 86 | # r_cv 87 | def _reward_dribbling_robot_ball_vel(self): 88 | FR_shoulder_idx = self.env.gym.find_actor_rigid_body_handle(self.env.envs[0], self.env.robot_actor_handles[0], "FR_thigh_shoulder") 89 | FR_HIP_positions = quat_rotate_inverse(self.env.base_quat, self.env.rigid_body_state.view(self.env.num_envs, -1, 13)[:,FR_shoulder_idx,0:3].view(self.env.num_envs,3)-self.env.base_pos) 90 | FR_HIP_velocities = quat_rotate_inverse(self.env.base_quat, self.env.rigid_body_state.view(self.env.num_envs, -1, 13)[:,FR_shoulder_idx,7:10].view(self.env.num_envs,3)) 91 | 92 | delta_dribbling_robot_ball_vel = 1.0 93 | robot_ball_vec = self.env.object_local_pos[:,0:2] - FR_HIP_positions[:,0:2] 94 | d_robot_ball=robot_ball_vec / torch.norm(robot_ball_vec, dim=-1).unsqueeze(dim=-1) 95 | ball_robot_velocity_projection = torch.norm(self.env.commands[:,:2], dim=-1) - torch.sum(d_robot_ball * FR_HIP_velocities[:,0:2], dim=-1) # set approaching speed to velocity command 96 | velocity_concatenation = torch.cat((torch.zeros(self.env.num_envs,1, device=self.env.device), ball_robot_velocity_projection.unsqueeze(dim=-1)), dim=-1) 97 | rew_dribbling_robot_ball_vel=torch.exp(-delta_dribbling_robot_ball_vel* torch.pow(torch.max(velocity_concatenation,dim=-1).values, 2) ) 98 | return rew_dribbling_robot_ball_vel 99 | 100 | # encourage robot near ball 101 | # r_cp 102 | def _reward_dribbling_robot_ball_pos(self): 103 | 104 | FR_shoulder_idx = self.env.gym.find_actor_rigid_body_handle(self.env.envs[0], self.env.robot_actor_handles[0], "FR_thigh_shoulder") 105 | FR_HIP_positions = quat_rotate_inverse(self.env.base_quat, self.env.rigid_body_state.view(self.env.num_envs, -1, 13)[:,FR_shoulder_idx,0:3].view(self.env.num_envs,3)-self.env.base_pos) 106 | 107 | delta_dribbling_robot_ball_pos = 4.0 108 | rew_dribbling_robot_ball_pos = torch.exp(-delta_dribbling_robot_ball_pos * torch.pow(torch.norm(self.env.object_local_pos - FR_HIP_positions, dim=-1), 2) ) 109 | return rew_dribbling_robot_ball_pos 110 | 111 | # encourage ball vel align with unit vector between ball target and ball current position 112 | # r^bv 113 | def _reward_dribbling_ball_vel(self): 114 | # target velocity is command input 115 | lin_vel_error = torch.sum(torch.square(self.env.commands[:, :2] - self.env.object_lin_vel[:, :2]), dim=1) 116 | # rew_dribbling_ball_vel = torch.exp(-lin_vel_error / (self.env.cfg.rewards.tracking_sigma*2)) 117 | return torch.exp(-lin_vel_error / (self.env.cfg.rewards.tracking_sigma*2)) 118 | 119 | def _reward_dribbling_robot_ball_yaw(self): 
120 | robot_ball_vec = self.env.object_pos_world_frame[:,0:2] - self.env.base_pos[:,0:2] 121 | d_robot_ball=robot_ball_vec / torch.norm(robot_ball_vec, dim=-1).unsqueeze(dim=-1) 122 | 123 | unit_command_vel = self.env.commands[:,:2] / torch.norm(self.env.commands[:,:2], dim=-1).unsqueeze(dim=-1) 124 | robot_ball_cmd_yaw_error = torch.norm(unit_command_vel, dim=-1) - torch.sum(d_robot_ball * unit_command_vel, dim=-1) 125 | 126 | # robot ball vector align with body yaw angle 127 | roll, pitch, yaw = get_euler_xyz(self.env.base_quat) 128 | body_yaw_vec = torch.zeros(self.env.num_envs, 2, device=self.env.device) 129 | body_yaw_vec[:,0] = torch.cos(yaw) 130 | body_yaw_vec[:,1] = torch.sin(yaw) 131 | robot_ball_body_yaw_error = torch.norm(body_yaw_vec, dim=-1) - torch.sum(d_robot_ball * body_yaw_vec, dim=-1) 132 | delta_dribbling_robot_ball_cmd_yaw = 2.0 133 | rew_dribbling_robot_ball_yaw = torch.exp(-delta_dribbling_robot_ball_cmd_yaw * (robot_ball_cmd_yaw_error+robot_ball_body_yaw_error)) 134 | return rew_dribbling_robot_ball_yaw 135 | 136 | def _reward_dribbling_ball_vel_norm(self): 137 | # target velocity is command input 138 | vel_norm_diff = torch.pow(torch.norm(self.env.commands[:, :2], dim=-1) - torch.norm(self.env.object_lin_vel[:, :2], dim=-1), 2) 139 | delta_vel_norm = 2.0 140 | rew_vel_norm_tracking = torch.exp(-delta_vel_norm * vel_norm_diff) 141 | return rew_vel_norm_tracking 142 | 143 | # def _reward_dribbling_ball_vel_angle(self): 144 | # angle_diff = torch.atan2(self.env.commands[:,1], self.env.commands[:,0]) - torch.atan2(self.env.object_lin_vel[:,1], self.env.object_lin_vel[:,0]) 145 | # angle_diff_in_pi = torch.pow(wrap_to_pi(angle_diff), 2) 146 | # rew_vel_angle_tracking = torch.exp(-5.0*angle_diff_in_pi/(torch.pi**2)) 147 | # # print("angle_diff", angle_diff, " angle_diff_in_pi: ", angle_diff_in_pi, " rew_vel_angle_tracking", rew_vel_angle_tracking, " commands", self.env.commands[:, :2], " object_lin_vel", self.env.object_lin_vel[:, :2]) 148 | # return rew_vel_angle_tracking 149 | 150 | def _reward_dribbling_ball_vel_angle(self): 151 | angle_diff = torch.atan2(self.env.commands[:,1], self.env.commands[:,0]) - torch.atan2(self.env.object_lin_vel[:,1], self.env.object_lin_vel[:,0]) 152 | angle_diff_in_pi = torch.pow(wrap_to_pi(angle_diff), 2) 153 | rew_vel_angle_tracking = 1.0 - angle_diff_in_pi/(torch.pi**2) 154 | return rew_vel_angle_tracking -------------------------------------------------------------------------------- /dribblebot/robots/go1.py: -------------------------------------------------------------------------------- 1 | from .robot import Robot 2 | 3 | from dribblebot import MINI_GYM_ROOT_DIR 4 | import os 5 | 6 | from isaacgym import gymapi 7 | 8 | class Go1(Robot): 9 | def __init__(self, env): 10 | super().__init__(env) 11 | 12 | def initialize(self): 13 | asset_file = '{MINI_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf' 14 | asset_path = asset_file.format(MINI_GYM_ROOT_DIR=MINI_GYM_ROOT_DIR) 15 | asset_root = os.path.dirname(asset_path) 16 | asset_file = os.path.basename(asset_path) 17 | 18 | asset_config = self.env.cfg.asset 19 | 20 | asset_options = gymapi.AssetOptions() 21 | asset_options.default_dof_drive_mode = asset_config.default_dof_drive_mode 22 | asset_options.collapse_fixed_joints = asset_config.collapse_fixed_joints 23 | asset_options.replace_cylinder_with_capsule = asset_config.replace_cylinder_with_capsule 24 | asset_options.flip_visual_attachments = asset_config.flip_visual_attachments 25 | asset_options.fix_base_link = 
asset_config.fix_base_link 26 | asset_options.density = asset_config.density 27 | asset_options.angular_damping = asset_config.angular_damping 28 | asset_options.linear_damping = asset_config.linear_damping 29 | asset_options.max_angular_velocity = asset_config.max_angular_velocity 30 | asset_options.max_linear_velocity = asset_config.max_linear_velocity 31 | asset_options.armature = asset_config.armature 32 | asset_options.thickness = asset_config.thickness 33 | asset_options.disable_gravity = asset_config.disable_gravity 34 | asset_options.vhacd_enabled = True 35 | asset_options.vhacd_params = gymapi.VhacdParams() 36 | asset_options.vhacd_params.resolution = 500000 37 | 38 | asset = self.env.gym.load_asset(self.env.sim, asset_root, asset_file, asset_options) 39 | 40 | self.num_dof = self.env.gym.get_asset_dof_count(asset) 41 | self.num_actuated_dof = 12 42 | self.num_bodies = self.env.gym.get_asset_rigid_body_count(asset) 43 | dof_props_asset = self.env.gym.get_asset_dof_properties(asset) 44 | rigid_shape_props_asset = self.env.gym.get_asset_rigid_shape_properties(asset) 45 | 46 | return asset, dof_props_asset, rigid_shape_props_asset 47 | -------------------------------------------------------------------------------- /dribblebot/robots/robot.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Robot: 4 | def __init__(self, env): 5 | self.env = env 6 | 7 | def apply_torques(self, torques): 8 | pass 9 | 10 | def get_torques(self): 11 | pass 12 | 13 | def get_state(self): 14 | pass 15 | 16 | def add_camera(self, camera): 17 | pass 18 | 19 | def get_num_bodies(self): 20 | return self.num_bodies 21 | 22 | def get_num_dof(self): 23 | return self.num_dof 24 | 25 | def get_num_actuated_dof(self): 26 | return self.num_actuated_dof -------------------------------------------------------------------------------- /dribblebot/sensors/__init__.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | from .attached_camera_sensor import AttachedCameraSensor 3 | from .floating_camera_sensor import FloatingCameraSensor 4 | from .joint_position_sensor import JointPositionSensor 5 | from .joint_velocity_sensor import JointVelocitySensor 6 | from .orientation_sensor import OrientationSensor 7 | from .heightmap_sensor import HeightmapSensor 8 | from .rc_sensor import RCSensor 9 | from .action_sensor import ActionSensor 10 | from .last_action_sensor import LastActionSensor 11 | from .clock_sensor import ClockSensor 12 | from .yaw_sensor import YawSensor 13 | from .object_sensor import ObjectSensor 14 | from .timing_sensor import TimingSensor 15 | from .body_velocity_sensor import BodyVelocitySensor 16 | from .object_velocity_sensor import ObjectVelocitySensor 17 | from .restitution_sensor import RestitutionSensor 18 | from .friction_sensor import FrictionSensor 19 | 20 | ALL_SENSORS = { "AttachedCameraSensor": AttachedCameraSensor, 21 | "FloatingCameraSensor": FloatingCameraSensor, 22 | "JointPositionSensor": JointPositionSensor, 23 | "JointVelocitySensor": JointVelocitySensor, 24 | "OrientationSensor": OrientationSensor, 25 | "HeightmapSensor": HeightmapSensor, 26 | "RCSensor": RCSensor, 27 | "ActionSensor": ActionSensor, 28 | "LastActionSensor": LastActionSensor, 29 | "ClockSensor": ClockSensor, 30 | "YawSensor": YawSensor, 31 | "ObjectSensor": ObjectSensor, 32 | "TimingSensor": TimingSensor, 33 | "BodyVelocitySensor": BodyVelocitySensor, 34 | "ObjectVelocitySensor": ObjectVelocitySensor, 35 | 
"RestitutionSensor": RestitutionSensor, 36 | "FrictionSensor": FrictionSensor,} 37 | -------------------------------------------------------------------------------- /dribblebot/sensors/action_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class ActionSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None, delay=0): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | self.delay = delay 9 | 10 | def get_observation(self, env_ids = None): 11 | if self.delay == 0: 12 | return self.env.actions 13 | elif self.delay == 1: 14 | return self.env.last_actions 15 | else: 16 | raise NotImplementedError("Action delay of {} not implemented".format(self.delay)) 17 | 18 | def get_noise_vec(self): 19 | import torch 20 | return torch.zeros(self.env.num_actions, device=self.env.device) 21 | 22 | def get_dim(self): 23 | return self.env.num_actions -------------------------------------------------------------------------------- /dribblebot/sensors/attached_camera_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | from isaacgym import gymapi 3 | from isaacgym.torch_utils import * 4 | import torch 5 | import numpy as np 6 | 7 | class AttachedCameraSensor(Sensor): 8 | def __init__(self, env, attached_robot_asset=None): 9 | super().__init__(env) 10 | self.env = env 11 | self.attached_robot_asset = attached_robot_asset 12 | 13 | 14 | 15 | def initialize(self, camera_label, camera_pose, camera_rpy, env_ids=None): 16 | if env_ids is None: env_ids = range(self.env.num_envs) 17 | 18 | camera_props = gymapi.CameraProperties() 19 | camera_props.width = self.env.cfg.perception.image_width 20 | camera_props.height = self.env.cfg.perception.image_height 21 | camera_props.horizontal_fov = self.env.cfg.perception.image_horizontal_fov 22 | 23 | 24 | self.cams = [] 25 | 26 | for env_id in env_ids: 27 | 28 | cam = self.env.gym.create_camera_sensor(self.env.envs[env_id], camera_props) 29 | # initialize camera position 30 | # attach the camera to the base 31 | trans_pos = gymapi.Vec3(camera_pose[0], camera_pose[1], camera_pose[2]) 32 | quat_pitch = quat_from_angle_axis(torch.Tensor([-camera_rpy[1]]), torch.Tensor([0, 1, 0]))[0] 33 | quat_yaw = quat_from_angle_axis(torch.Tensor([camera_rpy[2]]), torch.Tensor([0, 0, 1]))[0] 34 | quat = quat_mul(quat_yaw, quat_pitch) 35 | trans_quat = gymapi.Quat(quat[0], quat[1], quat[2], quat[3]) 36 | transform = gymapi.Transform(trans_pos, trans_quat) 37 | follow_mode = gymapi.CameraFollowMode.FOLLOW_TRANSFORM 38 | self.env.gym.attach_camera_to_body(cam, self.env.envs[env_id], 0, transform, follow_mode) 39 | 40 | self.cams.append(cam) 41 | 42 | return self.cams 43 | 44 | def get_observation(self, env_ids = None): 45 | 46 | raise NotImplementedError 47 | 48 | def get_depth_images(self, env_ids = None): 49 | if env_ids is None: env_ids = range(self.env.num_envs) 50 | 51 | depth_images = [] 52 | for env_id in env_ids: 53 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[env_id], self.cams[env_id], 54 | gymapi.IMAGE_DEPTH) 55 | w, h = img.shape 56 | depth_images.append(torch.from_numpy(img.reshape([1, w, h])).to(self.env.device)) 57 | depth_images = torch.cat(depth_images, dim=0) 58 | return depth_images 59 | 60 | def get_rgb_images(self, env_ids): 61 | if env_ids is None: env_ids = range(self.env.num_envs) 62 | 63 | rgb_images = [] 64 | for env_id in env_ids: 65 | 
img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[env_id], self.cams[env_id], 66 | gymapi.IMAGE_COLOR) 67 | w, h = img.shape 68 | rgb_images.append( 69 | torch.from_numpy(img.reshape([1, w, h // 4, 4]).astype(np.int32)).to(self.env.device)) 70 | rgb_images = torch.cat(rgb_images, dim=0) 71 | return rgb_images 72 | 73 | def get_segmentation_images(self, env_ids): 74 | if env_ids is None: env_ids = range(self.env.num_envs) 75 | 76 | segmentation_images = [] 77 | for env_id in env_ids: 78 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[env_id], self.cams[env_id], 79 | gymapi.IMAGE_SEGMENTATION) 80 | w, h = img.shape 81 | segmentation_images.append( 82 | torch.from_numpy(img.reshape([1, w, h]).astype(np.int32)).to(self.env.device)) 83 | segmentation_images = torch.cat(segmentation_images, dim=0) 84 | return segmentation_images -------------------------------------------------------------------------------- /dribblebot/sensors/body_velocity_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | from isaacgym.torch_utils import * 4 | import torch 5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift 6 | 7 | class BodyVelocitySensor(Sensor): 8 | def __init__(self, env, attached_robot_asset=None): 9 | super().__init__(env) 10 | self.env = env 11 | self.attached_robot_asset = attached_robot_asset 12 | 13 | def get_observation(self, env_ids = None): 14 | return self.env.base_lin_vel 15 | 16 | # Privileged sensor input does not contain noise 17 | # def get_noise_vec(self): 18 | # import torch 19 | # return torch.zeros(1, device=self.env.device) 20 | 21 | def get_dim(self): 22 | return 3 -------------------------------------------------------------------------------- /dribblebot/sensors/clock_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class ClockSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | return self.env.clock_inputs 11 | 12 | def get_noise_vec(self): 13 | import torch 14 | return torch.zeros(4, device=self.env.device) 15 | 16 | def get_dim(self): 17 | return 4 -------------------------------------------------------------------------------- /dribblebot/sensors/floating_camera_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | from isaacgym import gymapi 3 | 4 | class FloatingCameraSensor(Sensor): 5 | def __init__(self, env): 6 | super().__init__(env) 7 | self.env = env 8 | 9 | camera_props = gymapi.CameraProperties() 10 | camera_props.width = self.env.cfg.env.recording_width_px 11 | camera_props.height = self.env.cfg.env.recording_height_px 12 | self.rendering_camera = self.env.gym.create_camera_sensor(self.env.envs[0], camera_props) 13 | self.env.gym.set_camera_location(self.rendering_camera, self.env.envs[0], gymapi.Vec3(1.5, 1, 3.0), 14 | gymapi.Vec3(0, 0, 0)) 15 | 16 | def set_position(self, target_loc=None, cam_distance=None): 17 | if cam_distance is None: 18 | cam_distance = [0, -1.0, 1.0] 19 | if target_loc is None: 20 | bx, by, bz = self.env.root_states[0, 0], self.env.root_states[0, 1], self.env.root_states[0, 2] 21 | target_loc = [bx, by, bz] 22 | self.env.gym.set_camera_location(self.rendering_camera, 
self.env.envs[0], gymapi.Vec3(target_loc[0] + cam_distance[0], 23 | target_loc[1] + cam_distance[1], 24 | target_loc[2] + cam_distance[2]), 25 | gymapi.Vec3(target_loc[0], target_loc[1], target_loc[2])) 26 | 27 | def get_observation(self, env_ids = None): 28 | self.env.gym.step_graphics(self.env.sim) 29 | self.env.gym.render_all_camera_sensors(self.env.sim) 30 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[0], self.rendering_camera, gymapi.IMAGE_COLOR) 31 | w, h = img.shape 32 | return img.reshape([w, h // 4, 4]) -------------------------------------------------------------------------------- /dribblebot/sensors/friction_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | from isaacgym.torch_utils import * 4 | import torch 5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift 6 | 7 | class FrictionSensor(Sensor): 8 | def __init__(self, env, attached_robot_asset=None): 9 | super().__init__(env) 10 | self.env = env 11 | self.attached_robot_asset = attached_robot_asset 12 | 13 | def get_observation(self, env_ids = None): 14 | friction_coeffs_scale, friction_coeffs_shift = get_scale_shift(self.env.cfg.normalization.friction_range) 15 | return (self.env.friction_coeffs[:, 0].unsqueeze(1) - friction_coeffs_shift) * friction_coeffs_scale 16 | 17 | def get_noise_vec(self): 18 | import torch 19 | return torch.zeros(1, device=self.env.device) 20 | 21 | def get_dim(self): 22 | return 1 -------------------------------------------------------------------------------- /dribblebot/sensors/heightmap_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | import torch 3 | from dribblebot.utils.math_utils import quat_apply_yaw 4 | 5 | class HeightmapSensor(Sensor): 6 | def __init__(self, env): 7 | super().__init__(env) 8 | self.env = env 9 | 10 | def get_observation(self, env_ids = None): 11 | 12 | if env_ids is None: 13 | env_ids = torch.arange(self.num_envs, device=self.env.device) 14 | 15 | if self.env.cfg.terrain.mesh_type == 'plane': 16 | return torch.zeros(self.env.num_envs, self.env.cfg.perception.num_height_points, device=self.device, requires_grad=False) 17 | elif self.env.cfg.terrain.mesh_type == 'none': 18 | raise NameError("Can't measure height with terrain mesh type 'none'") 19 | 20 | points = quat_apply_yaw(self.base_quat[env_ids].repeat(1, self.env.cfg.perception.num_height_points), 21 | self.height_points[env_ids]) + (self.env.root_states[self.robot_actor_idxs[env_ids], :3]).unsqueeze(1) 22 | 23 | points += self.terrain.cfg.border_size 24 | points = (points / self.terrain.cfg.horizontal_scale).long() 25 | px = points[:, :, 0].view(-1) 26 | py = points[:, :, 1].view(-1) 27 | px = torch.clip(px, 0, self.env.height_samples.shape[0] - 2) 28 | py = torch.clip(py, 0, self.env.height_samples.shape[1] - 2) 29 | 30 | heights1 = self.env.height_samples[px, py] 31 | heights2 = self.env.height_samples[px + 1, py] 32 | heights3 = self.env.height_samples[px, py + 1] 33 | heights = torch.min(heights1, heights2) 34 | heights = torch.min(heights, heights3) 35 | 36 | return heights.view(len(env_ids), -1) * self.env.terrain.cfg.vertical_scale -------------------------------------------------------------------------------- /dribblebot/sensors/joint_position_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class 
JointPositionSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | return (self.env.dof_pos[:, :self.env.num_actuated_dof] - \ 11 | self.env.default_dof_pos[:, :self.env.num_actuated_dof]) * \ 12 | self.env.cfg.obs_scales.dof_pos 13 | 14 | def get_noise_vec(self): 15 | import torch 16 | return torch.ones(self.env.num_actuated_dof, device=self.env.device) * \ 17 | self.env.cfg.noise_scales.dof_pos * \ 18 | self.env.cfg.noise.noise_level * \ 19 | self.env.cfg.obs_scales.dof_pos 20 | 21 | def get_dim(self): 22 | return self.env.num_actuated_dof -------------------------------------------------------------------------------- /dribblebot/sensors/joint_velocity_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class JointVelocitySensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | return self.env.dof_vel[:, :self.env.num_actuated_dof] * \ 11 | self.env.cfg.obs_scales.dof_vel 12 | 13 | def get_noise_vec(self): 14 | import torch 15 | return torch.ones(self.env.num_actuated_dof, device=self.env.device) * \ 16 | self.env.cfg.noise_scales.dof_vel * \ 17 | self.env.cfg.noise.noise_level * \ 18 | self.env.cfg.obs_scales.dof_vel 19 | 20 | def get_dim(self): 21 | return self.env.num_actuated_dof -------------------------------------------------------------------------------- /dribblebot/sensors/last_action_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class LastActionSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None, delay=0): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | self.delay = delay 9 | 10 | def get_observation(self, env_ids = None): 11 | if self.delay == 0: 12 | return self.env.actions 13 | elif self.delay == 1: 14 | return self.env.last_actions 15 | else: 16 | raise NotImplementedError("Action delay of {} not implemented".format(self.delay)) 17 | 18 | def get_noise_vec(self): 19 | import torch 20 | return torch.zeros(self.env.num_actions, device=self.env.device) 21 | 22 | def get_dim(self): 23 | return self.env.num_actions -------------------------------------------------------------------------------- /dribblebot/sensors/object_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class ObjectSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | return self.env.object_local_pos * self.env.cfg.obs_scales.ball_pos 11 | 12 | def get_noise_vec(self): 13 | import torch 14 | return torch.ones(3, device=self.env.device) * self.env.cfg.noise_scales.ball_pos * self.env.cfg.noise.noise_level * self.env.cfg.obs_scales.ball_pos 15 | 16 | def get_dim(self): 17 | return 3 -------------------------------------------------------------------------------- /dribblebot/sensors/object_velocity_sensor.py: -------------------------------------------------------------------------------- 1 | from 
.sensor import Sensor 2 | 3 | from isaacgym.torch_utils import * 4 | import torch 5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift 6 | 7 | class ObjectVelocitySensor(Sensor): 8 | def __init__(self, env, attached_robot_asset=None): 9 | super().__init__(env) 10 | self.env = env 11 | # self.attached_robot_asset = attached_robot_asset 12 | 13 | def get_observation(self, env_ids = None): 14 | 15 | return self.env.object_lin_vel 16 | 17 | # Privileged sensor input does not contain noise 18 | # def get_noise_vec(self): 19 | # import torch 20 | # return torch.zeros(3, device=self.env.device) 21 | 22 | def get_dim(self): 23 | return 3 -------------------------------------------------------------------------------- /dribblebot/sensors/orientation_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class OrientationSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | return self.env.projected_gravity 11 | 12 | def get_noise_vec(self): 13 | import torch 14 | return torch.ones(3, device=self.env.device) * \ 15 | self.env.cfg.noise_scales.gravity * \ 16 | self.env.cfg.noise.noise_level 17 | 18 | def get_dim(self): 19 | return 3 -------------------------------------------------------------------------------- /dribblebot/sensors/rc_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class RCSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | return self.env.commands * self.env.commands_scale 11 | 12 | def get_noise_vec(self): 13 | import torch 14 | return torch.zeros(self.env.cfg.commands.num_commands, device=self.env.device) 15 | 16 | def get_dim(self): 17 | return self.env.cfg.commands.num_commands -------------------------------------------------------------------------------- /dribblebot/sensors/restitution_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | from isaacgym.torch_utils import * 4 | import torch 5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift 6 | 7 | class RestitutionSensor(Sensor): 8 | def __init__(self, env, attached_robot_asset=None): 9 | super().__init__(env) 10 | self.env = env 11 | self.attached_robot_asset = attached_robot_asset 12 | 13 | def get_observation(self, env_ids = None): 14 | restitution_coeffs_scale, restitution_coeffs_shift = get_scale_shift(self.env.cfg.normalization.restitution_range) 15 | return (self.env.restitutions[:, 0].unsqueeze(1) - restitution_coeffs_shift) * restitution_coeffs_scale 16 | 17 | def get_noise_vec(self): 18 | import torch 19 | return torch.zeros(1, device=self.env.device) 20 | 21 | def get_dim(self): 22 | return 1 -------------------------------------------------------------------------------- /dribblebot/sensors/sensor.py: -------------------------------------------------------------------------------- 1 | class Sensor: 2 | def __init__(self, env): 3 | self.env = env 4 | 5 | def get_observation(self): 6 | raise NotImplementedError 7 | 8 | def get_noise_vec(self): 9 | raise NotImplementedError 
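    # Subclasses implement these hooks: get_observation() returns the sensor's
    # per-environment observation tensor, get_noise_vec() the per-dimension scale
    # of the noise added to that observation, and get_dim() its dimensionality.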
10 | 11 | def get_dim(self): 12 | raise NotImplementedError -------------------------------------------------------------------------------- /dribblebot/sensors/timing_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | class TimingSensor(Sensor): 4 | def __init__(self, env, attached_robot_asset=None): 5 | super().__init__(env) 6 | self.env = env 7 | # self.attached_robot_asset = attached_robot_asset 8 | 9 | def get_observation(self, env_ids = None): 10 | # print("timeing variable: ", self.env.gait_indices) 11 | return self.env.gait_indices.unsqueeze(1) 12 | 13 | def get_noise_vec(self): 14 | import torch 15 | return torch.zeros(1, device=self.env.device) 16 | 17 | def get_dim(self): 18 | return 1 -------------------------------------------------------------------------------- /dribblebot/sensors/yaw_sensor.py: -------------------------------------------------------------------------------- 1 | from .sensor import Sensor 2 | 3 | from isaacgym.torch_utils import * 4 | import torch 5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift 6 | 7 | class YawSensor(Sensor): 8 | def __init__(self, env, attached_robot_asset=None): 9 | super().__init__(env) 10 | self.env = env 11 | self.attached_robot_asset = attached_robot_asset 12 | 13 | def get_observation(self, env_ids = None): 14 | forward = quat_apply(self.env.base_quat, self.env.forward_vec) 15 | heading = torch.atan2(forward[:, 1], forward[:, 0]).unsqueeze(1) 16 | heading = wrap_to_pi(heading - self.env.heading_offsets.unsqueeze(1)) 17 | return heading 18 | 19 | def get_noise_vec(self): 20 | import torch 21 | return torch.zeros(1, device=self.env.device) 22 | 23 | def get_dim(self): 24 | return 1 -------------------------------------------------------------------------------- /dribblebot/terrains/__init__.py: -------------------------------------------------------------------------------- 1 | from .terrain import Terrain 2 | from .tm_box_terrain import TMBoxTerrain 3 | from .box_terrain import BoxTerrain 4 | from .heightfield_terrain import HeightfieldTerrain 5 | from .trimesh_terrain import TrimeshTerrain 6 | from .ground_plane_terrain import GroundPlaneTerrain 7 | 8 | ALL_TERRAINS = { "boxes_tm": TMBoxTerrain, 9 | "boxes": BoxTerrain, 10 | "heightfield": HeightfieldTerrain, 11 | "trimesh": TrimeshTerrain, 12 | "plane": GroundPlaneTerrain 13 | } -------------------------------------------------------------------------------- /dribblebot/terrains/ground_plane_terrain.py: -------------------------------------------------------------------------------- 1 | from .terrain import Terrain 2 | 3 | from isaacgym import gymapi 4 | import torch 5 | 6 | class GroundPlaneTerrain(Terrain): 7 | def __init__(self, env): 8 | super().__init__(env) 9 | self.prepare() 10 | 11 | def prepare(self): 12 | """ Adds a ground plane to the simulation, sets friction and restitution based on the cfg. 
13 | """ 14 | plane_params = gymapi.PlaneParams() 15 | plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) 16 | plane_params.static_friction = self.env.cfg.terrain.static_friction 17 | plane_params.dynamic_friction = self.env.cfg.terrain.dynamic_friction 18 | plane_params.restitution = self.env.cfg.terrain.restitution 19 | self.env.gym.add_ground(self.env.sim, plane_params) 20 | 21 | def initialize(self): 22 | pass 23 | -------------------------------------------------------------------------------- /dribblebot/terrains/heightfield_terrain.py: -------------------------------------------------------------------------------- 1 | from .terrain import Terrain 2 | 3 | from isaacgym import gymapi 4 | import torch 5 | 6 | class HeightfieldTerrain(Terrain): 7 | def __init__(self, env): 8 | super().__init__(env) 9 | self.prepare() 10 | 11 | def prepare(self): 12 | """ Adds a heightfield terrain to the simulation, sets parameters based on the cfg. 13 | """ 14 | hf_params = gymapi.HeightFieldParams() 15 | hf_params.column_scale = self.env.terrain.cfg.horizontal_scale 16 | hf_params.row_scale = self.env.terrain.cfg.horizontal_scale 17 | hf_params.vertical_scale = self.env.terrain.cfg.vertical_scale 18 | hf_params.nbRows = self.env.terrain.tot_cols 19 | hf_params.nbColumns = self.env.terrain.tot_rows 20 | hf_params.transform.p.x = -self.env.terrain.cfg.border_size 21 | hf_params.transform.p.y = -self.env.terrain.cfg.border_size 22 | hf_params.transform.p.z = 0.0 23 | hf_params.static_friction = self.cfg.env.terrain.static_friction 24 | hf_params.dynamic_friction = self.cfg.env.terrain.dynamic_friction 25 | hf_params.restitution = self.cfg.env.terrain.restitution 26 | 27 | print(self.env.terrain.heightsamples.shape, hf_params.nbRows, hf_params.nbColumns) 28 | 29 | self.env.gym.add_heightfield(self.env.sim, self.env.terrain.heightsamples.T, hf_params) 30 | self.height_samples = torch.tensor(self.env.terrain.heightsamples).view(self.env.terrain.tot_rows, 31 | self.env.terrain.tot_cols).to(self.env.device) 32 | 33 | 34 | def initialize(self): 35 | pass -------------------------------------------------------------------------------- /dribblebot/terrains/terrain.py: -------------------------------------------------------------------------------- 1 | class Terrain: 2 | def __init__(self, env): 3 | self.env = env 4 | 5 | def initialize(self): 6 | raise NotImplementedError 7 | 8 | def get_friction(self, x, y): 9 | """ Returns the friction at a given point in the terrain. 10 | # """ 11 | return self.friction_samples[x, y] 12 | 13 | def get_roughness(self, x, y): 14 | """ Returns the roughness at a given point in the terrain. 15 | # """ 16 | return self.roughness_samples[x, y] 17 | 18 | def get_restitution(self, x, y): 19 | """ Returns the restitution at a given point in the terrain. 20 | # """ 21 | return self.restitution_samples[x, y] 22 | 23 | def get_height(self, x, y): 24 | """ Returns the height at a given point in the terrain. 25 | # """ 26 | return self.height_samples[x, y] 27 | 28 | -------------------------------------------------------------------------------- /dribblebot/terrains/trimesh_terrain.py: -------------------------------------------------------------------------------- 1 | from .terrain import Terrain 2 | 3 | from isaacgym import gymapi 4 | import torch 5 | 6 | class TrimeshTerrain(Terrain): 7 | def __init__(self, env): 8 | super().__init__(env) 9 | self.prepare() 10 | 11 | def prepare(self): 12 | """ Adds a triangle mesh terrain to the simulation, sets parameters based on the cfg. 
13 | # """ 14 | tm_params = gymapi.TriangleMeshParams() 15 | tm_params.nb_vertices = self.env.terrain.vertices.shape[0] 16 | tm_params.nb_triangles = self.env.terrain.triangles.shape[0] 17 | 18 | tm_params.transform.p.x = -self.env.terrain.cfg.border_size 19 | tm_params.transform.p.y = -self.env.terrain.cfg.border_size 20 | tm_params.transform.p.z = 0.0 21 | tm_params.static_friction = self.env.cfg.terrain.static_friction 22 | tm_params.dynamic_friction = self.env.cfg.terrain.dynamic_friction 23 | tm_params.restitution = self.env.cfg.terrain.restitution 24 | self.env.gym.add_triangle_mesh(self.env.sim, self.env.terrain.vertices.flatten(order='C'), 25 | self.env.terrain.triangles.flatten(order='C'), tm_params) 26 | self.height_samples = torch.tensor(self.env.terrain.heightsamples).view(self.env.terrain.tot_rows, 27 | self.env.terrain.tot_cols).to(self.env.device) 28 | 29 | 30 | def initialize(self): 31 | pass 32 | -------------------------------------------------------------------------------- /dribblebot/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .math_utils import * 2 | from .terrain import Terrain 3 | -------------------------------------------------------------------------------- /dribblebot/utils/logger.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class Logger: 5 | def __init__(self, env): 6 | self.env = env 7 | 8 | def populate_log(self, env_ids): 9 | 10 | extras = {} 11 | 12 | # fill extras 13 | if len(env_ids) > 0: 14 | extras["train/episode"] = {} 15 | for key in self.env.episode_sums.keys(): 16 | extras["train/episode"]['rew_' + key] = torch.mean( 17 | self.env.episode_sums[key][env_ids]) 18 | self.env.episode_sums[key][env_ids] = 0. 
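            # zero the accumulated episode reward sums for the environments that
            # just terminated, so the next episode's averages start fresh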
19 | 20 | # log additional curriculum info 21 | if self.env.cfg.terrain.curriculum: 22 | extras["train/episode"]["terrain_level"] = torch.mean( 23 | self.env.terrain_levels.float()) 24 | if self.env.cfg.commands.command_curriculum: 25 | commands = self.env.commands 26 | extras["env_bins"] = torch.Tensor(self.env.env_command_bins) 27 | extras["train/episode"]["min_command_duration"] = torch.min(commands[:, 8]) 28 | extras["train/episode"]["max_command_duration"] = torch.max(commands[:, 8]) 29 | extras["train/episode"]["min_command_bound"] = torch.min(commands[:, 7]) 30 | extras["train/episode"]["max_command_bound"] = torch.max(commands[:, 7]) 31 | extras["train/episode"]["min_command_offset"] = torch.min(commands[:, 6]) 32 | extras["train/episode"]["max_command_offset"] = torch.max(commands[:, 6]) 33 | extras["train/episode"]["min_command_phase"] = torch.min(commands[:, 5]) 34 | extras["train/episode"]["max_command_phase"] = torch.max(commands[:, 5]) 35 | extras["train/episode"]["min_command_freq"] = torch.min(commands[:, 4]) 36 | extras["train/episode"]["max_command_freq"] = torch.max(commands[:, 4]) 37 | extras["train/episode"]["min_command_x_vel"] = torch.min(commands[:, 0]) 38 | extras["train/episode"]["max_command_x_vel"] = torch.max(commands[:, 0]) 39 | extras["train/episode"]["min_command_y_vel"] = torch.min(commands[:, 1]) 40 | extras["train/episode"]["max_command_y_vel"] = torch.max(commands[:, 1]) 41 | extras["train/episode"]["min_command_yaw_vel"] = torch.min(commands[:, 2]) 42 | extras["train/episode"]["max_command_yaw_vel"] = torch.max(commands[:, 2]) 43 | # if self.env.cfg.commands.num_commands > 9: 44 | extras["train/episode"]["min_command_swing_height"] = torch.min(commands[:, 9]) 45 | extras["train/episode"]["max_command_swing_height"] = torch.max(commands[:, 9]) 46 | for curriculum, category in zip(self.env.curricula, self.env.category_names): 47 | extras["train/episode"][f"command_area_{category}"] = np.sum(curriculum.weights) / \ 48 | curriculum.weights.shape[0] 49 | 50 | extras["train/episode"]["min_action"] = torch.min(self.env.actions) 51 | extras["train/episode"]["max_action"] = torch.max(self.env.actions) 52 | 53 | extras["curriculum/distribution"] = {} 54 | for curriculum, category in zip(self.env.curricula, self.env.category_names): 55 | extras[f"curriculum/distribution"][f"weights_{category}"] = curriculum.weights 56 | extras[f"curriculum/distribution"][f"grid_{category}"] = curriculum.grid 57 | if self.env.cfg.env.send_timeouts: 58 | extras["time_outs"] = self.env.time_out_buf 59 | 60 | return extras 61 | -------------------------------------------------------------------------------- /dribblebot/utils/math_utils.py: -------------------------------------------------------------------------------- 1 | # License: see [LICENSE, LICENSES/legged_gym/LICENSE] 2 | 3 | from typing import Tuple 4 | 5 | import numpy as np 6 | import torch 7 | from isaacgym.torch_utils import quat_apply, normalize 8 | from torch import Tensor 9 | 10 | 11 | # @ torch.jit.script 12 | def quat_apply_yaw(quat, vec): 13 | quat_yaw = quat.clone().view(-1, 4) 14 | quat_yaw[:, :2] = 0. 
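    # zero the x and y quaternion components (isaacgym quaternions are (x, y, z, w)),
    # keeping only the yaw rotation; the result is renormalized before being applied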
15 | quat_yaw = normalize(quat_yaw) 16 | return quat_apply(quat_yaw, vec) 17 | 18 | 19 | # @ torch.jit.script 20 | def wrap_to_pi(angles): 21 | angles %= 2 * np.pi 22 | angles -= 2 * np.pi * (angles > np.pi) 23 | return angles 24 | 25 | 26 | # @ torch.jit.script 27 | def torch_rand_sqrt_float(lower, upper, shape, device): 28 | # type: (float, float, Tuple[int, int], str) -> Tensor 29 | r = 2 * torch.rand(*shape, device=device) - 1 30 | r = torch.where(r < 0., -torch.sqrt(-r), torch.sqrt(r)) 31 | r = (r + 1.) / 2. 32 | return (upper - lower) * r + lower 33 | 34 | 35 | def get_scale_shift(range): 36 | scale = 2. / (range[1] - range[0]) 37 | shift = (range[1] + range[0]) / 2. 38 | return scale, shift 39 | -------------------------------------------------------------------------------- /dribblebot/utils/terrain.py: -------------------------------------------------------------------------------- 1 | # License: see [LICENSE, LICENSES/legged_gym/LICENSE] 2 | 3 | import math 4 | 5 | import numpy as np 6 | from isaacgym import terrain_utils 7 | from numpy.random import choice 8 | 9 | from dribblebot.envs.base.legged_robot_config import Cfg 10 | 11 | 12 | class Terrain: 13 | def __init__(self, cfg: Cfg.terrain, num_robots, eval_cfg=None, num_eval_robots=0) -> None: 14 | 15 | self.cfg = cfg 16 | self.eval_cfg = eval_cfg 17 | self.num_robots = num_robots 18 | self.type = cfg.mesh_type 19 | if self.type in ["none", 'plane']: 20 | return 21 | self.train_rows, self.train_cols, self.eval_rows, self.eval_cols = self.load_cfgs() 22 | self.tot_rows = len(self.train_rows) + len(self.eval_rows) 23 | self.tot_cols = max(len(self.train_cols), len(self.eval_cols)) 24 | self.cfg.env_length = cfg.terrain_length 25 | self.cfg.env_width = cfg.terrain_width 26 | 27 | self.height_field_raw = np.zeros((self.tot_rows, self.tot_cols), dtype=np.int16) 28 | 29 | self.initialize_terrains() 30 | 31 | self.heightsamples = self.height_field_raw 32 | if self.type == "trimesh": 33 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh(self.height_field_raw, 34 | self.cfg.horizontal_scale, 35 | self.cfg.vertical_scale, 36 | self.cfg.slope_treshold) 37 | 38 | def load_cfgs(self): 39 | self._load_cfg(self.cfg) 40 | self.cfg.row_indices = np.arange(0, self.cfg.tot_rows) 41 | self.cfg.col_indices = np.arange(0, self.cfg.tot_cols) 42 | self.cfg.x_offset = 0 43 | self.cfg.rows_offset = 0 44 | if self.eval_cfg is None: 45 | return self.cfg.row_indices, self.cfg.col_indices, [], [] 46 | else: 47 | self._load_cfg(self.eval_cfg) 48 | self.eval_cfg.row_indices = np.arange(self.cfg.tot_rows, self.cfg.tot_rows + self.eval_cfg.tot_rows) 49 | self.eval_cfg.col_indices = np.arange(0, self.eval_cfg.tot_cols) 50 | self.eval_cfg.x_offset = self.cfg.tot_rows 51 | self.eval_cfg.rows_offset = self.cfg.num_rows 52 | return self.cfg.row_indices, self.cfg.col_indices, self.eval_cfg.row_indices, self.eval_cfg.col_indices 53 | 54 | def _load_cfg(self, cfg): 55 | cfg.proportions = [np.sum(cfg.terrain_proportions[:i + 1]) for i in range(len(cfg.terrain_proportions))] 56 | 57 | cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols 58 | cfg.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3)) 59 | 60 | cfg.width_per_env_pixels = int(cfg.terrain_length / cfg.horizontal_scale) 61 | cfg.length_per_env_pixels = int(cfg.terrain_width / cfg.horizontal_scale) 62 | 63 | cfg.border = int(cfg.border_size / cfg.horizontal_scale) 64 | cfg.tot_cols = int(cfg.num_cols * cfg.width_per_env_pixels) + 2 * cfg.border 65 | cfg.tot_rows = int(cfg.num_rows * 
cfg.length_per_env_pixels) + 2 * cfg.border 66 | 67 | def initialize_terrains(self): 68 | self._initialize_terrain(self.cfg) 69 | if self.eval_cfg is not None: 70 | self._initialize_terrain(self.eval_cfg) 71 | 72 | def _initialize_terrain(self, cfg): 73 | if cfg.curriculum: 74 | self.curriculum(cfg) 75 | else: 76 | self.randomized_terrain(cfg) 77 | 78 | def randomized_terrain(self, cfg): 79 | for k in range(cfg.num_sub_terrains): 80 | # Env coordinates in the world 81 | (i, j) = np.unravel_index(k, (cfg.num_rows, cfg.num_cols)) 82 | 83 | choice = np.random.uniform(0, 1) 84 | difficulty = np.random.choice([0.5, 0.75, 0.9]) 85 | terrain = self.make_terrain(cfg, choice, difficulty, cfg.proportions) 86 | self.add_terrain_to_map(cfg, terrain, i, j) 87 | 88 | def curriculum(self, cfg): 89 | for j in range(cfg.num_cols): 90 | for i in range(cfg.num_rows): 91 | difficulty = i / cfg.num_rows * cfg.difficulty_scale 92 | choice = j / cfg.num_cols + 0.001 93 | 94 | terrain = self.make_terrain(cfg, choice, difficulty, cfg.proportions) 95 | self.add_terrain_to_map(cfg, terrain, i, j) 96 | 97 | def make_terrain(self, cfg, choice, difficulty, proportions): 98 | terrain = terrain_utils.SubTerrain("terrain", 99 | width=cfg.width_per_env_pixels, 100 | length=cfg.width_per_env_pixels, 101 | vertical_scale=cfg.vertical_scale, 102 | horizontal_scale=cfg.horizontal_scale) 103 | slope = difficulty * 0.4 104 | step_height = 0.05 + 0.18 * difficulty 105 | discrete_obstacles_height = 0.05 + difficulty * (cfg.max_platform_height - 0.05) 106 | stepping_stones_size = 1.5 * (1.05 - difficulty) 107 | stone_distance = 0.05 if difficulty == 0 else 0.1 108 | if choice < proportions[0]: 109 | if choice < proportions[0] / 2: 110 | slope *= -1 111 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 112 | elif choice < proportions[1]: 113 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.) 114 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, 115 | step=self.cfg.terrain_smoothness, downsampled_scale=0.2) 116 | elif choice < proportions[3]: 117 | if choice < proportions[2]: 118 | step_height *= -1 119 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.) 120 | elif choice < proportions[4]: 121 | num_rectangles = 20 122 | rectangle_min_size = 1. 123 | rectangle_max_size = 2. 124 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, 125 | rectangle_max_size, num_rectangles, platform_size=3.) 126 | elif choice < proportions[5]: 127 | terrain_utils.stepping_stones_terrain(terrain, stone_size=stepping_stones_size, 128 | stone_distance=stone_distance, max_height=0., platform_size=4.) 
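        # choices falling into the next two proportion buckets leave the patch flat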
129 | elif choice < proportions[6]: 130 | pass 131 | elif choice < proportions[7]: 132 | pass 133 | elif choice < proportions[8]: 134 | terrain_utils.random_uniform_terrain(terrain, min_height=-cfg.terrain_noise_magnitude, 135 | max_height=cfg.terrain_noise_magnitude, step=0.005, 136 | downsampled_scale=0.2) 137 | elif choice < proportions[9]: 138 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, 139 | step=self.cfg.terrain_smoothness, downsampled_scale=0.2) 140 | terrain.height_field_raw[0:terrain.length // 2, :] = 0 141 | 142 | return terrain 143 | 144 | def add_terrain_to_map(self, cfg, terrain, row, col): 145 | i = row 146 | j = col 147 | # map coordinate system 148 | start_x = cfg.border + i * cfg.length_per_env_pixels + cfg.x_offset 149 | end_x = cfg.border + (i + 1) * cfg.length_per_env_pixels + cfg.x_offset 150 | start_y = cfg.border + j * cfg.width_per_env_pixels 151 | end_y = cfg.border + (j + 1) * cfg.width_per_env_pixels 152 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw 153 | 154 | env_origin_x = (i + 0.5) * cfg.terrain_length + cfg.x_offset * terrain.horizontal_scale 155 | env_origin_y = (j + 0.5) * cfg.terrain_width 156 | x1 = int((cfg.terrain_length / 2. - 1) / terrain.horizontal_scale) + cfg.x_offset 157 | x2 = int((cfg.terrain_length / 2. + 1) / terrain.horizontal_scale) + cfg.x_offset 158 | y1 = int((cfg.terrain_width / 2. - 1) / terrain.horizontal_scale) 159 | y2 = int((cfg.terrain_width / 2. + 1) / terrain.horizontal_scale) 160 | env_origin_z = np.max(self.height_field_raw[start_x: end_x, start_y:end_y]) * terrain.vertical_scale 161 | 162 | cfg.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 163 | 164 | 165 | 166 | def perlin(x, y, seed=0): 167 | # permutation table 168 | np.random.seed(seed) 169 | p = np.arange(256, dtype=int) 170 | np.random.shuffle(p) 171 | p = np.stack([p, p]).flatten() 172 | # coordinates of the top-left 173 | xi, yi = x.astype(int), y.astype(int) 174 | # internal coordinates 175 | xf, yf = x - xi, y - yi 176 | # fade factors 177 | u, v = fade(xf), fade(yf) 178 | # noise components 179 | n00 = gradient(p[p[xi] + yi], xf, yf) 180 | n01 = gradient(p[p[xi] + yi + 1], xf, yf - 1) 181 | n11 = gradient(p[p[xi + 1] + yi + 1], xf - 1, yf - 1) 182 | n10 = gradient(p[p[xi + 1] + yi], xf - 1, yf) 183 | # combine noises 184 | x1 = lerp(n00, n10, u) 185 | x2 = lerp(n01, n11, u) # FIX1: I was using n10 instead of n01 186 | return lerp(x1, x2, v) # FIX2: I also had to reverse x1 and x2 here 187 | 188 | def lerp(a, b, x): 189 | "linear interpolation" 190 | return a + x * (b - a) 191 | 192 | def fade(t): 193 | "6t^5 - 15t^4 + 10t^3" 194 | return 6 * t**5 - 15 * t**4 + 10 * t**3 195 | 196 | def gradient(h, x, y): 197 | "grad converts h to the right gradient vector and return the dot product with (x,y)" 198 | vectors = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]]) 199 | g = vectors[h % 4] 200 | return g[:, :, 0] * x + g[:, :, 1] * y 201 | -------------------------------------------------------------------------------- /dribblebot_learn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot_learn/__init__.py -------------------------------------------------------------------------------- /dribblebot_learn/env/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .vec_env import 
VecEnv -------------------------------------------------------------------------------- /dribblebot_learn/env/vec_env.py: -------------------------------------------------------------------------------- 1 | # License: see [LICENSE, LICENSES/rsl_rl/LICENSE] 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import Tuple, Union 5 | 6 | import torch 7 | 8 | 9 | # minimal interface of the environment 10 | class VecEnv(ABC): 11 | num_envs: int 12 | num_obs: int 13 | num_privileged_obs: int 14 | num_actions: int 15 | max_episode_length: int 16 | privileged_obs_buf: torch.Tensor 17 | obs_buf: torch.Tensor 18 | rew_buf: torch.Tensor 19 | reset_buf: torch.Tensor 20 | episode_length_buf: torch.Tensor # current episode duration 21 | extras: dict 22 | device: torch.device 23 | 24 | @abstractmethod 25 | def step(self, actions: torch.Tensor) -> Tuple[ 26 | torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]: 27 | pass 28 | 29 | @abstractmethod 30 | def reset(self, env_ids: Union[list, torch.Tensor]): 31 | pass 32 | 33 | @abstractmethod 34 | def get_observations(self) -> torch.Tensor: 35 | pass 36 | 37 | @abstractmethod 38 | def get_privileged_observations(self) -> Union[torch.Tensor, None]: 39 | pass 40 | -------------------------------------------------------------------------------- /dribblebot_learn/eval_metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot_learn/eval_metrics/__init__.py -------------------------------------------------------------------------------- /dribblebot_learn/eval_metrics/domain_randomization.py: -------------------------------------------------------------------------------- 1 | from dribblebot.envs.base.legged_robot_config import Cfg 2 | 3 | 4 | def base_set(): 5 | # set basics 6 | Cfg.terrain.teleport_robots = True 7 | Cfg.terrain.border_size = 50 8 | Cfg.terrain.num_rows = 10 9 | Cfg.terrain.num_cols = 10 10 | Cfg.commands.resampling_time = 1e9 11 | Cfg.env.episode_length_s = 500 12 | Cfg.rewards.terminal_body_height = 0.0 13 | Cfg.rewards.use_terminal_body_height = True 14 | 15 | 16 | def rand_regular(): 17 | Cfg.domain_rand.randomize_friction = True 18 | Cfg.domain_rand.friction_range = [0.05, 4.5] 19 | Cfg.domain_rand.randomize_restitution = True 20 | Cfg.domain_rand.restitution_range = [0, 1.0] 21 | Cfg.domain_rand.restitution = 0.5 22 | Cfg.domain_rand.randomize_base_mass = True 23 | Cfg.domain_rand.added_mass_range = [-1., 3.] 24 | Cfg.domain_rand.randomize_com_displacement = True 25 | Cfg.domain_rand.com_displacement_range = [-0.1, 0.1] 26 | Cfg.domain_rand.randomize_motor_strength = True 27 | Cfg.domain_rand.motor_strength_range = [0.9, 1.1] 28 | Cfg.domain_rand.randomize_Kp_factor = False 29 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3] 30 | Cfg.domain_rand.randomize_Kd_factor = False 31 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5] 32 | Cfg.domain_rand.push_robots = False 33 | Cfg.domain_rand.push_interval_s = 15 34 | Cfg.domain_rand.max_push_vel_xy = 1. 35 | 36 | 37 | def rand_large(): 38 | Cfg.domain_rand.randomize_friction = True 39 | Cfg.domain_rand.friction_range = [0.04, 6.0] 40 | Cfg.domain_rand.randomize_restitution = True 41 | Cfg.domain_rand.restitution_range = [0, 1.0] 42 | Cfg.domain_rand.restitution = 0.5 43 | Cfg.domain_rand.randomize_base_mass = True 44 | Cfg.domain_rand.added_mass_range = [-1.5, 4.] 
45 | Cfg.domain_rand.randomize_com_displacement = True 46 | Cfg.domain_rand.com_displacement_range = [-0.13, 0.13] 47 | Cfg.domain_rand.randomize_motor_strength = True 48 | Cfg.domain_rand.motor_strength_range = [0.88, 1.12] # table 1 in RMA may have a typo 49 | Cfg.domain_rand.randomize_Kp_factor = False 50 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3] 51 | Cfg.domain_rand.randomize_Kd_factor = False 52 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5] 53 | Cfg.domain_rand.push_robots = False 54 | Cfg.domain_rand.push_interval_s = 15 55 | Cfg.domain_rand.max_push_vel_xy = 1. 56 | 57 | 58 | def static_low(): 59 | Cfg.domain_rand.randomize_friction = True 60 | Cfg.domain_rand.friction_range = [0.05, 0.06] 61 | Cfg.domain_rand.randomize_restitution = True 62 | Cfg.domain_rand.restitution_range = [0, 0.01] 63 | Cfg.domain_rand.restitution = 0.5 64 | Cfg.domain_rand.randomize_base_mass = True 65 | Cfg.domain_rand.added_mass_range = [-1., -0.99] 66 | Cfg.domain_rand.randomize_com_displacement = True 67 | Cfg.domain_rand.com_displacement_range = [-0.1, -0.09] 68 | Cfg.domain_rand.randomize_motor_strength = True 69 | Cfg.domain_rand.motor_strength_range = [0.9, -0.99] 70 | Cfg.domain_rand.randomize_Kp_factor = False 71 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3] 72 | Cfg.domain_rand.randomize_Kd_factor = False 73 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5] 74 | Cfg.domain_rand.push_robots = False 75 | Cfg.domain_rand.push_interval_s = 15 76 | Cfg.domain_rand.max_push_vel_xy = 1. 77 | 78 | 79 | def static_medium(): 80 | Cfg.domain_rand.randomize_friction = True 81 | Cfg.domain_rand.friction_range = [1.0, 1.01] 82 | Cfg.domain_rand.randomize_restitution = True 83 | Cfg.domain_rand.restitution_range = [0.5, 0.51] 84 | Cfg.domain_rand.restitution = 0.5 85 | Cfg.domain_rand.randomize_base_mass = True 86 | Cfg.domain_rand.added_mass_range = [0.0, 0.01] 87 | Cfg.domain_rand.randomize_com_displacement = True 88 | Cfg.domain_rand.com_displacement_range = [0.0, 0.01] 89 | Cfg.domain_rand.randomize_motor_strength = True 90 | Cfg.domain_rand.motor_strength_range = [1.0, 1.01] 91 | Cfg.domain_rand.randomize_Kp_factor = False 92 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3] 93 | Cfg.domain_rand.randomize_Kd_factor = False 94 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5] 95 | Cfg.domain_rand.push_robots = False 96 | Cfg.domain_rand.push_interval_s = 15 97 | Cfg.domain_rand.max_push_vel_xy = 1. 98 | 99 | 100 | def static_high(): 101 | Cfg.domain_rand.randomize_friction = True 102 | Cfg.domain_rand.friction_range = [4.49, 4.5] 103 | Cfg.domain_rand.randomize_restitution = True 104 | Cfg.domain_rand.restitution_range = [0.99, 1.0] 105 | Cfg.domain_rand.restitution = 0.5 106 | Cfg.domain_rand.randomize_base_mass = True 107 | Cfg.domain_rand.added_mass_range = [2.99, 3.] 108 | Cfg.domain_rand.randomize_com_displacement = True 109 | Cfg.domain_rand.com_displacement_range = [0.09, 0.1] 110 | Cfg.domain_rand.randomize_motor_strength = True 111 | Cfg.domain_rand.motor_strength_range = [1.09, 1.1] 112 | Cfg.domain_rand.randomize_Kp_factor = False 113 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3] 114 | Cfg.domain_rand.randomize_Kd_factor = False 115 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5] 116 | Cfg.domain_rand.push_robots = False 117 | Cfg.domain_rand.push_interval_s = 15 118 | Cfg.domain_rand.max_push_vel_xy = 1. 
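
# The static_low / static_medium / static_high presets above pin most randomization
# ranges to very narrow intervals, so policies are evaluated at roughly fixed low,
# nominal, or high physical parameters rather than over a broad distribution.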
119 | 120 | def only_base_mass(): 121 | Cfg.domain_rand.randomize_friction = True 122 | Cfg.domain_rand.friction_range = [1.0, 1.01] 123 | Cfg.domain_rand.randomize_restitution = True 124 | Cfg.domain_rand.restitution_range = [0.5, 0.51] 125 | Cfg.domain_rand.restitution = 0.5 126 | Cfg.domain_rand.randomize_base_mass = True 127 | Cfg.domain_rand.added_mass_range = [-1, 3] 128 | Cfg.domain_rand.randomize_com_displacement = True 129 | Cfg.domain_rand.com_displacement_range = [0.0, 0.01] 130 | Cfg.domain_rand.randomize_motor_strength = True 131 | Cfg.domain_rand.motor_strength_range = [1.0, 1.01] 132 | Cfg.domain_rand.randomize_Kp_factor = False 133 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3] 134 | Cfg.domain_rand.randomize_Kd_factor = False 135 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5] 136 | Cfg.domain_rand.push_robots = False 137 | Cfg.domain_rand.push_interval_s = 15 138 | Cfg.domain_rand.max_push_vel_xy = 1. 139 | 140 | 141 | DR_SETTINGS = dict( 142 | rand_regular=rand_regular, 143 | rand_large=rand_large, 144 | static_low=static_low, 145 | static_medium=static_medium, 146 | static_high=static_high, 147 | only_base_mass=only_base_mass, 148 | ) 149 | -------------------------------------------------------------------------------- /dribblebot_learn/eval_metrics/metrics.py: -------------------------------------------------------------------------------- 1 | def to_numpy(fn): 2 | def thunk(*args, **kwargs): 3 | return fn(*args, **kwargs).cpu().numpy() 4 | 5 | return thunk 6 | 7 | 8 | def lin_vel_rmsd(env, actor_critic, obs): 9 | return ((env.base_lin_vel[:, 0] - env.commands[:, 0]) ** 2).cpu() ** 0.5 10 | 11 | 12 | def ang_vel_rmsd(env, actor_critic, obs): 13 | return ((env.base_ang_vel[:, 2] - env.commands[:, 2]) ** 2).cpu() ** 0.5 14 | 15 | 16 | def lin_vel_x(env, actor_critic, obs): 17 | return env.base_lin_vel[:, 0].cpu() 18 | 19 | 20 | def ang_vel_yaw(env, actor_critic, obs): 21 | return env.base_ang_vel[:, 2].cpu() 22 | 23 | 24 | def base_height(env, actor_critic, obs): 25 | import torch 26 | return torch.mean(env.root_states[:, 2].unsqueeze(1) - env.measured_heights, dim=1).cpu() 27 | 28 | 29 | def max_torques(env, actor_critic, obs): 30 | import torch 31 | max_torque, max_torque_indices = torch.max(torch.abs(env.torques), dim=1) 32 | return max_torque.cpu() 33 | 34 | 35 | def power_consumption(env, actor_critic, obs): 36 | import torch 37 | return torch.sum(torch.multiply(env.torques, env.dof_vel), dim=1).cpu() 38 | 39 | 40 | def CoT(env, actor_critic, obs): 41 | # P / (mgv) 42 | import torch 43 | P = power_consumption(env, actor_critic, obs) 44 | m = (env.default_body_mass + env.payloads).cpu() 45 | g = 9.8 # m/s^2 46 | v = torch.norm(env.base_lin_vel[:, 0:2], dim=1).cpu() 47 | return P / (m * g * v) 48 | 49 | 50 | def froude_number(env, actor_critic, obs): 51 | # v^2 / (gh) 52 | v = lin_vel_x(env, actor_critic, obs) 53 | g = 9.8 54 | h = 0.30 55 | return v ** 2 / (g * h) 56 | 57 | 58 | def adaptation_loss(env, actor_critic, obs): 59 | import torch 60 | if hasattr(actor_critic, "adaptation_module"): 61 | pred = actor_critic.adaptation_module(obs["obs_history"]) 62 | target = actor_critic.env_factor_encoder(obs["privileged_obs"]) 63 | return torch.mean((pred.cpu().detach() - target.cpu().detach()) ** 2, dim=1) 64 | 65 | 66 | def auxiliary_rewards(env, actor_critic, obs): 67 | rewards = {} 68 | for i in range(len(env.reward_functions)): 69 | name = env.reward_names[i] 70 | rew = env.reward_functions[i]() * env.reward_scales[name] 71 | rewards[name] = rew.cpu().detach() 72 | 
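        # each entry holds the scaled per-step value of one reward term, detached for logging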
return rewards 73 | 74 | 75 | def termination(env, actor_critic, obs): 76 | return env.reset_buf.cpu().detach() 77 | 78 | 79 | def privileged_obs(env, actor_critic, obs): 80 | return obs["privileged_obs"].cpu().numpy() 81 | 82 | 83 | def latents(env, actor_critic, obs): 84 | return actor_critic.env_factor_encoder(obs["privileged_obs"]).cpu().numpy() 85 | 86 | 87 | METRICS_FNS = {name: fn for name, fn in locals().items() if name not in ['to_numpy'] and "__" not in name} 88 | 89 | if __name__ == '__main__': 90 | print(*METRICS_FNS.items(), sep="\n") 91 | 92 | import torch 93 | 94 | env = lambda: None 95 | env.base_lin_vel = torch.rand(10, 3) 96 | env.commands = torch.rand(10, 3) 97 | 98 | metric = lin_vel_rmsd(env, None, None) 99 | print(metric) 100 | -------------------------------------------------------------------------------- /dribblebot_learn/ppo_cse/__init__.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import deque 3 | import copy 4 | import os 5 | 6 | import torch 7 | # from ml_logger import logger 8 | import wandb 9 | from wandb_osh.hooks import TriggerWandbSyncHook 10 | 11 | from params_proto import PrefixProto 12 | 13 | from .actor_critic import ActorCritic 14 | from .rollout_storage import RolloutStorage 15 | 16 | 17 | def class_to_dict(obj) -> dict: 18 | if not hasattr(obj, "__dict__"): 19 | return obj 20 | result = {} 21 | for key in dir(obj): 22 | if key.startswith("_") or key == "terrain": 23 | continue 24 | element = [] 25 | val = getattr(obj, key) 26 | if isinstance(val, list): 27 | for item in val: 28 | element.append(class_to_dict(item)) 29 | else: 30 | element = class_to_dict(val) 31 | result[key] = element 32 | return result 33 | 34 | 35 | class DataCaches: 36 | def __init__(self, curriculum_bins): 37 | from dribblebot_learn.ppo_cse.metrics_caches import SlotCache, DistCache 38 | 39 | self.slot_cache = SlotCache(curriculum_bins) 40 | self.dist_cache = DistCache() 41 | 42 | 43 | caches = DataCaches(1) 44 | 45 | 46 | class RunnerArgs(PrefixProto, cli=False): 47 | # runner 48 | algorithm_class_name = 'RMA' 49 | num_steps_per_env = 24 # per iteration 50 | max_iterations = 1500 # number of policy updates 51 | 52 | # logging 53 | save_interval = 400 # check for potential saves every this many iterations 54 | save_video_interval = 100 55 | log_freq = 10 56 | 57 | # load and resume 58 | resume = False 59 | load_run = -1 # -1 = last run 60 | checkpoint = -1 # -1 = last saved model 61 | resume_path = None # updated from load_run and chkpt 62 | resume_curriculum = True 63 | resume_checkpoint = 'ac_weights_last.pt' 64 | 65 | 66 | class Runner: 67 | 68 | def __init__(self, env, device='cpu'): 69 | from .ppo import PPO 70 | 71 | self.device = device 72 | self.env = env 73 | 74 | actor_critic = ActorCritic(self.env.num_obs, 75 | self.env.num_privileged_obs, 76 | self.env.num_obs_history, 77 | self.env.num_actions, 78 | ).to(self.device) 79 | # Load weights from checkpoint 80 | if RunnerArgs.resume: 81 | body = wandb.restore(RunnerArgs.resume_checkpoint, run_path=RunnerArgs.resume_path) 82 | actor_critic.load_state_dict(torch.load(body.name)) 83 | print(f"Successfully loaded weights from checkpoint ({RunnerArgs.resume_checkpoint}) and run path ({RunnerArgs.resume_path})") 84 | 85 | self.alg = PPO(actor_critic, device=self.device) 86 | self.num_steps_per_env = RunnerArgs.num_steps_per_env 87 | 88 | # init storage and model 89 | self.alg.init_storage(self.env.num_train_envs, self.num_steps_per_env, 
[self.env.num_obs], 90 | [self.env.num_privileged_obs], [self.env.num_obs_history], [self.env.num_actions]) 91 | 92 | self.tot_timesteps = 0 93 | self.tot_time = 0 94 | self.current_learning_iteration = 0 95 | self.last_recording_it = -RunnerArgs.save_video_interval 96 | 97 | self.env.reset() 98 | 99 | def learn(self, num_learning_iterations, init_at_random_ep_len=False, eval_freq=100, curriculum_dump_freq=500, eval_expert=False): 100 | trigger_sync = TriggerWandbSyncHook() 101 | wandb.watch(self.alg.actor_critic, log="all", log_freq=RunnerArgs.log_freq) 102 | 103 | if init_at_random_ep_len: 104 | self.env.episode_length_buf[:] = torch.randint_like(self.env.episode_length_buf, 105 | high=int(self.env.max_episode_length)) 106 | 107 | # split train and test envs 108 | num_train_envs = self.env.num_train_envs 109 | 110 | obs_dict = self.env.get_observations() # TODO: check, is this correct on the first step? 111 | obs, privileged_obs, obs_history = obs_dict["obs"], obs_dict["privileged_obs"], obs_dict["obs_history"] 112 | obs, privileged_obs, obs_history = obs.to(self.device), privileged_obs.to(self.device), obs_history.to( 113 | self.device) 114 | self.alg.actor_critic.train() # switch to train mode (for dropout for example) 115 | 116 | rewbuffer = deque(maxlen=100) 117 | lenbuffer = deque(maxlen=100) 118 | cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device) 119 | cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device) 120 | 121 | tot_iter = self.current_learning_iteration + num_learning_iterations 122 | for it in range(self.current_learning_iteration, tot_iter): 123 | start = time.time() 124 | # Rollout 125 | with torch.inference_mode(): 126 | for i in range(self.num_steps_per_env): 127 | actions = self.alg.act(obs[:num_train_envs], privileged_obs[:num_train_envs], 128 | obs_history[:num_train_envs]) 129 | 130 | ret = self.env.step(actions) 131 | obs_dict, rewards, dones, infos = ret 132 | obs, privileged_obs, obs_history = obs_dict["obs"], obs_dict["privileged_obs"], obs_dict[ 133 | "obs_history"] 134 | 135 | obs, privileged_obs, obs_history, rewards, dones = obs.to(self.device), privileged_obs.to( 136 | self.device), obs_history.to(self.device), rewards.to(self.device), dones.to(self.device) 137 | self.alg.process_env_step(rewards[:num_train_envs], dones[:num_train_envs], infos) 138 | 139 | if 'train/episode' in infos: 140 | wandb.log(infos['train/episode'], step=it) 141 | 142 | if 'curriculum' in infos: 143 | 144 | cur_reward_sum += rewards 145 | cur_episode_length += 1 146 | 147 | new_ids = (dones > 0).nonzero(as_tuple=False) 148 | 149 | new_ids_train = new_ids[new_ids < num_train_envs] 150 | rewbuffer.extend(cur_reward_sum[new_ids_train].cpu().numpy().tolist()) 151 | lenbuffer.extend(cur_episode_length[new_ids_train].cpu().numpy().tolist()) 152 | cur_reward_sum[new_ids_train] = 0 153 | cur_episode_length[new_ids_train] = 0 154 | 155 | if 'curriculum/distribution' in infos: 156 | distribution = infos['curriculum/distribution'] 157 | 158 | self.alg.compute_returns(obs_history[:num_train_envs], privileged_obs[:num_train_envs]) 159 | 160 | mean_value_loss, mean_surrogate_loss, mean_adaptation_module_loss, mean_decoder_loss, mean_decoder_loss_student, mean_adaptation_module_test_loss, mean_decoder_test_loss, mean_decoder_test_loss_student, mean_adaptation_losses_dict = self.alg.update() 161 | stop = time.time() 162 | learn_time = stop - start 163 | 164 | wandb.log({ 165 | "time_iter": learn_time, 166 | # "time_iter": 
logger.split('epoch'), 167 | "adaptation_loss": mean_adaptation_module_loss, 168 | "mean_value_loss": mean_value_loss, 169 | "mean_surrogate_loss": mean_surrogate_loss, 170 | "mean_decoder_loss": mean_decoder_loss, 171 | "mean_decoder_loss_student": mean_decoder_loss_student, 172 | "mean_decoder_test_loss": mean_decoder_test_loss, 173 | "mean_decoder_test_loss_student": mean_decoder_test_loss_student, 174 | "mean_adaptation_module_test_loss": mean_adaptation_module_test_loss 175 | }, step=it) 176 | 177 | 178 | 179 | # logger.store_metrics(**mean_adaptation_losses_dict) 180 | wandb.log(mean_adaptation_losses_dict, step=it) 181 | 182 | if RunnerArgs.save_video_interval: 183 | self.log_video(it) 184 | 185 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs 186 | 187 | wandb.log({"timesteps": self.tot_timesteps, "iterations": it}, step=it) 188 | trigger_sync() 189 | 190 | if it % RunnerArgs.save_interval == 0: 191 | print(f"Saving model at iteration {it}") 192 | 193 | path = './tmp/legged_data' 194 | os.makedirs(path, exist_ok=True) 195 | 196 | ac_weight_path = f'{path}/ac_weights_{it}.pt' 197 | torch.save(self.alg.actor_critic.state_dict(), ac_weight_path) 198 | wandb.save(ac_weight_path) 199 | 200 | ac_weight_path = f'{path}/ac_weights_latest.pt' 201 | torch.save(self.alg.actor_critic.state_dict(), ac_weight_path) 202 | wandb.save(ac_weight_path) 203 | 204 | adaptation_module_path = f'{path}/adaptation_module_{it}.jit' 205 | adaptation_module = copy.deepcopy(self.alg.actor_critic.adaptation_module).to('cpu') 206 | traced_script_adaptation_module = torch.jit.script(adaptation_module) 207 | traced_script_adaptation_module.save(adaptation_module_path) 208 | 209 | adaptation_module_path = f'{path}/adaptation_module_latest.jit' 210 | traced_script_adaptation_module.save(adaptation_module_path) 211 | 212 | body_path = f'{path}/body_{it}.jit' 213 | body_model = copy.deepcopy(self.alg.actor_critic.actor_body).to('cpu') 214 | traced_script_body_module = torch.jit.script(body_model) 215 | traced_script_body_module.save(body_path) 216 | 217 | body_path = f'{path}/body_latest.jit' 218 | traced_script_body_module.save(body_path) 219 | 220 | # logger.upload_file(file_path=adaptation_module_path, target_path=f"checkpoints/", once=False) 221 | # logger.upload_file(file_path=body_path, target_path=f"checkpoints/", once=False) 222 | 223 | ac_weights_path = f"{path}/ac_weights_{it}.pt" 224 | torch.save(self.alg.actor_critic.state_dict(), ac_weights_path) 225 | ac_weights_path = f"{path}/ac_weights_latest.pt" 226 | torch.save(self.alg.actor_critic.state_dict(), ac_weights_path) 227 | 228 | wandb.save(f"./tmp/legged_data/adaptation_module_{it}.jit") 229 | wandb.save(f"./tmp/legged_data/body_{it}.jit") 230 | wandb.save(f"./tmp/legged_data/ac_weights_{it}.pt") 231 | wandb.save(f"./tmp/legged_data/adaptation_module_latest.jit") 232 | wandb.save(f"./tmp/legged_data/body_latest.jit") 233 | wandb.save(f"./tmp/legged_data/ac_weights_latest.pt") 234 | 235 | 236 | self.current_learning_iteration += num_learning_iterations 237 | 238 | path = './tmp/legged_data' 239 | 240 | os.makedirs(path, exist_ok=True) 241 | 242 | def log_video(self, it): 243 | if it - self.last_recording_it >= RunnerArgs.save_video_interval: 244 | self.env.start_recording() 245 | print("START RECORDING") 246 | self.last_recording_it = it 247 | 248 | frames = self.env.get_complete_frames() 249 | if len(frames) > 0: 250 | self.env.pause_recording() 251 | print("LOGGING VIDEO") 252 | import numpy as np 253 | video_array = 
np.concatenate([np.expand_dims(frame, axis=0) for frame in frames ], axis=0).swapaxes(1, 3).swapaxes(2, 3) 254 | print(video_array.shape) 255 | # logger.save_video(frames, f"videos/{it:05d}.mp4", fps=1 / self.env.dt) 256 | wandb.log({"video": wandb.Video(video_array, fps=1 / self.env.dt)}, step=it) 257 | 258 | def get_inference_policy(self, device=None): 259 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 260 | if device is not None: 261 | self.alg.actor_critic.to(device) 262 | return self.alg.actor_critic.act_inference 263 | 264 | def get_expert_policy(self, device=None): 265 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example) 266 | if device is not None: 267 | self.alg.actor_critic.to(device) 268 | return self.alg.actor_critic.act_expert 269 | -------------------------------------------------------------------------------- /dribblebot_learn/ppo_cse/actor_critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from params_proto import PrefixProto 4 | from torch.distributions import Normal 5 | 6 | 7 | class AC_Args(PrefixProto, cli=False): 8 | # policy 9 | init_noise_std = 1.0 10 | actor_hidden_dims = [512, 256, 128] 11 | critic_hidden_dims = [512, 256, 128] 12 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid 13 | 14 | adaptation_module_branch_hidden_dims = [256, 128] 15 | 16 | adaptation_labels = [] 17 | adaptation_dims = [] 18 | adaptation_weights = [] 19 | 20 | use_decoder = False 21 | 22 | 23 | class ActorCritic(nn.Module): 24 | is_recurrent = False 25 | 26 | def __init__(self, num_obs, 27 | num_privileged_obs, 28 | num_obs_history, 29 | num_actions, 30 | **kwargs): 31 | if kwargs: 32 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str( 33 | [key for key in kwargs.keys()])) 34 | self.decoder = AC_Args.use_decoder 35 | super().__init__() 36 | 37 | self.adaptation_labels = AC_Args.adaptation_labels 38 | self.adaptation_dims = AC_Args.adaptation_dims 39 | self.adaptation_weights = AC_Args.adaptation_weights 40 | 41 | if len(self.adaptation_weights) < len(self.adaptation_labels): 42 | # pad 43 | self.adaptation_weights += [1.0] * (len(self.adaptation_labels) - len(self.adaptation_weights)) 44 | 45 | self.num_obs_history = num_obs_history 46 | self.num_privileged_obs = num_privileged_obs 47 | 48 | activation = get_activation(AC_Args.activation) 49 | 50 | # Adaptation module 51 | adaptation_module_layers = [] 52 | adaptation_module_layers.append(nn.Linear(self.num_obs_history, AC_Args.adaptation_module_branch_hidden_dims[0])) 53 | adaptation_module_layers.append(activation) 54 | for l in range(len(AC_Args.adaptation_module_branch_hidden_dims)): 55 | if l == len(AC_Args.adaptation_module_branch_hidden_dims) - 1: 56 | adaptation_module_layers.append( 57 | nn.Linear(AC_Args.adaptation_module_branch_hidden_dims[l], self.num_privileged_obs)) 58 | else: 59 | adaptation_module_layers.append( 60 | nn.Linear(AC_Args.adaptation_module_branch_hidden_dims[l], 61 | AC_Args.adaptation_module_branch_hidden_dims[l + 1])) 62 | adaptation_module_layers.append(activation) 63 | self.adaptation_module = nn.Sequential(*adaptation_module_layers) 64 | 65 | 66 | 67 | # Policy 68 | actor_layers = [] 69 | actor_layers.append(nn.Linear(self.num_privileged_obs + self.num_obs_history, AC_Args.actor_hidden_dims[0])) 70 | actor_layers.append(activation) 71 | for l in range(len(AC_Args.actor_hidden_dims)): 72 | if l == 
len(AC_Args.actor_hidden_dims) - 1: 73 | actor_layers.append(nn.Linear(AC_Args.actor_hidden_dims[l], num_actions)) 74 | else: 75 | actor_layers.append(nn.Linear(AC_Args.actor_hidden_dims[l], AC_Args.actor_hidden_dims[l + 1])) 76 | actor_layers.append(activation) 77 | self.actor_body = nn.Sequential(*actor_layers) 78 | 79 | # Value function 80 | critic_layers = [] 81 | critic_layers.append(nn.Linear(self.num_privileged_obs + self.num_obs_history, AC_Args.critic_hidden_dims[0])) 82 | critic_layers.append(activation) 83 | for l in range(len(AC_Args.critic_hidden_dims)): 84 | if l == len(AC_Args.critic_hidden_dims) - 1: 85 | critic_layers.append(nn.Linear(AC_Args.critic_hidden_dims[l], 1)) 86 | else: 87 | critic_layers.append(nn.Linear(AC_Args.critic_hidden_dims[l], AC_Args.critic_hidden_dims[l + 1])) 88 | critic_layers.append(activation) 89 | self.critic_body = nn.Sequential(*critic_layers) 90 | 91 | print(f"Adaptation Module: {self.adaptation_module}") 92 | print(f"Actor MLP: {self.actor_body}") 93 | print(f"Critic MLP: {self.critic_body}") 94 | 95 | # Action noise 96 | self.std = nn.Parameter(AC_Args.init_noise_std * torch.ones(num_actions)) 97 | self.distribution = None 98 | # disable args validation for speedup 99 | Normal.set_default_validate_args = False 100 | 101 | @staticmethod 102 | # not used at the moment 103 | def init_weights(sequential, scales): 104 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in 105 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))] 106 | 107 | def reset(self, dones=None): 108 | pass 109 | 110 | def forward(self): 111 | raise NotImplementedError 112 | 113 | @property 114 | def action_mean(self): 115 | return self.distribution.mean 116 | 117 | @property 118 | def action_std(self): 119 | return self.distribution.stddev 120 | 121 | @property 122 | def entropy(self): 123 | return self.distribution.entropy().sum(dim=-1) 124 | 125 | def update_distribution(self, observation_history): 126 | latent = self.adaptation_module(observation_history) 127 | mean = self.actor_body(torch.cat((observation_history, latent), dim=-1)) 128 | self.distribution = Normal(mean, mean * 0. 
+ self.std) 129 | 130 | def act(self, observation_history, **kwargs): 131 | self.update_distribution(observation_history) 132 | return self.distribution.sample() 133 | 134 | def get_actions_log_prob(self, actions): 135 | return self.distribution.log_prob(actions).sum(dim=-1) 136 | 137 | def act_expert(self, ob, policy_info={}): 138 | return self.act_teacher(ob["obs_history"], ob["privileged_obs"]) 139 | 140 | def act_inference(self, ob, policy_info={}): 141 | return self.act_student(ob["obs_history"], policy_info=policy_info) 142 | 143 | def act_student(self, observation_history, policy_info={}): 144 | latent = self.adaptation_module(observation_history) 145 | actions_mean = self.actor_body(torch.cat((observation_history, latent), dim=-1)) 146 | policy_info["latents"] = latent.detach().cpu().numpy() 147 | return actions_mean 148 | 149 | def act_teacher(self, observation_history, privileged_info, policy_info={}): 150 | actions_mean = self.actor_body(torch.cat((observation_history, privileged_info), dim=-1)) 151 | policy_info["latents"] = privileged_info 152 | return actions_mean 153 | 154 | def evaluate(self, observation_history, privileged_observations, **kwargs): 155 | value = self.critic_body(torch.cat((observation_history, privileged_observations), dim=-1)) 156 | return value 157 | 158 | def get_student_latent(self, observation_history): 159 | return self.adaptation_module(observation_history) 160 | 161 | def get_activation(act_name): 162 | if act_name == "elu": 163 | return nn.ELU() 164 | elif act_name == "selu": 165 | return nn.SELU() 166 | elif act_name == "relu": 167 | return nn.ReLU() 168 | elif act_name == "crelu": 169 | return nn.ReLU() 170 | elif act_name == "lrelu": 171 | return nn.LeakyReLU() 172 | elif act_name == "tanh": 173 | return nn.Tanh() 174 | elif act_name == "sigmoid": 175 | return nn.Sigmoid() 176 | else: 177 | print("invalid activation function!") 178 | return None 179 | -------------------------------------------------------------------------------- /dribblebot_learn/ppo_cse/metrics_caches.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | from ml_logger import logger 4 | import numpy as np 5 | import torch 6 | 7 | 8 | class DistCache: 9 | def __init__(self): 10 | """ 11 | Args: 12 | n: Number of slots for the cache 13 | """ 14 | self.cache = defaultdict(lambda: 0) 15 | 16 | def log(self, **key_vals): 17 | """ 18 | Args: 19 | slots: ids for the array 20 | **key_vals: 21 | """ 22 | for k, v in key_vals.items(): 23 | count = self.cache[k + '@counts'] + 1 24 | self.cache[k + '@counts'] = count 25 | self.cache[k] = v + (count - 1) * self.cache[k] 26 | self.cache[k] /= count 27 | 28 | def get_summary(self): 29 | ret = { 30 | k: v 31 | for k, v in self.cache.items() 32 | if not k.endswith("@counts") 33 | } 34 | self.cache.clear() 35 | return ret 36 | 37 | 38 | if __name__ == '__main__': 39 | cl = DistCache() 40 | lin_vel = np.ones((11, 11)) 41 | ang_vel = np.zeros((5, 5)) 42 | cl.log(lin_vel=lin_vel, ang_vel=ang_vel) 43 | lin_vel = np.zeros((11, 11)) 44 | ang_vel = np.zeros((5, 5)) 45 | cl.log(lin_vel=lin_vel, ang_vel=ang_vel) 46 | print(cl.get_summary()) 47 | 48 | 49 | class SlotCache: 50 | def __init__(self, n): 51 | """ 52 | Args: 53 | n: Number of slots for the cache 54 | """ 55 | self.n = n 56 | self.cache = defaultdict(lambda: np.zeros([n])) 57 | 58 | def log(self, slots=None, **key_vals): 59 | """ 60 | Args: 61 | slots: ids for the array 62 | **key_vals: 63 | """ 64 | if slots is 
None: 65 | slots = range(self.n) 66 | 67 | for k, v in key_vals.items(): 68 | counts = self.cache[k + '@counts'][slots] + 1 69 | self.cache[k + '@counts'][slots] = counts 70 | self.cache[k][slots] = v + (counts - 1) * self.cache[k][slots] 71 | self.cache[k][slots] /= counts 72 | 73 | def get_summary(self): 74 | ret = { 75 | k: v 76 | for k, v in self.cache.items() 77 | if not k.endswith("@counts") 78 | } 79 | self.cache.clear() 80 | return ret 81 | 82 | 83 | if __name__ == '__main__': 84 | cl = SlotCache(100) 85 | reset_env_ids = [2, 5, 6] 86 | lin_vel = [0.1, 0.5, 0.8] 87 | ang_vel = [0.4, -0.4, 0.2] 88 | cl.log(reset_env_ids, lin_vel=lin_vel, ang_vel=ang_vel) 89 | 90 | cl.log(lin_vel=np.ones(100)) 91 | -------------------------------------------------------------------------------- /dribblebot_learn/ppo_cse/ppo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from params_proto import PrefixProto 7 | 8 | from dribblebot_learn.ppo_cse import ActorCritic 9 | from dribblebot_learn.ppo_cse import RolloutStorage 10 | from dribblebot_learn.ppo_cse import caches 11 | 12 | 13 | class PPO_Args(PrefixProto): 14 | # algorithm 15 | value_loss_coef = 1.0 16 | use_clipped_value_loss = True 17 | clip_param = 0.2 18 | entropy_coef = 0.01 19 | num_learning_epochs = 5 20 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches 21 | learning_rate = 1.e-3 # 5.e-4 22 | adaptation_module_learning_rate = 1.e-3 23 | num_adaptation_module_substeps = 1 24 | schedule = 'adaptive' # could be adaptive, fixed 25 | gamma = 0.99 26 | lam = 0.95 27 | desired_kl = 0.01 28 | max_grad_norm = 1. 29 | 30 | selective_adaptation_module_loss = False 31 | 32 | 33 | class PPO: 34 | actor_critic: ActorCritic 35 | 36 | def __init__(self, actor_critic, device='cpu'): 37 | 38 | self.device = device 39 | 40 | # PPO components 41 | self.actor_critic = actor_critic 42 | self.actor_critic.to(device) 43 | 44 | PPO_Args.adaptation_labels = self.actor_critic.adaptation_labels 45 | PPO_Args.adaptation_dims = self.actor_critic.adaptation_dims 46 | PPO_Args.adaptation_weights = self.actor_critic.adaptation_weights 47 | 48 | self.storage = None # initialized later 49 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=PPO_Args.learning_rate) 50 | self.adaptation_module_optimizer = optim.Adam(self.actor_critic.parameters(), 51 | lr=PPO_Args.adaptation_module_learning_rate) 52 | if self.actor_critic.decoder: 53 | self.decoder_optimizer = optim.Adam(self.actor_critic.parameters(), 54 | lr=PPO_Args.adaptation_module_learning_rate) 55 | self.transition = RolloutStorage.Transition() 56 | 57 | self.learning_rate = PPO_Args.learning_rate 58 | 59 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, privileged_obs_shape, obs_history_shape, 60 | action_shape): 61 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, privileged_obs_shape, 62 | obs_history_shape, action_shape, self.device) 63 | 64 | def test_mode(self): 65 | self.actor_critic.test() 66 | 67 | def train_mode(self): 68 | self.actor_critic.train() 69 | 70 | def act(self, obs, privileged_obs, obs_history): 71 | # Compute the actions and values 72 | self.transition.actions = self.actor_critic.act(obs_history).detach() 73 | self.transition.values = self.actor_critic.evaluate(obs_history, privileged_obs).detach() 74 | 
self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach() 75 | self.transition.action_mean = self.actor_critic.action_mean.detach() 76 | self.transition.action_sigma = self.actor_critic.action_std.detach() 77 | # need to record obs and critic_obs before env.step() 78 | self.transition.observations = obs 79 | self.transition.critic_observations = obs 80 | self.transition.privileged_observations = privileged_obs 81 | self.transition.observation_histories = obs_history 82 | return self.transition.actions 83 | 84 | def process_env_step(self, rewards, dones, infos): 85 | self.transition.rewards = rewards.clone() 86 | self.transition.dones = dones 87 | self.transition.env_bins = infos["env_bins"] 88 | # Bootstrapping on time outs 89 | if 'time_outs' in infos: 90 | self.transition.rewards += PPO_Args.gamma * torch.squeeze( 91 | self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1) 92 | 93 | # Record the transition 94 | self.storage.add_transitions(self.transition) 95 | self.transition.clear() 96 | self.actor_critic.reset(dones) 97 | 98 | def compute_returns(self, last_critic_obs, last_critic_privileged_obs): 99 | last_values = self.actor_critic.evaluate(last_critic_obs, last_critic_privileged_obs).detach() 100 | self.storage.compute_returns(last_values, PPO_Args.gamma, PPO_Args.lam) 101 | 102 | def update(self): 103 | mean_value_loss = 0 104 | mean_surrogate_loss = 0 105 | mean_adaptation_module_loss = 0 106 | mean_decoder_loss = 0 107 | mean_decoder_loss_student = 0 108 | mean_adaptation_module_test_loss = 0 109 | mean_decoder_test_loss = 0 110 | mean_decoder_test_loss_student = 0 111 | 112 | mean_adaptation_losses = {} 113 | label_start_end = {} 114 | si = 0 115 | for idx, (label, length) in enumerate(zip(PPO_Args.adaptation_labels, PPO_Args.adaptation_dims)): 116 | label_start_end[label] = (si, si + length) 117 | si = si + length 118 | mean_adaptation_losses[label] = 0 119 | 120 | generator = self.storage.mini_batch_generator(PPO_Args.num_mini_batches, PPO_Args.num_learning_epochs) 121 | for obs_batch, critic_obs_batch, privileged_obs_batch, obs_history_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \ 122 | old_mu_batch, old_sigma_batch, masks_batch, env_bins_batch in generator: 123 | 124 | self.actor_critic.act(obs_history_batch, masks=masks_batch) 125 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch) 126 | value_batch = self.actor_critic.evaluate(obs_history_batch, privileged_obs_batch, masks=masks_batch) 127 | mu_batch = self.actor_critic.action_mean 128 | sigma_batch = self.actor_critic.action_std 129 | entropy_batch = self.actor_critic.entropy 130 | 131 | # KL 132 | if PPO_Args.desired_kl != None and PPO_Args.schedule == 'adaptive': 133 | with torch.inference_mode(): 134 | kl = torch.sum( 135 | torch.log(sigma_batch / old_sigma_batch + 1.e-5) + ( 136 | torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / ( 137 | 2.0 * torch.square(sigma_batch)) - 0.5, axis=-1) 138 | kl_mean = torch.mean(kl) 139 | 140 | if kl_mean > PPO_Args.desired_kl * 2.0: 141 | self.learning_rate = max(1e-5, self.learning_rate / 1.5) 142 | elif kl_mean < PPO_Args.desired_kl / 2.0 and kl_mean > 0.0: 143 | self.learning_rate = min(1e-2, self.learning_rate * 1.5) 144 | 145 | for param_group in self.optimizer.param_groups: 146 | param_group['lr'] = self.learning_rate 147 | 148 | # Surrogate loss 149 | ratio = torch.exp(actions_log_prob_batch - 
torch.squeeze(old_actions_log_prob_batch)) 150 | surrogate = -torch.squeeze(advantages_batch) * ratio 151 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - PPO_Args.clip_param, 152 | 1.0 + PPO_Args.clip_param) 153 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean() 154 | 155 | # Value function loss 156 | if PPO_Args.use_clipped_value_loss: 157 | value_clipped = target_values_batch + \ 158 | (value_batch - target_values_batch).clamp(-PPO_Args.clip_param, 159 | PPO_Args.clip_param) 160 | value_losses = (value_batch - returns_batch).pow(2) 161 | value_losses_clipped = (value_clipped - returns_batch).pow(2) 162 | value_loss = torch.max(value_losses, value_losses_clipped).mean() 163 | else: 164 | value_loss = (returns_batch - value_batch).pow(2).mean() 165 | 166 | loss = surrogate_loss + PPO_Args.value_loss_coef * value_loss - PPO_Args.entropy_coef * entropy_batch.mean() 167 | 168 | # Gradient step 169 | self.optimizer.zero_grad() 170 | loss.backward() 171 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), PPO_Args.max_grad_norm) 172 | self.optimizer.step() 173 | 174 | mean_value_loss += value_loss.item() 175 | mean_surrogate_loss += surrogate_loss.item() 176 | 177 | data_size = privileged_obs_batch.shape[0] 178 | num_train = int(data_size // 5 * 4) 179 | 180 | # Adaptation module gradient step, only update concurrent state estimation module, not policy network 181 | if len(PPO_Args.adaptation_labels) > 0: 182 | 183 | for epoch in range(PPO_Args.num_adaptation_module_substeps): 184 | 185 | adaptation_pred = self.actor_critic.get_student_latent(obs_history_batch) 186 | with torch.no_grad(): 187 | adaptation_target = privileged_obs_batch 188 | adaptation_loss = 0 189 | for idx, (label, length, weight) in enumerate(zip(PPO_Args.adaptation_labels, PPO_Args.adaptation_dims, PPO_Args.adaptation_weights)): 190 | 191 | start, end = label_start_end[label] 192 | selection_indices = torch.linspace(start, end - 1, steps=end - start, dtype=torch.long) 193 | 194 | idx_adaptation_loss = F.mse_loss(adaptation_pred[:, selection_indices] * weight, 195 | adaptation_target[:, selection_indices] * weight) 196 | mean_adaptation_losses[label] += idx_adaptation_loss.item() 197 | 198 | adaptation_loss += idx_adaptation_loss 199 | 200 | self.adaptation_module_optimizer.zero_grad() 201 | adaptation_loss.backward() 202 | self.adaptation_module_optimizer.step() 203 | 204 | mean_adaptation_module_loss += adaptation_loss.item() 205 | mean_adaptation_module_test_loss += 0 # adaptation_test_loss.item() 206 | 207 | num_updates = PPO_Args.num_learning_epochs * PPO_Args.num_mini_batches 208 | mean_value_loss /= num_updates 209 | mean_surrogate_loss /= num_updates 210 | mean_adaptation_module_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps) 211 | mean_decoder_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps) 212 | mean_decoder_loss_student /= (num_updates * PPO_Args.num_adaptation_module_substeps) 213 | mean_adaptation_module_test_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps) 214 | mean_decoder_test_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps) 215 | mean_decoder_test_loss_student /= (num_updates * PPO_Args.num_adaptation_module_substeps) 216 | for label in PPO_Args.adaptation_labels: 217 | mean_adaptation_losses[label] /= (num_updates * PPO_Args.num_adaptation_module_substeps) 218 | self.storage.clear() 219 | 220 | return mean_value_loss, mean_surrogate_loss, mean_adaptation_module_loss, mean_decoder_loss, 
mean_decoder_loss_student, mean_adaptation_module_test_loss, mean_decoder_test_loss, mean_decoder_test_loss_student, mean_adaptation_losses 221 | -------------------------------------------------------------------------------- /dribblebot_learn/ppo_cse/rollout_storage.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | 5 | from dribblebot_learn.utils import split_and_pad_trajectories 6 | 7 | class RolloutStorage: 8 | class Transition: 9 | def __init__(self): 10 | self.observations = None 11 | self.privileged_observations = None 12 | self.observation_histories = None 13 | self.critic_observations = None 14 | self.actions = None 15 | self.rewards = None 16 | self.dones = None 17 | self.values = None 18 | self.actions_log_prob = None 19 | self.action_mean = None 20 | self.action_sigma = None 21 | self.env_bins = None 22 | 23 | def clear(self): 24 | self.__init__() 25 | 26 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, obs_history_shape, actions_shape, device='cpu'): 27 | 28 | self.device = device 29 | 30 | self.obs_shape = obs_shape 31 | self.privileged_obs_shape = privileged_obs_shape 32 | self.obs_history_shape = obs_history_shape 33 | self.actions_shape = actions_shape 34 | 35 | # Core 36 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device) 37 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device) 38 | self.observation_histories = torch.zeros(num_transitions_per_env, num_envs, *obs_history_shape, device=self.device) 39 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 40 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 41 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte() 42 | 43 | # For PPO 44 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 45 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 46 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 47 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 48 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 49 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device) 50 | self.env_bins = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device) 51 | 52 | self.num_transitions_per_env = num_transitions_per_env 53 | self.num_envs = num_envs 54 | 55 | self.step = 0 56 | 57 | def add_transitions(self, transition: Transition): 58 | if self.step >= self.num_transitions_per_env: 59 | raise AssertionError("Rollout buffer overflow") 60 | self.observations[self.step].copy_(transition.observations) 61 | self.privileged_observations[self.step].copy_(transition.privileged_observations) 62 | self.observation_histories[self.step].copy_(transition.observation_histories) 63 | self.actions[self.step].copy_(transition.actions) 64 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1)) 65 | self.dones[self.step].copy_(transition.dones.view(-1, 1)) 66 | self.values[self.step].copy_(transition.values) 67 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1)) 68 | self.mu[self.step].copy_(transition.action_mean) 69 | 
self.sigma[self.step].copy_(transition.action_sigma) 70 | self.env_bins[self.step].copy_(transition.env_bins.view(-1, 1)) 71 | self.step += 1 72 | 73 | def clear(self): 74 | self.step = 0 75 | 76 | def compute_returns(self, last_values, gamma, lam): 77 | advantage = 0 78 | for step in reversed(range(self.num_transitions_per_env)): 79 | if step == self.num_transitions_per_env - 1: 80 | next_values = last_values 81 | else: 82 | next_values = self.values[step + 1] 83 | next_is_not_terminal = 1.0 - self.dones[step].float() 84 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step] 85 | advantage = delta + next_is_not_terminal * gamma * lam * advantage 86 | self.returns[step] = advantage + self.values[step] 87 | 88 | # Compute and normalize the advantages 89 | self.advantages = self.returns - self.values 90 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8) 91 | 92 | def get_statistics(self): 93 | done = self.dones 94 | done[-1] = 1 95 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1) 96 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])) 97 | trajectory_lengths = (done_indices[1:] - done_indices[:-1]) 98 | return trajectory_lengths.float().mean(), self.rewards.mean() 99 | 100 | def mini_batch_generator(self, num_mini_batches, num_epochs=8): 101 | batch_size = self.num_envs * self.num_transitions_per_env 102 | mini_batch_size = batch_size // num_mini_batches 103 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device) 104 | 105 | observations = self.observations.flatten(0, 1) 106 | privileged_obs = self.privileged_observations.flatten(0, 1) 107 | obs_history = self.observation_histories.flatten(0, 1) 108 | critic_observations = observations 109 | 110 | actions = self.actions.flatten(0, 1) 111 | values = self.values.flatten(0, 1) 112 | returns = self.returns.flatten(0, 1) 113 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1) 114 | advantages = self.advantages.flatten(0, 1) 115 | old_mu = self.mu.flatten(0, 1) 116 | old_sigma = self.sigma.flatten(0, 1) 117 | old_env_bins = self.env_bins.flatten(0, 1) 118 | 119 | for epoch in range(num_epochs): 120 | for i in range(num_mini_batches): 121 | 122 | start = i*mini_batch_size 123 | end = (i+1)*mini_batch_size 124 | batch_idx = indices[start:end] 125 | 126 | obs_batch = observations[batch_idx] 127 | critic_observations_batch = critic_observations[batch_idx] 128 | privileged_obs_batch = privileged_obs[batch_idx] 129 | obs_history_batch = obs_history[batch_idx] 130 | actions_batch = actions[batch_idx] 131 | target_values_batch = values[batch_idx] 132 | returns_batch = returns[batch_idx] 133 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx] 134 | advantages_batch = advantages[batch_idx] 135 | old_mu_batch = old_mu[batch_idx] 136 | old_sigma_batch = old_sigma[batch_idx] 137 | env_bins_batch = old_env_bins[batch_idx] 138 | yield obs_batch, critic_observations_batch, privileged_obs_batch, obs_history_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \ 139 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, None, env_bins_batch 140 | 141 | # for RNNs only 142 | def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8): 143 | 144 | padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones) 145 | padded_privileged_obs_trajectories, trajectory_masks = 
split_and_pad_trajectories(self.privileged_observations, self.dones) 146 | padded_obs_history_trajectories, trajectory_masks = split_and_pad_trajectories(self.observation_histories, self.dones) 147 | padded_critic_obs_trajectories = padded_obs_trajectories 148 | 149 | mini_batch_size = self.num_envs // num_mini_batches 150 | for ep in range(num_epochs): 151 | first_traj = 0 152 | for i in range(num_mini_batches): 153 | start = i*mini_batch_size 154 | stop = (i+1)*mini_batch_size 155 | 156 | dones = self.dones.squeeze(-1) 157 | last_was_done = torch.zeros_like(dones, dtype=torch.bool) 158 | last_was_done[1:] = dones[:-1] 159 | last_was_done[0] = True 160 | trajectories_batch_size = torch.sum(last_was_done[:, start:stop]) 161 | last_traj = first_traj + trajectories_batch_size 162 | 163 | masks_batch = trajectory_masks[:, first_traj:last_traj] 164 | obs_batch = padded_obs_trajectories[:, first_traj:last_traj] 165 | critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj] 166 | privileged_obs_batch = padded_privileged_obs_trajectories[:, first_traj:last_traj] 167 | obs_history_batch = padded_obs_history_trajectories[:, first_traj:last_traj] 168 | 169 | actions_batch = self.actions[:, start:stop] 170 | old_mu_batch = self.mu[:, start:stop] 171 | old_sigma_batch = self.sigma[:, start:stop] 172 | returns_batch = self.returns[:, start:stop] 173 | advantages_batch = self.advantages[:, start:stop] 174 | values_batch = self.values[:, start:stop] 175 | old_actions_log_prob_batch = self.actions_log_prob[:, start:stop] 176 | 177 | yield obs_batch, critic_obs_batch, privileged_obs_batch, obs_history_batch, actions_batch, values_batch, advantages_batch, returns_batch, \ 178 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, masks_batch 179 | 180 | first_traj = last_traj -------------------------------------------------------------------------------- /dribblebot_learn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # License: see [LICENSE, LICENSES/rsl_rl/LICENSE] 2 | 3 | from .utils import split_and_pad_trajectories, unpad_trajectories -------------------------------------------------------------------------------- /dribblebot_learn/utils/utils.py: -------------------------------------------------------------------------------- 1 | # License: see [LICENSE, LICENSES/rsl_rl/LICENSE] 2 | 3 | import torch 4 | 5 | def split_and_pad_trajectories(tensor, dones): 6 | """ Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length of the longest trajectory. 
7 | Returns masks corresponding to valid parts of the trajectories 8 | Example: 9 | Input: [ [a1, a2, a3, a4 | a5, a6], 10 | [b1, b2 | b3, b4, b5 | b6] 11 | ] 12 | 13 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True], 14 | [a5, a6, 0, 0], | [True, True, False, False], 15 | [b1, b2, 0, 0], | [True, True, False, False], 16 | [b3, b4, b5, 0], | [True, True, True, False], 17 | [b6, 0, 0, 0] | [True, False, False, False], 18 | ] | ] 19 | 20 | Assumes that the input has the following dimension order: [time, number of envs, additional dimensions] 21 | """ 22 | dones = dones.clone() 23 | dones[-1] = 1 24 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping 25 | flat_dones = dones.transpose(1, 0).reshape(-1, 1) 26 | 27 | # Get length of trajectory by counting the number of successive not done elements 28 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0])) 29 | trajectory_lengths = done_indices[1:] - done_indices[:-1] 30 | trajectory_lengths_list = trajectory_lengths.tolist() 31 | # Extract the individual trajectories 32 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list) 33 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories) 34 | 35 | 36 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1) 37 | return padded_trajectories, trajectory_masks 38 | 39 | def unpad_trajectories(trajectories, masks): 40 | """ Does the inverse operation of split_and_pad_trajectories() 41 | """ 42 | # Need to transpose before and after the masking to have proper reshaping 43 | return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0) -------------------------------------------------------------------------------- /resources/actuator_nets/unitree_go1.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/actuator_nets/unitree_go1.pt -------------------------------------------------------------------------------- /resources/objects/ball.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /resources/robots/go1/meshes/calf.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/calf.stl -------------------------------------------------------------------------------- /resources/robots/go1/meshes/hip.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/hip.stl -------------------------------------------------------------------------------- /resources/robots/go1/meshes/thigh.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/thigh.stl -------------------------------------------------------------------------------- 
/resources/robots/go1/meshes/thigh_mirror.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/thigh_mirror.stl -------------------------------------------------------------------------------- /resources/robots/go1/meshes/trunk.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/trunk.stl -------------------------------------------------------------------------------- /resources/robots/go1/xml/go1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 196 | -------------------------------------------------------------------------------- /resources/textures/background_texture_metal_rust.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/background_texture_metal_rust.jpg -------------------------------------------------------------------------------- /resources/textures/brick_texture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/brick_texture.jpg -------------------------------------------------------------------------------- /resources/textures/grass.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/grass.jpg -------------------------------------------------------------------------------- /resources/textures/ice_texture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/ice_texture.jpg -------------------------------------------------------------------------------- /resources/textures/icy_pond.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/icy_pond.jpg -------------------------------------------------------------------------------- /resources/textures/metal_wall_iron_fence.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/metal_wall_iron_fence.jpg -------------------------------------------------------------------------------- /resources/textures/particle_board_paint_aged.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/particle_board_paint_aged.jpg -------------------------------------------------------------------------------- /resources/textures/pebble_stone_texture_nature.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/pebble_stone_texture_nature.jpg -------------------------------------------------------------------------------- /resources/textures/snow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/snow.jpg -------------------------------------------------------------------------------- /resources/textures/texture_background_wall_paint_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_background_wall_paint_2.jpg -------------------------------------------------------------------------------- /resources/textures/texture_background_wall_paint_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_background_wall_paint_3.jpg -------------------------------------------------------------------------------- /resources/textures/texture_stone_stone_texture_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_stone_stone_texture_0.jpg -------------------------------------------------------------------------------- /resources/textures/texture_wood_brown_1033760.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_wood_brown_1033760.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_brick_texture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_brick_texture.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_grass.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_grass.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_ice_texture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_ice_texture.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_particle_board_paint_aged.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_particle_board_paint_aged.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_pebble_stone_texture_nature.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_pebble_stone_texture_nature.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_snow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_snow.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_texture_stone_stone_texture_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_texture_stone_stone_texture_0.jpg -------------------------------------------------------------------------------- /resources/textures/tiled_texture_wood_brown_1033760.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_texture_wood_brown_1033760.jpg -------------------------------------------------------------------------------- /resources/textures/tiling_util.py: -------------------------------------------------------------------------------- 1 | # %% 2 | 3 | from PIL import Image 4 | 5 | def create_tiled_image(image_path, tile_count_x=10, tile_count_y=10): 6 | # Load the image 7 | original_image = Image.open(image_path) 8 | # width, height = original_image.size 9 | width, height = 3000, 2000 10 | 11 | # Calculate the size of each tile 12 | tile_width = width // tile_count_x 13 | tile_height = height // tile_count_y 14 | 15 | # Create a new image with the same resolution as the original 16 | new_image = Image.new('RGB', (width, height)) 17 | 18 | # Iterate through the tiles and paste the resized original image 19 | for x in range(tile_count_x): 20 | for y in range(tile_count_y): 21 | resized_image = original_image.resize((tile_width, tile_height)) 22 | new_image.paste(resized_image, (tile_width * x, tile_height * y)) 23 | 24 | # Save the new tiled image 25 | new_image.save(f'tiled_{image_path}') 26 | 27 | if __name__ == '__main__': 28 | image_paths = ['snow.jpg', 'grass.jpg', 'pebble_stone_texture_nature.jpg', 'particle_board_paint_aged.jpg', 'texture_stone_stone_texture_0.jpg', 'texture_wood_brown_1033760.jpg', 'brick_texture.jpg'] 29 | # image_path = 'ice_texture.jpg' # Replace with the path to your image (PNG or JPG) 30 | for image_path in image_paths: 31 | create_tiled_image(image_path, tile_count_x=5, tile_count_y=5) 32 | 33 | # %% 34 | -------------------------------------------------------------------------------- /runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/ac_weights.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/ac_weights.pt -------------------------------------------------------------------------------- /runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/adaptation_module.jit: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/adaptation_module.jit -------------------------------------------------------------------------------- /runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/body.jit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/body.jit -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/actuator_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/scripts/actuator_net/__init__.py -------------------------------------------------------------------------------- /scripts/actuator_net/eval.py: -------------------------------------------------------------------------------- 1 | from utils import train_actuator_network_and_plot_predictions 2 | from glob import glob 3 | 4 | 5 | log_dir_root = "../../logs/" 6 | log_dir = "example_experiment/2022/11_01/16_01_50_0" 7 | 8 | # Evaluates the existing actuator network by default 9 | load_pretrained_model = True 10 | actuator_network_path = "../../resources/actuator_nets/unitree_go1.pt" 11 | 12 | log_dirs = glob(f"{log_dir_root}{log_dir}/", recursive=True) 13 | 14 | if len(log_dirs) == 0: raise FileNotFoundError(f"No log files found in {log_dir_root}{log_dir}/") 15 | 16 | for log_dir in log_dirs: 17 | try: 18 | train_actuator_network_and_plot_predictions(log_dir[:11], log_dir[11:], actuator_network_path=actuator_network_path, load_pretrained_model=load_pretrained_model) 19 | except FileNotFoundError: 20 | print(f"Couldn't find log.pkl in {log_dir}") 21 | except EOFError: 22 | print(f"Incomplete log.pkl in {log_dir}") -------------------------------------------------------------------------------- /scripts/actuator_net/train.py: -------------------------------------------------------------------------------- 1 | from utils import train_actuator_network_and_plot_predictions 2 | from glob import glob 3 | 4 | log_dir_root = "../../logs/" 5 | log_dir = "example_experiment/2022/11_01/16_01_50_0" 6 | 7 | # Evaluates the existing actuator network by default 8 | # load_pretrained_model = True 9 | # actuator_network_path = "../../resources/actuator_nets/unitree_go1.pt" 10 | 11 | # Uncomment these lines to train a new actuator network 12 | load_pretrained_model = False 13 | actuator_network_path = "../../resources/actuator_nets/unitree_go1_new.pt" 14 | 15 | 16 | log_dirs = glob(f"{log_dir_root}{log_dir}/", recursive=True) 17 | 18 | if len(log_dirs) == 0: raise FileNotFoundError(f"No log files found in {log_dir_root}{log_dir}/") 19 | 20 | for log_dir in log_dirs: 21 | try: 22 | train_actuator_network_and_plot_predictions(log_dir[:11], log_dir[11:], actuator_network_path=actuator_network_path, load_pretrained_model=load_pretrained_model) 23 | except FileNotFoundError: 24 | print(f"Couldn't find log.pkl in {log_dir}") 25 | except EOFError: 26 | 
print(f"Incomplete log.pkl in {log_dir}") -------------------------------------------------------------------------------- /scripts/actuator_net/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | from matplotlib import pyplot as plt 4 | import time 5 | import imageio 6 | import numpy as np 7 | from tqdm import tqdm 8 | from glob import glob 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.utils.data import Dataset 14 | from torch.utils.data import DataLoader 15 | from torch.optim import Adam 16 | 17 | class ActuatorDataset(Dataset): 18 | def __init__(self, data): 19 | self.data = data 20 | 21 | def __len__(self): 22 | return len(self.data['joint_states']) 23 | 24 | def __getitem__(self, idx): 25 | return {k: v[idx] for k,v in self.data.items()} 26 | 27 | class Act(nn.Module): 28 | def __init__(self, act, slope=0.05): 29 | super(Act, self).__init__() 30 | self.act = act 31 | self.slope = slope 32 | self.shift = torch.log(torch.tensor(2.0)).item() 33 | 34 | def forward(self, input): 35 | if self.act == "relu": 36 | return F.relu(input) 37 | elif self.act == "leaky_relu": 38 | return F.leaky_relu(input) 39 | elif self.act == "sp": 40 | return F.softplus(input, beta=1.) 41 | elif self.act == "leaky_sp": 42 | return F.softplus(input, beta=1.) - self.slope * F.relu(-input) 43 | elif self.act == "elu": 44 | return F.elu(input, alpha=1.) 45 | elif self.act == "leaky_elu": 46 | return F.elu(input, alpha=1.) - self.slope * F.relu(-input) 47 | elif self.act == "ssp": 48 | return F.softplus(input, beta=1.) - self.shift 49 | elif self.act == "leaky_ssp": 50 | return ( 51 | F.softplus(input, beta=1.) - 52 | self.slope * F.relu(-input) - 53 | self.shift 54 | ) 55 | elif self.act == "tanh": 56 | return torch.tanh(input) 57 | elif self.act == "leaky_tanh": 58 | return torch.tanh(input) + self.slope * input 59 | elif self.act == "swish": 60 | return torch.sigmoid(input) * input 61 | elif self.act == "softsign": 62 | return F.softsign(input) 63 | else: 64 | raise RuntimeError(f"Undefined activation called {self.act}") 65 | 66 | def build_mlp(in_dim, units, layers, out_dim, 67 | act='relu', layer_norm=False, act_final=False): 68 | mods = [nn.Linear(in_dim, units), Act(act)] 69 | for i in range(layers-1): 70 | mods += [nn.Linear(units, units), Act(act)] 71 | mods += [nn.Linear(units, out_dim)] 72 | if act_final: 73 | mods += [Act(act)] 74 | if layer_norm: 75 | mods += [nn.LayerNorm(out_dim)] 76 | return nn.Sequential(*mods) 77 | 78 | def train_actuator_network(xs, ys, actuator_network_path): 79 | 80 | print(xs.shape, ys.shape) 81 | 82 | num_data = xs.shape[0] 83 | num_train = num_data // 5 * 4 84 | num_test = num_data - num_train 85 | 86 | dataset = ActuatorDataset({"joint_states": xs, "tau_ests": ys}) 87 | train_set, val_set = torch.utils.data.random_split(dataset, [num_train, num_test]) 88 | train_loader = DataLoader(train_set, batch_size=128, shuffle=True) 89 | test_loader = DataLoader(val_set, batch_size=128, shuffle=True) 90 | 91 | model = build_mlp(in_dim=6, units=32, layers=2, out_dim=1, act='softsign') 92 | 93 | lr = 8e-4 94 | opt = Adam(model.parameters(), lr=lr, eps=1e-8, weight_decay=0.0) 95 | 96 | epochs = 100 97 | device = 'cuda:0' 98 | 99 | model = model.to(device) 100 | for epoch in range(epochs): 101 | epoch_loss = 0 102 | ct = 0 103 | for batch in train_loader: 104 | data = batch['joint_states'].to(device) 105 | y_pred = model(data) 106 | 107 | opt.zero_grad() 
108 | 109 | y_label = batch['tau_ests'].to(device) 110 | 111 | tau_est_loss = ((y_pred - y_label) ** 2).mean() 112 | loss = tau_est_loss 113 | 114 | loss.backward() 115 | opt.step() 116 | epoch_loss += loss.detach().cpu().numpy() 117 | ct += 1 118 | epoch_loss /= ct 119 | 120 | test_loss = 0 121 | mae = 0 122 | ct = 0 123 | if epoch % 1 == 0: 124 | with torch.no_grad(): 125 | for batch in test_loader: 126 | data = batch['joint_states'].to(device) 127 | y_pred = model(data) 128 | 129 | y_label = batch['tau_ests'].to(device) 130 | 131 | tau_est_loss = ((y_pred - y_label) ** 2).mean() 132 | loss = tau_est_loss 133 | test_mae = (y_pred - y_label).abs().mean() 134 | 135 | test_loss += loss 136 | mae += test_mae 137 | ct += 1 138 | test_loss /= ct 139 | mae /= ct 140 | 141 | print( 142 | f'epoch: {epoch} | loss: {epoch_loss:.4f} | test loss: {test_loss:.4f} | mae: {mae:.4f}') 143 | 144 | model_scripted = torch.jit.script(model) # Export to TorchScript 145 | model_scripted.save(actuator_network_path) # Save 146 | return model 147 | 148 | def train_actuator_network_and_plot_predictions(log_dir_root, log_dir, actuator_network_path, load_pretrained_model=False): 149 | 150 | log_path = log_dir_root + log_dir + "log.pkl" 151 | print(log_path) 152 | with open(log_path, 'rb') as file: 153 | data = pkl.load(file) 154 | 155 | datas = data['hardware_closed_loop'][1] 156 | 157 | if len(datas) < 1: 158 | return 159 | 160 | tau_ests = np.zeros((len(datas), 12)) 161 | torques = np.zeros((len(datas), 12)) 162 | joint_positions = np.zeros((len(datas), 12)) 163 | joint_position_targets = np.zeros((len(datas), 12)) 164 | joint_velocities = np.zeros((len(datas), 12)) 165 | 166 | if "tau_est" not in datas[0].keys(): 167 | return 168 | 169 | for i in range(len(datas)): 170 | tau_ests[i, :] = datas[i]["tau_est"] 171 | torques[i, :] = datas[i]["torques"] 172 | joint_positions[i, :] = datas[i]["joint_pos"] 173 | joint_position_targets[i, :] = datas[i]["joint_pos_target"] 174 | joint_velocities[i, :] = datas[i]["joint_vel"] 175 | 176 | timesteps = np.array(range(len(datas))) / 50.0 177 | 178 | import matplotlib.pyplot as plt 179 | 180 | joint_position_errors = joint_positions - joint_position_targets 181 | joint_velocities = joint_velocities 182 | 183 | joint_position_errors = torch.tensor(joint_position_errors, dtype=torch.float) 184 | joint_velocities = torch.tensor(joint_velocities, dtype=torch.float) 185 | tau_ests = torch.tensor(tau_ests, dtype=torch.float) 186 | 187 | xs = [] 188 | ys = [] 189 | step = 2 190 | # all joints are equal 191 | for i in range(12): 192 | xs_joint = [joint_position_errors[2:-step+1, i:i+1], 193 | joint_position_errors[1:-step, i:i+1], 194 | joint_position_errors[:-step-1, i:i+1], 195 | joint_velocities[2:-step+1, i:i+1], 196 | joint_velocities[1:-step, i:i+1], 197 | joint_velocities[:-step-1, i:i+1]] 198 | 199 | tau_ests_joint = [tau_ests[step:-1, i:i+1]] 200 | 201 | xs_joint = torch.cat(xs_joint, dim=1) 202 | xs += [xs_joint] 203 | ys += tau_ests_joint 204 | 205 | xs = torch.cat(xs, dim=0) 206 | ys = torch.cat(ys, dim=0) 207 | 208 | if load_pretrained_model: 209 | model = torch.jit.load(actuator_network_path).to('cpu') 210 | else: 211 | model = train_actuator_network(xs, ys, actuator_network_path).to("cpu") 212 | 213 | tau_preds = model(xs).detach().reshape(12, -1).T 214 | 215 | plot_length = 300 216 | 217 | timesteps = timesteps[:plot_length] 218 | torques = torques[step:plot_length+step] 219 | tau_ests = tau_ests[step:plot_length+step] 220 | tau_preds = tau_preds[:plot_length] 221 | 222 
| fig, axs = plt.subplots(6, 2, figsize=(14, 6)) 223 | axs = np.array(axs).flatten() 224 | for i in range(12): 225 | axs[i].plot(timesteps, torques[:, i], label="idealized torque") 226 | axs[i].plot(timesteps, tau_ests[:, i], label="true torque") 227 | axs[i].plot(timesteps, tau_preds[:, i], linestyle='--', label="actuator model predicted torque") 228 | plt.legend() 229 | 230 | plt.show() 231 | 232 | 233 | -------------------------------------------------------------------------------- /scripts/loading_utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import io 3 | import torch 4 | 5 | class CPU_Unpickler(pickle.Unpickler): 6 | def find_class(self, module, name): 7 | if module == 'torch.storage' and name == '_load_from_bytes': 8 | return lambda b: torch.load(io.BytesIO(b), map_location='cpu') 9 | else: 10 | return super().find_class(module, name) 11 | 12 | -------------------------------------------------------------------------------- /scripts/play_dribbling_custom.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | 3 | assert isaacgym 4 | import torch 5 | import numpy as np 6 | 7 | import glob 8 | import pickle as pkl 9 | 10 | from dribblebot.envs import * 11 | from dribblebot.envs.base.legged_robot_config import Cfg 12 | from dribblebot.envs.go1.go1_config import config_go1 13 | from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv 14 | 15 | from loading_utils import CPU_Unpickler 16 | 17 | from tqdm import tqdm 18 | import wandb 19 | 20 | def load_policy(logdir): 21 | body_file = wandb.restore('tmp/legged_data/body_68000.jit', run_path=logdir) 22 | body = torch.jit.load(body_file.name) 23 | import os 24 | 25 | adaptation_module_file = wandb.restore('tmp/legged_data/adaptation_module_68000.jit', run_path=logdir) 26 | adaptation_module = torch.jit.load(adaptation_module_file.name) 27 | 28 | def policy(obs, info={}): 29 | i = 0 30 | latent = adaptation_module.forward(obs["obs_history"].to('cpu')) 31 | action = body.forward(torch.cat((obs["obs_history"].to('cpu'), latent), dim=-1)) 32 | info['latent'] = latent 33 | return action 34 | 35 | return policy 36 | 37 | 38 | def load_env(logdir, headless=False): 39 | 40 | import wandb 41 | cfg_file = wandb.restore('config.yaml', run_path=logdir) 42 | 43 | import yaml 44 | with open(cfg_file.name, 'rb') as file: 45 | cfg = yaml.safe_load(file) 46 | cfg = cfg["Cfg"]["value"] 47 | print("Cfg from config.yaml:", cfg.keys()) 48 | 49 | for key, value in cfg.items(): 50 | if hasattr(Cfg, key): 51 | for key2, value2 in cfg[key].items(): 52 | setattr(getattr(Cfg, key), key2, value2) 53 | 54 | # turn off DR for evaluation script 55 | Cfg.domain_rand.push_robots = False 56 | Cfg.domain_rand.randomize_friction = False 57 | Cfg.domain_rand.randomize_gravity = False 58 | Cfg.domain_rand.randomize_restitution = False 59 | Cfg.domain_rand.randomize_motor_offset = False 60 | Cfg.domain_rand.randomize_motor_strength = False 61 | Cfg.domain_rand.randomize_friction_indep = False 62 | Cfg.domain_rand.randomize_ground_friction = False 63 | Cfg.domain_rand.randomize_base_mass = False 64 | Cfg.domain_rand.randomize_Kd_factor = False 65 | Cfg.domain_rand.randomize_Kp_factor = False 66 | Cfg.domain_rand.randomize_joint_friction = False 67 | Cfg.domain_rand.randomize_com_displacement = False 68 | Cfg.domain_rand.randomize_tile_roughness = False 69 | Cfg.domain_rand.tile_roughness_range = [0.0, 0.0] 70 | 71 | Cfg.env.num_recording_envs = 
1 72 | Cfg.env.num_envs = 1 73 | # Cfg.env.num_observations = 75 74 | Cfg.terrain.num_rows = 5 75 | Cfg.terrain.num_cols = 5 76 | Cfg.terrain.border_size = 0 77 | Cfg.terrain.num_border_boxes = 0 78 | Cfg.terrain.center_robots = True 79 | Cfg.terrain.center_span = 1 80 | Cfg.terrain.teleport_robots = True 81 | # Cfg.terrain.mesh_type = "plane" 82 | 83 | Cfg.robot.name = "go1" 84 | Cfg.sensors.sensor_names = [ 85 | "ObjectSensor", 86 | "OrientationSensor", 87 | "RCSensor", 88 | "JointPositionSensor", 89 | "JointVelocitySensor", 90 | "ActionSensor", 91 | "ActionSensor", 92 | "ClockSensor", 93 | "YawSensor", 94 | "TimingSensor", 95 | ] 96 | Cfg.sensors.sensor_args = { 97 | "ObjectSensor": {}, 98 | "OrientationSensor": {}, 99 | "RCSensor": {}, 100 | "JointPositionSensor": {}, 101 | "JointVelocitySensor": {}, 102 | "ActionSensor": {}, 103 | "ActionSensor": {"delay": 1}, 104 | "ClockSensor": {}, 105 | "YawSensor": {}, 106 | "TimingSensor":{}, 107 | } 108 | 109 | Cfg.sensors.privileged_sensor_names = { 110 | "BodyVelocitySensor": {}, 111 | "ObjectVelocitySensor": {}, 112 | } 113 | Cfg.sensors.privileged_sensor_args = { 114 | "BodyVelocitySensor": {}, 115 | "ObjectVelocitySensor": {}, 116 | } 117 | Cfg.domain_rand.lag_timesteps = 6 118 | Cfg.domain_rand.randomize_lag_timesteps = True 119 | Cfg.control.control_type = "actuator_net" 120 | Cfg.env.num_privileged_obs = 6 121 | 122 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper 123 | 124 | env = VelocityTrackingEasyEnv(sim_device='cuda:0', headless=False, cfg=Cfg) 125 | env = HistoryWrapper(env) 126 | 127 | policy = load_policy(logdir) 128 | 129 | return env, policy 130 | 131 | 132 | def play_go1(headless=True): 133 | 134 | log_dir = "improbableailab/dribbling/bvggoq26/" 135 | 136 | env, policy = load_env(log_dir, headless=headless) 137 | 138 | num_eval_steps = 500 139 | gaits = {"pronking": [0, 0, 0], 140 | "trotting": [0.5, 0, 0], 141 | "bounding": [0, 0.5, 0], 142 | "pacing": [0, 0, 0.5]} 143 | 144 | x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 1.0, 0.0, 0.0 145 | body_height_cmd = 0.0 146 | step_frequency_cmd = 3.0 147 | gait = torch.tensor(gaits["trotting"]) 148 | footswing_height_cmd = 0.09 149 | pitch_cmd = 0.0 150 | roll_cmd = 0.0 151 | stance_width_cmd = 0.0 152 | 153 | measured_x_vels = np.zeros(num_eval_steps) 154 | target_x_vels = np.ones(num_eval_steps) * x_vel_cmd 155 | joint_positions = np.zeros((num_eval_steps, 12)) 156 | 157 | import imageio 158 | mp4_writer = imageio.get_writer('dribbling.mp4', fps=50) 159 | 160 | obs = env.reset() 161 | ep_rew = 0 162 | for i in tqdm(range(num_eval_steps)): 163 | with torch.no_grad(): 164 | actions = policy(obs) 165 | env.commands[:, 0] = x_vel_cmd 166 | env.commands[:, 1] = y_vel_cmd 167 | env.commands[:, 2] = yaw_vel_cmd 168 | env.commands[:, 3] = body_height_cmd 169 | env.commands[:, 4] = step_frequency_cmd 170 | env.commands[:, 5:8] = gait 171 | env.commands[:, 8] = 0.5 172 | env.commands[:, 9] = footswing_height_cmd 173 | env.commands[:, 10] = pitch_cmd 174 | env.commands[:, 11] = roll_cmd 175 | env.commands[:, 12] = stance_width_cmd 176 | obs, rew, done, info = env.step(actions) 177 | measured_x_vels[i] = env.base_lin_vel[0, 0] 178 | joint_positions[i] = env.dof_pos[0, :].cpu() 179 | ep_rew += rew 180 | 181 | img = env.render(mode='rgb_array') 182 | mp4_writer.append_data(img) 183 | 184 | out_of_limits = -(env.dof_pos - env.dof_pos_limits[:, 0]).clip(max=0.) # lower limit 185 | out_of_limits += (env.dof_pos - env.dof_pos_limits[:, 1]).clip(min=0.) 
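# Note: out_of_limits measures per-joint violations of the lower and upper position limits at each step;
# it is computed here but not used further in this script.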
186 | 187 | mp4_writer.close() 188 | 189 | # plot target and measured forward velocity 190 | from matplotlib import pyplot as plt 191 | fig, axs = plt.subplots(2, 1, figsize=(12, 5)) 192 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), measured_x_vels, color='black', linestyle="-", label="Measured") 193 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), target_x_vels, color='black', linestyle="--", label="Desired") 194 | axs[0].legend() 195 | axs[0].set_title("Forward Linear Velocity") 196 | axs[0].set_xlabel("Time (s)") 197 | axs[0].set_ylabel("Velocity (m/s)") 198 | 199 | axs[1].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), joint_positions, linestyle="-", label="Measured") 200 | axs[1].set_title("Joint Positions") 201 | axs[1].set_xlabel("Time (s)") 202 | axs[1].set_ylabel("Joint Position (rad)") 203 | 204 | plt.tight_layout() 205 | plt.show() 206 | 207 | 208 | if __name__ == '__main__': 209 | # to see the environment rendering, set headless=False 210 | play_go1(headless=False) 211 | -------------------------------------------------------------------------------- /scripts/play_dribbling_pretrained.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | 3 | assert isaacgym 4 | import torch 5 | import numpy as np 6 | import glob 7 | 8 | from dribblebot.envs import * 9 | from dribblebot.envs.base.legged_robot_config import Cfg 10 | from dribblebot.envs.go1.go1_config import config_go1 11 | from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv 12 | 13 | from tqdm import tqdm 14 | 15 | def load_policy(logdir): 16 | body = torch.jit.load(logdir + '/body.jit', map_location="cpu") 17 | import os 18 | adaptation_module = torch.jit.load(logdir + '/adaptation_module.jit', map_location='cpu') 19 | 20 | def policy(obs, info={}): 21 | i = 0 22 | latent = adaptation_module.forward(obs["obs_history"].to('cpu')) 23 | action = body.forward(torch.cat((obs["obs_history"].to('cpu'), latent), dim=-1)) 24 | info['latent'] = latent 25 | return action 26 | 27 | return policy 28 | 29 | 30 | def load_env(label, headless=False): 31 | dirs = glob.glob(f"../runs/{label}/*") 32 | logdir = sorted(dirs)[-1] 33 | 34 | import yaml 35 | with open(logdir + "/config.yaml", 'rb') as file: 36 | cfg = yaml.safe_load(file) 37 | cfg = cfg["Cfg"] 38 | 39 | for key, value in cfg.items(): 40 | if hasattr(Cfg, key): 41 | for key2, value2 in cfg[key].items(): 42 | setattr(getattr(Cfg, key), key2, value2) 43 | 44 | # turn off DR for evaluation script 45 | Cfg.domain_rand.push_robots = False 46 | Cfg.domain_rand.randomize_friction = False 47 | Cfg.domain_rand.randomize_gravity = False 48 | Cfg.domain_rand.randomize_restitution = False 49 | Cfg.domain_rand.randomize_motor_offset = False 50 | Cfg.domain_rand.randomize_motor_strength = False 51 | Cfg.domain_rand.randomize_friction_indep = False 52 | Cfg.domain_rand.randomize_ground_friction = False 53 | Cfg.domain_rand.randomize_base_mass = False 54 | Cfg.domain_rand.randomize_Kd_factor = False 55 | Cfg.domain_rand.randomize_Kp_factor = False 56 | Cfg.domain_rand.randomize_joint_friction = False 57 | Cfg.domain_rand.randomize_com_displacement = False 58 | Cfg.domain_rand.randomize_tile_roughness = True 59 | # Cfg.domain_rand.tile_roughness_range = [0.1, 0.1] 60 | Cfg.domain_rand.tile_roughness_range = [0.0, 0.0] 61 | 62 | Cfg.env.num_recording_envs = 1 63 | Cfg.env.num_envs = 1 64 | # Cfg.env.num_observations = 75 65 | Cfg.terrain.num_rows = 5 66 | 
Cfg.terrain.num_cols = 5 67 | Cfg.terrain.border_size = 0 68 | Cfg.terrain.num_border_boxes = 0 69 | Cfg.terrain.center_robots = True 70 | Cfg.terrain.center_span = 1 71 | Cfg.terrain.teleport_robots = True 72 | 73 | Cfg.robot.name = "go1" 74 | Cfg.sensors.sensor_names = [ 75 | "ObjectSensor", 76 | "OrientationSensor", 77 | "RCSensor", 78 | "JointPositionSensor", 79 | "JointVelocitySensor", 80 | "ActionSensor", 81 | "ActionSensor", 82 | "ClockSensor", 83 | "YawSensor", 84 | "TimingSensor", 85 | ] 86 | Cfg.sensors.sensor_args = { 87 | "ObjectSensor": {}, 88 | "OrientationSensor": {}, 89 | "RCSensor": {}, 90 | "JointPositionSensor": {}, 91 | "JointVelocitySensor": {}, 92 | "ActionSensor": {}, 93 | "ActionSensor": {"delay": 1}, 94 | "ClockSensor": {}, 95 | "YawSensor": {}, 96 | "TimingSensor":{}, 97 | } 98 | 99 | Cfg.sensors.privileged_sensor_names = { 100 | "BodyVelocitySensor": {}, 101 | "ObjectVelocitySensor": {}, 102 | } 103 | Cfg.sensors.privileged_sensor_args = { 104 | "BodyVelocitySensor": {}, 105 | "ObjectVelocitySensor": {}, 106 | } 107 | Cfg.domain_rand.lag_timesteps = 6 108 | Cfg.domain_rand.randomize_lag_timesteps = True 109 | Cfg.control.control_type = "actuator_net" 110 | Cfg.env.num_privileged_obs = 6 111 | 112 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper 113 | 114 | env = VelocityTrackingEasyEnv(sim_device='cuda:0', headless=False, cfg=Cfg) 115 | env = HistoryWrapper(env) 116 | 117 | policy = load_policy(logdir) 118 | return env, policy 119 | 120 | 121 | def play_go1(headless=True): 122 | 123 | label = "improbableailab/dribbling/bvggoq26" 124 | env, policy = load_env(label, headless=headless) 125 | 126 | num_eval_steps = 500 127 | gaits = {"pronking": [0, 0, 0], 128 | "trotting": [0.5, 0, 0], 129 | "bounding": [0, 0.5, 0], 130 | "pacing": [0, 0, 0.5]} 131 | 132 | x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 1.0, 0.0, 0.0 133 | body_height_cmd = 0.0 134 | step_frequency_cmd = 3.0 135 | gait = torch.tensor(gaits["trotting"]) 136 | footswing_height_cmd = 0.09 137 | pitch_cmd = 0.0 138 | roll_cmd = 0.0 139 | stance_width_cmd = 0.0 140 | 141 | measured_x_vels = np.zeros(num_eval_steps) 142 | target_x_vels = np.ones(num_eval_steps) * x_vel_cmd 143 | joint_positions = np.zeros((num_eval_steps, 12)) 144 | 145 | import imageio 146 | mp4_writer = imageio.get_writer('dribbling.mp4', fps=50) 147 | 148 | obs = env.reset() 149 | ep_rew = 0 150 | for i in tqdm(range(num_eval_steps)): 151 | with torch.no_grad(): 152 | actions = policy(obs) 153 | env.commands[:, 0] = x_vel_cmd 154 | env.commands[:, 1] = y_vel_cmd 155 | env.commands[:, 2] = yaw_vel_cmd 156 | env.commands[:, 3] = body_height_cmd 157 | env.commands[:, 4] = step_frequency_cmd 158 | env.commands[:, 5:8] = gait 159 | env.commands[:, 8] = 0.5 160 | env.commands[:, 9] = footswing_height_cmd 161 | env.commands[:, 10] = pitch_cmd 162 | env.commands[:, 11] = roll_cmd 163 | env.commands[:, 12] = stance_width_cmd 164 | obs, rew, done, info = env.step(actions) 165 | measured_x_vels[i] = env.base_lin_vel[0, 0] 166 | joint_positions[i] = env.dof_pos[0, :].cpu() 167 | ep_rew += rew 168 | 169 | img = env.render(mode='rgb_array') 170 | mp4_writer.append_data(img) 171 | 172 | out_of_limits = -(env.dof_pos - env.dof_pos_limits[:, 0]).clip(max=0.) # lower limit 173 | out_of_limits += (env.dof_pos - env.dof_pos_limits[:, 1]).clip(min=0.) 
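        # `ep_rew` accumulates the per-step reward over the rollout and `measured_x_vels`
        # stores the base forward velocity plotted against the command below. A minimal,
        # hypothetical post-episode summary (not part of the original script) could be:
        #     print(f"episode reward: {ep_rew.sum().item():.2f}, "
        #           f"mean forward-velocity error: {np.abs(measured_x_vels - target_x_vels).mean():.3f} m/s")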
174 | 175 | mp4_writer.close() 176 | 177 | # plot target and measured forward velocity 178 | from matplotlib import pyplot as plt 179 | fig, axs = plt.subplots(2, 1, figsize=(12, 5)) 180 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), measured_x_vels, color='black', linestyle="-", label="Measured") 181 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), target_x_vels, color='black', linestyle="--", label="Desired") 182 | axs[0].legend() 183 | axs[0].set_title("Forward Linear Velocity") 184 | axs[0].set_xlabel("Time (s)") 185 | axs[0].set_ylabel("Velocity (m/s)") 186 | 187 | axs[1].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), joint_positions, linestyle="-", label="Measured") 188 | axs[1].set_title("Joint Positions") 189 | axs[1].set_xlabel("Time (s)") 190 | axs[1].set_ylabel("Joint Position (rad)") 191 | 192 | plt.tight_layout() 193 | plt.show() 194 | 195 | 196 | if __name__ == '__main__': 197 | # to see the environment rendering, set headless=False 198 | play_go1(headless=False) 199 | -------------------------------------------------------------------------------- /scripts/prepare_policy.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | 3 | assert isaacgym 4 | import torch 5 | 6 | import glob 7 | import pickle as pkl 8 | import lcm 9 | import os 10 | 11 | 12 | 13 | def load_and_save_policy(run_path, label): 14 | 15 | import wandb 16 | api = wandb.Api() 17 | run = api.run(run_path) 18 | 19 | # load config 20 | from dribblebot.envs.base.legged_robot_config import Cfg 21 | from dribblebot.envs.go1.go1_config import config_go1 22 | config_go1(Cfg) 23 | 24 | all_cfg = run.config 25 | cfg = all_cfg["Cfg"] 26 | 27 | for key, value in cfg.items(): 28 | if hasattr(Cfg, key): 29 | for key2, value2 in cfg[key].items(): 30 | setattr(getattr(Cfg, key), key2, value2) 31 | 32 | from dribblebot import MINI_GYM_ROOT_DIR 33 | path = MINI_GYM_ROOT_DIR + "/runs/" + run_path + "/" + label + "/" 34 | if not os.path.exists(path): 35 | os.makedirs(path) 36 | print("path: ", path) 37 | 38 | adaptation_module_path = os.path.join(path, f'adaptation_module.jit') 39 | adaptation_module_file = wandb.restore('tmp/legged_data/adaptation_module_latest.jit', run_path=run_path) 40 | # adaptation_module_file = run.file('tmp/legged_data/adaptation_module_68000.jit').download(replace=True, root='./tmp') 41 | adaptation_module = torch.jit.load(adaptation_module_file.name, map_location="cpu") 42 | adaptation_module.save(adaptation_module_path) 43 | 44 | body_path = os.path.join(path, f'body.jit') 45 | body_file = wandb.restore('tmp/legged_data/body_latest.jit', run_path=run_path) 46 | # body_file = run.file('tmp/legged_data/body_68000.jit').download(replace=True, root='./tmp') 47 | body = torch.jit.load(body_file.name, map_location="cpu") 48 | body.save(body_path) 49 | 50 | ac_weights_path = os.path.join(path, f'ac_weights.pt') 51 | ac_weights_file = wandb.restore('tmp/legged_data/ac_weights_68000.pt', run_path=run_path) 52 | # ac_weights_file = run.file('tmp/legged_data/ac_weights_68000.pt').download(replace=True, root='./tmp') 53 | ac_weights = torch.load(ac_weights_file.name, map_location="cpu") 54 | # ac_weights.load_state_dict(torch.load(ac_weights_file.name)) 55 | torch.save(ac_weights, ac_weights_path) 56 | 57 | cfg_path = os.path.join(path, f'config.yaml') 58 | import yaml 59 | with open(cfg_path, 'w') as f: 60 | yaml.dump(all_cfg, f) 61 | 62 | if __name__ == '__main__': 63 | 64 | run_path = 
"improbableailab/dribbling/bvggoq26" 65 | label = "dribbling_pretrained" 66 | 67 | 68 | load_and_save_policy(run_path, label) -------------------------------------------------------------------------------- /scripts/train_dribbling.py: -------------------------------------------------------------------------------- 1 | def train_go1(headless=True): 2 | 3 | import isaacgym 4 | assert isaacgym 5 | import torch 6 | 7 | from dribblebot.envs.base.legged_robot_config import Cfg 8 | from dribblebot.envs.go1.go1_config import config_go1 9 | from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv 10 | 11 | from dribblebot_learn.ppo_cse import Runner 12 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper 13 | from dribblebot_learn.ppo_cse.actor_critic import AC_Args 14 | from dribblebot_learn.ppo_cse.ppo import PPO_Args 15 | from dribblebot_learn.ppo_cse import RunnerArgs 16 | 17 | config_go1(Cfg) 18 | Cfg.env.num_envs = 1000 19 | 20 | RunnerArgs.resume = True 21 | RunnerArgs.resume_path = "improbableailab/dribbling/j34kr9ds" 22 | RunnerArgs.resume_checkpoint = 'tmp/legged_data/ac_weights_last.pt' 23 | 24 | 25 | Cfg.robot.name = "go1" 26 | Cfg.sensors.sensor_names = [ 27 | "ObjectSensor", 28 | "OrientationSensor", 29 | "RCSensor", 30 | "JointPositionSensor", 31 | "JointVelocitySensor", 32 | "ActionSensor", 33 | "LastActionSensor", 34 | "ClockSensor", 35 | "YawSensor", 36 | "TimingSensor", 37 | ] 38 | Cfg.sensors.sensor_args = { 39 | "ObjectSensor": {}, 40 | "OrientationSensor": {}, 41 | "RCSensor": {}, 42 | "JointPositionSensor": {}, 43 | "JointVelocitySensor": {}, 44 | "ActionSensor": {}, 45 | "LastActionSensor": {"delay": 1}, 46 | "ClockSensor": {}, 47 | "YawSensor": {}, 48 | "TimingSensor":{}, 49 | } 50 | Cfg.sensors.privileged_sensor_names = { 51 | "BodyVelocitySensor": {}, 52 | "ObjectVelocitySensor": {}, 53 | } 54 | Cfg.sensors.privileged_sensor_args = { 55 | "BodyVelocitySensor": {}, 56 | "ObjectVelocitySensor": {}, 57 | } 58 | 59 | Cfg.commands.num_lin_vel_bins = 30 60 | Cfg.commands.num_ang_vel_bins = 30 61 | Cfg.curriculum_thresholds.tracking_ang_vel = 0.7 62 | Cfg.curriculum_thresholds.tracking_lin_vel = 0.8 63 | Cfg.curriculum_thresholds.tracking_contacts_shaped_vel = 0.90 64 | Cfg.curriculum_thresholds.tracking_contacts_shaped_force = 0.90 65 | Cfg.curriculum_thresholds.dribbling_ball_vel = 0.8 66 | 67 | Cfg.commands.distributional_commands = True 68 | 69 | Cfg.domain_rand.lag_timesteps = 6 70 | Cfg.domain_rand.randomize_lag_timesteps = True 71 | Cfg.control.control_type = "actuator_net" 72 | 73 | Cfg.domain_rand.randomize_rigids_after_start = False 74 | Cfg.domain_rand.randomize_friction_indep = False 75 | Cfg.domain_rand.randomize_friction = False # True 76 | Cfg.domain_rand.randomize_restitution = False # True 77 | Cfg.domain_rand.restitution_range = [0.0, 0.4] 78 | Cfg.domain_rand.randomize_base_mass = True 79 | Cfg.domain_rand.added_mass_range = [-1.0, 3.0] 80 | Cfg.domain_rand.randomize_gravity = False 81 | Cfg.domain_rand.gravity_range = [-1.0, 1.0] 82 | Cfg.domain_rand.gravity_rand_interval_s = 8.0 83 | Cfg.domain_rand.gravity_impulse_duration = 0.99 84 | Cfg.domain_rand.randomize_com_displacement = False 85 | Cfg.domain_rand.com_displacement_range = [-0.15, 0.15] 86 | Cfg.domain_rand.randomize_ground_friction = True 87 | Cfg.domain_rand.ground_friction_range = [0.0, 0.0] 88 | Cfg.domain_rand.randomize_motor_strength = True 89 | Cfg.domain_rand.motor_strength_range = [0.99, 1.01] 90 | Cfg.domain_rand.randomize_motor_offset = True 91 | 
Cfg.domain_rand.motor_offset_range = [-0.002, 0.002] 92 | Cfg.domain_rand.push_robots = False 93 | Cfg.domain_rand.randomize_Kp_factor = True 94 | Cfg.domain_rand.randomize_Kd_factor = True 95 | Cfg.domain_rand.randomize_ball_drag = True 96 | Cfg.domain_rand.drag_range = [0.1, 0.8] 97 | Cfg.domain_rand.ball_drag_rand_interval_s = 15.0 98 | 99 | Cfg.env.num_observation_history = 15 100 | Cfg.reward_scales.feet_contact_forces = 0.0 101 | Cfg.env.num_envs = 1000 102 | 103 | Cfg.commands.exclusive_phase_offset = False 104 | Cfg.commands.pacing_offset = False 105 | Cfg.commands.balance_gait_distribution = False 106 | Cfg.commands.binary_phases = False 107 | Cfg.commands.gaitwise_curricula = False 108 | 109 | ############################### 110 | # soccer dribbling configuration 111 | ############################### 112 | 113 | # ball parameters 114 | Cfg.env.add_balls = True 115 | 116 | # domain randomization ranges 117 | Cfg.domain_rand.rand_interval_s = 6 118 | Cfg.domain_rand.friction_range = [0.0, 1.5] 119 | Cfg.domain_rand.randomize_ground_friction = True 120 | Cfg.domain_rand.ground_friction_range = [0.7, 4.0] 121 | Cfg.domain_rand.restitution_range = [0.0, 0.4] 122 | Cfg.domain_rand.added_mass_range = [-1.0, 3.0] 123 | Cfg.domain_rand.gravity_range = [-1.0, 1.0] 124 | Cfg.domain_rand.motor_strength_range = [0.99, 1.01] 125 | Cfg.domain_rand.motor_offset_range = [-0.002, 0.002] 126 | Cfg.domain_rand.tile_roughness_range = [0.0, 0.0] 127 | 128 | # privileged obs in use 129 | Cfg.env.num_privileged_obs = 6 130 | Cfg.env.priv_observe_ball_drag = True 131 | 132 | 133 | # sensory observation 134 | Cfg.commands.num_commands = 15 135 | Cfg.env.episode_length_s = 40. 136 | Cfg.env.num_observations = 75 137 | 138 | # terrain configuration 139 | Cfg.terrain.border_size = 0.0 140 | Cfg.terrain.mesh_type = "boxes_tm" 141 | Cfg.terrain.num_cols = 20 142 | Cfg.terrain.num_rows = 20 143 | Cfg.terrain.terrain_length = 5.0 144 | Cfg.terrain.terrain_width = 5.0 145 | Cfg.terrain.num_border_boxes = 5.0 146 | Cfg.terrain.x_init_range = 0.2 147 | Cfg.terrain.y_init_range = 0.2 148 | Cfg.terrain.teleport_thresh = 0.3 149 | Cfg.terrain.teleport_robots = False 150 | Cfg.terrain.center_robots = False 151 | Cfg.terrain.center_span = 3 152 | Cfg.terrain.horizontal_scale = 0.05 153 | Cfg.terrain.terrain_proportions = [1.0, 0.0, 0.0, 0.0, 0.0] 154 | Cfg.terrain.curriculum = False 155 | Cfg.terrain.difficulty_scale = 1.0 156 | Cfg.terrain.max_step_height = 0.26 157 | Cfg.terrain.min_step_run = 0.25 158 | Cfg.terrain.max_step_run = 0.4 159 | Cfg.terrain.max_init_terrain_level = 1 160 | 161 | # terminal conditions 162 | Cfg.rewards.use_terminal_body_height = True 163 | Cfg.rewards.terminal_body_height = 0.2 164 | Cfg.rewards.use_terminal_roll_pitch = False 165 | Cfg.rewards.terminal_body_ori = 0.5 166 | 167 | # command sampling 168 | Cfg.commands.resampling_time = 7 169 | Cfg.commands.heading_command = False 170 | 171 | Cfg.commands.lin_vel_x = [-1.5, 1.5] 172 | Cfg.commands.lin_vel_y = [-1.5, 1.5] 173 | Cfg.commands.ang_vel_yaw = [-0.0, 0.0] 174 | Cfg.commands.body_height_cmd = [-0.05, 0.05] 175 | Cfg.commands.gait_frequency_cmd_range = [3.0, 3.0] 176 | Cfg.commands.gait_phase_cmd_range = [0.5, 0.5] 177 | Cfg.commands.gait_offset_cmd_range = [0.0, 0.0] 178 | Cfg.commands.gait_bound_cmd_range = [0.0, 0.0] 179 | Cfg.commands.gait_duration_cmd_range = [0.5, 0.5] 180 | Cfg.commands.footswing_height_range = [0.09, 0.09] 181 | Cfg.commands.body_pitch_range = [-0.0, 0.0] 182 | Cfg.commands.body_roll_range = [-0.0, 0.0] 183 | 
Cfg.commands.stance_width_range = [0.0, 0.1] 184 | Cfg.commands.stance_length_range = [0.0, 0.1] 185 | 186 | Cfg.commands.limit_vel_x = [-1.5, 1.5] 187 | Cfg.commands.limit_vel_y = [-1.5, 1.5] 188 | Cfg.commands.limit_vel_yaw = [-0.0, 0.0] 189 | Cfg.commands.limit_body_height = [-0.05, 0.05] 190 | Cfg.commands.limit_gait_frequency = [3.0, 3.0] 191 | Cfg.commands.limit_gait_phase = [0.5, 0.5] 192 | Cfg.commands.limit_gait_offset = [0.0, 0.0] 193 | Cfg.commands.limit_gait_bound = [0.0, 0.0] 194 | Cfg.commands.limit_gait_duration = [0.5, 0.5] 195 | Cfg.commands.limit_footswing_height = [0.09, 0.09] 196 | Cfg.commands.limit_body_pitch = [-0.0, 0.0] 197 | Cfg.commands.limit_body_roll = [-0.0, 0.0] 198 | Cfg.commands.limit_stance_width = [0.0, 0.1] 199 | Cfg.commands.limit_stance_length = [0.0, 0.1] 200 | 201 | Cfg.commands.num_bins_vel_x = 1 202 | Cfg.commands.num_bins_vel_y = 1 203 | Cfg.commands.num_bins_vel_yaw = 1 204 | Cfg.commands.num_bins_body_height = 1 205 | Cfg.commands.num_bins_gait_frequency = 1 206 | Cfg.commands.num_bins_gait_phase = 1 207 | Cfg.commands.num_bins_gait_offset = 1 208 | Cfg.commands.num_bins_gait_bound = 1 209 | Cfg.commands.num_bins_gait_duration = 1 210 | Cfg.commands.num_bins_footswing_height = 1 211 | Cfg.commands.num_bins_body_roll = 1 212 | Cfg.commands.num_bins_body_pitch = 1 213 | Cfg.commands.num_bins_stance_width = 1 214 | 215 | Cfg.rewards.constrict = False 216 | 217 | # reward function 218 | Cfg.reward_scales.orientation = -5.0 219 | Cfg.reward_scales.torques = -0.0001 220 | Cfg.reward_scales.dof_vel = -0.0001 221 | Cfg.reward_scales.dof_acc = -2.5e-7 222 | Cfg.reward_scales.collision = -5.0 223 | Cfg.reward_scales.action_rate = -0.01 224 | Cfg.reward_scales.tracking_contacts_shaped_force = 4.0 225 | Cfg.reward_scales.tracking_contacts_shaped_vel = 4.0 226 | Cfg.reward_scales.dof_pos_limits = -10.0 227 | Cfg.reward_scales.dof_pos = -0.05 228 | Cfg.reward_scales.action_smoothness_1 = -0.1 229 | Cfg.reward_scales.action_smoothness_2 = -0.1 230 | Cfg.reward_scales.dribbling_robot_ball_vel = 0.5 231 | Cfg.reward_scales.dribbling_robot_ball_pos = 4.0 232 | Cfg.reward_scales.dribbling_ball_vel = 4.0 233 | Cfg.reward_scales.dribbling_robot_ball_yaw = 4.0 234 | Cfg.reward_scales.dribbling_ball_vel_norm = 4.0 235 | Cfg.reward_scales.dribbling_ball_vel_angle = 4.0 236 | 237 | Cfg.reward_scales.tracking_lin_vel = 0.0 238 | Cfg.reward_scales.tracking_ang_vel = 0.0 239 | Cfg.reward_scales.lin_vel_z = 0.0 240 | Cfg.reward_scales.ang_vel_xy = 0.0 241 | Cfg.reward_scales.feet_air_time = 0.0 242 | 243 | Cfg.rewards.kappa_gait_probs = 0.07 244 | Cfg.rewards.gait_force_sigma = 100. 245 | Cfg.rewards.gait_vel_sigma = 10. 
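    # Each entry in `Cfg.reward_scales` multiplies the matching reward term from the
    # "SoccerRewards" container (dribblebot/rewards/soccer_rewards.py) before the terms
    # are combined; the positive scales reward dribbling behaviour (ball velocity,
    # robot-ball position and yaw alignment), while the negative scales act as penalties
    # on torque, joint acceleration, collisions, and action rate. For example, raising
    # `Cfg.reward_scales.dribbling_ball_vel` from 4.0 to 8.0 would weight ball-velocity
    # tracking twice as heavily relative to the gait-shaping terms.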
246 | 247 | Cfg.rewards.reward_container_name = "SoccerRewards" 248 | Cfg.rewards.only_positive_rewards = False 249 | Cfg.rewards.only_positive_rewards_ji22_style = True 250 | Cfg.rewards.sigma_rew_neg = 0.02 251 | 252 | # normalization 253 | Cfg.normalization.friction_range = [0, 1] 254 | Cfg.normalization.ground_friction_range = [0.7, 4.0] 255 | Cfg.terrain.yaw_init_range = 3.14 256 | Cfg.normalization.clip_actions = 10.0 257 | 258 | # reward function (not in use) 259 | Cfg.reward_scales.feet_slip = -0.0 260 | Cfg.reward_scales.jump = 0.0 261 | Cfg.reward_scales.base_height = -0.0 262 | Cfg.reward_scales.feet_impact_vel = -0.0 263 | Cfg.reward_scales.feet_air_time = 0.0 264 | 265 | Cfg.asset.terminate_after_contacts_on = [] 266 | 267 | AC_Args.adaptation_labels = [] 268 | AC_Args.adaptation_dims = [] 269 | 270 | RunnerArgs.save_video_interval = 500 271 | 272 | import wandb 273 | wandb.init( 274 | # set the wandb project where this run will be logged 275 | project="dribbling", 276 | 277 | # track hyperparameters and run metadata 278 | config={ 279 | "AC_Args": vars(AC_Args), 280 | "PPO_Args": vars(PPO_Args), 281 | "RunnerArgs": vars(RunnerArgs), 282 | "Cfg": vars(Cfg), 283 | } 284 | ) 285 | 286 | device = 'cuda:0' 287 | # device = 'cpu' 288 | env = VelocityTrackingEasyEnv(sim_device=device, headless=False, cfg=Cfg) 289 | 290 | env = HistoryWrapper(env) 291 | runner = Runner(env, device=device) 292 | runner.learn(num_learning_iterations=1000000, init_at_random_ep_len=True, eval_freq=100) 293 | 294 | 295 | if __name__ == '__main__': 296 | from pathlib import Path 297 | from dribblebot import MINI_GYM_ROOT_DIR 298 | 299 | stem = Path(__file__).stem 300 | 301 | # to see the environment rendering, set headless=False 302 | train_go1(headless=False) 303 | 304 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from distutils.core import setup 3 | 4 | setup( 5 | name='dribblebot', 6 | version='1.0.0', 7 | author='Yandong Ji', 8 | license="BSD-3-Clause", 9 | packages=find_packages(), 10 | author_email='ydji1024@gmail.com', 11 | description='Toolkit for deployment of soccer dribbling on the Unitree Go1.', 12 | install_requires=[ 13 | 'params-proto==2.10.9', 14 | 'gym==0.18.0', 15 | 'tqdm', 16 | 'matplotlib', 17 | 'numpy==1.23.5', 18 | 'wandb==0.15.0', 19 | 'wandb_osh', 20 | #'moviepy', 21 | 'imageio' 22 | ] 23 | ) 24 | --------------------------------------------------------------------------------
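Taken together, prepare_policy.py exports body.jit, adaptation_module.jit, ac_weights.pt, and config.yaml into runs/<run_path>/<label>/, and the play scripts rebuild the policy by passing the observation history through the adaptation module and concatenating its latent with that history before the body network. The snippet below is a minimal standalone sketch of that composition, not a file from the repository: the 15 x 75 observation-history size is an assumption taken from the training config above, and the zero-valued input and printed shape are illustrative placeholders.

import torch

logdir = "../runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained"
adaptation_module = torch.jit.load(logdir + "/adaptation_module.jit", map_location="cpu")
body = torch.jit.load(logdir + "/body.jit", map_location="cpu")

# placeholder history: in the real pipeline, HistoryWrapper stacks the last 15 observations of size 75
obs_history = torch.zeros(1, 15 * 75)
with torch.no_grad():
    latent = adaptation_module(obs_history)
    action = body(torch.cat((obs_history, latent), dim=-1))
print(action.shape)  # expected to be (1, 12): one target per actuated Go1 joint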