├── LICENSE
├── LICENSES
│   ├── legged_gym
│   │   └── LICENSE
│   └── rsl_rl
│       └── LICENSE
├── README.md
├── dribblebot
│   ├── __init__.py
│   ├── assets
│   │   ├── __init__.py
│   │   ├── asset.py
│   │   └── ball.py
│   ├── envs
│   │   ├── __init__.py
│   │   ├── base
│   │   │   ├── __init__.py
│   │   │   ├── base_task.py
│   │   │   ├── curriculum.py
│   │   │   ├── legged_robot.py
│   │   │   └── legged_robot_config.py
│   │   ├── go1
│   │   │   ├── __init__.py
│   │   │   ├── go1_config.py
│   │   │   └── velocity_tracking
│   │   │       └── __init__.py
│   │   └── wrappers
│   │       └── history_wrapper.py
│   ├── rewards
│   │   ├── rewards.py
│   │   └── soccer_rewards.py
│   ├── robots
│   │   ├── go1.py
│   │   └── robot.py
│   ├── sensors
│   │   ├── __init__.py
│   │   ├── action_sensor.py
│   │   ├── attached_camera_sensor.py
│   │   ├── body_velocity_sensor.py
│   │   ├── clock_sensor.py
│   │   ├── floating_camera_sensor.py
│   │   ├── friction_sensor.py
│   │   ├── heightmap_sensor.py
│   │   ├── joint_position_sensor.py
│   │   ├── joint_velocity_sensor.py
│   │   ├── last_action_sensor.py
│   │   ├── object_sensor.py
│   │   ├── object_velocity_sensor.py
│   │   ├── orientation_sensor.py
│   │   ├── rc_sensor.py
│   │   ├── restitution_sensor.py
│   │   ├── sensor.py
│   │   ├── timing_sensor.py
│   │   └── yaw_sensor.py
│   ├── terrains
│   │   ├── __init__.py
│   │   ├── box_terrain.py
│   │   ├── ground_plane_terrain.py
│   │   ├── heightfield_terrain.py
│   │   ├── terrain.py
│   │   ├── tm_box_terrain.py
│   │   └── trimesh_terrain.py
│   └── utils
│       ├── __init__.py
│       ├── logger.py
│       ├── math_utils.py
│       └── terrain.py
├── dribblebot_learn
│   ├── __init__.py
│   ├── env
│   │   ├── __init__.py
│   │   └── vec_env.py
│   ├── eval_metrics
│   │   ├── __init__.py
│   │   ├── domain_randomization.py
│   │   └── metrics.py
│   ├── ppo_cse
│   │   ├── __init__.py
│   │   ├── actor_critic.py
│   │   ├── metrics_caches.py
│   │   ├── ppo.py
│   │   └── rollout_storage.py
│   └── utils
│       ├── __init__.py
│       └── utils.py
├── resources
│   ├── actuator_nets
│   │   └── unitree_go1.pt
│   ├── objects
│   │   └── ball.urdf
│   ├── robots
│   │   └── go1
│   │       ├── meshes
│   │       │   ├── calf.stl
│   │       │   ├── hip.stl
│   │       │   ├── thigh.stl
│   │       │   ├── thigh_mirror.stl
│   │       │   └── trunk.stl
│   │       ├── urdf
│   │       │   └── go1.urdf
│   │       └── xml
│   │           └── go1.xml
│   └── textures
│       ├── background_texture_metal_rust.jpg
│       ├── brick_texture.jpg
│       ├── grass.jpg
│       ├── ice_texture.jpg
│       ├── icy_pond.jpg
│       ├── metal_wall_iron_fence.jpg
│       ├── particle_board_paint_aged.jpg
│       ├── pebble_stone_texture_nature.jpg
│       ├── snow.jpg
│       ├── texture_background_wall_paint_2.jpg
│       ├── texture_background_wall_paint_3.jpg
│       ├── texture_stone_stone_texture_0.jpg
│       ├── texture_wood_brown_1033760.jpg
│       ├── tiled_brick_texture.jpg
│       ├── tiled_grass.jpg
│       ├── tiled_ice_texture.jpg
│       ├── tiled_particle_board_paint_aged.jpg
│       ├── tiled_pebble_stone_texture_nature.jpg
│       ├── tiled_snow.jpg
│       ├── tiled_texture_stone_stone_texture_0.jpg
│       ├── tiled_texture_wood_brown_1033760.jpg
│       └── tiling_util.py
├── runs
│   └── improbableailab
│       └── dribbling
│           └── bvggoq26
│               └── dribbling_pretrained
│                   ├── ac_weights.pt
│                   ├── adaptation_module.jit
│                   ├── body.jit
│                   └── config.yaml
├── scripts
│   ├── __init__.py
│   ├── actuator_net
│   │   ├── __init__.py
│   │   ├── eval.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── config.yaml
│   ├── loading_utils.py
│   ├── play_dribbling_custom.py
│   ├── play_dribbling_pretrained.py
│   ├── prepare_policy.py
│   └── train_dribbling.py
└── setup.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 MIT Improbable AI Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
24 | See licenses/legged_gym and licenses/rsl_rl for additional license information for some files in this package.
25 | Files associated with these additional licenses indicate so in the header.
--------------------------------------------------------------------------------
/LICENSES/legged_gym/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin
2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software without
17 | specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | See licenses/assets for license information for assets included in this repository.
31 | See licenses/dependencies for license information of dependencies of this package.
32 |
--------------------------------------------------------------------------------
/LICENSES/rsl_rl/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, ETH Zurich, Nikita Rudin
2 | Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software without
17 | specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | See licenses/dependencies for license information of dependencies of this package.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Go1 Soccer (Dribblebot) Starter Kit
2 |
3 | # Table of contents
4 | 1. [Overview](#overview)
5 | 2. [System Requirements](#requirements)
6 | 3. [Training a Model](#simulation)
7 | 1. [Installation](#installation)
8 | 2. [Environment and Model Configuration](#configuration)
9 | 3. [Training, Logging and Evaluation](#training)
10 | 4. [Deploying a Model (Coming Soon)](#realworld)
11 |
12 | ## Overview
13 |
14 | This repository provides an implementation of the paper:
15 |
16 |
17 |
18 |
19 | DribbleBot: Dynamic Legged Manipulation in the Wild
20 |
21 |
22 | Yandong Ji*, Gabriel B. Margolis* and Pulkit Agrawal
23 |
24 | International Conference on Robotics and Automation (ICRA), 2023
25 |
26 | paper /
27 | bibtex /
28 | project page
29 |
30 |
31 |
32 |
33 |
34 | This training code, environment and documentation build on [Walk these Ways: Tuning Robot Control for Generalization with Multiplicity of Behavior](https://github.com/Improbable-AI/walk-these-ways) by Gabriel Margolis and Pulkit Agrawal, Improbable AI Lab, MIT (Paper: https://arxiv.org/pdf/2212.03238.pdf) and the Isaac Gym simulator from
35 | NVIDIA (Paper: https://arxiv.org/abs/2108.10470). All redistributed code retains its
36 | original [license](LICENSES/legged_gym/LICENSE).
37 |
38 | Our initial release provides the following features:
39 | * Train reinforcement learning policies for the Go1 robot to dribble a soccer ball in simulation, using PPO, Isaac Gym, and domain randomization, while following random ball velocity commands given in the global frame.
40 | * Evaluate a pre-trained soccer policy in simulation.
41 |
42 | ## System Requirements
43 |
44 | **Simulated Training and Evaluation**: Isaac Gym requires an NVIDIA GPU. To train in the default configuration, we recommend a GPU with at least 10GB of VRAM. The code can run on a smaller GPU if you decrease the number of parallel environments (`Cfg.env.num_envs`). However, training will be slower with fewer environments.
45 |
46 | ## Training a Model
47 |
48 | ### Installation using Conda
49 |
50 | #### Create a new conda environment with Python (3.8 suggested)
51 | ```bash
52 | conda create -n dribblebot python==3.8
53 | conda activate dribblebot
54 | ```
55 | #### Install PyTorch 1.10 with CUDA 11.3:
56 |
57 | ```bash
58 | pip3 install torch==1.10.0+cu113 torchvision==0.11.1+cu113 torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
59 | ```
60 |
61 | #### Install Isaac Gym
62 |
63 | 1. Download and install Isaac Gym Preview 4 from https://developer.nvidia.com/isaac-gym
64 | 2. Unpack the archive:
65 | ```bash
66 | tar -xf IsaacGym_Preview_4_Package.tar.gz
67 | ```
68 |
69 | 3. Now install the Python package:
70 | ```bash
71 | cd isaacgym/python && pip install -e .
72 | ```
73 | 4. Verify the installation by running an example:
74 |
75 | ```bash
76 | python examples/1080_balls_of_solitude.py
77 | ```
78 | 5. For troubleshooting, see the docs at `isaacgym/docs/index.html`
79 |
80 | #### Install the `dribblebot` package
81 |
82 | In the root directory of this repository, run `pip install -e .`
83 |
84 | ### Evaluate the pre-trained policy
85 |
86 | If everything is installed correctly, you should be able to run the evaluation script with:
87 |
88 | ```bash
89 | python scripts/play_dribbling_pretrained.py
90 | ```
91 |
92 | You should see the robot dribbling a yellow soccer ball while following random global velocity commands.
93 |
94 | ### Environment and Model Configuration
95 |
96 |
97 | **CODE STRUCTURE** The main environment for simulating a legged robot is
98 | in [legged_robot.py](dribblebot/envs/base/legged_robot.py). The default configuration parameters, including reward
99 | weightings, are defined in [legged_robot_config.py::Cfg](dribblebot/envs/base/legged_robot_config.py).
100 |
101 | There are three scripts in the [scripts](scripts/) directory:
102 |
103 | ```bash
104 | scripts
105 | ├── __init__.py
106 | ├── play_dribbling_custom.py
107 | ├── play_dribbling_pretrained.py
108 | └── train_dribbling.py
109 | ```
110 |
111 | ### Training, Logging and Evaluation
112 |
113 | To train the Go1 controller from [Dribblebot](https://gmargo11.github.io/dribblebot/), run:
114 |
115 | ```bash
116 | python scripts/train_dribbling.py
117 | ```
118 |
119 | After initializing the simulator, the script will print out a list of metrics every ten training iterations.
120 |
121 | Training with the default configuration requires about 12GB of GPU memory. If you have less memory available, you can
122 | still train by reducing the number of parallel environments used in simulation (the default is `Cfg.env.num_envs = 1000`).
123 |
124 | To visualize training progress, first set up Weights & Biases (wandb):
125 |
126 | #### Set Up Weights & Biases (wandb)
127 |
128 | Weights & Biases provides a dashboard where you can track the progress of your training runs, including statistics and videos.
129 |
130 | First, follow the instructions here to create your wandb account: https://docs.wandb.ai/quickstart
131 |
132 | Make sure to perform the `wandb.login()` step from your local computer.
133 |
134 | Finally, open the run page linked in the wandb console output (or your project page on https://wandb.ai) in a web browser to follow training.
135 |
136 | To evaluate a pre-trained policy, run `play_dribbling_pretrained.py`. We provide a pretrained agent checkpoint in the [./runs/dribbling](runs/dribbling) directory.
137 |
138 | ## Deploying a Model (Coming Soon)
139 |
140 | We are working on a modular version of the vision processing code so that DribbleBot can be easily deployed on the Go1. It will be added in a future release.
--------------------------------------------------------------------------------
/dribblebot/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 |
5 | MINI_GYM_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
6 | MINI_GYM_ENVS_DIR = os.path.join(MINI_GYM_ROOT_DIR, 'dribblebot', 'envs')
7 |
--------------------------------------------------------------------------------
/dribblebot/assets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/assets/__init__.py
--------------------------------------------------------------------------------
/dribblebot/assets/asset.py:
--------------------------------------------------------------------------------
1 | class Asset:
2 | """
3 | Asset class
4 | Wraps reset, domain randomization, actuation, and observation for a
5 | non-robot asset
6 | """
7 | def __init__(self, env):
8 | self.env = env
9 | self.num_dof = 0 # default
10 | self.num_bodies = 1 # default
11 | self.num_actuated_dof = 0 # default
12 |
13 | def initialize_asset(self):
14 | raise NotImplementedError
15 |
16 | def reset(self):
17 | raise NotImplementedError
18 |
19 | def randomize(self):
20 | raise NotImplementedError
21 |
22 | def get_force_feedback(self):
23 | raise NotImplementedError
24 |
25 | def get_observation(self):
26 | raise NotImplementedError
27 |
28 | def get_num_bodies(self):
29 | return self.num_bodies
30 |
31 | def get_num_dof(self):
32 | return self.num_dof
33 |
34 | def get_num_actuated_dof(self):
35 | return self.num_actuated_dof
36 |
--------------------------------------------------------------------------------
/dribblebot/assets/ball.py:
--------------------------------------------------------------------------------
1 | from dribblebot.assets.asset import Asset
2 | from isaacgym.torch_utils import *
3 | from isaacgym import gymapi
4 |
5 |
6 | class Ball(Asset):
7 | def __init__(self, env):
8 | super().__init__(env)
9 | # self.reset()
10 |
11 | def initialize(self):
12 | ball_radius = self.env.cfg.ball.radius
13 | asset_options = gymapi.AssetOptions()
14 | asset = self.env.gym.create_sphere(self.env.sim, ball_radius,
15 | asset_options)
16 | rigid_shape_props = self.env.gym.get_asset_rigid_shape_properties(
17 | asset)
18 |
19 | self.num_bodies = self.env.gym.get_asset_rigid_body_count(asset)
20 |
21 | return asset, rigid_shape_props
22 |
23 | def get_force_feedback(self):
24 | return None
25 |
26 | def get_observation(self):
27 | robot_object_vec = self.env.root_states[self.env.object_actor_idxs,
28 | 0:3] - self.env.base_pos
29 | return robot_object_vec
30 |
31 | def get_local_pos(self):
32 | robot_object_vec = self.env.root_states[self.env.object_actor_idxs,
33 | 0:3] - self.env.base_pos
34 | return robot_object_vec
35 |
36 | def get_lin_vel(self):
37 | return self.env.root_states[self.env.object_actor_idxs, 7:10]
38 |
39 | def get_ang_vel(self):
40 | return quat_rotate_inverse(self.env.base_quat,
41 | self.env.root_states[
42 | self.env.object_actor_idxs, 10:13])
43 |
--------------------------------------------------------------------------------
/dribblebot/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/envs/__init__.py
--------------------------------------------------------------------------------
/dribblebot/envs/base/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/envs/base/__init__.py
--------------------------------------------------------------------------------
/dribblebot/envs/base/base_task.py:
--------------------------------------------------------------------------------
1 | # License: see [LICENSE, LICENSES/legged_gym/LICENSE]
2 |
3 | import sys
4 |
5 | import gym
6 | import torch
7 | from isaacgym import gymapi, gymutil
8 |
9 | from gym import spaces
10 | import numpy as np
11 |
12 |
13 | # Base class for RL tasks
14 | class BaseTask(gym.Env):
15 |
16 | def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
17 | self.gym = gymapi.acquire_gym()
18 |
19 | if isinstance(physics_engine, str) and physics_engine == "SIM_PHYSX":
20 | physics_engine = gymapi.SIM_PHYSX
21 |
22 | self.sim_params = sim_params
23 | self.physics_engine = physics_engine
24 | self.sim_device = sim_device
25 | sim_device_type, self.sim_device_id = gymutil.parse_device_str(self.sim_device)
26 | self.headless = headless
27 |
28 |
29 | # env device is GPU only if sim is on GPU and use_gpu_pipeline=True, otherwise returned tensors are copied to CPU by physX.
30 | if sim_device_type == 'cuda' and sim_params.use_gpu_pipeline:
31 | self.device = self.sim_device
32 | else:
33 | self.device = 'cpu'
34 |
35 | # graphics device for rendering, -1 for no rendering
36 | self.graphics_device_id = self.sim_device_id
37 | if self.headless == True:
38 | self.graphics_device_id = self.sim_device_id
39 |
40 | self.num_obs = cfg.env.num_observations
41 | self.num_privileged_obs = cfg.env.num_privileged_obs
42 | self.num_actions = cfg.env.num_actions
43 |
44 |
45 | self.num_train_envs = cfg.env.num_envs
46 | self.num_envs = cfg.env.num_envs
47 |
48 | # optimization flags for pytorch JIT
49 | torch._C._jit_set_profiling_mode(False)
50 | torch._C._jit_set_profiling_executor(False)
51 |
52 | # allocate buffers
53 | self.obs_buf = torch.zeros(self.num_envs, self.num_obs, device=self.device, dtype=torch.float)
54 | self.rew_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.float)
55 | self.rew_buf_pos = torch.zeros(self.num_envs, device=self.device, dtype=torch.float)
56 | self.rew_buf_neg = torch.zeros(self.num_envs, device=self.device, dtype=torch.float)
57 | self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
58 | self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
59 | self.time_out_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
60 | self.privileged_obs_buf = torch.zeros(self.num_envs, self.num_privileged_obs, device=self.device,
61 | dtype=torch.float)
62 | # self.num_privileged_obs = self.num_obs
63 |
64 | self.extras = {}
65 |
66 | # create envs, sim and viewer
67 | self.create_sim()
68 | self.gym.prepare_sim(self.sim)
69 |
70 | # todo: read from config
71 | self.enable_viewer_sync = True
72 | self.viewer = None
73 |
74 | # if running with a viewer, set up keyboard shortcuts and camera
75 | if self.headless == False:
76 | # subscribe to keyboard shortcuts
77 | self.viewer = self.gym.create_viewer(
78 | self.sim, gymapi.CameraProperties())
79 | self.gym.subscribe_viewer_keyboard_event(
80 | self.viewer, gymapi.KEY_ESCAPE, "QUIT")
81 | self.gym.subscribe_viewer_keyboard_event(
82 | self.viewer, gymapi.KEY_V, "toggle_viewer_sync")
83 |
84 | def get_observations(self):
85 | return self.obs_buf
86 |
87 | def get_privileged_observations(self):
88 | return self.privileged_obs_buf
89 |
90 | def reset_idx(self, env_ids):
91 | """Reset selected robots"""
92 | raise NotImplementedError
93 |
94 | def reset(self):
95 | """ Reset all robots"""
96 | self.reset_idx(torch.arange(self.num_envs, device=self.device))
97 | obs, privileged_obs, _, _, _ = self.step(
98 | torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False))
99 | return obs, privileged_obs
100 |
101 | def step(self, actions):
102 | raise NotImplementedError
103 |
104 | def render_gui(self, sync_frame_time=True):
105 | if self.viewer:
106 | # check for window closed
107 | if self.gym.query_viewer_has_closed(self.viewer):
108 | sys.exit()
109 |
110 | # check for keyboard events
111 | for evt in self.gym.query_viewer_action_events(self.viewer):
112 | if evt.action == "QUIT" and evt.value > 0:
113 | sys.exit()
114 | elif evt.action == "toggle_viewer_sync" and evt.value > 0:
115 | self.enable_viewer_sync = not self.enable_viewer_sync
116 |
117 | # fetch results
118 | if self.device != 'cpu':
119 | self.gym.fetch_results(self.sim, True)
120 |
121 | # step graphics
122 | if self.enable_viewer_sync:
123 | self.gym.step_graphics(self.sim)
124 | self.gym.draw_viewer(self.viewer, self.sim, True)
125 | if sync_frame_time:
126 | self.gym.sync_frame_time(self.sim)
127 | else:
128 | self.gym.poll_viewer_events(self.viewer)
129 |
130 | def close(self):
131 | if self.headless == False:
132 | self.gym.destroy_viewer(self.viewer)
133 | self.gym.destroy_sim(self.sim)
134 |
--------------------------------------------------------------------------------
/dribblebot/envs/base/curriculum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from matplotlib import pyplot as plt
4 |
5 |
6 | def is_met(scale, l2_err, threshold):
7 | return (l2_err / scale) < threshold
8 |
9 |
10 | def key_is_met(metric_cache, config, ep_len, target_key, env_id, threshold):
11 | # metric_cache[target_key][env_id] / ep_len
12 | scale = 1
13 | l2_err = 0
14 | return is_met(scale, l2_err, threshold)
15 |
16 |
17 | class Curriculum:
18 | def set_to(self, low, high, value=1.0):
19 | inds = np.logical_and(
20 | self.grid >= low[:, None],
21 | self.grid <= high[:, None]
22 | ).all(axis=0)
23 |
24 | assert len(inds) != 0, "You are initializing your distribution with an empty domain!"
25 |
26 | self.weights[inds] = value
27 |
28 | # print("SUM", self.weights.sum(), self.weights[inds], value)
29 |
30 | def __init__(self, seed, **key_ranges):
31 | # print("SET RNG")
32 | self.rng = np.random.RandomState(seed)
33 |
34 | self.cfg = cfg = {}
35 | self.indices = indices = {}
36 | for key, v_range in key_ranges.items():
37 | bin_size = (v_range[1] - v_range[0]) / v_range[2]
38 | cfg[key] = np.linspace(v_range[0] + bin_size / 2, v_range[1] - bin_size / 2, v_range[2])
39 | indices[key] = np.linspace(0, v_range[2]-1, v_range[2])
40 |
41 | self.lows = np.array([range[0] for range in key_ranges.values()])
42 | self.highs = np.array([range[1] for range in key_ranges.values()])
43 |
44 | # self.bin_sizes = {key: arr[1] - arr[0] for key, arr in cfg.items()}
45 | self.bin_sizes = {key: (v_range[1] - v_range[0]) / v_range[2] for key, v_range in key_ranges.items()}
46 |
47 | self._raw_grid = np.stack(np.meshgrid(*cfg.values(), indexing='ij'))
48 | self._idx_grid = np.stack(np.meshgrid(*indices.values(), indexing='ij'))
49 | self.keys = [*key_ranges.keys()]
50 | self.grid = self._raw_grid.reshape([len(self.keys), -1])
51 | self.idx_grid = self._idx_grid.reshape([len(self.keys), -1])
52 | # self.grid = np.stack([params.flatten() for params in raw_grid])
53 |
54 | self._l = l = len(self.grid[0])
55 | self.ls = {key: len(self.cfg[key]) for key in self.cfg.keys()}
56 |
57 | self.weights = np.zeros(l)
58 | self.indices = np.arange(l)
59 |
60 | def __len__(self):
61 | return self._l
62 |
63 | def __getitem__(self, *keys):
64 | pass
65 |
66 | def update(self, **kwargs):
67 | # bump the envelop if
68 | pass
69 |
70 | def sample_bins(self, batch_size, low=None, high=None):
71 | """default to uniform"""
72 | if low is not None and high is not None: # if bounds given
73 | valid_inds = np.logical_and(
74 | self.grid >= low[:, None],
75 | self.grid <= high[:, None]
76 | ).all(axis=0)
77 | temp_weights = np.zeros_like(self.weights)
78 | temp_weights[valid_inds] = self.weights[valid_inds]
79 | inds = self.rng.choice(self.indices, batch_size, p=temp_weights / temp_weights.sum())
80 | else: # if no bounds given
81 | # print(self.weights, self.weights.sum())
82 | inds = self.rng.choice(self.indices, batch_size, p=self.weights / self.weights.sum())
83 |
84 | return self.grid.T[inds], inds
85 |
86 | def sample_uniform_from_cell(self, centroids):
87 | bin_sizes = np.array([*self.bin_sizes.values()])
88 | low, high = centroids + bin_sizes / 2, centroids - bin_sizes / 2
89 | rand = self.rng.uniform(low, high)
90 |
91 | return rand#.clip(self.lows, self.highs)
92 |
93 | def sample(self, batch_size, low=None, high=None):
94 | cgf_centroid, inds = self.sample_bins(batch_size, low=low, high=high)
95 | return np.stack([self.sample_uniform_from_cell(v_range) for v_range in cgf_centroid]), inds
96 |
97 |
98 | class SumCurriculum(Curriculum):
99 | def __init__(self, seed, **kwargs):
100 | super().__init__(seed, **kwargs)
101 |
102 | self.success = np.zeros(len(self))
103 | self.trials = np.zeros(len(self))
104 |
105 | def update(self, bin_inds, l1_error, threshold):
106 | is_success = l1_error < threshold
107 | self.success[bin_inds[is_success]] += 1
108 | self.trials[bin_inds] += 1
109 |
110 | def success_rates(self, *keys):
111 | s_rate = self.success / (self.trials + 1e-6)
112 | s_rate = s_rate.reshape(list(self.ls.values()))
113 | marginals = tuple(i for i, key in enumerate(self.keys) if key not in keys)
114 | if marginals:
115 | return s_rate.mean(axis=marginals)
116 | return s_rate
117 |
118 |
119 | class RewardThresholdCurriculum(Curriculum):
120 | def __init__(self, seed, **kwargs):
121 | super().__init__(seed, **kwargs)
122 |
123 | self.episode_reward_lin = np.zeros(len(self))
124 | self.episode_reward_ang = np.zeros(len(self))
125 | self.episode_lin_vel_raw = np.zeros(len(self))
126 | self.episode_ang_vel_raw = np.zeros(len(self))
127 | self.episode_duration = np.zeros(len(self))
128 |
129 | def get_local_bins(self, bin_inds, ranges=0.1):
130 | if isinstance(ranges, float):
131 | ranges = np.ones(self.grid.shape[0]) * ranges
132 | bin_inds = bin_inds.reshape(-1)
133 |
134 | adjacent_inds = np.logical_and(
135 | self.grid[:, None, :].repeat(bin_inds.shape[0], axis=1) >= self.grid[:, bin_inds, None] - ranges.reshape(-1, 1, 1),
136 | self.grid[:, None, :].repeat(bin_inds.shape[0], axis=1) <= self.grid[:, bin_inds, None] + ranges.reshape(-1, 1, 1)
137 | ).all(axis=0)
138 |
139 | return adjacent_inds
140 |
141 | def update(self, bin_inds, task_rewards, success_thresholds, local_range=0.5):
142 |
143 | is_success = 1.
144 | for task_reward, success_threshold in zip(task_rewards, success_thresholds):
145 | is_success = is_success * (task_reward > success_threshold).cpu()
146 | if len(success_thresholds) == 0:
147 | is_success = np.array([False] * len(bin_inds))
148 | else:
149 | is_success = np.array(is_success.bool())
150 |
151 | self.weights[bin_inds[is_success]] = np.clip(self.weights[bin_inds[is_success]] + 0.2, 0, 1)
152 | adjacents = self.get_local_bins(bin_inds[is_success], ranges=local_range)
153 | for adjacent in adjacents:
154 | adjacent_inds = np.array(adjacent.nonzero()[0])
155 | self.weights[adjacent_inds] = np.clip(self.weights[adjacent_inds] + 0.2, 0, 1)
156 |
157 | def log(self, bin_inds, lin_vel_raw=None, ang_vel_raw=None, episode_duration=None):
158 | self.episode_lin_vel_raw[bin_inds] = lin_vel_raw.cpu().numpy()
159 | self.episode_ang_vel_raw[bin_inds] = ang_vel_raw.cpu().numpy()
160 | self.episode_duration[bin_inds] = episode_duration.cpu().numpy()
161 |
162 | if __name__ == '__main__':
163 | r = RewardThresholdCurriculum(100, x=(-1, 1, 5), y=(-1, 1, 2), z=(-1, 1, 11))
164 |
165 | assert r._raw_grid.shape == (3, 5, 2, 11), "grid shape is wrong: {}".format(r.grid.shape)
166 |
167 | low, high = np.array([-1.0, -0.6, -1.0]), np.array([1.0, 0.6, 1.0])
168 |
169 | adjacents = r.get_local_bins(np.array([10, ]), ranges=0.5)
170 | for adjacent in adjacents:
171 | adjacent_inds = np.array(adjacent.nonzero()[0])
172 | print(adjacent_inds)
173 | r.update(bin_inds=adjacent_inds, task_rewards=[torch.ones(len(adjacent_inds))],
174 | success_thresholds=[0.0],
175 | local_range=0.5)
176 |
177 | samples, bins = r.sample(10_000)
178 |
179 | plt.scatter(*samples.T[:2])
180 | plt.show()
181 |
--------------------------------------------------------------------------------
/dribblebot/envs/go1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot/envs/go1/__init__.py
--------------------------------------------------------------------------------
/dribblebot/envs/go1/go1_config.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | from params_proto import Meta
4 |
5 | from dribblebot.envs.base.legged_robot_config import Cfg
6 |
7 |
8 | def config_go1(Cnfg: Union[Cfg, Meta]):
9 | _ = Cnfg.init_state
10 |
11 | _.pos = [0.0, 0.0, 0.34] # x,y,z [m]
12 | _.default_joint_angles = { # = target angles [rad] when action = 0.0
13 | 'FL_hip_joint': 0.1, # [rad]
14 | 'RL_hip_joint': 0.1, # [rad]
15 | 'FR_hip_joint': -0.1, # [rad]
16 | 'RR_hip_joint': -0.1, # [rad]
17 |
18 | 'FL_thigh_joint': 0.8, # [rad]
19 | 'RL_thigh_joint': 1., # [rad]
20 | 'FR_thigh_joint': 0.8, # [rad]
21 | 'RR_thigh_joint': 1., # [rad]
22 |
23 | 'FL_calf_joint': -1.5, # [rad]
24 | 'RL_calf_joint': -1.5, # [rad]
25 | 'FR_calf_joint': -1.5, # [rad]
26 | 'RR_calf_joint': -1.5 # [rad]
27 | }
28 |
29 | _ = Cnfg.control
30 | _.control_type = 'P'
31 | _.stiffness = {'joint': 20.} # [N*m/rad]
32 | _.damping = {'joint': 0.5} # [N*m*s/rad]
33 | # action scale: target angle = actionScale * action + defaultAngle
34 | _.action_scale = 0.25
35 | _.hip_scale_reduction = 0.5
36 | # decimation: Number of control action updates @ sim DT per policy DT
37 | _.decimation = 4
38 |
39 | _ = Cnfg.asset
40 | _.file = '{MINI_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf'
41 | _.foot_name = "foot"
42 | _.penalize_contacts_on = ["thigh", "calf"]
43 | _.terminate_after_contacts_on = ["base"]
44 | _.self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
45 | _.flip_visual_attachments = False
46 | _.fix_base_link = False
47 |
48 | _ = Cnfg.rewards
49 | _.soft_dof_pos_limit = 0.9
50 | _.base_height_target = 0.34
51 |
52 | _ = Cnfg.reward_scales
53 | _.torques = -0.0001
54 | _.action_rate = -0.01
55 | _.dof_pos_limits = -10.0
56 | _.orientation = -5.
57 | _.base_height = -30.
58 |
59 | _ = Cnfg.terrain
60 | _.mesh_type = 'trimesh'
61 | _.measure_heights = False
62 | _.terrain_noise_magnitude = 0.0
63 | _.teleport_robots = True
64 | _.border_size = 50
65 |
66 | _.terrain_proportions = [0, 0, 0, 0, 0, 0, 0, 0, 1.0]
67 | _.curriculum = False
68 |
69 | _ = Cnfg.env
70 | _.num_observations = 42
71 | _.num_envs = 4000
72 |
73 | _ = Cnfg.commands
74 | _.lin_vel_x = [-1.0, 1.0]
75 | _.lin_vel_y = [-1.0, 1.0]
76 |
77 | _ = Cnfg.commands
78 | _.heading_command = False
79 | _.resampling_time = 10.0
80 | _.command_curriculum = True
81 | _.num_lin_vel_bins = 30
82 | _.num_ang_vel_bins = 30
83 | _.lin_vel_x = [-0.6, 0.6]
84 | _.lin_vel_y = [-0.6, 0.6]
85 | _.ang_vel_yaw = [-1, 1]
86 |
87 | _ = Cnfg.domain_rand
88 | _.randomize_base_mass = True
89 | _.added_mass_range = [-1, 3]
90 | _.push_robots = False
91 | _.max_push_vel_xy = 0.5
92 | _.randomize_friction = True
93 | _.friction_range = [0.05, 4.5]
94 | _.randomize_restitution = True
95 | _.restitution_range = [0.0, 1.0]
96 | _.restitution = 0.5 # default terrain restitution
97 | _.randomize_com_displacement = True
98 | _.com_displacement_range = [-0.1, 0.1]
99 | _.randomize_motor_strength = True
100 | _.motor_strength_range = [0.9, 1.1]
101 | _.randomize_Kp_factor = False
102 | _.Kp_factor_range = [0.8, 1.3]
103 | _.randomize_Kd_factor = False
104 | _.Kd_factor_range = [0.5, 1.5]
105 | _.rand_interval_s = 6
106 |
--------------------------------------------------------------------------------
/dribblebot/envs/go1/velocity_tracking/__init__.py:
--------------------------------------------------------------------------------
1 | from isaacgym import gymutil, gymapi
2 | import torch
3 | from params_proto import Meta
4 | from typing import Union
5 |
6 | from dribblebot.envs.base.legged_robot import LeggedRobot
7 | from dribblebot.envs.base.legged_robot_config import Cfg
8 |
9 |
10 | class VelocityTrackingEasyEnv(LeggedRobot):
11 | def __init__(self, sim_device, headless, num_envs=None, prone=False, deploy=False,
12 | cfg: Cfg = None, eval_cfg: Cfg = None, initial_dynamics_dict=None, physics_engine="SIM_PHYSX"):
13 |
14 | if num_envs is not None:
15 | cfg.env.num_envs = num_envs
16 |
17 | sim_params = gymapi.SimParams()
18 | gymutil.parse_sim_config(vars(cfg.sim), sim_params)
19 | super().__init__(cfg, sim_params, physics_engine, sim_device, headless, eval_cfg, initial_dynamics_dict)
20 |
21 |
22 | def step(self, actions):
23 | self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras = super().step(actions)
24 |
25 | self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices,
26 | 0:3]
27 |
28 | self.extras.update({
29 | "privileged_obs": self.privileged_obs_buf,
30 | "joint_pos": self.dof_pos.cpu().numpy(),
31 | "joint_vel": self.dof_vel.cpu().numpy(),
32 | "joint_pos_target": self.joint_pos_target.cpu().detach().numpy(),
33 | "joint_vel_target": torch.zeros(12),
34 | "body_linear_vel": self.base_lin_vel.cpu().detach().numpy(),
35 | "body_angular_vel": self.base_ang_vel.cpu().detach().numpy(),
36 | "body_linear_vel_cmd": self.commands.cpu().numpy()[:, 0:2],
37 | "body_angular_vel_cmd": self.commands.cpu().numpy()[:, 2:],
38 | "contact_states": (self.contact_forces[:, self.feet_indices, 2] > 1.).detach().cpu().numpy().copy(),
39 | "foot_positions": (self.foot_positions).detach().cpu().numpy().copy(),
40 | "body_pos": self.root_states[:, 0:3].detach().cpu().numpy(),
41 | "torques": self.torques.detach().cpu().numpy()
42 | })
43 |
44 | return self.obs_buf, self.rew_buf, self.reset_buf, self.extras
45 |
46 | def reset(self):
47 | self.reset_idx(torch.arange(self.num_envs, device=self.device))
48 | obs, _, _, _ = self.step(torch.zeros(self.num_envs, self.num_actions, device=self.device, requires_grad=False))
49 | return obs
50 |
51 |
--------------------------------------------------------------------------------
/dribblebot/envs/wrappers/history_wrapper.py:
--------------------------------------------------------------------------------
1 | import isaacgym
2 | assert isaacgym
3 | import torch
4 | import gym
5 |
6 | class HistoryWrapper(gym.Wrapper):
7 | def __init__(self, env):
8 | super().__init__(env)
9 | self.env = env
10 |
11 | self.obs_history_length = self.env.cfg.env.num_observation_history
12 |
13 | self.num_obs_history = self.obs_history_length * self.num_obs
14 | self.obs_history = torch.zeros(self.env.num_envs, self.num_obs_history, dtype=torch.float,
15 | device=self.env.device, requires_grad=False)
16 | self.num_privileged_obs = self.num_privileged_obs
17 |
18 | self.reward_container.load_env(self)
19 |
20 | def step(self, action):
21 | # privileged information and observation history are stored in info
22 | obs, rew, done, info = self.env.step(action)
23 | privileged_obs = info["privileged_obs"]
24 |
25 | self.obs_history = torch.cat((self.obs_history[:, self.env.num_obs:], obs), dim=-1)
26 | # print("obs: ", obs)
27 | return {'obs': obs, 'privileged_obs': privileged_obs, 'obs_history': self.obs_history}, rew, done, info
28 |
29 | def get_observations(self):
30 | obs = self.env.get_observations()
31 | privileged_obs = self.env.get_privileged_observations()
32 | self.obs_history = torch.cat((self.obs_history[:, self.env.num_obs:], obs), dim=-1)
33 | return {'obs': obs, 'privileged_obs': privileged_obs, 'obs_history': self.obs_history}
34 |
35 | def reset_idx(self, env_ids): # it might be a problem that this isn't getting called!!
36 | ret = super().reset_idx(env_ids)
37 | self.obs_history[env_ids, :] = 0
38 | return ret
39 |
40 | def reset(self):
41 | ret = super().reset()
42 | privileged_obs = self.env.get_privileged_observations()
43 | self.obs_history[:, :] = 0
44 | return {"obs": ret, "privileged_obs": privileged_obs, "obs_history": self.obs_history}
45 |
46 |
47 | if __name__ == "__main__":
48 | from tqdm import trange
49 | import matplotlib.pyplot as plt
50 |
51 | import ml_logger as logger
52 |
53 | from dribblebot_learn.ppo import Runner
54 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper
55 | from dribblebot_learn.ppo.actor_critic import AC_Args
56 |
57 | from dribblebot.envs.base.legged_robot_config import Cfg
58 | from dribblebot.envs.mini_cheetah.mini_cheetah_config import config_mini_cheetah
59 | config_mini_cheetah(Cfg)
60 |
61 | test_env = gym.make("VelocityTrackingEasyEnv-v0", cfg=Cfg)
62 | env = HistoryWrapper(test_env)
63 |
64 | env.reset()
65 | action = torch.zeros(test_env.num_envs, 12)
66 | for i in trange(3):
67 | obs, rew, done, info = env.step(action)
68 | print(obs.keys())
69 | print(f"obs: {obs['obs']}")
70 | print(f"privileged obs: {obs['privileged_obs']}")
71 | print(f"obs_history: {obs['obs_history']}")
72 |
73 | img = env.render('rgb_array')
74 | plt.imshow(img)
75 | plt.show()
76 |
--------------------------------------------------------------------------------
/dribblebot/rewards/rewards.py:
--------------------------------------------------------------------------------
1 | class Rewards:
2 | def __init__(self, env):
3 | self.env = env
4 |
5 | def get_reward(self, env_ids = None):
6 | self.rew_buf[:] = 0.
7 | self.rew_buf_pos[:] = 0.
8 | self.rew_buf_neg[:] = 0.
9 | for i in range(len(self.reward_functions)):
10 | name = self.reward_names[i]
11 | rew = self.reward_functions[i]() * self.reward_scales[name]
12 | self.rew_buf += rew
13 | if torch.sum(rew) >= 0:
14 | self.rew_buf_pos += rew
15 | elif torch.sum(rew) <= 0:
16 | self.rew_buf_neg += rew
17 | self.episode_sums[name] += rew
18 | if name in ['tracking_contacts_shaped_force', 'tracking_contacts_shaped_vel']:
19 | self.command_sums[name] += self.reward_scales[name] + rew
20 | else:
21 | self.command_sums[name] += rew
22 | if self.cfg.rewards.only_positive_rewards:
23 | self.rew_buf[:] = torch.clip(self.rew_buf[:], min=0.)
24 | elif self.cfg.rewards.only_positive_rewards_ji22_style: #TODO: update
25 | self.rew_buf[:] = self.rew_buf_pos[:] * torch.exp(self.rew_buf_neg[:] / self.cfg.rewards.sigma_rew_neg)
26 | self.episode_sums["total"] += self.rew_buf
27 | # add termination reward after clipping
28 | if "termination" in self.reward_scales:
29 | rew = self.reward_container._reward_termination() * self.reward_scales["termination"]
30 | self.rew_buf += rew
31 | self.episode_sums["termination"] += rew
32 | self.command_sums["termination"] += rew
33 |
34 | self.command_sums["lin_vel_raw"] += self.base_lin_vel[:, 0]
35 | self.command_sums["ang_vel_raw"] += self.base_ang_vel[:, 2]
36 | self.command_sums["lin_vel_residual"] += (self.base_lin_vel[:, 0] - self.commands[:, 0]) ** 2
37 | self.command_sums["ang_vel_residual"] += (self.base_ang_vel[:, 2] - self.commands[:, 2]) ** 2
38 | self.command_sums["ep_timesteps"] += 1
39 |
40 |
41 |
--------------------------------------------------------------------------------
/dribblebot/rewards/soccer_rewards.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
4 | from isaacgym.torch_utils import *
5 | from .rewards import Rewards
6 |
7 | class SoccerRewards(Rewards):
8 | def __init__(self, env):
9 | self.env = env
10 |
11 | def load_env(self, env):
12 | self.env = env
13 |
14 | def _reward_orientation(self):
15 | # Penalize non flat base orientation
16 | return torch.sum(torch.square(self.env.projected_gravity[:, :2]), dim=1)
17 |
18 | def _reward_torques(self):
19 | # Penalize torques
20 | return torch.sum(torch.square(self.env.torques), dim=1)
21 |
22 | def _reward_dof_vel(self):
23 | # Penalize dof velocities
24 | # k_qd = -6e-4
25 | return torch.sum(torch.square(self.env.dof_vel), dim=1)
26 |
27 | def _reward_dof_acc(self):
28 | # Penalize dof accelerations
29 | return torch.sum(torch.square((self.env.last_dof_vel - self.env.dof_vel) / self.env.dt), dim=1)
30 |
31 | def _reward_collision(self):
32 | # Penalize collisions on selected bodies
33 | return torch.sum(1. * (torch.norm(self.env.contact_forces[:, self.env.penalised_contact_indices, :], dim=-1) > 0.1),
34 | dim=1)
35 |
36 | def _reward_action_rate(self):
37 | # Penalize changes in actions
38 | return torch.sum(torch.square(self.env.last_actions - self.env.actions), dim=1)
39 |
40 | def _reward_tracking_contacts_shaped_force(self):
41 | foot_forces = torch.norm(self.env.contact_forces[:, self.env.feet_indices, :], dim=-1)
42 | desired_contact = self.env.desired_contact_states
43 |
44 | reward = 0
45 | for i in range(4):
46 | reward += - (1 - desired_contact[:, i]) * (
47 | 1 - torch.exp(-1 * foot_forces[:, i] ** 2 / self.env.cfg.rewards.gait_force_sigma))
48 | return reward / 4
49 |
50 | def _reward_tracking_contacts_shaped_vel(self):
51 | foot_velocities = torch.norm(self.env.foot_velocities, dim=2).view(self.env.num_envs, -1)
52 | desired_contact = self.env.desired_contact_states
53 | reward = 0
54 | for i in range(4):
55 | reward += - (desired_contact[:, i] * (
56 | 1 - torch.exp(-1 * foot_velocities[:, i] ** 2 / self.env.cfg.rewards.gait_vel_sigma)))
57 | return reward / 4
58 |
59 | def _reward_dof_pos_limits(self):
60 | # Penalize dof positions too close to the limit
61 | out_of_limits = -(self.env.dof_pos - self.env.dof_pos_limits[:, 0]).clip(max=0.) # lower limit
62 | out_of_limits += (self.env.dof_pos - self.env.dof_pos_limits[:, 1]).clip(min=0.)
63 | return torch.sum(out_of_limits, dim=1)
64 |
65 | def _reward_dof_pos(self):
66 | # Penalize dof positions
67 | # k_q = -0.75
68 | return torch.sum(torch.square(self.env.dof_pos - self.env.default_dof_pos), dim=1)
69 |
70 | def _reward_action_smoothness_1(self):
71 | # Penalize changes in actions
72 | # k_s1 =-2.5
73 | diff = torch.square(self.env.joint_pos_target - self.env.last_joint_pos_target)
74 | diff = diff * (self.env.last_actions[:,:12] != 0) # ignore first step
75 | return torch.sum(diff, dim=1)
76 |
77 | def _reward_action_smoothness_2(self):
78 | # Penalize changes in actions
79 | # k_s2 = -1.2
80 | diff = torch.square(self.env.joint_pos_target - 2 * self.env.last_joint_pos_target + self.env.last_last_joint_pos_target)
81 | diff = diff * (self.env.last_actions[:,:12] != 0) # ignore first step
82 | diff = diff * (self.env.last_last_actions[:,:12] != 0) # ignore second step
83 | return torch.sum(diff, dim=1)
84 |
85 | # encourage robot velocity align vector from robot body to ball
86 | # r_cv
87 | def _reward_dribbling_robot_ball_vel(self):
88 | FR_shoulder_idx = self.env.gym.find_actor_rigid_body_handle(self.env.envs[0], self.env.robot_actor_handles[0], "FR_thigh_shoulder")
89 | FR_HIP_positions = quat_rotate_inverse(self.env.base_quat, self.env.rigid_body_state.view(self.env.num_envs, -1, 13)[:,FR_shoulder_idx,0:3].view(self.env.num_envs,3)-self.env.base_pos)
90 | FR_HIP_velocities = quat_rotate_inverse(self.env.base_quat, self.env.rigid_body_state.view(self.env.num_envs, -1, 13)[:,FR_shoulder_idx,7:10].view(self.env.num_envs,3))
91 |
92 | delta_dribbling_robot_ball_vel = 1.0
93 | robot_ball_vec = self.env.object_local_pos[:,0:2] - FR_HIP_positions[:,0:2]
94 | d_robot_ball=robot_ball_vec / torch.norm(robot_ball_vec, dim=-1).unsqueeze(dim=-1)
95 | ball_robot_velocity_projection = torch.norm(self.env.commands[:,:2], dim=-1) - torch.sum(d_robot_ball * FR_HIP_velocities[:,0:2], dim=-1) # set approaching speed to velocity command
96 | velocity_concatenation = torch.cat((torch.zeros(self.env.num_envs,1, device=self.env.device), ball_robot_velocity_projection.unsqueeze(dim=-1)), dim=-1)
97 | rew_dribbling_robot_ball_vel=torch.exp(-delta_dribbling_robot_ball_vel* torch.pow(torch.max(velocity_concatenation,dim=-1).values, 2) )
98 | return rew_dribbling_robot_ball_vel
99 |
100 | # encourage robot near ball
101 | # r_cp
102 | def _reward_dribbling_robot_ball_pos(self):
103 |
104 | FR_shoulder_idx = self.env.gym.find_actor_rigid_body_handle(self.env.envs[0], self.env.robot_actor_handles[0], "FR_thigh_shoulder")
105 | FR_HIP_positions = quat_rotate_inverse(self.env.base_quat, self.env.rigid_body_state.view(self.env.num_envs, -1, 13)[:,FR_shoulder_idx,0:3].view(self.env.num_envs,3)-self.env.base_pos)
106 |
107 | delta_dribbling_robot_ball_pos = 4.0
108 | rew_dribbling_robot_ball_pos = torch.exp(-delta_dribbling_robot_ball_pos * torch.pow(torch.norm(self.env.object_local_pos - FR_HIP_positions, dim=-1), 2) )
109 | return rew_dribbling_robot_ball_pos
110 |
111 | # encourage ball vel align with unit vector between ball target and ball current position
112 | # r^bv
113 | def _reward_dribbling_ball_vel(self):
114 | # target velocity is command input
115 | lin_vel_error = torch.sum(torch.square(self.env.commands[:, :2] - self.env.object_lin_vel[:, :2]), dim=1)
116 | # rew_dribbling_ball_vel = torch.exp(-lin_vel_error / (self.env.cfg.rewards.tracking_sigma*2))
117 | return torch.exp(-lin_vel_error / (self.env.cfg.rewards.tracking_sigma*2))
118 |
119 | def _reward_dribbling_robot_ball_yaw(self):
120 | robot_ball_vec = self.env.object_pos_world_frame[:,0:2] - self.env.base_pos[:,0:2]
121 | d_robot_ball=robot_ball_vec / torch.norm(robot_ball_vec, dim=-1).unsqueeze(dim=-1)
122 |
123 | unit_command_vel = self.env.commands[:,:2] / torch.norm(self.env.commands[:,:2], dim=-1).unsqueeze(dim=-1)
124 | robot_ball_cmd_yaw_error = torch.norm(unit_command_vel, dim=-1) - torch.sum(d_robot_ball * unit_command_vel, dim=-1)
125 |
126 | # robot ball vector align with body yaw angle
127 | roll, pitch, yaw = get_euler_xyz(self.env.base_quat)
128 | body_yaw_vec = torch.zeros(self.env.num_envs, 2, device=self.env.device)
129 | body_yaw_vec[:,0] = torch.cos(yaw)
130 | body_yaw_vec[:,1] = torch.sin(yaw)
131 | robot_ball_body_yaw_error = torch.norm(body_yaw_vec, dim=-1) - torch.sum(d_robot_ball * body_yaw_vec, dim=-1)
132 | delta_dribbling_robot_ball_cmd_yaw = 2.0
133 | rew_dribbling_robot_ball_yaw = torch.exp(-delta_dribbling_robot_ball_cmd_yaw * (robot_ball_cmd_yaw_error+robot_ball_body_yaw_error))
134 | return rew_dribbling_robot_ball_yaw
135 |
136 | def _reward_dribbling_ball_vel_norm(self):
137 | # target velocity is command input
138 | vel_norm_diff = torch.pow(torch.norm(self.env.commands[:, :2], dim=-1) - torch.norm(self.env.object_lin_vel[:, :2], dim=-1), 2)
139 | delta_vel_norm = 2.0
140 | rew_vel_norm_tracking = torch.exp(-delta_vel_norm * vel_norm_diff)
141 | return rew_vel_norm_tracking
142 |
143 | # def _reward_dribbling_ball_vel_angle(self):
144 | # angle_diff = torch.atan2(self.env.commands[:,1], self.env.commands[:,0]) - torch.atan2(self.env.object_lin_vel[:,1], self.env.object_lin_vel[:,0])
145 | # angle_diff_in_pi = torch.pow(wrap_to_pi(angle_diff), 2)
146 | # rew_vel_angle_tracking = torch.exp(-5.0*angle_diff_in_pi/(torch.pi**2))
147 | # # print("angle_diff", angle_diff, " angle_diff_in_pi: ", angle_diff_in_pi, " rew_vel_angle_tracking", rew_vel_angle_tracking, " commands", self.env.commands[:, :2], " object_lin_vel", self.env.object_lin_vel[:, :2])
148 | # return rew_vel_angle_tracking
149 |
150 | def _reward_dribbling_ball_vel_angle(self):
151 | angle_diff = torch.atan2(self.env.commands[:,1], self.env.commands[:,0]) - torch.atan2(self.env.object_lin_vel[:,1], self.env.object_lin_vel[:,0])
152 | angle_diff_in_pi = torch.pow(wrap_to_pi(angle_diff), 2)
153 | rew_vel_angle_tracking = 1.0 - angle_diff_in_pi/(torch.pi**2)
154 | return rew_vel_angle_tracking
--------------------------------------------------------------------------------
/dribblebot/robots/go1.py:
--------------------------------------------------------------------------------
1 | from .robot import Robot
2 |
3 | from dribblebot import MINI_GYM_ROOT_DIR
4 | import os
5 |
6 | from isaacgym import gymapi
7 |
8 | class Go1(Robot):
9 | def __init__(self, env):
10 | super().__init__(env)
11 |
12 | def initialize(self):
13 | asset_file = '{MINI_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf'
14 | asset_path = asset_file.format(MINI_GYM_ROOT_DIR=MINI_GYM_ROOT_DIR)
15 | asset_root = os.path.dirname(asset_path)
16 | asset_file = os.path.basename(asset_path)
17 |
18 | asset_config = self.env.cfg.asset
19 |
20 | asset_options = gymapi.AssetOptions()
21 | asset_options.default_dof_drive_mode = asset_config.default_dof_drive_mode
22 | asset_options.collapse_fixed_joints = asset_config.collapse_fixed_joints
23 | asset_options.replace_cylinder_with_capsule = asset_config.replace_cylinder_with_capsule
24 | asset_options.flip_visual_attachments = asset_config.flip_visual_attachments
25 | asset_options.fix_base_link = asset_config.fix_base_link
26 | asset_options.density = asset_config.density
27 | asset_options.angular_damping = asset_config.angular_damping
28 | asset_options.linear_damping = asset_config.linear_damping
29 | asset_options.max_angular_velocity = asset_config.max_angular_velocity
30 | asset_options.max_linear_velocity = asset_config.max_linear_velocity
31 | asset_options.armature = asset_config.armature
32 | asset_options.thickness = asset_config.thickness
33 | asset_options.disable_gravity = asset_config.disable_gravity
34 | asset_options.vhacd_enabled = True
35 | asset_options.vhacd_params = gymapi.VhacdParams()
36 | asset_options.vhacd_params.resolution = 500000
37 |
38 | asset = self.env.gym.load_asset(self.env.sim, asset_root, asset_file, asset_options)
39 |
40 | self.num_dof = self.env.gym.get_asset_dof_count(asset)
41 | self.num_actuated_dof = 12
42 | self.num_bodies = self.env.gym.get_asset_rigid_body_count(asset)
43 | dof_props_asset = self.env.gym.get_asset_dof_properties(asset)
44 | rigid_shape_props_asset = self.env.gym.get_asset_rigid_shape_properties(asset)
45 |
46 | return asset, dof_props_asset, rigid_shape_props_asset
47 |
--------------------------------------------------------------------------------
/dribblebot/robots/robot.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | class Robot:
4 | def __init__(self, env):
5 | self.env = env
6 |
7 | def apply_torques(self, torques):
8 | pass
9 |
10 | def get_torques(self):
11 | pass
12 |
13 | def get_state(self):
14 | pass
15 |
16 | def add_camera(self, camera):
17 | pass
18 |
19 | def get_num_bodies(self):
20 | return self.num_bodies
21 |
22 | def get_num_dof(self):
23 | return self.num_dof
24 |
25 | def get_num_actuated_dof(self):
26 | return self.num_actuated_dof
--------------------------------------------------------------------------------
/dribblebot/sensors/__init__.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 | from .attached_camera_sensor import AttachedCameraSensor
3 | from .floating_camera_sensor import FloatingCameraSensor
4 | from .joint_position_sensor import JointPositionSensor
5 | from .joint_velocity_sensor import JointVelocitySensor
6 | from .orientation_sensor import OrientationSensor
7 | from .heightmap_sensor import HeightmapSensor
8 | from .rc_sensor import RCSensor
9 | from .action_sensor import ActionSensor
10 | from .last_action_sensor import LastActionSensor
11 | from .clock_sensor import ClockSensor
12 | from .yaw_sensor import YawSensor
13 | from .object_sensor import ObjectSensor
14 | from .timing_sensor import TimingSensor
15 | from .body_velocity_sensor import BodyVelocitySensor
16 | from .object_velocity_sensor import ObjectVelocitySensor
17 | from .restitution_sensor import RestitutionSensor
18 | from .friction_sensor import FrictionSensor
19 |
20 | ALL_SENSORS = { "AttachedCameraSensor": AttachedCameraSensor,
21 | "FloatingCameraSensor": FloatingCameraSensor,
22 | "JointPositionSensor": JointPositionSensor,
23 | "JointVelocitySensor": JointVelocitySensor,
24 | "OrientationSensor": OrientationSensor,
25 | "HeightmapSensor": HeightmapSensor,
26 | "RCSensor": RCSensor,
27 | "ActionSensor": ActionSensor,
28 | "LastActionSensor": LastActionSensor,
29 | "ClockSensor": ClockSensor,
30 | "YawSensor": YawSensor,
31 | "ObjectSensor": ObjectSensor,
32 | "TimingSensor": TimingSensor,
33 | "BodyVelocitySensor": BodyVelocitySensor,
34 | "ObjectVelocitySensor": ObjectVelocitySensor,
35 | "RestitutionSensor": RestitutionSensor,
36 | "FrictionSensor": FrictionSensor,}
37 |
--------------------------------------------------------------------------------
/dribblebot/sensors/action_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class ActionSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None, delay=0):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 | self.delay = delay
9 |
10 | def get_observation(self, env_ids = None):
11 | if self.delay == 0:
12 | return self.env.actions
13 | elif self.delay == 1:
14 | return self.env.last_actions
15 | else:
16 | raise NotImplementedError("Action delay of {} not implemented".format(self.delay))
17 |
18 | def get_noise_vec(self):
19 | import torch
20 | return torch.zeros(self.env.num_actions, device=self.env.device)
21 |
22 | def get_dim(self):
23 | return self.env.num_actions
--------------------------------------------------------------------------------
/dribblebot/sensors/attached_camera_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 | from isaacgym import gymapi
3 | from isaacgym.torch_utils import *
4 | import torch
5 | import numpy as np
6 |
7 | class AttachedCameraSensor(Sensor):
8 | def __init__(self, env, attached_robot_asset=None):
9 | super().__init__(env)
10 | self.env = env
11 | self.attached_robot_asset = attached_robot_asset
12 |
13 |
14 |
15 | def initialize(self, camera_label, camera_pose, camera_rpy, env_ids=None):
16 | if env_ids is None: env_ids = range(self.env.num_envs)
17 |
18 | camera_props = gymapi.CameraProperties()
19 | camera_props.width = self.env.cfg.perception.image_width
20 | camera_props.height = self.env.cfg.perception.image_height
21 | camera_props.horizontal_fov = self.env.cfg.perception.image_horizontal_fov
22 |
23 |
24 | self.cams = []
25 |
26 | for env_id in env_ids:
27 |
28 | cam = self.env.gym.create_camera_sensor(self.env.envs[env_id], camera_props)
29 | # initialize camera position
30 | # attach the camera to the base
31 | trans_pos = gymapi.Vec3(camera_pose[0], camera_pose[1], camera_pose[2])
32 | quat_pitch = quat_from_angle_axis(torch.Tensor([-camera_rpy[1]]), torch.Tensor([0, 1, 0]))[0]
33 | quat_yaw = quat_from_angle_axis(torch.Tensor([camera_rpy[2]]), torch.Tensor([0, 0, 1]))[0]
34 | quat = quat_mul(quat_yaw, quat_pitch)
35 | trans_quat = gymapi.Quat(quat[0], quat[1], quat[2], quat[3])
36 | transform = gymapi.Transform(trans_pos, trans_quat)
37 | follow_mode = gymapi.CameraFollowMode.FOLLOW_TRANSFORM
38 | self.env.gym.attach_camera_to_body(cam, self.env.envs[env_id], 0, transform, follow_mode)
39 |
40 | self.cams.append(cam)
41 |
42 | return self.cams
43 |
44 | def get_observation(self, env_ids = None):
45 |
46 | raise NotImplementedError
47 |
48 | def get_depth_images(self, env_ids = None):
49 | if env_ids is None: env_ids = range(self.env.num_envs)
50 |
51 | depth_images = []
52 | for env_id in env_ids:
53 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[env_id], self.cams[env_id],
54 | gymapi.IMAGE_DEPTH)
55 | w, h = img.shape
56 | depth_images.append(torch.from_numpy(img.reshape([1, w, h])).to(self.env.device))
57 | depth_images = torch.cat(depth_images, dim=0)
58 | return depth_images
59 |
60 |     def get_rgb_images(self, env_ids=None):
61 | if env_ids is None: env_ids = range(self.env.num_envs)
62 |
63 | rgb_images = []
64 | for env_id in env_ids:
65 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[env_id], self.cams[env_id],
66 | gymapi.IMAGE_COLOR)
67 | w, h = img.shape
68 | rgb_images.append(
69 | torch.from_numpy(img.reshape([1, w, h // 4, 4]).astype(np.int32)).to(self.env.device))
70 | rgb_images = torch.cat(rgb_images, dim=0)
71 | return rgb_images
72 |
73 |     def get_segmentation_images(self, env_ids=None):
74 | if env_ids is None: env_ids = range(self.env.num_envs)
75 |
76 | segmentation_images = []
77 | for env_id in env_ids:
78 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[env_id], self.cams[env_id],
79 | gymapi.IMAGE_SEGMENTATION)
80 | w, h = img.shape
81 | segmentation_images.append(
82 | torch.from_numpy(img.reshape([1, w, h]).astype(np.int32)).to(self.env.device))
83 | segmentation_images = torch.cat(segmentation_images, dim=0)
84 | return segmentation_images
--------------------------------------------------------------------------------
/dribblebot/sensors/body_velocity_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | from isaacgym.torch_utils import *
4 | import torch
5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
6 |
7 | class BodyVelocitySensor(Sensor):
8 | def __init__(self, env, attached_robot_asset=None):
9 | super().__init__(env)
10 | self.env = env
11 | self.attached_robot_asset = attached_robot_asset
12 |
13 | def get_observation(self, env_ids = None):
14 | return self.env.base_lin_vel
15 |
16 | # Privileged sensor input does not contain noise
17 | # def get_noise_vec(self):
18 | # import torch
19 | # return torch.zeros(1, device=self.env.device)
20 |
21 | def get_dim(self):
22 | return 3
--------------------------------------------------------------------------------
/dribblebot/sensors/clock_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class ClockSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | return self.env.clock_inputs
11 |
12 | def get_noise_vec(self):
13 | import torch
14 | return torch.zeros(4, device=self.env.device)
15 |
16 | def get_dim(self):
17 | return 4
--------------------------------------------------------------------------------
/dribblebot/sensors/floating_camera_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 | from isaacgym import gymapi
3 |
4 | class FloatingCameraSensor(Sensor):
5 | def __init__(self, env):
6 | super().__init__(env)
7 | self.env = env
8 |
9 | camera_props = gymapi.CameraProperties()
10 | camera_props.width = self.env.cfg.env.recording_width_px
11 | camera_props.height = self.env.cfg.env.recording_height_px
12 | self.rendering_camera = self.env.gym.create_camera_sensor(self.env.envs[0], camera_props)
13 | self.env.gym.set_camera_location(self.rendering_camera, self.env.envs[0], gymapi.Vec3(1.5, 1, 3.0),
14 | gymapi.Vec3(0, 0, 0))
15 |
16 | def set_position(self, target_loc=None, cam_distance=None):
17 | if cam_distance is None:
18 | cam_distance = [0, -1.0, 1.0]
19 | if target_loc is None:
20 | bx, by, bz = self.env.root_states[0, 0], self.env.root_states[0, 1], self.env.root_states[0, 2]
21 | target_loc = [bx, by, bz]
22 | self.env.gym.set_camera_location(self.rendering_camera, self.env.envs[0], gymapi.Vec3(target_loc[0] + cam_distance[0],
23 | target_loc[1] + cam_distance[1],
24 | target_loc[2] + cam_distance[2]),
25 | gymapi.Vec3(target_loc[0], target_loc[1], target_loc[2]))
26 |
27 | def get_observation(self, env_ids = None):
28 | self.env.gym.step_graphics(self.env.sim)
29 | self.env.gym.render_all_camera_sensors(self.env.sim)
30 | img = self.env.gym.get_camera_image(self.env.sim, self.env.envs[0], self.rendering_camera, gymapi.IMAGE_COLOR)
31 | w, h = img.shape
32 | return img.reshape([w, h // 4, 4])
--------------------------------------------------------------------------------
/dribblebot/sensors/friction_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | from isaacgym.torch_utils import *
4 | import torch
5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
6 |
7 | class FrictionSensor(Sensor):
8 | def __init__(self, env, attached_robot_asset=None):
9 | super().__init__(env)
10 | self.env = env
11 | self.attached_robot_asset = attached_robot_asset
12 |
13 | def get_observation(self, env_ids = None):
14 | friction_coeffs_scale, friction_coeffs_shift = get_scale_shift(self.env.cfg.normalization.friction_range)
15 | return (self.env.friction_coeffs[:, 0].unsqueeze(1) - friction_coeffs_shift) * friction_coeffs_scale
16 |
17 | def get_noise_vec(self):
18 | import torch
19 | return torch.zeros(1, device=self.env.device)
20 |
21 | def get_dim(self):
22 | return 1
--------------------------------------------------------------------------------
/dribblebot/sensors/heightmap_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 | import torch
3 | from dribblebot.utils.math_utils import quat_apply_yaw
4 |
5 | class HeightmapSensor(Sensor):
6 | def __init__(self, env):
7 | super().__init__(env)
8 | self.env = env
9 |
10 | def get_observation(self, env_ids = None):
11 |
12 | if env_ids is None:
13 |             env_ids = torch.arange(self.env.num_envs, device=self.env.device)
14 |
15 | if self.env.cfg.terrain.mesh_type == 'plane':
16 |             return torch.zeros(self.env.num_envs, self.env.cfg.perception.num_height_points, device=self.env.device, requires_grad=False)
17 | elif self.env.cfg.terrain.mesh_type == 'none':
18 | raise NameError("Can't measure height with terrain mesh type 'none'")
19 |
20 |         points = quat_apply_yaw(self.env.base_quat[env_ids].repeat(1, self.env.cfg.perception.num_height_points),
21 |                                 self.env.height_points[env_ids]) + (self.env.root_states[self.env.robot_actor_idxs[env_ids], :3]).unsqueeze(1)
22 | 
23 |         points += self.env.terrain.cfg.border_size
24 |         points = (points / self.env.terrain.cfg.horizontal_scale).long()
25 | px = points[:, :, 0].view(-1)
26 | py = points[:, :, 1].view(-1)
27 | px = torch.clip(px, 0, self.env.height_samples.shape[0] - 2)
28 | py = torch.clip(py, 0, self.env.height_samples.shape[1] - 2)
29 |
30 | heights1 = self.env.height_samples[px, py]
31 | heights2 = self.env.height_samples[px + 1, py]
32 | heights3 = self.env.height_samples[px, py + 1]
33 | heights = torch.min(heights1, heights2)
34 | heights = torch.min(heights, heights3)
35 |
36 | return heights.view(len(env_ids), -1) * self.env.terrain.cfg.vertical_scale
--------------------------------------------------------------------------------
/dribblebot/sensors/joint_position_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class JointPositionSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | return (self.env.dof_pos[:, :self.env.num_actuated_dof] - \
11 | self.env.default_dof_pos[:, :self.env.num_actuated_dof]) * \
12 | self.env.cfg.obs_scales.dof_pos
13 |
14 | def get_noise_vec(self):
15 | import torch
16 | return torch.ones(self.env.num_actuated_dof, device=self.env.device) * \
17 | self.env.cfg.noise_scales.dof_pos * \
18 | self.env.cfg.noise.noise_level * \
19 | self.env.cfg.obs_scales.dof_pos
20 |
21 | def get_dim(self):
22 | return self.env.num_actuated_dof
--------------------------------------------------------------------------------
/dribblebot/sensors/joint_velocity_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class JointVelocitySensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | return self.env.dof_vel[:, :self.env.num_actuated_dof] * \
11 | self.env.cfg.obs_scales.dof_vel
12 |
13 | def get_noise_vec(self):
14 | import torch
15 | return torch.ones(self.env.num_actuated_dof, device=self.env.device) * \
16 | self.env.cfg.noise_scales.dof_vel * \
17 | self.env.cfg.noise.noise_level * \
18 | self.env.cfg.obs_scales.dof_vel
19 |
20 | def get_dim(self):
21 | return self.env.num_actuated_dof
--------------------------------------------------------------------------------
/dribblebot/sensors/last_action_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class LastActionSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None, delay=0):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 | self.delay = delay
9 |
10 | def get_observation(self, env_ids = None):
11 | if self.delay == 0:
12 | return self.env.actions
13 | elif self.delay == 1:
14 | return self.env.last_actions
15 | else:
16 | raise NotImplementedError("Action delay of {} not implemented".format(self.delay))
17 |
18 | def get_noise_vec(self):
19 | import torch
20 | return torch.zeros(self.env.num_actions, device=self.env.device)
21 |
22 | def get_dim(self):
23 | return self.env.num_actions
--------------------------------------------------------------------------------
/dribblebot/sensors/object_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class ObjectSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | return self.env.object_local_pos * self.env.cfg.obs_scales.ball_pos
11 |
12 | def get_noise_vec(self):
13 | import torch
14 | return torch.ones(3, device=self.env.device) * self.env.cfg.noise_scales.ball_pos * self.env.cfg.noise.noise_level * self.env.cfg.obs_scales.ball_pos
15 |
16 | def get_dim(self):
17 | return 3
--------------------------------------------------------------------------------
/dribblebot/sensors/object_velocity_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | from isaacgym.torch_utils import *
4 | import torch
5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
6 |
7 | class ObjectVelocitySensor(Sensor):
8 | def __init__(self, env, attached_robot_asset=None):
9 | super().__init__(env)
10 | self.env = env
11 | # self.attached_robot_asset = attached_robot_asset
12 |
13 | def get_observation(self, env_ids = None):
14 |
15 | return self.env.object_lin_vel
16 |
17 | # Privileged sensor input does not contain noise
18 | # def get_noise_vec(self):
19 | # import torch
20 | # return torch.zeros(3, device=self.env.device)
21 |
22 | def get_dim(self):
23 | return 3
--------------------------------------------------------------------------------
/dribblebot/sensors/orientation_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class OrientationSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | return self.env.projected_gravity
11 |
12 | def get_noise_vec(self):
13 | import torch
14 | return torch.ones(3, device=self.env.device) * \
15 | self.env.cfg.noise_scales.gravity * \
16 | self.env.cfg.noise.noise_level
17 |
18 | def get_dim(self):
19 | return 3
--------------------------------------------------------------------------------
/dribblebot/sensors/rc_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class RCSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | return self.env.commands * self.env.commands_scale
11 |
12 | def get_noise_vec(self):
13 | import torch
14 | return torch.zeros(self.env.cfg.commands.num_commands, device=self.env.device)
15 |
16 | def get_dim(self):
17 | return self.env.cfg.commands.num_commands
--------------------------------------------------------------------------------
/dribblebot/sensors/restitution_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | from isaacgym.torch_utils import *
4 | import torch
5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
6 |
7 | class RestitutionSensor(Sensor):
8 | def __init__(self, env, attached_robot_asset=None):
9 | super().__init__(env)
10 | self.env = env
11 | self.attached_robot_asset = attached_robot_asset
12 |
13 | def get_observation(self, env_ids = None):
14 | restitution_coeffs_scale, restitution_coeffs_shift = get_scale_shift(self.env.cfg.normalization.restitution_range)
15 | return (self.env.restitutions[:, 0].unsqueeze(1) - restitution_coeffs_shift) * restitution_coeffs_scale
16 |
17 | def get_noise_vec(self):
18 | import torch
19 | return torch.zeros(1, device=self.env.device)
20 |
21 | def get_dim(self):
22 | return 1
--------------------------------------------------------------------------------
/dribblebot/sensors/sensor.py:
--------------------------------------------------------------------------------
1 | class Sensor:
2 | def __init__(self, env):
3 | self.env = env
4 |
5 | def get_observation(self):
6 | raise NotImplementedError
7 |
8 | def get_noise_vec(self):
9 | raise NotImplementedError
10 |
11 | def get_dim(self):
12 | raise NotImplementedError
--------------------------------------------------------------------------------
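Sensor defines the three-method interface every sensor above implements: get_observation returns the per-env observation tensor, get_noise_vec the matching additive-noise vector, and get_dim the observation width. A hedged sketch of a new sensor in the same pattern (the class name is illustrative; `env.projected_gravity` mirrors what OrientationSensor reads):

    import torch
    from dribblebot.sensors.sensor import Sensor

    class GravitySensor(Sensor):
        # illustrative subclass, not part of the package
        def __init__(self, env, attached_robot_asset=None):
            super().__init__(env)
            self.env = env
            self.attached_robot_asset = attached_robot_asset

        def get_observation(self, env_ids=None):
            return self.env.projected_gravity              # (num_envs, 3) tensor owned by the env

        def get_noise_vec(self):
            return torch.zeros(3, device=self.env.device)  # noise-free observation

        def get_dim(self):
            return 3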
/dribblebot/sensors/timing_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | class TimingSensor(Sensor):
4 | def __init__(self, env, attached_robot_asset=None):
5 | super().__init__(env)
6 | self.env = env
7 | # self.attached_robot_asset = attached_robot_asset
8 |
9 | def get_observation(self, env_ids = None):
10 | # print("timeing variable: ", self.env.gait_indices)
11 | return self.env.gait_indices.unsqueeze(1)
12 |
13 | def get_noise_vec(self):
14 | import torch
15 | return torch.zeros(1, device=self.env.device)
16 |
17 | def get_dim(self):
18 | return 1
--------------------------------------------------------------------------------
/dribblebot/sensors/yaw_sensor.py:
--------------------------------------------------------------------------------
1 | from .sensor import Sensor
2 |
3 | from isaacgym.torch_utils import *
4 | import torch
5 | from dribblebot.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
6 |
7 | class YawSensor(Sensor):
8 | def __init__(self, env, attached_robot_asset=None):
9 | super().__init__(env)
10 | self.env = env
11 | self.attached_robot_asset = attached_robot_asset
12 |
13 | def get_observation(self, env_ids = None):
14 | forward = quat_apply(self.env.base_quat, self.env.forward_vec)
15 | heading = torch.atan2(forward[:, 1], forward[:, 0]).unsqueeze(1)
16 | heading = wrap_to_pi(heading - self.env.heading_offsets.unsqueeze(1))
17 | return heading
18 |
19 | def get_noise_vec(self):
20 | import torch
21 | return torch.zeros(1, device=self.env.device)
22 |
23 | def get_dim(self):
24 | return 1
--------------------------------------------------------------------------------
/dribblebot/terrains/__init__.py:
--------------------------------------------------------------------------------
1 | from .terrain import Terrain
2 | from .tm_box_terrain import TMBoxTerrain
3 | from .box_terrain import BoxTerrain
4 | from .heightfield_terrain import HeightfieldTerrain
5 | from .trimesh_terrain import TrimeshTerrain
6 | from .ground_plane_terrain import GroundPlaneTerrain
7 |
8 | ALL_TERRAINS = { "boxes_tm": TMBoxTerrain,
9 | "boxes": BoxTerrain,
10 | "heightfield": HeightfieldTerrain,
11 | "trimesh": TrimeshTerrain,
12 | "plane": GroundPlaneTerrain
13 | }
--------------------------------------------------------------------------------
/dribblebot/terrains/ground_plane_terrain.py:
--------------------------------------------------------------------------------
1 | from .terrain import Terrain
2 |
3 | from isaacgym import gymapi
4 | import torch
5 |
6 | class GroundPlaneTerrain(Terrain):
7 | def __init__(self, env):
8 | super().__init__(env)
9 | self.prepare()
10 |
11 | def prepare(self):
12 | """ Adds a ground plane to the simulation, sets friction and restitution based on the cfg.
13 | """
14 | plane_params = gymapi.PlaneParams()
15 | plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
16 | plane_params.static_friction = self.env.cfg.terrain.static_friction
17 | plane_params.dynamic_friction = self.env.cfg.terrain.dynamic_friction
18 | plane_params.restitution = self.env.cfg.terrain.restitution
19 | self.env.gym.add_ground(self.env.sim, plane_params)
20 |
21 | def initialize(self):
22 | pass
23 |
--------------------------------------------------------------------------------
/dribblebot/terrains/heightfield_terrain.py:
--------------------------------------------------------------------------------
1 | from .terrain import Terrain
2 |
3 | from isaacgym import gymapi
4 | import torch
5 |
6 | class HeightfieldTerrain(Terrain):
7 | def __init__(self, env):
8 | super().__init__(env)
9 | self.prepare()
10 |
11 | def prepare(self):
12 | """ Adds a heightfield terrain to the simulation, sets parameters based on the cfg.
13 | """
14 | hf_params = gymapi.HeightFieldParams()
15 | hf_params.column_scale = self.env.terrain.cfg.horizontal_scale
16 | hf_params.row_scale = self.env.terrain.cfg.horizontal_scale
17 | hf_params.vertical_scale = self.env.terrain.cfg.vertical_scale
18 | hf_params.nbRows = self.env.terrain.tot_cols
19 | hf_params.nbColumns = self.env.terrain.tot_rows
20 | hf_params.transform.p.x = -self.env.terrain.cfg.border_size
21 | hf_params.transform.p.y = -self.env.terrain.cfg.border_size
22 | hf_params.transform.p.z = 0.0
23 |         hf_params.static_friction = self.env.cfg.terrain.static_friction
24 |         hf_params.dynamic_friction = self.env.cfg.terrain.dynamic_friction
25 |         hf_params.restitution = self.env.cfg.terrain.restitution
26 |
27 | print(self.env.terrain.heightsamples.shape, hf_params.nbRows, hf_params.nbColumns)
28 |
29 | self.env.gym.add_heightfield(self.env.sim, self.env.terrain.heightsamples.T, hf_params)
30 | self.height_samples = torch.tensor(self.env.terrain.heightsamples).view(self.env.terrain.tot_rows,
31 | self.env.terrain.tot_cols).to(self.env.device)
32 |
33 |
34 | def initialize(self):
35 | pass
--------------------------------------------------------------------------------
/dribblebot/terrains/terrain.py:
--------------------------------------------------------------------------------
1 | class Terrain:
2 | def __init__(self, env):
3 | self.env = env
4 |
5 | def initialize(self):
6 | raise NotImplementedError
7 |
8 | def get_friction(self, x, y):
9 | """ Returns the friction at a given point in the terrain.
10 | # """
11 | return self.friction_samples[x, y]
12 |
13 | def get_roughness(self, x, y):
14 | """ Returns the roughness at a given point in the terrain.
15 | # """
16 | return self.roughness_samples[x, y]
17 |
18 | def get_restitution(self, x, y):
19 | """ Returns the restitution at a given point in the terrain.
20 | # """
21 | return self.restitution_samples[x, y]
22 |
23 | def get_height(self, x, y):
24 | """ Returns the height at a given point in the terrain.
25 | # """
26 | return self.height_samples[x, y]
27 |
28 |
--------------------------------------------------------------------------------
/dribblebot/terrains/trimesh_terrain.py:
--------------------------------------------------------------------------------
1 | from .terrain import Terrain
2 |
3 | from isaacgym import gymapi
4 | import torch
5 |
6 | class TrimeshTerrain(Terrain):
7 | def __init__(self, env):
8 | super().__init__(env)
9 | self.prepare()
10 |
11 | def prepare(self):
12 | """ Adds a triangle mesh terrain to the simulation, sets parameters based on the cfg.
13 | # """
14 | tm_params = gymapi.TriangleMeshParams()
15 | tm_params.nb_vertices = self.env.terrain.vertices.shape[0]
16 | tm_params.nb_triangles = self.env.terrain.triangles.shape[0]
17 |
18 | tm_params.transform.p.x = -self.env.terrain.cfg.border_size
19 | tm_params.transform.p.y = -self.env.terrain.cfg.border_size
20 | tm_params.transform.p.z = 0.0
21 | tm_params.static_friction = self.env.cfg.terrain.static_friction
22 | tm_params.dynamic_friction = self.env.cfg.terrain.dynamic_friction
23 | tm_params.restitution = self.env.cfg.terrain.restitution
24 | self.env.gym.add_triangle_mesh(self.env.sim, self.env.terrain.vertices.flatten(order='C'),
25 | self.env.terrain.triangles.flatten(order='C'), tm_params)
26 | self.height_samples = torch.tensor(self.env.terrain.heightsamples).view(self.env.terrain.tot_rows,
27 | self.env.terrain.tot_cols).to(self.env.device)
28 |
29 |
30 | def initialize(self):
31 | pass
32 |
--------------------------------------------------------------------------------
/dribblebot/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .math_utils import *
2 | from .terrain import Terrain
3 |
--------------------------------------------------------------------------------
/dribblebot/utils/logger.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | class Logger:
5 | def __init__(self, env):
6 | self.env = env
7 |
8 | def populate_log(self, env_ids):
9 |
10 | extras = {}
11 |
12 | # fill extras
13 | if len(env_ids) > 0:
14 | extras["train/episode"] = {}
15 | for key in self.env.episode_sums.keys():
16 | extras["train/episode"]['rew_' + key] = torch.mean(
17 | self.env.episode_sums[key][env_ids])
18 | self.env.episode_sums[key][env_ids] = 0.
19 |
20 | # log additional curriculum info
21 | if self.env.cfg.terrain.curriculum:
22 | extras["train/episode"]["terrain_level"] = torch.mean(
23 | self.env.terrain_levels.float())
24 | if self.env.cfg.commands.command_curriculum:
25 | commands = self.env.commands
26 | extras["env_bins"] = torch.Tensor(self.env.env_command_bins)
27 | extras["train/episode"]["min_command_duration"] = torch.min(commands[:, 8])
28 | extras["train/episode"]["max_command_duration"] = torch.max(commands[:, 8])
29 | extras["train/episode"]["min_command_bound"] = torch.min(commands[:, 7])
30 | extras["train/episode"]["max_command_bound"] = torch.max(commands[:, 7])
31 | extras["train/episode"]["min_command_offset"] = torch.min(commands[:, 6])
32 | extras["train/episode"]["max_command_offset"] = torch.max(commands[:, 6])
33 | extras["train/episode"]["min_command_phase"] = torch.min(commands[:, 5])
34 | extras["train/episode"]["max_command_phase"] = torch.max(commands[:, 5])
35 | extras["train/episode"]["min_command_freq"] = torch.min(commands[:, 4])
36 | extras["train/episode"]["max_command_freq"] = torch.max(commands[:, 4])
37 | extras["train/episode"]["min_command_x_vel"] = torch.min(commands[:, 0])
38 | extras["train/episode"]["max_command_x_vel"] = torch.max(commands[:, 0])
39 | extras["train/episode"]["min_command_y_vel"] = torch.min(commands[:, 1])
40 | extras["train/episode"]["max_command_y_vel"] = torch.max(commands[:, 1])
41 | extras["train/episode"]["min_command_yaw_vel"] = torch.min(commands[:, 2])
42 | extras["train/episode"]["max_command_yaw_vel"] = torch.max(commands[:, 2])
43 | # if self.env.cfg.commands.num_commands > 9:
44 | extras["train/episode"]["min_command_swing_height"] = torch.min(commands[:, 9])
45 | extras["train/episode"]["max_command_swing_height"] = torch.max(commands[:, 9])
46 | for curriculum, category in zip(self.env.curricula, self.env.category_names):
47 | extras["train/episode"][f"command_area_{category}"] = np.sum(curriculum.weights) / \
48 | curriculum.weights.shape[0]
49 |
50 | extras["train/episode"]["min_action"] = torch.min(self.env.actions)
51 | extras["train/episode"]["max_action"] = torch.max(self.env.actions)
52 |
53 | extras["curriculum/distribution"] = {}
54 | for curriculum, category in zip(self.env.curricula, self.env.category_names):
55 | extras[f"curriculum/distribution"][f"weights_{category}"] = curriculum.weights
56 | extras[f"curriculum/distribution"][f"grid_{category}"] = curriculum.grid
57 | if self.env.cfg.env.send_timeouts:
58 | extras["time_outs"] = self.env.time_out_buf
59 |
60 | return extras
61 |
--------------------------------------------------------------------------------
/dribblebot/utils/math_utils.py:
--------------------------------------------------------------------------------
1 | # License: see [LICENSE, LICENSES/legged_gym/LICENSE]
2 |
3 | from typing import Tuple
4 |
5 | import numpy as np
6 | import torch
7 | from isaacgym.torch_utils import quat_apply, normalize
8 | from torch import Tensor
9 |
10 |
11 | # @ torch.jit.script
12 | def quat_apply_yaw(quat, vec):
13 | quat_yaw = quat.clone().view(-1, 4)
14 | quat_yaw[:, :2] = 0.
15 | quat_yaw = normalize(quat_yaw)
16 | return quat_apply(quat_yaw, vec)
17 |
18 |
19 | # @ torch.jit.script
20 | def wrap_to_pi(angles):
21 | angles %= 2 * np.pi
22 | angles -= 2 * np.pi * (angles > np.pi)
23 | return angles
24 |
25 |
26 | # @ torch.jit.script
27 | def torch_rand_sqrt_float(lower, upper, shape, device):
28 | # type: (float, float, Tuple[int, int], str) -> Tensor
29 | r = 2 * torch.rand(*shape, device=device) - 1
30 | r = torch.where(r < 0., -torch.sqrt(-r), torch.sqrt(r))
31 | r = (r + 1.) / 2.
32 | return (upper - lower) * r + lower
33 |
34 |
35 | def get_scale_shift(range):
36 | scale = 2. / (range[1] - range[0])
37 | shift = (range[1] + range[0]) / 2.
38 | return scale, shift
39 |
--------------------------------------------------------------------------------
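get_scale_shift maps a [low, high] range linearly onto [-1, 1]; FrictionSensor and RestitutionSensor use it to normalize their privileged observations as (value - shift) * scale. A quick worked example (the numeric range matches the friction range used elsewhere in this repo, purely for illustration):

    scale, shift = get_scale_shift([0.05, 4.5])   # scale = 2 / 4.45 ≈ 0.449, shift = 2.275
    (0.05 - shift) * scale     # -> -1.0  (lowest friction maps to -1)
    (4.5 - shift) * scale      # -> +1.0  (highest friction maps to +1)
    (2.275 - shift) * scale    # ->  0.0  (midpoint maps to 0)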
/dribblebot/utils/terrain.py:
--------------------------------------------------------------------------------
1 | # License: see [LICENSE, LICENSES/legged_gym/LICENSE]
2 |
3 | import math
4 |
5 | import numpy as np
6 | from isaacgym import terrain_utils
7 | from numpy.random import choice
8 |
9 | from dribblebot.envs.base.legged_robot_config import Cfg
10 |
11 |
12 | class Terrain:
13 | def __init__(self, cfg: Cfg.terrain, num_robots, eval_cfg=None, num_eval_robots=0) -> None:
14 |
15 | self.cfg = cfg
16 | self.eval_cfg = eval_cfg
17 | self.num_robots = num_robots
18 | self.type = cfg.mesh_type
19 | if self.type in ["none", 'plane']:
20 | return
21 | self.train_rows, self.train_cols, self.eval_rows, self.eval_cols = self.load_cfgs()
22 | self.tot_rows = len(self.train_rows) + len(self.eval_rows)
23 | self.tot_cols = max(len(self.train_cols), len(self.eval_cols))
24 | self.cfg.env_length = cfg.terrain_length
25 | self.cfg.env_width = cfg.terrain_width
26 |
27 | self.height_field_raw = np.zeros((self.tot_rows, self.tot_cols), dtype=np.int16)
28 |
29 | self.initialize_terrains()
30 |
31 | self.heightsamples = self.height_field_raw
32 | if self.type == "trimesh":
33 | self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh(self.height_field_raw,
34 | self.cfg.horizontal_scale,
35 | self.cfg.vertical_scale,
36 | self.cfg.slope_treshold)
37 |
38 | def load_cfgs(self):
39 | self._load_cfg(self.cfg)
40 | self.cfg.row_indices = np.arange(0, self.cfg.tot_rows)
41 | self.cfg.col_indices = np.arange(0, self.cfg.tot_cols)
42 | self.cfg.x_offset = 0
43 | self.cfg.rows_offset = 0
44 | if self.eval_cfg is None:
45 | return self.cfg.row_indices, self.cfg.col_indices, [], []
46 | else:
47 | self._load_cfg(self.eval_cfg)
48 | self.eval_cfg.row_indices = np.arange(self.cfg.tot_rows, self.cfg.tot_rows + self.eval_cfg.tot_rows)
49 | self.eval_cfg.col_indices = np.arange(0, self.eval_cfg.tot_cols)
50 | self.eval_cfg.x_offset = self.cfg.tot_rows
51 | self.eval_cfg.rows_offset = self.cfg.num_rows
52 | return self.cfg.row_indices, self.cfg.col_indices, self.eval_cfg.row_indices, self.eval_cfg.col_indices
53 |
54 | def _load_cfg(self, cfg):
55 | cfg.proportions = [np.sum(cfg.terrain_proportions[:i + 1]) for i in range(len(cfg.terrain_proportions))]
56 |
57 | cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols
58 | cfg.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3))
59 |
60 | cfg.width_per_env_pixels = int(cfg.terrain_length / cfg.horizontal_scale)
61 | cfg.length_per_env_pixels = int(cfg.terrain_width / cfg.horizontal_scale)
62 |
63 | cfg.border = int(cfg.border_size / cfg.horizontal_scale)
64 | cfg.tot_cols = int(cfg.num_cols * cfg.width_per_env_pixels) + 2 * cfg.border
65 | cfg.tot_rows = int(cfg.num_rows * cfg.length_per_env_pixels) + 2 * cfg.border
66 |
67 | def initialize_terrains(self):
68 | self._initialize_terrain(self.cfg)
69 | if self.eval_cfg is not None:
70 | self._initialize_terrain(self.eval_cfg)
71 |
72 | def _initialize_terrain(self, cfg):
73 | if cfg.curriculum:
74 | self.curriculum(cfg)
75 | else:
76 | self.randomized_terrain(cfg)
77 |
78 | def randomized_terrain(self, cfg):
79 | for k in range(cfg.num_sub_terrains):
80 | # Env coordinates in the world
81 | (i, j) = np.unravel_index(k, (cfg.num_rows, cfg.num_cols))
82 |
83 | choice = np.random.uniform(0, 1)
84 | difficulty = np.random.choice([0.5, 0.75, 0.9])
85 | terrain = self.make_terrain(cfg, choice, difficulty, cfg.proportions)
86 | self.add_terrain_to_map(cfg, terrain, i, j)
87 |
88 | def curriculum(self, cfg):
89 | for j in range(cfg.num_cols):
90 | for i in range(cfg.num_rows):
91 | difficulty = i / cfg.num_rows * cfg.difficulty_scale
92 | choice = j / cfg.num_cols + 0.001
93 |
94 | terrain = self.make_terrain(cfg, choice, difficulty, cfg.proportions)
95 | self.add_terrain_to_map(cfg, terrain, i, j)
96 |
97 | def make_terrain(self, cfg, choice, difficulty, proportions):
98 | terrain = terrain_utils.SubTerrain("terrain",
99 | width=cfg.width_per_env_pixels,
100 | length=cfg.width_per_env_pixels,
101 | vertical_scale=cfg.vertical_scale,
102 | horizontal_scale=cfg.horizontal_scale)
103 | slope = difficulty * 0.4
104 | step_height = 0.05 + 0.18 * difficulty
105 | discrete_obstacles_height = 0.05 + difficulty * (cfg.max_platform_height - 0.05)
106 | stepping_stones_size = 1.5 * (1.05 - difficulty)
107 | stone_distance = 0.05 if difficulty == 0 else 0.1
108 | if choice < proportions[0]:
109 | if choice < proportions[0] / 2:
110 | slope *= -1
111 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.)
112 | elif choice < proportions[1]:
113 | terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.)
114 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05,
115 | step=self.cfg.terrain_smoothness, downsampled_scale=0.2)
116 | elif choice < proportions[3]:
117 | if choice < proportions[2]:
118 | step_height *= -1
119 | terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.)
120 | elif choice < proportions[4]:
121 | num_rectangles = 20
122 | rectangle_min_size = 1.
123 | rectangle_max_size = 2.
124 | terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size,
125 | rectangle_max_size, num_rectangles, platform_size=3.)
126 | elif choice < proportions[5]:
127 | terrain_utils.stepping_stones_terrain(terrain, stone_size=stepping_stones_size,
128 | stone_distance=stone_distance, max_height=0., platform_size=4.)
129 | elif choice < proportions[6]:
130 | pass
131 | elif choice < proportions[7]:
132 | pass
133 | elif choice < proportions[8]:
134 | terrain_utils.random_uniform_terrain(terrain, min_height=-cfg.terrain_noise_magnitude,
135 | max_height=cfg.terrain_noise_magnitude, step=0.005,
136 | downsampled_scale=0.2)
137 | elif choice < proportions[9]:
138 | terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05,
139 | step=self.cfg.terrain_smoothness, downsampled_scale=0.2)
140 | terrain.height_field_raw[0:terrain.length // 2, :] = 0
141 |
142 | return terrain
143 |
144 | def add_terrain_to_map(self, cfg, terrain, row, col):
145 | i = row
146 | j = col
147 | # map coordinate system
148 | start_x = cfg.border + i * cfg.length_per_env_pixels + cfg.x_offset
149 | end_x = cfg.border + (i + 1) * cfg.length_per_env_pixels + cfg.x_offset
150 | start_y = cfg.border + j * cfg.width_per_env_pixels
151 | end_y = cfg.border + (j + 1) * cfg.width_per_env_pixels
152 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw
153 |
154 | env_origin_x = (i + 0.5) * cfg.terrain_length + cfg.x_offset * terrain.horizontal_scale
155 | env_origin_y = (j + 0.5) * cfg.terrain_width
156 | x1 = int((cfg.terrain_length / 2. - 1) / terrain.horizontal_scale) + cfg.x_offset
157 | x2 = int((cfg.terrain_length / 2. + 1) / terrain.horizontal_scale) + cfg.x_offset
158 | y1 = int((cfg.terrain_width / 2. - 1) / terrain.horizontal_scale)
159 | y2 = int((cfg.terrain_width / 2. + 1) / terrain.horizontal_scale)
160 | env_origin_z = np.max(self.height_field_raw[start_x: end_x, start_y:end_y]) * terrain.vertical_scale
161 |
162 | cfg.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z]
163 |
164 |
165 |
166 | def perlin(x, y, seed=0):
167 | # permutation table
168 | np.random.seed(seed)
169 | p = np.arange(256, dtype=int)
170 | np.random.shuffle(p)
171 | p = np.stack([p, p]).flatten()
172 | # coordinates of the top-left
173 | xi, yi = x.astype(int), y.astype(int)
174 | # internal coordinates
175 | xf, yf = x - xi, y - yi
176 | # fade factors
177 | u, v = fade(xf), fade(yf)
178 | # noise components
179 | n00 = gradient(p[p[xi] + yi], xf, yf)
180 | n01 = gradient(p[p[xi] + yi + 1], xf, yf - 1)
181 | n11 = gradient(p[p[xi + 1] + yi + 1], xf - 1, yf - 1)
182 | n10 = gradient(p[p[xi + 1] + yi], xf - 1, yf)
183 | # combine noises
184 | x1 = lerp(n00, n10, u)
185 | x2 = lerp(n01, n11, u) # FIX1: I was using n10 instead of n01
186 | return lerp(x1, x2, v) # FIX2: I also had to reverse x1 and x2 here
187 |
188 | def lerp(a, b, x):
189 | "linear interpolation"
190 | return a + x * (b - a)
191 |
192 | def fade(t):
193 | "6t^5 - 15t^4 + 10t^3"
194 | return 6 * t**5 - 15 * t**4 + 10 * t**3
195 |
196 | def gradient(h, x, y):
197 |     "grad converts h to the right gradient vector and returns the dot product with (x,y)"
198 | vectors = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]])
199 | g = vectors[h % 4]
200 | return g[:, :, 0] * x + g[:, :, 1] * y
201 |
--------------------------------------------------------------------------------
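The perlin helper expects 2-D coordinate grids (gradient indexes its result with [:, :, 0]), so it is normally called on a numpy meshgrid. A small usage sketch, with an illustrative grid size:

    import numpy as np

    lin = np.linspace(0, 5, 100, endpoint=False)
    xs, ys = np.meshgrid(lin, lin)
    noise = perlin(xs, ys, seed=2)   # (100, 100) array of smooth noise, roughly in [-1, 1]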
/dribblebot_learn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot_learn/__init__.py
--------------------------------------------------------------------------------
/dribblebot_learn/env/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .vec_env import VecEnv
--------------------------------------------------------------------------------
/dribblebot_learn/env/vec_env.py:
--------------------------------------------------------------------------------
1 | # License: see [LICENSE, LICENSES/rsl_rl/LICENSE]
2 |
3 | from abc import ABC, abstractmethod
4 | from typing import Tuple, Union
5 |
6 | import torch
7 |
8 |
9 | # minimal interface of the environment
10 | class VecEnv(ABC):
11 | num_envs: int
12 | num_obs: int
13 | num_privileged_obs: int
14 | num_actions: int
15 | max_episode_length: int
16 | privileged_obs_buf: torch.Tensor
17 | obs_buf: torch.Tensor
18 | rew_buf: torch.Tensor
19 | reset_buf: torch.Tensor
20 | episode_length_buf: torch.Tensor # current episode duration
21 | extras: dict
22 | device: torch.device
23 |
24 | @abstractmethod
25 | def step(self, actions: torch.Tensor) -> Tuple[
26 | torch.Tensor, Union[torch.Tensor, None], torch.Tensor, torch.Tensor, dict]:
27 | pass
28 |
29 | @abstractmethod
30 | def reset(self, env_ids: Union[list, torch.Tensor]):
31 | pass
32 |
33 | @abstractmethod
34 | def get_observations(self) -> torch.Tensor:
35 | pass
36 |
37 | @abstractmethod
38 | def get_privileged_observations(self) -> Union[torch.Tensor, None]:
39 | pass
40 |
--------------------------------------------------------------------------------
/dribblebot_learn/eval_metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/dribblebot_learn/eval_metrics/__init__.py
--------------------------------------------------------------------------------
/dribblebot_learn/eval_metrics/domain_randomization.py:
--------------------------------------------------------------------------------
1 | from dribblebot.envs.base.legged_robot_config import Cfg
2 |
3 |
4 | def base_set():
5 | # set basics
6 | Cfg.terrain.teleport_robots = True
7 | Cfg.terrain.border_size = 50
8 | Cfg.terrain.num_rows = 10
9 | Cfg.terrain.num_cols = 10
10 | Cfg.commands.resampling_time = 1e9
11 | Cfg.env.episode_length_s = 500
12 | Cfg.rewards.terminal_body_height = 0.0
13 | Cfg.rewards.use_terminal_body_height = True
14 |
15 |
16 | def rand_regular():
17 | Cfg.domain_rand.randomize_friction = True
18 | Cfg.domain_rand.friction_range = [0.05, 4.5]
19 | Cfg.domain_rand.randomize_restitution = True
20 | Cfg.domain_rand.restitution_range = [0, 1.0]
21 | Cfg.domain_rand.restitution = 0.5
22 | Cfg.domain_rand.randomize_base_mass = True
23 | Cfg.domain_rand.added_mass_range = [-1., 3.]
24 | Cfg.domain_rand.randomize_com_displacement = True
25 | Cfg.domain_rand.com_displacement_range = [-0.1, 0.1]
26 | Cfg.domain_rand.randomize_motor_strength = True
27 | Cfg.domain_rand.motor_strength_range = [0.9, 1.1]
28 | Cfg.domain_rand.randomize_Kp_factor = False
29 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3]
30 | Cfg.domain_rand.randomize_Kd_factor = False
31 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5]
32 | Cfg.domain_rand.push_robots = False
33 | Cfg.domain_rand.push_interval_s = 15
34 | Cfg.domain_rand.max_push_vel_xy = 1.
35 |
36 |
37 | def rand_large():
38 | Cfg.domain_rand.randomize_friction = True
39 | Cfg.domain_rand.friction_range = [0.04, 6.0]
40 | Cfg.domain_rand.randomize_restitution = True
41 | Cfg.domain_rand.restitution_range = [0, 1.0]
42 | Cfg.domain_rand.restitution = 0.5
43 | Cfg.domain_rand.randomize_base_mass = True
44 | Cfg.domain_rand.added_mass_range = [-1.5, 4.]
45 | Cfg.domain_rand.randomize_com_displacement = True
46 | Cfg.domain_rand.com_displacement_range = [-0.13, 0.13]
47 | Cfg.domain_rand.randomize_motor_strength = True
48 | Cfg.domain_rand.motor_strength_range = [0.88, 1.12] # table 1 in RMA may have a typo
49 | Cfg.domain_rand.randomize_Kp_factor = False
50 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3]
51 | Cfg.domain_rand.randomize_Kd_factor = False
52 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5]
53 | Cfg.domain_rand.push_robots = False
54 | Cfg.domain_rand.push_interval_s = 15
55 | Cfg.domain_rand.max_push_vel_xy = 1.
56 |
57 |
58 | def static_low():
59 | Cfg.domain_rand.randomize_friction = True
60 | Cfg.domain_rand.friction_range = [0.05, 0.06]
61 | Cfg.domain_rand.randomize_restitution = True
62 | Cfg.domain_rand.restitution_range = [0, 0.01]
63 | Cfg.domain_rand.restitution = 0.5
64 | Cfg.domain_rand.randomize_base_mass = True
65 | Cfg.domain_rand.added_mass_range = [-1., -0.99]
66 | Cfg.domain_rand.randomize_com_displacement = True
67 | Cfg.domain_rand.com_displacement_range = [-0.1, -0.09]
68 | Cfg.domain_rand.randomize_motor_strength = True
69 |     Cfg.domain_rand.motor_strength_range = [0.9, 0.91]
70 | Cfg.domain_rand.randomize_Kp_factor = False
71 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3]
72 | Cfg.domain_rand.randomize_Kd_factor = False
73 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5]
74 | Cfg.domain_rand.push_robots = False
75 | Cfg.domain_rand.push_interval_s = 15
76 | Cfg.domain_rand.max_push_vel_xy = 1.
77 |
78 |
79 | def static_medium():
80 | Cfg.domain_rand.randomize_friction = True
81 | Cfg.domain_rand.friction_range = [1.0, 1.01]
82 | Cfg.domain_rand.randomize_restitution = True
83 | Cfg.domain_rand.restitution_range = [0.5, 0.51]
84 | Cfg.domain_rand.restitution = 0.5
85 | Cfg.domain_rand.randomize_base_mass = True
86 | Cfg.domain_rand.added_mass_range = [0.0, 0.01]
87 | Cfg.domain_rand.randomize_com_displacement = True
88 | Cfg.domain_rand.com_displacement_range = [0.0, 0.01]
89 | Cfg.domain_rand.randomize_motor_strength = True
90 | Cfg.domain_rand.motor_strength_range = [1.0, 1.01]
91 | Cfg.domain_rand.randomize_Kp_factor = False
92 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3]
93 | Cfg.domain_rand.randomize_Kd_factor = False
94 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5]
95 | Cfg.domain_rand.push_robots = False
96 | Cfg.domain_rand.push_interval_s = 15
97 | Cfg.domain_rand.max_push_vel_xy = 1.
98 |
99 |
100 | def static_high():
101 | Cfg.domain_rand.randomize_friction = True
102 | Cfg.domain_rand.friction_range = [4.49, 4.5]
103 | Cfg.domain_rand.randomize_restitution = True
104 | Cfg.domain_rand.restitution_range = [0.99, 1.0]
105 | Cfg.domain_rand.restitution = 0.5
106 | Cfg.domain_rand.randomize_base_mass = True
107 | Cfg.domain_rand.added_mass_range = [2.99, 3.]
108 | Cfg.domain_rand.randomize_com_displacement = True
109 | Cfg.domain_rand.com_displacement_range = [0.09, 0.1]
110 | Cfg.domain_rand.randomize_motor_strength = True
111 | Cfg.domain_rand.motor_strength_range = [1.09, 1.1]
112 | Cfg.domain_rand.randomize_Kp_factor = False
113 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3]
114 | Cfg.domain_rand.randomize_Kd_factor = False
115 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5]
116 | Cfg.domain_rand.push_robots = False
117 | Cfg.domain_rand.push_interval_s = 15
118 | Cfg.domain_rand.max_push_vel_xy = 1.
119 |
120 | def only_base_mass():
121 | Cfg.domain_rand.randomize_friction = True
122 | Cfg.domain_rand.friction_range = [1.0, 1.01]
123 | Cfg.domain_rand.randomize_restitution = True
124 | Cfg.domain_rand.restitution_range = [0.5, 0.51]
125 | Cfg.domain_rand.restitution = 0.5
126 | Cfg.domain_rand.randomize_base_mass = True
127 | Cfg.domain_rand.added_mass_range = [-1, 3]
128 | Cfg.domain_rand.randomize_com_displacement = True
129 | Cfg.domain_rand.com_displacement_range = [0.0, 0.01]
130 | Cfg.domain_rand.randomize_motor_strength = True
131 | Cfg.domain_rand.motor_strength_range = [1.0, 1.01]
132 | Cfg.domain_rand.randomize_Kp_factor = False
133 | Cfg.domain_rand.Kp_factor_range = [0.8, 1.3]
134 | Cfg.domain_rand.randomize_Kd_factor = False
135 | Cfg.domain_rand.Kd_factor_range = [0.5, 1.5]
136 | Cfg.domain_rand.push_robots = False
137 | Cfg.domain_rand.push_interval_s = 15
138 | Cfg.domain_rand.max_push_vel_xy = 1.
139 |
140 |
141 | DR_SETTINGS = dict(
142 | rand_regular=rand_regular,
143 | rand_large=rand_large,
144 | static_low=static_low,
145 | static_medium=static_medium,
146 | static_high=static_high,
147 | only_base_mass=only_base_mass,
148 | )
149 |
--------------------------------------------------------------------------------
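DR_SETTINGS maps each preset name to the function that writes its values into the shared Cfg; an evaluation script would typically apply base_set first and then one preset by name. A minimal sketch (the preset choice is an example):

    from dribblebot_learn.eval_metrics.domain_randomization import base_set, DR_SETTINGS

    base_set()                      # common evaluation settings (terrain size, episode length, ...)
    DR_SETTINGS["rand_regular"]()   # then overwrite Cfg.domain_rand with the chosen preset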
/dribblebot_learn/eval_metrics/metrics.py:
--------------------------------------------------------------------------------
1 | def to_numpy(fn):
2 | def thunk(*args, **kwargs):
3 | return fn(*args, **kwargs).cpu().numpy()
4 |
5 | return thunk
6 |
7 |
8 | def lin_vel_rmsd(env, actor_critic, obs):
9 | return ((env.base_lin_vel[:, 0] - env.commands[:, 0]) ** 2).cpu() ** 0.5
10 |
11 |
12 | def ang_vel_rmsd(env, actor_critic, obs):
13 | return ((env.base_ang_vel[:, 2] - env.commands[:, 2]) ** 2).cpu() ** 0.5
14 |
15 |
16 | def lin_vel_x(env, actor_critic, obs):
17 | return env.base_lin_vel[:, 0].cpu()
18 |
19 |
20 | def ang_vel_yaw(env, actor_critic, obs):
21 | return env.base_ang_vel[:, 2].cpu()
22 |
23 |
24 | def base_height(env, actor_critic, obs):
25 | import torch
26 | return torch.mean(env.root_states[:, 2].unsqueeze(1) - env.measured_heights, dim=1).cpu()
27 |
28 |
29 | def max_torques(env, actor_critic, obs):
30 | import torch
31 | max_torque, max_torque_indices = torch.max(torch.abs(env.torques), dim=1)
32 | return max_torque.cpu()
33 |
34 |
35 | def power_consumption(env, actor_critic, obs):
36 | import torch
37 | return torch.sum(torch.multiply(env.torques, env.dof_vel), dim=1).cpu()
38 |
39 |
40 | def CoT(env, actor_critic, obs):
41 | # P / (mgv)
42 | import torch
43 | P = power_consumption(env, actor_critic, obs)
44 | m = (env.default_body_mass + env.payloads).cpu()
45 | g = 9.8 # m/s^2
46 | v = torch.norm(env.base_lin_vel[:, 0:2], dim=1).cpu()
47 | return P / (m * g * v)
48 |
49 |
50 | def froude_number(env, actor_critic, obs):
51 | # v^2 / (gh)
52 | v = lin_vel_x(env, actor_critic, obs)
53 | g = 9.8
54 | h = 0.30
55 | return v ** 2 / (g * h)
56 |
57 |
58 | def adaptation_loss(env, actor_critic, obs):
59 | import torch
60 | if hasattr(actor_critic, "adaptation_module"):
61 | pred = actor_critic.adaptation_module(obs["obs_history"])
62 | target = actor_critic.env_factor_encoder(obs["privileged_obs"])
63 | return torch.mean((pred.cpu().detach() - target.cpu().detach()) ** 2, dim=1)
64 |
65 |
66 | def auxiliary_rewards(env, actor_critic, obs):
67 | rewards = {}
68 | for i in range(len(env.reward_functions)):
69 | name = env.reward_names[i]
70 | rew = env.reward_functions[i]() * env.reward_scales[name]
71 | rewards[name] = rew.cpu().detach()
72 | return rewards
73 |
74 |
75 | def termination(env, actor_critic, obs):
76 | return env.reset_buf.cpu().detach()
77 |
78 |
79 | def privileged_obs(env, actor_critic, obs):
80 | return obs["privileged_obs"].cpu().numpy()
81 |
82 |
83 | def latents(env, actor_critic, obs):
84 | return actor_critic.env_factor_encoder(obs["privileged_obs"]).cpu().numpy()
85 |
86 |
87 | METRICS_FNS = {name: fn for name, fn in locals().items() if name not in ['to_numpy'] and "__" not in name}
88 |
89 | if __name__ == '__main__':
90 | print(*METRICS_FNS.items(), sep="\n")
91 |
92 | import torch
93 |
94 | env = lambda: None
95 | env.base_lin_vel = torch.rand(10, 3)
96 | env.commands = torch.rand(10, 3)
97 |
98 | metric = lin_vel_rmsd(env, None, None)
99 | print(metric)
100 |
--------------------------------------------------------------------------------
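METRICS_FNS gathers every metric defined above under its function name; each takes (env, actor_critic, obs) and returns a per-env tensor (a dict for auxiliary_rewards, and None from adaptation_loss when no adaptation module exists). A hedged sketch of sweeping the registry during evaluation:

    results = {}
    for name, fn in METRICS_FNS.items():
        value = fn(env, actor_critic, obs)
        if value is not None:        # adaptation_loss returns None without an adaptation module
            results[name] = value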
/dribblebot_learn/ppo_cse/__init__.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import deque
3 | import copy
4 | import os
5 |
6 | import torch
7 | # from ml_logger import logger
8 | import wandb
9 | from wandb_osh.hooks import TriggerWandbSyncHook
10 |
11 | from params_proto import PrefixProto
12 |
13 | from .actor_critic import ActorCritic
14 | from .rollout_storage import RolloutStorage
15 |
16 |
17 | def class_to_dict(obj) -> dict:
18 | if not hasattr(obj, "__dict__"):
19 | return obj
20 | result = {}
21 | for key in dir(obj):
22 | if key.startswith("_") or key == "terrain":
23 | continue
24 | element = []
25 | val = getattr(obj, key)
26 | if isinstance(val, list):
27 | for item in val:
28 | element.append(class_to_dict(item))
29 | else:
30 | element = class_to_dict(val)
31 | result[key] = element
32 | return result
33 |
34 |
35 | class DataCaches:
36 | def __init__(self, curriculum_bins):
37 | from dribblebot_learn.ppo_cse.metrics_caches import SlotCache, DistCache
38 |
39 | self.slot_cache = SlotCache(curriculum_bins)
40 | self.dist_cache = DistCache()
41 |
42 |
43 | caches = DataCaches(1)
44 |
45 |
46 | class RunnerArgs(PrefixProto, cli=False):
47 | # runner
48 | algorithm_class_name = 'RMA'
49 | num_steps_per_env = 24 # per iteration
50 | max_iterations = 1500 # number of policy updates
51 |
52 | # logging
53 | save_interval = 400 # check for potential saves every this many iterations
54 | save_video_interval = 100
55 | log_freq = 10
56 |
57 | # load and resume
58 | resume = False
59 | load_run = -1 # -1 = last run
60 | checkpoint = -1 # -1 = last saved model
61 | resume_path = None # updated from load_run and chkpt
62 | resume_curriculum = True
63 | resume_checkpoint = 'ac_weights_last.pt'
64 |
65 |
66 | class Runner:
67 |
68 | def __init__(self, env, device='cpu'):
69 | from .ppo import PPO
70 |
71 | self.device = device
72 | self.env = env
73 |
74 | actor_critic = ActorCritic(self.env.num_obs,
75 | self.env.num_privileged_obs,
76 | self.env.num_obs_history,
77 | self.env.num_actions,
78 | ).to(self.device)
79 | # Load weights from checkpoint
80 | if RunnerArgs.resume:
81 | body = wandb.restore(RunnerArgs.resume_checkpoint, run_path=RunnerArgs.resume_path)
82 | actor_critic.load_state_dict(torch.load(body.name))
83 | print(f"Successfully loaded weights from checkpoint ({RunnerArgs.resume_checkpoint}) and run path ({RunnerArgs.resume_path})")
84 |
85 | self.alg = PPO(actor_critic, device=self.device)
86 | self.num_steps_per_env = RunnerArgs.num_steps_per_env
87 |
88 | # init storage and model
89 | self.alg.init_storage(self.env.num_train_envs, self.num_steps_per_env, [self.env.num_obs],
90 | [self.env.num_privileged_obs], [self.env.num_obs_history], [self.env.num_actions])
91 |
92 | self.tot_timesteps = 0
93 | self.tot_time = 0
94 | self.current_learning_iteration = 0
95 | self.last_recording_it = -RunnerArgs.save_video_interval
96 |
97 | self.env.reset()
98 |
99 | def learn(self, num_learning_iterations, init_at_random_ep_len=False, eval_freq=100, curriculum_dump_freq=500, eval_expert=False):
100 | trigger_sync = TriggerWandbSyncHook()
101 | wandb.watch(self.alg.actor_critic, log="all", log_freq=RunnerArgs.log_freq)
102 |
103 | if init_at_random_ep_len:
104 | self.env.episode_length_buf[:] = torch.randint_like(self.env.episode_length_buf,
105 | high=int(self.env.max_episode_length))
106 |
107 | # split train and test envs
108 | num_train_envs = self.env.num_train_envs
109 |
110 | obs_dict = self.env.get_observations() # TODO: check, is this correct on the first step?
111 | obs, privileged_obs, obs_history = obs_dict["obs"], obs_dict["privileged_obs"], obs_dict["obs_history"]
112 | obs, privileged_obs, obs_history = obs.to(self.device), privileged_obs.to(self.device), obs_history.to(
113 | self.device)
114 | self.alg.actor_critic.train() # switch to train mode (for dropout for example)
115 |
116 | rewbuffer = deque(maxlen=100)
117 | lenbuffer = deque(maxlen=100)
118 | cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
119 | cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
120 |
121 | tot_iter = self.current_learning_iteration + num_learning_iterations
122 | for it in range(self.current_learning_iteration, tot_iter):
123 | start = time.time()
124 | # Rollout
125 | with torch.inference_mode():
126 | for i in range(self.num_steps_per_env):
127 | actions = self.alg.act(obs[:num_train_envs], privileged_obs[:num_train_envs],
128 | obs_history[:num_train_envs])
129 |
130 | ret = self.env.step(actions)
131 | obs_dict, rewards, dones, infos = ret
132 | obs, privileged_obs, obs_history = obs_dict["obs"], obs_dict["privileged_obs"], obs_dict[
133 | "obs_history"]
134 |
135 | obs, privileged_obs, obs_history, rewards, dones = obs.to(self.device), privileged_obs.to(
136 | self.device), obs_history.to(self.device), rewards.to(self.device), dones.to(self.device)
137 | self.alg.process_env_step(rewards[:num_train_envs], dones[:num_train_envs], infos)
138 |
139 | if 'train/episode' in infos:
140 | wandb.log(infos['train/episode'], step=it)
141 |
142 | if 'curriculum' in infos:
143 |
144 | cur_reward_sum += rewards
145 | cur_episode_length += 1
146 |
147 | new_ids = (dones > 0).nonzero(as_tuple=False)
148 |
149 | new_ids_train = new_ids[new_ids < num_train_envs]
150 | rewbuffer.extend(cur_reward_sum[new_ids_train].cpu().numpy().tolist())
151 | lenbuffer.extend(cur_episode_length[new_ids_train].cpu().numpy().tolist())
152 | cur_reward_sum[new_ids_train] = 0
153 | cur_episode_length[new_ids_train] = 0
154 |
155 | if 'curriculum/distribution' in infos:
156 | distribution = infos['curriculum/distribution']
157 |
158 | self.alg.compute_returns(obs_history[:num_train_envs], privileged_obs[:num_train_envs])
159 |
160 | mean_value_loss, mean_surrogate_loss, mean_adaptation_module_loss, mean_decoder_loss, mean_decoder_loss_student, mean_adaptation_module_test_loss, mean_decoder_test_loss, mean_decoder_test_loss_student, mean_adaptation_losses_dict = self.alg.update()
161 | stop = time.time()
162 | learn_time = stop - start
163 |
164 | wandb.log({
165 | "time_iter": learn_time,
166 | # "time_iter": logger.split('epoch'),
167 | "adaptation_loss": mean_adaptation_module_loss,
168 | "mean_value_loss": mean_value_loss,
169 | "mean_surrogate_loss": mean_surrogate_loss,
170 | "mean_decoder_loss": mean_decoder_loss,
171 | "mean_decoder_loss_student": mean_decoder_loss_student,
172 | "mean_decoder_test_loss": mean_decoder_test_loss,
173 | "mean_decoder_test_loss_student": mean_decoder_test_loss_student,
174 | "mean_adaptation_module_test_loss": mean_adaptation_module_test_loss
175 | }, step=it)
176 |
177 |
178 |
179 | # logger.store_metrics(**mean_adaptation_losses_dict)
180 | wandb.log(mean_adaptation_losses_dict, step=it)
181 |
182 | if RunnerArgs.save_video_interval:
183 | self.log_video(it)
184 |
185 | self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
186 |
187 | wandb.log({"timesteps": self.tot_timesteps, "iterations": it}, step=it)
188 | trigger_sync()
189 |
190 | if it % RunnerArgs.save_interval == 0:
191 | print(f"Saving model at iteration {it}")
192 |
193 | path = './tmp/legged_data'
194 | os.makedirs(path, exist_ok=True)
195 |
196 | ac_weight_path = f'{path}/ac_weights_{it}.pt'
197 | torch.save(self.alg.actor_critic.state_dict(), ac_weight_path)
198 | wandb.save(ac_weight_path)
199 |
200 | ac_weight_path = f'{path}/ac_weights_latest.pt'
201 | torch.save(self.alg.actor_critic.state_dict(), ac_weight_path)
202 | wandb.save(ac_weight_path)
203 |
204 | adaptation_module_path = f'{path}/adaptation_module_{it}.jit'
205 | adaptation_module = copy.deepcopy(self.alg.actor_critic.adaptation_module).to('cpu')
206 | traced_script_adaptation_module = torch.jit.script(adaptation_module)
207 | traced_script_adaptation_module.save(adaptation_module_path)
208 |
209 | adaptation_module_path = f'{path}/adaptation_module_latest.jit'
210 | traced_script_adaptation_module.save(adaptation_module_path)
211 |
212 | body_path = f'{path}/body_{it}.jit'
213 | body_model = copy.deepcopy(self.alg.actor_critic.actor_body).to('cpu')
214 | traced_script_body_module = torch.jit.script(body_model)
215 | traced_script_body_module.save(body_path)
216 |
217 | body_path = f'{path}/body_latest.jit'
218 | traced_script_body_module.save(body_path)
219 |
220 | # logger.upload_file(file_path=adaptation_module_path, target_path=f"checkpoints/", once=False)
221 | # logger.upload_file(file_path=body_path, target_path=f"checkpoints/", once=False)
222 |
223 | ac_weights_path = f"{path}/ac_weights_{it}.pt"
224 | torch.save(self.alg.actor_critic.state_dict(), ac_weights_path)
225 | ac_weights_path = f"{path}/ac_weights_latest.pt"
226 | torch.save(self.alg.actor_critic.state_dict(), ac_weights_path)
227 |
228 | wandb.save(f"./tmp/legged_data/adaptation_module_{it}.jit")
229 | wandb.save(f"./tmp/legged_data/body_{it}.jit")
230 | wandb.save(f"./tmp/legged_data/ac_weights_{it}.pt")
231 | wandb.save(f"./tmp/legged_data/adaptation_module_latest.jit")
232 | wandb.save(f"./tmp/legged_data/body_latest.jit")
233 | wandb.save(f"./tmp/legged_data/ac_weights_latest.pt")
234 |
235 |
236 | self.current_learning_iteration += num_learning_iterations
237 |
238 | path = './tmp/legged_data'
239 |
240 | os.makedirs(path, exist_ok=True)
241 |
242 | def log_video(self, it):
243 | if it - self.last_recording_it >= RunnerArgs.save_video_interval:
244 | self.env.start_recording()
245 | print("START RECORDING")
246 | self.last_recording_it = it
247 |
248 | frames = self.env.get_complete_frames()
249 | if len(frames) > 0:
250 | self.env.pause_recording()
251 | print("LOGGING VIDEO")
252 | import numpy as np
253 | video_array = np.concatenate([np.expand_dims(frame, axis=0) for frame in frames], axis=0).swapaxes(1, 3).swapaxes(2, 3)  # (N, H, W, C) -> (N, C, H, W) for wandb.Video
254 | print(video_array.shape)
255 | # logger.save_video(frames, f"videos/{it:05d}.mp4", fps=1 / self.env.dt)
256 | wandb.log({"video": wandb.Video(video_array, fps=1 / self.env.dt)}, step=it)
257 |
258 | def get_inference_policy(self, device=None):
259 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
260 | if device is not None:
261 | self.alg.actor_critic.to(device)
262 | return self.alg.actor_critic.act_inference
263 |
264 | def get_expert_policy(self, device=None):
265 | self.alg.actor_critic.eval() # switch to evaluation mode (dropout for example)
266 | if device is not None:
267 | self.alg.actor_critic.to(device)
268 | return self.alg.actor_critic.act_expert
269 |
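270 | # Deployment sketch (illustrative, not part of the runner; mirrors scripts/play_dribbling_custom.py):
271 | # the TorchScript modules exported above can be composed without the runner:
272 | #   adaptation_module = torch.jit.load('./tmp/legged_data/adaptation_module_latest.jit')
273 | #   body = torch.jit.load('./tmp/legged_data/body_latest.jit')
274 | #   latent = adaptation_module(obs["obs_history"])
275 | #   action = body(torch.cat((obs["obs_history"], latent), dim=-1))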
--------------------------------------------------------------------------------
/dribblebot_learn/ppo_cse/actor_critic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from params_proto import PrefixProto
4 | from torch.distributions import Normal
5 |
6 |
7 | class AC_Args(PrefixProto, cli=False):
8 | # policy
9 | init_noise_std = 1.0
10 | actor_hidden_dims = [512, 256, 128]
11 | critic_hidden_dims = [512, 256, 128]
12 | activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
13 |
14 | adaptation_module_branch_hidden_dims = [256, 128]
15 |
16 | adaptation_labels = []
17 | adaptation_dims = []
18 | adaptation_weights = []
19 |
20 | use_decoder = False
21 |
22 |
23 | class ActorCritic(nn.Module):
24 | is_recurrent = False
25 |
26 | def __init__(self, num_obs,
27 | num_privileged_obs,
28 | num_obs_history,
29 | num_actions,
30 | **kwargs):
31 | if kwargs:
32 | print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str(
33 | [key for key in kwargs.keys()]))
34 | self.decoder = AC_Args.use_decoder
35 | super().__init__()
36 |
37 | self.adaptation_labels = AC_Args.adaptation_labels
38 | self.adaptation_dims = AC_Args.adaptation_dims
39 | self.adaptation_weights = AC_Args.adaptation_weights
40 |
41 | if len(self.adaptation_weights) < len(self.adaptation_labels):
42 | # pad
43 | self.adaptation_weights += [1.0] * (len(self.adaptation_labels) - len(self.adaptation_weights))
44 |
45 | self.num_obs_history = num_obs_history
46 | self.num_privileged_obs = num_privileged_obs
47 |
48 | activation = get_activation(AC_Args.activation)
49 |
50 | # Adaptation module
51 | adaptation_module_layers = []
52 | adaptation_module_layers.append(nn.Linear(self.num_obs_history, AC_Args.adaptation_module_branch_hidden_dims[0]))
53 | adaptation_module_layers.append(activation)
54 | for l in range(len(AC_Args.adaptation_module_branch_hidden_dims)):
55 | if l == len(AC_Args.adaptation_module_branch_hidden_dims) - 1:
56 | adaptation_module_layers.append(
57 | nn.Linear(AC_Args.adaptation_module_branch_hidden_dims[l], self.num_privileged_obs))
58 | else:
59 | adaptation_module_layers.append(
60 | nn.Linear(AC_Args.adaptation_module_branch_hidden_dims[l],
61 | AC_Args.adaptation_module_branch_hidden_dims[l + 1]))
62 | adaptation_module_layers.append(activation)
63 | self.adaptation_module = nn.Sequential(*adaptation_module_layers)
64 |
65 |
66 |
67 | # Policy
68 | actor_layers = []
69 | actor_layers.append(nn.Linear(self.num_privileged_obs + self.num_obs_history, AC_Args.actor_hidden_dims[0]))
70 | actor_layers.append(activation)
71 | for l in range(len(AC_Args.actor_hidden_dims)):
72 | if l == len(AC_Args.actor_hidden_dims) - 1:
73 | actor_layers.append(nn.Linear(AC_Args.actor_hidden_dims[l], num_actions))
74 | else:
75 | actor_layers.append(nn.Linear(AC_Args.actor_hidden_dims[l], AC_Args.actor_hidden_dims[l + 1]))
76 | actor_layers.append(activation)
77 | self.actor_body = nn.Sequential(*actor_layers)
78 |
79 | # Value function
80 | critic_layers = []
81 | critic_layers.append(nn.Linear(self.num_privileged_obs + self.num_obs_history, AC_Args.critic_hidden_dims[0]))
82 | critic_layers.append(activation)
83 | for l in range(len(AC_Args.critic_hidden_dims)):
84 | if l == len(AC_Args.critic_hidden_dims) - 1:
85 | critic_layers.append(nn.Linear(AC_Args.critic_hidden_dims[l], 1))
86 | else:
87 | critic_layers.append(nn.Linear(AC_Args.critic_hidden_dims[l], AC_Args.critic_hidden_dims[l + 1]))
88 | critic_layers.append(activation)
89 | self.critic_body = nn.Sequential(*critic_layers)
90 |
91 | print(f"Adaptation Module: {self.adaptation_module}")
92 | print(f"Actor MLP: {self.actor_body}")
93 | print(f"Critic MLP: {self.critic_body}")
94 |
95 | # Action noise
96 | self.std = nn.Parameter(AC_Args.init_noise_std * torch.ones(num_actions))
97 | self.distribution = None
98 | # disable args validation for speedup
99 | Normal.set_default_validate_args = False
100 |
101 | @staticmethod
102 | # not used at the moment
103 | def init_weights(sequential, scales):
104 | [torch.nn.init.orthogonal_(module.weight, gain=scales[idx]) for idx, module in
105 | enumerate(mod for mod in sequential if isinstance(mod, nn.Linear))]
106 |
107 | def reset(self, dones=None):
108 | pass
109 |
110 | def forward(self):
111 | raise NotImplementedError
112 |
113 | @property
114 | def action_mean(self):
115 | return self.distribution.mean
116 |
117 | @property
118 | def action_std(self):
119 | return self.distribution.stddev
120 |
121 | @property
122 | def entropy(self):
123 | return self.distribution.entropy().sum(dim=-1)
124 |
125 | def update_distribution(self, observation_history):
126 | latent = self.adaptation_module(observation_history)
127 | mean = self.actor_body(torch.cat((observation_history, latent), dim=-1))
128 | self.distribution = Normal(mean, mean * 0. + self.std)  # broadcasts the learned per-action std to the batch shape of the mean
129 |
130 | def act(self, observation_history, **kwargs):
131 | self.update_distribution(observation_history)
132 | return self.distribution.sample()
133 |
134 | def get_actions_log_prob(self, actions):
135 | return self.distribution.log_prob(actions).sum(dim=-1)
136 |
137 | def act_expert(self, ob, policy_info={}):
138 | return self.act_teacher(ob["obs_history"], ob["privileged_obs"])
139 |
140 | def act_inference(self, ob, policy_info={}):
141 | return self.act_student(ob["obs_history"], policy_info=policy_info)
142 |
143 | def act_student(self, observation_history, policy_info={}):
144 | latent = self.adaptation_module(observation_history)
145 | actions_mean = self.actor_body(torch.cat((observation_history, latent), dim=-1))
146 | policy_info["latents"] = latent.detach().cpu().numpy()
147 | return actions_mean
148 |
149 | def act_teacher(self, observation_history, privileged_info, policy_info={}):
150 | actions_mean = self.actor_body(torch.cat((observation_history, privileged_info), dim=-1))
151 | policy_info["latents"] = privileged_info
152 | return actions_mean
153 |
154 | def evaluate(self, observation_history, privileged_observations, **kwargs):
155 | value = self.critic_body(torch.cat((observation_history, privileged_observations), dim=-1))
156 | return value
157 |
158 | def get_student_latent(self, observation_history):
159 | return self.adaptation_module(observation_history)
160 |
161 | def get_activation(act_name):
162 | if act_name == "elu":
163 | return nn.ELU()
164 | elif act_name == "selu":
165 | return nn.SELU()
166 | elif act_name == "relu":
167 | return nn.ReLU()
168 | elif act_name == "crelu":
169 | return nn.ReLU()  # torch.nn has no CReLU module, so this falls back to plain ReLU
170 | elif act_name == "lrelu":
171 | return nn.LeakyReLU()
172 | elif act_name == "tanh":
173 | return nn.Tanh()
174 | elif act_name == "sigmoid":
175 | return nn.Sigmoid()
176 | else:
177 | print("invalid activation function!")
178 | return None
179 |
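180 | # Usage sketch (illustrative; the dimensions below are placeholders, not the real config):
181 | #   ac = ActorCritic(num_obs=70, num_privileged_obs=6, num_obs_history=70 * 30, num_actions=12)
182 | #   actions = ac.act_student(obs_history)               # deployment path: adaptation module estimates the privileged state
183 | #   actions = ac.act_teacher(obs_history, privileged)   # training-time expert path: uses ground-truth privileged observations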
--------------------------------------------------------------------------------
/dribblebot_learn/ppo_cse/metrics_caches.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 |
3 | from ml_logger import logger
4 | import numpy as np
5 | import torch
6 |
7 |
8 | class DistCache:
9 | def __init__(self):
10 | """
11 | Args:
12 | n: Number of slots for the cache
13 | """
14 | self.cache = defaultdict(lambda: 0)
15 |
16 | def log(self, **key_vals):
17 | """
18 | Args:
19 | slots: ids for the array
20 | **key_vals:
21 | """
22 | for k, v in key_vals.items():
23 | count = self.cache[k + '@counts'] + 1
24 | self.cache[k + '@counts'] = count
25 | self.cache[k] = v + (count - 1) * self.cache[k]
26 | self.cache[k] /= count  # incremental mean: new_mean = (v + (n - 1) * old_mean) / n
27 |
28 | def get_summary(self):
29 | ret = {
30 | k: v
31 | for k, v in self.cache.items()
32 | if not k.endswith("@counts")
33 | }
34 | self.cache.clear()
35 | return ret
36 |
37 |
38 | if __name__ == '__main__':
39 | cl = DistCache()
40 | lin_vel = np.ones((11, 11))
41 | ang_vel = np.zeros((5, 5))
42 | cl.log(lin_vel=lin_vel, ang_vel=ang_vel)
43 | lin_vel = np.zeros((11, 11))
44 | ang_vel = np.zeros((5, 5))
45 | cl.log(lin_vel=lin_vel, ang_vel=ang_vel)
46 | print(cl.get_summary())
47 |
48 |
49 | class SlotCache:
50 | def __init__(self, n):
51 | """
52 | Args:
53 | n: Number of slots for the cache
54 | """
55 | self.n = n
56 | self.cache = defaultdict(lambda: np.zeros([n]))
57 |
58 | def log(self, slots=None, **key_vals):
59 | """
60 | Args:
61 | slots: ids for the array
62 | **key_vals:
63 | """
64 | if slots is None:
65 | slots = range(self.n)
66 |
67 | for k, v in key_vals.items():
68 | counts = self.cache[k + '@counts'][slots] + 1
69 | self.cache[k + '@counts'][slots] = counts
70 | self.cache[k][slots] = v + (counts - 1) * self.cache[k][slots]
71 | self.cache[k][slots] /= counts
72 |
73 | def get_summary(self):
74 | ret = {
75 | k: v
76 | for k, v in self.cache.items()
77 | if not k.endswith("@counts")
78 | }
79 | self.cache.clear()
80 | return ret
81 |
82 |
83 | if __name__ == '__main__':
84 | cl = SlotCache(100)
85 | reset_env_ids = [2, 5, 6]
86 | lin_vel = [0.1, 0.5, 0.8]
87 | ang_vel = [0.4, -0.4, 0.2]
88 | cl.log(reset_env_ids, lin_vel=lin_vel, ang_vel=ang_vel)
89 |
90 | cl.log(lin_vel=np.ones(100))
91 |
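92 | # Worked example for the calls above (illustrative): slot 2 is first logged with
93 | # lin_vel=0.1 and then folded together with 1.0 from the second call, so get_summary()
94 | # reports its running mean as (0.1 + 1.0) / 2 = 0.55, while slots logged only once keep their value.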
--------------------------------------------------------------------------------
/dribblebot_learn/ppo_cse/ppo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torch.nn.functional as F
5 | import numpy as np
6 | from params_proto import PrefixProto
7 |
8 | from dribblebot_learn.ppo_cse import ActorCritic
9 | from dribblebot_learn.ppo_cse import RolloutStorage
10 | from dribblebot_learn.ppo_cse import caches
11 |
12 |
13 | class PPO_Args(PrefixProto):
14 | # algorithm
15 | value_loss_coef = 1.0
16 | use_clipped_value_loss = True
17 | clip_param = 0.2
18 | entropy_coef = 0.01
19 | num_learning_epochs = 5
20 | num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches
21 | learning_rate = 1.e-3 # 5.e-4
22 | adaptation_module_learning_rate = 1.e-3
23 | num_adaptation_module_substeps = 1
24 | schedule = 'adaptive' # could be adaptive, fixed
25 | gamma = 0.99
26 | lam = 0.95
27 | desired_kl = 0.01
28 | max_grad_norm = 1.
29 |
30 | selective_adaptation_module_loss = False
31 |
32 |
33 | class PPO:
34 | actor_critic: ActorCritic
35 |
36 | def __init__(self, actor_critic, device='cpu'):
37 |
38 | self.device = device
39 |
40 | # PPO components
41 | self.actor_critic = actor_critic
42 | self.actor_critic.to(device)
43 |
44 | PPO_Args.adaptation_labels = self.actor_critic.adaptation_labels
45 | PPO_Args.adaptation_dims = self.actor_critic.adaptation_dims
46 | PPO_Args.adaptation_weights = self.actor_critic.adaptation_weights
47 |
48 | self.storage = None # initialized later
49 | self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=PPO_Args.learning_rate)
50 | self.adaptation_module_optimizer = optim.Adam(self.actor_critic.parameters(),
51 | lr=PPO_Args.adaptation_module_learning_rate)
52 | if self.actor_critic.decoder:
53 | self.decoder_optimizer = optim.Adam(self.actor_critic.parameters(),
54 | lr=PPO_Args.adaptation_module_learning_rate)
55 | self.transition = RolloutStorage.Transition()
56 |
57 | self.learning_rate = PPO_Args.learning_rate
58 |
59 | def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, privileged_obs_shape, obs_history_shape,
60 | action_shape):
61 | self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, privileged_obs_shape,
62 | obs_history_shape, action_shape, self.device)
63 |
64 | def test_mode(self):
65 | self.actor_critic.test()
66 |
67 | def train_mode(self):
68 | self.actor_critic.train()
69 |
70 | def act(self, obs, privileged_obs, obs_history):
71 | # Compute the actions and values
72 | self.transition.actions = self.actor_critic.act(obs_history).detach()
73 | self.transition.values = self.actor_critic.evaluate(obs_history, privileged_obs).detach()
74 | self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach()
75 | self.transition.action_mean = self.actor_critic.action_mean.detach()
76 | self.transition.action_sigma = self.actor_critic.action_std.detach()
77 | # need to record obs and critic_obs before env.step()
78 | self.transition.observations = obs
79 | self.transition.critic_observations = obs
80 | self.transition.privileged_observations = privileged_obs
81 | self.transition.observation_histories = obs_history
82 | return self.transition.actions
83 |
84 | def process_env_step(self, rewards, dones, infos):
85 | self.transition.rewards = rewards.clone()
86 | self.transition.dones = dones
87 | self.transition.env_bins = infos["env_bins"]
88 | # Bootstrapping on time outs
89 | if 'time_outs' in infos:
90 | self.transition.rewards += PPO_Args.gamma * torch.squeeze(
91 | self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1)
92 |
93 | # Record the transition
94 | self.storage.add_transitions(self.transition)
95 | self.transition.clear()
96 | self.actor_critic.reset(dones)
97 |
98 | def compute_returns(self, last_critic_obs, last_critic_privileged_obs):
99 | last_values = self.actor_critic.evaluate(last_critic_obs, last_critic_privileged_obs).detach()
100 | self.storage.compute_returns(last_values, PPO_Args.gamma, PPO_Args.lam)
101 |
102 | def update(self):
103 | mean_value_loss = 0
104 | mean_surrogate_loss = 0
105 | mean_adaptation_module_loss = 0
106 | mean_decoder_loss = 0
107 | mean_decoder_loss_student = 0
108 | mean_adaptation_module_test_loss = 0
109 | mean_decoder_test_loss = 0
110 | mean_decoder_test_loss_student = 0
111 |
112 | mean_adaptation_losses = {}
113 | label_start_end = {}
114 | si = 0
115 | for idx, (label, length) in enumerate(zip(PPO_Args.adaptation_labels, PPO_Args.adaptation_dims)):
116 | label_start_end[label] = (si, si + length)
117 | si = si + length
118 | mean_adaptation_losses[label] = 0
119 |
120 | generator = self.storage.mini_batch_generator(PPO_Args.num_mini_batches, PPO_Args.num_learning_epochs)
121 | for obs_batch, critic_obs_batch, privileged_obs_batch, obs_history_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
122 | old_mu_batch, old_sigma_batch, masks_batch, env_bins_batch in generator:
123 |
124 | self.actor_critic.act(obs_history_batch, masks=masks_batch)
125 | actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
126 | value_batch = self.actor_critic.evaluate(obs_history_batch, privileged_obs_batch, masks=masks_batch)
127 | mu_batch = self.actor_critic.action_mean
128 | sigma_batch = self.actor_critic.action_std
129 | entropy_batch = self.actor_critic.entropy
130 |
131 | # KL-based adaptive learning rate: closed-form KL between the old and new diagonal Gaussian policies drives the step-size adjustment below
132 | if PPO_Args.desired_kl is not None and PPO_Args.schedule == 'adaptive':
133 | with torch.inference_mode():
134 | kl = torch.sum(
135 | torch.log(sigma_batch / old_sigma_batch + 1.e-5) + (
136 | torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (
137 | 2.0 * torch.square(sigma_batch)) - 0.5, axis=-1)
138 | kl_mean = torch.mean(kl)
139 |
140 | if kl_mean > PPO_Args.desired_kl * 2.0:
141 | self.learning_rate = max(1e-5, self.learning_rate / 1.5)
142 | elif kl_mean < PPO_Args.desired_kl / 2.0 and kl_mean > 0.0:
143 | self.learning_rate = min(1e-2, self.learning_rate * 1.5)
144 |
145 | for param_group in self.optimizer.param_groups:
146 | param_group['lr'] = self.learning_rate
147 |
148 | # Surrogate loss
149 | ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
150 | surrogate = -torch.squeeze(advantages_batch) * ratio
151 | surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - PPO_Args.clip_param,
152 | 1.0 + PPO_Args.clip_param)
153 | surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
154 |
155 | # Value function loss
156 | if PPO_Args.use_clipped_value_loss:
157 | value_clipped = target_values_batch + \
158 | (value_batch - target_values_batch).clamp(-PPO_Args.clip_param,
159 | PPO_Args.clip_param)
160 | value_losses = (value_batch - returns_batch).pow(2)
161 | value_losses_clipped = (value_clipped - returns_batch).pow(2)
162 | value_loss = torch.max(value_losses, value_losses_clipped).mean()
163 | else:
164 | value_loss = (returns_batch - value_batch).pow(2).mean()
165 |
166 | loss = surrogate_loss + PPO_Args.value_loss_coef * value_loss - PPO_Args.entropy_coef * entropy_batch.mean()
167 |
168 | # Gradient step
169 | self.optimizer.zero_grad()
170 | loss.backward()
171 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), PPO_Args.max_grad_norm)
172 | self.optimizer.step()
173 |
174 | mean_value_loss += value_loss.item()
175 | mean_surrogate_loss += surrogate_loss.item()
176 |
177 | data_size = privileged_obs_batch.shape[0]
178 | num_train = int(data_size // 5 * 4)
179 |
180 | # Adaptation module gradient step: this loss only trains the concurrent state-estimation (adaptation) module, not the policy network
181 | if len(PPO_Args.adaptation_labels) > 0:
182 |
183 | for epoch in range(PPO_Args.num_adaptation_module_substeps):
184 |
185 | adaptation_pred = self.actor_critic.get_student_latent(obs_history_batch)
186 | with torch.no_grad():
187 | adaptation_target = privileged_obs_batch
188 | adaptation_loss = 0
189 | for idx, (label, length, weight) in enumerate(zip(PPO_Args.adaptation_labels, PPO_Args.adaptation_dims, PPO_Args.adaptation_weights)):
190 |
191 | start, end = label_start_end[label]
192 | selection_indices = torch.linspace(start, end - 1, steps=end - start, dtype=torch.long)
193 |
194 | idx_adaptation_loss = F.mse_loss(adaptation_pred[:, selection_indices] * weight,
195 | adaptation_target[:, selection_indices] * weight)
196 | mean_adaptation_losses[label] += idx_adaptation_loss.item()
197 |
198 | adaptation_loss += idx_adaptation_loss
199 |
200 | self.adaptation_module_optimizer.zero_grad()
201 | adaptation_loss.backward()
202 | self.adaptation_module_optimizer.step()
203 |
204 | mean_adaptation_module_loss += adaptation_loss.item()
205 | mean_adaptation_module_test_loss += 0 # adaptation_test_loss.item()
206 |
207 | num_updates = PPO_Args.num_learning_epochs * PPO_Args.num_mini_batches
208 | mean_value_loss /= num_updates
209 | mean_surrogate_loss /= num_updates
210 | mean_adaptation_module_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps)
211 | mean_decoder_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps)
212 | mean_decoder_loss_student /= (num_updates * PPO_Args.num_adaptation_module_substeps)
213 | mean_adaptation_module_test_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps)
214 | mean_decoder_test_loss /= (num_updates * PPO_Args.num_adaptation_module_substeps)
215 | mean_decoder_test_loss_student /= (num_updates * PPO_Args.num_adaptation_module_substeps)
216 | for label in PPO_Args.adaptation_labels:
217 | mean_adaptation_losses[label] /= (num_updates * PPO_Args.num_adaptation_module_substeps)
218 | self.storage.clear()
219 |
220 | return mean_value_loss, mean_surrogate_loss, mean_adaptation_module_loss, mean_decoder_loss, mean_decoder_loss_student, mean_adaptation_module_test_loss, mean_decoder_test_loss, mean_decoder_test_loss_student, mean_adaptation_losses
221 |
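222 | # Training-loop sketch (illustrative; the actual wiring lives in ppo_cse_runner above):
223 | #   ppo = PPO(actor_critic, device='cuda:0')
224 | #   ppo.init_storage(num_envs, num_steps, (num_obs,), (num_privileged_obs,), (num_obs_history,), (num_actions,))
225 | #   for each rollout step:
226 | #       actions = ppo.act(obs, privileged_obs, obs_history)
227 | #       obs, privileged_obs, obs_history, rewards, dones, infos = env.step(actions)  # hypothetical env interface
228 | #       ppo.process_env_step(rewards, dones, infos)
229 | #   ppo.compute_returns(obs_history, privileged_obs)
230 | #   losses = ppo.update()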
--------------------------------------------------------------------------------
/dribblebot_learn/ppo_cse/rollout_storage.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import torch
4 |
5 | from dribblebot_learn.utils import split_and_pad_trajectories
6 |
7 | class RolloutStorage:
8 | class Transition:
9 | def __init__(self):
10 | self.observations = None
11 | self.privileged_observations = None
12 | self.observation_histories = None
13 | self.critic_observations = None
14 | self.actions = None
15 | self.rewards = None
16 | self.dones = None
17 | self.values = None
18 | self.actions_log_prob = None
19 | self.action_mean = None
20 | self.action_sigma = None
21 | self.env_bins = None
22 |
23 | def clear(self):
24 | self.__init__()
25 |
26 | def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, obs_history_shape, actions_shape, device='cpu'):
27 |
28 | self.device = device
29 |
30 | self.obs_shape = obs_shape
31 | self.privileged_obs_shape = privileged_obs_shape
32 | self.obs_history_shape = obs_history_shape
33 | self.actions_shape = actions_shape
34 |
35 | # Core
36 | self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device)
37 | self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device)
38 | self.observation_histories = torch.zeros(num_transitions_per_env, num_envs, *obs_history_shape, device=self.device)
39 | self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
40 | self.actions = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
41 | self.dones = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device).byte()
42 |
43 | # For PPO
44 | self.actions_log_prob = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
45 | self.values = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
46 | self.returns = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
47 | self.advantages = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
48 | self.mu = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
49 | self.sigma = torch.zeros(num_transitions_per_env, num_envs, *actions_shape, device=self.device)
50 | self.env_bins = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
51 |
52 | self.num_transitions_per_env = num_transitions_per_env
53 | self.num_envs = num_envs
54 |
55 | self.step = 0
56 |
57 | def add_transitions(self, transition: Transition):
58 | if self.step >= self.num_transitions_per_env:
59 | raise AssertionError("Rollout buffer overflow")
60 | self.observations[self.step].copy_(transition.observations)
61 | self.privileged_observations[self.step].copy_(transition.privileged_observations)
62 | self.observation_histories[self.step].copy_(transition.observation_histories)
63 | self.actions[self.step].copy_(transition.actions)
64 | self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
65 | self.dones[self.step].copy_(transition.dones.view(-1, 1))
66 | self.values[self.step].copy_(transition.values)
67 | self.actions_log_prob[self.step].copy_(transition.actions_log_prob.view(-1, 1))
68 | self.mu[self.step].copy_(transition.action_mean)
69 | self.sigma[self.step].copy_(transition.action_sigma)
70 | self.env_bins[self.step].copy_(transition.env_bins.view(-1, 1))
71 | self.step += 1
72 |
73 | def clear(self):
74 | self.step = 0
75 |
76 | def compute_returns(self, last_values, gamma, lam):
77 | advantage = 0
78 | for step in reversed(range(self.num_transitions_per_env)):
79 | if step == self.num_transitions_per_env - 1:
80 | next_values = last_values
81 | else:
82 | next_values = self.values[step + 1]
83 | next_is_not_terminal = 1.0 - self.dones[step].float()
84 | delta = self.rewards[step] + next_is_not_terminal * gamma * next_values - self.values[step]
85 | advantage = delta + next_is_not_terminal * gamma * lam * advantage  # GAE recursion: A_t = delta_t + gamma * lam * A_{t+1}
86 | self.returns[step] = advantage + self.values[step]
87 |
88 | # Compute and normalize the advantages
89 | self.advantages = self.returns - self.values
90 | self.advantages = (self.advantages - self.advantages.mean()) / (self.advantages.std() + 1e-8)
91 |
92 | def get_statistics(self):
93 | done = self.dones
94 | done[-1] = 1
95 | flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
96 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0]))
97 | trajectory_lengths = (done_indices[1:] - done_indices[:-1])
98 | return trajectory_lengths.float().mean(), self.rewards.mean()
99 |
100 | def mini_batch_generator(self, num_mini_batches, num_epochs=8):
101 | batch_size = self.num_envs * self.num_transitions_per_env
102 | mini_batch_size = batch_size // num_mini_batches
103 | indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device)
104 |
105 | observations = self.observations.flatten(0, 1)
106 | privileged_obs = self.privileged_observations.flatten(0, 1)
107 | obs_history = self.observation_histories.flatten(0, 1)
108 | critic_observations = observations
109 |
110 | actions = self.actions.flatten(0, 1)
111 | values = self.values.flatten(0, 1)
112 | returns = self.returns.flatten(0, 1)
113 | old_actions_log_prob = self.actions_log_prob.flatten(0, 1)
114 | advantages = self.advantages.flatten(0, 1)
115 | old_mu = self.mu.flatten(0, 1)
116 | old_sigma = self.sigma.flatten(0, 1)
117 | old_env_bins = self.env_bins.flatten(0, 1)
118 |
119 | for epoch in range(num_epochs):
120 | for i in range(num_mini_batches):
121 |
122 | start = i*mini_batch_size
123 | end = (i+1)*mini_batch_size
124 | batch_idx = indices[start:end]
125 |
126 | obs_batch = observations[batch_idx]
127 | critic_observations_batch = critic_observations[batch_idx]
128 | privileged_obs_batch = privileged_obs[batch_idx]
129 | obs_history_batch = obs_history[batch_idx]
130 | actions_batch = actions[batch_idx]
131 | target_values_batch = values[batch_idx]
132 | returns_batch = returns[batch_idx]
133 | old_actions_log_prob_batch = old_actions_log_prob[batch_idx]
134 | advantages_batch = advantages[batch_idx]
135 | old_mu_batch = old_mu[batch_idx]
136 | old_sigma_batch = old_sigma[batch_idx]
137 | env_bins_batch = old_env_bins[batch_idx]
138 | yield obs_batch, critic_observations_batch, privileged_obs_batch, obs_history_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \
139 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, None, env_bins_batch
140 |
141 | # for RNNs only
142 | def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8):
143 |
144 | padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones)
145 | padded_privileged_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.privileged_observations, self.dones)
146 | padded_obs_history_trajectories, trajectory_masks = split_and_pad_trajectories(self.observation_histories, self.dones)
147 | padded_critic_obs_trajectories = padded_obs_trajectories
148 |
149 | mini_batch_size = self.num_envs // num_mini_batches
150 | for ep in range(num_epochs):
151 | first_traj = 0
152 | for i in range(num_mini_batches):
153 | start = i*mini_batch_size
154 | stop = (i+1)*mini_batch_size
155 |
156 | dones = self.dones.squeeze(-1)
157 | last_was_done = torch.zeros_like(dones, dtype=torch.bool)
158 | last_was_done[1:] = dones[:-1]
159 | last_was_done[0] = True
160 | trajectories_batch_size = torch.sum(last_was_done[:, start:stop])
161 | last_traj = first_traj + trajectories_batch_size
162 |
163 | masks_batch = trajectory_masks[:, first_traj:last_traj]
164 | obs_batch = padded_obs_trajectories[:, first_traj:last_traj]
165 | critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj]
166 | privileged_obs_batch = padded_privileged_obs_trajectories[:, first_traj:last_traj]
167 | obs_history_batch = padded_obs_history_trajectories[:, first_traj:last_traj]
168 |
169 | actions_batch = self.actions[:, start:stop]
170 | old_mu_batch = self.mu[:, start:stop]
171 | old_sigma_batch = self.sigma[:, start:stop]
172 | returns_batch = self.returns[:, start:stop]
173 | advantages_batch = self.advantages[:, start:stop]
174 | values_batch = self.values[:, start:stop]
175 | old_actions_log_prob_batch = self.actions_log_prob[:, start:stop]
176 |
177 | yield obs_batch, critic_obs_batch, privileged_obs_batch, obs_history_batch, actions_batch, values_batch, advantages_batch, returns_batch, \
178 | old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, masks_batch
179 |
180 | first_traj = last_traj
--------------------------------------------------------------------------------
/dribblebot_learn/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # License: see [LICENSE, LICENSES/rsl_rl/LICENSE]
2 |
3 | from .utils import split_and_pad_trajectories, unpad_trajectories
--------------------------------------------------------------------------------
/dribblebot_learn/utils/utils.py:
--------------------------------------------------------------------------------
1 | # License: see [LICENSE, LICENSES/rsl_rl/LICENSE]
2 |
3 | import torch
4 |
5 | def split_and_pad_trajectories(tensor, dones):
6 | """ Splits trajectories at done indices. Then concatenates them and padds with zeros up to the length og the longest trajectory.
7 | Returns masks corresponding to valid parts of the trajectories
8 | Example:
9 | Input: [ [a1, a2, a3, a4 | a5, a6],
10 | [b1, b2 | b3, b4, b5 | b6]
11 | f]
12 |
13 | Output:[ [a1, a2, a3, a4], | [ [True, True, True, True],
14 | [a5, a6, 0, 0], | [True, True, False, False],
15 | [b1, b2, 0, 0], | [True, True, False, False],
16 | [b3, b4, b5, 0], | [True, True, True, False],
17 | [b6, 0, 0, 0] | [True, False, False, False],
18 | ] | ]
19 |
20 | Assumes that the inputy has the following dimension order: [time, number of envs, aditional dimensions]
21 | """
22 | dones = dones.clone()
23 | dones[-1] = 1
24 | # Permute the buffers to have order (num_envs, num_transitions_per_env, ...), for correct reshaping
25 | flat_dones = dones.transpose(1, 0).reshape(-1, 1)
26 |
27 | # Get length of trajectory by counting the number of successive not done elements
28 | done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero()[:, 0]))
29 | trajectory_lengths = done_indices[1:] - done_indices[:-1]
30 | trajectory_lengths_list = trajectory_lengths.tolist()
31 | # Extract the individual trajectories
32 | trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list)
33 | padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
34 |
35 |
36 | trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
37 | return padded_trajectories, trajectory_masks
38 |
39 | def unpad_trajectories(trajectories, masks):
40 | """ Does the inverse operation of split_and_pad_trajectories()
41 | """
42 | # Need to transpose before and after the masking to have proper reshaping
43 | return trajectories.transpose(1, 0)[masks.transpose(1, 0)].view(-1, trajectories.shape[0], trajectories.shape[-1]).transpose(1, 0)
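44 | 
45 | # Round-trip sketch (illustrative): for a buffer of shape [time, num_envs, dim] and matching dones,
46 | #   padded, masks = split_and_pad_trajectories(buffer, dones)
47 | #   restored = unpad_trajectories(padded, masks)   # recovers the original [time, num_envs, dim] layout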
--------------------------------------------------------------------------------
/resources/actuator_nets/unitree_go1.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/actuator_nets/unitree_go1.pt
--------------------------------------------------------------------------------
/resources/objects/ball.urdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/objects/ball.urdf
--------------------------------------------------------------------------------
/resources/robots/go1/meshes/calf.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/calf.stl
--------------------------------------------------------------------------------
/resources/robots/go1/meshes/hip.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/hip.stl
--------------------------------------------------------------------------------
/resources/robots/go1/meshes/thigh.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/thigh.stl
--------------------------------------------------------------------------------
/resources/robots/go1/meshes/thigh_mirror.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/thigh_mirror.stl
--------------------------------------------------------------------------------
/resources/robots/go1/meshes/trunk.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/meshes/trunk.stl
--------------------------------------------------------------------------------
/resources/robots/go1/xml/go1.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/robots/go1/xml/go1.xml
--------------------------------------------------------------------------------
/resources/textures/background_texture_metal_rust.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/background_texture_metal_rust.jpg
--------------------------------------------------------------------------------
/resources/textures/brick_texture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/brick_texture.jpg
--------------------------------------------------------------------------------
/resources/textures/grass.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/grass.jpg
--------------------------------------------------------------------------------
/resources/textures/ice_texture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/ice_texture.jpg
--------------------------------------------------------------------------------
/resources/textures/icy_pond.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/icy_pond.jpg
--------------------------------------------------------------------------------
/resources/textures/metal_wall_iron_fence.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/metal_wall_iron_fence.jpg
--------------------------------------------------------------------------------
/resources/textures/particle_board_paint_aged.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/particle_board_paint_aged.jpg
--------------------------------------------------------------------------------
/resources/textures/pebble_stone_texture_nature.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/pebble_stone_texture_nature.jpg
--------------------------------------------------------------------------------
/resources/textures/snow.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/snow.jpg
--------------------------------------------------------------------------------
/resources/textures/texture_background_wall_paint_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_background_wall_paint_2.jpg
--------------------------------------------------------------------------------
/resources/textures/texture_background_wall_paint_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_background_wall_paint_3.jpg
--------------------------------------------------------------------------------
/resources/textures/texture_stone_stone_texture_0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_stone_stone_texture_0.jpg
--------------------------------------------------------------------------------
/resources/textures/texture_wood_brown_1033760.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/texture_wood_brown_1033760.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_brick_texture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_brick_texture.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_grass.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_grass.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_ice_texture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_ice_texture.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_particle_board_paint_aged.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_particle_board_paint_aged.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_pebble_stone_texture_nature.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_pebble_stone_texture_nature.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_snow.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_snow.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_texture_stone_stone_texture_0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_texture_stone_stone_texture_0.jpg
--------------------------------------------------------------------------------
/resources/textures/tiled_texture_wood_brown_1033760.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/resources/textures/tiled_texture_wood_brown_1033760.jpg
--------------------------------------------------------------------------------
/resources/textures/tiling_util.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 | from PIL import Image
4 |
5 | def create_tiled_image(image_path, tile_count_x=10, tile_count_y=10):
6 | # Load the image
7 | original_image = Image.open(image_path)
8 | # width, height = original_image.size
9 | width, height = 3000, 2000  # hard-coded output resolution; the size of the original image is not used
10 |
11 | # Calculate the size of each tile
12 | tile_width = width // tile_count_x
13 | tile_height = height // tile_count_y
14 |
15 | # Create a new image with the same resolution as the original
16 | new_image = Image.new('RGB', (width, height))
17 |
18 | # Iterate through the tiles and paste the resized original image
19 | for x in range(tile_count_x):
20 | for y in range(tile_count_y):
21 | resized_image = original_image.resize((tile_width, tile_height))
22 | new_image.paste(resized_image, (tile_width * x, tile_height * y))
23 |
24 | # Save the new tiled image
25 | new_image.save(f'tiled_{image_path}')
26 |
27 | if __name__ == '__main__':
28 | image_paths = ['snow.jpg', 'grass.jpg', 'pebble_stone_texture_nature.jpg', 'particle_board_paint_aged.jpg', 'texture_stone_stone_texture_0.jpg', 'texture_wood_brown_1033760.jpg', 'brick_texture.jpg']
29 | # image_path = 'ice_texture.jpg' # Replace with the path to your image (PNG or JPG)
30 | for image_path in image_paths:
31 | create_tiled_image(image_path, tile_count_x=5, tile_count_y=5)
32 |
33 | # %%
34 |
--------------------------------------------------------------------------------
/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/ac_weights.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/ac_weights.pt
--------------------------------------------------------------------------------
/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/adaptation_module.jit:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/adaptation_module.jit
--------------------------------------------------------------------------------
/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/body.jit:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/runs/improbableailab/dribbling/bvggoq26/dribbling_pretrained/body.jit
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/actuator_net/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Improbable-AI/dribblebot/8e743ac3ee14038bd48af3cde1b72eef03f881d0/scripts/actuator_net/__init__.py
--------------------------------------------------------------------------------
/scripts/actuator_net/eval.py:
--------------------------------------------------------------------------------
1 | from utils import train_actuator_network_and_plot_predictions
2 | from glob import glob
3 |
4 |
5 | log_dir_root = "../../logs/"
6 | log_dir = "example_experiment/2022/11_01/16_01_50_0"
7 |
8 | # Evaluates the existing actuator network by default
9 | load_pretrained_model = True
10 | actuator_network_path = "../../resources/actuator_nets/unitree_go1.pt"
11 |
12 | log_dirs = glob(f"{log_dir_root}{log_dir}/", recursive=True)
13 |
14 | if len(log_dirs) == 0: raise FileNotFoundError(f"No log files found in {log_dir_root}{log_dir}/")
15 |
16 | for log_dir in log_dirs:
17 | try:
18 | train_actuator_network_and_plot_predictions(log_dir[:11], log_dir[11:], actuator_network_path=actuator_network_path, load_pretrained_model=load_pretrained_model)
19 | except FileNotFoundError:
20 | print(f"Couldn't find log.pkl in {log_dir}")
21 | except EOFError:
22 | print(f"Incomplete log.pkl in {log_dir}")
--------------------------------------------------------------------------------
/scripts/actuator_net/train.py:
--------------------------------------------------------------------------------
1 | from utils import train_actuator_network_and_plot_predictions
2 | from glob import glob
3 |
4 | log_dir_root = "../../logs/"
5 | log_dir = "example_experiment/2022/11_01/16_01_50_0"
6 |
7 | # Evaluates the existing actuator network by default
8 | # load_pretrained_model = True
9 | # actuator_network_path = "../../resources/actuator_nets/unitree_go1.pt"
10 |
11 | # Uncomment these lines to train a new actuator network
12 | load_pretrained_model = False
13 | actuator_network_path = "../../resources/actuator_nets/unitree_go1_new.pt"
14 |
15 |
16 | log_dirs = glob(f"{log_dir_root}{log_dir}/", recursive=True)
17 |
18 | if len(log_dirs) == 0: raise FileNotFoundError(f"No log files found in {log_dir_root}{log_dir}/")
19 |
20 | for log_dir in log_dirs:
21 | try:
22 | train_actuator_network_and_plot_predictions(log_dir[:11], log_dir[11:], actuator_network_path=actuator_network_path, load_pretrained_model=load_pretrained_model)
23 | except FileNotFoundError:
24 | print(f"Couldn't find log.pkl in {log_dir}")
25 | except EOFError:
26 | print(f"Incomplete log.pkl in {log_dir}")
--------------------------------------------------------------------------------
/scripts/actuator_net/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle as pkl
3 | from matplotlib import pyplot as plt
4 | import time
5 | import imageio
6 | import numpy as np
7 | from tqdm import tqdm
8 | from glob import glob
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.utils.data import Dataset
14 | from torch.utils.data import DataLoader
15 | from torch.optim import Adam
16 |
17 | class ActuatorDataset(Dataset):
18 | def __init__(self, data):
19 | self.data = data
20 |
21 | def __len__(self):
22 | return len(self.data['joint_states'])
23 |
24 | def __getitem__(self, idx):
25 | return {k: v[idx] for k,v in self.data.items()}
26 |
27 | class Act(nn.Module):
28 | def __init__(self, act, slope=0.05):
29 | super(Act, self).__init__()
30 | self.act = act
31 | self.slope = slope
32 | self.shift = torch.log(torch.tensor(2.0)).item()
33 |
34 | def forward(self, input):
35 | if self.act == "relu":
36 | return F.relu(input)
37 | elif self.act == "leaky_relu":
38 | return F.leaky_relu(input)
39 | elif self.act == "sp":
40 | return F.softplus(input, beta=1.)
41 | elif self.act == "leaky_sp":
42 | return F.softplus(input, beta=1.) - self.slope * F.relu(-input)
43 | elif self.act == "elu":
44 | return F.elu(input, alpha=1.)
45 | elif self.act == "leaky_elu":
46 | return F.elu(input, alpha=1.) - self.slope * F.relu(-input)
47 | elif self.act == "ssp":
48 | return F.softplus(input, beta=1.) - self.shift
49 | elif self.act == "leaky_ssp":
50 | return (
51 | F.softplus(input, beta=1.) -
52 | self.slope * F.relu(-input) -
53 | self.shift
54 | )
55 | elif self.act == "tanh":
56 | return torch.tanh(input)
57 | elif self.act == "leaky_tanh":
58 | return torch.tanh(input) + self.slope * input
59 | elif self.act == "swish":
60 | return torch.sigmoid(input) * input
61 | elif self.act == "softsign":
62 | return F.softsign(input)
63 | else:
64 | raise RuntimeError(f"Undefined activation called {self.act}")
65 |
66 | def build_mlp(in_dim, units, layers, out_dim,
67 | act='relu', layer_norm=False, act_final=False):
68 | mods = [nn.Linear(in_dim, units), Act(act)]
69 | for i in range(layers-1):
70 | mods += [nn.Linear(units, units), Act(act)]
71 | mods += [nn.Linear(units, out_dim)]
72 | if act_final:
73 | mods += [Act(act)]
74 | if layer_norm:
75 | mods += [nn.LayerNorm(out_dim)]
76 | return nn.Sequential(*mods)
77 |
78 | def train_actuator_network(xs, ys, actuator_network_path):
79 |
80 | print(xs.shape, ys.shape)
81 |
82 | num_data = xs.shape[0]
83 | num_train = num_data // 5 * 4
84 | num_test = num_data - num_train
85 |
86 | dataset = ActuatorDataset({"joint_states": xs, "tau_ests": ys})
87 | train_set, val_set = torch.utils.data.random_split(dataset, [num_train, num_test])
88 | train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
89 | test_loader = DataLoader(val_set, batch_size=128, shuffle=True)
90 |
91 | model = build_mlp(in_dim=6, units=32, layers=2, out_dim=1, act='softsign')
92 |
93 | lr = 8e-4
94 | opt = Adam(model.parameters(), lr=lr, eps=1e-8, weight_decay=0.0)
95 |
96 | epochs = 100
97 | device = 'cuda:0'
98 |
99 | model = model.to(device)
100 | for epoch in range(epochs):
101 | epoch_loss = 0
102 | ct = 0
103 | for batch in train_loader:
104 | data = batch['joint_states'].to(device)
105 | y_pred = model(data)
106 |
107 | opt.zero_grad()
108 |
109 | y_label = batch['tau_ests'].to(device)
110 |
111 | tau_est_loss = ((y_pred - y_label) ** 2).mean()
112 | loss = tau_est_loss
113 |
114 | loss.backward()
115 | opt.step()
116 | epoch_loss += loss.detach().cpu().numpy()
117 | ct += 1
118 | epoch_loss /= ct
119 |
120 | test_loss = 0
121 | mae = 0
122 | ct = 0
123 | if epoch % 1 == 0:
124 | with torch.no_grad():
125 | for batch in test_loader:
126 | data = batch['joint_states'].to(device)
127 | y_pred = model(data)
128 |
129 | y_label = batch['tau_ests'].to(device)
130 |
131 | tau_est_loss = ((y_pred - y_label) ** 2).mean()
132 | loss = tau_est_loss
133 | test_mae = (y_pred - y_label).abs().mean()
134 |
135 | test_loss += loss
136 | mae += test_mae
137 | ct += 1
138 | test_loss /= ct
139 | mae /= ct
140 |
141 | print(
142 | f'epoch: {epoch} | loss: {epoch_loss:.4f} | test loss: {test_loss:.4f} | mae: {mae:.4f}')
143 |
144 | model_scripted = torch.jit.script(model) # Export to TorchScript
145 | model_scripted.save(actuator_network_path) # Save
146 | return model
147 |
148 | def train_actuator_network_and_plot_predictions(log_dir_root, log_dir, actuator_network_path, load_pretrained_model=False):
149 |
150 | log_path = log_dir_root + log_dir + "log.pkl"
151 | print(log_path)
152 | with open(log_path, 'rb') as file:
153 | data = pkl.load(file)
154 |
155 | datas = data['hardware_closed_loop'][1]
156 |
157 | if len(datas) < 1:
158 | return
159 |
160 | tau_ests = np.zeros((len(datas), 12))
161 | torques = np.zeros((len(datas), 12))
162 | joint_positions = np.zeros((len(datas), 12))
163 | joint_position_targets = np.zeros((len(datas), 12))
164 | joint_velocities = np.zeros((len(datas), 12))
165 |
166 | if "tau_est" not in datas[0].keys():
167 | return
168 |
169 | for i in range(len(datas)):
170 | tau_ests[i, :] = datas[i]["tau_est"]
171 | torques[i, :] = datas[i]["torques"]
172 | joint_positions[i, :] = datas[i]["joint_pos"]
173 | joint_position_targets[i, :] = datas[i]["joint_pos_target"]
174 | joint_velocities[i, :] = datas[i]["joint_vel"]
175 |
176 | timesteps = np.array(range(len(datas))) / 50.0
177 |
178 | import matplotlib.pyplot as plt
179 |
180 | joint_position_errors = joint_positions - joint_position_targets
181 | joint_velocities = joint_velocities
182 |
183 | joint_position_errors = torch.tensor(joint_position_errors, dtype=torch.float)
184 | joint_velocities = torch.tensor(joint_velocities, dtype=torch.float)
185 | tau_ests = torch.tensor(tau_ests, dtype=torch.float)
186 |
187 | xs = []
188 | ys = []
189 | step = 2
190 | # a single actuator network is shared across all 12 joints (assumed identical), so their samples are pooled
191 | for i in range(12):
192 | xs_joint = [joint_position_errors[2:-step+1, i:i+1],
193 | joint_position_errors[1:-step, i:i+1],
194 | joint_position_errors[:-step-1, i:i+1],
195 | joint_velocities[2:-step+1, i:i+1],
196 | joint_velocities[1:-step, i:i+1],
197 | joint_velocities[:-step-1, i:i+1]]
198 |
199 | tau_ests_joint = [tau_ests[step:-1, i:i+1]]
200 |
201 | xs_joint = torch.cat(xs_joint, dim=1)
202 | xs += [xs_joint]
203 | ys += tau_ests_joint
204 |
205 | xs = torch.cat(xs, dim=0)
206 | ys = torch.cat(ys, dim=0)
207 |
208 | if load_pretrained_model:
209 | model = torch.jit.load(actuator_network_path).to('cpu')
210 | else:
211 | model = train_actuator_network(xs, ys, actuator_network_path).to("cpu")
212 |
213 | tau_preds = model(xs).detach().reshape(12, -1).T
214 |
215 | plot_length = 300
216 |
217 | timesteps = timesteps[:plot_length]
218 | torques = torques[step:plot_length+step]
219 | tau_ests = tau_ests[step:plot_length+step]
220 | tau_preds = tau_preds[:plot_length]
221 |
222 | fig, axs = plt.subplots(6, 2, figsize=(14, 6))
223 | axs = np.array(axs).flatten()
224 | for i in range(12):
225 | axs[i].plot(timesteps, torques[:, i], label="idealized torque")
226 | axs[i].plot(timesteps, tau_ests[:, i], label="true torque")
227 | axs[i].plot(timesteps, tau_preds[:, i], linestyle='--', label="actuator model predicted torque")
228 | plt.legend()
229 |
230 | plt.show()
231 |
232 |
233 |
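234 | # Inference sketch (illustrative): the trained network maps a 6-D input
235 | # (joint position error and joint velocity at the current and two previous samples)
236 | # to the estimated torque of a single joint; one network is shared by all 12 joints.
237 | #   model = torch.jit.load("../../resources/actuator_nets/unitree_go1.pt")
238 | #   tau = model(torch.tensor([[q_err_t, q_err_t1, q_err_t2, qd_t, qd_t1, qd_t2]]))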
--------------------------------------------------------------------------------
/scripts/loading_utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import io
3 | import torch
4 |
5 | class CPU_Unpickler(pickle.Unpickler):
6 | def find_class(self, module, name):
7 | if module == 'torch.storage' and name == '_load_from_bytes':
8 | return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
9 | else:
10 | return super().find_class(module, name)
11 |
12 |
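13 | # Illustrative usage (the pickle path is hypothetical): unpickle a log that was saved
14 | # with CUDA tensors on a machine without a GPU by remapping all storages to CPU.
15 | #
16 | #   with open("log.pkl", "rb") as f:
17 | #       data = CPU_Unpickler(f).load()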
--------------------------------------------------------------------------------
/scripts/play_dribbling_custom.py:
--------------------------------------------------------------------------------
1 | import isaacgym
2 |
3 | assert isaacgym
4 | import torch
5 | import numpy as np
6 |
7 | import glob
8 | import pickle as pkl
9 |
10 | from dribblebot.envs import *
11 | from dribblebot.envs.base.legged_robot_config import Cfg
12 | from dribblebot.envs.go1.go1_config import config_go1
13 | from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv
14 |
15 | from loading_utils import CPU_Unpickler
16 |
17 | from tqdm import tqdm
18 | import wandb
19 |
20 | def load_policy(logdir):
21 | body_file = wandb.restore('tmp/legged_data/body_68000.jit', run_path=logdir)
22 | body = torch.jit.load(body_file.name)
23 | import os
24 |
25 | adaptation_module_file = wandb.restore('tmp/legged_data/adaptation_module_68000.jit', run_path=logdir)
26 | adaptation_module = torch.jit.load(adaptation_module_file.name)
27 |
28 | def policy(obs, info={}):
29 | i = 0
30 | latent = adaptation_module.forward(obs["obs_history"].to('cpu'))
31 | action = body.forward(torch.cat((obs["obs_history"].to('cpu'), latent), dim=-1))
32 | info['latent'] = latent
33 | return action
34 |
35 | return policy
36 |
37 |
38 | def load_env(logdir, headless=False):
39 |
40 | import wandb
41 | cfg_file = wandb.restore('config.yaml', run_path=logdir)
42 |
43 | import yaml
44 | with open(cfg_file.name, 'rb') as file:
45 | cfg = yaml.safe_load(file)
46 | cfg = cfg["Cfg"]["value"]
47 | print("Cfg from config.yaml:", cfg.keys())
48 |
49 | for key, value in cfg.items():
50 | if hasattr(Cfg, key):
51 | for key2, value2 in cfg[key].items():
52 | setattr(getattr(Cfg, key), key2, value2)
53 |
54 | # turn off DR for evaluation script
55 | Cfg.domain_rand.push_robots = False
56 | Cfg.domain_rand.randomize_friction = False
57 | Cfg.domain_rand.randomize_gravity = False
58 | Cfg.domain_rand.randomize_restitution = False
59 | Cfg.domain_rand.randomize_motor_offset = False
60 | Cfg.domain_rand.randomize_motor_strength = False
61 | Cfg.domain_rand.randomize_friction_indep = False
62 | Cfg.domain_rand.randomize_ground_friction = False
63 | Cfg.domain_rand.randomize_base_mass = False
64 | Cfg.domain_rand.randomize_Kd_factor = False
65 | Cfg.domain_rand.randomize_Kp_factor = False
66 | Cfg.domain_rand.randomize_joint_friction = False
67 | Cfg.domain_rand.randomize_com_displacement = False
68 | Cfg.domain_rand.randomize_tile_roughness = False
69 | Cfg.domain_rand.tile_roughness_range = [0.0, 0.0]
70 |
71 | Cfg.env.num_recording_envs = 1
72 | Cfg.env.num_envs = 1
73 | # Cfg.env.num_observations = 75
74 | Cfg.terrain.num_rows = 5
75 | Cfg.terrain.num_cols = 5
76 | Cfg.terrain.border_size = 0
77 | Cfg.terrain.num_border_boxes = 0
78 | Cfg.terrain.center_robots = True
79 | Cfg.terrain.center_span = 1
80 | Cfg.terrain.teleport_robots = True
81 | # Cfg.terrain.mesh_type = "plane"
82 |
83 | Cfg.robot.name = "go1"
84 | Cfg.sensors.sensor_names = [
85 | "ObjectSensor",
86 | "OrientationSensor",
87 | "RCSensor",
88 | "JointPositionSensor",
89 | "JointVelocitySensor",
90 | "ActionSensor",
91 |         "LastActionSensor",
92 | "ClockSensor",
93 | "YawSensor",
94 | "TimingSensor",
95 | ]
96 | Cfg.sensors.sensor_args = {
97 | "ObjectSensor": {},
98 | "OrientationSensor": {},
99 | "RCSensor": {},
100 | "JointPositionSensor": {},
101 | "JointVelocitySensor": {},
102 | "ActionSensor": {},
103 |         "LastActionSensor": {"delay": 1},
104 | "ClockSensor": {},
105 | "YawSensor": {},
106 | "TimingSensor":{},
107 | }
108 |
109 | Cfg.sensors.privileged_sensor_names = {
110 | "BodyVelocitySensor": {},
111 | "ObjectVelocitySensor": {},
112 | }
113 | Cfg.sensors.privileged_sensor_args = {
114 | "BodyVelocitySensor": {},
115 | "ObjectVelocitySensor": {},
116 | }
117 | Cfg.domain_rand.lag_timesteps = 6
118 | Cfg.domain_rand.randomize_lag_timesteps = True
119 | Cfg.control.control_type = "actuator_net"
120 | Cfg.env.num_privileged_obs = 6
121 |
122 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper
123 |
124 |     env = VelocityTrackingEasyEnv(sim_device='cuda:0', headless=headless, cfg=Cfg)
125 | env = HistoryWrapper(env)
126 |
127 | policy = load_policy(logdir)
128 |
129 | return env, policy
130 |
131 |
132 | def play_go1(headless=True):
133 |
134 | log_dir = "improbableailab/dribbling/bvggoq26/"
135 |
136 | env, policy = load_env(log_dir, headless=headless)
137 |
138 | num_eval_steps = 500
139 | gaits = {"pronking": [0, 0, 0],
140 | "trotting": [0.5, 0, 0],
141 | "bounding": [0, 0.5, 0],
142 | "pacing": [0, 0, 0.5]}
143 |
144 | x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 1.0, 0.0, 0.0
145 | body_height_cmd = 0.0
146 | step_frequency_cmd = 3.0
147 | gait = torch.tensor(gaits["trotting"])
148 | footswing_height_cmd = 0.09
149 | pitch_cmd = 0.0
150 | roll_cmd = 0.0
151 | stance_width_cmd = 0.0
152 |
153 | measured_x_vels = np.zeros(num_eval_steps)
154 | target_x_vels = np.ones(num_eval_steps) * x_vel_cmd
155 | joint_positions = np.zeros((num_eval_steps, 12))
156 |
157 | import imageio
158 | mp4_writer = imageio.get_writer('dribbling.mp4', fps=50)
159 |
160 | obs = env.reset()
161 | ep_rew = 0
162 | for i in tqdm(range(num_eval_steps)):
163 | with torch.no_grad():
164 | actions = policy(obs)
165 | env.commands[:, 0] = x_vel_cmd
166 | env.commands[:, 1] = y_vel_cmd
167 | env.commands[:, 2] = yaw_vel_cmd
168 | env.commands[:, 3] = body_height_cmd
169 | env.commands[:, 4] = step_frequency_cmd
170 | env.commands[:, 5:8] = gait
171 |         env.commands[:, 8] = 0.5  # gait duration command (held at 0.5, as in training)
172 | env.commands[:, 9] = footswing_height_cmd
173 | env.commands[:, 10] = pitch_cmd
174 | env.commands[:, 11] = roll_cmd
175 | env.commands[:, 12] = stance_width_cmd
176 | obs, rew, done, info = env.step(actions)
177 | measured_x_vels[i] = env.base_lin_vel[0, 0]
178 | joint_positions[i] = env.dof_pos[0, :].cpu()
179 | ep_rew += rew
180 |
181 | img = env.render(mode='rgb_array')
182 | mp4_writer.append_data(img)
183 |
184 | out_of_limits = -(env.dof_pos - env.dof_pos_limits[:, 0]).clip(max=0.) # lower limit
185 | out_of_limits += (env.dof_pos - env.dof_pos_limits[:, 1]).clip(min=0.)
186 |
187 | mp4_writer.close()
188 |
189 | # plot target and measured forward velocity
190 | from matplotlib import pyplot as plt
191 | fig, axs = plt.subplots(2, 1, figsize=(12, 5))
192 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), measured_x_vels, color='black', linestyle="-", label="Measured")
193 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), target_x_vels, color='black', linestyle="--", label="Desired")
194 | axs[0].legend()
195 | axs[0].set_title("Forward Linear Velocity")
196 | axs[0].set_xlabel("Time (s)")
197 | axs[0].set_ylabel("Velocity (m/s)")
198 |
199 | axs[1].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), joint_positions, linestyle="-", label="Measured")
200 | axs[1].set_title("Joint Positions")
201 | axs[1].set_xlabel("Time (s)")
202 | axs[1].set_ylabel("Joint Position (rad)")
203 |
204 | plt.tight_layout()
205 | plt.show()
206 |
207 |
208 | if __name__ == '__main__':
209 | # to see the environment rendering, set headless=False
210 | play_go1(headless=False)
211 |
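212 | # Note: this script restores the policy checkpoints and config directly from the
213 | # wandb run (improbableailab/dribbling/bvggoq26), so it assumes a wandb login with
214 | # access to that run. play_dribbling_pretrained.py instead loads the local copies
215 | # under ../runs/ and does not need wandb access.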
--------------------------------------------------------------------------------
/scripts/play_dribbling_pretrained.py:
--------------------------------------------------------------------------------
1 | import isaacgym
2 |
3 | assert isaacgym
4 | import torch
5 | import numpy as np
6 | import glob
7 |
8 | from dribblebot.envs import *
9 | from dribblebot.envs.base.legged_robot_config import Cfg
10 | from dribblebot.envs.go1.go1_config import config_go1
11 | from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv
12 |
13 | from tqdm import tqdm
14 |
15 | def load_policy(logdir):
16 | body = torch.jit.load(logdir + '/body.jit', map_location="cpu")
17 | import os
18 | adaptation_module = torch.jit.load(logdir + '/adaptation_module.jit', map_location='cpu')
19 |
20 | def policy(obs, info={}):
21 | i = 0
22 | latent = adaptation_module.forward(obs["obs_history"].to('cpu'))
23 | action = body.forward(torch.cat((obs["obs_history"].to('cpu'), latent), dim=-1))
24 | info['latent'] = latent
25 | return action
26 |
27 | return policy
28 |
29 |
30 | def load_env(label, headless=False):
31 | dirs = glob.glob(f"../runs/{label}/*")
32 | logdir = sorted(dirs)[-1]
33 |
34 | import yaml
35 | with open(logdir + "/config.yaml", 'rb') as file:
36 | cfg = yaml.safe_load(file)
37 | cfg = cfg["Cfg"]
38 |
39 | for key, value in cfg.items():
40 | if hasattr(Cfg, key):
41 | for key2, value2 in cfg[key].items():
42 | setattr(getattr(Cfg, key), key2, value2)
43 |
44 | # turn off DR for evaluation script
45 | Cfg.domain_rand.push_robots = False
46 | Cfg.domain_rand.randomize_friction = False
47 | Cfg.domain_rand.randomize_gravity = False
48 | Cfg.domain_rand.randomize_restitution = False
49 | Cfg.domain_rand.randomize_motor_offset = False
50 | Cfg.domain_rand.randomize_motor_strength = False
51 | Cfg.domain_rand.randomize_friction_indep = False
52 | Cfg.domain_rand.randomize_ground_friction = False
53 | Cfg.domain_rand.randomize_base_mass = False
54 | Cfg.domain_rand.randomize_Kd_factor = False
55 | Cfg.domain_rand.randomize_Kp_factor = False
56 | Cfg.domain_rand.randomize_joint_friction = False
57 | Cfg.domain_rand.randomize_com_displacement = False
58 | Cfg.domain_rand.randomize_tile_roughness = True
59 | # Cfg.domain_rand.tile_roughness_range = [0.1, 0.1]
60 | Cfg.domain_rand.tile_roughness_range = [0.0, 0.0]
61 |
62 | Cfg.env.num_recording_envs = 1
63 | Cfg.env.num_envs = 1
64 | # Cfg.env.num_observations = 75
65 | Cfg.terrain.num_rows = 5
66 | Cfg.terrain.num_cols = 5
67 | Cfg.terrain.border_size = 0
68 | Cfg.terrain.num_border_boxes = 0
69 | Cfg.terrain.center_robots = True
70 | Cfg.terrain.center_span = 1
71 | Cfg.terrain.teleport_robots = True
72 |
73 | Cfg.robot.name = "go1"
74 | Cfg.sensors.sensor_names = [
75 | "ObjectSensor",
76 | "OrientationSensor",
77 | "RCSensor",
78 | "JointPositionSensor",
79 | "JointVelocitySensor",
80 | "ActionSensor",
81 |         "LastActionSensor",
82 | "ClockSensor",
83 | "YawSensor",
84 | "TimingSensor",
85 | ]
86 | Cfg.sensors.sensor_args = {
87 | "ObjectSensor": {},
88 | "OrientationSensor": {},
89 | "RCSensor": {},
90 | "JointPositionSensor": {},
91 | "JointVelocitySensor": {},
92 | "ActionSensor": {},
93 |         "LastActionSensor": {"delay": 1},
94 | "ClockSensor": {},
95 | "YawSensor": {},
96 | "TimingSensor":{},
97 | }
98 |
99 | Cfg.sensors.privileged_sensor_names = {
100 | "BodyVelocitySensor": {},
101 | "ObjectVelocitySensor": {},
102 | }
103 | Cfg.sensors.privileged_sensor_args = {
104 | "BodyVelocitySensor": {},
105 | "ObjectVelocitySensor": {},
106 | }
107 | Cfg.domain_rand.lag_timesteps = 6
108 | Cfg.domain_rand.randomize_lag_timesteps = True
109 | Cfg.control.control_type = "actuator_net"
110 | Cfg.env.num_privileged_obs = 6
111 |
112 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper
113 |
114 |     env = VelocityTrackingEasyEnv(sim_device='cuda:0', headless=headless, cfg=Cfg)
115 | env = HistoryWrapper(env)
116 |
117 | policy = load_policy(logdir)
118 | return env, policy
119 |
120 |
121 | def play_go1(headless=True):
122 |
123 | label = "improbableailab/dribbling/bvggoq26"
124 | env, policy = load_env(label, headless=headless)
125 |
126 | num_eval_steps = 500
127 | gaits = {"pronking": [0, 0, 0],
128 | "trotting": [0.5, 0, 0],
129 | "bounding": [0, 0.5, 0],
130 | "pacing": [0, 0, 0.5]}
131 |
132 | x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 1.0, 0.0, 0.0
133 | body_height_cmd = 0.0
134 | step_frequency_cmd = 3.0
135 | gait = torch.tensor(gaits["trotting"])
136 | footswing_height_cmd = 0.09
137 | pitch_cmd = 0.0
138 | roll_cmd = 0.0
139 | stance_width_cmd = 0.0
140 |
141 | measured_x_vels = np.zeros(num_eval_steps)
142 | target_x_vels = np.ones(num_eval_steps) * x_vel_cmd
143 | joint_positions = np.zeros((num_eval_steps, 12))
144 |
145 | import imageio
146 | mp4_writer = imageio.get_writer('dribbling.mp4', fps=50)
147 |
148 | obs = env.reset()
149 | ep_rew = 0
150 | for i in tqdm(range(num_eval_steps)):
151 | with torch.no_grad():
152 | actions = policy(obs)
153 | env.commands[:, 0] = x_vel_cmd
154 | env.commands[:, 1] = y_vel_cmd
155 | env.commands[:, 2] = yaw_vel_cmd
156 | env.commands[:, 3] = body_height_cmd
157 | env.commands[:, 4] = step_frequency_cmd
158 | env.commands[:, 5:8] = gait
159 |         env.commands[:, 8] = 0.5  # gait duration command (held at 0.5, as in training)
160 | env.commands[:, 9] = footswing_height_cmd
161 | env.commands[:, 10] = pitch_cmd
162 | env.commands[:, 11] = roll_cmd
163 | env.commands[:, 12] = stance_width_cmd
164 | obs, rew, done, info = env.step(actions)
165 | measured_x_vels[i] = env.base_lin_vel[0, 0]
166 | joint_positions[i] = env.dof_pos[0, :].cpu()
167 | ep_rew += rew
168 |
169 | img = env.render(mode='rgb_array')
170 | mp4_writer.append_data(img)
171 |
172 | out_of_limits = -(env.dof_pos - env.dof_pos_limits[:, 0]).clip(max=0.) # lower limit
173 | out_of_limits += (env.dof_pos - env.dof_pos_limits[:, 1]).clip(min=0.)
174 |
175 | mp4_writer.close()
176 |
177 | # plot target and measured forward velocity
178 | from matplotlib import pyplot as plt
179 | fig, axs = plt.subplots(2, 1, figsize=(12, 5))
180 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), measured_x_vels, color='black', linestyle="-", label="Measured")
181 | axs[0].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), target_x_vels, color='black', linestyle="--", label="Desired")
182 | axs[0].legend()
183 | axs[0].set_title("Forward Linear Velocity")
184 | axs[0].set_xlabel("Time (s)")
185 | axs[0].set_ylabel("Velocity (m/s)")
186 |
187 | axs[1].plot(np.linspace(0, num_eval_steps * env.dt, num_eval_steps), joint_positions, linestyle="-", label="Measured")
188 | axs[1].set_title("Joint Positions")
189 | axs[1].set_xlabel("Time (s)")
190 | axs[1].set_ylabel("Joint Position (rad)")
191 |
192 | plt.tight_layout()
193 | plt.show()
194 |
195 |
196 | if __name__ == '__main__':
197 | # to see the environment rendering, set headless=False
198 | play_go1(headless=False)
199 |
--------------------------------------------------------------------------------
/scripts/prepare_policy.py:
--------------------------------------------------------------------------------
1 | import isaacgym
2 |
3 | assert isaacgym
4 | import torch
5 |
6 | import glob
7 | import pickle as pkl
8 | import lcm
9 | import os
10 |
11 |
12 |
13 | def load_and_save_policy(run_path, label):
14 |
15 | import wandb
16 | api = wandb.Api()
17 | run = api.run(run_path)
18 |
19 | # load config
20 | from dribblebot.envs.base.legged_robot_config import Cfg
21 | from dribblebot.envs.go1.go1_config import config_go1
22 | config_go1(Cfg)
23 |
24 | all_cfg = run.config
25 | cfg = all_cfg["Cfg"]
26 |
27 | for key, value in cfg.items():
28 | if hasattr(Cfg, key):
29 | for key2, value2 in cfg[key].items():
30 | setattr(getattr(Cfg, key), key2, value2)
31 |
32 | from dribblebot import MINI_GYM_ROOT_DIR
33 | path = MINI_GYM_ROOT_DIR + "/runs/" + run_path + "/" + label + "/"
34 | if not os.path.exists(path):
35 | os.makedirs(path)
36 | print("path: ", path)
37 |
38 | adaptation_module_path = os.path.join(path, f'adaptation_module.jit')
39 | adaptation_module_file = wandb.restore('tmp/legged_data/adaptation_module_latest.jit', run_path=run_path)
40 | # adaptation_module_file = run.file('tmp/legged_data/adaptation_module_68000.jit').download(replace=True, root='./tmp')
41 | adaptation_module = torch.jit.load(adaptation_module_file.name, map_location="cpu")
42 | adaptation_module.save(adaptation_module_path)
43 |
44 | body_path = os.path.join(path, f'body.jit')
45 | body_file = wandb.restore('tmp/legged_data/body_latest.jit', run_path=run_path)
46 | # body_file = run.file('tmp/legged_data/body_68000.jit').download(replace=True, root='./tmp')
47 | body = torch.jit.load(body_file.name, map_location="cpu")
48 | body.save(body_path)
49 |
50 | ac_weights_path = os.path.join(path, f'ac_weights.pt')
51 | ac_weights_file = wandb.restore('tmp/legged_data/ac_weights_68000.pt', run_path=run_path)
52 | # ac_weights_file = run.file('tmp/legged_data/ac_weights_68000.pt').download(replace=True, root='./tmp')
53 | ac_weights = torch.load(ac_weights_file.name, map_location="cpu")
54 | # ac_weights.load_state_dict(torch.load(ac_weights_file.name))
55 | torch.save(ac_weights, ac_weights_path)
56 |
57 | cfg_path = os.path.join(path, f'config.yaml')
58 | import yaml
59 | with open(cfg_path, 'w') as f:
60 | yaml.dump(all_cfg, f)
61 |
62 | if __name__ == '__main__':
63 |
64 | run_path = "improbableailab/dribbling/bvggoq26"
65 | label = "dribbling_pretrained"
66 |
67 |
68 | load_and_save_policy(run_path, label)
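69 |
70 | # The exported files (adaptation_module.jit, body.jit, ac_weights.pt, config.yaml)
71 | # land under <MINI_GYM_ROOT_DIR>/runs/<run_path>/<label>/, which matches the layout
72 | # play_dribbling_pretrained.py looks up via glob("../runs/improbableailab/dribbling/bvggoq26/*").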
--------------------------------------------------------------------------------
/scripts/train_dribbling.py:
--------------------------------------------------------------------------------
1 | def train_go1(headless=True):
2 |
3 | import isaacgym
4 | assert isaacgym
5 | import torch
6 |
7 | from dribblebot.envs.base.legged_robot_config import Cfg
8 | from dribblebot.envs.go1.go1_config import config_go1
9 | from dribblebot.envs.go1.velocity_tracking import VelocityTrackingEasyEnv
10 |
11 | from dribblebot_learn.ppo_cse import Runner
12 | from dribblebot.envs.wrappers.history_wrapper import HistoryWrapper
13 | from dribblebot_learn.ppo_cse.actor_critic import AC_Args
14 | from dribblebot_learn.ppo_cse.ppo import PPO_Args
15 | from dribblebot_learn.ppo_cse import RunnerArgs
16 |
17 | config_go1(Cfg)
18 | Cfg.env.num_envs = 1000
19 |
20 | RunnerArgs.resume = True
21 | RunnerArgs.resume_path = "improbableailab/dribbling/j34kr9ds"
22 | RunnerArgs.resume_checkpoint = 'tmp/legged_data/ac_weights_last.pt'
23 |
24 |
25 | Cfg.robot.name = "go1"
26 | Cfg.sensors.sensor_names = [
27 | "ObjectSensor",
28 | "OrientationSensor",
29 | "RCSensor",
30 | "JointPositionSensor",
31 | "JointVelocitySensor",
32 | "ActionSensor",
33 | "LastActionSensor",
34 | "ClockSensor",
35 | "YawSensor",
36 | "TimingSensor",
37 | ]
38 | Cfg.sensors.sensor_args = {
39 | "ObjectSensor": {},
40 | "OrientationSensor": {},
41 | "RCSensor": {},
42 | "JointPositionSensor": {},
43 | "JointVelocitySensor": {},
44 | "ActionSensor": {},
45 | "LastActionSensor": {"delay": 1},
46 | "ClockSensor": {},
47 | "YawSensor": {},
48 | "TimingSensor":{},
49 | }
50 | Cfg.sensors.privileged_sensor_names = {
51 | "BodyVelocitySensor": {},
52 | "ObjectVelocitySensor": {},
53 | }
54 | Cfg.sensors.privileged_sensor_args = {
55 | "BodyVelocitySensor": {},
56 | "ObjectVelocitySensor": {},
57 | }
58 |
59 | Cfg.commands.num_lin_vel_bins = 30
60 | Cfg.commands.num_ang_vel_bins = 30
61 | Cfg.curriculum_thresholds.tracking_ang_vel = 0.7
62 | Cfg.curriculum_thresholds.tracking_lin_vel = 0.8
63 | Cfg.curriculum_thresholds.tracking_contacts_shaped_vel = 0.90
64 | Cfg.curriculum_thresholds.tracking_contacts_shaped_force = 0.90
65 | Cfg.curriculum_thresholds.dribbling_ball_vel = 0.8
66 |
67 | Cfg.commands.distributional_commands = True
68 |
69 | Cfg.domain_rand.lag_timesteps = 6
70 | Cfg.domain_rand.randomize_lag_timesteps = True
71 | Cfg.control.control_type = "actuator_net"
72 |
73 | Cfg.domain_rand.randomize_rigids_after_start = False
74 | Cfg.domain_rand.randomize_friction_indep = False
75 | Cfg.domain_rand.randomize_friction = False # True
76 | Cfg.domain_rand.randomize_restitution = False # True
77 | Cfg.domain_rand.restitution_range = [0.0, 0.4]
78 | Cfg.domain_rand.randomize_base_mass = True
79 | Cfg.domain_rand.added_mass_range = [-1.0, 3.0]
80 | Cfg.domain_rand.randomize_gravity = False
81 | Cfg.domain_rand.gravity_range = [-1.0, 1.0]
82 | Cfg.domain_rand.gravity_rand_interval_s = 8.0
83 | Cfg.domain_rand.gravity_impulse_duration = 0.99
84 | Cfg.domain_rand.randomize_com_displacement = False
85 | Cfg.domain_rand.com_displacement_range = [-0.15, 0.15]
86 | Cfg.domain_rand.randomize_ground_friction = True
87 | Cfg.domain_rand.ground_friction_range = [0.0, 0.0]
88 | Cfg.domain_rand.randomize_motor_strength = True
89 | Cfg.domain_rand.motor_strength_range = [0.99, 1.01]
90 | Cfg.domain_rand.randomize_motor_offset = True
91 | Cfg.domain_rand.motor_offset_range = [-0.002, 0.002]
92 | Cfg.domain_rand.push_robots = False
93 | Cfg.domain_rand.randomize_Kp_factor = True
94 | Cfg.domain_rand.randomize_Kd_factor = True
95 | Cfg.domain_rand.randomize_ball_drag = True
96 | Cfg.domain_rand.drag_range = [0.1, 0.8]
97 | Cfg.domain_rand.ball_drag_rand_interval_s = 15.0
98 |
99 | Cfg.env.num_observation_history = 15
100 | Cfg.reward_scales.feet_contact_forces = 0.0
101 | Cfg.env.num_envs = 1000
102 |
103 | Cfg.commands.exclusive_phase_offset = False
104 | Cfg.commands.pacing_offset = False
105 | Cfg.commands.balance_gait_distribution = False
106 | Cfg.commands.binary_phases = False
107 | Cfg.commands.gaitwise_curricula = False
108 |
109 | ###############################
110 | # soccer dribbling configuration
111 | ###############################
112 |
113 | # ball parameters
114 | Cfg.env.add_balls = True
115 |
116 | # domain randomization ranges
117 | Cfg.domain_rand.rand_interval_s = 6
118 | Cfg.domain_rand.friction_range = [0.0, 1.5]
119 | Cfg.domain_rand.randomize_ground_friction = True
120 | Cfg.domain_rand.ground_friction_range = [0.7, 4.0]
121 | Cfg.domain_rand.restitution_range = [0.0, 0.4]
122 | Cfg.domain_rand.added_mass_range = [-1.0, 3.0]
123 | Cfg.domain_rand.gravity_range = [-1.0, 1.0]
124 | Cfg.domain_rand.motor_strength_range = [0.99, 1.01]
125 | Cfg.domain_rand.motor_offset_range = [-0.002, 0.002]
126 | Cfg.domain_rand.tile_roughness_range = [0.0, 0.0]
127 |
128 | # privileged obs in use
129 | Cfg.env.num_privileged_obs = 6
130 | Cfg.env.priv_observe_ball_drag = True
131 |
132 |
133 | # sensory observation
134 | Cfg.commands.num_commands = 15
135 | Cfg.env.episode_length_s = 40.
136 | Cfg.env.num_observations = 75
137 |
138 | # terrain configuration
139 | Cfg.terrain.border_size = 0.0
140 | Cfg.terrain.mesh_type = "boxes_tm"
141 | Cfg.terrain.num_cols = 20
142 | Cfg.terrain.num_rows = 20
143 | Cfg.terrain.terrain_length = 5.0
144 | Cfg.terrain.terrain_width = 5.0
145 | Cfg.terrain.num_border_boxes = 5.0
146 | Cfg.terrain.x_init_range = 0.2
147 | Cfg.terrain.y_init_range = 0.2
148 | Cfg.terrain.teleport_thresh = 0.3
149 | Cfg.terrain.teleport_robots = False
150 | Cfg.terrain.center_robots = False
151 | Cfg.terrain.center_span = 3
152 | Cfg.terrain.horizontal_scale = 0.05
153 | Cfg.terrain.terrain_proportions = [1.0, 0.0, 0.0, 0.0, 0.0]
154 | Cfg.terrain.curriculum = False
155 | Cfg.terrain.difficulty_scale = 1.0
156 | Cfg.terrain.max_step_height = 0.26
157 | Cfg.terrain.min_step_run = 0.25
158 | Cfg.terrain.max_step_run = 0.4
159 | Cfg.terrain.max_init_terrain_level = 1
160 |
161 | # terminal conditions
162 | Cfg.rewards.use_terminal_body_height = True
163 | Cfg.rewards.terminal_body_height = 0.2
164 | Cfg.rewards.use_terminal_roll_pitch = False
165 | Cfg.rewards.terminal_body_ori = 0.5
166 |
167 | # command sampling
168 | Cfg.commands.resampling_time = 7
169 | Cfg.commands.heading_command = False
170 |
171 | Cfg.commands.lin_vel_x = [-1.5, 1.5]
172 | Cfg.commands.lin_vel_y = [-1.5, 1.5]
173 | Cfg.commands.ang_vel_yaw = [-0.0, 0.0]
174 | Cfg.commands.body_height_cmd = [-0.05, 0.05]
175 | Cfg.commands.gait_frequency_cmd_range = [3.0, 3.0]
176 | Cfg.commands.gait_phase_cmd_range = [0.5, 0.5]
177 | Cfg.commands.gait_offset_cmd_range = [0.0, 0.0]
178 | Cfg.commands.gait_bound_cmd_range = [0.0, 0.0]
179 | Cfg.commands.gait_duration_cmd_range = [0.5, 0.5]
180 | Cfg.commands.footswing_height_range = [0.09, 0.09]
181 | Cfg.commands.body_pitch_range = [-0.0, 0.0]
182 | Cfg.commands.body_roll_range = [-0.0, 0.0]
183 | Cfg.commands.stance_width_range = [0.0, 0.1]
184 | Cfg.commands.stance_length_range = [0.0, 0.1]
185 |
186 | Cfg.commands.limit_vel_x = [-1.5, 1.5]
187 | Cfg.commands.limit_vel_y = [-1.5, 1.5]
188 | Cfg.commands.limit_vel_yaw = [-0.0, 0.0]
189 | Cfg.commands.limit_body_height = [-0.05, 0.05]
190 | Cfg.commands.limit_gait_frequency = [3.0, 3.0]
191 | Cfg.commands.limit_gait_phase = [0.5, 0.5]
192 | Cfg.commands.limit_gait_offset = [0.0, 0.0]
193 | Cfg.commands.limit_gait_bound = [0.0, 0.0]
194 | Cfg.commands.limit_gait_duration = [0.5, 0.5]
195 | Cfg.commands.limit_footswing_height = [0.09, 0.09]
196 | Cfg.commands.limit_body_pitch = [-0.0, 0.0]
197 | Cfg.commands.limit_body_roll = [-0.0, 0.0]
198 | Cfg.commands.limit_stance_width = [0.0, 0.1]
199 | Cfg.commands.limit_stance_length = [0.0, 0.1]
200 |
201 | Cfg.commands.num_bins_vel_x = 1
202 | Cfg.commands.num_bins_vel_y = 1
203 | Cfg.commands.num_bins_vel_yaw = 1
204 | Cfg.commands.num_bins_body_height = 1
205 | Cfg.commands.num_bins_gait_frequency = 1
206 | Cfg.commands.num_bins_gait_phase = 1
207 | Cfg.commands.num_bins_gait_offset = 1
208 | Cfg.commands.num_bins_gait_bound = 1
209 | Cfg.commands.num_bins_gait_duration = 1
210 | Cfg.commands.num_bins_footswing_height = 1
211 | Cfg.commands.num_bins_body_roll = 1
212 | Cfg.commands.num_bins_body_pitch = 1
213 | Cfg.commands.num_bins_stance_width = 1
214 |
215 | Cfg.rewards.constrict = False
216 |
217 | # reward function
218 | Cfg.reward_scales.orientation = -5.0
219 | Cfg.reward_scales.torques = -0.0001
220 | Cfg.reward_scales.dof_vel = -0.0001
221 | Cfg.reward_scales.dof_acc = -2.5e-7
222 | Cfg.reward_scales.collision = -5.0
223 | Cfg.reward_scales.action_rate = -0.01
224 | Cfg.reward_scales.tracking_contacts_shaped_force = 4.0
225 | Cfg.reward_scales.tracking_contacts_shaped_vel = 4.0
226 | Cfg.reward_scales.dof_pos_limits = -10.0
227 | Cfg.reward_scales.dof_pos = -0.05
228 | Cfg.reward_scales.action_smoothness_1 = -0.1
229 | Cfg.reward_scales.action_smoothness_2 = -0.1
230 | Cfg.reward_scales.dribbling_robot_ball_vel = 0.5
231 | Cfg.reward_scales.dribbling_robot_ball_pos = 4.0
232 | Cfg.reward_scales.dribbling_ball_vel = 4.0
233 | Cfg.reward_scales.dribbling_robot_ball_yaw = 4.0
234 | Cfg.reward_scales.dribbling_ball_vel_norm = 4.0
235 | Cfg.reward_scales.dribbling_ball_vel_angle = 4.0
236 |
237 | Cfg.reward_scales.tracking_lin_vel = 0.0
238 | Cfg.reward_scales.tracking_ang_vel = 0.0
239 | Cfg.reward_scales.lin_vel_z = 0.0
240 | Cfg.reward_scales.ang_vel_xy = 0.0
241 | Cfg.reward_scales.feet_air_time = 0.0
242 |
243 | Cfg.rewards.kappa_gait_probs = 0.07
244 | Cfg.rewards.gait_force_sigma = 100.
245 | Cfg.rewards.gait_vel_sigma = 10.
246 |
247 | Cfg.rewards.reward_container_name = "SoccerRewards"
248 | Cfg.rewards.only_positive_rewards = False
249 | Cfg.rewards.only_positive_rewards_ji22_style = True
250 | Cfg.rewards.sigma_rew_neg = 0.02
251 |
252 | # normalization
253 | Cfg.normalization.friction_range = [0, 1]
254 | Cfg.normalization.ground_friction_range = [0.7, 4.0]
255 | Cfg.terrain.yaw_init_range = 3.14
256 | Cfg.normalization.clip_actions = 10.0
257 |
258 | # reward function (not in use)
259 | Cfg.reward_scales.feet_slip = -0.0
260 | Cfg.reward_scales.jump = 0.0
261 | Cfg.reward_scales.base_height = -0.0
262 | Cfg.reward_scales.feet_impact_vel = -0.0
263 | Cfg.reward_scales.feet_air_time = 0.0
264 |
265 | Cfg.asset.terminate_after_contacts_on = []
266 |
267 | AC_Args.adaptation_labels = []
268 | AC_Args.adaptation_dims = []
269 |
270 | RunnerArgs.save_video_interval = 500
271 |
272 | import wandb
273 | wandb.init(
274 | # set the wandb project where this run will be logged
275 | project="dribbling",
276 |
277 | # track hyperparameters and run metadata
278 | config={
279 | "AC_Args": vars(AC_Args),
280 | "PPO_Args": vars(PPO_Args),
281 | "RunnerArgs": vars(RunnerArgs),
282 | "Cfg": vars(Cfg),
283 | }
284 | )
285 |
286 | device = 'cuda:0'
287 | # device = 'cpu'
288 |     env = VelocityTrackingEasyEnv(sim_device=device, headless=headless, cfg=Cfg)
289 |
290 | env = HistoryWrapper(env)
291 | runner = Runner(env, device=device)
292 | runner.learn(num_learning_iterations=1000000, init_at_random_ep_len=True, eval_freq=100)
293 |
294 |
295 | if __name__ == '__main__':
296 | from pathlib import Path
297 | from dribblebot import MINI_GYM_ROOT_DIR
298 |
299 | stem = Path(__file__).stem
300 |
301 | # to see the environment rendering, set headless=False
302 | train_go1(headless=False)
303 |
304 |
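305 | # Note: RunnerArgs.resume is set above, so training resumes from the checkpoint stored
306 | # in the wandb run improbableailab/dribbling/j34kr9ds; presumably setting
307 | # RunnerArgs.resume = False starts training from scratch. Metrics are logged to a wandb
308 | # project named "dribbling", so a wandb account/login is assumed.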
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages
2 | from setuptools import setup
3 |
4 | setup(
5 | name='dribblebot',
6 | version='1.0.0',
7 | author='Yandong Ji',
8 | license="BSD-3-Clause",
9 | packages=find_packages(),
10 | author_email='ydji1024@gmail.com',
11 | description='Toolkit for deployment of soccer dribbling on the Unitree Go1.',
12 | install_requires=[
13 | 'params-proto==2.10.9',
14 | 'gym==0.18.0',
15 | 'tqdm',
16 | 'matplotlib',
17 | 'numpy==1.23.5',
18 | 'wandb==0.15.0',
19 | 'wandb_osh',
20 | #'moviepy',
21 | 'imageio'
22 | ]
23 | )
24 |
--------------------------------------------------------------------------------