├── LICENSE ├── README.md ├── a1sim ├── __init__.py ├── envs │ ├── __init__.py │ ├── env_builder.py │ ├── env_wrappers │ │ ├── __init__.py │ │ ├── observation_dictionary_to_array_wrapper.py │ │ ├── simple_forward_task.py │ │ ├── simple_openloop.py │ │ └── trajectory_generator_wrapper_env.py │ ├── external │ │ ├── __init__.py │ │ ├── constants.py │ │ └── terrain.py │ ├── gym_envs │ │ ├── __init__.py │ │ └── a1_gym_env.py │ ├── locomotion_gym_config.py │ ├── locomotion_gym_env.py │ ├── prelude_gym_env.py │ ├── sensors │ │ ├── __init__.py │ │ ├── robot_sensors.py │ │ ├── sensor.py │ │ └── space_utils.py │ └── utilities │ │ ├── __init__.py │ │ ├── env_utils.py │ │ └── pose3d.py └── robots │ ├── LieAlgebra.py │ ├── __init__.py │ ├── a1.py │ ├── action_filter.py │ ├── kinematics.py │ ├── laikago_constants.py │ ├── laikago_motor.py │ ├── laikago_pose_utils.py │ ├── minitaur.py │ ├── minitaur_constants.py │ ├── minitaur_motor.py │ ├── minitaur_pose_utils.py │ ├── moving_window_filter.py │ └── robot_config.py ├── approach.gif ├── architecture.png ├── config ├── gait │ ├── experiment.yaml │ ├── ppo.yaml │ ├── reward.yaml │ └── simulation.yaml ├── nav │ ├── bc.json │ └── bcrnn.json └── simulation.yaml ├── implementation.md ├── path.yaml ├── scripts ├── demo_nav.py ├── eval_gait.py ├── eval_nav.py ├── gait │ ├── agent.py │ ├── logger.py │ ├── model.py │ └── wrapper.py ├── nav │ ├── environments.py │ └── interfaces.py ├── path.py ├── train_gait.py ├── train_nav.py └── utils │ ├── convert_dataset.py │ ├── pid.py │ └── split_train_val.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 UT Robot Perception and Learning Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learning to Walk by Steering: Perceptive Quadrupedal Locomotion in Dynamic Environments
2 | [Mingyo Seo](https://mingyoseo.com), [Ryan Gupta](https://sites.utexas.edu/hcrl/people/), [Yifeng Zhu](https://www.cs.utexas.edu/~yifengz), [Alexy Skoutnev](https://alexyskoutnev.github.io/alexyskoutnev-github.io/index.html), [Luis Sentis](https://sites.google.com/view/lsentis), [Yuke Zhu](https://www.cs.utexas.edu/~yukez)
3 | 
4 | [Project](https://ut-austin-rpl.github.io/PRELUDE) | [arXiv](http://arxiv.org/abs/2209.09233)
5 | 
6 | ![intro](approach.gif)
7 | 
8 | ## Abstract
9 | We tackle the problem of perceptive locomotion in dynamic environments. In this problem, a quadruped robot must exhibit robust and agile walking behaviors in response to environmental clutter and moving obstacles. We present a hierarchical learning framework, named PRELUDE, which decomposes the problem of perceptive locomotion into high-level decision making to predict navigation commands and low-level gait generation to realize the target commands. In this framework, we train the high-level navigation controller with imitation learning on human demonstrations collected on a steerable cart and the low-level gait controller with reinforcement learning (RL). Our method is, therefore, able to acquire complex navigation behaviors from human supervision and discover versatile gaits from trial and error. We demonstrate the effectiveness of our approach in simulation and with hardware experiments. Our method outperforms state-of-the-art RL baselines by 38.6% in average distance traversed.
10 | 
11 | If you find our work useful in your research, please consider [citing](#citing).
12 | 
13 | ## Dependencies
14 | - Python 3.8.5 (recommended)
15 | - [Robomimic](https://github.com/ARISE-Initiative/robomimic/)
16 | - [Tianshou](https://github.com/thu-ml/tianshou/)
17 | - [PyBullet](https://github.com/bulletphysics/bullet3/)
18 | - [PyTorch](https://github.com/pytorch/pytorch)
19 | 
20 | ## Installation
21 | Install the environment and its dependencies by running the following command.
22 | ```
23 | pip3 install -e .
24 | ```
25 | You also need to place the asset files in `./data/*`. These asset files can be downloaded [here](https://utexas.box.com/s/oa5c39blv9ma4h4lkdkv84n5zj3mxcg5).
26 | 
27 | ## Creating a Dataset for Navigation Controller
28 | 
29 | To collect human demonstration data for the Navigation Controller, use the following command. You may need a SpaceMouse for teleoperation.
30 | ```
31 | python3 scripts/demo_nav.py --env_type=ENV_TYPE --demo_name=DEMO_NAME
32 | ```
33 | You can specify the difficulty of the environment by changing `ENV_TYPE`. The collected data will be saved in `./save/data_sim/DEMO_NAME` as `pickle` files. Rendered videos and extra logs will be saved in `./save/raw_sim/DEMO_NAME`.
34 | 
35 | To convert the collected data into an `hdf5` dataset file, use the following command. The converted dataset will be saved at `PATH_TO_TARGET_FILE`.
36 | ```
37 | python3 scripts/utils/convert_dataset.py --folder=PATH_TO_DATA_FOLDER --demo_path=PATH_TO_TARGET_FILE
38 | ```
39 | Then, run the following command to split the dataset into training and validation sets. The script overwrites the original dataset file with the split dataset.
40 | ```
41 | python3 scripts/utils/split_train_val.py --dataset=PATH_TO_TARGET_FILE
42 | ```
43 | Dataset files consist of sequences with the following data structure.
44 | ```
45 | hdf5 dataset
46 | ├── agentview_rgb: 212x120x3 array
47 | ├── agentview_depth: 212x120x1 array
48 | ├── yaw: 2D value
49 | ├── actions: 2D value
50 | ├── dones: 1D value
51 | └── rewards: 1D value (not used)
52 | ```
53 | 
54 | ## Training
55 | To train the Gait Controller, use the following command. The configuration at `./config/gait/GAIT_CONFIG` will be used for training. Trained models will be saved in `./save/rl_checkpoint/gait/GAIT_POLICY`.
56 | ```
57 | python3 scripts/train_gait.py --config=GAIT_CONFIG --gait_policy=GAIT_POLICY
58 | ```
59 | 
60 | To train the Navigation Controller, use the following command. You need to create or download ([link](https://utexas.box.com/s/vuneto210i5o5c8vi09cxt49dta2may3)) an `hdf5`-format dataset file for training. The configuration at `./config/nav/NAV_CONFIG.json` will be used for training. Trained models will be saved in `./save/bc_checkpoint`.
61 | ```
62 | python3 scripts/train_nav.py --config=NAV_CONFIG
63 | ```
64 | 
65 | ## Evaluation
66 | You should place the pre-trained models in `./save/*`. Download links for these pre-trained models are provided in the [Dataset and Pre-trained Models](#dataset-and-pre-trained-models) section below.
67 | 
68 | To evaluate the Gait Controller alone, use the following command. The Gait Controller checkpoint at `./save/rl_checkpoint/gait/GAIT_POLICY` will be loaded.
69 | ```
70 | python3 scripts/eval_gait.py --gait_policy=GAIT_POLICY
71 | ```
72 | 
73 | To evaluate PRELUDE with both the Gait Controller and the Navigation Controller, use the following command. The Navigation Controller checkpoint at `./save/bc_checkpoint/NAV_POLICY` will be loaded.
74 | ```
75 | python3 scripts/eval_nav.py --gait_policy=GAIT_POLICY --nav_policy=NAV_POLICY
76 | ```
77 | 
78 | 
79 | ## Dataset and Pre-trained Models
80 | We provide our demonstration dataset collected in simulation ([link](https://utexas.box.com/s/vuneto210i5o5c8vi09cxt49dta2may3)) as well as trained models of the Navigation Controller ([link](https://utexas.box.com/s/l6n5unyswuol4gxwam552u1jkogbaakq)) and the Gait Controller ([link](https://utexas.box.com/s/uv41n7550t1ao7wv0io0er2s8r2ivu2x)).
81 | 
82 | 
83 | ## Implementation Details
84 | Please see [this page](implementation.md) for more information about our implementation details, including the model architecture and training procedures.
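As a quick sanity check after creating a dataset, you can list the contents of a converted `hdf5` file with `h5py`. The snippet below is only an illustrative sketch: the file path is a placeholder and no particular internal group layout is assumed; it simply prints every dataset in the file with its shape and dtype, which should include the fields listed in the dataset structure above (`agentview_rgb`, `agentview_depth`, `yaw`, `actions`, `dones`, `rewards`).
```
import h5py

# Placeholder path: point this at the file produced by convert_dataset.py.
DATASET_PATH = "save/dataset/demo.hdf5"

def describe(name, obj):
    # Print the name, shape, and dtype of every dataset in the file.
    if isinstance(obj, h5py.Dataset):
        print(f"{name}: shape={obj.shape}, dtype={obj.dtype}")

with h5py.File(DATASET_PATH, "r") as f:
    # Recursively walk all groups and datasets in the file.
    f.visititems(describe)
```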
85 | 86 | ## Citing 87 | ``` 88 | @inproceedings{seo2022prelude, 89 | title={Learning to Walk by Steering: Perceptive Quadrupedal Locomotion in Dynamic Environments}, 90 | author={Seo, Mingyo and Gupta, Ryan and Zhu, Yifeng and Skoutnev, Alexy and Sentis, Luis and Zhu, Yuke}, 91 | booktitle={IEEE International Conference on Robotics and Automation (ICRA)}, 92 | year={2023} 93 | } 94 | ``` 95 | -------------------------------------------------------------------------------- /a1sim/__init__.py: -------------------------------------------------------------------------------- 1 | """Set up gym interface for locomotion environments.""" 2 | from a1sim.envs import * 3 | 4 | def make_env(**kwargs): 5 | return A1GymEnv(**kwargs) -------------------------------------------------------------------------------- /a1sim/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from a1sim.envs.gym_envs import * -------------------------------------------------------------------------------- /a1sim/envs/env_builder.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Utilities for building environments.""" 16 | from a1sim.envs import locomotion_gym_env 17 | from a1sim.envs import locomotion_gym_config 18 | from a1sim.envs.env_wrappers import observation_dictionary_to_array_wrapper as obs_dict_to_array_wrapper 19 | from a1sim.envs.env_wrappers import trajectory_generator_wrapper_env 20 | from a1sim.envs.env_wrappers import simple_openloop 21 | from a1sim.envs.env_wrappers import simple_forward_task 22 | from a1sim.envs.sensors import robot_sensors 23 | from a1sim.robots import a1 24 | from a1sim.robots import robot_config 25 | 26 | SENSOR_MODE = {"dis":1,"motor":1,"imu":1,"contact":1,"footpose":0} 27 | 28 | def build_regular_env(robot_class, 29 | motor_control_mode, 30 | sensor_mode, 31 | normal=0, 32 | enable_rendering=False, 33 | on_rack=False, 34 | filter = 0, 35 | action_space = 0, 36 | num_action_repeat=13, 37 | action_limit=(0.75, 0.75, 0.75), 38 | wrap_trajectory_generator=True): 39 | 40 | sim_params = locomotion_gym_config.SimulationParameters() 41 | sim_params.enable_rendering = enable_rendering 42 | sim_params.motor_control_mode = motor_control_mode 43 | sim_params.reset_time = 2 44 | sim_params.num_action_repeat = num_action_repeat 45 | sim_params.enable_action_interpolation = False 46 | if filter: 47 | sim_params.enable_action_filter = True 48 | else: 49 | sim_params.enable_action_filter = False 50 | sim_params.enable_clip_motor_commands = False 51 | sim_params.robot_on_rack = on_rack 52 | dt = sim_params.num_action_repeat*sim_params.sim_time_step_s 53 | 54 | gym_config = locomotion_gym_config.LocomotionGymConfig( 55 | simulation_parameters=sim_params) 56 | 57 | sensors = [] 58 | noise = True if ("noise" in sensor_mode and sensor_mode["noise"]) else False 59 | if sensor_mode["dis"]: 60 | sensors.append(robot_sensors.BaseDisplacementSensor(convert_to_local_frame=True,normal=normal,noise=noise)) 61 | if sensor_mode["imu"]==1: 62 | sensors.append(robot_sensors.IMUSensor(channels=["R", "P", "Y","dR", "dP", "dY"],normal=normal,noise=noise)) 63 | elif sensor_mode["imu"]==2: 64 | sensors.append(robot_sensors.IMUSensor(channels=["dR", "dP", "dY"],noise=noise)) 65 | if sensor_mode["motor"]==1: 66 | sensors.append(robot_sensors.MotorAngleAccSensor(num_motors=a1.NUM_MOTORS,normal=normal,noise=noise,dt=dt)) 67 | elif sensor_mode["motor"]==2: 68 | sensors.append(robot_sensors.MotorAngleSensor(num_motors=a1.NUM_MOTORS,noise=noise)) 69 | if sensor_mode["contact"] == 1: 70 | sensors.append(robot_sensors.FootContactSensor()) 71 | elif sensor_mode["contact"] == 2: 72 | sensors.append(robot_sensors.SimpleFootForceSensor()) 73 | if sensor_mode["footpose"]: 74 | sensors.append(robot_sensors.FootPoseSensor(normal=normal)) 75 | 76 | task = simple_forward_task.SimpleForwardTask(None) 77 | 78 | env = locomotion_gym_env.LocomotionGymEnv(gym_config=gym_config, 79 | robot_class=robot_class, 80 | robot_sensors=sensors, 81 | task=task) 82 | 83 | env = obs_dict_to_array_wrapper.ObservationDictionaryToArrayWrapper(env) 84 | 85 | if (motor_control_mode == robot_config.MotorControlMode.POSITION) and wrap_trajectory_generator: 86 | env = trajectory_generator_wrapper_env.TrajectoryGeneratorWrapperEnv( 87 | env, 88 | trajectory_generator=simple_openloop.LaikagoPoseOffsetGenerator( 89 | action_limit=action_limit,action_space=action_space)) 90 | 91 | return env -------------------------------------------------------------------------------- /a1sim/envs/env_wrappers/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/a1sim/envs/env_wrappers/__init__.py -------------------------------------------------------------------------------- /a1sim/envs/env_wrappers/observation_dictionary_to_array_wrapper.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | """An env wrapper that flattens the observation dictionary to an array.""" 6 | import gym 7 | from a1sim.envs.utilities import env_utils 8 | 9 | 10 | class ObservationDictionaryToArrayWrapper(gym.Env): 11 | """An env wrapper that flattens the observation dictionary to an array.""" 12 | def __init__(self, gym_env, observation_excluded=()): 13 | """Initializes the wrapper.""" 14 | self.observation_excluded = observation_excluded 15 | self._gym_env = gym_env 16 | self.observation_space = self._flatten_observation_spaces( 17 | self._gym_env.observation_space) 18 | self.action_space = self._gym_env.action_space 19 | 20 | def __getattr__(self, attr): 21 | return getattr(self._gym_env, attr) 22 | 23 | def _flatten_observation_spaces(self, observation_spaces): 24 | flat_observation_space = env_utils.flatten_observation_spaces( 25 | observation_spaces=observation_spaces, 26 | observation_excluded=self.observation_excluded) 27 | return flat_observation_space 28 | 29 | def _flatten_observation(self, input_observation): 30 | """Flatten the dictionary to an array.""" 31 | return env_utils.flatten_observations( 32 | observation_dict=input_observation, 33 | observation_excluded=self.observation_excluded) 34 | 35 | def reset(self): 36 | observation,info = self._gym_env.reset() 37 | return self._flatten_observation(observation),info 38 | 39 | def step(self, action): 40 | """Steps the wrapped environment. 41 | Args: 42 | action: Numpy array. The input action from an NN agent. 43 | Returns: 44 | The tuple containing the flattened observation, the reward, the epsiode 45 | end indicator. 
46 | """ 47 | observation_dict, reward, done, _ = self._gym_env.step(action) 48 | return self._flatten_observation(observation_dict), reward, done, _ 49 | 50 | def render(self, mode='rgb_array'): 51 | return self._gym_env.render(mode) -------------------------------------------------------------------------------- /a1sim/envs/env_wrappers/simple_forward_task.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation/blob/master/motion_imitation/envs/env_wrappers/simple_forward_task.py 5 | 6 | """A simple locomotion task and termination condition.""" 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | from a1sim.envs.utilities import pose3d 14 | from pybullet_utils import transformations 15 | 16 | class SimpleForwardTask(object): 17 | """Default empy task.""" 18 | def __init__(self,param): 19 | """Initializes the task.""" 20 | self.current_base_pos = np.zeros(3) 21 | self.last_base_pos = np.zeros(3) 22 | self.param = param 23 | self.last_foot = np.zeros(4) 24 | 25 | def __call__(self, env): 26 | return self.reward(env) 27 | 28 | def reset(self, env): 29 | """Resets the internal state of the task.""" 30 | self._env = env 31 | self.last_base_pos = env.robot.GetBasePosition() 32 | self.current_base_pos = self.last_base_pos 33 | 34 | def update(self, env): 35 | """Updates the internal state of the task.""" 36 | self.last_base_pos = self.current_base_pos 37 | self.current_base_pos = env.robot.GetBasePosition() 38 | 39 | def done(self, env): 40 | """Checks if the episode is over. 41 | 42 | If the robot base becomes unstable (based on orientation), the episode 43 | terminates early. 44 | """ 45 | return False 46 | 47 | 48 | def reward(self, env): 49 | """Get the reward without side effects.""" 50 | vel_reward = self._calc_vel_reward() 51 | return vel_reward 52 | 53 | def _calc_vel_reward(self): 54 | return self.current_base_pos[0] - self.last_base_pos[0] 55 | 56 | 57 | -------------------------------------------------------------------------------- /a1sim/envs/env_wrappers/simple_openloop.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """Simple openloop trajectory generators.""" 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import inspect 14 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 15 | parentdir = os.path.dirname(os.path.dirname(currentdir)) 16 | os.sys.path.insert(0, parentdir) 17 | 18 | import attr 19 | from gym import spaces 20 | import numpy as np 21 | 22 | from a1sim.robots import laikago_pose_utils 23 | from a1sim.robots import minitaur_pose_utils 24 | 25 | class MinitaurPoseOffsetGenerator(object): 26 | """A trajectory generator that return a constant leg pose.""" 27 | 28 | def __init__(self, 29 | init_swing=0, 30 | init_extension=2.0, 31 | init_pose=None, 32 | action_scale=1.0, 33 | action_limit=0.5): 34 | """Initializes the controller. 
35 | 36 | Args: 37 | init_swing: the swing of the default pose offset 38 | init_extension: the extension of the default pose offset 39 | init_pose: the default pose offset, which is None by default. If not None, 40 | it will define the default pose offset while ignoring init_swing and 41 | init_extension. 42 | action_scale: changes the magnitudes of actions 43 | action_limit: clips actions 44 | """ 45 | if init_pose is None: 46 | self._pose = np.array( 47 | attr.astuple( 48 | minitaur_pose_utils.MinitaurPose( 49 | swing_angle_0=init_swing, 50 | swing_angle_1=init_swing, 51 | swing_angle_2=init_swing, 52 | swing_angle_3=init_swing, 53 | extension_angle_0=init_extension, 54 | extension_angle_1=init_extension, 55 | extension_angle_2=init_extension, 56 | extension_angle_3=init_extension))) 57 | else: # Ignore init_swing and init_extension 58 | self._pose = np.array(init_pose) 59 | 60 | action_high = np.array([action_limit] * 4) 61 | self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32) 62 | self._action_scale = action_scale 63 | 64 | def reset(self): 65 | pass 66 | 67 | def get_action(self, current_time=None, input_action=None): 68 | """Computes the trajectory according to input time and action. 69 | 70 | Args: 71 | current_time: The time in gym env since reset. 72 | input_action: A numpy array. The input leg pose from a NN controller. 73 | 74 | Returns: 75 | A numpy array. The desired motor angles. 76 | """ 77 | del current_time 78 | return minitaur_pose_utils.leg_pose_to_motor_angles(self._pose + 79 | self._action_scale * 80 | np.array(input_action)) 81 | 82 | def get_observation(self, input_observation): 83 | """Get the trajectory generator's observation.""" 84 | 85 | return input_observation 86 | 87 | 88 | class LaikagoPoseOffsetGenerator(object): 89 | """A trajectory generator that return constant motor angles.""" 90 | def __init__( 91 | self, 92 | init_abduction=laikago_pose_utils.LAIKAGO_DEFAULT_ABDUCTION_ANGLE, 93 | init_hip=laikago_pose_utils.LAIKAGO_DEFAULT_HIP_ANGLE, 94 | init_knee=laikago_pose_utils.LAIKAGO_DEFAULT_KNEE_ANGLE, 95 | action_limit=0.5, 96 | action_space = 0, 97 | ): 98 | """Initializes the controller. 
99 | Args: 100 | action_limit: a tuple of [limit_abduction, limit_hip, limit_knee] 101 | """ 102 | self._pose = np.array( 103 | attr.astuple( 104 | laikago_pose_utils.LaikagoPose(abduction_angle_0=init_abduction, 105 | hip_angle_0=init_hip, 106 | knee_angle_0=init_knee, 107 | abduction_angle_1=init_abduction, 108 | hip_angle_1=init_hip, 109 | knee_angle_1=init_knee, 110 | abduction_angle_2=init_abduction, 111 | hip_angle_2=init_hip, 112 | knee_angle_2=init_knee, 113 | abduction_angle_3=init_abduction, 114 | hip_angle_3=init_hip, 115 | knee_angle_3=init_knee))) 116 | # print('pose:',self._pose) 117 | # raise NotImplemented 118 | # action_high = np.array([0.802,3.288,0.88] * 4) 119 | # action_low = np.array([-0.802,-1.94,-0.89]*4) 120 | self.action_mode = action_space 121 | if action_space ==0: 122 | action_high = np.array([0.2,0.7,0.7] *4) 123 | action_low = np.array([-0.2,-0.7,-0.7]*4) 124 | elif action_space==1: 125 | action_high = np.array([0.1,0.5,0.4] * 4) 126 | action_low = np.array([-0.1,-0.3,-0.6]*4) 127 | elif action_space==2: 128 | action_high = np.array([0.1,0.5,0.4,0.1,0.5,0.4,0.1,0.1,0.1,0.1,0.1,0.1]) 129 | action_low = np.array([-0.1,-0.3,-0.6,-0.1,-0.3,-0.6,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1]) 130 | elif action_space==3: 131 | action_high = np.array([0.1,0.7,0.7,0.1,0.7,0.7,0.1,0.1,0.1,0.1,0.1,0.1]) 132 | action_low = np.array([-0.1,-0.7,-0.7,-0.1,-0.7,-0.7,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1]) 133 | 134 | # action_high = np.array([action_limit] * 4).reshape(-1,1) 135 | # print('action_limit:',action_limit) 136 | # print('action_high:',action_high+self._pose) 137 | # print('action_low:',action_low+self._pose) 138 | self.action_space = spaces.Box(action_low, action_high, dtype=np.float32) 139 | # print('action_space_n:',self.action_space.low,self.action_space.high) 140 | # raise NotImplemented 141 | def reset(self): 142 | pass 143 | 144 | def get_action(self, current_time=None, input_action=None): 145 | """Computes the trajectory according to input time and action. 146 | 147 | Args: 148 | current_time: The time in gym env since reset. 149 | input_action: A numpy array. The input leg pose from a NN controller. 150 | 151 | Returns: 152 | A numpy array. The desired motor angles. 
153 | """ 154 | del current_time 155 | # print(self._pose) 156 | if self.action_mode <=1: 157 | return self._pose + input_action 158 | elif self.action_mode >= 2: 159 | # print('input_act:',input_action) 160 | new_action = np.zeros(12) 161 | new_action[6:9] = input_action[3:6] 162 | new_action[9:12] = input_action[:3] 163 | new_action += input_action 164 | # print('new_act:',new_action) 165 | return self._pose + new_action 166 | 167 | def get_observation(self, input_observation): 168 | """Get the trajectory generator's observation.""" 169 | 170 | return input_observation 171 | -------------------------------------------------------------------------------- /a1sim/envs/env_wrappers/trajectory_generator_wrapper_env.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation/blob/master/motion_imitation/envs/env_wrappers/trajectory_generator_wrapper_env.py 5 | 6 | 7 | """A wrapped MinitaurGymEnv with a built-in controller.""" 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | class TrajectoryGeneratorWrapperEnv(object): 14 | """A wrapped LocomotionGymEnv with a built-in trajectory generator.""" 15 | 16 | def __init__(self, gym_env, trajectory_generator): 17 | """Initialzes the wrapped env. 18 | Args: 19 | gym_env: An instance of LocomotionGymEnv. 20 | trajectory_generator: A trajectory_generator that can potentially modify 21 | the action and observation. Typticall generators includes the PMTG and 22 | openloop signals. Expected to have get_action and get_observation 23 | interfaces. 24 | Raises: 25 | ValueError if the controller does not implement get_action and 26 | get_observation. 27 | """ 28 | self._gym_env = gym_env 29 | if not hasattr(trajectory_generator, 'get_action') or not hasattr( 30 | trajectory_generator, 'get_observation'): 31 | raise ValueError( 32 | 'The controller does not have the necessary interface(s) implemented.' 33 | ) 34 | 35 | self._trajectory_generator = trajectory_generator 36 | 37 | # The trajectory generator can subsume the action/observation space. 38 | if hasattr(trajectory_generator, 'observation_space'): 39 | self.observation_space = self._trajectory_generator.observation_space 40 | 41 | if hasattr(trajectory_generator, 'action_space'): 42 | # print('yes!') 43 | self.action_space = self._trajectory_generator.action_space 44 | # print('action_space:',self.action_space) 45 | 46 | def __getattr__(self, attr): 47 | return getattr(self._gym_env, attr) 48 | 49 | def _modify_observation(self, observation): 50 | return self._trajectory_generator.get_observation(observation) 51 | 52 | def reset(self): 53 | if getattr(self._trajectory_generator, 'reset'): 54 | self._trajectory_generator.reset() 55 | observation, info = self._gym_env.reset() 56 | return self._modify_observation(observation)[0], info 57 | 58 | def step(self, action): 59 | """Steps the wrapped environment. 60 | Args: 61 | action: Numpy array. The input action from an NN agent. 62 | Returns: 63 | The tuple containing the modified observation, the reward, the epsiode end 64 | indicator. 65 | Raises: 66 | ValueError if input action is None. 
67 | """ 68 | 69 | if action is None: 70 | raise ValueError('Action cannot be None') 71 | 72 | new_action = self._trajectory_generator.get_action( 73 | self._gym_env.robot.GetTimeSinceReset(), action) 74 | 75 | original_observation, reward, done, info = self._gym_env.step(new_action) 76 | 77 | info['real_action'] = new_action 78 | 79 | return self._modify_observation(original_observation)[0], reward, done, info 80 | 81 | def render(self, mode='rgb_array'): 82 | return self._gym_env.render(mode) 83 | -------------------------------------------------------------------------------- /a1sim/envs/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/a1sim/envs/external/__init__.py -------------------------------------------------------------------------------- /a1sim/envs/external/constants.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python3 2 | 3 | from typing import Any, Sequence, Tuple 4 | from os import path 5 | 6 | 7 | ## Internal function variables ## 8 | 9 | TORQUE = 0 10 | POSITION = 1 11 | VELOCITY = 2 12 | 13 | 14 | ## Visualization modes ## 15 | 16 | FRAME_LOCAL = 0 17 | FRAME_GLOBAL = 1 18 | 19 | 20 | ## Simulation modes ## 21 | 22 | CONTROL_MPC = 0 23 | CONTROL_QP = 1 24 | 25 | TERRAIN_PLANE = 0 26 | TERRAIN_TRAINING = 1 27 | TERRAIN_VALIDATION = 2 28 | TERRAIN_TEST = 3 29 | TERRAIN_ICE = 4 30 | TERRAIN_SAMURAI = 5 31 | TERRAIN_BLOBBED = 6 32 | 33 | OBJECT_EMPTY = 0 34 | OBJECT_BALLS = 1 35 | 36 | ## Simulation configuartion ## 37 | 38 | TIME_STEP = 0.001 39 | REPEAT_INIT = 100 40 | REPEAT_ACTION = 10 41 | NUM_HISTORY = 50 42 | 43 | 44 | ## System configuration ## 45 | 46 | PATH_SRC = path.dirname(path.realpath(__file__)) 47 | PATH_ROOT = path.dirname(PATH_SRC) 48 | PATH_CONFIG = PATH_ROOT+"/config" 49 | PATH_SAVE = PATH_ROOT+"/save" 50 | PATH_DATA = PATH_ROOT+"/data" 51 | PATH_PLOT = PATH_ROOT+"/plot" 52 | 53 | SUBPATH_CONFIG = { "reward": "reward.yaml", 54 | "ppo": "ppo.yaml", 55 | "experiment": "experiment.yaml"} 56 | 57 | 58 | SUBPATH_TERRAIN = { TERRAIN_SAMURAI: "terrains/samurai/samurai.urdf", 59 | TERRAIN_BLOBBED: "terrains/blobbed_terrain/terrain.urdf"} 60 | 61 | 62 | PATH_TERRAIN_TRAINING=PATH_DATA+'/dataset' 63 | PATH_TERRAIN_VALIDATION=PATH_DATA+'/dataset' 64 | 65 | 66 | -------------------------------------------------------------------------------- /a1sim/envs/external/terrain.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python3 2 | 3 | import pickle 4 | import random 5 | import numpy as np 6 | from PIL import Image 7 | from os import path 8 | 9 | TERRAIN_CHOICE = [] 10 | CURRICULUM_CHOICE = {} 11 | 12 | 13 | TERRAIN_PLANE = 0 14 | TERRAIN_TRAINING = 1 15 | TERRAIN_VALIDATION = 2 16 | TERRAIN_TEST = 3 17 | TERRAIN_ICE = 4 18 | TERRAIN_SAMURAI = 5 19 | TERRAIN_BLOBBED = 6 20 | 21 | OBJECT_EMPTY = 0 22 | OBJECT_BALLS = 1 23 | 24 | PATH_SRC = path.dirname(path.realpath(__file__)) 25 | PATH_ROOT = path.dirname(PATH_SRC) 26 | PATH_DATA = PATH_ROOT+"/data" 27 | 28 | SUBPATH_TERRAIN = { TERRAIN_SAMURAI: "terrains/samurai/samurai.urdf", 29 | TERRAIN_BLOBBED: "terrains/blobbed_terrain/terrain.urdf"} 30 | 31 | PATH_TERRAIN_TRAINING=PATH_DATA+'/dataset' 32 | PATH_TERRAIN_VALIDATION=PATH_DATA+'/dataset' 33 | 34 | 35 | def loadTerrain(path): 36 | 37 | import sys 38 | import os 39 | 40 | global TERRAIN_CHOICE 41 | 42 | if len(TERRAIN_CHOICE) == 0: 43 | for root, dirs, files in os.walk(path): 44 | for file in files: 45 | if file.endswith("pkl"): 46 | TERRAIN_CHOICE.append(os.path.join(root,file)) 47 | 48 | if len(CURRICULUM_CHOICE) == 0: 49 | for pathType in os.listdir(path): 50 | for pathCur in os.listdir("{}/{}".format(path,pathType)): 51 | cur = int(pathCur) 52 | 53 | if cur not in CURRICULUM_CHOICE.keys(): 54 | CURRICULUM_CHOICE[cur] = [] 55 | 56 | for root, dirs, files in os.walk("{}/{}/{}".format(path, pathType, pathCur)): 57 | for file in files: 58 | if file.endswith("pkl"): 59 | CURRICULUM_CHOICE[cur].append(os.path.join(root,file)) 60 | 61 | return True 62 | 63 | 64 | # Define objects for the map ## 65 | class modelTerrain(): 66 | 67 | def __init__(self, pybullet, typeTerrain=TERRAIN_PLANE, curriculum=None): 68 | 69 | self.sim = pybullet 70 | self.typeTerrain = typeTerrain 71 | self.measurement = None 72 | self.curriculum = curriculum 73 | 74 | if self.typeTerrain in [TERRAIN_SAMURAI, TERRAIN_BLOBBED]: 75 | offsetHeight = 0 76 | self.idTerrain = self.sim.loadURDF(SUBPATH_TERRAIN[typeTerrain], [0.0, 0.0, 0.0], useFixedBase=1) 77 | 78 | self.sim.resetBasePositionAndOrientation(self.idTerrain,[0 ,0,offsetHeight], [0,0,0,1]) 79 | self.sim.changeDynamics(self.idTerrain, -1, lateralFriction=1.0) 80 | 81 | elif self.typeTerrain == TERRAIN_TEST: 82 | self.numHeightfieldRows = 256 83 | self.numHeightfieldColumns = 256 84 | self.raHeightField = [[0 for j in range(self.numHeightfieldRows)] for i in range (self.numHeightfieldColumns)] 85 | 86 | heightPerturbationRange = random.uniform(0,0.1) # 0.06 87 | 88 | for i in range (int(self.numHeightfieldColumns/2)): 89 | for j in range (int(self.numHeightfieldRows/2) ): 90 | height = random.uniform(0,heightPerturbationRange) 91 | self.raHeightField[2*i][2*j]=height 92 | self.raHeightField[2*i+1][2*j]=height 93 | self.raHeightField[2*i][2*j+1]=height 94 | self.raHeightField[2*i+1][2*j+1]=height 95 | 96 | raFlattenHeightField = [] 97 | for raColumnHeightField in self.raHeightField: 98 | raFlattenHeightField += raColumnHeightField 99 | 100 | # offsetHeight = 0.5*(max(raFlattenHeightField)+min(raFlattenHeightField)) - max([0,heightPerturbationRange -0.04]) 101 | # offsetHeight = 0.5*(max(raFlattenHeightField)+min(raFlattenHeightField) - heightPerturbationRange) 102 | offsetHeight = 0 103 | 104 | shapeTerrain = self.sim.createCollisionShape(shapeType = self.sim.GEOM_HEIGHTFIELD, meshScale=[.05,.05,1], heightfieldTextureScaling=1, heightfieldData=raFlattenHeightField, numHeightfieldRows=self.numHeightfieldRows, numHeightfieldColumns=self.numHeightfieldColumns) 105 | 
self.idTerrain = self.sim.createMultiBody(0, shapeTerrain) 106 | 107 | self.sim.resetBasePositionAndOrientation(self.idTerrain,[6 ,0,offsetHeight], [0,0,0,1]) 108 | self.sim.changeDynamics(self.idTerrain, -1, lateralFriction=1.0) 109 | 110 | elif self.typeTerrain == TERRAIN_ICE: 111 | 112 | offsetHeight = 0 113 | 114 | shapePlane = self.sim.createCollisionShape(shapeType = self.sim.GEOM_PLANE) 115 | 116 | raSizeBox = [0.15, 0.15 , 0.001] 117 | raShiftAng = [0, 0, 0, 1] 118 | raColorRGB = [0, 1, 1, 1] 119 | raColorSpecular = [0.4, .4, 0] 120 | 121 | idVisualShape = self.sim.createVisualShape( shapeType=self.sim.GEOM_BOX, halfExtents=raSizeBox, 122 | rgbaColor=raColorRGB, specularColor=raColorSpecular) 123 | idCollisionShape = self.sim.createCollisionShape(shapeType=self.sim.GEOM_BOX, halfExtents=raSizeBox) 124 | 125 | for i in range(5): 126 | for j in range(5): 127 | 128 | idIce = self.sim.createMultiBody(0, idCollisionShape, idVisualShape, [i -2, j-2, 0.0005], raShiftAng) 129 | self.sim.changeDynamics(idIce, -1, lateralFriction=0.01) 130 | 131 | self.idTerrain = self.sim.createMultiBody(0, shapePlane) 132 | 133 | self.sim.resetBasePositionAndOrientation(self.idTerrain,[0 ,0,offsetHeight], [0,0,0,1]) 134 | self.sim.changeDynamics(self.idTerrain, -1, lateralFriction=1.0) 135 | 136 | elif self.typeTerrain in [TERRAIN_TRAINING, TERRAIN_VALIDATION]: 137 | 138 | 139 | if self.curriculum == None: 140 | path = random.choice(TERRAIN_CHOICE) 141 | 142 | else: 143 | if self.curriculum in CURRICULUM_CHOICE.keys(): 144 | path = random.choice(CURRICULUM_CHOICE[self.curriculum]) 145 | else: 146 | path = random.choice(TERRAIN_CHOICE) 147 | 148 | # path = TERRAIN_CHOICE[115] 149 | 150 | with open(path, "rb") as f: 151 | choice = pickle.load(f) 152 | 153 | self.raHeightField = choice['height'] 154 | self.raCostField = choice['cost'] 155 | self.numHeightfieldRows = len(self.raHeightField[0]) 156 | self.numHeightfieldColumns = len(self.raHeightField) 157 | self.raMeshScale = [.05,.05, 1] 158 | 159 | raFlattenHeightField = [] 160 | for raColumnHeightField in self.raHeightField: 161 | raFlattenHeightField += raColumnHeightField 162 | 163 | offsetHeight = 0.5 * (np.max(raFlattenHeightField) + np.min(raFlattenHeightField)) 164 | # offsetHeight = np.average(raFlattenHeightField) 165 | self.shapeTerrain = self.sim.createCollisionShape(shapeType = self.sim.GEOM_HEIGHTFIELD, meshScale=self.raMeshScale, heightfieldTextureScaling=1, heightfieldData=raFlattenHeightField, numHeightfieldRows=self.numHeightfieldRows, numHeightfieldColumns=self.numHeightfieldColumns) 166 | self.idTerrain = self.sim.createMultiBody(0, self.shapeTerrain) 167 | 168 | self.sim.resetBasePositionAndOrientation(self.idTerrain,[0 ,0,offsetHeight], [0,0,0,1]) 169 | self.sim.changeDynamics(self.idTerrain, -1, lateralFriction=1.0) 170 | 171 | else: 172 | offsetHeight = 0 173 | shapePlane = self.sim.createCollisionShape(shapeType = self.sim.GEOM_PLANE) 174 | self.idTerrain = self.sim.createMultiBody(0, shapePlane) 175 | 176 | self.sim.resetBasePositionAndOrientation(self.idTerrain,[0 ,0,offsetHeight], [0,0,0,1]) 177 | self.sim.changeDynamics(self.idTerrain, -1, lateralFriction=1.0) 178 | 179 | 180 | def resetTerrain(self): 181 | 182 | if self.typeTerrain in [TERRAIN_TRAINING, TERRAIN_VALIDATION]: 183 | 184 | self.raHeightField, self.raCostField = random.choice(TERRAIN_CHOICE) 185 | self.numHeightfieldRows = len(self.raHeightField[0]) 186 | self.numHeightfieldColumns = len(self.raHeightField) 187 | self.raMeshScale = [.02,.02,1] 188 | 189 | 
raFlattenHeightField = [] 190 | for raColumnHeightField in self.raHeightField: 191 | raFlattenHeightField += raColumnHeightField 192 | 193 | offsetHeight = np.average(raFlattenHeightField) 194 | self.sim.createCollisionShape(shapeType = self.sim.GEOM_HEIGHTFIELD, meshScale=self.raMeshScale, heightfieldTextureScaling=1, heightfieldData=raFlattenHeightField, numHeightfieldRows=self.numHeightfieldRows, numHeightfieldColumns=self.numHeightfieldColumns, replaceHeightfieldIndex = self.shapeTerrain) 195 | 196 | self.sim.resetBasePositionAndOrientation(self.idTerrain,[6,0, offsetHeight], [0,0,0,1]) 197 | 198 | return 199 | 200 | 201 | 202 | class heightmap(): 203 | 204 | def __init__(self, raHeightField, raCostField, valResRef, valResOut, raOutRange): 205 | 206 | self.matHeightField = np.array(raHeightField) 207 | self.matCostField = np.array(raCostField) 208 | self.valResRef = valResRef 209 | self.valResOut = valResOut 210 | self.raOutRange = raOutRange 211 | self.valMapOffset = 6.0 212 | 213 | self.numColHeightField = len(raHeightField) 214 | self.numRowHeightField = len(raHeightField[0]) 215 | 216 | self.numColOut = int(self.raOutRange[1]/self.valResOut)+1 217 | self.numRowOut = int(self.raOutRange[0]/self.valResOut)+1 218 | 219 | 220 | def sampleTerrain(self, raPos, valYaw): 221 | 222 | idxRowOffset = int((raPos[0] - self.valMapOffset)/self.valResOut) + int(0.5*(self.numRowHeightField-1)) - int(0.5*(self.numRowOut-1)) 223 | idxColOffset = int(raPos[1]/self.valResOut) + int(0.5*(self.numColHeightField-1)) - int(0.5*(self.numColOut-1)) 224 | 225 | idxColStart = max(idxColOffset, 0) 226 | idxColTerm = min(idxColOffset + self.numColOut, self.numColHeightField) 227 | 228 | idxRowStart = max(idxRowOffset, 0) 229 | idxRowTerm = min(idxRowOffset + self.numRowOut, self.numRowHeightField) 230 | 231 | raHeightMap = np.zeros((self.numColOut, self.numRowOut)) 232 | raHeightMap[idxColStart- idxColOffset:idxColTerm - idxColOffset,idxRowStart- idxRowOffset:idxRowTerm- idxRowOffset] = self.matHeightField[idxColStart:idxColTerm,idxRowStart:idxRowTerm] 233 | 234 | return raHeightMap.tolist() 235 | 236 | 237 | def checkAffordance(self, raPos): 238 | 239 | idxRow = int((raPos[0] - self.valMapOffset)/self.valResOut) + int(0.5*(self.numRowHeightField-1)) 240 | idxCol = int(raPos[1]/self.valResOut) + int(0.5*(self.numColHeightField-1)) 241 | 242 | return self.matCostField[idxCol,idxRow] 243 | -------------------------------------------------------------------------------- /a1sim/envs/gym_envs/__init__.py: -------------------------------------------------------------------------------- 1 | """Setup such that environment can be created using gym.make().""" 2 | from a1sim.envs.gym_envs.a1_gym_env import A1GymEnv -------------------------------------------------------------------------------- /a1sim/envs/gym_envs/a1_gym_env.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | import gym 6 | from a1sim.envs import env_builder 7 | from a1sim.robots import a1 8 | from a1sim.robots import robot_config 9 | from a1sim.envs.prelude_gym_env import Prelude_Wrapper, REWARD_PARAM_DICT, RANDOM_PARAM_DICT, DYNAMIC_PARAM_DICT 10 | 11 | import numpy as np 12 | 13 | class A1Hybrid(a1.A1): 14 | 15 | def __init__(self, 16 | pybullet_client, 17 | # urdf_filename=None, 18 | enable_clip_motor_commands=False, 19 | time_step=0.001, 20 | action_repeat=10, 21 | 
sensors=None, 22 | control_latency=0.002, 23 | on_rack=False, 24 | enable_action_interpolation=True, 25 | enable_action_filter=False, 26 | motor_control_mode=None, 27 | reset_time=1, 28 | allow_knee_contact=False, 29 | ): 30 | 31 | self._pos_commands = np.zeros(12) 32 | self._tor_commands = np.zeros(12) 33 | self._hybrid_modes = np.ones(12) 34 | 35 | super().__init__( 36 | pybullet_client, 37 | enable_clip_motor_commands=enable_clip_motor_commands, 38 | time_step=time_step, 39 | action_repeat=action_repeat, 40 | sensors=sensors, 41 | control_latency=control_latency, 42 | on_rack=on_rack, 43 | enable_action_interpolation=True, 44 | enable_action_filter=False, 45 | motor_control_mode=motor_control_mode, 46 | reset_time=reset_time, 47 | allow_knee_contact=allow_knee_contact, 48 | ) 49 | 50 | 51 | def SetDirectCommands(self, pos_commands, tor_commands, hybrid_modes): 52 | self._pos_commands = pos_commands 53 | self._tor_commands = tor_commands 54 | self._hybrid_modes = hybrid_modes 55 | 56 | def ApplyAction(self, motor_commands, control_mode): 57 | """Apply the motor commands using the motor model. 58 | 59 | Args: 60 | motor_commands: np.array. Can be motor angles, torques, hybrid commands, 61 | or motor pwms (for Minitaur only). 62 | motor_control_mode: A MotorControlMode enum. 63 | """ 64 | self.last_action_time = self._state_action_counter * self.time_step 65 | 66 | q, qdot = self._GetPDObservation() 67 | qdot_true = self.GetTrueMotorVelocities() 68 | 69 | motor_init = np.array([0, 0.9, -1.8] * 4) 70 | 71 | pos_commands = self._hybrid_modes * self._motor_direction * (self._pos_commands + self._motor_offset + motor_init) + (1-self._hybrid_modes) * q 72 | tor_commands = (1-self._hybrid_modes) * self._motor_direction * self._tor_commands 73 | 74 | pos_actual_torque, pos_observed_torque = self._motor_model.convert_to_torque( 75 | pos_commands, q, qdot, qdot_true, control_mode) 76 | 77 | tor_actual_torque = self._motor_model._strength_ratios * tor_commands 78 | tor_observed_torque = self._motor_model._strength_ratios * tor_commands 79 | 80 | actual_torque = self._hybrid_modes * pos_actual_torque + (1-self._hybrid_modes) * tor_actual_torque 81 | observed_torque = self._hybrid_modes * pos_observed_torque + (1-self._hybrid_modes) * tor_observed_torque 82 | 83 | # May turn off the motor 84 | self._ApplyOverheatProtection(actual_torque) 85 | 86 | # The torque is already in the observation space because we use 87 | # GetMotorAngles and GetMotorVelocities. 88 | self._observed_motor_torques = observed_torque 89 | 90 | # Transform into the motor space when applying the torque. 
91 | self._applied_motor_torque = np.multiply(actual_torque, 92 | self._motor_direction) 93 | motor_ids = [] 94 | motor_torques = [] 95 | 96 | for motor_id, motor_torque, motor_enabled in zip( 97 | self._motor_id_list, self._applied_motor_torque, 98 | self._motor_enabled_list): 99 | if motor_enabled: 100 | motor_ids.append(motor_id) 101 | motor_torques.append(motor_torque) 102 | else: 103 | motor_ids.append(motor_id) 104 | motor_torques.append(0) 105 | self._SetMotorTorqueByIds(motor_ids, motor_torques) 106 | 107 | return motor_torques 108 | 109 | 110 | class A1GymEnv(gym.Env): 111 | """A1 environment that supports the gym interface.""" 112 | metadata = {'render.modes': ['rgb_array']} 113 | 114 | def __init__(self, 115 | action_limit=(0.75, 0.75, 0.75), 116 | render=False, 117 | on_rack=False, 118 | sensor_mode = env_builder.SENSOR_MODE, 119 | normal = 0, 120 | filter_ = 0, 121 | action_space =0, 122 | reward_param = REWARD_PARAM_DICT, 123 | random_param = RANDOM_PARAM_DICT, 124 | dynamic_param = DYNAMIC_PARAM_DICT, 125 | num_history=48, 126 | terrain=None, 127 | cmd=None, 128 | control_mode='policy', 129 | **kwargs): 130 | 131 | if control_mode == 'mpc': 132 | num_action_repeat=5 133 | robot_class=A1Hybrid 134 | else: 135 | num_action_repeat=13 136 | robot_class=a1.A1 137 | 138 | self._env = env_builder.build_regular_env( 139 | robot_class, 140 | motor_control_mode = robot_config.MotorControlMode.POSITION, 141 | normal=normal, 142 | num_action_repeat=num_action_repeat, 143 | enable_rendering=render, 144 | action_limit=action_limit, 145 | sensor_mode = sensor_mode, 146 | filter = filter_, 147 | action_space = action_space, 148 | on_rack=on_rack) 149 | 150 | self.render = self._env.render 151 | 152 | self._env = Prelude_Wrapper( 153 | env = self._env, 154 | reward_param = reward_param, 155 | sensor_mode = sensor_mode, 156 | random_param = random_param, 157 | dynamic_param = dynamic_param, 158 | num_action_repeat=num_action_repeat, 159 | num_history=num_history, 160 | terrain=terrain, 161 | cmd=cmd, 162 | ) 163 | self.observation_space = self._env.observation_space 164 | self.action_space = self._env.action_space 165 | self.sensor_mode =sensor_mode 166 | 167 | def step(self, action,**kwargs): 168 | return self._env.step(action,**kwargs) 169 | 170 | def reset(self,**kwargs): 171 | return self._env.reset(**kwargs) 172 | 173 | def close(self): 174 | self._env.close() 175 | 176 | def __getattr__(self, attr): 177 | return getattr(self._env, attr) -------------------------------------------------------------------------------- /a1sim/envs/locomotion_gym_config.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """A gin-config class for locomotion_gym_env. 7 | 8 | This should be identical to locomotion_gym_config.proto. 
9 | """ 10 | import attr 11 | import typing 12 | from a1sim.robots import robot_config 13 | 14 | 15 | @attr.s 16 | class SimulationParameters(object): 17 | """Parameters specific for the pyBullet simulation.""" 18 | sim_time_step_s = attr.ib(type=float, default=0.002) 19 | num_action_repeat = attr.ib(type=int, default=13) 20 | enable_hard_reset = attr.ib(type=bool, default=False) 21 | enable_rendering = attr.ib(type=bool, default=False) 22 | enable_rendering_gui = attr.ib(type=bool, default=True) 23 | robot_on_rack = attr.ib(type=bool, default=False) 24 | camera_distance = attr.ib(type=float, default=1.0) 25 | camera_yaw = attr.ib(type=float, default=0) 26 | camera_pitch = attr.ib(type=float, default=-30) 27 | render_width = attr.ib(type=int, default=480) 28 | render_height = attr.ib(type=int, default=360) 29 | egl_rendering = attr.ib(type=bool, default=False) 30 | motor_control_mode = attr.ib(type=int, 31 | default=robot_config.MotorControlMode.POSITION) 32 | reset_time = attr.ib(type=float, default=-1) 33 | enable_action_filter = attr.ib(type=bool, default=True) 34 | enable_action_interpolation = attr.ib(type=bool, default=True) 35 | allow_knee_contact = attr.ib(type=bool, default=False) 36 | enable_clip_motor_commands = attr.ib(type=bool, default=True) 37 | 38 | 39 | @attr.s 40 | class ScalarField(object): 41 | """A named scalar space with bounds.""" 42 | name = attr.ib(type=str) 43 | upper_bound = attr.ib(type=float) 44 | lower_bound = attr.ib(type=float) 45 | 46 | 47 | @attr.s 48 | class LocomotionGymConfig(object): 49 | """Grouped Config Parameters for LocomotionGym.""" 50 | simulation_parameters = attr.ib(type=SimulationParameters) 51 | log_path = attr.ib(type=typing.Text, default=None) 52 | profiling_path = attr.ib(type=typing.Text, default=None) 53 | -------------------------------------------------------------------------------- /a1sim/envs/sensors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/a1sim/envs/sensors/__init__.py -------------------------------------------------------------------------------- /a1sim/envs/sensors/sensor.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """A sensor prototype class. 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import typing 15 | 16 | 17 | _ARRAY = typing.Iterable[float] # pylint: disable=invalid-name 18 | _FLOAT_OR_ARRAY = typing.Union[float, _ARRAY] # pylint: disable=invalid-name 19 | _DATATYPE_LIST = typing.Iterable[typing.Any] # pylint: disable=invalid-name 20 | 21 | 22 | class Sensor(object): 23 | """A prototype class of sensors.""" 24 | 25 | def __init__(self, 26 | name: typing.Text): 27 | """A basic constructor of the sensor. 28 | 29 | This initialized a robot as none. This instance may be regularly updated 30 | by the environment, when it resets the simulation environment. 
31 | 32 | Args: 33 | name: the name of the sensor 34 | """ 35 | self._robot = None 36 | self._name = name 37 | 38 | def get_name(self) -> typing.Text: 39 | return self._name 40 | 41 | def get_dtype(self): 42 | pass 43 | 44 | def get_observation_datatype(self): 45 | """Returns the data type for the numpy structured array. 46 | 47 | It is recommended to define a list of tuples: (name, datatype, shape) 48 | Reference: https://docs.scipy.org/doc/numpy-1.15.0/user/basics.rec.html 49 | Ex: 50 | return [('motor_angles', np.float64, (8, ))] # motor angle sensor 51 | return [('IMU_x', np.float64), ('IMU_z', np.float64), ] # IMU 52 | 53 | Returns: 54 | datatype: a list of data types. 55 | """ 56 | pass 57 | 58 | def get_lower_bound(self): 59 | """Returns the lower bound of the observation. 60 | 61 | 62 | Returns: 63 | lower_bound: the lower bound of sensor values in np.array format 64 | """ 65 | pass 66 | 67 | def get_upper_bound(self): 68 | """Returns the upper bound of the observation. 69 | 70 | Returns: 71 | upper_bound: the upper bound of sensor values in np.array format 72 | """ 73 | pass 74 | 75 | def get_observation(self): 76 | """Returns the observation data. 77 | 78 | Returns: 79 | observation: the observed sensor values in np.array format 80 | """ 81 | pass 82 | 83 | def set_robot(self, robot): 84 | """Set a robot instance.""" 85 | self._robot = robot 86 | 87 | def get_robot(self): 88 | """Returns the robot instance.""" 89 | return self._robot 90 | 91 | def on_reset(self, env): 92 | """A callback function for the reset event. 93 | 94 | Args: 95 | env: the environment who invokes this callback function. 96 | """ 97 | pass 98 | 99 | def on_step(self, env): 100 | """A callback function for the step event. 101 | 102 | Args: 103 | env: the environment who invokes this callback function. 104 | """ 105 | pass 106 | 107 | def on_terminate(self, env): 108 | """A callback function for the terminate event. 109 | 110 | Args: 111 | env: the environment who invokes this callback function. 112 | """ 113 | pass 114 | 115 | 116 | class BoxSpaceSensor(Sensor): 117 | """A prototype class of sensors with Box shapes.""" 118 | 119 | def __init__(self, 120 | name: typing.Text, 121 | shape: typing.Tuple[int, ...], 122 | lower_bound: _FLOAT_OR_ARRAY = -np.pi, 123 | upper_bound: _FLOAT_OR_ARRAY = np.pi, 124 | dtype=np.float64) -> None: 125 | """Constructs a box type sensor. 126 | 127 | Args: 128 | name: the name of the sensor 129 | shape: the shape of the sensor values 130 | lower_bound: the lower_bound of sensor value, in float or np.array. 131 | upper_bound: the upper_bound of sensor value, in float or np.array. 
132 | dtype: data type of sensor value 133 | """ 134 | super(BoxSpaceSensor, self).__init__(name) 135 | 136 | self._shape = shape 137 | self._dtype = dtype 138 | 139 | if isinstance(lower_bound, (float, int)): 140 | self._lower_bound = np.full(shape, lower_bound, dtype=dtype) 141 | else: 142 | self._lower_bound = np.array(lower_bound) 143 | 144 | if isinstance(upper_bound, (float, int)): 145 | self._upper_bound = np.full(shape, upper_bound, dtype=dtype) 146 | else: 147 | self._upper_bound = np.array(upper_bound) 148 | 149 | def get_shape(self) -> typing.Tuple[int, ...]: 150 | return self._shape 151 | 152 | def get_dimension(self) -> int: 153 | return len(self._shape) 154 | 155 | def get_dtype(self): 156 | pass 157 | 158 | def get_observation_datatype(self) -> _DATATYPE_LIST: 159 | """Returns box-shape data type.""" 160 | return [(self._name, self._dtype, self._shape)] 161 | 162 | def get_lower_bound(self) -> _ARRAY: 163 | """Returns the computed lower bound.""" 164 | return self._lower_bound 165 | 166 | def get_upper_bound(self) -> _ARRAY: 167 | """Returns the computed upper bound.""" 168 | return self._upper_bound 169 | 170 | def _get_observation(self) -> _ARRAY: 171 | """Returns raw observation""" 172 | raise NotImplementedError() 173 | 174 | def get_observation(self) -> np.ndarray: 175 | return np.asarray(self._get_observation(), dtype=self._dtype) 176 | -------------------------------------------------------------------------------- /a1sim/envs/sensors/space_utils.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """Converts a list of sensors to gym space.""" 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import inspect 14 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 15 | parentdir = os.path.dirname(os.path.dirname(currentdir)) 16 | os.sys.path.insert(0, parentdir) 17 | 18 | import gym 19 | from gym import spaces 20 | import numpy as np 21 | import typing 22 | 23 | from a1sim.envs.sensors import sensor 24 | 25 | 26 | class UnsupportedConversionError(Exception): 27 | """An exception when the function cannot convert sensors to the gym space.""" 28 | 29 | 30 | class AmbiguousDataTypeError(Exception): 31 | """An exception when the function cannot determine the data type.""" 32 | 33 | 34 | def convert_sensors_to_gym_space( 35 | sensors: typing.List[sensor.Sensor]) -> gym.Space: 36 | """Convert a list of sensors to the corresponding gym space. 37 | 38 | Args: 39 | sensors: a list of the current sensors 40 | 41 | Returns: 42 | space: the converted gym space 43 | 44 | Raises: 45 | UnsupportedConversionError: raises when the function cannot convert the 46 | given list of sensors. 47 | """ 48 | 49 | if all([ 50 | isinstance(s, sensor.BoxSpaceSensor) and s.get_dimension() == 1 51 | for s in sensors 52 | ]): 53 | return convert_1d_box_sensors_to_gym_space(sensors) 54 | raise UnsupportedConversionError('sensors = ' + str(sensors)) 55 | 56 | 57 | def convert_1d_box_sensors_to_gym_space( 58 | sensors: typing.List[sensor.Sensor]) -> gym.Space: 59 | """Convert a list of 1D BoxSpaceSensors to the corresponding gym space. 
60 | 61 | Args: 62 | sensors: a list of the current sensors 63 | 64 | Returns: 65 | space: the converted gym space 66 | 67 | Raises: 68 | UnsupportedConversionError: raises when the function cannot convert the 69 | given list of sensors. 70 | AmbiguousDataTypeError: raises when the function cannot determine the 71 | data types because they are not uniform. 72 | """ 73 | # Check if all sensors are 1D BoxSpaceSensors 74 | if not all([ 75 | isinstance(s, sensor.BoxSpaceSensor) and s.get_dimension() == 1 76 | for s in sensors 77 | ]): 78 | raise UnsupportedConversionError('sensors = ' + str(sensors)) 79 | 80 | # Check if all sensors have the same data type 81 | sensor_dtypes = [s.get_dtype() for s in sensors] 82 | if sensor_dtypes.count(sensor_dtypes[0]) != len(sensor_dtypes): 83 | raise AmbiguousDataTypeError('sensor datatypes are inhomogeneous') 84 | 85 | lower_bound = np.concatenate([s.get_lower_bound() for s in sensors]) 86 | upper_bound = np.concatenate([s.get_upper_bound() for s in sensors]) 87 | observation_space = spaces.Box(np.array(lower_bound), 88 | np.array(upper_bound), 89 | dtype=np.float32) 90 | return observation_space 91 | 92 | 93 | def convert_sensors_to_gym_space_dictionary( 94 | sensors: typing.List[sensor.Sensor]) -> gym.Space: 95 | """Convert a list of sensors to the corresponding gym space dictionary. 96 | 97 | Args: 98 | sensors: a list of the current sensors 99 | 100 | Returns: 101 | space: the converted gym space dictionary 102 | 103 | Raises: 104 | UnsupportedConversionError: raises when the function cannot convert the 105 | given list of sensors. 106 | """ 107 | gym_space_dict = {} 108 | for s in sensors: 109 | if isinstance(s, sensor.BoxSpaceSensor): 110 | gym_space_dict[s.get_name()] = spaces.Box(np.array(s.get_lower_bound()), 111 | np.array(s.get_upper_bound()), 112 | dtype=np.float32) 113 | else: 114 | raise UnsupportedConversionError('sensors = ' + str(sensors)) 115 | return spaces.Dict(gym_space_dict) 116 | -------------------------------------------------------------------------------- /a1sim/envs/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/a1sim/envs/utilities/__init__.py -------------------------------------------------------------------------------- /a1sim/envs/utilities/env_utils.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """Utility functions to manipulate environment.""" 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import collections 13 | from gym import spaces 14 | import numpy as np 15 | 16 | def flatten_observations(observation_dict, observation_excluded=()): 17 | """Flattens the observation dictionary to an array. 18 | 19 | If observation_excluded is passed in, it will still return a dictionary, 20 | which includes all the (key, observation_dict[key]) in observation_excluded, 21 | and ('other': the flattened array). 22 | 23 | Args: 24 | observation_dict: A dictionary of all the observations. 25 | observation_excluded: A list/tuple of all the keys of the observations to be 26 | ignored during flattening. 27 | 28 | Returns: 29 | An array or a dictionary of observations based on whether 30 | observation_excluded is empty. 
31 | """ 32 | if not isinstance(observation_excluded, (list, tuple)): 33 | observation_excluded = [observation_excluded] 34 | observations = [] 35 | for key, value in observation_dict.items(): 36 | if key not in observation_excluded: 37 | observations.append(np.asarray(value).flatten()) 38 | flat_observations = np.concatenate(observations) 39 | if not observation_excluded: 40 | return (flat_observations,observation_dict) 41 | else: 42 | observation_dict_after_flatten = {"other": flat_observations} 43 | for key in observation_excluded: 44 | observation_dict_after_flatten[key] = observation_dict[key] 45 | return collections.OrderedDict( 46 | sorted(list(observation_dict_after_flatten.items()))) 47 | 48 | 49 | def flatten_observation_spaces(observation_spaces, observation_excluded=()): 50 | """Flattens the dictionary observation spaces to gym.spaces.Box. 51 | 52 | If observation_excluded is passed in, it will still return a dictionary, 53 | which includes all the (key, observation_spaces[key]) in observation_excluded, 54 | and ('other': the flattened Box space). 55 | 56 | Args: 57 | observation_spaces: A dictionary of all the observation spaces. 58 | observation_excluded: A list/tuple of all the keys of the observations to be 59 | ignored during flattening. 60 | 61 | Returns: 62 | A box space or a dictionary of observation spaces based on whether 63 | observation_excluded is empty. 64 | """ 65 | if not isinstance(observation_excluded, (list, tuple)): 66 | observation_excluded = [observation_excluded] 67 | lower_bound = [] 68 | upper_bound = [] 69 | for key, value in observation_spaces.spaces.items(): 70 | if key not in observation_excluded: 71 | lower_bound.append(np.asarray(value.low).flatten()) 72 | upper_bound.append(np.asarray(value.high).flatten()) 73 | lower_bound = np.concatenate(lower_bound) 74 | upper_bound = np.concatenate(upper_bound) 75 | observation_space = spaces.Box( 76 | np.array(lower_bound), np.array(upper_bound), dtype=np.float32) 77 | if not observation_excluded: 78 | return observation_space 79 | else: 80 | observation_spaces_after_flatten = {"other": observation_space} 81 | for key in observation_excluded: 82 | observation_spaces_after_flatten[key] = observation_spaces[key] 83 | return spaces.Dict(observation_spaces_after_flatten) 84 | -------------------------------------------------------------------------------- /a1sim/envs/utilities/pose3d.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | """Utilities for 3D pose conversion.""" 6 | import math 7 | import numpy as np 8 | 9 | from pybullet_utils import transformations 10 | 11 | VECTOR3_0 = np.zeros(3, dtype=np.float64) 12 | VECTOR3_1 = np.ones(3, dtype=np.float64) 13 | VECTOR3_X = np.array([1, 0, 0], dtype=np.float64) 14 | VECTOR3_Y = np.array([0, 1, 0], dtype=np.float64) 15 | VECTOR3_Z = np.array([0, 0, 1], dtype=np.float64) 16 | 17 | # QUATERNION_IDENTITY is the multiplicative identity 1.0 + 0i + 0j + 0k. 18 | # When interpreted as a rotation, it is the identity rotation. 19 | QUATERNION_IDENTITY = np.array([0.0, 0.0, 0.0, 1.0], dtype=np.float64) 20 | 21 | 22 | def Vector3RandomNormal(sigma, mu=VECTOR3_0): 23 | """Returns a random 3D vector from a normal distribution. 24 | 25 | Each component is selected independently from a normal distribution. 26 | 27 | Args: 28 | sigma: Scale (or stddev) of distribution for all variables. 
29 | mu: Mean of distribution for each variable. 30 | 31 | Returns: 32 | A 3D vector in a numpy array. 33 | """ 34 | 35 | random_v3 = np.random.normal(scale=sigma, size=3) + mu 36 | return random_v3 37 | 38 | 39 | def Vector3RandomUniform(low=VECTOR3_0, high=VECTOR3_1): 40 | """Returns a 3D vector selected uniformly from the input box. 41 | 42 | Args: 43 | low: The min-value corner of the box. 44 | high: The max-value corner of the box. 45 | 46 | Returns: 47 | A 3D vector in a numpy array. 48 | """ 49 | 50 | random_x = np.random.uniform(low=low[0], high=high[0]) 51 | random_y = np.random.uniform(low=low[1], high=high[1]) 52 | random_z = np.random.uniform(low=low[2], high=high[2]) 53 | return np.array([random_x, random_y, random_z]) 54 | 55 | 56 | def Vector3RandomUnit(): 57 | """Returns a random 3D vector with unit length. 58 | 59 | Generates a 3D vector selected uniformly from the unit sphere. 60 | 61 | Returns: 62 | A normalized 3D vector in a numpy array. 63 | """ 64 | longitude = np.random.uniform(low=-math.pi, high=math.pi) 65 | sin_latitude = np.random.uniform(low=-1.0, high=1.0) 66 | cos_latitude = math.sqrt(1.0 - sin_latitude * sin_latitude) 67 | x = math.cos(longitude) * cos_latitude 68 | y = math.sin(longitude) * cos_latitude 69 | z = sin_latitude 70 | return np.array([x, y, z], dtype=np.float64) 71 | 72 | 73 | def QuaternionNormalize(q): 74 | """Normalizes the quaternion to length 1. 75 | 76 | Divides the quaternion by its magnitude. If the magnitude is too 77 | small, returns the quaternion identity value (1.0). 78 | 79 | Args: 80 | q: A quaternion to be normalized. 81 | 82 | Raises: 83 | ValueError: If input quaternion has length near zero. 84 | 85 | Returns: 86 | A quaternion with magnitude 1 in a numpy array [x, y, z, w]. 87 | 88 | """ 89 | q_norm = np.linalg.norm(q) 90 | if np.isclose(q_norm, 0.0): 91 | raise ValueError( 92 | 'Quaternion may not be zero in QuaternionNormalize: |q| = %f, q = %s' % 93 | (q_norm, q)) 94 | return q / q_norm 95 | 96 | 97 | def QuaternionFromAxisAngle(axis, angle): 98 | """Returns a quaternion that generates the given axis-angle rotation. 99 | 100 | Returns the quaternion: sin(angle/2) * axis + cos(angle/2). 101 | 102 | Args: 103 | axis: Axis of rotation, a 3D vector in a numpy array. 104 | angle: The angle of rotation (radians). 105 | 106 | Raises: 107 | ValueError: If input axis is not a normalizable 3D vector. 108 | 109 | Returns: 110 | A unit quaternion in a numpy array. 111 | 112 | """ 113 | if len(axis) != 3: 114 | raise ValueError('Axis vector should have three components: %s' % axis) 115 | axis_norm = np.linalg.norm(axis) 116 | if np.isclose(axis_norm, 0.0): 117 | raise ValueError('Axis vector may not have zero length: |v| = %f, v = %s' % 118 | (axis_norm, axis)) 119 | half_angle = angle * 0.5 120 | q = np.zeros(4, dtype=np.float64) 121 | q[0:3] = axis 122 | q[0:3] *= math.sin(half_angle) / axis_norm 123 | q[3] = math.cos(half_angle) 124 | return q 125 | 126 | 127 | def QuaternionToAxisAngle(quat, default_axis=VECTOR3_Z, direction_axis=None): 128 | """Calculates axis and angle of rotation performed by a quaternion. 129 | 130 | Calculates the axis and angle of the rotation performed by the quaternion. 131 | The quaternion should have four values and be normalized. 132 | 133 | Args: 134 | quat: Unit quaternion in a numpy array. 135 | default_axis: 3D vector axis used if the rotation is near to zero. Without 136 | this default, small rotations would result in an exception. 
It is 137 | reasonable to use a default axis for tiny rotations, because zero angle 138 | rotations about any axis are equivalent. 139 | direction_axis: Used to disambiguate rotation directions. If the 140 | direction_axis is specified, the axis of the rotation will be chosen such 141 | that its inner product with the direction_axis is non-negative. 142 | 143 | Raises: 144 | ValueError: If quat is not a normalized quaternion. 145 | 146 | Returns: 147 | axis: Axis of rotation. 148 | angle: Angle in radians. 149 | """ 150 | if len(quat) != 4: 151 | raise ValueError( 152 | 'Quaternion should have four components [x, y, z, w]: %s' % quat) 153 | if not np.isclose(1.0, np.linalg.norm(quat)): 154 | raise ValueError('Quaternion should have unit length: |q| = %f, q = %s' % 155 | (np.linalg.norm(quat), quat)) 156 | axis = quat[:3].copy() 157 | axis_norm = np.linalg.norm(axis) 158 | min_axis_norm = 1e-8 159 | if axis_norm < min_axis_norm: 160 | axis = default_axis 161 | if len(default_axis) != 3: 162 | raise ValueError('Axis vector should have three components: %s' % axis) 163 | if not np.isclose(np.linalg.norm(axis), 1.0): 164 | raise ValueError('Axis vector should have unit length: |v| = %f, v = %s' % 165 | (np.linalg.norm(axis), axis)) 166 | else: 167 | axis /= axis_norm 168 | sin_half_angle = axis_norm 169 | if direction_axis is not None and np.inner(axis, direction_axis) < 0: 170 | sin_half_angle = -sin_half_angle 171 | axis = -axis 172 | cos_half_angle = quat[3] 173 | half_angle = math.atan2(sin_half_angle, cos_half_angle) 174 | angle = half_angle * 2 175 | return axis, angle 176 | 177 | 178 | def QuaternionRandomRotation(max_angle=math.pi): 179 | """Creates a random small rotation around a random axis. 180 | 181 | Generates a small rotation with the axis vector selected uniformly 182 | from the unit sphere and an angle selected from a uniform 183 | distribution over [0, max_angle]. 184 | 185 | If the max_angle is not specified, the rotation should be selected 186 | uniformly over all possible rotation angles. 187 | 188 | Args: 189 | max_angle: The maximum angle of rotation (radians). 190 | 191 | Returns: 192 | A unit quaternion in a numpy array. 193 | 194 | """ 195 | 196 | angle = np.random.uniform(low=0, high=max_angle) 197 | axis = Vector3RandomUnit() 198 | return QuaternionFromAxisAngle(axis, angle) 199 | 200 | 201 | def QuaternionRotatePoint(point, quat): 202 | """Performs a rotation by quaternion. 203 | 204 | Rotate the point by the quaternion using quaternion multiplication, 205 | (q * p * q^-1), without constructing the rotation matrix. 206 | 207 | Args: 208 | point: The point to be rotated. 209 | quat: The rotation represented as a quaternion [x, y, z, w]. 210 | 211 | Returns: 212 | A 3D vector in a numpy array. 213 | """ 214 | 215 | q_point = np.array([point[0], point[1], point[2], 0.0]) 216 | quat_inverse = transformations.quaternion_inverse(quat) 217 | q_point_rotated = transformations.quaternion_multiply( 218 | transformations.quaternion_multiply(quat, q_point), quat_inverse) 219 | return q_point_rotated[:3] 220 | 221 | 222 | def IsRotationMatrix(m): 223 | """Returns true if the 3x3 submatrix represents a rotation. 224 | 225 | Args: 226 | m: A transformation matrix. 227 | 228 | Raises: 229 | ValueError: If input is not a matrix of size at least 3x3. 230 | 231 | Returns: 232 | True if the 3x3 submatrix is a rotation (orthogonal). 
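
  Example (illustrative):
    IsRotationMatrix(np.eye(4))                    # True: identity transform
    IsRotationMatrix(np.diag([1., 2., 1., 1.]))    # False: the 3x3 block is scaled, not orthogonal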
233 | """ 234 | if len(m.shape) != 2 or m.shape[0] < 3 or m.shape[1] < 3: 235 | raise ValueError('Matrix should be 3x3 or 4x4: %s\n %s' % (m.shape, m)) 236 | rot = m[:3, :3] 237 | eye = np.matmul(rot, np.transpose(rot)) 238 | return np.isclose(eye, np.identity(3), atol=1e-4).all() 239 | 240 | # def ZAxisAlignedRobotPoseTool(robot_pose_tool): 241 | # """Returns the current gripper pose rotated for alignment with the z-axis. 242 | 243 | # Args: 244 | # robot_pose_tool: a pose3d.Pose3d() instance. 245 | 246 | # Returns: 247 | # An instance of pose.Transform representing the current gripper pose 248 | # rotated for alignment with the z-axis. 249 | # """ 250 | # # Align the current pose to the z-axis. 251 | # robot_pose_tool.quaternion = transformations.quaternion_multiply( 252 | # RotationBetween( 253 | # robot_pose_tool.matrix4x4[0:3, 0:3].dot(np.array([0, 0, 1])), 254 | # np.array([0.0, 0.0, -1.0])), robot_pose_tool.quaternion) 255 | # return robot_pose_tool 256 | 257 | # def RotationBetween(a_translation_b, a_translation_c): 258 | # """Computes the rotation from one vector to another. 259 | 260 | # The computed rotation has the property that: 261 | 262 | # a_translation_c = a_rotation_b_to_c * a_translation_b 263 | 264 | # Args: 265 | # a_translation_b: vec3, vector to rotate from 266 | # a_translation_c: vec3, vector to rotate to 267 | 268 | # Returns: 269 | # a_rotation_b_to_c: new Orientation 270 | # """ 271 | # rotation = rotation3.Rotation3.rotation_between( 272 | # a_translation_b, a_translation_c, err_msg='RotationBetween') 273 | # return rotation.quaternion.xyzw 274 | -------------------------------------------------------------------------------- /a1sim/robots/LieAlgebra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | 4 | # NOTE: Code snippets from Modern Robotics at Northwestern University: 5 | # See https://github.com/NxRLab/ModernRobotics 6 | 7 | 8 | def RpToTrans(R, p): 9 | """ 10 | Converts a rotation matrix and a position vector into homogeneous 11 | transformation matrix 12 | 13 | :param R: A 3x3 rotation matrix 14 | :param p: A 3-vector 15 | :return: A homogeneous transformation matrix corresponding to the inputs 16 | 17 | Example Input: 18 | R = np.array([[1, 0, 0], 19 | [0, 0, -1], 20 | [0, 1, 0]]) 21 | p = np.array([1, 2, 5]) 22 | 23 | Output: 24 | np.array([[1, 0, 0, 1], 25 | [0, 0, -1, 2], 26 | [0, 1, 0, 5], 27 | [0, 0, 0, 1]]) 28 | """ 29 | return np.r_[np.c_[R, p], [[0, 0, 0, 1]]] 30 | 31 | 32 | def TransToRp(T): 33 | """ 34 | Converts a homogeneous transformation matrix into a rotation matrix 35 | and position vector 36 | 37 | :param T: A homogeneous transformation matrix 38 | :return R: The corresponding rotation matrix, 39 | :return p: The corresponding position vector. 40 | 41 | Example Input: 42 | T = np.array([[1, 0, 0, 0], 43 | [0, 0, -1, 0], 44 | [0, 1, 0, 3], 45 | [0, 0, 0, 1]]) 46 | 47 | Output: 48 | (np.array([[1, 0, 0], 49 | [0, 0, -1], 50 | [0, 1, 0]]), 51 | np.array([0, 0, 3])) 52 | """ 53 | T = np.array(T) 54 | return T[0:3, 0:3], T[0:3, 3] 55 | 56 | 57 | def TransInv(T): 58 | """ 59 | Inverts a homogeneous transformation matrix 60 | 61 | :param T: A homogeneous transformation matrix 62 | :return: The inverse of T 63 | Uses the structure of transformation matrices to avoid taking a matrix 64 | inverse, for efficiency. 
65 | 66 | Example input: 67 | T = np.array([[1, 0, 0, 0], 68 | [0, 0, -1, 0], 69 | [0, 1, 0, 3], 70 | [0, 0, 0, 1]]) 71 | Output: 72 | np.array([[1, 0, 0, 0], 73 | [0, 0, 1, -3], 74 | [0, -1, 0, 0], 75 | [0, 0, 0, 1]]) 76 | """ 77 | R, p = TransToRp(T) 78 | Rt = np.array(R).T 79 | return np.r_[np.c_[Rt, -np.dot(Rt, p)], [[0, 0, 0, 1]]] 80 | 81 | 82 | def Adjoint(T): 83 | """ 84 | Computes the adjoint representation of a homogeneous transformation 85 | matrix 86 | 87 | :param T: A homogeneous transformation matrix 88 | :return: The 6x6 adjoint representation [AdT] of T 89 | 90 | Example Input: 91 | T = np.array([[1, 0, 0, 0], 92 | [0, 0, -1, 0], 93 | [0, 1, 0, 3], 94 | [0, 0, 0, 1]]) 95 | Output: 96 | np.array([[1, 0, 0, 0, 0, 0], 97 | [0, 0, -1, 0, 0, 0], 98 | [0, 1, 0, 0, 0, 0], 99 | [0, 0, 3, 1, 0, 0], 100 | [3, 0, 0, 0, 0, -1], 101 | [0, 0, 0, 0, 1, 0]]) 102 | """ 103 | R, p = TransToRp(T) 104 | return np.r_[np.c_[R, np.zeros((3, 3))], np.c_[np.dot(VecToso3(p), R), R]] 105 | 106 | 107 | def VecToso3(omg): 108 | """ 109 | Converts a 3-vector to an so(3) representation 110 | 111 | :param omg: A 3-vector 112 | :return: The skew symmetric representation of omg 113 | 114 | Example Input: 115 | omg = np.array([1, 2, 3]) 116 | Output: 117 | np.array([[ 0, -3, 2], 118 | [ 3, 0, -1], 119 | [-2, 1, 0]]) 120 | """ 121 | return np.array([[0, -omg[2], omg[1]], [omg[2], 0, -omg[0]], 122 | [-omg[1], omg[0], 0]]) 123 | 124 | 125 | def RPY(roll, pitch, yaw): 126 | """ 127 | Creates a Roll, Pitch, Yaw Transformation Matrix 128 | 129 | :param roll: roll component of matrix 130 | :param pitch: pitch component of matrix 131 | :param yaw: yaw component of matrix 132 | :return: The transformation matrix 133 | 134 | Example Input: 135 | roll = 0.0 136 | pitch = 0.0 137 | yaw = 0.0 138 | Output: 139 | np.array([[1, 0, 0, 0], 140 | [0, 1, 0, 0], 141 | [0, 0, 1, 0], 142 | [0, 0, 0, 1]]) 143 | """ 144 | Roll = np.array([[1, 0, 0, 0], [0, np.cos(roll), -np.sin(roll), 0], 145 | [0, np.sin(roll), np.cos(roll), 0], [0, 0, 0, 1]]) 146 | Pitch = np.array([[np.cos(pitch), 0, np.sin(pitch), 0], [0, 1, 0, 0], 147 | [-np.sin(pitch), 0, np.cos(pitch), 0], [0, 0, 0, 1]]) 148 | Yaw = np.array([[np.cos(yaw), -np.sin(yaw), 0, 0], 149 | [np.sin(yaw), np.cos(yaw), 0, 0], [0, 0, 1, 0], 150 | [0, 0, 0, 1]]) 151 | return np.matmul(np.matmul(Roll, Pitch), Yaw) 152 | 153 | 154 | def RotateTranslate(rotation, position): 155 | """ 156 | Creates a Transformation Matrix from a Rotation, THEN, a Translation 157 | 158 | :param rotation: pure rotation matrix 159 | :param translation: pure translation matrix 160 | :return: The transformation matrix 161 | """ 162 | trans = np.eye(4) 163 | trans[0, 3] = position[0] 164 | trans[1, 3] = position[1] 165 | trans[2, 3] = position[2] 166 | 167 | return np.dot(rotation, trans) 168 | 169 | 170 | def TransformVector(xyz_coord, rotation, translation): 171 | """ 172 | Transforms a vector by a specified Rotation THEN Translation Matrix 173 | 174 | :param xyz_coord: the vector to transform 175 | :param rotation: pure rotation matrix 176 | :param translation: pure translation matrix 177 | :return: The transformed vector 178 | """ 179 | xyz_vec = np.append(xyz_coord, 1.0) 180 | 181 | Transformed = np.dot(RotateTranslate(rotation, translation), xyz_vec) 182 | return Transformed[:3] 183 | -------------------------------------------------------------------------------- /a1sim/robots/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/a1sim/robots/__init__.py -------------------------------------------------------------------------------- /a1sim/robots/action_filter.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | """Two types of filters which can be applied to policy output sequences. 6 | 7 | 1. Simple exponential filter 8 | 2. Butterworth filter - lowpass or bandpass 9 | 10 | The implementation of the butterworth filter follows scipy's lfilter 11 | https://github.com/scipy/scipy/blob/v1.2.1/scipy/signal/signaltools.py 12 | 13 | We re-implement the logic in order to explicitly manage the y states 14 | 15 | The filter implements:: 16 | a[0]*y[n] = b[0]*x[n] + b[1]*x[n-1] + ... + b[M]*x[n-M] 17 | - a[1]*y[n-1] - ... - a[N]*y[n-N] 18 | 19 | We assume M == N. 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import collections 27 | import numpy as np 28 | from scipy.signal import butter 29 | 30 | ACTION_FILTER_ORDER = 2 31 | ACTION_FILTER_LOW_CUT = 0.0 32 | ACTION_FILTER_HIGH_CUT = 4.0 33 | 34 | class ActionFilter(object): 35 | """Implements a generic lowpass or bandpass action filter.""" 36 | 37 | def __init__(self, a, b, order, num_joints, ftype='lowpass'): 38 | """Initializes filter. 39 | 40 | Either one per joint or same for all joints. 41 | 42 | Args: 43 | a: filter output history coefficients 44 | b: filter input coefficients 45 | order: filter order 46 | num_joints: robot DOF 47 | ftype: filter type. 'lowpass' or 'bandpass' 48 | """ 49 | self.num_joints = num_joints 50 | if isinstance(a, list): 51 | self.a = a 52 | self.b = b 53 | else: 54 | self.a = [a] 55 | self.b = [b] 56 | 57 | # Either a set of parameters per joint must be specified as a list 58 | # Or one filter is applied to every joint 59 | if not ((len(self.a) == len(self.b) == num_joints) or ( 60 | len(self.a) == len(self.b) == 1)): 61 | raise ValueError('Incorrect number of filter values specified') 62 | 63 | # Normalize by a[0] 64 | for i in range(len(self.a)): 65 | self.b[i] /= self.a[i][0] 66 | self.a[i] /= self.a[i][0] 67 | 68 | # Convert single filter to same format as filter per joint 69 | if len(self.a) == 1: 70 | self.a *= num_joints 71 | self.b *= num_joints 72 | self.a = np.stack(self.a) 73 | self.b = np.stack(self.b) 74 | 75 | if ftype == 'bandpass': 76 | assert len(self.b[0]) == len(self.a[0]) == 2 * order + 1 77 | self.hist_len = 2 * order 78 | elif ftype == 'lowpass': 79 | assert len(self.b[0]) == len(self.a[0]) == order + 1 80 | self.hist_len = order 81 | else: 82 | raise ValueError('%s filter type not supported' % (ftype)) 83 | 84 | # logging.info('Filter shapes: a: %s, b: %s', self.a.shape, self.b.shape) 85 | # logging.info('Filter type:%s', ftype) 86 | 87 | self.yhist = collections.deque(maxlen=self.hist_len) 88 | self.xhist = collections.deque(maxlen=self.hist_len) 89 | self.reset() 90 | 91 | def reset(self): 92 | """Resets the history buffers to 0.""" 93 | self.yhist.clear() 94 | self.xhist.clear() 95 | for _ in range(self.hist_len): 96 | self.yhist.appendleft(np.zeros((self.num_joints, 1))) 97 | self.xhist.appendleft(np.zeros((self.num_joints, 1))) 98 | 99 | def filter(self, x): 100 | """Returns filtered x.""" 101 | xs = np.concatenate(list(self.xhist), axis=-1) 102 
| ys = np.concatenate(list(self.yhist), axis=-1) 103 | y = np.multiply(x, self.b[:, 0]) + np.sum( 104 | np.multiply(xs, self.b[:, 1:]), axis=-1) - np.sum( 105 | np.multiply(ys, self.a[:, 1:]), axis=-1) 106 | self.xhist.appendleft(x.reshape((self.num_joints, 1)).copy()) 107 | self.yhist.appendleft(y.reshape((self.num_joints, 1)).copy()) 108 | return y 109 | 110 | def init_history(self, x): 111 | x = np.expand_dims(x, axis=-1) 112 | for i in range(self.hist_len): 113 | self.xhist[i] = x 114 | self.yhist[i] = x 115 | 116 | class ActionFilterButter(ActionFilter): 117 | """Butterworth filter.""" 118 | 119 | def __init__(self, 120 | lowcut=None, 121 | highcut=None, 122 | sampling_rate=None, 123 | order=ACTION_FILTER_ORDER, 124 | num_joints=None): 125 | """Initializes a butterworth filter. 126 | 127 | Either one per joint or same for all joints. 128 | 129 | Args: 130 | lowcut: list of strings defining the low cutoff frequencies. 131 | The list must contain either 1 element (same filter for all joints) 132 | or num_joints elements 133 | 0 for lowpass, > 0 for bandpass. Either all values must be 0 134 | or all > 0 135 | highcut: list of strings defining the high cutoff frequencies. 136 | The list must contain either 1 element (same filter for all joints) 137 | or num_joints elements 138 | All must be > 0 139 | sampling_rate: frequency of samples in Hz 140 | order: filter order 141 | num_joints: robot DOF 142 | """ 143 | self.lowcut = ([float(x) for x in lowcut] 144 | if lowcut is not None else [ACTION_FILTER_LOW_CUT]) 145 | self.highcut = ([float(x) for x in highcut] 146 | if highcut is not None else [ACTION_FILTER_HIGH_CUT]) 147 | if len(self.lowcut) != len(self.highcut): 148 | raise ValueError('Number of lowcut and highcut filter values should ' 149 | 'be the same') 150 | 151 | if sampling_rate is None: 152 | raise ValueError('sampling_rate should be provided.') 153 | 154 | if num_joints is None: 155 | raise ValueError('num_joints should be provided.') 156 | 157 | if np.any(self.lowcut): 158 | if not np.all(self.lowcut): 159 | raise ValueError('All the filters must be of the same type: ' 160 | 'lowpass or bandpass') 161 | self.ftype = 'bandpass' 162 | else: 163 | self.ftype = 'lowpass' 164 | 165 | a_coeffs = [] 166 | b_coeffs = [] 167 | for i, (l, h) in enumerate(zip(self.lowcut, self.highcut)): 168 | if h <= 0.0: 169 | raise ValueError('Highcut must be > 0') 170 | 171 | b, a = self.butter_filter(l, h, sampling_rate, order) 172 | # logging.info( 173 | # 'Butterworth filter: joint: %d, lowcut: %f, highcut: %f, ' 174 | # 'sampling rate: %d, order: %d, num joints: %d', i, l, h, 175 | # sampling_rate, order, num_joints) 176 | b_coeffs.append(b) 177 | a_coeffs.append(a) 178 | 179 | super(ActionFilterButter, self).__init__( 180 | a_coeffs, b_coeffs, order, num_joints, self.ftype) 181 | 182 | def butter_filter(self, lowcut, highcut, fs, order=5): 183 | """Returns the coefficients of a butterworth filter. 184 | 185 | If lowcut = 0, the function returns the coefficients of a low pass filter. 186 | Otherwise, the coefficients of a band pass filter are returned. 
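  For instance (illustrative parameter values), butter_filter(lowcut=0, highcut=4.0,
  fs=50, order=2) yields low-pass coefficients, while butter_filter(lowcut=0.3,
  highcut=4.0, fs=50, order=2) yields band-pass coefficients.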
187 | Highcut should be > 0 188 | 189 | Args: 190 | lowcut: low cutoff frequency 191 | highcut: high cutoff frequency 192 | fs: sampling rate 193 | order: filter order 194 | Return: 195 | b, a: parameters of a butterworth filter 196 | """ 197 | nyq = 0.5 * fs 198 | low = lowcut / nyq 199 | high = highcut / nyq 200 | if low: 201 | b, a = butter(order, [low, high], btype='band') 202 | else: 203 | b, a = butter(order, [high], btype='low') 204 | return b, a 205 | 206 | 207 | class ActionFilterExp(ActionFilter): 208 | """Filter by way of simple exponential smoothing. 209 | 210 | y = alpha * x + (1 - alpha) * previous_y 211 | """ 212 | 213 | def __init__(self, alpha, num_joints): 214 | """Initialize the filter. 215 | 216 | Args: 217 | alpha: list of strings defining the alphas. 218 | The list must contain either 1 element (same filter for all joints) 219 | or num_joints elements 220 | 0 < alpha <= 1 221 | num_joints: robot DOF 222 | """ 223 | self.alphas = [float(x) for x in alpha] 224 | # logging.info('Exponential filter: alpha: %d', self.alphas) 225 | 226 | a_coeffs = [] 227 | b_coeffs = [] 228 | for a in self.alphas: 229 | a_coeffs.append(np.asarray([1., a - 1.])) 230 | b_coeffs.append(np.asarray([a, 0])) 231 | 232 | order = 1 233 | self.ftype = 'lowpass' 234 | 235 | super(ActionFilterExp, self).__init__( 236 | a_coeffs, b_coeffs, order, num_joints, self.ftype) 237 | -------------------------------------------------------------------------------- /a1sim/robots/kinematics.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """The inverse kinematic utilities.""" 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import typing 14 | 15 | _IDENTITY_ORIENTATION = (0, 0, 0, 1) 16 | 17 | 18 | def joint_angles_from_link_position( 19 | robot: typing.Any, 20 | link_position: typing.Sequence[float], 21 | link_id: int, 22 | joint_ids: typing.Sequence[int], 23 | base_translation: typing.Sequence[float] = (0, 0, 0), 24 | base_rotation: typing.Sequence[float] = (0, 0, 0, 1)): 25 | """Uses Inverse Kinematics to calculate joint angles. 26 | 27 | Args: 28 | robot: A robot instance. 29 | link_position: The (x, y, z) of the link in the body frame. This local frame 30 | is transformed relative to the COM frame using a given translation and 31 | rotation. 32 | link_id: The link id as returned from loadURDF. 33 | joint_ids: The positional index of the joints. This can be different from 34 | the joint unique ids. 35 | base_translation: Additional base translation. 36 | base_rotation: Additional base rotation. 37 | 38 | Returns: 39 | A list of joint angles. 40 | """ 41 | # Projects to local frame. 42 | base_position, base_orientation = robot.GetBasePosition( 43 | ), robot.GetBaseOrientation() 44 | base_position, base_orientation = robot.pybullet_client.multiplyTransforms( 45 | base_position, base_orientation, base_translation, base_rotation) 46 | 47 | # Projects to world space. 48 | world_link_pos, _ = robot.pybullet_client.multiplyTransforms( 49 | base_position, base_orientation, link_position, _IDENTITY_ORIENTATION) 50 | ik_solver = 0 51 | all_joint_angles = robot.pybullet_client.calculateInverseKinematics( 52 | robot.quadruped, link_id, world_link_pos, solver=ik_solver) 53 | 54 | # Extract the relevant joint angles. 
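  # (calculateInverseKinematics returns an angle for every movable joint of the
  # robot, so only the entries for this leg's joints are kept here.)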
55 | joint_angles = [all_joint_angles[i] for i in joint_ids] 56 | return joint_angles 57 | 58 | 59 | def link_position_in_base_frame( 60 | robot: typing.Any, 61 | link_id: int, 62 | ): 63 | """Computes the link's local position in the robot frame. 64 | 65 | Args: 66 | robot: A robot instance. 67 | link_id: The link to calculate its relative position. 68 | 69 | Returns: 70 | The relative position of the link. 71 | """ 72 | base_position, base_orientation = robot.GetBasePosition( 73 | ), robot.GetBaseOrientation() 74 | inverse_translation, inverse_rotation = robot.pybullet_client.invertTransform( 75 | base_position, base_orientation) 76 | 77 | link_state = robot.pybullet_client.getLinkState(robot.quadruped, link_id) 78 | link_position = link_state[0] 79 | link_local_position, _ = robot.pybullet_client.multiplyTransforms( 80 | inverse_translation, inverse_rotation, link_position, (0, 0, 0, 1)) 81 | link_local_position = [0]*3 82 | return np.array(link_local_position) 83 | 84 | 85 | def compute_jacobian( 86 | robot: typing.Any, 87 | link_id: int, 88 | ): 89 | """Computes the Jacobian matrix for the given link. 90 | 91 | Args: 92 | robot: A robot instance. 93 | link_id: The link id as returned from loadURDF. 94 | 95 | Returns: 96 | The 3 x N transposed Jacobian matrix. where N is the total DoFs of the 97 | robot. For a quadruped, the first 6 columns of the matrix corresponds to 98 | the CoM translation and rotation. The columns corresponds to a leg can be 99 | extracted with indices [6 + leg_id * 3: 6 + leg_id * 3 + 3]. 100 | """ 101 | 102 | all_joint_angles = [state[0] for state in robot.joint_states] 103 | zero_vec = [0] * len(all_joint_angles) 104 | jv, _ = robot.pybullet_client.calculateJacobian(robot.quadruped, link_id, 105 | (0, 0, 0), all_joint_angles, 106 | zero_vec, zero_vec) 107 | jacobian = np.array(jv) 108 | return jacobian 109 | -------------------------------------------------------------------------------- /a1sim/robots/laikago_constants.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | # Lint as: python3 7 | """Defines the laikago robot related constants and URDF specs.""" 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import collections 14 | import math 15 | import pybullet as pyb # pytype: disable=import-error 16 | 17 | NUM_MOTORS = 12 18 | NUM_LEGS = 4 19 | MOTORS_PER_LEG = 3 20 | 21 | INIT_RACK_POSITION = [0, 0, 1] 22 | INIT_POSITION = [0, 0, 0.48] 23 | 24 | # Will be default to (0, 0, 0, 1) once the new laikago_toes_zup.urdf checked in. 25 | INIT_ORIENTATION = pyb.getQuaternionFromEuler([math.pi / 2.0, 0, math.pi / 2.0]) 26 | 27 | # Can be different from the motors, although for laikago they are the same list. 
28 | JOINT_NAMES = ( 29 | # front right leg 30 | "FR_hip_motor_2_chassis_joint", 31 | "FR_upper_leg_2_hip_motor_joint", 32 | "FR_lower_leg_2_upper_leg_joint", 33 | # front left leg 34 | "FL_hip_motor_2_chassis_joint", 35 | "FL_upper_leg_2_hip_motor_joint", 36 | "FL_lower_leg_2_upper_leg_joint", 37 | # rear right leg 38 | "RR_hip_motor_2_chassis_joint", 39 | "RR_upper_leg_2_hip_motor_joint", 40 | "RR_lower_leg_2_upper_leg_joint", 41 | # rear left leg 42 | "RL_hip_motor_2_chassis_joint", 43 | "RL_upper_leg_2_hip_motor_joint", 44 | "RL_lower_leg_2_upper_leg_joint", 45 | ) 46 | 47 | INIT_ABDUCTION_ANGLE = 0 48 | INIT_HIP_ANGLE = 0.67 49 | INIT_KNEE_ANGLE = -1.25 50 | 51 | # Note this matches the Laikago SDK/control convention, but is different from 52 | # URDF's internal joint angles which needs to be computed using the joint 53 | # offsets and directions. The conversion formula is (sdk_joint_angle + offset) * 54 | # joint direction. 55 | INIT_JOINT_ANGLES = collections.OrderedDict( 56 | zip(JOINT_NAMES, 57 | (INIT_ABDUCTION_ANGLE, INIT_HIP_ANGLE, INIT_KNEE_ANGLE) * NUM_LEGS)) 58 | 59 | # Used to convert the robot SDK joint angles to URDF joint angles. 60 | JOINT_DIRECTIONS = collections.OrderedDict( 61 | zip(JOINT_NAMES, (-1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1))) 62 | 63 | HIP_JOINT_OFFSET = 0.0 64 | UPPER_LEG_JOINT_OFFSET = -0.6 65 | KNEE_JOINT_OFFSET = 0.66 66 | 67 | # Used to convert the robot SDK joint angles to URDF joint angles. 68 | JOINT_OFFSETS = collections.OrderedDict( 69 | zip(JOINT_NAMES, 70 | [HIP_JOINT_OFFSET, UPPER_LEG_JOINT_OFFSET, KNEE_JOINT_OFFSET] * 71 | NUM_LEGS)) 72 | 73 | LEG_NAMES = ( 74 | "front_right", 75 | "front_left", 76 | "rear_right", 77 | "rear_left", 78 | ) 79 | 80 | LEG_ORDER = ( 81 | "front_right", 82 | "front_left", 83 | "back_right", 84 | "back_left", 85 | ) 86 | 87 | END_EFFECTOR_NAMES = ( 88 | "jtoeFR", 89 | "jtoeFL", 90 | "jtoeRR", 91 | "jtoeRL", 92 | ) 93 | 94 | MOTOR_NAMES = JOINT_NAMES 95 | MOTOR_GROUP = collections.OrderedDict(( 96 | (LEG_NAMES[0], JOINT_NAMES[0:3]), 97 | (LEG_NAMES[1], JOINT_NAMES[3:6]), 98 | (LEG_NAMES[2], JOINT_NAMES[6:9]), 99 | (LEG_NAMES[3], JOINT_NAMES[9:12]), 100 | )) 101 | 102 | # Regulates the joint angle change when in position control mode. 103 | MAX_MOTOR_ANGLE_CHANGE_PER_STEP = 0.12 104 | 105 | # The hip joint location in the CoM frame. 
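# As an illustration of the SDK-to-URDF conversion formula noted above
# (hypothetical SDK reading): a knee angle of -1.25 rad on the front-right leg
# maps to the URDF angle
#   (-1.25 + KNEE_JOINT_OFFSET) * JOINT_DIRECTIONS["FR_lower_leg_2_upper_leg_joint"]
#   = (-1.25 + 0.66) * 1 = -0.59 rad.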
106 | HIP_POSITIONS = collections.OrderedDict(( 107 | (LEG_NAMES[0], (0.21, -0.1157, 0)), 108 | (LEG_NAMES[1], (0.21, 0.1157, 0)), 109 | (LEG_NAMES[2], (-0.21, -0.1157, 0)), 110 | (LEG_NAMES[3], (-0.21, 0.1157, 0)), 111 | )) 112 | -------------------------------------------------------------------------------- /a1sim/robots/laikago_motor.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | """Motor model for laikago.""" 6 | import os 7 | import inspect 8 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 9 | parentdir = os.path.dirname(os.path.dirname(currentdir)) 10 | os.sys.path.insert(0, parentdir) 11 | 12 | import collections 13 | import numpy as np 14 | 15 | from a1sim.robots import robot_config 16 | 17 | NUM_MOTORS = 12 18 | 19 | MOTOR_COMMAND_DIMENSION = 5 20 | 21 | # These values represent the indices of each field in the motor command tuple 22 | POSITION_INDEX = 0 23 | POSITION_GAIN_INDEX = 1 24 | VELOCITY_INDEX = 2 25 | VELOCITY_GAIN_INDEX = 3 26 | TORQUE_INDEX = 4 27 | 28 | class LaikagoMotorModel(object): 29 | """A simple motor model for Laikago. 30 | 31 | When in POSITION mode, the torque is calculated according to the difference 32 | between current and desired joint angle, as well as the joint velocity. 33 | For more information about PD control, please refer to: 34 | https://en.wikipedia.org/wiki/PID_controller. 35 | 36 | The model supports a HYBRID mode in which each motor command can be a tuple 37 | (desired_motor_angle, position_gain, desired_motor_velocity, velocity_gain, 38 | torque). 39 | 40 | """ 41 | 42 | def __init__(self, 43 | kp=60, 44 | kd=1, 45 | torque_limits=33.5, 46 | motor_control_mode=robot_config.MotorControlMode.POSITION): 47 | self._kp = kp 48 | self._kd = kd 49 | self._torque_limits = torque_limits 50 | if torque_limits is not None: 51 | if isinstance(torque_limits, (collections.Sequence, np.ndarray)): 52 | self._torque_limits = np.asarray(torque_limits) 53 | else: 54 | self._torque_limits = np.full(NUM_MOTORS, torque_limits) 55 | self._motor_control_mode = motor_control_mode 56 | self._strength_ratios = np.full(NUM_MOTORS, 1) 57 | 58 | def set_strength_ratios(self, ratios): 59 | """Set the strength of each motors relative to the default value. 60 | 61 | Args: 62 | ratios: The relative strength of motor output. A numpy array ranging from 63 | 0.0 to 1.0. 64 | """ 65 | self._strength_ratios = ratios 66 | 67 | def set_motor_gains(self, kp, kd): 68 | """Set the gains of all motors. 69 | 70 | These gains are PD gains for motor positional control. kp is the 71 | proportional gain and kd is the derivative gain. 72 | 73 | Args: 74 | kp: proportional gain of the motors. 75 | kd: derivative gain of the motors. 76 | """ 77 | self._kp = kp 78 | self._kd = kd 79 | 80 | def set_voltage(self, voltage): 81 | pass 82 | 83 | def get_voltage(self): 84 | return 0.0 85 | 86 | def set_viscous_damping(self, viscous_damping): 87 | pass 88 | 89 | def get_viscous_dampling(self): 90 | return 0.0 91 | 92 | def convert_to_torque(self, 93 | motor_commands, 94 | motor_angle, 95 | motor_velocity, 96 | true_motor_velocity, 97 | motor_control_mode): 98 | """Convert the commands (position control or torque control) to torque. 99 | 100 | Args: 101 | motor_commands: The desired motor angle if the motor is in position 102 | control mode. 
The pwm signal if the motor is in torque control mode. 103 | motor_angle: The motor angle observed at the current time step. It is 104 | actually the true motor angle observed a few milliseconds ago (pd 105 | latency). 106 | motor_velocity: The motor velocity observed at the current time step, it 107 | is actually the true motor velocity a few milliseconds ago (pd latency). 108 | true_motor_velocity: The true motor velocity. The true velocity is used to 109 | compute back EMF voltage and viscous damping. 110 | motor_control_mode: A MotorControlMode enum. 111 | 112 | Returns: 113 | actual_torque: The torque that needs to be applied to the motor. 114 | observed_torque: The torque observed by the sensor. 115 | """ 116 | del true_motor_velocity 117 | if not motor_control_mode: 118 | motor_control_mode = self._motor_control_mode 119 | 120 | if motor_control_mode is robot_config.MotorControlMode.PWM: 121 | raise ValueError( 122 | "{} is not a supported motor control mode".format(motor_control_mode)) 123 | 124 | # No processing for motor torques 125 | if motor_control_mode is robot_config.MotorControlMode.TORQUE: 126 | assert len(motor_commands) == NUM_MOTORS 127 | motor_torques = self._strength_ratios * motor_commands 128 | return motor_torques, motor_torques 129 | 130 | desired_motor_angles = None 131 | desired_motor_velocities = None 132 | kp = None 133 | kd = None 134 | additional_torques = np.full(NUM_MOTORS, 0) 135 | if motor_control_mode is robot_config.MotorControlMode.POSITION: 136 | if len(motor_commands) != NUM_MOTORS: 137 | motor_commands = motor_commands[0] 138 | assert len(motor_commands) == NUM_MOTORS 139 | kp = self._kp 140 | kd = self._kd 141 | desired_motor_angles = motor_commands 142 | desired_motor_velocities = np.full(NUM_MOTORS, 0) 143 | elif motor_control_mode is robot_config.MotorControlMode.HYBRID: 144 | # The input should be a 60 dimension vector 145 | assert len(motor_commands) == MOTOR_COMMAND_DIMENSION * NUM_MOTORS 146 | kp = motor_commands[POSITION_GAIN_INDEX::MOTOR_COMMAND_DIMENSION] 147 | kd = motor_commands[VELOCITY_GAIN_INDEX::MOTOR_COMMAND_DIMENSION] 148 | desired_motor_angles = motor_commands[ 149 | POSITION_INDEX::MOTOR_COMMAND_DIMENSION] 150 | desired_motor_velocities = motor_commands[ 151 | VELOCITY_INDEX::MOTOR_COMMAND_DIMENSION] 152 | additional_torques = motor_commands[TORQUE_INDEX::MOTOR_COMMAND_DIMENSION] 153 | else: 154 | print("Undefined motor_control_mode=",motor_control_mode) 155 | exit() 156 | motor_torques = -1 * (kp * (motor_angle - desired_motor_angles)) - kd * ( 157 | motor_velocity - desired_motor_velocities) + additional_torques 158 | motor_torques = self._strength_ratios * motor_torques 159 | # print('hh',motor_torques) 160 | # print(self._torque_limits) 161 | # raise NotImplemented 162 | if self._torque_limits is not None: 163 | if len(self._torque_limits) != len(motor_torques): 164 | raise ValueError( 165 | "Torque limits dimension does not match the number of motors.") 166 | motor_torques = np.clip(motor_torques, -1.0 * self._torque_limits, 167 | self._torque_limits) 168 | 169 | return motor_torques, motor_torques 170 | -------------------------------------------------------------------------------- /a1sim/robots/laikago_pose_utils.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """Utility functions to calculate Laikago's pose and motor angles.""" 7 | 8 | 
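# A minimal usage sketch (illustrative; `attr` is the attrs package imported
# below): build the default standing pose and flatten it to 12 motor angles.
#
#   pose = LaikagoPose(*([LAIKAGO_DEFAULT_ABDUCTION_ANGLE,
#                         LAIKAGO_DEFAULT_HIP_ANGLE,
#                         LAIKAGO_DEFAULT_KNEE_ANGLE] * 4))
#   motor_angles = attr.astuple(pose)   # (0, 0.9, -1.8, 0, 0.9, -1.8, ...)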
from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import attr 13 | 14 | # LAIKAGO_DEFAULT_ABDUCTION_ANGLE = 0 15 | # LAIKAGO_DEFAULT_HIP_ANGLE = 0.67 16 | # LAIKAGO_DEFAULT_KNEE_ANGLE = -1.25 17 | 18 | LAIKAGO_DEFAULT_ABDUCTION_ANGLE = 0 19 | LAIKAGO_DEFAULT_HIP_ANGLE = 0.9 20 | LAIKAGO_DEFAULT_KNEE_ANGLE = -1.8 21 | 22 | @attr.s 23 | class LaikagoPose(object): 24 | """Default pose of the Laikago. 25 | 26 | Leg order: 27 | 0 -> Front Right. 28 | 1 -> Front Left. 29 | 2 -> Rear Right. 30 | 3 -> Rear Left. 31 | """ 32 | abduction_angle_0 = attr.ib(type=float, default=0) 33 | hip_angle_0 = attr.ib(type=float, default=0) 34 | knee_angle_0 = attr.ib(type=float, default=0) 35 | abduction_angle_1 = attr.ib(type=float, default=0) 36 | hip_angle_1 = attr.ib(type=float, default=0) 37 | knee_angle_1 = attr.ib(type=float, default=0) 38 | abduction_angle_2 = attr.ib(type=float, default=0) 39 | hip_angle_2 = attr.ib(type=float, default=0) 40 | knee_angle_2 = attr.ib(type=float, default=0) 41 | abduction_angle_3 = attr.ib(type=float, default=0) 42 | hip_angle_3 = attr.ib(type=float, default=0) 43 | knee_angle_3 = attr.ib(type=float, default=0) 44 | -------------------------------------------------------------------------------- /a1sim/robots/minitaur_constants.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | # Lint as: python3 7 | """Defines the minitaur robot related constants and URDF specs.""" 8 | 9 | LEG_ORDER = ["front_left", "back_left", "front_right", "back_right"] 10 | -------------------------------------------------------------------------------- /a1sim/robots/minitaur_motor.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | """This file implements an accurate motor model.""" 6 | 7 | import os 8 | import inspect 9 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 10 | parentdir = os.path.dirname(os.path.dirname(currentdir)) 11 | os.sys.path.insert(0, parentdir) 12 | import numpy as np 13 | 14 | from a1sim.robots import robot_config 15 | 16 | VOLTAGE_CLIPPING = 50 17 | OBSERVED_TORQUE_LIMIT = 5.7 18 | MOTOR_VOLTAGE = 16.0 19 | MOTOR_RESISTANCE = 0.186 20 | MOTOR_TORQUE_CONSTANT = 0.0954 21 | MOTOR_VISCOUS_DAMPING = 0 22 | MOTOR_SPEED_LIMIT = MOTOR_VOLTAGE / (MOTOR_VISCOUS_DAMPING + 23 | MOTOR_TORQUE_CONSTANT) 24 | NUM_MOTORS = 8 25 | MOTOR_POS_LB = 0.5 26 | MOTOR_POS_UB = 2.5 27 | 28 | 29 | class MotorModel(object): 30 | """The accurate motor model, which is based on the physics of DC motors. 31 | 32 | The motor model support two types of control: position control and torque 33 | control. In position control mode, a desired motor angle is specified, and a 34 | torque is computed based on the internal motor model. When the torque control 35 | is specified, a pwm signal in the range of [-1.0, 1.0] is converted to the 36 | torque. 37 | 38 | The internal motor model takes the following factors into consideration: 39 | pd gains, viscous friction, back-EMF voltage and current-torque profile. 
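
  Example (illustrative; the zeroed joint state is hypothetical):
    motor_model = MotorModel(kp=1.2, kd=0.0)
    actual_torque, observed_torque = motor_model.convert_to_torque(
        np.full(NUM_MOTORS, 1.5),            # desired angles (POSITION mode)
        motor_angle=np.zeros(NUM_MOTORS),
        motor_velocity=np.zeros(NUM_MOTORS),
        true_motor_velocity=np.zeros(NUM_MOTORS))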
40 | """ 41 | def __init__(self, 42 | kp=1.2, 43 | kd=0, 44 | torque_limits=None, 45 | motor_control_mode=robot_config.MotorControlMode.POSITION): 46 | self._kp = kp 47 | self._kd = kd 48 | self._torque_limits = torque_limits 49 | self._motor_control_mode = motor_control_mode 50 | self._resistance = MOTOR_RESISTANCE 51 | self._voltage = MOTOR_VOLTAGE 52 | self._torque_constant = MOTOR_TORQUE_CONSTANT 53 | self._viscous_damping = MOTOR_VISCOUS_DAMPING 54 | self._current_table = [0, 10, 20, 30, 40, 50, 60] 55 | self._torque_table = [0, 1, 1.9, 2.45, 3.0, 3.25, 3.5] 56 | self._strength_ratios = [1.0] * NUM_MOTORS 57 | 58 | def set_strength_ratios(self, ratios): 59 | """Set the strength of each motors relative to the default value. 60 | 61 | Args: 62 | ratios: The relative strength of motor output. A numpy array ranging from 63 | 0.0 to 1.0. 64 | """ 65 | self._strength_ratios = np.array(ratios) 66 | 67 | def set_motor_gains(self, kp, kd): 68 | """Set the gains of all motors. 69 | 70 | These gains are PD gains for motor positional control. kp is the 71 | proportional gain and kd is the derivative gain. 72 | 73 | Args: 74 | kp: proportional gain of the motors. 75 | kd: derivative gain of the motors. 76 | """ 77 | self._kp = kp 78 | self._kd = kd 79 | 80 | def set_voltage(self, voltage): 81 | self._voltage = voltage 82 | 83 | def get_voltage(self): 84 | return self._voltage 85 | 86 | def set_viscous_damping(self, viscous_damping): 87 | self._viscous_damping = viscous_damping 88 | 89 | def get_viscous_dampling(self): 90 | return self._viscous_damping 91 | 92 | def convert_to_torque(self, 93 | motor_commands, 94 | motor_angle, 95 | motor_velocity, 96 | true_motor_velocity, 97 | motor_control_mode=None): 98 | """Convert the commands (position control or pwm control) to torque. 99 | 100 | Args: 101 | motor_commands: The desired motor angle if the motor is in position 102 | control mode. The pwm signal if the motor is in torque control mode. 103 | motor_angle: The motor angle observed at the current time step. It is 104 | actually the true motor angle observed a few milliseconds ago (pd 105 | latency). 106 | motor_velocity: The motor velocity observed at the current time step, it 107 | is actually the true motor velocity a few milliseconds ago (pd latency). 108 | true_motor_velocity: The true motor velocity. The true velocity is used to 109 | compute back EMF voltage and viscous damping. 110 | motor_control_mode: A MotorControlMode enum. 111 | 112 | Returns: 113 | actual_torque: The torque that needs to be applied to the motor. 114 | observed_torque: The torque observed by the sensor. 115 | """ 116 | if not motor_control_mode: 117 | motor_control_mode = self._motor_control_mode 118 | 119 | if (motor_control_mode is robot_config.MotorControlMode.TORQUE) or ( 120 | motor_control_mode is robot_config.MotorControlMode.HYBRID): 121 | raise ValueError("{} is not a supported motor control mode".format( 122 | motor_control_mode)) 123 | 124 | kp = self._kp 125 | kd = self._kd 126 | 127 | if motor_control_mode is robot_config.MotorControlMode.PWM: 128 | # The following implements a safety controller that softly enforces the 129 | # joint angles to remain within safe region: If PD controller targeting 130 | # the positive (negative) joint limit outputs a negative (positive) 131 | # signal, the corresponding joint violates the joint constraint, so 132 | # we should add the PD output to motor_command to bring it back to the 133 | # safe region. 134 | pd_max = -1 * kp * (motor_angle - 135 | MOTOR_POS_UB) - kd / 2. 
* motor_velocity 136 | pd_min = -1 * kp * (motor_angle - 137 | MOTOR_POS_LB) - kd / 2. * motor_velocity 138 | pwm = motor_commands + np.minimum(pd_max, 0) + np.maximum(pd_min, 0) 139 | else: 140 | pwm = -1 * kp * (motor_angle - motor_commands) - kd * motor_velocity 141 | pwm = np.clip(pwm, -1.0, 1.0) 142 | return self._convert_to_torque_from_pwm(pwm, true_motor_velocity) 143 | 144 | def _convert_to_torque_from_pwm(self, pwm, true_motor_velocity): 145 | """Convert the pwm signal to torque. 146 | 147 | Args: 148 | pwm: The pulse width modulation. 149 | true_motor_velocity: The true motor velocity at the current moment. It is 150 | used to compute the back EMF voltage and the viscous damping. 151 | 152 | Returns: 153 | actual_torque: The torque that needs to be applied to the motor. 154 | observed_torque: The torque observed by the sensor. 155 | """ 156 | observed_torque = np.clip( 157 | self._torque_constant * 158 | (np.asarray(pwm) * self._voltage / self._resistance), 159 | -OBSERVED_TORQUE_LIMIT, OBSERVED_TORQUE_LIMIT) 160 | if self._torque_limits is not None: 161 | observed_torque = np.clip(observed_torque, -1.0 * self._torque_limits, 162 | self._torque_limits) 163 | 164 | # print(self._torque_limits) 165 | # raise NotImplementedError 166 | # Net voltage is clipped at 50V by diodes on the motor controller. 167 | voltage_net = np.clip( 168 | np.asarray(pwm) * self._voltage - 169 | (self._torque_constant + self._viscous_damping) * 170 | np.asarray(true_motor_velocity), -VOLTAGE_CLIPPING, VOLTAGE_CLIPPING) 171 | current = voltage_net / self._resistance 172 | current_sign = np.sign(current) 173 | current_magnitude = np.absolute(current) 174 | # Saturate torque based on empirical current relation. 175 | actual_torque = np.interp(current_magnitude, self._current_table, 176 | self._torque_table) 177 | actual_torque = np.multiply(current_sign, actual_torque) 178 | actual_torque = np.multiply(self._strength_ratios, actual_torque) 179 | if self._torque_limits is not None: 180 | actual_torque = np.clip(actual_torque, -1.0 * self._torque_limits, 181 | self._torque_limits) 182 | return actual_torque, observed_torque 183 | -------------------------------------------------------------------------------- /a1sim/robots/minitaur_pose_utils.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | 6 | """Utility functions to calculate Minitaur's pose and motor angles.""" 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import math 13 | import attr 14 | import numpy as np 15 | 16 | NUM_MOTORS = 8 17 | NUM_LEGS = 4 18 | MOTOR_SIGNS = (1, 1, -1, -1) 19 | # Constants for the function swing_extend_to_motor_angles 20 | EPS = 0.1 21 | # Range of motion for the legs (does not allow pointing towards the body). 22 | LEG_SWING_LIMIT_LOW = -math.pi / 2 + EPS 23 | LEG_SWING_LIMIT_HIGH = 3 * math.pi / 2 - EPS 24 | # Range of gap between motors for feasibility. 
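# For orientation, an illustrative round trip through the two conversion
# functions defined below (hypothetical pose values):
#   leg_pose = [0.1, 0.2, -0.1, -0.2, 1.0, 1.0, 1.0, 1.0]   # 4 swings, then 4 extensions
#   leg_pose_to_motor_angles(leg_pose)
#   # -> [0.9, 1.1, 0.8, 1.2, 0.9, 1.1, 0.8, 1.2]
#   motor_angles_to_leg_pose(leg_pose_to_motor_angles(leg_pose))
#   # -> recovers leg_pose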
25 | MOTORS_GAP_LIMIT_HIGH = 2 * math.pi - EPS 26 | MOTORS_GAP_LIMIT_LOW = EPS 27 | 28 | 29 | @attr.s 30 | class MinitaurPose(object): 31 | """Default pose of the Minitaur.""" 32 | swing_angle_0 = attr.ib(type=float, default=0) 33 | swing_angle_1 = attr.ib(type=float, default=0) 34 | swing_angle_2 = attr.ib(type=float, default=0) 35 | swing_angle_3 = attr.ib(type=float, default=0) 36 | extension_angle_0 = attr.ib(type=float, default=0) 37 | extension_angle_1 = attr.ib(type=float, default=0) 38 | extension_angle_2 = attr.ib(type=float, default=0) 39 | extension_angle_3 = attr.ib(type=float, default=0) 40 | 41 | 42 | def motor_angles_to_leg_pose(motor_angles): 43 | """Convert motor angles to the leg pose. 44 | 45 | A single leg pose is a tuple (swing, extension). The definition can be find 46 | in: 47 | Sim-to-Real: Learning Agile Locomotion For Quadruped Robot 48 | 49 | Args: 50 | motor_angles: A numpy array. Contains all eight motor angles for Minitaur. 51 | 52 | Returns: 53 | A numpy array. Contains the leg pose for all four legs: [swing_0, swing_1, 54 | swing_2, swing_3, extension_0, extension_1, extension_2, extension_3] 55 | 56 | """ 57 | motor_angles = np.array(motor_angles) 58 | 59 | swings = 0.5 * np.multiply( 60 | np.array(MOTOR_SIGNS), (motor_angles[1::2] - motor_angles[::2])) 61 | extensions = 0.5 * (motor_angles[::2] + motor_angles[1::2]) 62 | 63 | return np.concatenate((swings, extensions), axis=None) 64 | 65 | 66 | def leg_pose_to_motor_angles(leg_pose): 67 | """Converts the leg pose to the motor angles. 68 | 69 | Args: 70 | leg_pose: A numpy array. Contains the leg pose for all four legs: [swing_0, 71 | swing_1, swing_2, swing_3, extension_0, extension_1, extension_2, 72 | extension_3] 73 | 74 | Returns: 75 | A numpy array. All eight motor angles. 76 | """ 77 | leg_pose = np.array(leg_pose) 78 | 79 | # swings multiplied with the sign array. 80 | signed_swings = np.multiply(np.array(MOTOR_SIGNS), leg_pose[0:NUM_LEGS]) 81 | extensions = leg_pose[NUM_LEGS:] 82 | 83 | motor_angles = np.zeros(NUM_MOTORS) 84 | motor_angles[1::2] = signed_swings + extensions 85 | motor_angles[::2] = extensions - signed_swings 86 | return motor_angles 87 | 88 | 89 | # This method also does the same conversion, but 0 swing and 0 extension maps 90 | # to a neutral standing still motor positions with motors at + or - pi. It also 91 | # contains a safety layer so that the legs don't swing or extend too much to hit 92 | # the body of the robot. 93 | def leg_pose_to_motor_angles_with_half_pi_offset_and_safety(leg_pose): 94 | """Converts the swing extension poses to the motor angles with safety limits. 95 | 96 | Args: 97 | leg_pose: A numpy array. Contains the leg pose for all four legs: [swing_0, 98 | extension_0, swing_1, extension_1, swing_2, extension_2, swing_3, 99 | extension_3] 100 | 101 | Returns: 102 | A numpy array. All eight motor angles. 103 | """ 104 | 105 | motor_angles = [] 106 | for idx in range(4): 107 | swing = leg_pose[idx * 2] 108 | extend = leg_pose[idx * 2 + 1] 109 | motor_angles.extend(swing_extend_to_motor_angles(idx, swing, extend)) 110 | return motor_angles 111 | 112 | 113 | def swing_extend_to_motor_angles(leg_id, swing, extension, noise_stdev=0): 114 | """Swing - extension based leg model for minitaur. 115 | 116 | Swing extension leg model calculates motor positions using 2 separate motions: 117 | swing and extension. Swing rotates the whole leg by rotating both motors 118 | equally towards same direction. 
Extension increases or decreases the length 119 | of the leg by turning both motors equally in opposite direction. 120 | 121 | This method also does the same conversion as leg_pose_to_motor_angles, but 0 122 | swing and 0 extension maps to a neutral standing still motor positions with 123 | motors at + or - pi. 124 | Args: 125 | leg_id: The id of the leg that the conversion is made for (0, 1, 2, 3). 126 | swing: Swing degree for the leg (in radians). 0 means perpendicular to the 127 | body). 128 | extension: Extension level (length) of the leg, limited to [-1, 1]. 129 | noise_stdev: Standard deviation of the introduced noise at the motor 130 | position level. Noise is turned off by default. 131 | 132 | Returns: 133 | motor0: Position for the first motor for that leg. 134 | motor1: Position for the second motor for that leg. 135 | Raises: 136 | ValueError: In case calculated positions are outside the allowed boundaries. 137 | """ 138 | # Check if the leg_id is in valid range 139 | if not 0 <= leg_id <= 3: 140 | raise ValueError('leg {} does not exist for a quadruped.'.format(leg_id)) 141 | 142 | # Front legs can not swing too much towards the body. 143 | if leg_id % 2 == 0: 144 | swing = np.clip(swing, LEG_SWING_LIMIT_LOW, LEG_SWING_LIMIT_HIGH) 145 | # Back legs can not swing too much towards the body (opposite direction). 146 | else: 147 | swing = np.clip(swing, -LEG_SWING_LIMIT_HIGH, -LEG_SWING_LIMIT_LOW) 148 | 149 | # Check if the motors are too close or too far away to make it impossible 150 | # for the physical robot. 151 | gap = math.pi - 2 * extension 152 | if gap < MOTORS_GAP_LIMIT_LOW or gap > MOTORS_GAP_LIMIT_HIGH: 153 | top_extension = (math.pi - MOTORS_GAP_LIMIT_LOW) / 2.0 154 | least_extension = (math.pi - MOTORS_GAP_LIMIT_HIGH) / 2.0 155 | extension = np.clip(extension, least_extension, top_extension) 156 | 157 | # Initialization to neutral standing position where both motors point to 158 | # opposite directions 159 | motor0 = math.pi / 2 160 | motor1 = math.pi / 2 161 | # Rotational move 162 | if leg_id in (0, 1): 163 | motor0 += swing 164 | motor1 -= swing 165 | elif leg_id in (2, 3): 166 | motor0 -= swing 167 | motor1 += swing 168 | # Extension 169 | motor0 += extension 170 | motor1 += extension 171 | 172 | # Add noise if requested. 173 | if noise_stdev > 0: 174 | motor0 += np.random.normal(0, noise_stdev) 175 | motor1 += np.random.normal(0, noise_stdev) 176 | 177 | return motor0, motor1 178 | -------------------------------------------------------------------------------- /a1sim/robots/moving_window_filter.py: -------------------------------------------------------------------------------- 1 | """Moving window filter to smooth out sensor readings.""" 2 | 3 | import collections 4 | 5 | class MovingWindowFilter(object): 6 | """A stable O(1) moving filter for incoming data streams. 7 | 8 | We implement the Neumaier's algorithm to calculate the moving window average, 9 | which is numerically stable. 10 | 11 | """ 12 | 13 | def __init__(self, window_size: int): 14 | """Initializes the class. 15 | 16 | Args: 17 | window_size: The moving window size. 18 | """ 19 | assert window_size > 0 20 | self._window_size = window_size 21 | self._value_deque = collections.deque(maxlen=window_size) 22 | # The moving window sum. 23 | self._sum = 0 24 | # The correction term to compensate numerical precision loss during 25 | # calculation. 26 | self._correction = 0 27 | 28 | def _neumaier_sum(self, value: float): 29 | """Update the moving window sum using Neumaier's algorithm. 
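
    Illustratively, with a window of size 3 and the inputs 1e16, 1.0, -1e16, a
    naive running sum would report an average of 0 because the 1.0 is lost to
    floating-point cancellation; the correction term maintained here preserves
    it, so calculate_average returns roughly 0.333.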
30 | 31 | For more details please refer to: 32 | https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements 33 | 34 | Args: 35 | value: The new value to be added to the window. 36 | """ 37 | 38 | new_sum = self._sum + value 39 | if abs(self._sum) >= abs(value): 40 | # If self._sum is bigger, low-order digits of value are lost. 41 | self._correction += (self._sum - new_sum) + value 42 | else: 43 | # low-order digits of sum are lost 44 | self._correction += (value - new_sum) + self._sum 45 | 46 | self._sum = new_sum 47 | 48 | def calculate_average(self, new_value: float) -> float: 49 | """Computes the moving window average in O(1) time. 50 | 51 | Args: 52 | new_value: The new value to enter the moving window. 53 | 54 | Returns: 55 | The average of the values in the window. 56 | 57 | """ 58 | deque_len = len(self._value_deque) 59 | if deque_len < self._value_deque.maxlen: 60 | pass 61 | else: 62 | # The left most value to be subtracted from the moving sum. 63 | self._neumaier_sum(-self._value_deque[0]) 64 | 65 | self._neumaier_sum(new_value) 66 | self._value_deque.append(new_value) 67 | 68 | return (self._sum + self._correction) / self._window_size 69 | -------------------------------------------------------------------------------- /a1sim/robots/robot_config.py: -------------------------------------------------------------------------------- 1 | # Third party code 2 | # 3 | # The following code are copied or modified from: 4 | # https://github.com/google-research/motion_imitation 5 | """The configuration parameters for our robots.""" 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import enum 12 | 13 | class MotorControlMode(enum.Enum): 14 | """The supported motor control modes.""" 15 | POSITION = 1 16 | 17 | # Apply motor torques directly. 18 | TORQUE = 2 19 | 20 | # Apply a tuple (q, qdot, kp, kd, tau) for each motor. Here q, qdot are motor 21 | # position and velocities. kp and kd are PD gains. tau is the additional 22 | # motor torque. This is the most flexible control mode. 23 | HYBRID = 3 24 | 25 | # PWM mode is only availalbe for Minitaur 26 | PWM = 4 27 | 28 | # GAIT = 3 29 | 30 | 31 | # Each hybrid action is a tuple (position, position_gain, velocity, 32 | # velocity_gain, torque) 33 | HYBRID_ACTION_DIMENSION = 5 34 | 35 | 36 | class HybridActionIndex(enum.Enum): 37 | # The index of each component within the hybrid action tuple. 
38 | POSITION = 0 39 | POSITION_GAIN = 1 40 | VELOCITY = 2 41 | VELOCITY_GAIN = 3 42 | TORQUE = 4 43 | -------------------------------------------------------------------------------- /approach.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/approach.gif -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/PRELUDE/adfa549494d49216914f13e8eadda40693e1e6c9/architecture.png -------------------------------------------------------------------------------- /config/gait/experiment.yaml: -------------------------------------------------------------------------------- 1 | Device: cuda 2 | Note: None 3 | Project: CoRL2022 4 | Save: test_june18 5 | Seed: 6 6 | Task: walking 7 | Max Steps: 10000000 8 | Steps Per Evaluation: 20000 9 | Steps Per Video: 100000 10 | Episodes Per Evaluation: 5 11 | Steps Per Episode: 1800 12 | -------------------------------------------------------------------------------- /config/gait/ppo.yaml: -------------------------------------------------------------------------------- 1 | Advantage Normalization: 0 2 | Batch Size: 4096 3 | Buffer Size: 16384 4 | Clip: 0.2 5 | Dual Clip: null 6 | Entropy Coefficient: 0.0 7 | Epoch: 3000 8 | Epsilon Clip: 0.15 9 | GAE Lambda: 0.95 10 | Gamma: 0.99 11 | Initial Sigma: -2.2 12 | Learning Rate: 0.0001 13 | Learning Rate Decay: true 14 | Max Grad Norm: 0.5 15 | Recompute Advantage: 1 16 | Repeat/Collect: 10 17 | Reward Nomalization: true 18 | Step/Collect: 8192 19 | Step/Epoch: 500000 20 | Test Envs: 8 21 | Training Envs: 64 22 | Value Clip: 0 23 | Value Coefficient: 0.25 24 | -------------------------------------------------------------------------------- /config/gait/reward.yaml: -------------------------------------------------------------------------------- 1 | Reward Badfoot: 0.1 2 | Reward Gait: 0.00 3 | Reward Footcontact: 0.1 4 | Reward Stand: 0.0 5 | Reward Energy: 0.07 6 | Reward Tracking: 2.5 7 | Reward Balance: 0.6 8 | Reward Fail: 1 9 | -------------------------------------------------------------------------------- /config/gait/simulation.yaml: -------------------------------------------------------------------------------- 1 | Random Dynamics: 1 2 | Random Force: 1 3 | Sensor Dis: 0 4 | Sensor Motor: 1 5 | Sensor IMU: 1 6 | Sensor Contact: 1 7 | Sensor Footpose: 0 8 | Sensor Dynamic: 0 9 | Sensor Exforce: 0 10 | Sensor Noise: 1 11 | State History: 12 12 | -------------------------------------------------------------------------------- /config/nav/bc.json: -------------------------------------------------------------------------------- 1 | { 2 | "algo_name": "bc", 3 | "experiment": { 4 | "name": "bc", 5 | "validate": true, 6 | "logging": { 7 | "terminal_output_to_txt": true, 8 | "log_tb": true 9 | }, 10 | "save": { 11 | "enabled": true, 12 | "every_n_seconds": null, 13 | "every_n_epochs": 1, 14 | "epochs": [], 15 | "on_best_validation": false, 16 | "on_best_rollout_return": false, 17 | "on_best_rollout_success_rate": true 18 | }, 19 | "epoch_every_n_steps": 500, 20 | "validation_epoch_every_n_steps": 50, 21 | "env": null, 22 | "additional_envs": null, 23 | "render": false, 24 | "render_video": true, 25 | "keep_all_videos": false, 26 | "video_skip": 10, 27 | "rollout": { 28 | "enabled": true, 29 | "n": 15, 30 | 
"horizon": 400, 31 | "rate": 50, 32 | "warmstart": 50, 33 | "terminate_on_success": true 34 | } 35 | }, 36 | "train": { 37 | "data": "./save/dataset_sim/demo_sim.hdf5", 38 | "output_dir": "../save/bc_checkpoint/bc_sim", 39 | "num_data_workers": 0, 40 | "hdf5_cache_mode": "all", 41 | "hdf5_use_swmr": true, 42 | "hdf5_normalize_obs": false, 43 | "hdf5_filter_key": null, 44 | "seq_length": 1, 45 | "dataset_keys": [ 46 | "actions", 47 | "rewards", 48 | "dones" 49 | ], 50 | "goal_mode": null, 51 | "cuda": true, 52 | "batch_size": 16, 53 | "num_epochs": 1000, 54 | "seed": 1 55 | }, 56 | "algo": { 57 | "optim_params": { 58 | "policy": { 59 | "learning_rate": { 60 | "initial": 0.0001, 61 | "decay_factor": 0.1, 62 | "epoch_schedule": [] 63 | }, 64 | "regularization": { 65 | "L2": 0.0 66 | } 67 | } 68 | }, 69 | "loss": { 70 | "l2_weight": 1.0, 71 | "l1_weight": 0.0, 72 | "cos_weight": 0.0 73 | }, 74 | "actor_layer_dims": [ 75 | 1024, 76 | 1024 77 | ], 78 | "gaussian": { 79 | "enabled": false, 80 | "fixed_std": false, 81 | "init_std": 0.1, 82 | "min_std": 0.01, 83 | "std_activation": "softplus", 84 | "low_noise_eval": true 85 | }, 86 | "gmm": { 87 | "enabled": true, 88 | "num_modes": 5, 89 | "min_std": 0.0001, 90 | "std_activation": "softplus", 91 | "low_noise_eval": true 92 | }, 93 | "vae": { 94 | "enabled": false, 95 | "latent_dim": 14, 96 | "latent_clip": null, 97 | "kl_weight": 1.0, 98 | "decoder": { 99 | "is_conditioned": true, 100 | "reconstruction_sum_across_elements": false 101 | }, 102 | "prior": { 103 | "learn": false, 104 | "is_conditioned": false, 105 | "use_gmm": false, 106 | "gmm_num_modes": 10, 107 | "gmm_learn_weights": false, 108 | "use_categorical": false, 109 | "categorical_dim": 10, 110 | "categorical_gumbel_softmax_hard": false, 111 | "categorical_init_temp": 1.0, 112 | "categorical_temp_anneal_step": 0.001, 113 | "categorical_min_temp": 0.3 114 | }, 115 | "encoder_layer_dims": [ 116 | 300, 117 | 400 118 | ], 119 | "decoder_layer_dims": [ 120 | 300, 121 | 400 122 | ], 123 | "prior_layer_dims": [ 124 | 300, 125 | 400 126 | ] 127 | }, 128 | "rnn": { 129 | "enabled": false, 130 | "horizon": 10, 131 | "hidden_dim": 400, 132 | "rnn_type": "LSTM", 133 | "num_layers": 2, 134 | "open_loop": false, 135 | "kwargs": { 136 | "bidirectional": false 137 | } 138 | } 139 | }, 140 | "observation": { 141 | "modalities": { 142 | "obs": { 143 | "low_dim": [ 144 | "yaw" 145 | ], 146 | "rgb": [ 147 | "agentview_rgb" 148 | ], 149 | "depth": [ 150 | "agentview_depth" 151 | ], 152 | "scan": [] 153 | }, 154 | "goal": { 155 | "low_dim": [], 156 | "rgb": [], 157 | "depth": [], 158 | "scan": [] 159 | } 160 | }, 161 | "encoder": { 162 | "low_dim": { 163 | "feature_dimension": null, 164 | "core_class": null, 165 | "core_kwargs": {}, 166 | "obs_randomizer_class": null, 167 | "obs_randomizer_kwargs": {} 168 | }, 169 | "rgb": { 170 | "feature_dimension": 64, 171 | "core_class": "VisualCore", 172 | "core_kwargs": { 173 | "backbone_class": "ResNet18Conv", 174 | "backbone_kwargs": { 175 | "pretrained": false, 176 | "input_coord_conv": false 177 | } 178 | }, 179 | "obs_randomizer_class": null, 180 | "obs_randomizer_kwargs": { 181 | "crop_height": 76, 182 | "crop_width": 76, 183 | "num_crops": 1, 184 | "pos_enc": false 185 | }, 186 | "pool_class": "SpatialSoftmax", 187 | "pool_kwargs": { 188 | "num_kp": 32, 189 | "learnable_temperature": false, 190 | "temperature": 1.0, 191 | "noise_std": 0.0 192 | } 193 | }, 194 | "depth": { 195 | "feature_dimension": 64, 196 | "core_class": "VisualCore", 197 | "core_kwargs": { 198 | 
"backbone_class": "ResNet18Conv", 199 | "backbone_kwargs": { 200 | "pretrained": false, 201 | "input_coord_conv": false 202 | } 203 | }, 204 | "obs_randomizer_class": null, 205 | "obs_randomizer_kwargs": { 206 | "crop_height": 76, 207 | "crop_width": 76, 208 | "num_crops": 1, 209 | "pos_enc": false 210 | }, 211 | "pool_class": "SpatialSoftmax", 212 | "pool_kwargs": { 213 | "num_kp": 32, 214 | "learnable_temperature": false, 215 | "temperature": 1.0, 216 | "noise_std": 0.0 217 | } 218 | }, 219 | "scan": { 220 | "feature_dimension": 64, 221 | "core_class": "ScanCore", 222 | "core_kwargs": { 223 | "backbone_class": "ResNet18Conv", 224 | "backbone_kwargs": { 225 | "pretrained": false, 226 | "input_coord_conv": false 227 | }, 228 | "conv_kwargs": { 229 | "out_channels": [ 230 | 32, 231 | 64, 232 | 64 233 | ], 234 | "kernel_size": [ 235 | 8, 236 | 4, 237 | 2 238 | ], 239 | "stride": [ 240 | 4, 241 | 2, 242 | 1 243 | ] 244 | } 245 | }, 246 | "obs_randomizer_class": null, 247 | "obs_randomizer_kwargs": { 248 | "crop_height": 76, 249 | "crop_width": 76, 250 | "num_crops": 1, 251 | "pos_enc": false 252 | }, 253 | "pool_class": "SpatialSoftmax", 254 | "pool_kwargs": { 255 | "num_kp": 32, 256 | "learnable_temperature": false, 257 | "temperature": 1.0, 258 | "noise_std": 0.0 259 | } 260 | } 261 | } 262 | } 263 | } 264 | 265 | 266 | 267 | -------------------------------------------------------------------------------- /config/nav/bcrnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "algo_name": "bc", 3 | "experiment": { 4 | "name": "bc-rnn", 5 | "validate": true, 6 | "logging": { 7 | "terminal_output_to_txt": true, 8 | "log_tb": true 9 | }, 10 | "save": { 11 | "enabled": true, 12 | "every_n_seconds": null, 13 | "every_n_epochs": 1, 14 | "epochs": [], 15 | "on_best_validation": false, 16 | "on_best_rollout_return": false, 17 | "on_best_rollout_success_rate": true 18 | }, 19 | "epoch_every_n_steps": 2000, 20 | "validation_epoch_every_n_steps": 50, 21 | "env": null, 22 | "additional_envs": null, 23 | "render": false, 24 | "render_video": true, 25 | "keep_all_videos": false, 26 | "video_skip": 10, 27 | "rollout": { 28 | "enabled": true, 29 | "n": 15, 30 | "horizon": 400, 31 | "rate": 50, 32 | "warmstart": 50, 33 | "terminate_on_success": true 34 | } 35 | }, 36 | "train": { 37 | "data": "./save/dataset_sim/demo_sim.hdf5", 38 | "output_dir": "./bc_checkpoint/bcrnn_sim", 39 | "num_data_workers": 0, 40 | "hdf5_cache_mode": "low_dim", 41 | "hdf5_use_swmr": true, 42 | "hdf5_normalize_obs": false, 43 | "hdf5_filter_key": null, 44 | "seq_length": 10, 45 | "dataset_keys": [ 46 | "actions", 47 | "rewards", 48 | "dones" 49 | ], 50 | "goal_mode": null, 51 | "cuda": true, 52 | "batch_size": 16, 53 | "num_epochs": 600, 54 | "seed": 1 55 | }, 56 | "algo": { 57 | "optim_params": { 58 | "policy": { 59 | "learning_rate": { 60 | "initial": 0.0001, 61 | "decay_factor": 0.1, 62 | "epoch_schedule": [] 63 | }, 64 | "regularization": { 65 | "L2": 0.0 66 | } 67 | } 68 | }, 69 | "loss": { 70 | "l2_weight": 1.0, 71 | "l1_weight": 0.0, 72 | "cos_weight": 0.0 73 | }, 74 | "actor_layer_dims": [ 75 | 1024, 76 | 1024 77 | ], 78 | "gaussian": { 79 | "enabled": false, 80 | "fixed_std": false, 81 | "init_std": 0.1, 82 | "min_std": 0.01, 83 | "std_activation": "softplus", 84 | "low_noise_eval": true 85 | }, 86 | "gmm": { 87 | "enabled": true, 88 | "num_modes": 5, 89 | "min_std": 0.0001, 90 | "std_activation": "softplus", 91 | "low_noise_eval": true 92 | }, 93 | "vae": { 94 | "enabled": false, 
95 | "latent_dim": 16, 96 | "latent_clip": null, 97 | "kl_weight": 1.0, 98 | "decoder": { 99 | "is_conditioned": true, 100 | "reconstruction_sum_across_elements": false 101 | }, 102 | "prior": { 103 | "learn": false, 104 | "is_conditioned": false, 105 | "use_gmm": false, 106 | "gmm_num_modes": 16, 107 | "gmm_learn_weights": false, 108 | "use_categorical": false, 109 | "categorical_dim": 10, 110 | "categorical_gumbel_softmax_hard": false, 111 | "categorical_init_temp": 1.0, 112 | "categorical_temp_anneal_step": 0.001, 113 | "categorical_min_temp": 0.3 114 | }, 115 | "encoder_layer_dims": [ 116 | 300, 117 | 400 118 | ], 119 | "decoder_layer_dims": [ 120 | 300, 121 | 400 122 | ], 123 | "prior_layer_dims": [ 124 | 300, 125 | 400 126 | ] 127 | }, 128 | "rnn": { 129 | "enabled": true, 130 | "horizon": 16, 131 | "hidden_dim": 400, 132 | "rnn_type": "LSTM", 133 | "num_layers": 2, 134 | "open_loop": false, 135 | "kwargs": { 136 | "bidirectional": false 137 | } 138 | } 139 | }, 140 | "observation": { 141 | "modalities": { 142 | "obs": { 143 | "low_dim": [ 144 | "yaw" 145 | ], 146 | "rgb": [ 147 | "agentview_rgb" 148 | ], 149 | "depth": [ 150 | "agentview_depth" 151 | ], 152 | "scan": [] 153 | }, 154 | "goal": { 155 | "low_dim": [], 156 | "rgb": [], 157 | "depth": [], 158 | "scan": [] 159 | } 160 | }, 161 | "encoder": { 162 | "low_dim": { 163 | "feature_dimension": null, 164 | "core_class": null, 165 | "core_kwargs": {}, 166 | "obs_randomizer_class": null, 167 | "obs_randomizer_kwargs": {} 168 | }, 169 | "rgb": { 170 | "feature_dimension": 64, 171 | "core_class": "VisualCore", 172 | "core_kwargs": { 173 | "backbone_class": "ResNet18Conv", 174 | "backbone_kwargs": { 175 | "pretrained": false, 176 | "input_coord_conv": false 177 | } 178 | }, 179 | "obs_randomizer_class": null, 180 | "obs_randomizer_kwargs": { 181 | "crop_height": 76, 182 | "crop_width": 76, 183 | "num_crops": 1, 184 | "pos_enc": false 185 | }, 186 | "pool_class": "SpatialSoftmax", 187 | "pool_kwargs": { 188 | "num_kp": 32, 189 | "learnable_temperature": false, 190 | "temperature": 1.0, 191 | "noise_std": 0.0 192 | } 193 | }, 194 | "depth": { 195 | "feature_dimension": 64, 196 | "core_class": "VisualCore", 197 | "core_kwargs": { 198 | "backbone_class": "ResNet18Conv", 199 | "backbone_kwargs": { 200 | "pretrained": false, 201 | "input_coord_conv": false 202 | } 203 | }, 204 | "obs_randomizer_class": null, 205 | "obs_randomizer_kwargs": { 206 | "crop_height": 76, 207 | "crop_width": 76, 208 | "num_crops": 1, 209 | "pos_enc": false 210 | }, 211 | "pool_class": "SpatialSoftmax", 212 | "pool_kwargs": { 213 | "num_kp": 32, 214 | "learnable_temperature": false, 215 | "temperature": 1.0, 216 | "noise_std": 0.0 217 | } 218 | }, 219 | "scan": { 220 | "feature_dimension": 64, 221 | "core_class": "ScanCore", 222 | "core_kwargs": { 223 | "backbone_class": "ResNet18Conv", 224 | "backbone_kwargs": { 225 | "pretrained": false, 226 | "input_coord_conv": false 227 | }, 228 | "conv_kwargs": { 229 | "out_channels": [ 230 | 32, 231 | 64, 232 | 64 233 | ], 234 | "kernel_size": [ 235 | 8, 236 | 4, 237 | 2 238 | ], 239 | "stride": [ 240 | 4, 241 | 2, 242 | 1 243 | ] 244 | } 245 | }, 246 | "obs_randomizer_class": null, 247 | "obs_randomizer_kwargs": { 248 | "crop_height": 76, 249 | "crop_width": 76, 250 | "num_crops": 1, 251 | "pos_enc": false 252 | }, 253 | "pool_class": "SpatialSoftmax", 254 | "pool_kwargs": { 255 | "num_kp": 32, 256 | "learnable_temperature": false, 257 | "temperature": 1.0, 258 | "noise_std": 0.0 259 | } 260 | } 261 | } 262 | } 263 | } 264 
| 265 | 266 | 267 | -------------------------------------------------------------------------------- /config/simulation.yaml: -------------------------------------------------------------------------------- 1 | Random Dynamics: 1 2 | Random Force: 1 3 | Sensor Dis: 0 4 | Sensor Motor: 1 5 | Sensor IMU: 1 6 | Sensor Contact: 1 7 | Sensor Footpose: 0 8 | Sensor Dynamic: 0 9 | Sensor Exforce: 0 10 | Sensor Noise: 1 11 | State History: 12 12 | -------------------------------------------------------------------------------- /implementation.md: -------------------------------------------------------------------------------- 1 | # Implementation Details 2 | 3 | The navigation controller predicts the target velocity commands at 10Hz. The low-level gait controller takes the buffer of recent velocity commands, robot states, and previous joint-space actions as input and produces joint-space action commands at 38Hz to actuate the robot. 4 | 5 | ![intro](architecture.png) 6 | 7 | ## Navigation Controller 8 | The navigation policy uses a ResNet18-backbone network [[1]](#1) as the image encoder. The encoded image features are flattened and concatenated with the 1D heading direction value. The concatenated vector is passed through a two-layer multi-layer perceptron (MLP) with 1024 hidden units in each layer. The input RGB-D images have a size of $212\times 120$. For imitation learning, we develop our behavioral cloning implementations with the robomimic framework [[2]](#2). For the recurrent neural networks, we use a two-layer LSTM [[3]](#3) with 400 hidden units in each layer. The GMM policy output has 5 modes. 9 | 10 | ## Gait Controller 11 | Each tuple in the buffer $ℬ\_t$ is a 48D vector, containing $u_i\inℝ^2$, $q_i\inℝ^{34}$, and $a_{i-1}\inℝ^{12}$. $u_i$ consists of the forward linear velocity and angular velocity. $q_i$ consists of joint positions and velocities of each leg (24D), binary foot contact states for each foot (4D), the IMU measurement of the robot's body orientation (3D), and the angular velocities (3D). $a_{i-1}$ is the 12D joint-space command of the previous time step. We choose $T=11$ for the size of the history buffer. 12 | We use 1D temporal convolution to transform the input $T+1$ tuples $ℬ\_t$ into a 32D vector. Specifically, the 48D tuple of each time step is first encoded by a linear layer, followed by 3 layers of temporal convolution. The feature from temporal convolution is flattened and projected by another linear layer into the 32D vector. 13 | We concatenate the 48D tuple of the most recent state $(u_t,q_t,a_{t-1})$ and the encoded 32D vector of history states. This concatenated feature is passed through a two-layer MLP with 256 hidden units in each layer to produce a Gaussian distribution of joint-space actions. The reward function for training $\pi_L$ consists of terms for tracking commands, balancing the body, minimizing energy consumption, and regulating foot contacts, as below. 14 | 15 | - Tracking commands: $K(k_1 |e_{\psi}|^2) e^{k_2 |\boldsymbol{e}_{xy}|}$ 16 | - Balancing the body: $K(k_3|\boldsymbol{e}_{xy}|^2)K(k_4 |\boldsymbol{\theta}|^2)$ 17 | - Minimizing energy consumption: $K(k_3|\boldsymbol{e}_{xy}|^2) E$ 18 | - Regulating foot contacts: $k_5 \left(\max(n_1-3, 0) - n_2 \right)$ 19 | 20 | where $K(\cdot) := 1 - \tanh(\cdot)$ and $k_1, \dots, k_5$ are positive weight coefficients. $E$ is the energy consumption, $\boldsymbol{\theta}$ is the vector of roll and pitch, $\boldsymbol{e}\_{xy}$ is the vector of linear velocity errors in the x and y axes, and ${e}_{\psi}$ is the yaw-rate error. $n_1$ and $n_2$ are the numbers of foot contacts and non-foot contacts, respectively. 21 | The terms for tracking commands and balancing the body are designed to improve the stability of command tracking, and the other two terms improve the gait patterns. 64 actors of distributed PPO are used for training.
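As a reading aid, the minimal sketch below evaluates the shaping function $K$ and the four listed terms for a single control step. It is only illustrative: the coefficients $k_1, \dots, k_5$ used here are placeholders, and the exact signs, scales, and the way the terms are summed into a scalar reward are determined by the training environment and the per-term weights in `config/gait/reward.yaml`, not by this snippet.
```
import numpy as np

def K(x):
    # Shaping function from the reward definition: K(x) := 1 - tanh(x).
    return 1.0 - np.tanh(x)

def gait_reward_terms(e_xy, e_psi, theta, energy, n_foot, n_nonfoot,
                      k=(1.0, 1.0, 1.0, 1.0, 1.0)):
    # e_xy:      linear velocity error in the x and y axes, shape (2,)
    # e_psi:     yaw-rate error (scalar)
    # theta:     roll and pitch of the body, shape (2,)
    # energy:    energy consumption E over the step
    # n_foot:    number of foot contacts n_1
    # n_nonfoot: number of non-foot contacts n_2
    # k:         placeholder values for the positive coefficients k_1..k_5
    k1, k2, k3, k4, k5 = k
    e_xy_norm = np.linalg.norm(e_xy)
    tracking = K(k1 * e_psi ** 2) * np.exp(k2 * e_xy_norm)                  # tracking commands
    balance = K(k3 * e_xy_norm ** 2) * K(k4 * np.linalg.norm(theta) ** 2)   # balancing the body
    energy_term = K(k3 * e_xy_norm ** 2) * energy                           # minimizing energy consumption
    contact = k5 * (max(n_foot - 3, 0) - n_nonfoot)                         # regulating foot contacts
    return tracking, balance, energy_term, contact
```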
22 | 23 | ## References 24 | [1] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, 2016. 25 | 26 | [2] A. Mandlekar, D. Xu, J. Wong, S. Nasiriany, C. Wang, R. Kulkarni, L. Fei-Fei, S. Savarese, Y. Zhu, and R. Martín-Martín. What matters in learning from offline human demonstrations for 27 | robot manipulation. Conference on Robot Learning, 2021. 28 | 29 | [3] S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8), 1997. 30 | -------------------------------------------------------------------------------- /path.yaml: -------------------------------------------------------------------------------- 1 | Configuration: config 2 | Data: data 3 | BC Checkpoint: save/bc_checkpoint 4 | RL Checkpoint: save/rl_checkpoint 5 | Dataset Sim: save/dataset_sim 6 | Dataset Real: save/dataset_real 7 | Raw Data Sim: save/raw_sim 8 | Raw Data Real: save/raw_real -------------------------------------------------------------------------------- /scripts/demo_nav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import datetime 4 | import numpy as np 5 | import pickle 6 | import argparse 7 | 8 | import nav.environments as environments 9 | import nav.interfaces as interfaces 10 | 11 | from path import * 12 | 13 | ## Collect demo data 14 | def demo(task, env_type_id): 15 | 16 | # name the episode 17 | episode = datetime.datetime.now().strftime("%m%d_%H%M%S") 18 | 19 | # init env, buffers 20 | dic_record_array = {} 21 | dic_record_array['color'] = [] 22 | dic_record_array['depth'] = [] 23 | dic_record_array['vel_x'] = [] 24 | dic_record_array['yaw_rate'] = [] 25 | dic_record_array['yaw'] = [] 26 | dic_record_array['bird_view'] = [] 27 | 28 | dic_data_array = {} 29 | dic_data_array['observation'] = [] 30 | dic_data_array['action'] = [] 31 | dic_data_array['yaw'] = [] 32 | dic_data_array['done'] = [] 33 | 34 | space_mouse = interfaces.SpaceMouse() 35 | 36 | env = environments.build_demo_env(render=True, env_type = env_type_id) 37 | env.reset() 38 | done = False 39 | 40 | action = np.zeros(2) 41 | 42 | # Simulation loop 43 | while not done: 44 | 45 | new_input = space_mouse.control 46 | action = 0.7 * action + 0.3 * np.array([np.clip(new_input[0]+0.7, 0, 1.0), np.clip(new_input[-1], -1.0, 1.0)]) 47 | obs, rew, done, info = env.step(action) 48 | 49 | dic_data_array['observation'].append(obs['rgbd']) 50 | dic_data_array['yaw'].append(obs['yaw']) 51 | dic_data_array['action'].append(action) 52 | dic_data_array['done'].append(done) 53 | 54 | dic_record_array['color'].append(env.render('rgb')) 55 | dic_record_array['depth'].append(env.render('depth')) 56 | dic_record_array['vel_x'].append(action[0]) 57 | dic_record_array['yaw_rate'].append(action[1]) 58 | dic_record_array['yaw'].append(obs['yaw']) 59 | dic_record_array['bird_view'].append(env.render('fpv')) 60 | 61 | if info['Fail']: 62 | episode += '_fail' 63 | 64 | # Save data 65 |
os.makedirs(os.path.join(PATH_DATASETSET_SIM, task), exist_ok=True) 66 | with open('{}/{}/{}.pickle'.format(PATH_DATASETSET_SIM, task, episode), 'wb') as handle: 67 | pickle.dump(dic_data_array, handle, protocol=pickle.HIGHEST_PROTOCOL) 68 | 69 | os.makedirs(os.path.join(PATH_RAW_SIM, task, episode), exist_ok=True) 70 | with open('{}/{}/{}/data.pickle'.format(PATH_RAW_SIM, task, episode), 'wb') as handle: 71 | pickle.dump(dic_record_array, handle, protocol=pickle.HIGHEST_PROTOCOL) 72 | 73 | video_format = cv2.VideoWriter_fourcc(*'mp4v') 74 | for key in ['color', 'depth', 'bird_view']: 75 | 76 | if key == 'bird_view': 77 | recorder = cv2.VideoWriter("{}/{}/{}/{}.mp4".format(PATH_RAW_SIM, task, episode, key), video_format, 20, (480, 360)) 78 | else: 79 | recorder = cv2.VideoWriter("{}/{}/{}/{}.mp4".format(PATH_RAW_SIM, task, episode, key), video_format, 20, (212, 120)) 80 | for img in dic_record_array[key]: 81 | if key == 'depth': 82 | recorder.write(img[:, :, np.newaxis][:,:,[0,0,0]]) 83 | else: 84 | recorder.write(img) 85 | recorder.release() 86 | 87 | 88 | if __name__ == '__main__': 89 | 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument("--env_type",type=int,default=2, 92 | help="2-Easy, 0,1-Medium, 1,4: Hard") 93 | parser.add_argument("--demo_name",type=str,default='corl2022', 94 | help="name of directory to save demo data.") 95 | args = parser.parse_args() 96 | assert(args.env_type in range(5)) 97 | 98 | demo(args.demo_name+str(args.env_type), args.env_type) 99 | -------------------------------------------------------------------------------- /scripts/eval_gait.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import argparse 4 | import random 5 | import copy 6 | import numpy as np 7 | 8 | import a1sim 9 | from a1sim.envs.env_builder import SENSOR_MODE 10 | 11 | from gait.wrapper import TianshouWrapper, TrajectoryHandler 12 | from gait.agent import build_agent 13 | 14 | from utils.pid import PIDHandler 15 | 16 | import cv2 17 | import torch 18 | 19 | from path import * 20 | 21 | NUM_EVAL = 20 22 | 23 | SUBPATH_CONFIG = { "experiment": "experiment.yaml", 24 | "simulation": "simulation.yaml"} 25 | 26 | RANDOM_PARAM_DICT = {'random_dynamics':0, 'random_force':0} 27 | DYNAMIC_PARAM_DICT = {'control_latency': 0.001, 'joint_friction': 0.025, 'spin_foot_friction': 0.2, 'foot_friction': 1} 28 | 29 | 30 | ## Create 'x-linear' trajectories 31 | def generate_linear_trj(): 32 | 33 | trajectory = [] 34 | time_step = 0.1 35 | time_max = 30 36 | cnt = 0 37 | 38 | x_linear = 0.7 39 | yaw_rate = 0.0 40 | 41 | pos = np.zeros(2) 42 | yaw = 0. 43 | 44 | while cnt*time_step < time_max: 45 | pos += np.array([np.cos(yaw) * x_linear, -np.sin(yaw)* x_linear]) * time_step 46 | yaw += yaw_rate * time_step 47 | 48 | trajectory.append({'time': cnt*time_step, 'x_linear': x_linear, 'yaw_rate': yaw_rate, 'position': np.copy(pos), 'yaw': yaw}) 49 | cnt += 1 50 | return trajectory 51 | 52 | 53 | ## Create sine trajectories for 'x-sine' and 'sine' 54 | def generate_sine_trj(mode): 55 | 56 | trajectory = [] 57 | time_step = 0.1 58 | time_max = 30 59 | cnt = 0 60 | 61 | x_linear = 0.7 62 | yaw_rate = 0.0 63 | 64 | pos = np.zeros(2) 65 | yaw = 0. 
66 | 67 | while cnt*time_step < time_max: 68 | 69 | if mode == 'linear': 70 | x_linear = 0.7 + 0.3 *np.cos(cnt*time_step * 2*np.pi/15) 71 | else: 72 | yaw_rate = 0.3 *np.cos(cnt*time_step * 2*np.pi/15) 73 | 74 | pos += np.array([np.cos(yaw) * x_linear, -np.sin(yaw)* x_linear]) * time_step 75 | yaw += yaw_rate * time_step 76 | 77 | trajectory.append({'time': cnt*time_step, 'x_linear': x_linear, 'yaw_rate': yaw_rate, 'position': np.copy(pos), 'yaw': yaw}) 78 | cnt += 1 79 | return trajectory 80 | 81 | 82 | ## Create step-like trajectories for 'x-step' and 'zig-zag' 83 | def generate_step_trj(mode): 84 | 85 | trajectory = [] 86 | time_step = 0.1 87 | time_max = 30 88 | cnt = 0 89 | 90 | x_linear = 0.7 91 | yaw_rate = 0.0 92 | 93 | pos = np.zeros(2) 94 | yaw = 0. 95 | 96 | while cnt*time_step < time_max: 97 | 98 | if cnt*time_step < 5.0: 99 | if mode == 'linear': 100 | x_linear = 0.7 101 | else: 102 | yaw = 0.0 * np.pi 103 | elif cnt*time_step < 15.0: 104 | if mode == 'linear': 105 | x_linear = 1.0 106 | else: 107 | yaw = 0.16 * np.pi 108 | elif cnt*time_step < 25.0: 109 | if mode == 'linear': 110 | x_linear = 0.5 111 | else: 112 | yaw = -0.16 * np.pi 113 | else: 114 | if mode == 'linear': 115 | x_linear = 0.7 116 | else: 117 | yaw = 0.0 * np.pi 118 | 119 | pos += np.array([np.cos(yaw) * x_linear, -np.sin(yaw)* x_linear]) * time_step 120 | 121 | # yaw_rate = 0.3 *np.cos(cnt*time_step * 2*np.pi/15) 122 | yaw += yaw_rate * time_step 123 | 124 | trajectory.append({'time': cnt*time_step, 'x_linear': x_linear, 'yaw_rate': yaw_rate, 'position': np.copy(pos), 'yaw': yaw}) 125 | cnt += 1 126 | return trajectory 127 | 128 | 129 | ## Evaluate Gait Controller with the setpoint trajectories with PD position controller 130 | def evaluate(load_path, trajectory, gait_mode='rl', save_path=None, config_path=None, render=True, seed=0, 131 | device=torch.device("cuda" if torch.cuda.is_available() else "cpu")): 132 | 133 | # Configuration 134 | if config_path == None: 135 | config_path = PATH_CONFIG 136 | 137 | sim_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["simulation"]), 'r'), Loader=yaml.FullLoader) 138 | 139 | random_param = copy.copy(RANDOM_PARAM_DICT) 140 | dynamic_param = copy.copy(DYNAMIC_PARAM_DICT) 141 | sensor_mode = copy.copy(SENSOR_MODE) 142 | 143 | random_param['random_dynamics'] = sim_config["Random Dynamics"] 144 | random_param['random_force'] = sim_config["Random Force"] 145 | 146 | sensor_mode['dis'] = sim_config["Sensor Dis"] 147 | sensor_mode['motor'] = sim_config["Sensor Motor"] 148 | sensor_mode["imu"] = sim_config["Sensor IMU"] 149 | sensor_mode["contact"] = sim_config["Sensor Contact"] 150 | sensor_mode["footpose"] = sim_config["Sensor Footpose"] 151 | sensor_mode["dynamic_vec"] = sim_config["Sensor Dynamic"] 152 | sensor_mode["force_vec"] = sim_config["Sensor Exforce"] 153 | sensor_mode["noise"] = sim_config["Sensor Noise"] 154 | 155 | reward_param = {} 156 | 157 | # Create an environment 158 | cmd_trj = TrajectoryHandler() 159 | 160 | env = a1sim.make_env(task="none", 161 | random_param=random_param, reward_param=reward_param, dynamic_param=dynamic_param, sensor_mode=sensor_mode, 162 | num_history=sim_config["State History"], render=render, terrain=None, cmd=cmd_trj, 163 | normal=1) 164 | 165 | act_scale = np.array([0.2,0.6,0.6]*4) 166 | act_offset = np.zeros(12) 167 | 168 | env =TianshouWrapper(env, 169 | action_scale=act_scale, action_offset=act_offset, 170 | max_step = 1000000, 171 | video_path=None) 172 | agent = build_agent(load_path, device=device) 173 | 174 
| env.pybullet_client.configureDebugVisualizer(env.pybullet_client.COV_ENABLE_GUI,0) 175 | env.pybullet_client.configureDebugVisualizer(env.pybullet_client.COV_ENABLE_SHADOWS,0) 176 | 177 | if save_path != None: 178 | video_format = cv2.VideoWriter_fourcc(*'mp4v') 179 | recorder = cv2.VideoWriter("{}/{}_eval_{}.mp4".format(save_path, gait_mode, seed), video_format, 30, (480, 360)) 180 | 181 | # Create a position PD controller 182 | pid_vel = PIDHandler(gainP = 3.0, gainD = 0.6, gainI = 0) 183 | pid_yaw = PIDHandler(gainP = 3.0, gainD = 0.6, gainI = 0) 184 | pid_vel.reset() 185 | pid_yaw.reset() 186 | 187 | 188 | # Initiation 189 | obs = env.reset(x_noise=0) 190 | done = False 191 | 192 | tracking_step = 0 193 | tracking_log = [] 194 | err_log = [] 195 | 196 | # Visualization setup 197 | idVizBallSmallTarget = env.env.pybullet_client.createVisualShape(env.env.pybullet_client.GEOM_SPHERE, radius = 0.01, rgbaColor=[1, 0, 0, 1]) 198 | idVizBallLargeTarget = env.env.pybullet_client.createVisualShape(env.env.pybullet_client.GEOM_SPHERE, radius = 0.05, rgbaColor = [1, 0, 0 ,1]) 199 | 200 | tar_viz = [np.array([0, 0, 0.1])]*5 201 | id_tar_viz = [] 202 | id_cam_viz = env.env.pybullet_client.createMultiBody(0, -1, idVizBallLargeTarget, tar_viz[0], [0, 0, 0, 1]) 203 | 204 | for pos in tar_viz: 205 | id_tar_viz.append( 206 | env.env.pybullet_client.createMultiBody(0, -1, idVizBallSmallTarget, pos, [0, 0, 0, 1]) 207 | ) 208 | 209 | rec_time = 1.0/15 210 | 211 | random.seed(seed) 212 | np.random.seed(seed) 213 | torch.manual_seed(seed) 214 | torch.cuda.manual_seed(seed) 215 | env.seed(seed) 216 | 217 | time_init = env.env.robot.GetTimeSinceReset() 218 | 219 | # Evaluation loop 220 | while not done: 221 | # Update time 222 | time_cur = env.env.robot.GetTimeSinceReset() - time_init 223 | 224 | # Update the PD controller 225 | xy = env.env.robot.GetBasePosition()[0:2] 226 | yaw = env.env.robot.GetTrueBaseRollPitchYaw()[2] 227 | rot_mat = np.array([[np.cos(yaw), np.sin(yaw)], [-np.sin(yaw), np.cos(yaw)]]) 228 | error = rot_mat @ (trajectory[tracking_step]['position'] - xy) 229 | pid_vel.update(error[0]) 230 | pid_yaw.update(error[1]) 231 | err_log.append(error) 232 | 233 | cmd_trj.set_target(np.array([pid_vel.output(), 234 | pid_yaw.output()])) 235 | 236 | tracking_step_future = min(tracking_step+1, len(trajectory)-1) 237 | cam_pos_xy = trajectory[tracking_step]['position'] + (trajectory[tracking_step_future]['position'] - trajectory[tracking_step]['position']) * (1-(trajectory[tracking_step]['time']-time_cur)/0.1) 238 | cam_pos = np.array([0, 0, 0.1]) 239 | cam_pos[0:2] = cam_pos_xy 240 | env.env.pybullet_client.resetBasePositionAndOrientation(id_cam_viz , cam_pos, [0, 0, 0, 1]) 241 | 242 | # Real-time recording, not used 243 | if save_path != None and rec_time < time_cur: 244 | 245 | view_matrix = env.env.pybullet_client.computeViewMatrixFromYawPitchRoll( 246 | cameraTargetPosition = cam_pos + np.array([-0.3, 0, 0]), 247 | distance = 1.0, 248 | roll = 0, 249 | pitch = -45, 250 | yaw = 0, 251 | upAxisIndex=2) 252 | proj_matrix = env.env.pybullet_client.computeProjectionMatrixFOV( 253 | fov=60, 254 | aspect=float(480) / 360, 255 | nearVal=0.1, 256 | farVal=100) 257 | (_, _, rgb, depth, _) = env.env.pybullet_client.getCameraImage( 258 | width=480, 259 | height=360, 260 | renderer=env.env.pybullet_client.ER_TINY_RENDERER, 261 | viewMatrix=view_matrix, 262 | shadow=0, 263 | projectionMatrix=proj_matrix) 264 | 265 | img = rgb[:,:,[2, 1, 0]] 266 | 267 | recorder.write(img) 268 | rec_time += 1.0/30 269 | 270 | # 
Update footprints, logs 271 | if trajectory[tracking_step]['time'] < time_cur: 272 | tracking_log.append(cmd_trj.evaluate(env.env.robot)) 273 | 274 | if tracking_step < len(trajectory)-1: 275 | tracking_step += 1 276 | tar_viz[1:] = tar_viz[0:-1] 277 | tar_viz[0] = np.array([trajectory[tracking_step]['position'][0], trajectory[tracking_step]['position'][1], 0.1]) 278 | 279 | for idx in range(len(id_tar_viz)): 280 | env.env.pybullet_client.resetBasePositionAndOrientation(id_tar_viz[idx] , tar_viz[idx], [0, 0, 0, 1]) 281 | 282 | else: 283 | break 284 | 285 | # Step environments 286 | action = agent.predict(obs) 287 | obs, rew, done, info = env.step(action) 288 | 289 | if np.sum(np.array(error[0:2])**2)>1: 290 | done = True 291 | 292 | print('errors: ', np.sqrt(np.nanmean(np.sum(np.array(err_log)**2, axis=1)))) 293 | print('complemeted: ', True if not done else False) 294 | print('==========================="') 295 | 296 | if save_path != None: 297 | recorder.release() 298 | 299 | return tracking_log 300 | 301 | 302 | if __name__ == '__main__': 303 | parser = argparse.ArgumentParser() 304 | parser.add_argument("--gait_policy",type=str,default="policy_20220125_finished", 305 | help="path for loading checkpoints, configuration and training logs. For example, --gait_policy=GAIT_POLICY will save checkpoints at ./save/rl_checkpoints/gait/GAIT_POLICY.") 306 | parser.add_argument("--config",type=str,default="deploy", 307 | help="path to a directory with the configuration files of simulation setup, etc. For example, --config=CONFIG will save checkpoints at ./config/CONFIG.") 308 | parser.add_argument("--trajectory", type=str, default="x-linear", 309 | help="shape of trajectories for the unit testing. Use one of these: 'x-linear', 'x-sine', 'x-step', 'sine' and 'zig-zag'.") 310 | args = parser.parse_args() 311 | path_load = os.path.join(PATH_CHECKPOINT_RL, "gait", args.gait_policy) 312 | 313 | if args.trajectory == 'sine': 314 | trj = generate_sine_trj('yaw') 315 | elif args.trajectory == 'x-sine': 316 | trj = generate_sine_trj('linear') 317 | elif args.trajectory == 'zig-zag': 318 | trj = generate_step_trj('yaw') 319 | elif args.trajectory == 'x-step': 320 | trj = generate_step_trj('linear') 321 | else: 322 | trj = generate_linear_trj() 323 | 324 | for idx in range(NUM_EVAL): 325 | data_logs = evaluate(path_load, trajectory=trj, config_path=PATH_CONFIG, render=False, seed = idx) 326 | -------------------------------------------------------------------------------- /scripts/eval_nav.py: -------------------------------------------------------------------------------- 1 | import os 2 | from a1sim.envs.external.constants import PATH_CONFIG 3 | from robomimic.utils.file_utils import policy_from_checkpoint 4 | from robomimic.config import config_factory 5 | import numpy as np 6 | import cv2 7 | import argparse 8 | import pickle 9 | import yaml 10 | import torch 11 | 12 | from gait.agent import build_agent 13 | from nav.environments import build_robot_env 14 | 15 | from path import * 16 | 17 | SUBPATH_CONFIG = { "ppo": "ppo.yaml", 18 | "experiment": "experiment.yaml", 19 | "simulation": "simulation.yaml"} 20 | 21 | EXP_CONFIG = {'easy': {'env_type': [2], 'epoches': 50}, 22 | 'medium': {'env_type': [0, 3], 'epoches': 25}, 23 | 'hard': {'env_type': [1, 4], 'epoches': 25}} 24 | 25 | 26 | ## Evaluate Navigation Controller with Gait Controller 27 | def evaluate(nav_policy, gait_policy, difficulty, save_path=None, config_path=None, render=True, seed=0, 28 | device=torch.device("cuda" if torch.cuda.is_available() 
else "cpu")): 29 | 30 | # Setup paths 31 | nav_path = "{}/{}/models/model_best_training.pth".format(PATH_CHECKPOINT_BC, nav_policy) 32 | gait_path = os.path.join(PATH_CHECKPOINT_RL, 'gait', gait_policy) 33 | env_data_path = os.path.join(PATH_DATA,'env_profile') 34 | if config_path == None: 35 | config_path = PATH_CONFIG 36 | 37 | # Configuration 38 | sim_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["simulation"]), 'r'), Loader=yaml.FullLoader) 39 | 40 | # Load checkpoints 41 | eval_policy = policy_from_checkpoint(ckpt_path=nav_path)[0] 42 | gait_agent = build_agent(gait_path, device=device) 43 | 44 | # Build environments 45 | env = build_robot_env(sim_config, gait_agent, render=render, record=True) 46 | 47 | # Record buffer 48 | successes = [] 49 | distances = [] 50 | 51 | # Loop for difficulties 52 | for env_type in EXP_CONFIG[difficulty]['env_type']: 53 | 54 | if save_path != None: 55 | os.makedirs("{}/env_{}".format(save_path, env_type), exist_ok=True) 56 | video_format = cv2.VideoWriter_fourcc(*'mp4v') 57 | 58 | # Loop for scenarios 59 | for eval_epoch in range(EXP_CONFIG[difficulty]['epoches']): 60 | 61 | # Setup scenarios 62 | done = False 63 | with open('{}/env_{}/{}.pickle'.format(env_data_path, env_type, eval_epoch),"rb") as f: 64 | profile = pickle.load(f) 65 | 66 | obs = env.reset(env_profile=profile) 67 | 68 | dic_record_array = {} 69 | dic_record_array['bird_view'] = [] 70 | 71 | # Evaluation loop 72 | while not done: 73 | 74 | # Update Navigation Controller 75 | obs_dict = { 76 | "agentview_rgb": 255.*np.transpose(obs["rgbd"][..., :3], (2, 0, 1)), 77 | "agentview_depth": np.transpose(obs["rgbd"][..., 3:], (2, 0, 1)), 78 | "yaw": np.array([obs["yaw"]]) 79 | } 80 | action = eval_policy(obs_dict) 81 | 82 | obs, rew, done, info = env.step(action) 83 | dic_record_array['bird_view']+=env.record() 84 | 85 | successes.append(env.episode_eval_stat()["Success"]) 86 | distances.append(env.episode_eval_stat()["Distance"]) 87 | print('episode: env_{}/{}'.format(env_type, eval_epoch)) 88 | print('current distance:', distances[-1]) 89 | print('average distance:', np.average(distances)) 90 | print('average success rate:', np.average(successes)) 91 | print('==========================="') 92 | 93 | # Real-time recording, not used 94 | if save_path != None: 95 | recorder = cv2.VideoWriter("{}/env_{}/trial_{}.mp4".format(save_path, env_type, eval_epoch), video_format, 30, (480, 360)) 96 | 97 | for img in dic_record_array['bird_view']: 98 | recorder.write(img) 99 | 100 | recorder.release() 101 | 102 | print("Evaluation: ") 103 | print("\taverage: ", np.mean(distances)) 104 | print("\tdeviation: ", np.std(distances)) 105 | print("\tsuccess rate: ", np.mean(successes)) 106 | 107 | if save_path != None: 108 | with open('{}/{}.txt'.format(save_path, difficulty),"w") as handle: 109 | for item in distances: 110 | handle.write("{}\n".format(item)) 111 | handle.write("\naverage: {}\n".format(np.average(distances))) 112 | handle.write("\ndeviation: {}\n".format(np.std(distances))) 113 | handle.write("\nsuccess: {}\n".format(np.average(successes))) 114 | 115 | 116 | if __name__ == '__main__': 117 | parser = argparse.ArgumentParser() 118 | parser.add_argument("--gait_policy",type=str,default="corl2022", 119 | help="path for loading checkpoints, configuration and training logs. 
For example, --gait_policy=GAIT_POLICY will load checkpoints at ./save/rl_checkpoints/gait/GAIT_POLICY.") 120 | parser.add_argument("--nav_policy", type=str, default="bcrnn", 121 | help="path for loading checkpoints, configuration and training logs. For example, --nav_policy=NAV_POLICY will load checkpoints at ./save/bc_checkpoints/NAV_POLICY.") 122 | parser.add_argument("--config",type=str,default="deploy", 123 | help="path to a directory with the configuration files of simulation setup, etc. For example, --config=CONFIG will save checkpoints at ./config/CONFIG.") 124 | parser.add_argument("--difficulty", type=str, default="hard", 125 | help="difficulty of simulation environments. Use one of these: 'easy', 'medium' and 'hard'.") 126 | 127 | args = parser.parse_args() 128 | 129 | nav_policy = args.nav_policy 130 | gait_policy = args.gait_policy 131 | difficulty = args.difficulty 132 | 133 | evaluate(nav_policy, gait_policy, difficulty) 134 | -------------------------------------------------------------------------------- /scripts/gait/agent.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import gym 3 | import os 4 | import numpy as np 5 | 6 | import torch 7 | from torch.optim.lr_scheduler import LambdaLR 8 | from torch.distributions import Independent, Normal 9 | 10 | from tianshou.policy import PPOPolicy 11 | from tianshou.utils.net.continuous import ActorProb, Critic 12 | 13 | from gait.model import GaitModel 14 | from gait.wrapper import PPOAgent 15 | 16 | 17 | SUBPATH_CONFIG = { "reward": "reward.yaml", 18 | "ppo": "ppo.yaml", 19 | "experiment": "experiment.yaml", 20 | "simulation": "simulation.yaml"} 21 | 22 | 23 | ## function for generating Gailt Controller 24 | def build_agent(load_path, device=None): 25 | 26 | # configuration 27 | policy_path = load_path + "/policy" 28 | config_path = load_path + "/config" 29 | 30 | exp_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["experiment"]), 'r'), Loader=yaml.FullLoader) 31 | ppo_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["ppo"]), 'r'), Loader=yaml.FullLoader) 32 | sim_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["simulation"]), 'r'), Loader=yaml.FullLoader) 33 | 34 | if device == None: 35 | device=exp_config["Device"] 36 | 37 | action_space = gym.spaces.Box(low=np.array([-0.2, -0.7, -0.7, -0.2, -0.7, -0.7, -0.2, -0.7, -0.7, -0.2, -0.7, -0.7]), 38 | high=np.array([0.2, 0.7, 0.7, 0.2, 0.7, 0.7, 0.2, 0.7, 0.7, 0.2, 0.7, 0.7]), shape=(12,), dtype=np.float32) 39 | sensor_dim = 37 40 | action_dim = action_space.shape[0] 41 | action_max = action_space.high[0] 42 | 43 | # Initialize model, algorithm, agent, replay_memory 44 | net_a = GaitModel(sensor_dim, action_dim, lenHistory=sim_config["State History"], device=device) 45 | net_c = GaitModel(sensor_dim, action_dim, lenHistory=sim_config["State History"], device=device) 46 | 47 | # generate actor/critic models 48 | actor = ActorProb(net_a, action_dim, max_action=action_max, unbounded=True, device=device).to(device) 49 | critic = Critic(net_c, device=device).to(device) 50 | 51 | # generate an optimizer 52 | optim = torch.optim.Adam( 53 | list(actor.parameters()) + list(critic.parameters()), lr=ppo_config["Learning Rate"]) 54 | 55 | # generate a learning rate scheduler 56 | lr_scheduler = None 57 | if ppo_config["Learning Rate Decay"]: 58 | max_update_num = np.ceil( 59 | ppo_config["Step/Epoch"] / ppo_config["Step/Collect"]) * ppo_config["Epoch"] 60 | 61 | lr_scheduler = 
LambdaLR( 62 | optim, lr_lambda=lambda epoch: 1 - epoch / max_update_num) 63 | 64 | # generate a normal distribution for RL exploration 65 | def dist(*logits): 66 | return Independent(Normal(*logits), 1) 67 | 68 | # generate a object for Gait Controller 69 | policy = PPOPolicy( actor, critic, optim, dist, 70 | discount_factor=ppo_config["Gamma"], 71 | gae_lambda=ppo_config["GAE Lambda"], 72 | max_grad_norm=ppo_config["Max Grad Norm"], 73 | vf_coef=ppo_config["Value Coefficient"], 74 | ent_coef=ppo_config["Entropy Coefficient"], 75 | reward_normalization=ppo_config["Reward Nomalization"], 76 | action_scaling=True, 77 | action_bound_method="clip", 78 | lr_scheduler=lr_scheduler, 79 | action_space=action_space, 80 | eps_clip=ppo_config["Epsilon Clip"], 81 | value_clip=ppo_config["Value Clip"], 82 | dual_clip=ppo_config["Dual Clip"], 83 | advantage_normalization=ppo_config["Advantage Normalization"], 84 | recompute_advantage=ppo_config["Recompute Advantage"]) 85 | 86 | # load the last checkpoints 87 | list_itr = [] 88 | for file in os.listdir(policy_path): 89 | if file.endswith("pth"): 90 | if 'policy' in file: 91 | continue 92 | itr = file.replace('checkpoint_', "") 93 | itr = int(itr.replace('.pth', "")) 94 | list_itr.append(itr) 95 | 96 | checkpoint_path = '{}/checkpoint_{}.pth'.format(policy_path, max(list_itr)) 97 | checkpoint = torch.load(checkpoint_path, map_location=device) 98 | policy.load_state_dict(checkpoint['model']) 99 | optim.load_state_dict(checkpoint['optim']) 100 | 101 | print('Loaded a RL checkpoint: {}'.format(checkpoint_path)) 102 | 103 | # set the evaluation mode 104 | policy.eval() 105 | 106 | return PPOAgent(policy) -------------------------------------------------------------------------------- /scripts/gait/logger.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numbers import Number 3 | from typing import Any, Tuple, Union, Callable, Optional 4 | import wandb 5 | 6 | WRITE_TYPE = Union[int, Number, np.number, np.ndarray] 7 | 8 | SAVE_REWARD_ITEMS = [ "Reward Tracking", "Reward Balance", "Reward Gait", 9 | "Reward Energy", "Reward Badfoot", "Reward Footcontact"] 10 | SAVE_STATE_ITEMS = [ "Height", "Jonit Power" , "Contact", 11 | "Roll Rate", "Pitch Rate", "Yaw Rate", 12 | "Error XY", "Error Yaw", 13 | "Drift"] 14 | SAVE_ACTION_ITEMS = [ ] 15 | SAVE_TERMIAL_ITEMS = [ ] 16 | 17 | 18 | class WandbLogger(): 19 | """ 20 | WandB logger for training Gait Controller 21 | """ 22 | 23 | def __init__( 24 | self, 25 | project: str= "project", 26 | task: str= "task", 27 | path: str= "./log.dat", 28 | update_interval: int = 1000, 29 | actor: Any=None, 30 | critic: Any=None, 31 | reward_config = {}, 32 | ppo_config = {}, 33 | experiment_config = {}, 34 | simulation_config = {}, 35 | ) -> None: 36 | 37 | self.id = wandb.util.generate_id() 38 | self.writer = wandb.init(id=self.id, resume="allow", 39 | project=project, 40 | job_type=task, 41 | config=ppo_config, 42 | save_code=path, 43 | dir=path, 44 | sync_tensorboard=False 45 | ) 46 | self.writer.config.update(reward_config) 47 | self.writer.config.update(experiment_config) 48 | self.writer.config.update(simulation_config) 49 | 50 | self.update_interval = update_interval 51 | self.last_log_train_step = -1 52 | self.last_log_test_step = -1 53 | self.last_log_update_step = -1 54 | self.last_save_step = -1 55 | self.num_episode = 0 56 | 57 | self.writer.watch(actor, log_freq=1000) 58 | self.writer.watch(critic, log_freq=1000) 59 | 60 | def log_train_data(self, 
collect_result: dict, step: int) -> None: 61 | 62 | if collect_result["n/ep"] > 0: 63 | collect_result["rew"] = collect_result["rews"].mean() 64 | collect_result["len"] = collect_result["lens"].mean() 65 | collect_result["rew_std"] = collect_result["rews"].std() 66 | collect_result["len_std"] = collect_result["lens"].std() 67 | 68 | if step - self.last_log_train_step >= self.update_interval: 69 | dicLogInfo = {} 70 | 71 | dicLogInfo["train/episode/Reward"] = collect_result["rew"] 72 | dicLogInfo["train/episode/Length"] = collect_result["len"] 73 | dicLogInfo["train/episode/Reward Std"] = collect_result["rew_std"] 74 | dicLogInfo["train/episode/Length Std"] = collect_result["len_std"] 75 | 76 | for key, value in collect_result["infos"].items(): 77 | if key in SAVE_REWARD_ITEMS: 78 | dicLogInfo["train/reward/{}".format(key)] = np.average(value) 79 | if key in SAVE_STATE_ITEMS: 80 | dicLogInfo["train/state/{}".format(key)] = np.average(value) 81 | if key in SAVE_ACTION_ITEMS: 82 | dicLogInfo["train/action/{}".format(key)] = np.max(np.absolute(value)) 83 | if key in SAVE_TERMIAL_ITEMS: 84 | dicLogInfo["train/episode/{}".format(key)] = np.average(value) 85 | 86 | self.writer.log(dicLogInfo, step=step) 87 | self.last_log_train_step = step 88 | 89 | def log_test_data(self, collect_result: dict, step: int) -> None: 90 | 91 | if collect_result["n/ep"] > 0: 92 | collect_result["rew"] = collect_result["rews"].mean() 93 | collect_result["len"] = collect_result["lens"].mean() 94 | collect_result["rew_std"] = collect_result["rews"].std() 95 | collect_result["len_std"] = collect_result["lens"].std() 96 | 97 | if step - self.last_log_train_step >= self.update_interval: 98 | dicLogInfo = {} 99 | 100 | dicLogInfo["test/episode/Reward"] = collect_result["rew"] 101 | dicLogInfo["test/episode/Length"] = collect_result["len"] 102 | dicLogInfo["test/episode/Reward Std"] = collect_result["rew_std"] 103 | dicLogInfo["test/episode/Length Std"] = collect_result["len_std"] 104 | 105 | for key, value in collect_result["infos"].items(): 106 | if key in SAVE_REWARD_ITEMS: 107 | dicLogInfo["test/reward/{}".format(key)] = np.average(value) 108 | if key in SAVE_STATE_ITEMS: 109 | dicLogInfo["test/state/{}".format(key)] = np.average(value) 110 | if key in SAVE_ACTION_ITEMS: 111 | dicLogInfo["test/action/{}".format(key)] = np.max(np.absolute(value)) 112 | if key in SAVE_TERMIAL_ITEMS: 113 | dicLogInfo["test/episode/{}".format(key)] = np.average(value) 114 | 115 | self.writer.log(dicLogInfo, step=step) 116 | self.last_log_test_step = step 117 | 118 | def log_update_data(self, update_result: dict, step: int) -> None: 119 | if step - self.last_log_update_step >= self.update_interval: 120 | self.writer.log(update_result) 121 | self.last_log_update_step = step 122 | 123 | def save_data( 124 | self, 125 | epoch: int, 126 | env_step: int, 127 | gradient_step: int, 128 | save_checkpoint_fn: Optional[Callable[[int, int, int], None]] = None, 129 | ) -> None: 130 | if save_checkpoint_fn and env_step - self.last_save_step >= 1: 131 | self.last_save_step = epoch 132 | save_checkpoint_fn(epoch, env_step, gradient_step) 133 | dicLogInfo = {} 134 | dicLogInfo["save/Epoch"] = epoch 135 | dicLogInfo["save/EnvStep"] = env_step 136 | dicLogInfo["save/GradientStep"] = gradient_step 137 | self.writer.log(dicLogInfo) 138 | 139 | def restore_data(self) -> Tuple[int, int, int]: 140 | 141 | return None 142 | 143 | -------------------------------------------------------------------------------- /scripts/gait/model.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from typing import Any, Dict, Tuple, Union, Optional, Sequence 5 | 6 | ## Gait Controller network 7 | class GaitModel(nn.Module): 8 | """ 9 | Gait Controller network of PRELUDE 10 | """ 11 | 12 | def __init__( 13 | self, 14 | sizeState: int, 15 | sizeAction: int = 0, 16 | lenHistory: int = 0, 17 | sizeEmbedding: int = 32, 18 | sizeFeature: int = 8, 19 | activation: Optional[nn.Module] = nn.ReLU, 20 | device: Union[str, int, torch.device] = "cpu", 21 | ) -> None: 22 | super().__init__() 23 | """ 24 | Generate a Gait Controller network. 25 | 26 | Args: 27 | sizeState: size of states: recent velocity command + recent robot states 28 | sizeAction: size of joint-space actions 29 | lenHistory: size of history buffer 30 | sizeEmbedding: size of embedding input at History Encoder 31 | sizeFeature: size of History Encoder output 32 | activation: activation function type at Policy MLP 33 | device: computing device 34 | 35 | Returns: 36 | None 37 | """ 38 | 39 | self.device = device 40 | 41 | self.sizeState = sizeState 42 | self.sizeAction = sizeAction 43 | 44 | self.output_dim = 256 45 | 46 | self.modelEncoder = HistoryEncoder(numChannelInput = sizeState + sizeAction, 47 | numChannelEmbedding = sizeEmbedding, 48 | lenHistoryInput = lenHistory, 49 | sizeOutput = sizeFeature, 50 | device=device) 51 | 52 | self.modelPolicy = MLP(sizeFeature + sizeState + sizeAction, hidden_sizes=(256, 256), activation=activation, device=device) 53 | 54 | 55 | def forward(self, s: Union[np.ndarray, torch.Tensor], state: Any = None, info: Dict[str, Any] = {}, ) -> Tuple[torch.Tensor, Any]: 56 | """ 57 | Run Gait Controller. 58 | 59 | Args: 60 | s: observation 61 | state: not used 62 | info: not used 63 | 64 | Returns: 65 | action: output joint-space action 66 | state: not used 67 | """ 68 | 69 | s = torch.as_tensor(s, device=self.device, dtype=torch.float32) 70 | o1 = s.narrow(1, 0, self.sizeState + self.sizeAction) 71 | s = s.view(-1, self.modelEncoder.lenHistoryInput, self.modelEncoder.numChannelInput) 72 | o2 = self.modelEncoder(s) 73 | o = torch.cat((o1, o2), dim=1) 74 | 75 | action = self.modelPolicy(o) 76 | 77 | return action, state 78 | 79 | 80 | class HistoryEncoder(nn.Module): 81 | """ 82 | History Encoder at Gait Controller 83 | """ 84 | 85 | def __init__(self, numChannelInput: int = 62, 86 | numChannelEmbedding: int = 32, 87 | lenHistoryInput: int = 50, 88 | sizeOutput: int = 8, 89 | device: Union[str, int, torch.device] = "cpu",) -> None: 90 | super().__init__() 91 | """ 92 | Generate a History Encoder for Gait Controller. 
93 | 94 | Args: 95 | numChannelInput: size of states at each time step: recent velocity command + recent robot states + previous joint-space action 96 | numChannelEmbedding: size of embedding input at History Encoder 97 | lenHistory: size of history buffer 98 | sizeOutput: size of History Encoder output 99 | device: computing device 100 | 101 | Returns: 102 | None 103 | """ 104 | 105 | self.device = device 106 | 107 | self.numChannelInput = numChannelInput 108 | self.numChannelEmbedding = numChannelEmbedding 109 | self.lenHistoryInput = lenHistoryInput 110 | self.sizeOutput = sizeOutput 111 | 112 | sizeProj = self.lenHistoryInput 113 | raConv = [] 114 | for input, output, kernel, stride in ((self.numChannelEmbedding, self.numChannelEmbedding, 4, 2), 115 | (self.numChannelEmbedding, self.numChannelEmbedding, 3, 1), 116 | (self.numChannelEmbedding, self.numChannelEmbedding, 3, 1)): 117 | 118 | sizeProj = int(np.floor((sizeProj-kernel)/stride + 1)) 119 | raConv += [nn.Conv1d(self.numChannelEmbedding, self.numChannelEmbedding, kernel_size=kernel, stride=stride)] 120 | raConv += [nn.ReLU(inplace=True)] 121 | 122 | self.embd = nn.Sequential(nn.Linear(self.numChannelInput, self.numChannelEmbedding), nn.ReLU(inplace=True)) 123 | self.conv = nn.Sequential(*raConv) 124 | self.proj = nn.Linear(sizeProj*self.numChannelEmbedding, self.sizeOutput) 125 | 126 | 127 | def forward(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: 128 | """ 129 | Run History Encoder. 130 | 131 | Args: 132 | x: observation: recent velocity command + recent robot states + previous joint-space action 133 | device: computing device 134 | 135 | Returns: 136 | out: History Encoder output 137 | """ 138 | 139 | x = torch.as_tensor(x, device=self.device, dtype=torch.float32) 140 | h1 = self.embd(x) 141 | h2 = self.conv(h1.transpose(1,2)) 142 | out = self.proj(h2.flatten(1)) 143 | 144 | return out 145 | 146 | 147 | class MLP(nn.Module): 148 | """ 149 | Policy MLP at Gait Controller 150 | """ 151 | 152 | def __init__( 153 | self, 154 | input_dim: int, 155 | output_dim: int = 0, 156 | hidden_sizes: Sequence[int] = (), 157 | activation: Optional[Union[nn.Module, Sequence[nn.Module]]] = nn.ReLU, 158 | device: Optional[Union[str, int, torch.device]] = None, 159 | ) -> None: 160 | super().__init__() 161 | """ 162 | Generate Policy MLP at Gait Controller. 163 | 164 | Args: 165 | input_dim: size of input 166 | output_dim: size of output 167 | hidden_sizes: hidden layer sizes 168 | activation: activation function type 169 | device: computing device 170 | 171 | Returns: 172 | None 173 | """ 174 | 175 | self.device = device 176 | 177 | if activation: 178 | if isinstance(activation, list): 179 | assert len(activation) == len(hidden_sizes) 180 | activation_list = activation 181 | else: 182 | activation_list = [ 183 | activation for _ in range(len(hidden_sizes))] 184 | else: 185 | activation_list = [None] * len(hidden_sizes) 186 | 187 | hidden_sizes = [input_dim] + list(hidden_sizes) 188 | 189 | model = [] 190 | for in_dim, out_dim, activ in zip( 191 | hidden_sizes[:-1], hidden_sizes[1:], activation_list): 192 | model += [nn.Linear(in_dim, out_dim)] 193 | 194 | if activ is not None: 195 | model += [activation()] 196 | if output_dim > 0: 197 | model += [nn.Linear(hidden_sizes[-1], output_dim)] 198 | 199 | self.output_dim = output_dim or hidden_sizes[-1] 200 | self.model = nn.Sequential(*model) 201 | 202 | def forward(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: 203 | """ 204 | Run Policy MLP. 
205 | 206 | Args: 207 | x: observation: output of History Encoder 208 | device: computing device 209 | 210 | Returns: 211 | out: Policy MLP output 212 | """ 213 | 214 | x = torch.as_tensor(x, device=self.device, dtype=torch.float32) 215 | return self.model(x.flatten(1)) -------------------------------------------------------------------------------- /scripts/gait/wrapper.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | import gym 4 | import torch 5 | import cv2 6 | import numpy as np 7 | 8 | from tianshou.data import Batch, to_numpy 9 | 10 | RENDER_WIDTH = 480 11 | RENDER_HEIGHT = 360 12 | 13 | COMMAND_LIN_MAX = 1.5 14 | COMMAND_LIN_MIN = 0 15 | COMMAND_LIN_BASE = 1.0 16 | COMMAND_LIN_DIS = 1.0*0.004 * (33./13.) 17 | 18 | COMMAND_LAT_MAX = 0.5 * (33./13.) 19 | COMMAND_LAT_MIN = -0.5 * (33./13.) 20 | COMMAND_LAT_BASE = 0.0 21 | COMMAND_LAT_DIS = 1.0*0.004 * (33./13.) 22 | 23 | COMMAND_YAW_MAX = 0.7 * (33./13.) 24 | COMMAND_YAW_MIN = -0.7 * (33./13.) 25 | COMMAND_YAW_BASE = 0.0 26 | COMMAND_YAW_DIS = 1.0*0.008 * (33./13.) 27 | 28 | COMMAND_SCALE = (33./13.) 29 | 30 | ## Trajectory handler for velocity commands at evaluating/deploying Gait Controller 31 | class TrajectoryHandler(): 32 | def __init__(self): 33 | self._scale = COMMAND_SCALE 34 | self.reset() 35 | 36 | def reset(self): 37 | self.target_xy = np.zeros(2) 38 | self.target_yaw = 0.0 39 | return self.target_xy, self.target_yaw 40 | 41 | def get_target(self): 42 | return self.target_xy, self.target_yaw 43 | 44 | def set_scale(self, scale): 45 | self._scale = scale 46 | 47 | def set_target(self, action): 48 | self.target_xy[0] = action[0]/self._scale 49 | self.target_yaw = action[1]/self._scale 50 | 51 | def evaluate(self, robot): 52 | xyz_pos = robot.GetBasePosition() 53 | xyz_vel = robot.GetTrueLocalBaseVelocity() 54 | rpy_pos = robot.GetTrueBaseRollPitchYaw() 55 | rpy_vel = robot.GetTrueBaseRollPitchYawRate() 56 | errors = np.concatenate(((self._scale*self.target_xy - xyz_vel[0:2]), self._scale*self.target_yaw-rpy_vel[2]), axis=None) 57 | return {'errors':errors, 'linear':xyz_vel, 'angular':rpy_vel, 'position': xyz_pos, 'orientation': rpy_pos} 58 | 59 | @property 60 | def scale(self): 61 | return self._scale 62 | 63 | @property 64 | def tracking(self): 65 | return True 66 | 67 | 68 | ## Trajectory generator for random velocity commands at training Gait Controller 69 | class TrajectoryGenerator(): 70 | def __init__(self, curriculum_handler=None): 71 | self._scale = COMMAND_SCALE 72 | self._curriculum = curriculum_handler 73 | self.reset() 74 | 75 | def reset(self): 76 | self.target_xy = np.array([COMMAND_LIN_BASE, COMMAND_LAT_BASE])/self._scale 77 | self.target_yaw = COMMAND_YAW_BASE/self._scale 78 | 79 | return self.target_xy, self.target_yaw 80 | 81 | def get_target(self): 82 | 83 | if self._curriculum != None: 84 | scale = self._curriculum.get_scale() 85 | self._curriculum.add_count() 86 | else: 87 | scale = 1.0 88 | 89 | self.target_xy[0] += np.random.normal(0., scale*COMMAND_LIN_DIS)/self._scale 90 | self.target_yaw += np.random.normal(0., scale*COMMAND_YAW_DIS)/self._scale 91 | self.target_xy[0] = np.clip(self.target_xy[0], COMMAND_LIN_MIN/self._scale, COMMAND_LIN_MAX/self._scale) 92 | self.target_yaw = np.clip(self.target_yaw, COMMAND_YAW_MIN/self._scale, COMMAND_YAW_MAX/self._scale) 93 | return self.target_xy, self.target_yaw 94 | 95 | @property 96 | def scale(self): 97 | return self._scale 98 | 99 | @property 100 | def tracking(self): 101 | return True 102 | 
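The `GaitModel`, `HistoryEncoder`, and `MLP` classes in `scripts/gait/model.py` above define the full Gait Controller network: `HistoryEncoder` embeds each state-action frame, convolves over the history buffer, and projects it to a small feature vector, which `GaitModel.forward` concatenates with the frame at the front of the flattened observation before the Policy MLP. The snippet below is a minimal sanity check of that input layout; the state, action, and history sizes are placeholders for illustration only, since the real values are read from the simulator's sensor and action spaces in `train_gait.py`.
```
import torch
from gait.model import GaitModel

# Hypothetical dimensions -- the real ones come from env.sensor_space / env.action_space.
size_state, size_action, len_history = 50, 12, 50

model = GaitModel(size_state, size_action, lenHistory=len_history)

# One observation = len_history frames of (robot state + previous joint-space action),
# flattened into a single vector; the first frame is also fed directly to the Policy MLP.
obs = torch.zeros(4, len_history * (size_state + size_action))   # batch of 4

features, _ = model(obs)   # History Encoder feature + current frame -> Policy MLP
print(features.shape)      # torch.Size([4, 256]); Tianshou's ActorProb/Critic heads sit on top
```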
103 | 104 | ## Curriculum for RL training, not used 105 | class CurriculumHandler(): 106 | def __init__(self, min_scale=0.1, max_scale=2.0, coeff=10e6): 107 | assert(max_scale>min_scale) 108 | self._min = min_scale 109 | self._max = max_scale 110 | self._coeff = coeff 111 | self._count = 0 112 | self.reset() 113 | 114 | def reset(self): 115 | self._count = 0 116 | 117 | def add_count(self): 118 | self._count += 1 119 | 120 | def get_scale(self): 121 | scale = self._max + (self._min - self._max) * np.exp(-1.0*self._count/self._coeff) 122 | return scale 123 | 124 | 125 | ## Agent class for evaluating and deploying the Gait Controller model 126 | class PPOAgent(): 127 | def __init__(self, policy) -> None: 128 | self.policy = policy 129 | self.data = Batch(obs={}, act={}) 130 | 131 | def reset(self): 132 | pass 133 | 134 | def predict(self,obs): 135 | self.data.obs = [obs] 136 | with torch.no_grad(): 137 | result = self.policy(self.data) 138 | action = self.policy.map_action(to_numpy(result.act)) 139 | return action 140 | 141 | 142 | ## Env Wrapper for distributed PPO of Tianshou, and recording utils 143 | class TianshouWrapper(gym.Wrapper): 144 | def __init__(self, env, action_scale, action_offset=0, max_step=600, video_path=None): 145 | gym.Wrapper.__init__(self, env) 146 | 147 | self._video_save_path = video_path 148 | self.recorder = None 149 | self.max_step = max_step 150 | 151 | if self._video_save_path != None: 152 | self._recorder_reset = False 153 | self._render_width = RENDER_WIDTH 154 | self._render_height = RENDER_HEIGHT 155 | self._video_format = cv2.VideoWriter_fourcc(*'mp4v') 156 | 157 | self.action_scale = action_scale 158 | self.action_offset = action_offset 159 | 160 | 161 | def reset(self,**kwargs): 162 | obs, info = self.env.reset(**kwargs) 163 | self.num_step = 0 164 | 165 | if self._video_save_path !=None: 166 | self._recorder_reset = False 167 | 168 | return obs 169 | 170 | 171 | def step(self, action): 172 | obs, rew, done, info = self.env.step(self.action_scale * action + self.action_offset) 173 | self.num_step += 1 174 | 175 | if self._video_save_path != None: 176 | self._record_save() 177 | 178 | done = done or (self.num_step >= self.max_step) 179 | return obs, rew, done, info 180 | 181 | 182 | def close(self): 183 | self.env.close() 184 | 185 | 186 | def _record_reset(self): 187 | 188 | if self._video_save_path !=None: 189 | 190 | if self.recorder != None: 191 | self.recorder.release() 192 | del self.recorder 193 | self.recorder = None 194 | 195 | self.recorder = cv2.VideoWriter("{}/{}.mp4".format(self._video_save_path, datetime.datetime.now().strftime("%m%d_%H%M%S")), 196 | self._video_format, 30, (self._render_width, self._render_height)) 197 | 198 | 199 | def _record_save(self): 200 | 201 | if self._video_save_path !=None: 202 | 203 | if not self._recorder_reset: 204 | self._record_reset() 205 | self._recorder_reset = True 206 | 207 | img = self.env.render(mode='rgb_array')[:, :, [2, 1, 0]] 208 | self.recorder.write(img) 209 | 210 | -------------------------------------------------------------------------------- /scripts/path.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | PATH_SICRIPT = os.path.dirname(os.path.realpath(__file__)) 5 | PATH_ROOT = os.path.dirname(PATH_SICRIPT) 6 | SUBPATH = yaml.load(open(os.path.join(PATH_ROOT, 'path.yaml')), Loader=yaml.FullLoader) 7 | 8 | PATH_DATA = os.path.join(PATH_ROOT, SUBPATH['Data']) 9 | PATH_CONFIG = os.path.join(PATH_ROOT, 
SUBPATH['Configuration']) 10 | 11 | PATH_CHECKPOINT_RL = os.path.join(PATH_ROOT, SUBPATH['RL Checkpoint']) 12 | PATH_CHECKPOINT_BC = os.path.join(PATH_ROOT, SUBPATH['BC Checkpoint']) 13 | 14 | PATH_DATASETSET_SIM = os.path.join(PATH_ROOT, SUBPATH['Dataset Sim']) 15 | PATH_DATASETSET_REAL = os.path.join(PATH_ROOT, SUBPATH['Dataset Real']) 16 | 17 | PATH_RAW_SIM = os.path.join(PATH_ROOT, SUBPATH['Raw Data Sim']) 18 | PATH_RAW_REAL = os.path.join(PATH_ROOT, SUBPATH['Raw Data Real']) 19 | -------------------------------------------------------------------------------- /scripts/train_gait.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import random 4 | import copy 5 | import argparse 6 | 7 | import numpy as np 8 | 9 | import torch 10 | from torch.optim.lr_scheduler import LambdaLR 11 | from torch.distributions import Independent, Normal 12 | 13 | from tianshou.policy import PPOPolicy 14 | from tianshou.env import SubprocVectorEnv 15 | from tianshou.trainer import onpolicy_trainer 16 | from tianshou.utils.net.continuous import ActorProb, Critic 17 | from tianshou.data import Collector, ReplayBuffer, VectorReplayBuffer 18 | 19 | import a1sim 20 | from a1sim.envs.env_builder import SENSOR_MODE 21 | 22 | from gait.logger import WandbLogger, SAVE_REWARD_ITEMS, SAVE_STATE_ITEMS, SAVE_TERMIAL_ITEMS 23 | from gait.model import GaitModel 24 | from gait.wrapper import TianshouWrapper, TrajectoryGenerator 25 | 26 | from path import * 27 | 28 | SUBPATH_CONFIG = { "reward": "reward.yaml", 29 | "ppo": "ppo.yaml", 30 | "experiment": "experiment.yaml", 31 | "simulation": "simulation.yaml"} 32 | 33 | LOGGER_TOPIC_LIST = SAVE_REWARD_ITEMS + SAVE_STATE_ITEMS + SAVE_TERMIAL_ITEMS 34 | 35 | RANDOM_PARAM_DICT = {'random_dynamics':0, 'random_force':0} 36 | DYNAMIC_PARAM_DICT = {'control_latency': 0.001, 'joint_friction': 0.025, 'spin_foot_friction': 0.2, 'foot_friction': 1} 37 | 38 | ## Train Gait Controller 39 | def train(save_path, config_path): 40 | 41 | # Load training configuration. 42 | exp_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["experiment"]), 'r'), Loader=yaml.FullLoader) 43 | ppo_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["ppo"]), 'r'), Loader=yaml.FullLoader) 44 | rew_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["reward"]), 'r'), Loader=yaml.FullLoader) 45 | sim_config = yaml.load(open("{}/{}".format(config_path, SUBPATH_CONFIG["simulation"]), 'r'), Loader=yaml.FullLoader) 46 | 47 | print("\r\nExperiment Info") 48 | print(exp_config) 49 | print("\r\nPPO CONFIG") 50 | print(ppo_config) 51 | print("\r\nReward CONFIG") 52 | print(rew_config) 53 | 54 | # Configure envirnoment setup. 
55 | random_param = copy.copy(RANDOM_PARAM_DICT) 56 | dynamic_param = copy.copy(DYNAMIC_PARAM_DICT) 57 | sensor_mode = copy.copy(SENSOR_MODE) 58 | 59 | random_param['random_dynamics'] = sim_config["Random Dynamics"] 60 | random_param['random_force'] = sim_config["Random Force"] 61 | 62 | sensor_mode['dis'] = sim_config["Sensor Dis"] 63 | sensor_mode['motor'] = sim_config["Sensor Motor"] 64 | sensor_mode["imu"] = sim_config["Sensor IMU"] 65 | sensor_mode["contact"] = sim_config["Sensor Contact"] 66 | sensor_mode["footpose"] = sim_config["Sensor Footpose"] 67 | sensor_mode["dynamic_vec"] = sim_config["Sensor Dynamic"] 68 | sensor_mode["force_vec"] = sim_config["Sensor Exforce"] 69 | sensor_mode["noise"] = sim_config["Sensor Noise"] 70 | 71 | reward_param = copy.copy(rew_config) 72 | 73 | print("\r\nReward CONFIG") 74 | print(reward_param) 75 | print("\r\nRandom CONFIG") 76 | print(random_param) 77 | print("\r\nDynamics CONFIG") 78 | print(dynamic_param) 79 | print("\r\nSensor CONFIG") 80 | print(sensor_mode) 81 | 82 | # Create directories for saved files. 83 | policy_path = os.path.join(save_path, 'policy') 84 | config_path = os.path.join(save_path, 'config') 85 | video_path = os.path.join(save_path, 'video') 86 | 87 | os.makedirs(save_path, exist_ok=True) 88 | os.makedirs(policy_path, exist_ok=True) 89 | os.makedirs(config_path, exist_ok=True) 90 | os.makedirs(video_path, exist_ok=True) 91 | 92 | # Save configuration files. 93 | with open(os.path.join(config_path, 'experiment.yaml'), 'w') as yaml_file: 94 | yaml.dump(exp_config, yaml_file, default_flow_style=False) 95 | with open(os.path.join(config_path, 'ppo.yaml'), 'w') as yaml_file: 96 | yaml.dump(ppo_config, yaml_file, default_flow_style=False) 97 | with open(os.path.join(config_path, 'reward.yaml'), 'w') as yaml_file: 98 | yaml.dump(rew_config, yaml_file, default_flow_style=False) 99 | with open(os.path.join(config_path, 'simulation.yaml'), 'w') as yaml_file: 100 | yaml.dump(sim_config, yaml_file, default_flow_style=False) 101 | 102 | cmd_trj = TrajectoryGenerator() 103 | 104 | # Checking an RL environment. 
105 | env = a1sim.make_env(task="none", 106 | random_param=random_param, reward_param=reward_param, dynamic_param=dynamic_param, sensor_mode=sensor_mode, 107 | num_history=sim_config["State History"], render=False, terrain=None, cmd=cmd_trj, 108 | normal=1) 109 | obs_dim = env.observation_space.shape[0] 110 | sensor_dim = env.sensor_space.shape[0] 111 | action_dim = env.action_space.shape[0] 112 | action_max = env.action_space.high[0] 113 | 114 | # Action offset, scale 115 | act_scale = np.array([0.2,0.6,0.6]*4) 116 | act_offset = np.array([0., 0.0, 0.]*4) 117 | 118 | # curr = CurriculumHandler(min_scale=0.1, max_scale=2.0, coeff=10e6) 119 | 120 | # Training environments 121 | train_envs = SubprocVectorEnv( 122 | [lambda: TianshouWrapper(a1sim.make_env( 123 | random_param=random_param, reward_param=reward_param, dynamic_param=dynamic_param, sensor_mode=sensor_mode, 124 | num_history=sim_config["State History"], render=False, terrain=None, cmd=cmd_trj, 125 | normal=1), 126 | action_scale=act_scale, action_offset=act_offset, max_step = exp_config["Steps Per Episode"]) 127 | for _ in range(ppo_config["Training Envs"])], 128 | norm_obs=False) 129 | 130 | # Evaluation environments 131 | test_envs = SubprocVectorEnv( 132 | [lambda: TianshouWrapper(a1sim.make_env( 133 | random_param=random_param, reward_param=reward_param, dynamic_param=dynamic_param, sensor_mode=sensor_mode, 134 | num_history=sim_config["State History"], render=False, terrain=None, cmd=cmd_trj, 135 | normal=1), 136 | action_scale=act_scale, action_offset=act_offset, max_step = exp_config["Steps Per Episode"]) 137 | for _ in range(ppo_config["Test Envs"]-1)] 138 | + [lambda: TianshouWrapper(a1sim.make_env( 139 | random_param=random_param, reward_param=reward_param, dynamic_param=dynamic_param, sensor_mode=sensor_mode, 140 | num_history=sim_config["State History"], render=False, terrain=None, cmd=cmd_trj, 141 | normal=1), 142 | action_scale=act_scale, action_offset=act_offset, max_step = exp_config["Steps Per Episode"], video_path=video_path)], 143 | norm_obs=False, obs_rms=train_envs.obs_rms, update_obs_rms=False) 144 | 145 | # Seed 146 | random.seed(exp_config["Seed"]) 147 | np.random.seed(exp_config["Seed"]) 148 | torch.manual_seed(exp_config["Seed"]) 149 | torch.cuda.manual_seed(exp_config["Seed"]) 150 | train_envs.seed() 151 | test_envs.seed() 152 | 153 | # Actor model 154 | net_a = GaitModel(sensor_dim, action_dim, lenHistory=sim_config["State History"], device=exp_config["Device"]) 155 | actor = ActorProb(net_a, action_dim, max_action=action_max, unbounded=True, device=exp_config["Device"]) 156 | actor.to(exp_config["Device"]) 157 | 158 | # Critic model 159 | net_c = GaitModel(sensor_dim, action_dim, lenHistory=sim_config["State History"], device=exp_config["Device"]) 160 | critic = Critic(net_c, device=exp_config["Device"]) 161 | critic.to(exp_config["Device"]) 162 | 163 | # Scale last policy layer to make initial actions have 0 mean and std. 
164 | torch.nn.init.constant_(actor.sigma_param, ppo_config["Initial Sigma"]) 165 | for m in list(actor.modules()) + list(critic.modules()): 166 | if isinstance(m, torch.nn.Linear): 167 | # orthogonal initialization 168 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 169 | torch.nn.init.zeros_(m.bias) 170 | for m in actor.mu.modules(): 171 | if isinstance(m, torch.nn.Linear): 172 | torch.nn.init.zeros_(m.bias) 173 | m.weight.data.copy_(0.01 * m.weight.data) 174 | 175 | # Optimizer 176 | optim = torch.optim.Adam( 177 | list(actor.parameters()) + list(critic.parameters()), lr=ppo_config["Learning Rate"]) 178 | 179 | # Learning rate 180 | lr_scheduler = None 181 | if ppo_config["Learning Rate Decay"]: 182 | # decay learning rate to 0 linearly 183 | max_update_num = np.ceil( 184 | ppo_config["Step/Epoch"] / ppo_config["Step/Collect"]) * ppo_config["Epoch"] 185 | 186 | lr_scheduler = LambdaLR( 187 | optim, lr_lambda=lambda epoch: 1 - epoch / max_update_num) 188 | 189 | # Action distribution for exploration 190 | def dist(*logits): 191 | try: 192 | return Independent(Normal(*logits), 1) 193 | except ValueError as e: 194 | raise ValueError from e 195 | 196 | # PPO policy 197 | policy = PPOPolicy( actor, critic, optim, dist, 198 | discount_factor=ppo_config["Gamma"], 199 | gae_lambda=ppo_config["GAE Lambda"], 200 | max_grad_norm=ppo_config["Max Grad Norm"], 201 | vf_coef=ppo_config["Value Coefficient"], 202 | ent_coef=ppo_config["Entropy Coefficient"], 203 | reward_normalization=ppo_config["Reward Nomalization"], 204 | action_scaling=True, 205 | action_bound_method="clip", 206 | lr_scheduler=lr_scheduler, 207 | action_space=env.action_space, 208 | eps_clip=ppo_config["Epsilon Clip"], 209 | value_clip=ppo_config["Value Clip"], 210 | dual_clip=ppo_config["Dual Clip"], 211 | advantage_normalization=ppo_config["Advantage Normalization"], 212 | recompute_advantage=ppo_config["Recompute Advantage"]) 213 | 214 | # Data collector 215 | if ppo_config["Training Envs"] > 1: 216 | buffer = VectorReplayBuffer(ppo_config["Buffer Size"], len(train_envs)) 217 | else: 218 | buffer = ReplayBuffer(ppo_config["Buffer Size"]) 219 | train_collector = Collector(policy, train_envs, buffer, exploration_noise=True) 220 | test_collector = Collector(policy, test_envs) 221 | 222 | # Log writer 223 | logger = WandbLogger(project = exp_config["Project"], task = exp_config["Task"], 224 | path=save_path, update_interval=1000, 225 | reward_config=rew_config, ppo_config=ppo_config, experiment_config=exp_config, 226 | actor=net_a, critic=net_c) 227 | 228 | # Policy recorder 229 | def save_fn(policy): 230 | torch.save(policy.state_dict(), 231 | os.path.join(policy_path, 'policy.pth')) 232 | 233 | # Checkpoint recorder 234 | def save_checkpoint_fn(epoch, env_step, gradient_step): 235 | torch.save({'model': policy.state_dict(),'optim': optim.state_dict(),}, 236 | os.path.join(policy_path, 'checkpoint_{}.pth'.format(epoch))) 237 | 238 | # Trainer 239 | result = onpolicy_trainer( 240 | policy, train_collector, test_collector, ppo_config["Epoch"], ppo_config["Step/Epoch"], 241 | ppo_config["Repeat/Collect"], ppo_config["Test Envs"], ppo_config["Batch Size"], 242 | step_per_collect=ppo_config["Step/Collect"], save_fn=save_fn, save_checkpoint_fn=save_checkpoint_fn, logger=logger, 243 | test_in_train=False) 244 | 245 | 246 | if __name__ == '__main__': 247 | parser = argparse.ArgumentParser() 248 | parser.add_argument("--gait_policy",type=str,default="corl2022", 249 | help="path for saving checkpoints, configuration and training logs. 
For example, --gait_policy=GAIT_POLICY will save checkpoints at ./save/rl_checkpoints/gait/GAIT_POLICY.") 250 | parser.add_argument("--config",type=str,default="gait", 251 | help="path to a directory with the configuration files of RL hyper-parameters, reward, simulation setup, etc. For example, --config=CONFIG will save checkpoints at ./config/CONFIG.") 252 | args = parser.parse_args() 253 | save_path = os.path.join(PATH_CHECKPOINT_RL, "gait", args.gait_policy) 254 | train(save_path, os.path.join(PATH_CONFIG, args.config)) -------------------------------------------------------------------------------- /scripts/train_nav.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import numpy as np 4 | import os 5 | import sys 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | import robomimic 11 | import robomimic.utils.train_utils as TrainUtils 12 | import robomimic.utils.torch_utils as TorchUtils 13 | import robomimic.utils.obs_utils as ObsUtils 14 | import robomimic.utils.file_utils as FileUtils 15 | from robomimic.config import config_factory 16 | from robomimic.algo import algo_factory 17 | from robomimic.utils.log_utils import PrintLogger 18 | from robomimic.envs.env_base import EnvType 19 | from robomimic.utils.dataset import SequenceDataset 20 | 21 | from path import * 22 | 23 | ## Load demostration data 24 | def load_data_for_training(config, obs_keys): 25 | """ 26 | Data loading at the start of an algorithm. 27 | Args: 28 | config (BaseConfig instance): config object 29 | obs_keys (list): list of observation modalities that are required for 30 | training (this will inform the dataloader on what modalities to load) 31 | Returns: 32 | train_dataset (SequenceDataset instance): train dataset object 33 | valid_dataset (SequenceDataset instance): valid dataset object (only if using validation) 34 | """ 35 | 36 | # config can contain an attribute to filter on 37 | filter_by_attribute = config.train.hdf5_filter_key 38 | 39 | # load the dataset into memory 40 | assert not config.train.hdf5_normalize_obs, "no support for observation normalization with validation data yet" 41 | train_filter_by_attribute = "train" 42 | valid_filter_by_attribute = "valid" 43 | if filter_by_attribute is not None: 44 | train_filter_by_attribute = "{}_{}".format(filter_by_attribute, train_filter_by_attribute) 45 | valid_filter_by_attribute = "{}_{}".format(filter_by_attribute, valid_filter_by_attribute) 46 | 47 | def get_dataset(filter_by_attribute): 48 | return SequenceDataset( 49 | hdf5_path=config.train.data, 50 | obs_keys=obs_keys, 51 | dataset_keys=config.train.dataset_keys, 52 | load_next_obs=False, 53 | frame_stack=1, 54 | seq_length=config.train.seq_length, 55 | pad_frame_stack=True, 56 | pad_seq_length=True, 57 | get_pad_mask=False, 58 | goal_mode=config.train.goal_mode, 59 | hdf5_cache_mode=config.train.hdf5_cache_mode, 60 | hdf5_use_swmr=config.train.hdf5_use_swmr, 61 | hdf5_normalize_obs=config.train.hdf5_normalize_obs, 62 | filter_by_attribute=filter_by_attribute, 63 | ) 64 | 65 | train_dataset = get_dataset(train_filter_by_attribute) 66 | valid_dataset = get_dataset(valid_filter_by_attribute) 67 | 68 | 69 | return train_dataset, valid_dataset 70 | 71 | 72 | ## Train Navigation Controller 73 | def train(config, device): 74 | """ 75 | Train a model using the algorithm. 
76 | """ 77 | 78 | # Configuration 79 | np.random.seed(config.train.seed) 80 | torch.manual_seed(config.train.seed) 81 | 82 | print("\n============= New Training Run with Config =============") 83 | print(config) 84 | print("") 85 | log_dir, ckpt_dir, video_dir = TrainUtils.get_exp_dir(config) 86 | 87 | if config.experiment.logging.terminal_output_to_txt: 88 | # log stdout and stderr to a text file 89 | logger = PrintLogger(os.path.join(log_dir, 'log.txt')) 90 | sys.stdout = logger 91 | sys.stderr = logger 92 | 93 | # read config to set up metadata for observation modalities (e.g. detecting rgb observations) 94 | ObsUtils.initialize_obs_utils_with_config(config) 95 | 96 | # make sure the dataset exists 97 | dataset_path = os.path.expanduser(config.train.data) 98 | if not os.path.exists(dataset_path): 99 | raise Exception("Dataset at provided path {} not found!".format(dataset_path)) 100 | 101 | # Configure meta data 102 | env_meta = { 103 | "env_name": "quadruped-nav", 104 | "type": EnvType.GYM_TYPE, 105 | "env_kwargs": {} 106 | } 107 | shape_meta = FileUtils.get_shape_metadata_from_dataset( 108 | dataset_path=config.train.data, 109 | all_obs_keys=config.all_obs_keys, 110 | verbose=True 111 | ) 112 | 113 | # BC Model 114 | model = algo_factory( 115 | algo_name=config.algo_name, 116 | config=config, 117 | obs_key_shapes=shape_meta["all_shapes"], 118 | ac_dim=shape_meta["ac_dim"], 119 | device=device 120 | ) 121 | print("\n============= Model Summary =============") 122 | print(model) 123 | 124 | # Load data set 125 | obs_normalization_stats = None 126 | if config.train.hdf5_normalize_obs: 127 | obs_normalization_stats = trainset.get_obs_normalization_stats() 128 | 129 | trainset, validset = load_data_for_training( 130 | config, obs_keys=shape_meta["all_obs_keys"]) 131 | train_sampler = trainset.get_dataset_sampler() 132 | print("\n============= Training Dataset =============") 133 | print(trainset) 134 | print("") 135 | 136 | train_loader = DataLoader( 137 | dataset=trainset, 138 | sampler=train_sampler, # no custom sampling logic (uniform sampling) 139 | batch_size=config.train.batch_size, # batches of size 100 140 | shuffle=True, 141 | num_workers=config.train.num_data_workers, 142 | drop_last=True# don't provide last batch in dataset pass if it's less than 100 in size 143 | ) 144 | 145 | valid_sampler = validset.get_dataset_sampler() 146 | valid_loader = DataLoader( 147 | dataset=validset, 148 | sampler=valid_sampler, 149 | batch_size=config.train.batch_size, 150 | shuffle=(valid_sampler is None), 151 | num_workers=0, 152 | drop_last=True 153 | ) 154 | 155 | # Train 156 | best_valid_loss = None 157 | best_training_loss = None 158 | for epoch in range(1, config.train.num_epochs + 1): 159 | should_save_ckpt = False 160 | train_step_log = TrainUtils.run_epoch(model=model, data_loader=train_loader, epoch=epoch, num_steps=config.experiment.epoch_every_n_steps) 161 | model.on_epoch_end(epoch) 162 | 163 | print("Train Epoch {}: Loss {}".format(epoch, train_step_log["Loss"])) 164 | 165 | if best_training_loss is None or train_step_log["Loss"] < best_training_loss: 166 | should_save_ckpt = True 167 | epoch_ckpt_name = "model_best_training" 168 | print("Best Model Loss: Loss {}".format(train_step_log["Loss"])) 169 | 170 | with torch.no_grad(): 171 | valid_step_log = TrainUtils.run_epoch(model=model, data_loader=valid_loader, epoch=epoch, validate=True, num_steps=config.experiment.validation_epoch_every_n_steps) 172 | valid_check = "Loss" in valid_step_log 173 | if valid_check and (best_valid_loss 
is None or (valid_step_log["Loss"] <= best_valid_loss)): 174 | best_valid_loss = valid_step_log["Loss"] 175 | valid_epoch_ckpt_name = "model_best_validation" 176 | should_save_ckpt = True 177 | print("Best Validation Loss: Loss {}".format(valid_step_log["Loss"])) 178 | 179 | if should_save_ckpt: 180 | print("Saving checkpoint") 181 | TrainUtils.save_model( 182 | model=model, 183 | config=config, 184 | env_meta=env_meta, 185 | shape_meta=shape_meta, 186 | ckpt_path=os.path.join(ckpt_dir, epoch_ckpt_name + ".pth"), 187 | obs_normalization_stats=obs_normalization_stats, 188 | ) 189 | 190 | if epoch >= config.experiment.rollout.warmstart: 191 | pass 192 | return model 193 | 194 | 195 | if __name__ == "__main__": 196 | parser = argparse.ArgumentParser() 197 | 198 | # External config file that overwrites default config 199 | parser.add_argument( 200 | "--config", 201 | type=str, 202 | default='bcrnn', 203 | help="path to a config json that will be used to override the default settings. \ 204 | For example, --config=CONFIG.json will load a .json config file at ./config/nav/CONFIG.json.\ 205 | If omitted, default settings are used. This is the preferred way to run experiments.") 206 | args = parser.parse_args() 207 | 208 | ext_cfg = json.load(open('{}/nav/{}.json'.format(PATH_CONFIG, args.config), 'r')) 209 | config = config_factory(ext_cfg["algo_name"]) 210 | # update config with external json - this will throw errors if 211 | # the external config has keys not present in the base algo config 212 | with config.values_unlocked(): 213 | config.update(ext_cfg) 214 | 215 | device = TorchUtils.get_torch_device(try_to_use_cuda=True) 216 | model = train(config, device) -------------------------------------------------------------------------------- /scripts/utils/convert_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import pickle 4 | from pathlib import Path 5 | import argparse 6 | import os 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | 11 | parser.add_argument( 12 | "--folder", 13 | default="./save/dataset_sim/demo_sim/", 14 | type=Path, 15 | ) 16 | parser.add_argument( 17 | "--demo_path", 18 | default="./save/dataset_sim/demo_sim.hdf5", 19 | type=Path, 20 | ) 21 | 22 | return parser.parse_args() 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | demo_file_name = str(args.demo_path) 28 | demo_hdf5_file = h5py.File(demo_file_name, "w") 29 | print(demo_file_name) 30 | 31 | total = 0 32 | grp = demo_hdf5_file.create_group("data") 33 | for (demo_idx, pickle_file) in enumerate(args.folder.glob(f'*/*.pickle')): 34 | with open(pickle_file, "rb") as f: 35 | data = pickle.load(f) 36 | print(data.keys()) 37 | ep_grp = grp.create_group(f"demo_{demo_idx}") 38 | obs_grp = ep_grp.create_group(f"obs") 39 | 40 | obs_grp.create_dataset("agentview_rgb", data=((np.array(data["observation"])[..., :3]).astype('uint8'))) 41 | obs_grp.create_dataset("agentview_depth", data=np.array(data["observation"])[..., 3:]) 42 | obs_grp.create_dataset("yaw", data=np.expand_dims(data["yaw"], axis=-1)) 43 | ep_grp.create_dataset("actions", data=np.array(data["action"])) 44 | ep_grp.create_dataset("dones", data=np.array(data["done"])) 45 | ep_grp.create_dataset("rewards", data=np.array(data["done"])) 46 | ep_grp.attrs["num_samples"] = len(data["action"]) 47 | total += len(data["action"]) 48 | 49 | grp.attrs["total"] = total 50 | metadata = "" 51 | grp.attrs["env_args"] = metadata 52 | 53 | demo_hdf5_file.close() 54 | 55 | 
if __name__ == "__main__": 56 | main() 57 | 58 | -------------------------------------------------------------------------------- /scripts/utils/pid.py: -------------------------------------------------------------------------------- 1 | from numpy import pi as PI 2 | 3 | # Simple PID control handler 4 | class PIDHandler(): 5 | """ 6 | A simple PID feedback controller with feedforward inputs, mainly used for joint-torque control. 7 | """ 8 | 9 | 10 | def __init__(self, gainP = 0, gainD = 0, gainI = 0, gainF = 0, conFreq = 10 , conIRatioMax = 0.3, outMax = 100 , flagRotation = False): 11 | """ 12 | Generate a simple PID controller. 13 | 14 | Args: 15 | gainP: proportional gain for feedback 16 | gainD: differential gain for feedback 17 | gainI: integral gain for feedback 18 | gainF: proportional gain for feedforward 19 | conFreq: control frequency [Hz] for computing integral and differential terms 20 | conIRatioMax: maximum ratio of integral terms with respect to PID outputs 21 | outMax: maximum value of PID outputs 22 | flagRotation: flag for rotational-state feedback 23 | 24 | Returns: 25 | None 26 | """ 27 | 28 | self.gainP = gainP 29 | self.gainI = gainI 30 | self.gainD = gainD 31 | 32 | self.gainF = gainF 33 | 34 | self.conFreq = conFreq 35 | self.conPeriod = 1.0/conFreq 36 | 37 | self.flagRot = flagRotation 38 | 39 | if gainI > 0: 40 | self.conIMax = conIRatioMax /(self.gainI * self.conPeriod) 41 | else: 42 | self.conIMax = 0 43 | 44 | self.coffLPF = 0.7 45 | 46 | self.outMax = outMax 47 | self.outMin = - outMax 48 | 49 | self.reset() 50 | 51 | return 52 | 53 | 54 | def reset(self): 55 | """ 56 | Reset the controller. 57 | 58 | Args: 59 | None 60 | 61 | Returns: 62 | None 63 | """ 64 | 65 | self._fwdCur = 0 66 | self._fdbCur = 0 67 | self._fdbPrv = 0 68 | self._fdbDif = 0 69 | self._fdbSum = 0 70 | 71 | self._out = 0 72 | 73 | return 74 | 75 | 76 | def update(self, fdbVal = 0, fwdVal = 0): 77 | """ 78 | Update the controller with given feedback/feedforward values. 79 | 80 | Args: 81 | fdbVal: state for feedback control 82 | fwdVal: command for feedforward control 83 | 84 | Returns: 85 | None 86 | """ 87 | 88 | self._fwdCur = fwdVal 89 | self._fdbCur = fdbVal 90 | 91 | if self.flagRot: # wrap rotational feedback into the range [-PI, PI] 92 | while self._fdbCur > PI: 93 | self._fdbCur -= 2*PI 94 | while self._fdbCur < - PI: 95 | self._fdbCur += 2*PI 96 | 97 | if self._fwdCur == 0: 98 | self._fdbDif = self.coffLPF * (self._fdbCur - self._fdbPrv) + (1 - self.coffLPF) * self._fdbDif 99 | self._fdbSum += self._fdbCur 100 | 101 | if self._fdbSum > self.conIMax: 102 | self._fdbSum = self.conIMax 103 | elif self._fdbSum < - self.conIMax: 104 | self._fdbSum = - self.conIMax 105 | else: 106 | self._fdbDif = 0 107 | self._fdbSum = 0 108 | 109 | self._fdbPrv = self._fdbCur 110 | 111 | uF = self.gainF * self._fwdCur 112 | uP = self.gainP * self._fdbCur 113 | uD = self.gainD * self.conFreq * self._fdbDif 114 | uI = self.gainI * self.conPeriod * self._fdbSum 115 | 116 | self._out = uP + uD + uI + uF 117 | if self._out > self.outMax: 118 | self._out = self.outMax 119 | elif self._out < self.outMin: 120 | self._out = self.outMin 121 | 122 | return 123 | 124 | 125 | def output(self): 126 | """ 127 | Output control values. 
128 | 129 | Args: 130 | None 131 | 132 | Returns: 133 | Control output (float-type, equlvalent to self._out) 134 | """ 135 | 136 | return self._out 137 | -------------------------------------------------------------------------------- /scripts/utils/split_train_val.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for splitting a dataset hdf5 file into training and validation trajectories. 3 | 4 | Args: 5 | dataset (str): path to hdf5 dataset 6 | 7 | filter_key (str): if provided, split the subset of trajectories 8 | in the file that correspond to this filter key into a training 9 | and validation set of trajectories, instead of splitting the 10 | full set of trajectories 11 | 12 | ratio (float): validation ratio, in (0, 1). Defaults to 0.1, which is 10%. 13 | 14 | Example usage: 15 | python split_train_val.py --dataset /path/to/demo.hdf5 --ratio 0.1 16 | """ 17 | 18 | import argparse 19 | import h5py 20 | import numpy as np 21 | 22 | from robomimic.utils.file_utils import create_hdf5_filter_key 23 | 24 | 25 | def split_train_val_from_hdf5(hdf5_path, val_ratio=0.1, filter_key=None): 26 | """ 27 | Splits data into training set and validation set from HDF5 file. 28 | 29 | Args: 30 | hdf5_path (str): path to the hdf5 file 31 | to load the transitions from 32 | 33 | val_ratio (float): ratio of validation demonstrations to all demonstrations 34 | 35 | filter_key (str): if provided, split the subset of demonstration keys stored 36 | under mask/@filter_key instead of the full set of demonstrations 37 | """ 38 | 39 | # retrieve number of demos 40 | f = h5py.File(hdf5_path, "r") 41 | if filter_key is not None: 42 | print("using filter key: {}".format(filter_key)) 43 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])]) 44 | else: 45 | demos = sorted(list(f["data"].keys())) 46 | num_demos = len(demos) 47 | f.close() 48 | 49 | # get random split 50 | num_demos = len(demos) 51 | num_val = int(val_ratio * num_demos) 52 | mask = np.zeros(num_demos) 53 | mask[:num_val] = 1. 
54 | np.random.shuffle(mask) 55 | mask = mask.astype(int) 56 | train_inds = (1 - mask).nonzero()[0] 57 | valid_inds = mask.nonzero()[0] 58 | train_keys = [demos[i] for i in train_inds] 59 | valid_keys = [demos[i] for i in valid_inds] 60 | print("{} validation demonstrations out of {} total demonstrations.".format(num_val, num_demos)) 61 | 62 | # pass mask to generate split 63 | name_1 = "train" 64 | name_2 = "valid" 65 | if filter_key is not None: 66 | name_1 = "{}_{}".format(filter_key, name_1) 67 | name_2 = "{}_{}".format(filter_key, name_2) 68 | 69 | train_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=train_keys, key_name=name_1) 70 | valid_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=valid_keys, key_name=name_2) 71 | 72 | print("Total number of train samples: {}".format(np.sum(train_lengths))) 73 | print("Average number of train samples {}".format(np.mean(train_lengths))) 74 | 75 | print("Total number of valid samples: {}".format(np.sum(valid_lengths))) 76 | print("Average number of valid samples {}".format(np.mean(valid_lengths))) 77 | 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument( 82 | "--dataset", 83 | type=str, 84 | help="path to hdf5 dataset", 85 | ) 86 | parser.add_argument( 87 | "--filter_key", 88 | type=str, 89 | default=None, 90 | help="if provided, split the subset of trajectories in the file that correspond to\ 91 | this filter key into a training and validation set of trajectories, instead of\ 92 | splitting the full set of trajectories", 93 | ) 94 | parser.add_argument( 95 | "--ratio", 96 | type=float, 97 | default=0.1, 98 | help="validation ratio, in (0, 1)" 99 | ) 100 | args = parser.parse_args() 101 | 102 | # seed to make sure results are consistent 103 | np.random.seed(0) 104 | 105 | split_train_val_from_hdf5(args.dataset, val_ratio=args.ratio, filter_key=args.filter_key) 106 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | from setuptools import setup, find_packages 3 | 4 | __version__ = '0.1.0' 5 | 6 | with io.open('README.md', 'r', encoding='utf-8') as fh: 7 | long_description = fh.read() 8 | 9 | setup( 10 | name='prelude', 11 | version=__version__, 12 | author='Mingyo Seo', 13 | author_email='', 14 | description=('Python Environments for PRELUDE'), 15 | long_description=open("README.md", encoding="utf8").read(), 16 | long_description_content_type='text/markdown', 17 | license="GPLv3", 18 | python_requires=">=3.6", 19 | keywords="PRELUDE", 20 | packages=[package for package in find_packages() 21 | if package.startswith('a1sim')], 22 | tests_require=['pytest', 'mock'], 23 | include_package_data=True, 24 | install_requires=[ 25 | "torch>=1.8.0", 26 | "torchvision>=0.9.0", 27 | "attrs==19.3.0", 28 | "pybullet>=3.0.7", 29 | "gym>=0.18.0", 30 | "numpy==1.19.5", 31 | "scipy>=1.6.2", 32 | "pybullet>=3.0.6", 33 | "typing==3.7.4.1", 34 | "numba>=0.51.0", 35 | "h5py>=2.10.0", 36 | "pyglet==1.5.0", 37 | "opencv-python>=4.5.2", 38 | "pynput", 39 | "inputs", 40 | "PyYAML", 41 | "python-dateutil", 42 | "gtimer", 43 | "hid", 44 | "tensorboard>=2.5.0", 45 | "tensorboardX", 46 | "psutil", 47 | "tqdm", 48 | "termcolor", 49 | "imageio", 50 | "imageio-ffmpeg", 51 | "egl_probe>=1.0.1", 52 | "wandb", 53 | "robomimic==0.2.0", 54 | "tianshou==0.4.1" 55 | ], 56 | extras_require={ 57 | "dev": [ 58 | "Sphinx", 59 | "sphinx_rtd_theme", 60 | 
"sphinxcontrib-bibtex", 61 | "flake8", 62 | "pytest", 63 | "pytest-cov", 64 | "ray>=1.0.0", 65 | "networkx", 66 | "mypy", 67 | "pydocstyle", 68 | "doc8", 69 | ], 70 | "atari": ["atari_py", "cv2"], 71 | "mujoco": ["mujoco_py"], 72 | "pybullet": ["pybullet"], 73 | }, 74 | zip_safe=False, 75 | ) 76 | --------------------------------------------------------------------------------