├── .deepsource.toml ├── .gitignore ├── README.md ├── example ├── __init__.py ├── lctv0.yaml ├── loct │ ├── __init__.py │ ├── network.py │ ├── test.py │ ├── train.py │ └── utils.py ├── test │ ├── __init__.py │ └── test_tianshou.py └── utils.py ├── qdpgym ├── __init__.py ├── sim │ ├── __init__.py │ ├── abc.py │ ├── app.py │ ├── blt │ │ ├── __init__.py │ │ ├── env.py │ │ ├── hooks.py │ │ ├── quadruped.py │ │ ├── terrain.py │ │ └── utils.py │ ├── common │ │ ├── __init__.py │ │ ├── identify.py │ │ ├── motor.py │ │ ├── noisyhandle.py │ │ └── tg.py │ ├── mjc │ │ ├── __init__.py │ │ ├── env.py │ │ ├── hooks.py │ │ ├── quadruped.py │ │ ├── terrain.py │ │ └── viewer.py │ ├── resources │ │ ├── acnet_220526.pt │ │ ├── actuator_net_with_history.pt │ │ └── aliengo │ │ │ ├── meshes │ │ │ ├── calf.stl │ │ │ ├── hip.stl │ │ │ ├── thigh.stl │ │ │ ├── thigh_mirror.stl │ │ │ └── trunk.stl │ │ │ └── model │ │ │ ├── aliengo.urdf │ │ │ └── aliengo.xml │ └── task.py ├── tasks │ ├── __init__.py │ ├── identify │ │ ├── __init__.py │ │ └── identify.py │ └── loct │ │ ├── __init__.py │ │ ├── app.py │ │ ├── loct.py │ │ ├── reward.py │ │ ├── sr_reward.py │ │ └── utils.py ├── tests │ ├── __init__.py │ ├── common.py │ └── test_pybullet.py ├── thirdparty │ ├── __init__.py │ └── gamepad │ │ ├── __init__.py │ │ ├── controllers.py │ │ └── gamepad.py └── utils │ ├── __init__.py │ ├── tf.py │ └── utils.py └── resources ├── lin.gif └── rot.gif /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | [[analyzers]] 4 | name = "python" 5 | enabled = true 6 | 7 | [analyzers.meta] 8 | runtime_version = "3.x.x" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # .idea folder 10 | .idea/ 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # customize 140 | log/ 141 | log_wuzhen/ 142 | MUJOCO_LOG.TXT 143 | *.pth 144 | .vscode/ 145 | .DS_Store 146 | *.zip 147 | *.pstats 148 | *.swp 149 | *.pkl 150 | *.hdf5 151 | wandb/ 152 | videos/ 153 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quadruped Locomotion Reinforcement Learning 2 | 3 | This is an environment for learning quadrupedal locomotion, built on 4 | either the [bullet physics engine](https://github.com/bulletphysics/bullet3) 5 | or [mujoco](https://github.com/deepmind/mujoco) together with 6 | [dm_control](https://github.com/deepmind/dm_control) 7 | (the latter backend is not implemented yet). It provides 8 | 9 | - Interfaces for quadruped states and environmental states, plus terrain creation from elevation maps 10 | - Motor identification via an actuator network 11 | - Curriculum learning on disturbance and terrain 12 | - Rich visualization of commands, forces, torques, terrain and trajectories 13 | 14 | [tianshou](https://github.com/thu-ml/tianshou) is used for training in 15 | our examples. 16 | 17 | [//]: # (- Imitation Learning from privileged teacher to proprioceptive student) 18 | 19 |
20 | linear 21 | rotate 22 |
 
23 | 24 | Requirements: `python>=3.7`, `pybullet>=3.2`, `torch>=1.10`, `numpy`, `scipy`, `wandb` 25 | 26 | To train, run: 27 | ```bash 28 | PYTHONPATH=./ python example/loct/train.py --task example/lctv0.yaml \ 29 | --batch-size 8192 --repeat-per-collect 8 --step-per-epoch 160000 \ 30 | --norm-adv 1 --run-name example 31 | ``` 32 | -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/__init__.py -------------------------------------------------------------------------------- /example/lctv0.yaml: -------------------------------------------------------------------------------- 1 | terrain: plain 2 | perturb: True 3 | 4 | reward_coeff: 0.05 5 | reward_cfg: 6 | LinearVelocityReward: 0.34 7 | RotationReward: 0.24 8 | # YawRateReward: 0.12 9 | VerticalLinearPenalty: 0.04 10 | RollPitchRatePenalty: 0.04 11 | BodyPosturePenalty: 0.04 12 | FootSlipPenalty: 0.04 13 | BodyCollisionPenalty: 0.04 14 | JointConstraintPenalty: 0.04 15 | TorquePenalty: 0.02 16 | JointMotionPenalty: 0.04 17 | ActionSmoothnessReward: 0.04 18 | ClearanceOverTerrainReward: 0.02 19 | BodyHeightReward: 0.0 20 | 21 | # VelocityReward: 1.5 22 | # RotationReward: 0.75 23 | # BodyMotionPenalty: 1.0 24 | # FootClearanceReward: 0.003 25 | # BodyCollisionPenalty: 0.1 26 | # JointMotionPenalty: 0.001 27 | # TargetSmoothnessReward: 0.003 28 | # TorquePenalty: 1.0e-6 29 | # SlipPenalty: 0.003 -------------------------------------------------------------------------------- /example/loct/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/loct/__init__.py -------------------------------------------------------------------------------- /example/loct/network.py: -------------------------------------------------------------------------------- 1 | from typing import Type, Union, Any, Dict, Tuple 2 | 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class ActorNetMLP(nn.Module): 9 | """ 10 | extero_obs -> extero_layers ->| 11 | |-> concat -> action_layers -> action 12 | proprio_obs -> proprio_layers ->| 13 | """ 14 | 15 | def __init__( 16 | self, 17 | extero_obs_dim, 18 | real_world_obs_dim, 19 | extero_layer_dims=(72, 64), 20 | locomotion_layer_dims=(), 21 | action_layer_dims=(256, 160, 128), 22 | activation: Type[nn.Module] = nn.Tanh, 23 | device='cpu' 24 | ): 25 | super().__init__() 26 | self.input_dim = extero_obs_dim + real_world_obs_dim 27 | self.extero_obs_dim = extero_obs_dim 28 | self.real_world_obs_dim = real_world_obs_dim 29 | extero_layers, locomotion_layers, action_layers = [], [], [] 30 | self.extero_obs_dim = extero_feature_dim = extero_obs_dim 31 | self.device = torch.device(device) 32 | if extero_layer_dims: 33 | for dim in extero_layer_dims: 34 | extero_layers.append(nn.Linear(extero_feature_dim, dim)) 35 | extero_layers.append(activation()) 36 | extero_feature_dim = dim 37 | 38 | self.real_world_obs_dim = locomotion_feature_dim = real_world_obs_dim 39 | if locomotion_layer_dims: 40 | for dim in locomotion_layer_dims: 41 | locomotion_layers.append(nn.Linear(locomotion_feature_dim, dim)) 42 | locomotion_layers.append(activation()) 43 | locomotion_feature_dim = dim 44 | 45 | action_feature_dim = extero_feature_dim + 
locomotion_feature_dim 46 | for dim in action_layer_dims: 47 | action_layers.append(nn.Linear(action_feature_dim, dim)) 48 | action_layers.append(activation()) 49 | action_feature_dim = dim 50 | self.output_dim = action_feature_dim 51 | 52 | self.extero_layers = nn.Sequential(*extero_layers) 53 | self.locomotion_layers = nn.Sequential(*locomotion_layers) 54 | self.action_layers = nn.Sequential(*action_layers) 55 | 56 | def forward(self, x): 57 | if self.device is not None: 58 | x = torch.as_tensor(x, device=self.device, dtype=torch.float32) 59 | extero_obs, real_world_obs = x[..., :self.extero_obs_dim], x[..., self.extero_obs_dim:] 60 | extero_feature, locomotion_feature = self.extero_layers(extero_obs), self.locomotion_layers(real_world_obs) 61 | return self.action_layers(torch.cat((extero_feature, locomotion_feature), dim=-1)) 62 | 63 | 64 | class ActorNet(nn.Module): 65 | def __init__( 66 | self, 67 | extero_obs_dim, 68 | real_world_obs_dim, 69 | extero_layer_dims=(72, 64), 70 | locomotion_layer_dims=(), 71 | action_layer_dims=(256, 160, 128), 72 | activation: Type[nn.Module] = nn.Tanh, 73 | device='cpu' 74 | ) -> None: 75 | super().__init__() 76 | self.device = device 77 | self.model = ActorNetMLP( 78 | extero_obs_dim, 79 | real_world_obs_dim, 80 | extero_layer_dims, 81 | locomotion_layer_dims, 82 | action_layer_dims, 83 | activation, 84 | device, 85 | ) 86 | self.output_dim = self.model.output_dim 87 | 88 | def forward( 89 | self, 90 | obs: Union[np.ndarray, torch.Tensor], 91 | state: Any = None, 92 | info: Dict[str, Any] = {}, 93 | ) -> Tuple[torch.Tensor, Any]: 94 | """Mapping: obs -> flatten (inside MLP)-> logits.""" 95 | logits = self.model(obs) 96 | return logits, state 97 | -------------------------------------------------------------------------------- /example/loct/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | 5 | import numpy as np 6 | import torch 7 | import yaml 8 | from tianshou.data import Collector 9 | from tianshou.policy import PPOPolicy 10 | from tianshou.utils.net.common import Net 11 | from tianshou.utils.net.continuous import ActorProb, Critic 12 | from torch import nn 13 | from torch.distributions import Independent, Normal 14 | 15 | from example.loct.network import ActorNet 16 | from example.utils import NormObsWrapper 17 | from qdpgym import sim 18 | from qdpgym.tasks.loct import LocomotionV0, GamepadCommanderHook 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("--task", type=str, required=True) 24 | parser.add_argument("--seed", type=int, default=0) 25 | parser.add_argument("--render", type=float, default=0.) 
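 # Note: --render is forwarded to Collector.collect(render=...); in tianshou this is the # sleep time between rendered frames (0 renders without an extra delay). 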
26 | parser.add_argument( 27 | "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu" 28 | ) 29 | parser.add_argument("--resume-path", type=str, default=None) 30 | return parser.parse_args() 31 | 32 | 33 | def make_loct_env(cfg): 34 | torch.set_num_threads(1) 35 | robot = sim.Aliengo(500, 'actuator_net', noisy=True) 36 | task = LocomotionV0() 37 | # task = LocomotionPMTG() 38 | 39 | if cfg['terrain'] == 'random': 40 | arena = sim.NullTerrain() 41 | task.add_hook(sim.RandomTerrainHook()) 42 | elif cfg['terrain'] == 'plain': 43 | arena = sim.Plain() 44 | else: 45 | raise ValueError(f'Unknown terrain `{cfg.terrain}`') 46 | if cfg['perturb']: 47 | task.add_hook(sim.RandomPerturbHook()) 48 | 49 | task.add_hook(GamepadCommanderHook()) 50 | # task.add_hook(sim.HeightSampleVisualizer()) 51 | # task.add_hook(RandomCommanderHookV0()) 52 | # task.add_hook(sim.VideoRecorderHook()) 53 | task.add_hook(sim.ExtraViewerHook()) 54 | for reward, weight in cfg['reward_cfg'].items(): 55 | task.add_reward(reward, weight) 56 | 57 | if 'reward_coeff' in cfg: 58 | task.set_reward_coeff(cfg['reward_coeff']) 59 | env = sim.QuadrupedEnv(robot, arena, task) 60 | return env 61 | 62 | 63 | def test_ppo(args): 64 | with open(args.task, encoding='utf-8') as f: 65 | task_cfg = yaml.load(f, Loader=yaml.SafeLoader) 66 | 67 | def make_env(): 68 | return make_loct_env(task_cfg) 69 | 70 | obs_norm = False 71 | env = make_env() 72 | if obs_norm: 73 | env = NormObsWrapper(env) 74 | 75 | # seed 76 | np.random.seed(args.seed) 77 | torch.manual_seed(args.seed) 78 | # model 79 | net_a = ActorNet(78, 132, device=args.device) 80 | actor = ActorProb( 81 | net_a, 82 | env.action_space.shape, 83 | unbounded=True, 84 | device=args.device, 85 | ).to(args.device) 86 | net_c = Net( 87 | env.observation_space.shape, 88 | hidden_sizes=(256, 256, 256), 89 | activation=nn.Tanh, 90 | device=args.device, 91 | ) 92 | critic = Critic(net_c, device=args.device).to(args.device) 93 | optim = torch.optim.AdamW( 94 | list(actor.parameters()) + list(critic.parameters()) 95 | ) 96 | 97 | policy = PPOPolicy( 98 | actor, critic, optim, 99 | lambda *logits: Independent(Normal(*logits), 1) 100 | ) 101 | # load a previous policy 102 | if args.resume_path: 103 | ckpt = torch.load(args.resume_path, map_location=args.device) 104 | policy.load_state_dict(ckpt["model"]) 105 | if obs_norm: 106 | env.set_obs_rms(ckpt['obs_rms']) 107 | print("Loaded agent from: ", args.resume_path) 108 | test_collector = Collector(policy, env) 109 | 110 | # Let's watch its performance! 
111 | policy.eval() 112 | test_collector.reset() 113 | result = test_collector.collect(n_episode=100, render=args.render) 114 | print(f'Final reward: {result["rews"].mean()}, length: {result["lens"].mean()}') 115 | 116 | 117 | if __name__ == "__main__": 118 | args = parse_args() 119 | test_ppo(args) 120 | -------------------------------------------------------------------------------- /example/loct/train.py: -------------------------------------------------------------------------------- 1 | import pybullet 2 | import argparse 3 | import os 4 | import pprint 5 | 6 | import numpy as np 7 | import torch 8 | import wandb 9 | import yaml 10 | from tianshou.data import Collector, ReplayBuffer, VectorReplayBuffer 11 | from tianshou.env import ShmemVectorEnv, VectorEnvNormObs 12 | from tianshou.policy import PPOPolicy 13 | from tianshou.trainer import onpolicy_trainer 14 | from tianshou.utils.net.common import Net 15 | from tianshou.utils.net.continuous import ActorProb, Critic 16 | from torch import nn 17 | from torch.distributions import Independent, Normal 18 | from torch.optim.lr_scheduler import LambdaLR 19 | 20 | from example.loct.network import ActorNet 21 | from example.utils import init_actor_critic, MyWandbLogger 22 | from qdpgym import sim 23 | from qdpgym.tasks import loct 24 | from qdpgym.utils import get_timestamp 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("--task", type=str, required=True) 30 | parser.add_argument("--seed", type=int, default=0) 31 | parser.add_argument("--buffer-size", type=int, default=4096) 32 | parser.add_argument("--lr", type=float, default=1e-4) 33 | parser.add_argument("--gamma", type=float, default=0.99) 34 | parser.add_argument("--epoch", type=int, default=100) 35 | parser.add_argument("--step-per-epoch", type=int, default=30000) 36 | parser.add_argument("--step-per-collect", type=int, default=8192) 37 | parser.add_argument("--repeat-per-collect", type=int, default=8) 38 | parser.add_argument("--batch-size", type=int, default=8192) 39 | parser.add_argument("--training-num", type=int, default=64) 40 | parser.add_argument("--test-num", type=int, default=10) 41 | parser.add_argument("--obs-norm", type=int, default=0) 42 | 43 | # ppo special 44 | parser.add_argument("--rew-norm", type=int, default=1) 45 | parser.add_argument("--vf-coef", type=float, default=1.0) 46 | parser.add_argument("--ent-coef", type=float, default=0.0) 47 | parser.add_argument("--gae-lambda", type=float, default=0.95) 48 | parser.add_argument("--bound-action-method", type=str, default="tanh") 49 | parser.add_argument("--lr-decay", type=int, default=True) 50 | parser.add_argument("--max-grad-norm", type=float, default=1.0) 51 | parser.add_argument("--eps-clip", type=float, default=0.2) 52 | parser.add_argument("--dual-clip", type=float, default=None) 53 | parser.add_argument("--value-clip", type=int, default=0) 54 | parser.add_argument("--norm-adv", type=int, default=1) 55 | parser.add_argument("--recompute-adv", type=int, default=0) 56 | parser.add_argument("--logdir", type=str, default="log") 57 | parser.add_argument( 58 | "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu" 59 | ) 60 | parser.add_argument("--run-name", type=str, required=True) 61 | parser.add_argument("--resume-path", type=str, default=None) 62 | parser.add_argument("--resume-id", type=str, default=None) 63 | parser.add_argument("--wandb-project", type=str, default="loct") 64 | return parser.parse_args() 65 | 66 | 67 | if __name__ == 
"__main__": 68 | args = parse_args() 69 | with open(args.task, encoding='utf-8') as f: 70 | task_cfg = yaml.load(f, Loader=yaml.SafeLoader) 71 | 72 | # commander_core = loct.ISCommanderCore('RotationReward', 100) 73 | commander_core = loct.AlpISCommanderCore('RotationReward') 74 | commander_core.start_process() 75 | 76 | 77 | def make_loct_env(cfg, train=True): 78 | torch.set_num_threads(1) 79 | robot = sim.Aliengo(500, 'actuator_net', noisy=True) 80 | task = loct.LocomotionV0() 81 | 82 | if cfg['terrain'] == 'random': 83 | arena = sim.NullTerrain() 84 | task.add_hook(sim.RandomTerrainHook()) 85 | elif cfg['terrain'] == 'plain': 86 | arena = sim.Plain() 87 | else: 88 | raise ValueError(f'Unknown terrain `{cfg.terrain}`') 89 | if cfg['perturb']: 90 | task.add_hook(sim.RandomPerturbHook()) 91 | if train: 92 | task.add_hook(commander_core.make_collector()) 93 | # task.add_hook(loct.RandomRotationCommanderHook()) 94 | task.add_hook(commander_core.make_commander()) 95 | else: 96 | task.add_hook(loct.RandomRotationCommanderHook()) 97 | for reward, weight in cfg['reward_cfg'].items(): 98 | task.add_reward(reward, weight) 99 | 100 | if 'reward_coeff' in cfg: 101 | task.set_reward_coeff(cfg['reward_coeff']) 102 | env = sim.QuadrupedEnv(robot, arena, task) 103 | return env 104 | 105 | 106 | env = make_loct_env(task_cfg) 107 | train_envs = ShmemVectorEnv([ 108 | lambda: make_loct_env(task_cfg) 109 | for _ in range(args.training_num) 110 | ]) 111 | test_envs = ShmemVectorEnv([ 112 | lambda: make_loct_env(task_cfg, False) 113 | for _ in range(args.test_num) 114 | ]) if args.test_num else None 115 | 116 | if args.obs_norm: 117 | # obs norm wrapper 118 | train_envs = VectorEnvNormObs(train_envs) 119 | test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False) 120 | test_envs.set_obs_rms(train_envs.get_obs_rms()) 121 | 122 | obs_shape = env.observation_space.shape or env.observation_space.n 123 | action_shape = env.action_space.shape or env.action_space.n 124 | print("Observations shape:", obs_shape) 125 | print("Actions shape:", action_shape) 126 | 127 | # seed 128 | np.random.seed(args.seed) 129 | torch.manual_seed(args.seed) 130 | # model 131 | net_a = ActorNet(78, 132, device=args.device) 132 | actor = ActorProb( 133 | net_a, 134 | action_shape, 135 | max_action=1.0, 136 | unbounded=True, 137 | device=args.device, 138 | ).to(args.device) 139 | net_c = Net( 140 | obs_shape, 141 | hidden_sizes=(256, 256, 256), 142 | activation=nn.Tanh, 143 | device=args.device, 144 | ) 145 | critic = Critic(net_c, device=args.device).to(args.device) 146 | torch.nn.init.constant_(actor.sigma_param, -2.3) 147 | optim = torch.optim.AdamW( 148 | list(actor.parameters()) + list(critic.parameters()), lr=args.lr 149 | ) 150 | init_actor_critic(actor, critic) 151 | 152 | lr_scheduler = None 153 | if args.lr_decay: 154 | # decay learning rate to 0 linearly 155 | max_update_num = np.ceil( 156 | args.step_per_epoch / args.step_per_collect 157 | ) * args.epoch 158 | 159 | lr_scheduler = LambdaLR( 160 | optim, lr_lambda=lambda epoch: 1 - epoch / max_update_num 161 | ) 162 | 163 | 164 | def dist(*logits): 165 | return Independent(Normal(*logits), 1) 166 | 167 | 168 | policy = PPOPolicy( 169 | actor, 170 | critic, 171 | optim, 172 | dist, 173 | discount_factor=args.gamma, 174 | gae_lambda=args.gae_lambda, 175 | max_grad_norm=args.max_grad_norm, 176 | vf_coef=args.vf_coef, 177 | ent_coef=args.ent_coef, 178 | reward_normalization=args.rew_norm, 179 | action_scaling=True, 180 | action_bound_method=args.bound_action_method, 181 | 
lr_scheduler=lr_scheduler, 182 | action_space=env.action_space, 183 | eps_clip=args.eps_clip, 184 | value_clip=args.value_clip, 185 | dual_clip=args.dual_clip, 186 | advantage_normalization=args.norm_adv, 187 | recompute_advantage=args.recompute_adv, 188 | ) 189 | 190 | # load a previous policy 191 | if args.resume_path: 192 | ckpt = torch.load(args.resume_path, map_location=args.device) 193 | policy.load_state_dict(ckpt["model"]) 194 | if args.obs_norm: 195 | train_envs.set_obs_rms(ckpt["obs_rms"]) 196 | test_envs.set_obs_rms(ckpt["obs_rms"]) 197 | print("Loaded agent from: ", args.resume_path) 198 | 199 | # logger 200 | logger = MyWandbLogger( 201 | project=args.wandb_project, 202 | name=args.run_name, 203 | run_id=args.resume_id, 204 | config=args, 205 | ) 206 | start_time, run_name, run_id = wandb.run.start_time, wandb.run.name, wandb.run.id 207 | log_path = os.path.join(args.logdir, f'{get_timestamp(start_time)}#{run_name}@{run_id}') 208 | os.makedirs(log_path) 209 | 210 | # collector 211 | if args.training_num > 1: 212 | buffer = VectorReplayBuffer(args.step_per_collect, len(train_envs)) 213 | else: 214 | buffer = ReplayBuffer(args.step_per_collect) 215 | 216 | train_collector = Collector( 217 | policy, train_envs, buffer, 218 | preprocess_fn=logger.collect_reward_info, 219 | exploration_noise=True 220 | ) 221 | test_collector = Collector( 222 | policy, test_envs, 223 | preprocess_fn=logger.collect_reward_info, 224 | ) if test_envs is not None else None 225 | 226 | 227 | def analyse_callback(): 228 | fig1, fig2 = commander_core.analyse() 229 | return { 230 | 'curricula/reward': wandb.Image(fig1, mode='RGB'), 231 | 'curricula/weight': wandb.Image(fig2, mode='RGB'), 232 | } 233 | 234 | 235 | logger.add_callback( 236 | analyse_callback, 'test' 237 | ) 238 | 239 | 240 | def save_best_fn(policy): 241 | state = {"model": policy.state_dict()} 242 | if args.obs_norm: 243 | state["obs_rms"] = train_envs.get_obs_rms() 244 | torch.save(state, os.path.join(log_path, "policy.pth")) 245 | 246 | 247 | def save_checkpoint_fn(epoch: int, env_step: int, gradient_step: int): 248 | state = {"model": policy.state_dict()} 249 | if args.obs_norm: 250 | state["obs_rms"] = train_envs.get_obs_rms() 251 | path = os.path.join(log_path, f"policy_{epoch}.pth") 252 | torch.save(state, path) 253 | return path 254 | 255 | 256 | result = onpolicy_trainer( 257 | policy, 258 | train_collector, 259 | test_collector, 260 | args.epoch, 261 | args.step_per_epoch, 262 | args.repeat_per_collect, 263 | args.test_num, 264 | args.batch_size, 265 | step_per_collect=args.step_per_collect, 266 | save_best_fn=save_best_fn, 267 | save_checkpoint_fn=save_checkpoint_fn, 268 | logger=logger, 269 | test_in_train=False, 270 | ) 271 | pprint.pprint(result) 272 | 273 | # Let's watch its performance! 
274 | policy.eval() 275 | test_envs.seed(args.seed) 276 | test_collector.reset() 277 | result = test_collector.collect(n_episode=args.test_num) 278 | print(f'Final reward: {result["rews"].mean()}, length: {result["lens"].mean()}') 279 | -------------------------------------------------------------------------------- /example/loct/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PolicyWrapper(object): 5 | def __init__(self, net, device): 6 | self.net = net 7 | self.device = torch.device(device) 8 | 9 | def __call__(self, obs): 10 | with torch.inference_mode(): 11 | obs = torch.as_tensor(obs, dtype=torch.float32, device=self.device) 12 | return self.net(obs).detach().cpu().numpy() 13 | 14 | 15 | -------------------------------------------------------------------------------- /example/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/test/__init__.py -------------------------------------------------------------------------------- /example/test/test_tianshou.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from tianshou.env import ShmemVectorEnv 4 | from tianshou.utils.net.continuous import ActorProb 5 | from torch import nn 6 | 7 | from example.loct.network import ActorNet 8 | from qdpgym import sim 9 | from qdpgym.tasks.loct import RandomCommanderHookV0, LocomotionV0 10 | 11 | 12 | def test_actor(): 13 | device = 'cuda' 14 | net_a = ActorNet( 15 | 78, 133, 16 | activation=nn.Tanh, 17 | device=device, 18 | ) 19 | actor = ActorProb( 20 | net_a, 21 | (12,), 22 | max_action=1., 23 | unbounded=True, 24 | device=device, 25 | ).to(device) 26 | 27 | print(actor(torch.randn(size=(1, 211)))) 28 | 29 | 30 | def make_loct_env(): 31 | torch.set_num_threads(1) 32 | robot = sim.Aliengo(500, 'actuator_net', noisy=False) 33 | task = LocomotionV0() 34 | task.add_hook(RandomCommanderHookV0()) 35 | arena = sim.Plain() 36 | # for reward, weight in cfg['reward_cfg'].items(): 37 | task.add_reward('UnifiedLinearReward', 0.1) 38 | env = sim.QuadrupedEnv(robot, arena, task) 39 | return env 40 | 41 | 42 | def test_mp(): 43 | envs = ShmemVectorEnv( 44 | [make_loct_env for _ in range(4)] 45 | ) 46 | 47 | print(envs.reset()) 48 | print(envs.step(np.zeros((4, 12)))) 49 | print(envs.reset()) 50 | -------------------------------------------------------------------------------- /example/utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | from typing import Callable, List, Union, Optional, Any, Dict 4 | 5 | import gym 6 | import numpy as np 7 | import torch 8 | import wandb 9 | from tianshou.data import Batch 10 | from tianshou.env import ShmemVectorEnv, VectorEnvNormObs 11 | from tianshou.utils import RunningMeanStd, BaseLogger 12 | 13 | 14 | class NormObsWrapper(gym.Wrapper): 15 | """An observation normalization wrapper for vectorized environments. 16 | 17 | :param bool update_obs_rms: whether to update obs_rms. Default to True. 18 | :param float clip_obs: the maximum absolute value for observation. Default to 19 | 10.0. 20 | :param float epsilon: To avoid division by zero. 
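 Defaults to the machine epsilon of np.float32. 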
21 | """ 22 | 23 | def __init__( 24 | self, 25 | env: gym.Env, 26 | update_obs_rms: bool = False, 27 | clip_obs: float = 10.0, 28 | epsilon: float = np.finfo(np.float32).eps.item(), 29 | ) -> None: 30 | 31 | super(NormObsWrapper, self).__init__(env) 32 | # initialize observation running mean/std 33 | self.update_obs_rms = update_obs_rms 34 | self.obs_rms = RunningMeanStd() 35 | self.clip_max = clip_obs 36 | self.eps = epsilon 37 | 38 | def reset( 39 | self, *args, **kwargs 40 | ): 41 | obs = self.env.reset(*args, **kwargs) 42 | if self.obs_rms and self.update_obs_rms: 43 | self.obs_rms.update(obs) 44 | return self._norm_obs(obs) 45 | 46 | def step( 47 | self, 48 | action: np.ndarray, 49 | ): 50 | obs, rew, done, info = self.env.step(action) 51 | if self.obs_rms and self.update_obs_rms: 52 | self.obs_rms.update(obs) 53 | return self._norm_obs(obs), rew, done, info 54 | 55 | def _norm_obs(self, obs: np.ndarray) -> np.ndarray: 56 | if self.obs_rms: 57 | obs = (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.eps) 58 | obs = np.clip(obs, -self.clip_max, self.clip_max) 59 | return obs 60 | 61 | def set_obs_rms(self, obs_rms: RunningMeanStd) -> None: 62 | """Set with given observation running mean/std.""" 63 | self.obs_rms = obs_rms 64 | 65 | def get_obs_rms(self) -> RunningMeanStd: 66 | """Return observation running mean/std.""" 67 | return self.obs_rms 68 | 69 | 70 | def make_parallel_env( 71 | make_env: Callable[[], gym.Env], 72 | seed: Union[int, None, List[int]], 73 | training_num: int, 74 | test_num: int, 75 | obs_norm: bool 76 | ): 77 | """Wrapper function for Mujoco env. 78 | 79 | If EnvPool is installed, it will automatically switch to EnvPool's Mujoco env. 80 | 81 | :return: a tuple of (single env, training envs, test envs). 82 | """ 83 | 84 | env = make_env() 85 | train_envs = ShmemVectorEnv( 86 | [make_env for _ in range(training_num)] 87 | ) 88 | test_envs = ShmemVectorEnv( 89 | [make_env for _ in range(test_num)] 90 | ) if test_num else None 91 | train_envs.seed(seed) 92 | test_envs.seed(seed) 93 | if obs_norm: 94 | # obs norm wrapper 95 | train_envs = VectorEnvNormObs(train_envs) 96 | test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False) 97 | test_envs.set_obs_rms(train_envs.get_obs_rms()) 98 | return env, train_envs, test_envs 99 | 100 | 101 | def init_actor_critic(actor, critic): 102 | for m in list(actor.modules()) + list(critic.modules()): 103 | if isinstance(m, torch.nn.Linear): 104 | # orthogonal initialization 105 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 106 | torch.nn.init.zeros_(m.bias) 107 | # do last policy layer scaling, this will make initial actions have (close to) 108 | # 0 mean and std, and will help boost performances, 109 | # see https://arxiv.org/abs/2006.05990, Fig.24 for details 110 | for m in actor.mu.modules(): 111 | if isinstance(m, torch.nn.Linear): 112 | torch.nn.init.zeros_(m.bias) 113 | m.weight.data.copy_(0.01 * m.weight.data) 114 | 115 | 116 | class MyWandbLogger(BaseLogger): 117 | def __init__( 118 | self, 119 | project: str, 120 | train_interval: int = 1000, 121 | test_interval: int = 1, 122 | update_interval: int = 1000, 123 | name: Optional[str] = None, 124 | entity: Optional[str] = None, 125 | run_id: Optional[str] = None, 126 | config: Optional[argparse.Namespace] = None, 127 | ) -> None: 128 | super().__init__(train_interval, test_interval, update_interval) 129 | self.wandb_run = wandb.init( 130 | project=project, 131 | name=name, 132 | id=run_id, 133 | resume="allow", 134 | entity=entity, 135 | monitor_gym=True, 
136 | config=config, # type: ignore 137 | save_code=True, 138 | # mode='disabled', 139 | ) if not wandb.run else wandb.run 140 | 141 | self._reward_info = defaultdict(float) 142 | self._reward_counter = 0 143 | self._callbacks_train: List[Callable[[], dict]] = [] 144 | self._callbacks_test: List[Callable[[], dict]] = [] 145 | 146 | def collect_reward_info(self, **kwargs) -> Batch: 147 | if 'rew' in kwargs: 148 | for k, v in kwargs['info']['reward_info'].items(): 149 | self._reward_info[k] += v.mean() 150 | self._reward_counter += 1 151 | return Batch() 152 | 153 | def write(self, step_type: str, step: int, data: Dict[str, Any]) -> None: 154 | wandb.log(data, step=step) 155 | 156 | def save_data( 157 | self, 158 | epoch: int, 159 | env_step: int, 160 | gradient_step: int, 161 | save_checkpoint_fn: Optional[Callable[[int, int, int], Any]] = None, 162 | ) -> None: 163 | if save_checkpoint_fn: 164 | save_checkpoint_fn(epoch, env_step, gradient_step) 165 | 166 | def add_callback( 167 | self, callback: Callable[[], dict], 168 | period='train' 169 | ) -> None: 170 | if period == 'train': 171 | self._callbacks_train.append(callback) 172 | elif period == 'test': 173 | self._callbacks_test.append(callback) 174 | elif period == 'both': 175 | self._callbacks_train.append(callback) 176 | self._callbacks_test.append(callback) 177 | else: 178 | raise ValueError(f'Unknown period {period}') 179 | 180 | def log_train_data(self, collect_result: dict, step: int) -> None: 181 | """Use writer to log statistics generated during training. 182 | 183 | :param collect_result: a dict containing information of data collected in 184 | training stage, i.e., returns of collector.collect(). 185 | :param int step: stands for the timestep the collect_result being logged. 186 | """ 187 | if collect_result["n/ep"] > 0: 188 | if step - self.last_log_train_step >= self.train_interval: 189 | log_data = { 190 | "train/episode": collect_result["n/ep"], 191 | "train/reward": collect_result["rew"], 192 | "train/length": collect_result["len"], 193 | } 194 | for k, v in self._reward_info.items(): 195 | log_data[f'train/reward_info/{k}'] = v / self._reward_counter 196 | self._reward_info.clear() 197 | self._reward_counter = 0 198 | for callback in self._callbacks_train: 199 | log_data.update(callback()) 200 | self.write("train/env_step", step, log_data) 201 | self.last_log_train_step = step 202 | 203 | def log_test_data(self, collect_result: dict, step: int) -> None: 204 | 205 | """Use writer to log statistics generated during evaluating. 206 | 207 | :param collect_result: a dict containing information of data collected in 208 | evaluating stage, i.e., returns of collector.collect(). 209 | :param int step: stands for the timestep the collect_result being logged. 
210 | """ 211 | assert collect_result["n/ep"] > 0 212 | if step - self.last_log_test_step >= self.test_interval: 213 | log_data = { 214 | "test/env_step": step, 215 | "test/reward": collect_result["rew"], 216 | "test/length": collect_result["len"], 217 | "test/reward_std": collect_result["rew_std"], 218 | "test/length_std": collect_result["len_std"], 219 | } 220 | for k, v in self._reward_info.items(): 221 | log_data[f'test/reward_info/{k}'] = v / self._reward_counter 222 | self._reward_info.clear() 223 | self._reward_counter = 0 224 | for callback in self._callbacks_test: 225 | log_data.update(callback()) 226 | self.write("test/env_step", step, log_data) 227 | self.last_log_test_step = step 228 | -------------------------------------------------------------------------------- /qdpgym/__init__.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | import torch 4 | 5 | from .utils import tf 6 | 7 | try: 8 | from torch import inference_mode as _im 9 | except ImportError: 10 | torch.inference_mode = torch.no_grad 11 | 12 | 13 | class Sim(enum.IntEnum): 14 | BULLET = 0 15 | MUJOCO = 1 16 | 17 | 18 | sim_engine: Sim = Sim.BULLET 19 | -------------------------------------------------------------------------------- /qdpgym/sim/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import qdpgym 4 | 5 | pkg_path = os.path.dirname(os.path.abspath(__file__)) 6 | rsc_dir = os.path.join(pkg_path, 'resources') 7 | 8 | 9 | def is_bullet_available(): 10 | try: 11 | import pybullet 12 | except ModuleNotFoundError: 13 | return False 14 | return True 15 | 16 | 17 | def is_mujoco_available(): 18 | try: 19 | import mujoco 20 | import dm_control 21 | except ModuleNotFoundError: 22 | return False 23 | return True 24 | 25 | 26 | if qdpgym.sim_engine == qdpgym.Sim.MUJOCO: 27 | assert is_mujoco_available(), 'dm_control is not installed' 28 | from .mjc.quadruped import Aliengo 29 | from .mjc.env import QuadrupedEnv 30 | from .mjc.terrain import * 31 | from .mjc.hooks import * 32 | else: 33 | assert is_bullet_available(), 'pybullet is not installed' 34 | from .blt.quadruped import Aliengo 35 | from .blt.env import QuadrupedEnv 36 | from .blt.terrain import * 37 | from .blt.hooks import * 38 | 39 | from .abc import * 40 | from .app import Application 41 | -------------------------------------------------------------------------------- /qdpgym/sim/abc.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from dataclasses import dataclass, field 3 | import multiprocessing as mp 4 | from typing import Union, Type, List, Any, Tuple 5 | 6 | import gym 7 | import numpy as np 8 | 9 | ARRAY_LIKE = Union[np.ndarray, list, tuple] 10 | NUMERIC = Union[int, float] 11 | 12 | 13 | class QuadrupedHandle(metaclass=abc.ABCMeta): 14 | @property 15 | def obs_history(self): 16 | raise NotImplementedError 17 | 18 | @property 19 | def cmd_history(self): 20 | raise NotImplementedError 21 | 22 | def get_base_pos(self) -> np.ndarray: 23 | """Get base position in WORLD frame""" 24 | raise NotImplementedError 25 | 26 | def get_base_orn(self) -> np.ndarray: 27 | """Get base quaternion in [x, y, z, w]""" 28 | raise NotImplementedError 29 | 30 | def get_base_rot(self) -> np.ndarray: 31 | """Get base 3x3 rotation matrix to WORLD frame""" 32 | raise NotImplementedError 33 | 34 | def get_base_rpy(self) -> np.ndarray: 35 | """ 36 | Get base Tait–Bryan angles in z-y-x 37 | @return: 
np.ndarray: [roll, pitch, yaw] 38 | """ 39 | raise NotImplementedError 40 | 41 | def get_base_rpy_rate(self) -> np.ndarray: 42 | """ 43 | Get the rate of base rpy (not angular velocity) 44 | """ 45 | raise NotImplementedError 46 | 47 | def get_base_lin(self) -> np.ndarray: 48 | """Get base linear velocity in WORLD frame""" 49 | raise NotImplementedError 50 | 51 | def get_base_ang(self) -> np.ndarray: 52 | """Get base angular velocity in WORLD frame""" 53 | raise NotImplementedError 54 | 55 | def get_velocimeter(self) -> np.ndarray: 56 | """Get base linear velocity in BASE frame""" 57 | raise NotImplementedError 58 | 59 | def get_gyro(self) -> np.ndarray: 60 | """Get base angular velocity in BASE frame""" 61 | raise NotImplementedError 62 | 63 | def get_accelerometer(self) -> np.ndarray: 64 | """Get base acceleration in BASE frame""" 65 | raise NotImplementedError 66 | 67 | def get_state_history(self, latency): 68 | raise NotImplementedError 69 | 70 | def get_cmd_history(self, latency): 71 | raise NotImplementedError 72 | 73 | def get_torso_contact(self): 74 | raise NotImplementedError 75 | 76 | def get_leg_contacts(self): 77 | raise NotImplementedError 78 | 79 | def get_foot_pos(self): 80 | raise NotImplementedError 81 | 82 | def get_foot_contacts(self): 83 | raise NotImplementedError 84 | 85 | def get_contact_forces(self): 86 | raise NotImplementedError 87 | 88 | def get_force_sensor(self): 89 | raise NotImplementedError 90 | 91 | def get_slip_vel(self): 92 | raise NotImplementedError 93 | 94 | def get_strides(self): 95 | raise NotImplementedError 96 | 97 | def get_clearances(self): 98 | raise NotImplementedError 99 | 100 | def get_joint_pos(self) -> np.ndarray: 101 | raise NotImplementedError 102 | 103 | def get_joint_vel(self) -> np.ndarray: 104 | raise NotImplementedError 105 | 106 | def get_joint_acc(self) -> np.ndarray: 107 | raise NotImplementedError 108 | 109 | def get_last_command(self) -> np.ndarray: 110 | raise NotImplementedError 111 | 112 | def get_last_torque(self) -> np.ndarray: 113 | raise NotImplementedError 114 | 115 | 116 | class Quadruped(QuadrupedHandle, metaclass=abc.ABCMeta): 117 | STANCE_HEIGHT: float 118 | STANCE_CONFIG: tuple 119 | 120 | @property 121 | def noisy(self) -> QuadrupedHandle: 122 | return self 123 | 124 | def set_init_pose(self, x=0., y=0., yaw=0.): 125 | raise NotImplementedError 126 | 127 | def set_random_dynamics(self, flag: bool = True): 128 | raise NotImplementedError 129 | 130 | def set_latency(self, lower: float, upper=None): 131 | raise NotImplementedError 132 | 133 | @classmethod 134 | def inverse_kinematics(cls, leg: int, pos: ARRAY_LIKE): 135 | raise NotImplementedError 136 | 137 | @classmethod 138 | def forward_kinematics(cls, leg: int, angles: ARRAY_LIKE): 139 | raise NotImplementedError 140 | 141 | 142 | class Terrain(metaclass=abc.ABCMeta): 143 | def get_height(self, x, y): 144 | raise NotImplementedError 145 | 146 | def get_normal(self, x, y): 147 | raise NotImplementedError 148 | 149 | def get_peak(self, x_range, y_range): 150 | raise NotImplementedError 151 | 152 | def out_of_range(self, x, y) -> bool: 153 | raise NotImplementedError 154 | 155 | 156 | @dataclass 157 | class Snapshot(object): 158 | position: np.ndarray = None 159 | orientation: np.ndarray = None 160 | rotation: np.ndarray = None 161 | rpy: np.ndarray = None 162 | linear_vel: np.ndarray = None 163 | angular_vel: np.ndarray = None 164 | joint_pos: np.ndarray = None 165 | joint_vel: np.ndarray = None 166 | joint_acc: np.ndarray = None 167 | foot_pos: np.ndarray = None 
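 # The sensor-like fields below (velocimeter, gyro, accelerometer) follow the BASE-frame # convention of the corresponding QuadrupedHandle getters above. 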
168 | velocimeter: np.ndarray = None 169 | gyro: np.ndarray = None 170 | accelerometer: np.ndarray = None 171 | torso_contact: bool = None 172 | leg_contacts: np.ndarray = None 173 | contact_forces: np.ndarray = None 174 | force_sensor: np.ndarray = None 175 | rpy_rate: np.ndarray = None 176 | 177 | 178 | @dataclass 179 | class LocomotionInfo: 180 | time: float = 0. 181 | last_stance_states: Any = field(default_factory=lambda: [None] * 4) 182 | max_foot_heights: np.ndarray = field(default_factory=lambda: np.zeros(4)) 183 | foot_clearances: np.ndarray = field(default_factory=lambda: np.zeros(4)) 184 | strides: List[Tuple[float, float]] = field(default_factory=lambda: [(0., 0.)] * 4) 185 | slips: List[float] = field(default_factory=lambda: [0.] * 4) 186 | 187 | 188 | @dataclass 189 | class Command: 190 | command: np.ndarray = None 191 | torque: np.ndarray = None 192 | 193 | 194 | class ComposedObs(tuple): 195 | pass 196 | 197 | 198 | class Environment(gym.Env, metaclass=abc.ABCMeta): 199 | @property 200 | def robot(self) -> QuadrupedHandle: 201 | raise NotImplementedError 202 | 203 | @property 204 | def arena(self) -> Terrain: 205 | raise NotImplementedError 206 | 207 | @arena.setter 208 | def arena(self, value): 209 | raise NotImplementedError 210 | 211 | @property 212 | def action_history(self): 213 | raise NotImplementedError 214 | 215 | @property 216 | def sim_time(self): 217 | raise NotImplementedError 218 | 219 | @property 220 | def num_substeps(self): 221 | raise NotImplementedError 222 | 223 | @property 224 | def timestep(self): 225 | raise NotImplementedError 226 | 227 | @property 228 | def identifier(self): 229 | raise NotImplementedError 230 | 231 | def get_action_rate(self) -> np.ndarray: 232 | raise NotImplementedError 233 | 234 | def get_action_accel(self) -> np.ndarray: 235 | raise NotImplementedError 236 | 237 | def get_relative_robot_height(self) -> float: 238 | raise NotImplementedError 239 | 240 | def get_interact_terrain_normal(self): 241 | raise NotImplementedError 242 | 243 | def get_interact_terrain_rot(self) -> np.ndarray: 244 | raise NotImplementedError 245 | 246 | def get_perturbation(self, in_robot_frame=False): 247 | raise NotImplementedError 248 | 249 | def set_perturbation(self, value=None): 250 | raise NotImplementedError 251 | 252 | 253 | class Hook(metaclass=abc.ABCMeta): 254 | def register_task(self, task): 255 | pass 256 | 257 | def initialize(self, robot, env): 258 | pass 259 | 260 | def init_episode(self, robot, env): 261 | pass 262 | 263 | def before_step(self, robot, env): 264 | pass 265 | 266 | def before_substep(self, robot, env): 267 | pass 268 | 269 | def after_step(self, robot, env): 270 | pass 271 | 272 | def after_substep(self, robot, env): 273 | pass 274 | 275 | def on_success(self, robot, env): 276 | pass 277 | 278 | def on_fail(self, robot, env): 279 | pass 280 | 281 | 282 | class CommHook(Hook): 283 | def __init__(self, comm: mp.Queue): 284 | self._comm = comm 285 | self._env_id: str = 'anonymous' 286 | 287 | def initialize(self, robot, env): 288 | self._env_id = env.identifier 289 | 290 | def _submit(self, info): 291 | self._comm.put((self._env_id, info)) 292 | 293 | 294 | class CommHookFactory(object): 295 | def __init__(self, cls: Type[CommHook]): 296 | self._cls = cls 297 | self._comm = mp.Queue() 298 | 299 | def __call__(self, *args, **kwargs): 300 | return self._cls(self._comm, *args, **kwargs) 301 | 302 | 303 | class Task(metaclass=abc.ABCMeta): 304 | """ 305 | The task prototype. 
306 | A task is embedded in an environment, whose methods are 307 | automatically called in different periods of an episode; 308 | It should also manage hook callbacks. 309 | Only `before_step` should have return values, which 310 | should turn action into desired joint angles. 311 | """ 312 | 313 | observation_space: gym.Space 314 | action_space: gym.Space 315 | 316 | def init_episode(self): 317 | pass 318 | 319 | def before_step(self, action): 320 | pass 321 | 322 | def before_substep(self): 323 | pass 324 | 325 | def after_step(self): 326 | pass 327 | 328 | def after_substep(self): 329 | pass 330 | 331 | def on_success(self): 332 | pass 333 | 334 | def on_fail(self): 335 | pass 336 | 337 | def register_env(self, robot: Quadruped, env: Environment): 338 | raise NotImplementedError 339 | 340 | def add_hook(self, hook: Hook, name=None): 341 | raise NotImplementedError 342 | 343 | def remove_hook(self, name=None): 344 | raise NotImplementedError 345 | 346 | def get_observation(self): 347 | raise NotImplementedError 348 | 349 | def get_reward(self, detailed=True): 350 | """ 351 | Get the reward sum in a step. 352 | Rewards should be calculated in `after_step` or/and `after_substep`. 353 | :param detailed: returns an extra dict containing all reward terms. 354 | :return: reward(, details) 355 | """ 356 | raise NotImplementedError 357 | 358 | def is_succeeded(self): 359 | raise NotImplementedError 360 | 361 | def is_failed(self): 362 | raise NotImplementedError 363 | -------------------------------------------------------------------------------- /qdpgym/sim/app.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from qdpgym.sim.abc import Quadruped, Environment, Task 6 | 7 | 8 | class Application(object): 9 | def __init__(self, robot, env, task, policy): 10 | self.robot: Quadruped = robot 11 | self.env: Environment = env 12 | self.task: Task = task 13 | self.policy = policy 14 | self.callbacks = [] 15 | 16 | def add_callback(self, obj: Callable): 17 | self.callbacks.append(obj) 18 | 19 | def launch(self, allow_reset=True): 20 | with torch.inference_mode(): 21 | obs = self.env.reset() 22 | 23 | for _ in range(20000): 24 | actions = self.policy(obs) 25 | for callback in self.callbacks: 26 | callback(self.robot, self.env, self.task) 27 | obs, _, done, _ = self.env.step(actions) 28 | if done and allow_reset: 29 | self.env.reset() 30 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/blt/__init__.py -------------------------------------------------------------------------------- /qdpgym/sim/blt/env.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Union, Any, Optional, Tuple 3 | 4 | import gym 5 | import numpy as np 6 | import pybullet as pyb 7 | import pybullet_data as pyb_data 8 | from pybullet_utils.bullet_client import BulletClient 9 | 10 | from qdpgym.sim.abc import Task, Environment, ARRAY_LIKE 11 | from .quadruped import Aliengo 12 | from .terrain import TerrainBase 13 | from qdpgym.utils import PadWrapper, tf 14 | 15 | 16 | class QuadrupedEnv(Environment): 17 | _observation_space = None 18 | _action_space = None 19 | 20 | def __init__( 21 | self, 22 | robot: Aliengo, 23 | 
arena: TerrainBase, 24 | task: Task, 25 | timestep: float = 2e-3, 26 | time_limit: float = 20, 27 | num_substeps: int = 10, 28 | identifier: str = None, 29 | ): 30 | self._robot = robot 31 | self._arena = arena 32 | self._task = task 33 | self._timestep = timestep 34 | self._time_limit = time_limit 35 | self._num_substeps = num_substeps 36 | self._identifier = identifier or f'{hex(id(self))[-7:]}' 37 | self._step_freq = 1 / (self.timestep * self._num_substeps) 38 | self._render = False 39 | 40 | self._init = False 41 | self._elapsed_sim_steps = None 42 | 43 | self._sim_env = None if True else pyb # for pylint 44 | self._reset_times, self._debug_reset_param = 0, -1 45 | self._task.register_env(self._robot, self) 46 | self._action_history = collections.deque(maxlen=10) 47 | self._perturbation = None 48 | 49 | self._interact_terrain_samples = [] 50 | self._interact_terrain_normal = None 51 | self._interact_terrain_height = 0.0 52 | 53 | @property 54 | def observation_space(self) -> gym.Space: 55 | if self._observation_space is None: 56 | if hasattr(self._task, 'observation_space'): 57 | self._observation_space = self._task.observation_space 58 | else: 59 | self._observation_space = gym.Space() 60 | return self._observation_space 61 | 62 | @property 63 | def action_space(self) -> gym.Space: 64 | if self._action_space is None: 65 | if hasattr(self._task, 'action_space'): 66 | self._action_space = self._task.action_space 67 | else: 68 | self._action_space = gym.Space((12,), float) 69 | return self._action_space 70 | 71 | def render(self, mode="human"): 72 | if mode == 'human': 73 | self._render = True 74 | if self._sim_env is not None: 75 | self._sim_env.configureDebugVisualizer( 76 | pyb.COV_ENABLE_SINGLE_STEP_RENDERING, True 77 | ) 78 | else: 79 | return super().render(mode) 80 | 81 | def reset( 82 | self, 83 | seed: Optional[int] = None, 84 | return_info: bool = False, 85 | options: Optional[dict] = None, 86 | ) -> Union[Any, Tuple[Any, dict]]: 87 | 88 | super().reset(seed=seed, return_info=return_info, options=options) 89 | 90 | self._init = True 91 | self._elapsed_sim_steps = 0 92 | self._action_history.clear() 93 | 94 | if self._sim_env is None: 95 | if self._render: 96 | fps = int(self._step_freq) 97 | self._sim_env = BulletClient(pyb.GUI, options=f'--width=1024 --height=768 --mp4fps={fps}') 98 | self._debug_reset_param = self._sim_env.addUserDebugParameter('reset', 1, 0, 0) 99 | else: 100 | self._sim_env = BulletClient(pyb.DIRECT) 101 | self._sim_env.setAdditionalSearchPath(pyb_data.getDataPath()) 102 | self._sim_env.setTimeStep(self._timestep) 103 | self._sim_env.setGravity(0, 0, -9.8) 104 | 105 | self._task.init_episode() 106 | self._arena.spawn(self._sim_env) 107 | self._robot.add_to(self._arena) 108 | self._robot.spawn(self._sim_env, self.np_random) 109 | 110 | for i in range(50): 111 | self._robot.update_observation(None, minimal=True) 112 | self._robot.apply_command(self._robot.STANCE_CONFIG) 113 | self._sim_env.stepSimulation() 114 | 115 | self._action_history.append(np.array(self._robot.STANCE_CONFIG)) 116 | self._robot.update_observation(self.np_random) 117 | 118 | return ( 119 | self._task.get_observation() 120 | # if return_info else (self._task.get_observation(), {}) 121 | ) 122 | 123 | def close(self): 124 | if self._sim_env is not None: 125 | self._sim_env.disconnect() 126 | 127 | @property 128 | def robot(self): 129 | return self._robot 130 | 131 | @property 132 | def sim_env(self): 133 | return self._sim_env 134 | 135 | @property 136 | def render_mode(self): 137 | return 
self._render 138 | 139 | @property 140 | def arena(self): 141 | return self._arena 142 | 143 | @arena.setter 144 | def arena(self, value: TerrainBase): 145 | value.replace(self._sim_env, self._arena) 146 | self._arena = value 147 | 148 | @property 149 | def action_history(self): 150 | return PadWrapper(self._action_history) 151 | 152 | @property 153 | def sim_time(self): 154 | return self._elapsed_sim_steps * self._timestep 155 | 156 | @property 157 | def timestep(self): 158 | return self._timestep 159 | 160 | @property 161 | def num_substeps(self): 162 | return self._num_substeps 163 | 164 | @property 165 | def identifier(self) -> str: 166 | return self._identifier 167 | 168 | def step(self, action: ARRAY_LIKE) -> Tuple[Any, float, bool, dict]: 169 | if self._check_debug_reset_param(): 170 | return self.reset(), 0., True, {} 171 | 172 | assert self._init, 'Call `init_episode` before `step`!' 173 | action = self._task.before_step(action) 174 | action = np.asarray(action) 175 | prev_action = self._action_history[-1] 176 | self._action_history.append(action) 177 | 178 | for i in range(self._num_substeps): 179 | weight = (i + 1) / self._num_substeps 180 | current_action = action * weight + prev_action * (1 - weight) 181 | self._robot.apply_command(current_action) 182 | self._task.before_substep() 183 | 184 | self._apply_perturbation() 185 | self._sim_env.stepSimulation() 186 | self._elapsed_sim_steps += 1 187 | self._update_observation() 188 | 189 | self._task.after_substep() 190 | self._task.after_step() 191 | 192 | done = False 193 | info = {} 194 | if self._task.is_failed(): 195 | done = True 196 | self._task.on_fail() 197 | elif ((self._time_limit is not None and self.sim_time >= self._time_limit) 198 | or self._task.is_succeeded()): 199 | done = True 200 | info['TimeLimit.truncated'] = True 201 | self._task.on_success() 202 | 203 | reward, reward_info = self._task.get_reward(True) 204 | info['reward_info'] = reward_info 205 | return ( 206 | self._task.get_observation(), reward, done, info 207 | ) 208 | 209 | def _update_observation(self): 210 | self._robot.update_observation(self.np_random) 211 | 212 | self._interact_terrain_samples.clear() 213 | self._interact_terrain_height = 0. 
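 # Sample the terrain height under each foot: the average gives the local ground height # (used by get_relative_robot_height), and the four (x, y, h) samples are used to # estimate the local terrain normal. 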
214 | xy_points = self._robot.get_foot_pos() 215 | for x, y, _ in xy_points: 216 | h = self._arena.get_height(x, y) 217 | self._interact_terrain_height += h 218 | self._interact_terrain_samples.append((x, y, h)) 219 | self._interact_terrain_height /= 4 220 | self._interact_terrain_normal = tf.estimate_normal(self._interact_terrain_samples) 221 | 222 | def get_action_rate(self) -> np.ndarray: 223 | if len(self._action_history) < 2: 224 | return np.zeros(12) 225 | actions = [self._action_history[-i - 1] for i in range(2)] 226 | return (actions[0] - actions[1]) * self._step_freq 227 | 228 | def get_action_accel(self) -> np.ndarray: 229 | if len(self._action_history) < 3: 230 | return np.zeros(12) 231 | actions = [self._action_history[-i - 1] for i in range(3)] 232 | return (actions[0] - 2 * actions[1] + actions[2]) * self._step_freq ** 2 233 | 234 | def get_relative_robot_height(self) -> float: 235 | return self._robot.get_base_pos()[2] - self._interact_terrain_height 236 | 237 | def get_interact_terrain_normal(self) -> np.ndarray: 238 | return self._interact_terrain_normal 239 | 240 | def get_interact_terrain_rot(self) -> np.ndarray: 241 | return tf.Rotation.from_zaxis(self._interact_terrain_normal) 242 | 243 | def get_perturbation(self, in_robot_frame=False) -> Optional[np.ndarray]: 244 | if self._perturbation is None: 245 | return None 246 | elif in_robot_frame: 247 | rotation = self._robot.get_base_rot() 248 | perturbation = np.concatenate( 249 | [rotation.T @ self._perturbation[i * 3:i * 3 + 3] for i in range(2)] 250 | ) 251 | else: 252 | perturbation = self._perturbation 253 | return perturbation 254 | 255 | def set_perturbation(self, value=None) -> None: 256 | if value is None: 257 | self._perturbation = None 258 | else: 259 | self._perturbation = np.array(value) 260 | 261 | def _apply_perturbation(self): 262 | if self._perturbation is not None: 263 | self._applied_link_id = 0 264 | self._sim_env.applyExternalForce( 265 | objectUniqueId=self._robot.id, 266 | linkIndex=self._applied_link_id, 267 | forceObj=self._perturbation[:3], 268 | posObj=self._robot.get_base_pos(), 269 | flags=pyb.WORLD_FRAME 270 | ) 271 | self._sim_env.applyExternalTorque( 272 | objectUniqueId=self._robot.id, 273 | linkIndex=self._applied_link_id, 274 | torqueObj=self._perturbation[3:], 275 | flags=pyb.WORLD_FRAME 276 | ) 277 | 278 | def _check_debug_reset_param(self): 279 | if self._debug_reset_param != -1: 280 | reset_times = self._sim_env.readUserDebugParameter(self._debug_reset_param) 281 | if reset_times != self._reset_times: 282 | self._reset_times = reset_times 283 | return True 284 | return False 285 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/hooks.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | import json 4 | import math 5 | import os 6 | import socket 7 | import time 8 | 9 | import numpy as np 10 | import pybullet as pyb 11 | 12 | from qdpgym.sim.abc import Hook 13 | from qdpgym.sim.blt.terrain import Hills, Slopes, Steps, PlainHf 14 | from qdpgym.utils import Angle, tf, get_timestamp, log 15 | 16 | 17 | class ViewerHook(Hook): 18 | def __init__(self): 19 | self._pre_vis = False 20 | self._init_vis = False 21 | self._sleep_on = True 22 | 23 | self._robot_yaw_buffer = collections.deque(maxlen=10) 24 | self._cam_state = 0 25 | 26 | self._last_frame_time = time.time() 27 | 28 | def initialize(self, robot, env): 29 | env.render() 30 | 31 | def init_episode(self, robot, 
env): 32 | self._robot_yaw_buffer.clear() 33 | if self._pre_vis: 34 | return 35 | self._pre_vis = True 36 | sim_env = env.sim_env 37 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, False) 38 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_GUI, False) 39 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_TINY_RENDERER, False) 40 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SHADOWS, False) 41 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RGB_BUFFER_PREVIEW, False) 42 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_DEPTH_BUFFER_PREVIEW, False) 43 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SEGMENTATION_MARK_PREVIEW, False) 44 | 45 | def before_step(self, robot, env): 46 | if not self._init_vis: 47 | self._init_vis = True 48 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, True) 49 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_GUI, True) 50 | 51 | def after_step(self, robot, env): 52 | sim_env = env.sim_env 53 | period = env.timestep * env.num_substeps 54 | time_spent = time.time() - self._last_frame_time 55 | if self._sleep_on: 56 | sleep_time = period - time_spent 57 | if sleep_time > 0: 58 | time.sleep(sleep_time) 59 | self._last_frame_time = time.time() 60 | kbd_events = pyb.getKeyboardEvents() 61 | 62 | switch_cam_state = self.is_triggered(ord('`'), kbd_events) 63 | if switch_cam_state: 64 | self._cam_state = (self._cam_state + 1) % 5 65 | 66 | x, y, _ = robot.get_base_pos() 67 | z = robot.STANCE_HEIGHT + env.arena.get_height(x, y) 68 | if self._cam_state == 0: 69 | if switch_cam_state: 70 | yaw = Angle.to_deg(Angle.mean(self._robot_yaw_buffer)) 71 | sim_env.resetDebugVisualizerCamera(1.5, yaw, -30., (x, y, z)) 72 | else: 73 | yaw, pitch, dist = sim_env.getDebugVisualizerCamera()[8:11] 74 | sim_env.resetDebugVisualizerCamera(dist, yaw, pitch, (x, y, z)) 75 | else: 76 | self._robot_yaw_buffer.append(robot.get_base_rpy()[2] - math.pi / 2) 77 | # To avoid carsick :) 78 | yaw = Angle.mean(self._robot_yaw_buffer) 79 | degree = -30. 80 | if self._cam_state == 2: # around robot 81 | yaw = Angle.norm(yaw + math.pi / 2) 82 | degree = 0. 83 | elif self._cam_state == 3: 84 | yaw = Angle.norm(yaw + math.pi) 85 | elif self._cam_state == 4: 86 | yaw = Angle.norm(yaw - math.pi / 2) 87 | degree = 0. 
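            # Orbiting chase camera: states 1-4 differ only in their 90-degree yaw
            # offset around the smoothed robot heading and in the pitch angle.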
88 | sim_env.resetDebugVisualizerCamera(1.5, Angle.to_deg(yaw), degree, (x, y, z)) 89 | env.render() 90 | 91 | KEY_SPACE = ord(' ') 92 | if self.is_triggered(KEY_SPACE, kbd_events): 93 | while True: # PAUSE 94 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SINGLE_STEP_RENDERING, True) 95 | time.sleep(0.01) 96 | if self.is_triggered(KEY_SPACE, pyb.getKeyboardEvents()): 97 | self._cam_state = 0 98 | break 99 | 100 | @staticmethod 101 | def is_triggered(key, keyboard_events): 102 | return key in keyboard_events and keyboard_events[key] & pyb.KEY_WAS_TRIGGERED 103 | 104 | 105 | class _TorqueVisualizerHelper(object): 106 | def __init__(self): 107 | self._axis_x = None 108 | self._axis_y = None 109 | self._mk_start = None 110 | self._mk_phase = 0 111 | 112 | def update(self, torque): 113 | if (torque != 0.).any(): 114 | self._axis_x, self._axis_y, _ = tf.Rotation.from_zaxis(tf.vunit(torque)).T 115 | magnitude = tf.vnorm(torque) 116 | if self._mk_start is None: 117 | self._mk_start = self._axis_x * magnitude / 20 118 | self._mk_phase += math.pi / 36 119 | marker_end = (self._axis_x * math.cos(self._mk_phase) + 120 | self._axis_y * math.sin(self._mk_phase) 121 | ) * magnitude / 20 122 | 123 | marker_info = dict( 124 | lineFromXYZ=copy.deepcopy(self._mk_start), 125 | lineToXYZ=marker_end 126 | ) 127 | self._mk_start = marker_end 128 | return marker_info 129 | 130 | 131 | class ExtraViewerHook(ViewerHook): 132 | def __init__(self, perturb=True): 133 | super().__init__() 134 | self._show_perturb = perturb 135 | self._last_perturb = None 136 | 137 | self._force_marker = -1 138 | self._torque_vis = _TorqueVisualizerHelper() 139 | 140 | def after_step(self, robot, env): 141 | sim_env = env.sim_env 142 | if self._show_perturb: 143 | perturb = env.get_perturbation(in_robot_frame=True) 144 | if perturb is None: 145 | sim_env.removeUserDebugItem(self._force_marker) 146 | else: 147 | if (self._last_perturb != perturb).any(): 148 | self._force_marker = sim_env.addUserDebugLine( 149 | lineFromXYZ=(0., 0., 0.), 150 | lineToXYZ=perturb[:3] / 50, 151 | lineColorRGB=(1., 0., 0.), 152 | lineWidth=3, lifeTime=1, 153 | parentObjectUniqueId=robot.id, 154 | replaceItemUniqueId=self._force_marker 155 | ) 156 | sim_env.addUserDebugLine( 157 | **self._torque_vis.update(perturb[3:]), 158 | lineColorRGB=(0., 0., 1.), 159 | lineWidth=5, lifeTime=0.1, 160 | parentObjectUniqueId=robot.id 161 | ) 162 | 163 | self._last_perturb = perturb 164 | super().after_step(robot, env) 165 | 166 | 167 | class HeightSampleVisualizer(Hook): 168 | def __init__(self): 169 | super().__init__() 170 | self._terrain_visual_shape = -1 171 | self._terrain_markers = None 172 | 173 | def init_episode(self, robot, env): 174 | if self._terrain_markers is None: 175 | sim_env = env.sim_env 176 | self._terrain_visual_shape = sim_env.createVisualShape( 177 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0., 0.8, 0., 0.6)) 178 | self._terrain_markers = [ 179 | sim_env.createMultiBody( 180 | baseVisualShapeIndex=self._terrain_visual_shape 181 | ) for _ in range(121) 182 | ] 183 | 184 | def after_step(self, robot, env): 185 | sim_env = env.sim_env 186 | rx, ry, _ = robot.get_base_pos() 187 | yaw = robot.get_base_rpy()[2] 188 | cy, sy = math.cos(yaw), math.sin(yaw) 189 | dx, dy = np.array(((cy, sy), (-sy, cy))) * 0.15 190 | 191 | marker = iter(self._terrain_markers) 192 | for i in range(-5, 6): 193 | for j in range(-5, 6): 194 | x, y = (rx, ry) + i * dx + j * dy 195 | height = env.arena.get_height(x, y) 196 | sim_env.resetBasePositionAndOrientation( 
197 | next(marker), (x, y, height), (0., 0., 0., 1.) 198 | ) 199 | 200 | 201 | class RandomTerrainHook(Hook): 202 | def __init__(self): 203 | self.max_roughness = 0.2 204 | self.max_slope = 10 / 180 * math.pi 205 | self.max_step_height = 0.1 206 | 207 | def generate_terrain(self, random_gen: np.random.Generator): 208 | """ 209 | Generate a random terrain object of Hills, Slopes, Steps or Plain. 210 | """ 211 | size, resolution = 30, 0.1 212 | terrain_type = random_gen.integers(4) 213 | difficulty = random_gen.random() 214 | terrain = None 215 | if terrain_type == 0: 216 | roughness = self.max_roughness * difficulty 217 | terrain = Hills.make(size, resolution, (roughness, 20), 218 | random_state=random_gen) 219 | elif terrain_type == 1: 220 | slope = self.max_slope * difficulty 221 | axis = random_gen.choice(('x', 'y')) 222 | terrain = Slopes.make(size, resolution, slope, 3., axis) 223 | elif terrain_type == 2: 224 | step_height = self.max_step_height * difficulty 225 | terrain = Steps.make(size, resolution, 1., step_height, random_gen) 226 | elif terrain_type == 3: 227 | terrain = PlainHf.make(size, resolution) 228 | return terrain 229 | 230 | def init_episode(self, robot, env): 231 | env.arena = self.generate_terrain(env.np_random) 232 | 233 | 234 | class RandomPerturbHook(Hook): 235 | def __init__(self): 236 | self.perturb_prob = 0.5 237 | self.force_magnitude = np.array((20., 20.)) 238 | self.torque_magnitude = np.array((2.5, 5., 5.)) 239 | self.interval_range = (0.5, 2.0) 240 | self.interval = 0 241 | self.last_update = 0 242 | 243 | def get_random_perturb(self, random_gen): 244 | horizontal_force = random_gen.uniform(0, self.force_magnitude[0]) 245 | vertical_force = random_gen.uniform(0, self.force_magnitude[1]) 246 | yaw = random_gen.uniform(0, 2 * math.pi) 247 | external_force = np.array(( 248 | horizontal_force * np.cos(yaw), 249 | horizontal_force * np.sin(yaw), 250 | vertical_force * random_gen.choice((-1, 1)) 251 | )) 252 | 253 | external_torque = random_gen.uniform(-self.torque_magnitude, self.torque_magnitude) 254 | return external_force, external_torque 255 | 256 | def before_substep(self, robot, env): 257 | if env.sim_time >= self.last_update + self.interval: 258 | random = env.np_random 259 | if random.random() < self.perturb_prob: 260 | env.set_perturbation(np.concatenate(self.get_random_perturb(random))) 261 | else: 262 | env.set_perturbation(None) 263 | self.interval = random.uniform(*self.interval_range) 264 | self.last_update = env.sim_time 265 | 266 | def init_episode(self, robot, env): 267 | self.last_update = self.interval = 0 268 | 269 | 270 | class VideoRecorderHook(Hook): 271 | def __init__(self, path=''): 272 | if not path: 273 | path = os.path.join('Videos', f'record-{get_timestamp()}.mp4') 274 | os.makedirs(os.path.dirname(path), exist_ok=True) 275 | self._path = path 276 | self._log_id = -1 277 | self._status = True 278 | 279 | def before_step(self, robot, env): 280 | if self._log_id == -1 and self._status: 281 | log.info('start recording') 282 | self._log_id = env.sim_env.startStateLogging( 283 | pyb.STATE_LOGGING_VIDEO_MP4, self._path 284 | ) 285 | self._status = False 286 | 287 | def init_episode(self, robot, env): 288 | if self._log_id != -1: 289 | env.client.stopStateLogging(self._log_id) 290 | self._log_id = -1 291 | 292 | 293 | class _UdpPublisher(object): 294 | """ 295 | Send data stream of locomotion to outer tools such as PlotJuggler. 
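    Messages are JSON dicts sent to 127.0.0.1 on the given port; a UDP/JSON
    listener such as PlotJuggler's streamer can plot the fields live.

    Minimal usage sketch (field names and values are illustrative only):

        pub = _UdpPublisher(9870)
        pub.send({'sim_time': 1.25, 'torque': [0.] * 12})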
296 | """ 297 | 298 | def __init__(self, port): 299 | self.client = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 300 | self.port = port 301 | 302 | def send(self, data: dict): 303 | msg = json.dumps(data) 304 | ip_port = ('127.0.0.1', self.port) 305 | self.client.sendto(msg.encode('utf-8'), ip_port) 306 | 307 | 308 | class StatisticsHook(Hook): 309 | def __init__(self, publish_on=True): 310 | self._torque_sum = 0. 311 | self._torque_abs_sum = 0. 312 | self._torque_pen_sum = 0.0 313 | self._joint_motion_sum = 0.0 314 | self._step_counter = 0 315 | self._publish = publish_on 316 | if self._publish: 317 | self._udp_pub = _UdpPublisher(9870) 318 | self._task = None 319 | 320 | def register_task(self, task): 321 | self._task = task 322 | 323 | def after_step(self, robot, env): 324 | 325 | def wrap(reward_type): 326 | return getattr(self._task.ALL_REWARDS, reward_type)()(robot, env, self._task) 327 | 328 | _, reward_details = self._task.get_reward(detailed=True) 329 | self._step_counter += 1 330 | self._torque_sum += robot.get_last_torque() ** 2 331 | self._torque_abs_sum += abs(robot.get_last_torque()) 332 | self._torque_pen_sum += wrap('TorquePenalty') 333 | self._joint_motion_sum += wrap('JointMotionPenalty') 334 | 335 | # def publish(self, task, rob, env): 336 | 337 | # data = { 338 | # 'obs': env.makeNoisyProprioInfo().standard().tolist(), 339 | # 'action': env._action.tolist(), 340 | # 'joint_states': { 341 | # 'joint_pos': rob.getJointPositions().tolist(), 342 | # 'violence': env.getActionViolence().tolist(), 343 | # 'commands': rob.getLastCommand().tolist(), 344 | # 'joint_vel': rob.getJointVelocities().tolist(), 345 | # 'joint_acc': rob.getJointAccelerations().tolist(), 346 | # # 'kp_part': tuple(rob._motor._kp_part), 347 | # # 'kd_part': tuple(rob._motor._kd_part), 348 | # 'torque': rob.getLastAppliedTorques().tolist(), 349 | # 'contact': rob.getContactStates().tolist() 350 | # }, 351 | # 'body_height': env.getTerrainBasedHeightOfRobot(), 352 | # # 'cot': rob.getCostOfTransport(), 353 | # 'twist': { 354 | # 'linear': rob.getBaseLinearVelocityInBaseFrame().tolist(), 355 | # 'angular': rob.getBaseAngularVelocityInBaseFrame().tolist(), 356 | # }, 357 | # # 'torque_pen': wrap(TorquePenalty) 358 | # } 359 | # self._udp_pub.send(data) 360 | 361 | def init_episode(self, robot, env): 362 | if self._step_counter != 0.: 363 | print('episode len:', self._step_counter) 364 | print('mse torque', np.sqrt(self._torque_sum / self._step_counter)) 365 | print('abs torque', self._torque_abs_sum / self._step_counter) 366 | print('torque pen', self._torque_pen_sum / self._step_counter) 367 | print('joint motion pen', self._joint_motion_sum / self._step_counter) 368 | self._torque_sum = 0. 369 | self._torque_abs_sum = 0. 
370 | self._torque_pen_sum = 0.0 371 | self._joint_motion_sum = 0.0 372 | self._step_counter = 0 373 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/terrain.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import dataclasses 3 | from typing import Union, Tuple, Iterable 4 | 5 | import numpy as np 6 | import pybullet as pyb 7 | from scipy.interpolate import interp2d 8 | 9 | from qdpgym.sim.abc import Terrain, NUMERIC 10 | from qdpgym.utils.tf import vcross, vunit 11 | 12 | __all__ = [ 13 | 'NullTerrain', 'Plain', 'HeightFieldTerrain', 14 | 'Hills', 'PlainHf', 'Slopes', 'Steps', 'TerrainBase' 15 | ] 16 | 17 | 18 | class TerrainBase(Terrain, metaclass=abc.ABCMeta): 19 | def __init__(self): 20 | self._id: int = -1 21 | self._spawned = False 22 | 23 | @property 24 | def id(self): 25 | return self._id 26 | 27 | def spawn(self, sim_env): 28 | raise NotImplementedError 29 | 30 | def replace(self, sim_env, obj: 'TerrainBase'): 31 | obj.remove(sim_env) 32 | self.spawn(sim_env) 33 | 34 | def remove(self, sim_env): 35 | if self._id != -1: 36 | sim_env.removeBody(self._id) 37 | 38 | 39 | class NullTerrain(TerrainBase): 40 | pass 41 | 42 | 43 | class Plain(TerrainBase): 44 | def spawn(self, sim_env): 45 | if self._id != -1: 46 | return 47 | self._id = sim_env.loadURDF("plane.urdf") 48 | sim_env.changeDynamics(self._id, -1, lateralFriction=1.0) 49 | 50 | def get_height(self, x, y): 51 | return 0.0 52 | 53 | def get_normal(self, x, y): 54 | return np.array((0, 0, 1)) 55 | 56 | def get_peak(self, x_range, y_range): 57 | return sum(x_range) / 2, sum(y_range) / 2, 0.0 58 | 59 | def out_of_range(self, x, y): 60 | return False 61 | 62 | 63 | @dataclasses.dataclass 64 | class HeightField: 65 | data: np.ndarray 66 | size: Union[NUMERIC, Tuple[NUMERIC]] 67 | resolution: NUMERIC 68 | 69 | 70 | class HeightFieldTerrain(TerrainBase): 71 | def __init__(self, heightfield: HeightField): 72 | super().__init__() 73 | self.heightfield = heightfield.data 74 | self.y_dim, self.x_dim = self.heightfield.shape 75 | if isinstance(heightfield.size, Iterable): 76 | self.x_size, self.y_size = heightfield.size 77 | else: 78 | self.x_size, self.y_size = heightfield.size, heightfield.size 79 | self.x_rsl = self.y_rsl = heightfield.resolution 80 | self._shape_id = -1 81 | 82 | def spawn(self, sim_env): 83 | if self._id != -1: 84 | return 85 | self._shape_id = sim_env.createCollisionShape( 86 | shapeType=pyb.GEOM_HEIGHTFIELD, 87 | flags=pyb.GEOM_CONCAVE_INTERNAL_EDGE, 88 | meshScale=(self.x_rsl, self.y_rsl, 1.0), 89 | heightfieldTextureScaling=self.x_size, 90 | heightfieldData=self.heightfield.reshape(-1), 91 | numHeightfieldColumns=self.x_dim, 92 | numHeightfieldRows=self.y_dim, 93 | replaceHeightfieldIndex=-1 94 | ) 95 | self._id = sim_env.createMultiBody(0, self._shape_id) 96 | sim_env.changeVisualShape(self._id, -1, rgbaColor=(1, 1, 1, 1)) 97 | sim_env.changeDynamics(self._id, -1, lateralFriction=1.0) 98 | origin_z = (np.max(self.heightfield) + np.min(self.heightfield)) / 2 99 | sim_env.resetBasePositionAndOrientation(self._id, (0, 0, origin_z), 100 | (0., 0., 0., 1.)) 101 | 102 | def replace(self, sim_env, obj): 103 | assert self._id == self._shape_id == -1, f'`{self}` have been spawned' 104 | if obj._id == -1: 105 | return 106 | 107 | if not isinstance(obj, HeightFieldTerrain): 108 | return super().replace(sim_env, obj) 109 | 110 | # Currently, in bullet <= 3.2.4, 111 | # Heightfield replacement may cause collision detection 
failure. 112 | # See https://github.com/bulletphysics/bullet3/issues/4236 113 | # See https://github.com/bulletphysics/bullet3/pull/4253 114 | self._id = obj._id 115 | self._shape_id = sim_env.createCollisionShape( 116 | shapeType=pyb.GEOM_HEIGHTFIELD, flags=pyb.GEOM_CONCAVE_INTERNAL_EDGE, 117 | meshScale=(self.x_rsl, self.y_rsl, 1.0), 118 | heightfieldTextureScaling=self.x_size, 119 | heightfieldData=self.heightfield.reshape(-1), 120 | numHeightfieldColumns=self.x_dim, numHeightfieldRows=self.y_dim, 121 | replaceHeightfieldIndex=obj._shape_id 122 | ) 123 | obj._id = obj._shape_id = -1 124 | 125 | origin_z = (np.max(self.heightfield) + np.min(self.heightfield)) / 2 126 | sim_env.resetBasePositionAndOrientation(self._id, (0, 0, origin_z), 127 | (0., 0., 0., 1.)) 128 | 129 | def remove(self, sim_env): 130 | if self._id != -1: 131 | sim_env.removeBody(self._id) 132 | sim_env.removeCollisionShape(self._shape_id) 133 | self._id = self._shape_id = -1 134 | 135 | @property 136 | def shape_id(self): 137 | return self._shape_id 138 | 139 | def get_disc_x(self, x): 140 | return int((x + self.x_size / 2) / self.x_rsl) 141 | 142 | def get_disc_y(self, y): 143 | return int((y + self.y_size / 2) / self.y_rsl) 144 | 145 | def get_cont_x(self, x_idx): 146 | return x_idx * self.x_rsl - self.x_size / 2 147 | 148 | def get_cont_y(self, y_idx): 149 | return y_idx * self.y_rsl - self.y_size / 2 150 | 151 | def get_nearest_vertices(self, x, y): 152 | x_idx, y_idx = self.get_disc_x(x), self.get_disc_y(y) 153 | x_rnd, y_rnd = self.get_cont_x(x_idx), self.get_cont_y(y_idx) 154 | if (x - x_rnd) / self.x_rsl + (y - y_rnd) / self.y_rsl < 1: 155 | v1 = x_rnd, y_rnd, self.heightfield[y_idx, x_idx] 156 | else: 157 | v1 = x_rnd + self.x_rsl, y_rnd + self.y_rsl, self.heightfield[y_idx + 1, x_idx + 1] 158 | v2 = x_rnd, y_rnd + self.y_rsl, self.heightfield[y_idx + 1, x_idx] 159 | v3 = x_rnd + self.x_rsl, y_rnd, self.heightfield[y_idx, x_idx + 1] 160 | return np.array(v1), np.array(v2), np.array(v3) 161 | 162 | def get_peak(self, x_range, y_range): 163 | (x_lower, x_upper), (y_lower, y_upper) = x_range, y_range 164 | x_lower_idx, x_upper_idx = self.get_disc_x(x_lower), self.get_disc_x(x_upper) + 1 165 | y_lower_idx, y_upper_idx = self.get_disc_y(y_lower), self.get_disc_y(y_upper) + 1 166 | hfield_part = self.heightfield[y_lower_idx:y_upper_idx, x_lower_idx:x_upper_idx] 167 | y_size, x_size = hfield_part.shape 168 | max_idx = np.argmax(hfield_part) 169 | max_x_idx, max_y_idx = max_idx % x_size, max_idx // x_size 170 | max_height = hfield_part[max_y_idx, max_x_idx] 171 | max_x, max_y = x_lower + max_x_idx * self.x_rsl, y_lower + max_y_idx * self.y_rsl 172 | return max_x, max_y, max_height 173 | 174 | def get_height(self, x, y): 175 | try: 176 | v1, v2, v3 = self.get_nearest_vertices(x, y) 177 | except IndexError: 178 | return 0.0 179 | if x == v1[0] and y == v1[1]: 180 | return v1[2] 181 | x1, y1, z1 = v2 - v1 182 | x2, y2, z2 = v3 - v1 183 | x3, y3 = x - v1[0], y - v1[1] 184 | div = (x1 * y2 - x2 * y1) 185 | c1 = (x3 * y2 - x2 * y3) / div 186 | c2 = (x1 * y3 - x3 * y1) / div 187 | return c1 * z1 + c2 * z2 + v1[2] 188 | 189 | def get_normal(self, x, y) -> np.ndarray: 190 | try: 191 | v1, v2, v3 = self.get_nearest_vertices(x, y) 192 | except IndexError: 193 | return np.array((0., 0., 1.)) 194 | normal = vunit(vcross(v1 - v2, v1 - v3)) 195 | return normal if normal[2] > 0 else -normal 196 | 197 | def out_of_range(self, x, y): 198 | return abs(x) > self.x_size / 2 - 1 or abs(y) > self.y_size / 2 - 1 199 | 200 | 201 | class 
Hills(HeightFieldTerrain): 202 | def __init__(self, heightfield): 203 | super().__init__(heightfield) 204 | 205 | @classmethod 206 | def make(cls, size, resolution, *rough_dsp: Tuple[NUMERIC, NUMERIC], random_state): 207 | return cls(cls.make_hfield(size, resolution, *rough_dsp, random_state=random_state)) 208 | 209 | @classmethod 210 | def make_hfield(cls, size, resol, *rough_dsp: Tuple[NUMERIC, NUMERIC], random_state) -> HeightField: 211 | data_size = int(size / resol) + 1 212 | hfield_data = np.zeros((data_size, data_size)) 213 | for roughness, dsp in rough_dsp: 214 | sample_rsl = dsp * resol 215 | x = y = np.arange(-size / 2 - 3 * sample_rsl, size / 2 + 4 * sample_rsl, sample_rsl) 216 | hfield_dsp = random_state.uniform(0, roughness, (x.size, y.size)) 217 | terrain_func = interp2d(x, y, hfield_dsp, kind='cubic') 218 | x_upsampled = y_upsampled = np.linspace(-size / 2, size / 2, data_size) 219 | hfield_data += terrain_func(x_upsampled, y_upsampled) 220 | return HeightField(hfield_data, size, resol) 221 | 222 | @classmethod 223 | def default(cls): 224 | return cls.make(20, 0.1, (0.2, 10), random_state=np.random) 225 | 226 | 227 | class PlainHf(HeightFieldTerrain): 228 | @classmethod 229 | def make(cls, size, resolution): 230 | return cls(cls.make_hfield(size, resolution)) 231 | 232 | @staticmethod 233 | def make_hfield(size, resolution): 234 | data_size = int(size / resolution) + 1 235 | hfield_data = np.zeros((data_size, data_size)) 236 | return HeightField(hfield_data, size, resolution) 237 | 238 | def get_height(self, x, y): 239 | return 0.0 240 | 241 | def get_normal(self, x, y): 242 | return np.array((0., 0., 1.)) 243 | 244 | def get_peak(self, x_range, y_range): 245 | return sum(x_range) / 2, sum(y_range) / 2, 0. 246 | 247 | @classmethod 248 | def default(cls): 249 | return cls.make(20, 0.1) 250 | 251 | 252 | class Slopes(HeightFieldTerrain): 253 | @classmethod 254 | def make(cls, size, resolution, slope, slope_width, axis='x'): 255 | return cls(cls.make_hfield(size, resolution, slope, slope_width, axis)) 256 | 257 | @classmethod 258 | def make_hfield(cls, size, resolution, slope, slope_width, axis): 259 | step = int(slope_width * 2 / resolution) 260 | data_size = int(size / resolution) + 1 261 | num_steps = int(data_size / step) + 1 262 | slope = np.tan(slope) 263 | hfield_data = np.zeros((data_size, data_size)) 264 | for i in range(num_steps): 265 | x_start, x_stop = i * step, int((i + 0.4) * step) 266 | for j in range(x_start, min(x_stop, data_size)): 267 | hfield_data[:, j] = (x_stop - j) * slope * resolution 268 | x_start, x_stop = x_stop, int((i + 0.6) * step) 269 | for j in range(x_start, min(x_stop, data_size)): 270 | hfield_data[:, j] = 0. 
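            # The remaining 40% of the period climbs back up, so each period forms
            # a V-shaped valley: 40% downhill, 20% flat floor, 40% uphill.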
271 | x_start, x_stop = x_stop, (i + 1) * step 272 | for j in range(x_start, min(x_stop, data_size)): 273 | hfield_data[:, j] = (j - x_start) * slope * resolution 274 | return HeightField(cls.rotate(hfield_data, axis), size, resolution) 275 | 276 | @classmethod 277 | def rotate(cls, hfield_data, axis): 278 | if axis == 'x': 279 | return hfield_data 280 | elif axis == 'y': 281 | return hfield_data.T 282 | raise RuntimeError('Unknown axis') 283 | 284 | @classmethod 285 | def default(cls): 286 | return cls.make(20, 0.1, 0.17, 3.0) 287 | 288 | 289 | class Steps(HeightFieldTerrain): 290 | @classmethod 291 | def make(cls, size, resolution, step_width, max_step_height, random_state): 292 | return cls(cls.make_hfield(size, resolution, step_width, max_step_height, random_state)) 293 | 294 | @staticmethod 295 | def make_hfield(size, resolution, step_width, max_step_height, random_state): 296 | step = int(step_width / resolution) 297 | data_size = int(size / resolution) + 1 298 | num_steps = int(data_size / step) + 1 299 | hfield_data = np.zeros((data_size, data_size)) 300 | for i in range(num_steps + 1): 301 | for j in range(num_steps + 1): 302 | x_start, x_stop = int((i - 0.5) * step), int((i + 0.5) * step) 303 | y_start, y_stop = int((j - 0.5) * step), int((j + 0.5) * step) 304 | hfield_data[y_start:y_stop, x_start:x_stop] = random_state.uniform(0., max_step_height) 305 | return HeightField(hfield_data, size, resolution) 306 | 307 | @classmethod 308 | def default(cls): 309 | return cls.make(20, 0.1, 1.0, 0.1, random_state=np.random) 310 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/utils.py: -------------------------------------------------------------------------------- 1 | class JointInfo(object): 2 | def __init__(self, joint_info: tuple): 3 | self._info = joint_info 4 | 5 | idx = property(lambda self: self._info[0]) 6 | name = property(lambda self: self._info[1].decode("UTF-8")) 7 | type = property(lambda self: self._info[2]) 8 | q_idx = property(lambda self: self._info[3]) 9 | u_idx = property(lambda self: self._info[4]) 10 | damping = property(lambda self: self._info[6]) 11 | friction = property(lambda self: self._info[7]) 12 | limits = property(lambda self: (self._info[8], self._info[9])) 13 | max_force = property(lambda self: self._info[10]) 14 | max_vel = property(lambda self: self._info[11]) 15 | link_name = property(lambda self: self._info[12].decode("UTF-8")) 16 | axis = property(lambda self: self._info[13]) 17 | parent_frame_pos = property(lambda self: self._info[14]) 18 | parent_frame_orn = property(lambda self: self._info[15]) 19 | parent_idx = property(lambda self: self._info[16]) 20 | 21 | import pybullet as pyb 22 | joint_types = {pyb.JOINT_REVOLUTE: 'rev', pyb.JOINT_PRISMATIC: 'pri', pyb.JOINT_SPHERICAL: 'sph', 23 | pyb.JOINT_PLANAR: 'pla', pyb.JOINT_FIXED: 'fix'} 24 | 25 | axis_dict = {(1., 0., 0.): '+X', (0., 1., 0.): '+Y', (0., 0., 1.): '+Z', 26 | (-1., 0., 0.): '-X', (0., -1., 0.): '-Y', (0., 0., -1.): '-Z', } 27 | 28 | 29 | class DynamicsInfo(object): 30 | def __init__(self, dynamics_info: tuple): 31 | self._info = dynamics_info 32 | 33 | mass = property(lambda self: self._info[0]) 34 | lateral_fric = property(lambda self: self._info[1]) 35 | inertia = property(lambda self: self._info[2]) 36 | inertial_pos = property(lambda self: self._info[3]) 37 | inertial_orn = property(lambda self: self._info[4]) 38 | restitution = property(lambda self: self._info[5]) 39 | rolling_fric = property(lambda self: self._info[6]) 40 | 
spinning_fric = property(lambda self: self._info[7]) 41 | damping = property(lambda self: self._info[8]) 42 | stiffness = property(lambda self: self._info[9]) 43 | body_type = property(lambda self: self._info[10]) 44 | collision_margin = property(lambda self: self._info[11]) 45 | -------------------------------------------------------------------------------- /qdpgym/sim/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/common/__init__.py -------------------------------------------------------------------------------- /qdpgym/sim/common/identify.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | __all__ = ['ActuatorNet'] 6 | 7 | 8 | class ActuatorNet(nn.Module): 9 | activation = nn.Softsign 10 | 11 | def __init__( 12 | self, 13 | input_dim=6, 14 | output_dim=1, 15 | hidden_dims=(32, 32, 32) 16 | ): 17 | super().__init__() 18 | layers = [] 19 | self.input_dim, self.output_dim, self.hidden_dims = input_dim, output_dim, hidden_dims 20 | for dim in hidden_dims: 21 | layers.append(nn.Linear(input_dim, dim)) 22 | layers.append(self.activation()) 23 | input_dim = dim 24 | layers.append(nn.Linear(input_dim, output_dim)) 25 | self.layers = nn.Sequential(*layers) 26 | self.device = torch.device('cpu') 27 | 28 | def forward(self, state): 29 | return self.layers(state) 30 | 31 | def to(self, device, *args, **kwargs): 32 | self.device = torch.device(device) 33 | return super().to(device, *args, **kwargs) 34 | 35 | def calc_torque(self, err1, err2, err3, vel1, vel2, vel3): 36 | with torch.inference_mode(): 37 | X = np.array( 38 | (err1, err2, err3, vel1, vel2, vel3), 39 | dtype=np.float32 40 | ).transpose() 41 | try: 42 | Y = self(torch.as_tensor(X, device=self.device)) 43 | except RuntimeError: 44 | self.device = next(self.parameters()).device 45 | Y = self(torch.as_tensor(X, device=self.device)) 46 | return Y.double().squeeze().cpu().numpy() 47 | -------------------------------------------------------------------------------- /qdpgym/sim/common/motor.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Iterable 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from qdpgym.sim.common.identify import ActuatorNet 8 | from qdpgym.utils import get_padded, replace_is 9 | 10 | 11 | class MotorSim(object): 12 | def __init__(self, frequency): 13 | self._freq = frequency 14 | self._observe_done = False 15 | self._pos = self._vel = 0. 
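        # The deques below keep short command/observation histories so that
        # input/output latency can be emulated by reading delayed samples.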
16 | self._residue = None 17 | 18 | self._joint_lower = self._joint_upper = None 19 | self._torque_limits = None 20 | self._cmd_clip = None 21 | self._input_delay = self._output_delay = 0 22 | 23 | self._torque_history = collections.deque(maxlen=11) 24 | self._residue_history = collections.deque(maxlen=11) 25 | self._obs_history = collections.deque(maxlen=11) 26 | 27 | def set_latency(self, input_delay: int = 0, output_delay: int = 0): 28 | self._input_delay = input_delay 29 | self._output_delay = output_delay 30 | 31 | def set_joint_limits(self, lower: Iterable = None, upper: Iterable = None): 32 | lower, upper = list(lower), list(upper) 33 | if lower is not None and None in lower: 34 | replace_is(lower, None, -np.inf) 35 | if upper is not None and None in upper: 36 | replace_is(upper, None, np.inf) 37 | self._joint_lower = np.array(lower) 38 | self._joint_upper = np.array(upper) 39 | 40 | def set_torque_limits(self, upper): 41 | self._torque_limits = np.asarray(upper) 42 | 43 | def set_cmd_clip(self, upper): 44 | self._cmd_clip = np.asarray(upper) 45 | 46 | def reset(self): 47 | self._observe_done = False 48 | self._torque_history.clear() 49 | self._residue_history.clear() 50 | 51 | def update_observation(self, pos, vel): 52 | self._observe_done = True 53 | self._obs_history.append((np.array(pos), np.array(vel))) 54 | self._pos, self._vel = get_padded(self._obs_history, -self._input_delay - 1) 55 | 56 | def apply_hybrid(self, des_pos, ff_torque): 57 | if self._joint_lower is not None or self._joint_upper is not None: 58 | des_pos = np.clip(des_pos, self._joint_lower, self._joint_upper) 59 | self._residue = des_pos - self._pos 60 | if self._cmd_clip: 61 | self._residue = np.clip(self._residue, -self._cmd_clip, self._cmd_clip) 62 | self._residue_history.append(self._residue) 63 | return self.apply_torque(self.calc_torque() + ff_torque) 64 | 65 | def apply_position(self, des_pos): 66 | return self.apply_hybrid(des_pos, 0) 67 | 68 | def apply_torque(self, des_torque): 69 | assert self._observe_done, 'Update observation before executing a command' 70 | if self._torque_limits is not None: 71 | des_torque = np.clip(des_torque, -self._torque_limits, self._torque_limits) 72 | self._torque_history.append(des_torque) 73 | self._observe_done = False 74 | return get_padded(self._torque_history, -self._output_delay - 1) 75 | 76 | def calc_torque(self): 77 | raise NotImplementedError 78 | 79 | 80 | class PdMotorSim(MotorSim): 81 | def __init__(self, frequency, kp, kd): 82 | super().__init__(frequency) 83 | self._kp, self._kd = np.asarray(kp), np.asarray(kd) 84 | self._kp_part, self._kd_part = 0., 0. 
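    # PD control law: torque = kp * (desired_pos - pos) - kd * joint_vel,
    # i.e. damping acts on the measured velocity, not on a velocity error.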
85 | 86 | def calc_torque(self): 87 | self._kp_part, self._kd_part = self._kp * self._residue, self._kd * self._vel 88 | return self._kp_part - self._kd_part 89 | 90 | 91 | class ActuatorNetSim(MotorSim): 92 | def __init__(self, frequency, net=None): 93 | super().__init__(frequency) 94 | self._net = net 95 | 96 | def load_params(self, model_path, device='cpu'): 97 | model_info = torch.load(model_path, map_location=device) 98 | self._net = ActuatorNet(hidden_dims=model_info['hidden_dims']).to(device) 99 | self._net.load_state_dict(model_info['model']) 100 | 101 | def calc_torque(self): 102 | return self._net.calc_torque( 103 | self._residue, 104 | get_padded(self._residue_history, -4), 105 | get_padded(self._residue_history, -7), 106 | self._vel, 107 | get_padded(self._obs_history, -4)[1], 108 | get_padded(self._obs_history, -7)[1] 109 | ) 110 | -------------------------------------------------------------------------------- /qdpgym/sim/common/noisyhandle.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Deque, Optional 3 | 4 | import numpy as np 5 | 6 | from qdpgym import tf, utils as ut 7 | from qdpgym.sim.abc import Quadruped, QuadrupedHandle, Snapshot 8 | 9 | 10 | class NoisyHandle(QuadrupedHandle): 11 | def __init__(self, robot: Quadruped, frequency, latency=0): 12 | self._robot = robot 13 | self._freq = frequency 14 | self._latency = latency 15 | 16 | self._obs: Optional[Snapshot] = None 17 | self._obs_buffer: Deque[Snapshot] = collections.deque(maxlen=20) 18 | self._obs_history: Deque[Snapshot] = collections.deque(maxlen=100) 19 | 20 | @property 21 | def raw(self): 22 | return self._robot 23 | 24 | @property 25 | def latency(self): 26 | return self._latency 27 | 28 | @latency.setter 29 | def latency(self, value): 30 | self._latency = value 31 | 32 | @property 33 | def obs_history(self): 34 | return ut.PadWrapper(self._obs_history) 35 | 36 | @property 37 | def cmd_history(self): 38 | return self._robot.cmd_history 39 | 40 | @property 41 | def not_delayed(self): 42 | if self._obs_buffer: 43 | return self._obs_buffer[-1] 44 | return self._obs 45 | 46 | def update_observation( 47 | self, 48 | state: Snapshot, 49 | random_gen: np.random.Generator 50 | ): 51 | """Add noise on observation""" 52 | add_noise = random_gen.normal 53 | obs = Snapshot( 54 | rpy=add_noise(state.rpy, 1e-2), 55 | velocimeter=add_noise(state.velocimeter, 5e-2), 56 | gyro=add_noise(state.gyro, 5e-2), 57 | joint_pos=add_noise(state.joint_pos, 5e-3), 58 | joint_vel=add_noise(state.joint_vel, 1e-1) 59 | ) 60 | obs.orientation = tf.Quaternion.from_rpy(obs.rpy) 61 | self._obs_buffer.append(obs) 62 | if len(self._obs_buffer) <= self._latency * self._freq: 63 | self._obs = self._obs_buffer[0] 64 | else: 65 | self._obs = self._obs_buffer.popleft() 66 | self._obs_history.append(self._obs) 67 | 68 | def get_base_pos(self) -> np.ndarray: 69 | return self._obs.position 70 | 71 | def get_base_orn(self) -> np.ndarray: 72 | return self._obs.orientation 73 | 74 | def get_base_rot(self) -> np.ndarray: 75 | return tf.Rotation.from_quaternion(self._obs.orientation) 76 | 77 | def get_base_rpy(self) -> np.ndarray: 78 | return self._obs.rpy 79 | 80 | def get_base_rpy_rate(self) -> np.ndarray: 81 | return tf.get_rpy_rate_from_ang_vel(self._obs.rpy, self._obs.angular_vel) 82 | 83 | def get_base_lin(self) -> np.ndarray: 84 | return self._obs.linear_vel 85 | 86 | def get_base_ang(self) -> np.ndarray: 87 | return self._obs.angular_vel 88 | 89 | def 
get_joint_pos(self) -> np.ndarray: 90 | return self._obs.joint_pos 91 | 92 | def get_joint_vel(self) -> np.ndarray: 93 | return self._obs.joint_vel 94 | 95 | def get_velocimeter(self): 96 | return self._obs.velocimeter 97 | 98 | def get_gyro(self): 99 | return self._obs.gyro 100 | 101 | def get_accelerometer(self): 102 | return self._obs.accelerometer 103 | 104 | def get_torso_contact(self): 105 | return self._robot.get_torso_contact() 106 | 107 | def get_leg_contacts(self): 108 | return self._robot.get_leg_contacts() 109 | 110 | def get_last_torque(self) -> np.ndarray: 111 | return self._robot.get_last_torque() 112 | 113 | def get_state_history(self, latency): 114 | return ut.get_padded(self._obs_history, -int(latency * self._freq) - 1) 115 | 116 | def get_cmd_history(self, latency): 117 | return self._robot.get_cmd_history(latency) 118 | 119 | def get_foot_contacts(self): 120 | return self._robot.get_foot_contacts() 121 | 122 | def get_contact_forces(self): 123 | return self._robot.get_contact_forces() 124 | 125 | def get_force_sensor(self): 126 | return self._robot.get_force_sensor() 127 | 128 | def get_last_command(self) -> np.ndarray: 129 | return self._robot.get_last_command() 130 | 131 | def reset(self): 132 | self._obs = None 133 | self._obs_buffer.clear() 134 | self._obs_history.clear() 135 | -------------------------------------------------------------------------------- /qdpgym/sim/common/tg.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import random 3 | 4 | import numpy as np 5 | 6 | from qdpgym.utils import Angle 7 | 8 | 9 | def power(x, exp): 10 | _x = 1 11 | for _ in range(exp + 1): 12 | yield _x 13 | _x *= x 14 | 15 | 16 | def vertical_tg(h=0.12): # 0.2 in paper 17 | coeff1 = np.array((0., 0., 3., -2.)) * h 18 | coeff2 = np.array((-4., 12., -9., 2.)) * h 19 | 20 | def _tg(phases): 21 | priori = [] 22 | for phi in phases: 23 | k = phi * 2 / np.pi 24 | if k <= 0 or k >= 2: 25 | priori.append(np.zeros(3)) 26 | k_pow = list(power(k, 3)) 27 | if 0 < k <= 1: 28 | priori.append((0., 0., coeff1 @ k_pow)) 29 | if 1 < k < 2: 30 | priori.append((0., 0., coeff2 @ k_pow)) 31 | return np.array(priori) 32 | 33 | return _tg 34 | 35 | 36 | class PhaseRoller(object): 37 | TROT = (0.0, -np.pi, -np.pi, 0.0) 38 | WALK = (0.0, -np.pi / 2, -np.pi, -np.pi * 3 / 2) 39 | PACE = (0.0, -np.pi, 0.0, -np.pi) 40 | base_frequency = 2.0 # TODO: COMPLETE THE STATE MACHINE 41 | 42 | class InitType(enum.IntEnum): 43 | FIXED = 0 44 | SYMMETRIC = 1 45 | RANDOM = 2 46 | 47 | def __init__(self, time_step, random_state, init_type='random'): 48 | self._time_step = time_step 49 | self._random = random_state 50 | self._init_type = self._check_init_type(init_type) 51 | 52 | self._init_phases = self._get_init_phases() 53 | self._phases = self._init_phases.copy() 54 | self._frequency = np.ones(4) * self.base_frequency 55 | self._cycles = np.zeros(4) 56 | 57 | @staticmethod 58 | def symmetric(phases): 59 | fr, fl, rr, rl = phases 60 | return fl, fr, rl, rr 61 | 62 | def random_symmetric(self, phases): 63 | return phases if self._random.random() > 0.5 else self.symmetric(phases) 64 | 65 | def _check_init_type(self, init_type): 66 | if init_type == 'fixed': 67 | return self.InitType.FIXED 68 | elif init_type == 'symmetric': 69 | return self.InitType.SYMMETRIC 70 | elif init_type == 'random': 71 | return self.InitType.RANDOM 72 | else: 73 | raise RuntimeError(f'Unknown TG Init Mode {init_type}') 74 | 75 | def _get_init_phases(self): 76 | if self._init_type == 
self.InitType.FIXED: 77 | return np.array(self.TROT, dtype=np.float32) 78 | elif self._init_type == self.InitType.SYMMETRIC: 79 | return np.array(self.random_symmetric(self.TROT), dtype=np.float32) 80 | elif self._init_type == self.InitType.RANDOM: 81 | return self._random.uniform(-np.pi, np.pi, 4).astype(np.float32) 82 | 83 | @property 84 | def phases(self): 85 | return self._phases 86 | 87 | @property 88 | def frequency(self): 89 | return self._frequency 90 | 91 | @property 92 | def cycles(self): 93 | return self._cycles 94 | 95 | def reset(self): 96 | self._init_phases = self._get_init_phases() 97 | self._phases = self._init_phases.copy() 98 | self._frequency = np.ones(4) * self.base_frequency 99 | self._cycles = np.zeros(4) 100 | 101 | def update(self): 102 | _phases = self._phases.copy() 103 | self._phases += self._frequency * self._time_step * 2 * np.pi 104 | flags = np.logical_and(Angle.norm(_phases - self._init_phases) < 0, 105 | Angle.norm(self._phases - self._init_phases) >= 0) 106 | self._cycles[(flags.nonzero(),)] += 1 107 | self._phases = Angle.norm(self._phases) 108 | return self._phases 109 | 110 | 111 | class TgStateMachine(PhaseRoller): 112 | def __init__( 113 | self, 114 | time_step, 115 | random_state, 116 | traj_gen, 117 | init_type='symmetric', 118 | freq_bounds=(0.5, 3.0) 119 | ): 120 | super().__init__(time_step, random_state, init_type) 121 | self._freq_bounds = freq_bounds 122 | self._traj_gen = traj_gen 123 | 124 | def update(self, frequency_offsets=None): # FIXME: how freq_offsets works? 125 | if frequency_offsets is not None: 126 | frequency_offsets = np.asarray(frequency_offsets) 127 | self._frequency = self.base_frequency + frequency_offsets 128 | self._frequency = np.clip(self._frequency, *self._freq_bounds) 129 | return super().update() 130 | 131 | def get_priori_trajectory(self): 132 | return np.asarray(self._traj_gen(self._phases)) 133 | -------------------------------------------------------------------------------- /qdpgym/sim/mjc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/mjc/__init__.py -------------------------------------------------------------------------------- /qdpgym/sim/mjc/env.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Optional, Tuple, Any, Union 3 | 4 | import gym 5 | import numpy as np 6 | from dm_control import mjcf 7 | 8 | from qdpgym.sim.abc import Task, Environment, ARRAY_LIKE 9 | from .quadruped import Aliengo 10 | from .terrain import TerrainBase 11 | from qdpgym.utils import PadWrapper, tf 12 | 13 | 14 | class QuadrupedEnv(Environment): 15 | _observation_space = None 16 | _action_space = None 17 | 18 | def __init__( 19 | self, 20 | robot: Aliengo, 21 | arena: TerrainBase, 22 | task: Task, 23 | timestep=2e-3, 24 | time_limit: float = None, 25 | num_substeps: int = 10, 26 | identifier: str = None, 27 | ): 28 | self._robot = robot 29 | self._arena = arena 30 | self._task = task 31 | self._timestep = timestep 32 | self._time_limit = time_limit 33 | self._num_substeps = num_substeps 34 | self._identifier = identifier or f'{hex(id(self))[-7:]}' 35 | self._step_freq = 1 / (self.timestep * self._num_substeps) 36 | 37 | self._init = False 38 | self._elapsed_sim_steps = 0 39 | 40 | self._arena.mjcf_model.option.timestep = timestep 41 | self._physics: Optional[mjcf.Physics] = None 42 | 
self._task.register_env(self._robot, self) 43 | self._action_history = collections.deque(maxlen=10) 44 | self._perturbation = None 45 | 46 | self._interact_terrain_samples = [] 47 | self._interact_terrain_normal: Optional[np.ndarray] = None 48 | self._interact_terrain_height = 0.0 49 | 50 | @property 51 | def observation_space(self) -> gym.Space: 52 | if self._observation_space is None: 53 | if hasattr(self._task, 'observation_space'): 54 | self._observation_space = self._task.observation_space 55 | else: 56 | self._observation_space = gym.Space() 57 | return self._observation_space 58 | 59 | @property 60 | def action_space(self) -> gym.Space: 61 | if self._action_space is None: 62 | if hasattr(self._task, 'action_space'): 63 | self._action_space = self._task.action_space 64 | else: 65 | self._action_space = gym.Space((12,), float) 66 | return self._action_space 67 | 68 | def reset( 69 | self, 70 | seed: Optional[int] = None, 71 | return_info: bool = False, 72 | options: Optional[dict] = None, 73 | ) -> Union[Any, Tuple[Any, dict]]: 74 | 75 | super().reset(seed=seed, return_info=return_info, options=options) 76 | 77 | self._init = True 78 | self._elapsed_sim_steps = 0 79 | self._action_history.clear() 80 | 81 | self._robot.add_to(self._arena) 82 | self._robot.init_mjcf_model(self.np_random) 83 | self._physics = mjcf.Physics.from_mjcf_model(self._arena.mjcf_model) 84 | 85 | self._task.init_episode() 86 | self._robot.init_physics(self._physics, self.np_random) 87 | 88 | for i in range(50): 89 | self._robot.update_observation(None, minimal=True) 90 | self._robot.apply_command(self._robot.STANCE_CONFIG) 91 | self._physics.step() 92 | 93 | self._action_history.append(np.array(self._robot.STANCE_CONFIG)) 94 | self._physics.data.ptr.time = 0. 95 | self._robot.update_observation(self.np_random) 96 | 97 | return ( 98 | self._task.get_observation() 99 | # if return_info else (self._task.get_observation(), {}) 100 | ) 101 | 102 | def close(self): 103 | pass 104 | 105 | @property 106 | def robot(self): 107 | return self._robot 108 | 109 | @property 110 | def physics(self): 111 | return self._physics 112 | 113 | @property 114 | def arena(self): 115 | return self._arena 116 | 117 | @arena.setter 118 | def arena(self, value): 119 | self._init = False 120 | self._arena = value 121 | self._robot.add_to(self._arena) 122 | 123 | @property 124 | def action_history(self): 125 | return PadWrapper(self._action_history) 126 | 127 | @property 128 | def sim_time(self): 129 | return self._physics.time() 130 | 131 | @property 132 | def timestep(self): 133 | return self._timestep 134 | 135 | @property 136 | def num_substeps(self): 137 | return self._num_substeps 138 | 139 | @property 140 | def identifier(self) -> str: 141 | return self._identifier 142 | 143 | def step(self, action: ARRAY_LIKE) -> Tuple[Any, float, bool, dict]: 144 | assert self._init, 'Call `init_episode` before `step`!' 
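        # One env step advances `num_substeps` physics steps, linearly blending
        # from the previous action to the new one so commands change smoothly.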
145 | action = self._task.before_step(action) 146 | action = np.asarray(action) 147 | prev_action = self._action_history[-1] 148 | self._action_history.append(action) 149 | 150 | for i in range(self._num_substeps): 151 | weight = (i + 1) / self._num_substeps 152 | current_action = action * weight + prev_action * (1 - weight) 153 | self._robot.apply_command(current_action) 154 | self._task.before_substep() 155 | 156 | self._apply_perturbation() 157 | self._physics.step() 158 | self._elapsed_sim_steps += 1 159 | self._update_observation() 160 | 161 | self._task.after_substep() 162 | self._task.after_step() 163 | 164 | done = False 165 | info = {} 166 | if self._task.is_failed(): 167 | done = True 168 | self._task.on_fail() 169 | elif ((self._time_limit is not None and self.sim_time >= self._time_limit) 170 | or self._task.is_succeeded()): 171 | done = True 172 | info['TimeLimit.truncated'] = True 173 | self._task.on_success() 174 | 175 | reward, reward_info = self._task.get_reward(True) 176 | info['reward_info'] = reward_info 177 | return ( 178 | self._task.get_observation(), reward, done, info 179 | ) 180 | 181 | def _update_observation(self): 182 | self._robot.update_observation(self.np_random) 183 | 184 | self._interact_terrain_samples.clear() 185 | self._interact_terrain_height = 0. 186 | xy_points = self._robot.get_foot_pos() 187 | for x, y, _ in xy_points: 188 | h = self._arena.get_height(x, y) 189 | self._interact_terrain_height += h 190 | self._interact_terrain_samples.append((x, y, h)) 191 | self._interact_terrain_height /= 4 192 | self._interact_terrain_normal = tf.estimate_normal(self._interact_terrain_samples) 193 | 194 | def get_action_rate(self) -> np.ndarray: 195 | if len(self._action_history) < 2: 196 | return np.zeros(12) 197 | actions = [self._action_history[-i - 1] for i in range(2)] 198 | return (actions[0] - actions[1]) * self._step_freq 199 | 200 | def get_action_accel(self) -> np.ndarray: 201 | if len(self._action_history) < 3: 202 | return np.zeros(12) 203 | actions = [self._action_history[-i - 1] for i in range(3)] 204 | return (actions[0] - 2 * actions[1] + actions[2]) * self._step_freq ** 2 205 | 206 | def get_relative_robot_height(self) -> float: 207 | return self._robot.get_base_pos()[2] - self._interact_terrain_height 208 | 209 | def get_interact_terrain_normal(self) -> np.ndarray: 210 | return self._interact_terrain_normal 211 | 212 | def get_interact_terrain_rot(self) -> np.ndarray: 213 | return tf.Rotation.from_zaxis(self._interact_terrain_normal) 214 | 215 | def get_perturbation(self, in_robot_frame=False): 216 | if self._perturbation is None: 217 | return None 218 | elif in_robot_frame: 219 | rotation = self._robot.get_base_rot() 220 | perturbation = np.concatenate( 221 | [rotation.T @ self._perturbation[i * 3:i * 3 + 3] for i in range(2)] 222 | ) 223 | else: 224 | perturbation = self._perturbation 225 | return perturbation 226 | 227 | def set_perturbation(self, value=None): 228 | self._perturbation = np.array(value) 229 | 230 | def _apply_perturbation(self): 231 | if self._perturbation is not None: 232 | self._physics.bind(self._robot.entity.root_body).xfrc_applied = self._perturbation 233 | -------------------------------------------------------------------------------- /qdpgym/sim/mjc/hooks.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import mujoco as mjlib 4 | 5 | from qdpgym import tf 6 | from qdpgym.sim.abc import Hook 7 | from qdpgym.sim.mjc.viewer import ViewerMj 8 | 9 | 
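# Hooks are attached through a task rather than the environment, e.g. (sketch
# using classes from this repo):
#     task = NullTask()
#     task.add_hook(ViewerHook())
# The task's lifecycle callbacks (init_episode, before_step, after_substep, ...)
# then fan out to every registered hook.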
10 | class ViewerHook(Hook): 11 | def __init__(self): 12 | self._viewer: Optional[ViewerMj] = None 13 | 14 | def init_episode(self, robot, env): 15 | self._viewer = ViewerMj(env.physics.model.ptr, env.physics.data.ptr) 16 | 17 | def after_substep(self, robot, env): 18 | perturb = env.get_perturbation() 19 | if perturb is not None: 20 | force = perturb[:3] 21 | magnitude = tf.vnorm(force) 22 | mat = tf.Rotation.from_zaxis(force / magnitude) 23 | self._viewer.add_marker( 24 | type=mjlib.mjtGeom.mjGEOM_ARROW, 25 | pos=robot.get_base_pos(), mat=mat, 26 | size=[0.01, 0.01, magnitude / 20], 27 | rgba=(1., 0., 0., 1.) 28 | ) 29 | self._viewer.render() 30 | -------------------------------------------------------------------------------- /qdpgym/sim/mjc/terrain.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union, Tuple 3 | 4 | import numpy as np 5 | from dm_control import composer 6 | from scipy.interpolate import interp2d 7 | 8 | from qdpgym.sim.abc import NUMERIC, Terrain 9 | 10 | 11 | def _process_size(size): 12 | if isinstance(size, (int, float)): 13 | return size, size 14 | else: 15 | return tuple(size) 16 | 17 | 18 | class TerrainBase(composer.Arena, Terrain, metaclass=abc.ABCMeta): 19 | pass 20 | 21 | 22 | class TexturedTerrainBase(TerrainBase, metaclass=abc.ABCMeta): 23 | reflectance = 0.2 24 | 25 | def _build(self, size, name=None): 26 | super()._build(name=name) 27 | self._x_size, self._y_size = _process_size(size) 28 | 29 | self._mjcf_root.visual.headlight.set_attributes( 30 | ambient=[.4, .4, .4], diffuse=[.8, .8, .8], specular=[.1, .1, .1]) 31 | 32 | self._ground_texture = self._mjcf_root.asset.add( 33 | 'texture', name='groundplane', rgb1=[.2, .3, .4], rgb2=[.1, .2, .3], 34 | type='2d', builtin='checker', width=200, height=200, 35 | mark='edge', markrgb=[0.8, 0.8, 0.8]) 36 | # Makes white squares exactly 1x1 length units. 37 | self._ground_material = self._mjcf_root.asset.add( 38 | 'material', name='groundplane', texrepeat=[2, 2], 39 | texuniform=True, reflectance=self.reflectance, texture=self._ground_texture) 40 | 41 | self._terrain_geom = None 42 | 43 | @property 44 | def ground_geoms(self): 45 | return self._terrain_geom, 46 | 47 | def regenerate(self, random_state): 48 | pass 49 | 50 | @property 51 | def size(self): 52 | return self._x_size, self._y_size 53 | 54 | def out_of_range(self, x, y): 55 | return abs(x) > self._x_size / 2 - 1 or abs(y) > self._y_size / 2 - 1 56 | 57 | 58 | class Plain(TexturedTerrainBase): 59 | def _build(self, size, name='groundplane'): 60 | super()._build(name=name, size=size) 61 | 62 | # Build groundplane. 
63 | self._terrain_geom = self._mjcf_root.worldbody.add( 64 | 'geom', type='plane', name='terrain', material=self._ground_material, 65 | size=[self._x_size / 2, self._x_size / 2, 0.25]) 66 | self._terrain_geom.friction = [1, 0.005, 0.0001] 67 | 68 | def get_height(self, x, y): 69 | return 0.0 70 | 71 | def get_normal(self, x, y): 72 | return np.array((0., 0., 1.)) 73 | 74 | 75 | class Hills(TexturedTerrainBase): 76 | def _build(self, size, resol: float, 77 | *rough_dsp: Tuple[NUMERIC, NUMERIC], 78 | name='hills'): 79 | super()._build(name=name, size=size) 80 | self._resol = resol 81 | self._ncol = int(self._x_size / resol) + 1 82 | self._nrow = int(self._y_size / resol) + 1 83 | self._rough_dsp = rough_dsp 84 | 85 | self._mjcf_root.asset.add( 86 | 'hfield', name=f'{name}_hfield', 87 | nrow=self._nrow, ncol=self._ncol, 88 | size=[self._x_size / 2, self._y_size / 2, 1, 0.1]) 89 | self._terrain_geom = self._mjcf_root.worldbody.add( 90 | 'geom', name='terrain', type='hfield', material=self._ground_material, 91 | pos=(0, 0, -0.01), hfield=f'{name}_hfield') 92 | 93 | def initialize_episode(self, physics, random_state: np.random.RandomState): 94 | physics.model.hfield_data[...] = self.regenerate(random_state) 95 | 96 | def regenerate(self, random_state): 97 | hfield_data = np.zeros((self._nrow, self._ncol)) 98 | for rough, dsp in self._rough_dsp: 99 | x = np.arange(-3, int(self._ncol / dsp) + 4) 100 | y = np.arange(-3, int(self._nrow / dsp) + 4) 101 | hfield_dsp = random_state.uniform(0, rough, (x.size, y.size)) 102 | terrain_func = interp2d(x, y, hfield_dsp, kind='cubic') 103 | x_usp = np.arange(self._ncol) / dsp 104 | y_usp = np.arange(self._nrow) / dsp 105 | hfield_data += terrain_func(x_usp, y_usp) 106 | return hfield_data.reshape(-1) 107 | -------------------------------------------------------------------------------- /qdpgym/sim/resources/acnet_220526.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/acnet_220526.pt -------------------------------------------------------------------------------- /qdpgym/sim/resources/actuator_net_with_history.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/actuator_net_with_history.pt -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/calf.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/calf.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/hip.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/hip.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/thigh.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/thigh.stl 
-------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/thigh_mirror.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/thigh_mirror.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/trunk.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/trunk.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/model/aliengo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 173 | -------------------------------------------------------------------------------- /qdpgym/sim/task.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, List, Any 2 | 3 | from .abc import Task, Hook, Quadruped, Environment 4 | 5 | 6 | class NullTask(Task): 7 | """The Simplest Task Structure""" 8 | 9 | def __init__(self): 10 | self._robot: Optional[Quadruped] = None 11 | self._env: Optional[Environment] = None 12 | self._hooks: List[Hook] = [] 13 | self._hook_names = {} 14 | self._options = [] 15 | 16 | def register_env(self, robot, env): 17 | self._robot = robot 18 | self._env = env 19 | for hook in self._hooks: 20 | hook.initialize(self._robot, self._env) 21 | 22 | def add_hook(self, hook: Hook, name=None): 23 | if name is None: 24 | name = hook.__class__.__name__ 25 | if name in self._hook_names: 26 | raise ValueError(f'Duplicated Hook `{name}`') 27 | hook.register_task(self) 28 | self._hooks.append(hook) 29 | self._hook_names[name] = hook 30 | return self 31 | 32 | def remove_hook(self, name=None): 33 | if name not in self._hook_names: 34 | raise RuntimeError(f'Hook `{name}` not found') 35 | obj = self._hook_names[name] 36 | for i, hook in enumerate(self._hooks): 37 | if hook is obj: 38 | self._hooks.pop(i) 39 | 40 | def get_observation(self): 41 | return None 42 | 43 | def get_reward(self, detailed=True): 44 | if detailed: 45 | return 0., {} 46 | else: 47 | return 0. 
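    # Concrete tasks override the trivial observation/reward above; BasicTask
    # below assembles its reward from a RewardRegistry of weighted terms.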
48 | 49 | def is_succeeded(self): 50 | return False 51 | 52 | def is_failed(self): 53 | return False 54 | 55 | def init_episode(self): 56 | for hook in self._hooks: 57 | hook.init_episode(self._robot, self._env) 58 | 59 | def before_step(self, action): 60 | for hook in self._hooks: 61 | hook.before_step(self._robot, self._env) 62 | return action 63 | 64 | def before_substep(self): 65 | for hook in self._hooks: 66 | hook.before_substep(self._robot, self._env) 67 | 68 | def after_step(self): 69 | for hook in self._hooks: 70 | hook.after_step(self._robot, self._env) 71 | 72 | def after_substep(self): 73 | for hook in self._hooks: 74 | hook.after_substep(self._robot, self._env) 75 | 76 | def on_success(self): 77 | for hook in self._hooks: 78 | hook.on_success(self._robot, self._env) 79 | 80 | def on_fail(self): 81 | for hook in self._hooks: 82 | hook.on_fail(self._robot, self._env) 83 | 84 | 85 | class RewardRegistry(object): 86 | def __init__(self): 87 | self._robot = None 88 | self._env = None 89 | self._task = None 90 | 91 | self._rewards_set = set() 92 | self._rewards_weights = [] 93 | self._weight_sum = 0.0 94 | self._coefficient = 1.0 95 | self._reward_details = {} 96 | 97 | def register_task(self, robot, env, task): 98 | self._robot, self._env, self._task = robot, env, task 99 | 100 | def add_reward( 101 | self, 102 | name: str, 103 | reward_obj: Callable[..., float], 104 | weight: float 105 | ): 106 | if name in self._rewards_set: 107 | raise RuntimeError(f'Duplicated Reward Type {name}') 108 | self._rewards_set.add(name) 109 | self._weight_sum += weight 110 | self._rewards_weights.append((reward_obj, weight)) 111 | 112 | def set_coeff(self, coeff): 113 | self._coefficient = coeff 114 | 115 | def report(self): 116 | from qdpgym.utils import colored_str 117 | print(colored_str(f'Got {len(self._rewards_weights)} types of rewards:\n', 'white'), 118 | f"{'Reward Type':<28}Weight * {self._coefficient:.3f}", sep='') 119 | for reward, weight in self._rewards_weights: 120 | reward_name: str = reward.__class__.__name__ 121 | length = len(reward_name) 122 | if reward_name.endswith('Reward'): 123 | reward_name = colored_str(reward_name, 'green') 124 | elif reward_name.endswith('Penalty'): 125 | reward_name = colored_str(reward_name, 'magenta') 126 | print(f'{reward_name}{" " * (28 - length)}{weight:.3f}') 127 | print() 128 | 129 | def calc_reward(self, detailed=True): 130 | self._reward_details.clear() 131 | reward_value = 0.0 132 | for reward, weight in self._rewards_weights: 133 | rew = reward(self._robot, self._env, self._task) 134 | self._reward_details[reward.__class__.__name__] = rew 135 | reward_value += rew * weight 136 | reward_value *= self._coefficient 137 | if detailed: 138 | return reward_value, self._reward_details 139 | else: 140 | return reward_value 141 | 142 | 143 | class BasicTask(NullTask): 144 | ALL_REWARDS: Any 145 | 146 | def __init__(self, substep_reward_on=True): 147 | self._reward_registry = RewardRegistry() 148 | self._substep_reward_on = substep_reward_on 149 | self._reward = 0. 
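        # When substep rewards are enabled, per-substep rewards are accumulated
        # here and averaged over the substep count in after_step().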
150 | self._reward_details = {} 151 | self._substep_cnt = 0 152 | 153 | super().__init__() 154 | 155 | def add_reward(self, name: str, weight: float = 0.): 156 | reward_class = getattr(self.ALL_REWARDS, name) 157 | self._reward_registry.add_reward(name, reward_class(), weight) 158 | 159 | def set_reward_coeff(self, value): 160 | self._reward_registry.set_coeff(value) 161 | 162 | def register_env(self, robot, env): 163 | super().register_env(robot, env) 164 | self._reward_registry.register_task(robot, env, self) 165 | 166 | def before_step(self, action): 167 | self._reward = 0. 168 | self._reward_details.clear() 169 | return super().before_step(action) 170 | 171 | def after_substep(self): 172 | if self._substep_reward_on: 173 | reward, reward_details = self._reward_registry.calc_reward(detailed=True) 174 | self._reward += reward 175 | if self._reward_details: 176 | for k, v in reward_details.items(): 177 | self._reward_details[k] += v 178 | else: 179 | self._reward_details = reward_details.copy() 180 | self._substep_cnt += 1 181 | super().after_substep() 182 | 183 | def after_step(self): 184 | if self._substep_reward_on: 185 | self._reward /= self._substep_cnt 186 | for k in self._reward_details: 187 | self._reward_details[k] /= self._substep_cnt 188 | self._substep_cnt = 0 189 | else: 190 | self._reward, self._reward_details = self._reward_registry.calc_reward(detailed=True) 191 | super().after_step() 192 | 193 | def get_reward(self, detailed=True): 194 | if detailed: 195 | return self._reward, self._reward_details 196 | else: 197 | return self._reward 198 | -------------------------------------------------------------------------------- /qdpgym/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tasks/__init__.py -------------------------------------------------------------------------------- /qdpgym/tasks/identify/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tasks/identify/__init__.py -------------------------------------------------------------------------------- /qdpgym/tasks/identify/identify.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | 5 | import numpy as np 6 | import torch 7 | import wandb 8 | from torch import nn 9 | from torch.utils.data import DataLoader, Dataset, random_split, ConcatDataset 10 | 11 | from qdpgym.sim.common.identify import ActuatorNet 12 | from qdpgym.utils import get_timestamp, log 13 | 14 | 15 | class RobotDatasetWithHistory(Dataset): 16 | def __init__(self, path, history_interval, slices=None): 17 | print(path) 18 | self.data = np.load(path) 19 | if slices: 20 | self.error = self.data['angle_error'][slices, :].astype(np.float32) 21 | self.velocity = self.data['motor_velocity'][slices, :].astype(np.float32) 22 | self.torque = self.data['motor_torque'][slices, :].astype(np.float32) 23 | else: 24 | self.error = self.data['angle_error'].astype(np.float32) 25 | self.velocity = self.data['motor_velocity'].astype(np.float32) 26 | self.torque = self.data['motor_torque'].astype(np.float32) 27 | self.X = np.stack((self.error[2 * history_interval:-1, ].flatten(), 28 | self.error[history_interval:-1 - history_interval, ].flatten(), 29 | self.error[:-1 - 2 * history_interval, 
].flatten(), 30 | self.velocity[2 * history_interval:-1, ].flatten(), 31 | self.velocity[history_interval:-1 - history_interval, ].flatten(), 32 | self.velocity[:-1 - 2 * history_interval, ].flatten()), axis=1) 33 | self.Y = np.expand_dims(self.torque[1 + 2 * history_interval:, ].flatten(), axis=1) 34 | self.size = len(self.error) 35 | 36 | def __len__(self): 37 | return len(self.X) 38 | 39 | def __getitem__(self, idx): 40 | return self.X[idx], self.Y[idx] 41 | 42 | 43 | def train_actuator_net(actuator_net, dataset_class, history_interval, 44 | lr, num_epochs, batch_size, device): 45 | dataset_paths = [os.path.join(RSC_DIR, filename) for filename in os.listdir(RSC_DIR)] 46 | dataset = ConcatDataset([dataset_class(path, history_interval) for path in dataset_paths if path.endswith('npz')]) 47 | train_len = int(0.8 * len(dataset)) 48 | test_len = len(dataset) - train_len 49 | train_data, test_data = random_split(dataset, (train_len, test_len)) 50 | train_loader = DataLoader(train_data, batch_size, shuffle=True) 51 | test_loader = DataLoader(test_data, batch_size, shuffle=True) 52 | net = actuator_net.to(device) 53 | optim = torch.optim.AdamW(net.parameters(), lr=lr) 54 | criterion = nn.MSELoss(reduction='sum') 55 | 56 | wandb.init(project='actuator_net', name=str((actuator_net.hidden_dims, lr)), mode=None) 57 | log_dir = f'ident/{get_timestamp()}' 58 | os.makedirs(log_dir, exist_ok=True) 59 | torch.set_printoptions(linewidth=10000) 60 | for i in range(1, num_epochs + 1): 61 | train_loss, test_loss = 0., 0. 62 | for X, Y in train_loader: 63 | optim.zero_grad() 64 | X, Y = X.to(device), Y.to(device) 65 | loss = criterion(net(X), Y) 66 | train_loss += loss.item() 67 | loss.backward() 68 | optim.step() 69 | train_loss /= train_len 70 | log.info(f'Epoch {i:>4}/{num_epochs} train loss {train_loss:.6f}') 71 | 72 | for X, Y in test_loader: 73 | X, Y = X.to(device), Y.to(device) 74 | loss = criterion(net(X), Y) 75 | test_loss += loss.item() 76 | test_loss /= test_len 77 | log.info(f'Epoch {i:>4}/{num_epochs} test loss {test_loss:.6f}') 78 | wandb.log({'Train/loss': train_loss, 'Test/loss': test_loss}) 79 | if i % 10 == 0: 80 | torch.save({'model': net.state_dict(), 'hidden_dims': net.hidden_dims}, 81 | os.path.join(log_dir, f'{i}.pt')) 82 | 83 | 84 | def get_statistics(model, history_interval): 85 | dataset_paths = [os.path.join(RSC_DIR, filename) for filename in os.listdir(RSC_DIR)] 86 | dataset = ConcatDataset([DatasetClass(path, history_interval) for path in dataset_paths if path.endswith('npz')]) 87 | 88 | criterion = nn.MSELoss(reduction='sum') 89 | test_loader = DataLoader(dataset, 1000, shuffle=True) 90 | test_loss = 0. 
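    # Evaluation loop over the concatenated motor logs. Each input row built by
    # RobotDatasetWithHistory is [err_t, err_{t-h}, err_{t-2h}, vel_t, vel_{t-h},
    # vel_{t-2h}] (h = history interval) and the target is the measured torque at
    # the next sample; the summed squared error is reported as MSE and RMSE below.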
91 | for X, Y in test_loader: 92 | X, Y = X.to(device), Y.to(device) 93 | loss = criterion(model(X), Y) 94 | test_loss += loss.item() 95 | mse = test_loss / (len(dataset) - 1) 96 | print('MSE:', mse) 97 | print('RMSE:', math.sqrt(mse)) 98 | 99 | 100 | if __name__ == '__main__': 101 | parser = argparse.ArgumentParser(description='Train or test quadruped locomotion.') 102 | parser.add_argument('-r', '--rsc', metavar='', 103 | help='motor data directory', required=True) 104 | parser.add_argument('-i', '--interval', type=int, help='history interval', default=3) 105 | parser.add_argument('-b', '--batch-size', type=int, help='batch size', default=1000) 106 | parser.add_argument('-l', '--lr', type=float, help='learning rate', default=1e-4) 107 | parser.add_argument('-e', '--epoch', type=int, help='num epochs', default=2000) 108 | parser.add_argument('--cuda', action='store_true', help='use cuda for training') 109 | parser.add_argument('--train', action='store_true', help='if train else test') 110 | args = parser.parse_args() 111 | 112 | RSC_DIR = args.rsc 113 | if args.cuda or torch.cuda.is_available(): 114 | DEVICE = 'cuda' 115 | else: 116 | DEVICE = 'cpu' 117 | TRAIN = args.train 118 | HISTORY_INTERVAL = args.interval 119 | BATCH_SIZE = args.batch_size 120 | LEARNING_RATE = args.lr 121 | NUM_EPOCHS = args.epoch 122 | 123 | np.set_printoptions(3, linewidth=10000, suppress=True) 124 | DatasetClass = RobotDatasetWithHistory 125 | device = torch.device(DEVICE) 126 | if TRAIN: 127 | actuator_net = ActuatorNet(hidden_dims=(32, 32, 32)) 128 | train_actuator_net( 129 | actuator_net, 130 | DatasetClass, 131 | history_interval=HISTORY_INTERVAL, 132 | batch_size=BATCH_SIZE, 133 | lr=LEARNING_RATE, 134 | num_epochs=NUM_EPOCHS, 135 | device=device 136 | ) 137 | get_statistics(actuator_net, HISTORY_INTERVAL) 138 | else: 139 | model_path = '/home/jewel/Workspaces/QuadrupedRLv2/qdpgym/sim/resources/acnet_220526.pt' 140 | model_info = torch.load(model_path, map_location=device) 141 | actuator_net = ActuatorNet(hidden_dims=model_info['hidden_dims']).to(device) 142 | actuator_net.load_state_dict(model_info['model']) 143 | get_statistics(actuator_net, HISTORY_INTERVAL) 144 | 145 | # dataset = DatasetClass(os.path.join(RSC_DIR, 'state_cmd_data_NoLoadT0.4.npz')) 146 | # motor_idx = 2 147 | # slices = slice(5000, 5500) 148 | # 149 | # error = dataset.error[slices, motor_idx] 150 | # error_his1 = dataset.error[slice(slices.start - 5, slices.stop - 5), motor_idx] 151 | # error_his2 = dataset.error[slice(slices.start - 10, slices.stop - 10), motor_idx] 152 | # velocity = dataset.velocity[slices, motor_idx] 153 | # velocity_his1 = dataset.velocity[slice(slices.start - 5, slices.stop - 5), motor_idx] 154 | # velocity_his2 = dataset.velocity[slice(slices.start - 10, slices.stop - 10), motor_idx] 155 | # error_rate = dataset.data['angle_error_rate'][slices, motor_idx].astype(np.float32) 156 | # torque = dataset.torque[slices.start + 1:slices.stop + 1, motor_idx] 157 | # predicted = actuator_net.calc_torque(error, error_his1, error_his2, velocity, velocity_his1, velocity_his2) 158 | 159 | # import matplotlib.pyplot as plt 160 | # 161 | # fig, ax = plt.subplots(3, 1, figsize=(9.6, 6.4), gridspec_kw={'height_ratios': [1, 1, 2]}) 162 | # ax[0].plot(error) 163 | # ax[0].set_ylabel('error(rad)', rotation=0) 164 | # ax[0].yaxis.set_label_coords(-0.05, 0.99) 165 | # ax[0].set_xticks([]) 166 | # ax[1].plot(velocity) 167 | # ax[1].set_xticks([]) 168 | # ax[1].set_ylabel('velocity(rad$\cdot$s$^{-1}$)', rotation=0) 169 | # 
ax[1].yaxis.set_label_coords(-0.03, 1.03) 170 | # ax[2].set_xlabel('sample point number') 171 | # ax[2].set_ylabel('torque') 172 | # ax[2].set_ylabel('torque(N$\cdot$m)', rotation=0) 173 | # ax[2].yaxis.set_label_coords(-0.03, 0.99) 174 | # ax[2].plot(torque, linewidth=1) 175 | # ax[2].plot(predicted, linewidth=1, linestyle='--') 176 | # ax[2].plot(error * 150 - velocity * 4, linewidth=1, linestyle='dashdot') 177 | # ax[2].set_ylim(-20, 25) 178 | # ax[2].legend(['measured', 'identified', 'vanilla pd']) 179 | # fig.show() 180 | # fig.savefig('acnet.pdf') 181 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/__init__.py: -------------------------------------------------------------------------------- 1 | from .loct import * 2 | from .app import * -------------------------------------------------------------------------------- /qdpgym/tasks/loct/app.py: -------------------------------------------------------------------------------- 1 | import qdpgym 2 | from qdpgym import sim 3 | from qdpgym.tasks import loct 4 | 5 | __all__ = ['LocomotionApp'] 6 | 7 | 8 | class LocomotionApp(sim.Application): 9 | def __init__(self, policy): 10 | robot = sim.Aliengo(500, 'actuator_net', noisy=False) 11 | task = loct.LocomotionV0() 12 | if qdpgym.sim_engine == qdpgym.Sim.BULLET: 13 | arena = sim.TerrainBase() 14 | task.add_hook(sim.ExtraViewerHook()) 15 | task.add_hook(sim.RandomTerrainHook()) 16 | task.add_hook(sim.RandomPerturbHook()) 17 | task.add_hook(loct.GamepadCommanderHook()) 18 | # task.add_hook(sim.StatisticsHook()) 19 | # task.add_hook(hooks.VideoRecorderBtHook()) 20 | else: 21 | raise NotImplementedError 22 | env = sim.QuadrupedEnv(robot, arena, task) 23 | # task.add_reward('UnifiedLinearReward') 24 | super().__init__(robot, env, task, policy) 25 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/reward.py: -------------------------------------------------------------------------------- 1 | import math 2 | from abc import ABC 3 | 4 | import numpy as np 5 | 6 | from qdpgym.utils import tf 7 | 8 | ATANH0_95 = math.atanh(0.95) 9 | ATANH0_9 = math.atanh(0.9) 10 | SQRT_LOG10 = math.sqrt(math.log(10)) 11 | SQRT_LOG20 = math.sqrt(math.log(20)) 12 | 13 | 14 | class Reward(ABC): 15 | def __call__(self, robot, env, task) -> float: 16 | raise NotImplementedError 17 | 18 | 19 | def tanh2_reshape(lower, upper): 20 | w = np.sqrt(ATANH0_95) / (upper - lower) 21 | 22 | def _reshape(v): 23 | return np.tanh(np.power((v - lower) * w, 2)) if v > lower else 0 24 | 25 | return _reshape 26 | 27 | 28 | def tanh2_reverse(lower, upper, value): 29 | w = np.sqrt(ATANH0_95) / (upper - lower) 30 | return np.sqrt(np.arctanh(value)) / w + lower 31 | 32 | 33 | def tanh_reshape(lower, upper): 34 | """ 35 | Zoom tanh so that f(lower) = -0.9 and f(upper) = 0.9 36 | :return: function f 37 | """ 38 | middle = (lower + upper) / 2 39 | w = ATANH0_9 / (upper - middle) 40 | 41 | def _reshape(v): 42 | return np.tanh((v - middle) * w) 43 | 44 | return _reshape 45 | 46 | 47 | def tanh_reverse(lower, upper, value): 48 | middle = (lower + upper) / 2 49 | w = ATANH0_9 / (upper - middle) 50 | return np.arctanh(value) / w + middle 51 | 52 | 53 | def exp_m2_reshape(upper): 54 | w = upper / SQRT_LOG10 55 | 56 | def _reshape(v): 57 | return np.exp(-(v / w) ** 2) 58 | 59 | return _reshape 60 | 61 | 62 | def quadratic_linear_reshape(upper): 63 | k = 1 / upper ** 2 64 | kl = 2 * k * upper 65 | 66 | def _reshape(v): 67 | v = abs(v) 68 | return v ** 2 * 
k if v < upper else kl * (v - upper) + 1 69 | 70 | return _reshape 71 | 72 | 73 | def soft_constrain(thr, upper): 74 | def _reshape(v): 75 | excess = abs(v) - thr 76 | if excess > 0: 77 | return -(excess / upper) ** 2 78 | return 0. 79 | 80 | return _reshape 81 | 82 | 83 | def expm2_resh(upper): 84 | w = upper / SQRT_LOG10 85 | 86 | def _reshape(v): 87 | return np.exp(-(v / w) ** 2) 88 | 89 | return _reshape 90 | 91 | 92 | class LinearVelocityReward(Reward): 93 | def __init__(self, forward=1.5, lateral=1.0, ort_upper=0.3, ort_weight=0.33): 94 | self.forward, self.lateral = forward, lateral 95 | self.coeff = forward * lateral 96 | self.ort_reshape = tanh2_reshape(0, ort_upper) 97 | self.ort_weight = ort_weight 98 | 99 | def __call__(self, robot, env, task): 100 | lin_cmd = task.cmd[:2] 101 | lin_vel = robot.get_velocimeter()[:2] 102 | proj_vel = np.dot(lin_cmd, lin_vel) 103 | ort_vel = tf.vnorm(lin_vel - lin_cmd[:2] * proj_vel) 104 | # print(proj_vel, ort_vel) 105 | ort_pen = 1 - self.ort_reshape(ort_vel) 106 | if (lin_cmd == 0.).all(): 107 | return ort_pen 108 | proj_rew = self.reshape(self.get_desired_velocity(lin_cmd), proj_vel) 109 | return (1 - self.ort_weight) * proj_rew + self.ort_weight * ort_pen 110 | 111 | def get_desired_velocity(self, cmd): 112 | return self.coeff / math.hypot(self.lateral * cmd[0], self.forward * cmd[1]) 113 | 114 | @staticmethod 115 | def reshape(scale, value): 116 | return math.tanh(value * ATANH0_9 / scale) 117 | 118 | 119 | class YawRateReward(Reward): 120 | def __init__(self, upper_pos=1.0, upper_neg=0.45): 121 | self.reshape_pos = tanh_reshape(-upper_pos, upper_pos) 122 | self.reshape_neg = quadratic_linear_reshape(upper_neg) 123 | 124 | def __call__(self, robot, env, task): 125 | yaw_cmd, yaw_rate = task.cmd[2], robot.get_base_rpy_rate()[2] 126 | if yaw_cmd != 0.0: 127 | return self.reshape_pos(yaw_rate / yaw_cmd) 128 | else: 129 | return 1 - self.reshape_neg(abs(yaw_rate)) 130 | 131 | 132 | class RotationReward(Reward): 133 | def __init__(self, max_ang=3.0, perception=1.0): 134 | self.max_ang = max_ang 135 | self.reshape = expm2_resh(perception) 136 | 137 | def __call__(self, robot, env, task): 138 | ang_cmd = task.cmd[2] 139 | ang_vel = robot.get_base_rpy_rate()[2] 140 | ang_rew = self.reshape(ang_vel - ang_cmd * self.max_ang) 141 | # print(ang_cmd, ang_vel, ang_rew) 142 | return ang_rew 143 | 144 | 145 | class RollPitchRatePenalty(Reward): 146 | def __init__(self, dr_upper=1.0, dp_upper=0.5): 147 | self.dr_reshape = tanh2_reshape(0.0, dr_upper) 148 | self.dp_reshape = tanh2_reshape(0.0, dp_upper) 149 | 150 | def __call__(self, robot, env, task): 151 | r_rate, p_rate, _ = robot.get_base_rpy_rate() 152 | return 1 - (self.dr_reshape(abs(r_rate)) + 153 | self.dp_reshape(abs(p_rate))) / 2 154 | 155 | 156 | # class OrthogonalLinearPenalty(Reward): 157 | # def __init__(self, linear_upper=0.3): 158 | # self.reshape = tanh2_reshape(0, linear_upper) 159 | # 160 | # def __call__(self, robot, env, task): 161 | # linear = robot.get_velocimeter()[:2] 162 | # v_o = np.asarray(linear) - np.asarray(cmd[:2]) * np.dot(linear, cmd[:2]) 163 | # return 1 - self.reshape(tf.vnorm(v_o)) 164 | 165 | 166 | class VerticalLinearPenalty(Reward): 167 | def __init__(self, upper=0.4): 168 | self.reshape = quadratic_linear_reshape(upper) 169 | 170 | def __call__(self, robot, env, task): 171 | return 1 - self.reshape(robot.get_velocimeter()[2]) 172 | 173 | 174 | class BodyPosturePenalty(Reward): 175 | def __init__(self, roll_upper=np.pi / 12, pitch_upper=np.pi / 24): 176 | 
self.roll_reshape = quadratic_linear_reshape(roll_upper) 177 | self.pitch_reshape = quadratic_linear_reshape(pitch_upper) 178 | 179 | def __call__(self, robot, env, task): 180 | trnZ = env.get_interact_terrain_normal() 181 | robot_rot = robot.get_base_rot() 182 | trnY = tf.vcross(trnZ, robot_rot[:, 0]) 183 | trnX = tf.vcross(trnY, trnZ) 184 | r, p, _ = tf.Rpy.from_rotation(np.array((trnX, trnY, trnZ)) @ robot_rot) 185 | return 1 - (self.roll_reshape(r) + self.pitch_reshape(p)) / 2 186 | 187 | 188 | class BodyHeightReward(Reward): 189 | def __init__(self, des=0.4, range_=0.03): 190 | self.des = des 191 | self.reshape = quadratic_linear_reshape(range_) 192 | 193 | def __call__(self, robot, env, task): 194 | return env.get_relative_robot_height() 195 | 196 | 197 | class ActionSmoothnessReward(Reward): 198 | def __init__(self, upper=400): 199 | self.reshape = exp_m2_reshape(upper) 200 | 201 | def __call__(self, robot, env, task): 202 | return self.reshape(env.get_action_accel()).sum() / 12 203 | 204 | 205 | class JointMotionPenalty(Reward): 206 | def __init__(self, vel_upper=6, acc_upper=500): 207 | self.vel_reshape = np.vectorize(quadratic_linear_reshape(vel_upper)) 208 | self.acc_reshape = np.vectorize(quadratic_linear_reshape(acc_upper)) 209 | 210 | def __call__(self, robot, env, task): 211 | vel_pen = self.vel_reshape(robot.get_joint_vel()).sum() 212 | acc_pen = self.acc_reshape(robot.get_joint_acc()).sum() 213 | return 1 - (vel_pen + acc_pen) / 12 214 | 215 | 216 | # class TorqueGradientPenalty(Reward): 217 | # def __init__(self, upper=200): 218 | # self.reshape = quadratic_linear_reshape(upper) 219 | # 220 | # def __call__(self, cmd, env, robot): 221 | # torque_grad = robot.getTorqueGradients() 222 | # return -sum(self.reshape(grad) for grad in torque_grad) / 12 223 | 224 | 225 | class FootSlipPenalty(Reward): 226 | def __init__(self, upper=0.5): 227 | self.reshape = np.vectorize(quadratic_linear_reshape(upper)) 228 | 229 | def __call__(self, robot, env, task): 230 | return 1 - sum(self.reshape(robot.get_slip_vel())) 231 | 232 | 233 | # class HipAnglePenalty(Reward): 234 | # def __init__(self, upper=0.3): 235 | # self.reshape = np.vectorize(quadratic_linear_reshape(upper)) 236 | # 237 | # def __call__(self, cmd, env, robot): 238 | # hip_angles = robot.getJointPositions()[(0, 3, 6, 9),] 239 | # return -sum(self.reshape(hip_angles)) / 4 240 | 241 | 242 | class JointConstraintPenalty(Reward): 243 | def __init__(self, constraints=(0., 0., 0.), upper=(0.4, 0.6, 0.6)): 244 | self.hip_reshape = np.vectorize(soft_constrain(constraints[0], upper[0])) 245 | self.thigh_reshape = np.vectorize(soft_constrain(constraints[1], upper[1])) 246 | self.shank_reshape = np.vectorize(soft_constrain(constraints[2], upper[2])) 247 | 248 | def __call__(self, robot, env, task): 249 | joint_angles = robot.get_joint_pos() - robot.STANCE_CONFIG 250 | return (self.hip_reshape(joint_angles[((0, 3, 6, 9),)]).sum() + 251 | self.thigh_reshape(joint_angles[((1, 4, 7, 10),)]).sum() + 252 | self.shank_reshape(joint_angles[((2, 5, 8, 11),)]).sum()) / 12 253 | 254 | 255 | # class TrivialStridePenalty(Reward): 256 | # def __init__(self, lower=-0.2, upper=0.4): 257 | # self.reshape = tanh_reshape(lower, upper) 258 | # 259 | # def __call__(self, robot, env, task): 260 | # strides = [np.dot(s, cmd[:2]) for s in robot.get_strides()] 261 | # return sum(self.reshape(s) for s in strides if s != 0.0) 262 | # # return 1 - sum(1 - self.reshape(s) for s in strides if s != 0.0) 263 | 264 | 265 | class FootClearanceReward(Reward): 266 | def 
__init__(self, upper=0.08): 267 | self.reshape = tanh2_reshape(0., upper) 268 | 269 | def __call__(self, robot, env, task): 270 | foot_clearances = robot.get_clearances() 271 | return sum(self.reshape(c) for c in foot_clearances) 272 | 273 | 274 | class AliveReward(Reward): 275 | def __call__(self, robot, env, task): 276 | return 1.0 277 | 278 | 279 | class ClearanceOverTerrainReward(Reward): 280 | def __call__(self, robot, env, task): 281 | reward = 0. 282 | for x, y, z in robot.get_foot_pos(): 283 | terrain_extremum = env.arena.get_peak((x - 0.1, x + 0.1), 284 | (y - 0.1, y + 0.1))[2] 285 | if z - robot.FOOT_RADIUS > terrain_extremum + 0.01: 286 | reward += 1 / 2 287 | return reward 288 | 289 | 290 | class BodyCollisionPenalty(Reward): 291 | def __call__(self, robot, env, task): 292 | contact_sum = 0 293 | for i, contact in enumerate(robot.get_leg_contacts()): 294 | if contact and i % 3 != 2: 295 | contact_sum += 1 296 | return 1 - contact_sum 297 | 298 | 299 | class TorquePenalty(Reward): 300 | def __init__(self, upper=900): 301 | self.reshape = np.vectorize(quadratic_linear_reshape(upper)) 302 | 303 | def __call__(self, robot, env, task): 304 | return 1 - sum(self.reshape(robot.get_last_torque() ** 2)) 305 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/sr_reward.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | import numpy as np 4 | 5 | from qdpgym.utils import tf 6 | 7 | 8 | class Reward(ABC): 9 | def __call__(self, robot, env, task) -> float: 10 | raise NotImplementedError 11 | 12 | 13 | def exp_m2(v, k=1): 14 | return np.exp(-k * v ** 2) 15 | 16 | 17 | class VelocityReward(Reward): 18 | def __init__(self, max_lin=1.0): 19 | self.max_lin = max_lin 20 | 21 | def __call__(self, robot, env, task): 22 | lin_cmd = task.cmd[:2] 23 | lin_vel = robot.get_velocimeter()[:2] 24 | lin_cmd_mag = tf.vnorm(lin_cmd) * self.max_lin 25 | if lin_cmd_mag != 0.: 26 | lin_cmd /= lin_cmd_mag # unit 27 | 28 | prj_vel = np.dot(lin_cmd, lin_vel) 29 | ort_vel = tf.vnorm(lin_vel - lin_cmd * prj_vel) 30 | if lin_cmd_mag == 0.: 31 | lin_rew = exp_m2(tf.vnorm(lin_vel)) 32 | elif prj_vel >= lin_cmd_mag: 33 | lin_rew = 1. 34 | else: 35 | lin_rew = exp_m2(lin_cmd_mag - prj_vel) 36 | 37 | ort_pen = exp_m2(ort_vel, 3) 38 | # print(lin_cmd_mag, prj_vel, lin_rew) 39 | return (lin_rew + ort_pen) / 2 40 | 41 | 42 | class RotationReward(Reward): 43 | def __init__(self, max_ang=1.0): 44 | self.max_ang = max_ang 45 | 46 | def __call__(self, robot, env, task): 47 | ang_cmd = task.cmd[2] 48 | ang_vel = robot.get_base_rpy_rate()[2] 49 | ang_cmd_mag = abs(ang_cmd) * self.max_ang 50 | if ang_cmd == 0.: 51 | ang_rew = exp_m2(ang_vel) 52 | elif ang_vel * np.sign(ang_cmd) >= ang_cmd_mag: 53 | ang_rew = 1. 
54 | else: 55 | ang_rew = exp_m2(ang_vel - ang_cmd) 56 | # print(ang_cmd_mag, ang_vel, ang_rew) 57 | return ang_rew 58 | 59 | 60 | class BodyMotionPenalty(Reward): 61 | def __call__(self, robot, env, task): 62 | z_vel = robot.get_velocimeter()[2] 63 | r_rate, p_rate, _ = robot.get_base_rpy_rate() 64 | return -1.25 * z_vel ** 2 - 0.4 * abs(r_rate) - 0.4 * abs(p_rate) 65 | 66 | 67 | class BodyCollisionPenalty(Reward): 68 | def __call__(self, robot, env, task): 69 | contact_sum = 0 70 | for i, contact in enumerate(robot.get_leg_contacts()): 71 | if contact and i % 3 != 2: 72 | contact_sum += 1 73 | return 1 - contact_sum 74 | 75 | 76 | class JointMotionPenalty(Reward): 77 | def __call__(self, robot, env, task): 78 | vel_pen = -(robot.get_joint_vel() ** 2).sum() 79 | acc_pen = -(robot.get_joint_acc() ** 2).sum() 80 | return vel_pen + acc_pen * 1e-4 81 | 82 | 83 | class TargetSmoothnessReward(Reward): 84 | def __call__(self, robot, env, task): 85 | action_history = env.action_history 86 | actions = [action_history[-i - 1] for i in range(3)] 87 | return -( 88 | ((actions[0] - actions[1]) ** 2).sum() + 89 | ((actions[0] - 2 * actions[1] + actions[2]) ** 2).sum() 90 | ) 91 | 92 | 93 | class TorquePenalty(Reward): 94 | def __call__(self, robot, env, task): 95 | return -(robot.get_last_torque() ** 2).sum() 96 | 97 | 98 | class SlipPenalty(Reward): 99 | def __call__(self, robot, env, task): 100 | return -robot.get_slip_vel().sum() 101 | 102 | 103 | class FootClearanceReward(Reward): 104 | def __call__(self, robot, env, task): 105 | return 1. 106 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Union, Tuple, List, Any, Iterable, Callable 3 | 4 | import numpy as np 5 | import rtree 6 | import torch 7 | from rtree import Rtree 8 | from sklearn.linear_model import LinearRegression 9 | 10 | from qdpgym.utils import tf, Natural 11 | 12 | 13 | class PolicyWrapper(object): 14 | def __init__(self, net, device): 15 | self.net = net 16 | self.device = torch.device(device) 17 | 18 | def __call__(self, obs): 19 | with torch.inference_mode(): 20 | obs = torch.as_tensor(obs, dtype=torch.float32, device=self.device) 21 | return self.net(obs).detach().cpu().numpy() 22 | 23 | 24 | class GradISNaive(object): 25 | """ 26 | Adaptively change the sample weight by the gradient of reward. 27 | May lead to significant sample impoverishment. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | dim: int, 33 | min_key: Union[float, Tuple[float, ...]], 34 | max_key: Union[float, Tuple[float, ...]], 35 | max_len: int, 36 | grad_coef=1.0, 37 | max_weight=5.0, 38 | neighborhood_ratio=0.1, 39 | ): 40 | self._rtree = Rtree() 41 | self._min_key = min_key 42 | self._max_key = max_key 43 | if isinstance(self._min_key, float): 44 | self._interval = max_key - min_key 45 | else: 46 | self._interval = np.asarray(max_key) - np.asarray(min_key) 47 | self._max_len = max_len 48 | self._dim = dim 49 | self._grad_coef = grad_coef 50 | self._max_weight = max_weight 51 | self._nbh_size = neighborhood_ratio * self._interval 52 | self._bbox = self._key_to_bbox(min_key, max_key) 53 | 54 | self._rtree.insert(-1, self._bbox) 55 | 56 | self._cur_len = 0 57 | self._total_len = 0 58 | self._buffer = [] 59 | self._weights = [] # save grad weights 60 | self._weight_sum = 0. 
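    # Illustrative usage sketch (not part of the original file) for a 1-D
    # curriculum variable; `run_episode` and the constants are hypothetical:
    #
    #     sampler = GradISNaive(dim=1, min_key=0.0, max_key=1.0, max_len=1000)
    #     rng = np.random.default_rng()
    #     difficulty = sampler.sample(rng, uniform_prob=0.2, normal_var=0.05)
    #     episode_return = run_episode(difficulty)    # hypothetical rollout
    #     sampler.insert(difficulty, episode_return)  # weight grows where |d return / d key| is large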
61 | 62 | def _key_to_bbox(self, key1, key2=None): 63 | if key2 is None: 64 | key2 = key1 65 | if self._dim == 1: 66 | return key1, key1, key2, key2 67 | elif self._dim == 2: 68 | return (*key1, *key2) 69 | else: 70 | raise NotImplementedError 71 | 72 | def _bbox_to_key(self, bbox): 73 | if self._dim == 1: 74 | return bbox[0] 75 | elif self._dim == 2: 76 | return bbox[:2] 77 | else: 78 | raise NotImplementedError 79 | 80 | @property 81 | def samples(self): 82 | return iter(self) 83 | 84 | @property 85 | def particles(self): 86 | return zip(self._buffer, self._weights) 87 | 88 | def __iter__(self): 89 | for item in self._rtree.intersection( 90 | self._bbox, objects=True 91 | ): 92 | if item.id != -1: 93 | yield self._bbox_to_key(item.bbox), item.object 94 | 95 | def is_full(self): 96 | return self._cur_len == self._max_len 97 | 98 | def __len__(self): 99 | return self._cur_len 100 | 101 | def __repr__(self): 102 | return self._rtree.__repr__() 103 | 104 | def insert(self, key, value): 105 | self._rtree.insert(self._total_len, self._key_to_bbox(key), value) 106 | if self._cur_len < self._max_len: 107 | self._buffer.append(key) 108 | self._weights.append(1.0) 109 | self._weight_sum += 1.0 110 | self._cur_len += 1 111 | else: 112 | idx = self._total_len % self._max_len 113 | prev_key = self._buffer[idx] 114 | self._rtree.delete( 115 | self._total_len - self._max_len, 116 | self._key_to_bbox(prev_key) 117 | ) 118 | self._buffer[idx] = key 119 | grad_weight = self.get_grad_weight(key) 120 | self._weight_sum += grad_weight - self._weights[idx] 121 | self._weights[idx] = grad_weight 122 | 123 | self._total_len += 1 124 | 125 | def get_grad_weight(self, key) -> float: 126 | neighbors = self.get_neighbors(key, self._nbh_size) 127 | if len(neighbors) < 5: 128 | return self._max_weight 129 | else: 130 | x, y = zip(*neighbors) 131 | x, y = np.array(x), np.array(y) 132 | reg = LinearRegression() 133 | reg.fit(x.reshape(-1, 1), y) 134 | return np.exp( 135 | abs(reg.coef_[0]) * self._grad_coef 136 | ).clip(max=self._max_weight) 137 | 138 | def get_neighbors(self, key, radius): 139 | neighbors = [] 140 | for item in self._rtree.intersection( 141 | self._key_to_bbox(key - radius, key + radius), objects=True 142 | ): 143 | if item.id != -1: 144 | neighbors.append((self._bbox_to_key(item.bbox), item.object)) 145 | return neighbors 146 | 147 | def sample( 148 | self, 149 | random_gen: Union[np.random.Generator, np.random.RandomState], 150 | uniform_prob: float = 0., 151 | normal_var: float = None, 152 | ): 153 | if not self.is_full() or random_gen.random() < uniform_prob: 154 | return random_gen.uniform(self._min_key, self._max_key) 155 | else: 156 | # importance sampling 157 | weights = np.array(self._weights) / self._weight_sum 158 | sample = random_gen.choice(self._buffer, p=weights) 159 | return np.clip( 160 | random_gen.normal(sample, normal_var), 161 | self._min_key, self._max_key 162 | ) if normal_var else sample 163 | 164 | 165 | class GradIS(object): 166 | def __init__( 167 | self, 168 | dim: int, 169 | min_key: Union[float, Tuple[float, ...]], 170 | max_key: Union[float, Tuple[float, ...]], 171 | approx_max_len: int, 172 | grad_coef=1.0, 173 | max_weight=5.0, 174 | neighborhood_ratio=0.1, 175 | ): 176 | self._rtree = Rtree() 177 | self._dim = dim 178 | self._min_key = min_key 179 | self._max_key = max_key 180 | if isinstance(self._min_key, float): 181 | self._interval = max_key - min_key 182 | else: 183 | self._interval = np.asarray(max_key) - np.asarray(min_key) 184 | self._approx_max_len = 
approx_max_len 185 | self._max_density = self._approx_max_len * neighborhood_ratio ** dim 186 | self._grad_coef = grad_coef 187 | self._max_weight = max_weight 188 | self._nbh_size = neighborhood_ratio * self._interval 189 | self._bbox = self._key_to_bbox(min_key, max_key) 190 | 191 | self._rtree.insert(-1, self._bbox) 192 | 193 | self._cur_len = 0 194 | self._total_len = 0 195 | self._buffer_weights = collections.defaultdict(float) 196 | self._weight_sum = 0. 197 | 198 | def _key_to_bbox(self, key1, key2=None): 199 | if key2 is None: 200 | key2 = key1 201 | if self._dim == 1: 202 | return key1, key1, key2, key2 203 | elif self._dim == 2: 204 | return (*key1, *key2) 205 | else: 206 | raise NotImplementedError 207 | 208 | def _bbox_to_key(self, bbox): 209 | if self._dim == 1: 210 | return bbox[0] 211 | elif self._dim == 2: 212 | return bbox[:2] 213 | else: 214 | raise NotImplementedError 215 | 216 | def _key_dist(self, key1, key2): 217 | if self._dim == 1: 218 | return abs(key1 - key2) 219 | elif self._dim == 2: 220 | return tf.vnorm(np.asarray(key1) - np.asarray(key2)) 221 | else: 222 | raise NotImplementedError 223 | 224 | @property 225 | def samples(self) -> Iterable[Tuple[Any, float]]: 226 | return iter(self) 227 | 228 | @property 229 | def particles(self): 230 | return self._buffer_weights.items() 231 | 232 | @property 233 | def initialized(self): 234 | return self._total_len >= self._approx_max_len 235 | 236 | def __iter__(self): 237 | for item in self._rtree.intersection( 238 | self._bbox, objects=True 239 | ): 240 | if item.id != -1: 241 | yield self._bbox_to_key(item.bbox), item.object 242 | 243 | def __len__(self): 244 | return self._cur_len 245 | 246 | def __repr__(self): 247 | return self._rtree.__repr__() 248 | 249 | def insert(self, key, value): 250 | raw_neighbors = self.get_neighbors(key, self._nbh_size, raw=True) 251 | 252 | if key in self._buffer_weights: 253 | # remove duplicated key 254 | dup_idx = None 255 | for idx, item in enumerate(raw_neighbors): 256 | item_key = self._bbox_to_key(item.bbox) 257 | if np.array_equal(item_key, key): 258 | # print('duplicated key', key) 259 | self._weight_sum -= self._buffer_weights[key] 260 | self._rtree.delete(item.id, item.bbox) 261 | dup_idx = idx 262 | break 263 | if dup_idx is not None: 264 | raw_neighbors.pop(dup_idx) 265 | elif len(raw_neighbors) > self._max_density: 266 | # delete the obsolete particle 267 | # obsolete: rtree.index.Item = min(raw_neighbors, key=lambda x: x.id) 268 | # or delete the nearest particle? 
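            # The branch below caps local particle density: when a neighborhood already
            # holds more than `_max_density` particles, the one closest to the incoming
            # key is evicted from the R-tree and its weight subtracted from the total.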
269 | obsolete: rtree.index.Item = min( 270 | raw_neighbors, key=lambda x: self._key_dist(self._bbox_to_key(x.bbox), key) 271 | ) 272 | raw_neighbors.remove(obsolete) 273 | self._rtree.delete(obsolete.id, obsolete.bbox) 274 | self._weight_sum -= self._buffer_weights.pop(self._bbox_to_key(obsolete.bbox)) 275 | else: 276 | self._cur_len += 1 277 | 278 | self._total_len += 1 279 | self._rtree.insert(self._total_len, self._key_to_bbox(key), value) 280 | # calculate weight from gradient 281 | neighbors = [(self._bbox_to_key(item.bbox), item.object) for item in raw_neighbors] 282 | neighbors.append((key, value)) 283 | grad_weight = self.get_weight_from_grad(key, neighbors) 284 | self._weight_sum += grad_weight 285 | self._buffer_weights[key] = grad_weight 286 | 287 | def get_weight_from_grad(self, key, neighbors=None) -> float: 288 | if neighbors is None: 289 | neighbors = self.get_neighbors(key, self._nbh_size) 290 | if len(neighbors) < 5: 291 | return 1.0 292 | else: 293 | x, y = zip(*neighbors) 294 | x, y = np.array(x), np.array(y) 295 | reg = LinearRegression() 296 | # TODO: dim >= 2 297 | reg.fit(x.reshape(-1, 1), y) 298 | r = reg.predict(np.reshape(key, (1, 1))).clip(0., 1.).item() 299 | return np.clip( 300 | np.exp(abs(reg.coef_[0]) * self._grad_coef) + 301 | np.exp(-((r - 0.7) / 0.7) ** 2), 302 | 0., self._max_weight 303 | ).item() 304 | 305 | # grad = 2 * r * math.sqrt(-math.log(r)) 306 | # return np.clip(abs(grad), 0., self._max_weight).item() 307 | # # return np.exp( 308 | # # abs(grad) * self._grad_coef 309 | # # ).clip(max=self._max_weight) 310 | 311 | def get_neighbors( 312 | self, key, radius, raw: bool = False 313 | ) -> Union[List[rtree.index.Item], List[Tuple[Any, float]]]: 314 | neighbors = [] 315 | for item in self._rtree.intersection( 316 | self._key_to_bbox(key - radius, key + radius), objects=True 317 | ): 318 | if item.id != -1: 319 | neighbors.append( 320 | item if raw else ( 321 | self._bbox_to_key(item.bbox), item.object 322 | )) 323 | return neighbors 324 | 325 | def sample( 326 | self, 327 | random_gen: Union[np.random.Generator, np.random.RandomState], 328 | uniform_prob: float = 0., 329 | normal_var: float = None, 330 | ): 331 | if not self.initialized or random_gen.random() < uniform_prob: 332 | return random_gen.uniform(self._min_key, self._max_key) 333 | else: 334 | # importance sampling 335 | # TODO: a dict for saving index and two lists 336 | # for saving samples and weights may be better? 
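            # Importance-sampling step: normalize the stored gradient weights into a
            # categorical distribution, draw one stored key, and optionally perturb it
            # with Gaussian noise (`normal_var` is passed as the scale) clipped back
            # into [min_key, max_key].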
337 | buffer, weights = zip(*self._buffer_weights.items()) 338 | weights = np.array(weights) / self._weight_sum 339 | sample = random_gen.choice(buffer, p=weights) 340 | return np.clip( 341 | random_gen.normal(sample, normal_var), 342 | self._min_key, self._max_key 343 | ) if normal_var else sample 344 | 345 | 346 | class AlpIS(object): 347 | def __init__( 348 | self, 349 | dim: int, 350 | min_key: Union[float, Tuple[float, ...], np.ndarray], 351 | max_key: Union[float, Tuple[float, ...], np.ndarray], 352 | window_size: int, 353 | default_distribution: Callable[[Union[np.random.Generator, np.random.RandomState]], Any] = None, 354 | neighborhood_ratio=0.1, 355 | min_lp=1e-4, 356 | ): 357 | self._dim = dim 358 | self._min_key = min_key 359 | self._max_key = max_key 360 | self._interval = np.array(max_key) - np.array(min_key) 361 | self._nbh_size = neighborhood_ratio * self._interval 362 | self._window_size = window_size 363 | self._max_density = self._window_size * neighborhood_ratio ** dim 364 | self._min_lp = min_lp 365 | if default_distribution is None: 366 | self._explore = lambda r: r.uniform(self._min_key, self._max_key) 367 | else: 368 | self._explore = default_distribution 369 | 370 | self._buffer = Rtree() 371 | self._history = Rtree() 372 | self._bbox = self._key_to_bbox(min_key, max_key) 373 | self._history_idx = Natural() 374 | self._window = collections.deque(maxlen=window_size) 375 | 376 | self._total_len = 0 377 | self._samples = [] 378 | self._weights = [] 379 | self._init = False 380 | 381 | def _key_to_bbox(self, key1, key2=None): 382 | if key2 is None: 383 | key2 = key1 384 | if self._dim == 1: 385 | return key1, key1, key2, key2 386 | if self._dim == 2: 387 | return (*key1, *key2) 388 | raise NotImplementedError 389 | 390 | def _bbox_to_key(self, bbox): 391 | if self._dim == 1: 392 | return bbox[0] 393 | if self._dim == 2: 394 | return bbox[:2] 395 | raise NotImplementedError 396 | 397 | def _key_dist(self, key1, key2): 398 | if self._dim == 1: 399 | return abs(key1 - key2) 400 | if self._dim == 2: 401 | return tf.vnorm(np.asarray(key1) - np.asarray(key2)) 402 | raise NotImplementedError 403 | 404 | @property 405 | def progresses(self) -> Iterable[Tuple[Any, float]]: 406 | for item in self._history.intersection(self._bbox, objects=True): 407 | if item.id != -1: 408 | yield self._bbox_to_key(item.bbox), item.object 409 | 410 | @property 411 | def particles(self): 412 | return zip(self._samples, self._weights) 413 | 414 | @property 415 | def is_init(self): 416 | return self._init 417 | 418 | def insert(self, key, value): 419 | self._total_len += 1 420 | self._buffer.insert(self._total_len, self._key_to_bbox(key), value) 421 | self._window.append((key, value)) 422 | 423 | if self._total_len % self._window_size == 0: 424 | self._samples.clear() 425 | self._weights.clear() 426 | if self._init or len(self._history): 427 | self._init = True 428 | for k, _ in self.progresses: 429 | self._samples.append(k) 430 | self._weights.append(self._compute_alp(k)) 431 | weight_sum = np.sum(self._weights) 432 | if weight_sum != 0.: 433 | self._weights = (np.array(self._weights) / weight_sum).tolist() 434 | 435 | self._patch_density_merge(self._window) 436 | self._buffer = Rtree() 437 | 438 | def sample( 439 | self, 440 | random_gen: Union[np.random.Generator, np.random.RandomState], 441 | uniform_prob: float = 0., 442 | normal_var: float = None, 443 | ): 444 | if not self._init or random_gen.random() < uniform_prob: 445 | return self._explore(random_gen) 446 | else: 447 | # importance 
sampling 448 | sample = random_gen.choice(self._samples, p=self._weights) 449 | return np.clip( 450 | random_gen.normal(sample, normal_var), 451 | self._min_key, self._max_key 452 | ) if normal_var else sample 453 | 454 | def _patch_density_merge(self, window): 455 | for k, v in window: 456 | neighbors = self._get_neighbors(self._history, k) 457 | if len(neighbors) > self._max_density: 458 | # delete the nearest particle 459 | obsolete: rtree.index.Item = min( 460 | neighbors, key=lambda x: self._key_dist(self._bbox_to_key(x.bbox), k) 461 | ) 462 | self._history.delete(obsolete.id, obsolete.bbox) 463 | self._history.insert(next(self._history_idx), self._key_to_bbox(k), v) 464 | 465 | def _compute_alp(self, key) -> float: 466 | n_history = self._get_neighbor_objects(self._history, key) 467 | n_buffer = self._get_neighbor_objects(self._buffer, key) 468 | # TODO: Remove Duplicated items 469 | if len(n_history) > 3 and len(n_buffer) > 3: 470 | v_history = np.array(n_history).mean() 471 | v_buffer = np.array(n_buffer).mean() 472 | alp = max(abs(v_buffer - v_history), self._min_lp) 473 | else: 474 | alp = self._min_lp 475 | return alp 476 | 477 | def _get_neighbors(self, rtree_obj, key) -> List[rtree.index.Item]: 478 | neighbors = [] 479 | bbox = self._key_to_bbox(key - self._nbh_size, key + self._nbh_size) 480 | for item in rtree_obj.intersection(bbox, objects=True): 481 | if item.id != -1: 482 | neighbors.append(item) 483 | return neighbors 484 | 485 | def _get_neighbor_objects(self, rtree_obj, key) -> List[float]: 486 | neighbors = [] 487 | bbox = self._key_to_bbox(key - self._nbh_size, key + self._nbh_size) 488 | for item in rtree_obj.intersection(bbox, objects=True): 489 | if item.id != -1: 490 | neighbors.append(item.object) 491 | return neighbors 492 | 493 | 494 | class GradIS1D(GradIS): 495 | def __init__( 496 | self, 497 | min_key: Union[float, Tuple[float, ...]], 498 | max_key: Union[float, Tuple[float, ...]], 499 | max_len: int, 500 | grad_coef=1.0, 501 | max_weight=5.0, 502 | neighborhood_ratio=0.1, 503 | ): 504 | super().__init__( 505 | 1, 506 | min_key, 507 | max_key, 508 | max_len, 509 | grad_coef, 510 | max_weight, 511 | neighborhood_ratio, 512 | ) 513 | -------------------------------------------------------------------------------- /qdpgym/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tests/__init__.py -------------------------------------------------------------------------------- /qdpgym/tests/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | 4 | 5 | def test_reward_reshape(): 6 | x = np.arange(-2, 2, 0.01) 7 | plt.plot(x, np.tanh(x)) 8 | plt.show() 9 | -------------------------------------------------------------------------------- /qdpgym/tests/test_pybullet.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import time 3 | 4 | import numpy as np 5 | import pybullet as pyb 6 | import pybullet_data 7 | 8 | from qdpgym.sim.blt.env import QuadrupedEnv 9 | from qdpgym.sim.blt.hooks import ViewerHook 10 | from qdpgym.sim.blt.quadruped import Aliengo 11 | from qdpgym.sim.blt.terrain import Plain, Hills, Slopes, Steps, PlainHf 12 | from qdpgym.sim.task import NullTask 13 | from qdpgym.tasks.loct import LocomotionV0, LocomotionPMTG 14 | from qdpgym.utils import 
tf 15 | 16 | 17 | def test_robot(): 18 | pyb.connect(pyb.GUI) 19 | pyb.setTimeStep(2e-3) 20 | pyb.setAdditionalSearchPath(pybullet_data.getDataPath()) 21 | Plain().spawn(pyb) 22 | rob = Aliengo(500, 'pd') 23 | rs = np.random.RandomState() 24 | rob.spawn(pyb, rs) 25 | pyb.setGravity(0, 0, -9.8) 26 | 27 | for _ in range(1000): 28 | # with MfTimer() as t: 29 | pyb.stepSimulation() 30 | rob.update_observation(rs) 31 | rob.apply_command(rob.STANCE_CONFIG) 32 | 33 | pyb.disconnect() 34 | 35 | def test_robot_extent(): 36 | pyb.connect(pyb.GUI) 37 | pyb.setTimeStep(2e-3) 38 | pyb.setAdditionalSearchPath(pybullet_data.getDataPath()) 39 | Plain().spawn(pyb) 40 | rob = Aliengo(500, 'pd') 41 | rob.spawn_on_rack(pyb, np.random) 42 | pyb.setGravity(0, 0, -9.8) 43 | 44 | joint_pos = np.concatenate(( 45 | rob.inverse_kinematics(0, (0.3, 0., 0.05)), 46 | rob.inverse_kinematics(0, (0.35, 0., 0.1)), 47 | rob.inverse_kinematics(0, (0.25, 0., 0.)), 48 | rob.inverse_kinematics(0, (-0.25, 0., 0.)), 49 | )) 50 | 51 | for _ in range(1000): 52 | pyb.stepSimulation() 53 | rob.update_observation(np.random) 54 | 55 | rob.apply_command(joint_pos) 56 | time.sleep(0.002) 57 | 58 | pyb.disconnect() 59 | 60 | 61 | def test_env(): 62 | rob = Aliengo(500, 'pd') 63 | arena = Plain() 64 | task = NullTask() 65 | task.add_hook(ViewerHook()) 66 | env = QuadrupedEnv(rob, arena, task) 67 | env.reset() 68 | for _ in range(1000): 69 | env.step(rob.STANCE_CONFIG) 70 | 71 | 72 | def test_tg(): 73 | rob = Aliengo(500, 'actuator_net', True) 74 | arena = Plain() 75 | task = LocomotionPMTG() 76 | task.add_hook(ViewerHook()) 77 | env = QuadrupedEnv(rob, arena, task) 78 | env.reset() 79 | for _ in range(1000): 80 | env.step(np.zeros(16)) 81 | 82 | 83 | def test_replaceHeightfield(): 84 | pyb.connect(pyb.GUI) 85 | pyb.setRealTimeSimulation(True) 86 | terrains = [PlainHf.default(), Hills.default(), 87 | Steps.default(), Slopes.default()] 88 | current = None 89 | for i in range(5): 90 | for terrain in terrains: 91 | if current is None: 92 | terrain.spawn(pyb) 93 | else: 94 | terrain.replace(pyb, current) 95 | current = terrain 96 | time.sleep(0.5) 97 | current.remove(pyb) 98 | time.sleep(1.0) 99 | pyb.disconnect() 100 | 101 | 102 | def test_terrainApi(): 103 | pyb.connect(pyb.GUI) 104 | pyb.setRealTimeSimulation(True) 105 | terrain = Hills.make(20, 0.1, (0.5, 10), random_state=np.random) 106 | # terrain = Steps.make(20, 0.1, 1.0, 0.5, random_state=np.random) 107 | # terrain = Slopes.make(20, 0.1, np.pi / 6, 0.5) 108 | terrain.spawn(pyb) 109 | 110 | sphere_shape = pyb.createVisualShape( 111 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0., 0.8, 0., 0.6) 112 | ) 113 | ray_hit_shape = pyb.createVisualShape( 114 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0.8, 0., 0., 0.6) 115 | ) 116 | cylinder_shape = pyb.createVisualShape( 117 | shapeType=pyb.GEOM_CYLINDER, radius=0.005, length=0.11, 118 | rgbaColor=(0., 0, 0.8, 0.6) 119 | ) 120 | box_shape = pyb.createVisualShape( 121 | shapeType=pyb.GEOM_BOX, halfExtents=(0.03, 0.03, 0.03), 122 | rgbaColor=(0.8, 0., 0., 0.6) 123 | ) 124 | 125 | points, vectors, ray_hits = [], [], [] 126 | box_id = -1 127 | 128 | for i in range(10): 129 | peak = terrain.get_peak((-1, 1), (-1, 1)) 130 | if i == 0: 131 | box_id = pyb.createMultiBody( 132 | baseVisualShapeIndex=box_shape, 133 | basePosition=peak 134 | ) 135 | else: 136 | pyb.resetBasePositionAndOrientation( 137 | box_id, peak, (0., 0., 0., 1.) 
138 | ) 139 | for idx, (x, y) in enumerate( 140 | itertools.product(np.linspace(-1, 1, 10), np.linspace(-1, 1, 10)) 141 | ): 142 | h = terrain.get_height(x, y) 143 | vec_orn = tf.Quaternion.from_rotation( 144 | tf.Rotation.from_zaxis(terrain.get_normal(x, y)) 145 | ) 146 | ray_pos = pyb.rayTest((x, y, 2), (x, y, -1))[0][3] 147 | if i == 0: 148 | points.append(pyb.createMultiBody( 149 | baseVisualShapeIndex=sphere_shape, 150 | basePosition=(x, y, h) 151 | )) 152 | vectors.append(pyb.createMultiBody( 153 | baseVisualShapeIndex=cylinder_shape, 154 | basePosition=(x, y, h), 155 | baseOrientation=vec_orn 156 | )) 157 | ray_hits.append(pyb.createMultiBody( 158 | baseVisualShapeIndex=ray_hit_shape, 159 | basePosition=ray_pos 160 | )) 161 | else: 162 | pyb.resetBasePositionAndOrientation( 163 | points[idx], (x, y, h), (0., 0., 0., 1.) 164 | ) 165 | pyb.resetBasePositionAndOrientation( 166 | ray_hits[idx], ray_pos, (0., 0., 0., 1.) 167 | ) 168 | pyb.resetBasePositionAndOrientation( 169 | vectors[idx], (x, y, h), vec_orn 170 | ) 171 | time.sleep(3) 172 | new = Hills.make(20, 0.1, (np.random.random(), 10), random_state=np.random) 173 | new.replace(pyb, terrain) 174 | terrain = new 175 | 176 | pyb.disconnect() 177 | 178 | 179 | def test_gym_env(): 180 | rob = Aliengo(500, 'pd') 181 | arena = Plain() 182 | task = LocomotionV0() 183 | env = QuadrupedEnv(rob, arena, task) 184 | print(env.observation_space) 185 | print(env.action_space) 186 | 187 | 188 | if __name__ == '__main__': 189 | test_robot_extent() -------------------------------------------------------------------------------- /qdpgym/thirdparty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/thirdparty/__init__.py -------------------------------------------------------------------------------- /qdpgym/thirdparty/gamepad/__init__.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/piborg/Gamepad.git 2 | -------------------------------------------------------------------------------- /qdpgym/thirdparty/gamepad/controllers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Standard gamepad mappings. 4 | 5 | Pulled in to gamepad.py directly. 
6 | """ 7 | import copy 8 | 9 | from .gamepad import Gamepad 10 | 11 | __all__ = ['PS4', 'XboxONE', 'Xbox'] 12 | all_controllers = copy.copy(__all__) 13 | __all__.append(all_controllers) 14 | 15 | 16 | class PS4(Gamepad): 17 | fullName = 'PlayStation 4 controller' 18 | 19 | def __init__(self, joystickNumber=0): 20 | Gamepad.__init__(self, joystickNumber) 21 | self.axisNames = { 22 | 0: 'LAS -X', 23 | 1: 'LAS -Y', 24 | 2: 'LT', 25 | 3: 'RAS -X', 26 | 4: 'RAS -Y', 27 | 5: 'RT', 28 | 6: 'DPAD-X', 29 | 7: 'DPAD-Y' 30 | } 31 | self.buttonNames = { 32 | 0: 'A', 33 | 1: 'B', 34 | 2: 'X', 35 | 3: 'Y', 36 | 4: 'L1', 37 | 5: 'R1', 38 | 6: 'L2', 39 | 7: 'R2', 40 | 8: 'SHARE', 41 | 9: 'LAS', 42 | 10: 'RAS', 43 | 11: 'L3', 44 | 12: 'R3' 45 | } 46 | self._setupReverseMaps() 47 | 48 | 49 | class XboxONE(Gamepad): 50 | fullName = 'Xbox ONE controller' 51 | 52 | def __init__(self, joystickNumber=0): 53 | Gamepad.__init__(self, joystickNumber) 54 | self.axisNames = { 55 | 0: 'LAS -X', # Left Analog Stick Left/Right 56 | 1: 'LAS -Y', # Left Analog Stick Up/Down 57 | 2: 'RAS -X', # Right Analog Stick Left/Right 58 | 3: 'RAS -Y', # Right Analog Stick Up/Down 59 | 4: 'RT', # Right Trigger 60 | 5: 'LT', # Left Trigger 61 | 6: 'DPAD -X', # D-Pad Left/Right 62 | 7: 'DPAD -Y' # D-Pad Up/Down 63 | } 64 | self.buttonNames = { 65 | 0: 'A', # A Button 66 | 1: 'B', # B Button 67 | 3: 'X', # X Button 68 | 4: 'Y', # Y Button 69 | 6: 'LB', # Left Bumper 70 | 7: 'RB', # Right Bumper 71 | 11: 'START', # Hamburger Button 72 | 12: 'HOME', # XBOX Button 73 | 13: 'LAS', # Left Analog Stick button 74 | 14: 'RAS' # Right Analog Stick button 75 | 76 | } 77 | self._setupReverseMaps() 78 | 79 | 80 | Xbox = XboxONE 81 | -------------------------------------------------------------------------------- /qdpgym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /qdpgym/utils/tf.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Tuple, Sequence, Union 3 | 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation as scipyRotation 6 | 7 | __all__ = ['Rotation', 'Quaternion', 'Rpy', 'Odometry', 8 | 'vnorm', 'vunit', 'vcross', 'estimate_normal', 9 | 'get_rpy_rate_from_ang_vel'] 10 | 11 | 12 | def vnorm(vec): 13 | return math.sqrt(sum(x ** 2 for x in vec)) 14 | 15 | 16 | def vunit(x) -> np.ndarray: 17 | return np.asarray(x) / vnorm(x) 18 | 19 | 20 | def vcross(vec3_1, vec3_2) -> np.ndarray: 21 | """ 22 | A much faster alternative for np.cross. 
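    Illustrative check: vcross((1., 0., 0.), (0., 1., 0.)) -> array([0., 0., 1.]).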
23 | """ 24 | (x1, y1, z1), (x2, y2, z2) = vec3_1, vec3_2 25 | return np.array((y1 * z2 - y2 * z1, z1 * x2 - z2 * x1, x1 * y2 - x2 * y1)) 26 | 27 | 28 | class Rotation(object): 29 | THRESHOLD = 1e-5 30 | 31 | @staticmethod 32 | def from_rpy(rpy): # zyx 33 | sr, sp, sy = np.sin(rpy) 34 | cr, cp, cy = np.cos(rpy) 35 | matrix = ((cy * cp, cy * sp * sr - sy * cr, cy * sp * cr + sy * sr), 36 | (sy * cp, sy * sp * sr + cy * cr, sy * sp * cr - cy * sr), 37 | (-sp, cp * sr, cp * cr)) 38 | return np.array(matrix) 39 | 40 | @staticmethod 41 | def from_quaternion(q): 42 | return scipyRotation.from_quat(q).as_matrix() 43 | 44 | @staticmethod 45 | def from_zaxis(z): 46 | if z[2] == 1.0: 47 | ref = np.array((1., 0., 0.)) 48 | else: 49 | ref = np.array((0., 0., 1.)) 50 | x = vcross(z, ref) 51 | x /= vnorm(x) 52 | y = vcross(z, x) 53 | return np.array((x, y, z)).T 54 | 55 | 56 | class Quaternion(object): 57 | """ 58 | q = [x y z w] = w + xi + yj + zk 59 | """ 60 | 61 | @staticmethod 62 | def identity(): 63 | return np.array((0., 0., 0., 1.)) 64 | 65 | @staticmethod 66 | def from_rpy(rpy): 67 | rpy_2 = np.asarray(rpy) / 2 68 | (sr, sp, sy), (cr, cp, cy) = np.sin(rpy_2), np.cos(rpy_2) 69 | return np.array(((sr * cp * cy - cr * sp * sy, 70 | cr * sp * cy + sr * cp * sy, 71 | cr * cp * sy - sr * sp * cy, 72 | cr * cp * cy + sr * sp * sy))) 73 | 74 | @staticmethod 75 | def from_rotation(r): 76 | # scipy api is faster than python implementation 77 | return scipyRotation.from_matrix(r).as_quat() 78 | 79 | @staticmethod 80 | def inverse(q): 81 | x, y, z, w = q 82 | return np.array((-x, -y, -z, w)) 83 | 84 | @staticmethod 85 | def from_wxyz(q): 86 | w, x, y, z = q 87 | return np.array((x, y, z, w)) 88 | 89 | @staticmethod 90 | def to_wxyz(q): 91 | x, y, z, w = q 92 | return np.array((w, x, y, z)) 93 | 94 | 95 | class Rpy(object): 96 | """ZYX intrinsic Euler Angles (roll, pitch, yaw)""" 97 | 98 | @staticmethod 99 | def from_quaternion(q): 100 | x, y, z, w = q 101 | return np.array((np.arctan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y)), 102 | np.arcsin(2 * (w * y - x * z)), 103 | np.arctan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)))) 104 | 105 | @classmethod 106 | def from_rotation(cls, r): 107 | return scipyRotation.from_matrix(r).as_euler('ZYX', degrees=False)[::-1] 108 | 109 | 110 | def get_rpy_rate_from_ang_vel(rpy, angular): 111 | r, p, y = rpy 112 | sp, cp = math.sin(p), math.cos(p) 113 | cr, tr = math.cos(r), math.tan(r) 114 | trans = np.array(((1, sp * tr, cp * tr), 115 | (0, cp, -sp), 116 | (0, sp / cr, cp / cr))) 117 | return np.dot(trans, angular) 118 | 119 | 120 | FLOAT3 = Tuple[float, float, float] 121 | 122 | 123 | def estimate_normal(points: Union[Sequence[FLOAT3], np.ndarray]): 124 | """ 125 | Estimate the normal of terrain from a set of points. 126 | The terrain CANNOT be vertical. 127 | :param points: a set of terrain points 128 | :return: np.ndarray, the normal vector 129 | """ 130 | X, Y, Z = np.array(points).T 131 | A = np.zeros((3, 3)) 132 | A[0, :] = np.sum(X ** 2), X @ Y, np.sum(X) 133 | A[1, :] = A[0, 1], np.sum(Y ** 2), np.sum(Y) 134 | A[2, :] = A[0, 2], A[1, 2], len(X) 135 | b = np.array((X @ Z, Y @ Z, np.sum(Z))) 136 | a, b, _ = np.linalg.solve(A, b) 137 | return vunit((-a, -b, 1)) 138 | 139 | 140 | ARR_ZERO3 = (0., 0., 0.) 
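# Illustrative round trip through the helpers above (not part of the original file),
# as a sanity check of the conventions used here: ZYX intrinsic roll-pitch-yaw and
# quaternions stored as [x, y, z, w].
#
#     rpy = np.array((0.1, -0.2, 0.3))
#     R = Rotation.from_rpy(rpy)
#     q = Quaternion.from_rotation(R)
#     assert np.allclose(Rpy.from_quaternion(q), rpy)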
141 | ARR_EYE3 = (ARR_ZERO3,) * 3 142 | 143 | 144 | class Odometry(object): 145 | """ 146 | Homogeneous coordinate transformation defined by a rotation and a translation: 147 | ((R_3x3, t_1x3) 148 | (0_3x1, 1_1x1)) 149 | """ 150 | 151 | def __init__(self, rotation=ARR_EYE3, translation=ARR_ZERO3): 152 | self.rotation, self.translation = np.asarray(rotation), np.asarray(translation) 153 | 154 | def multiply(self, other): 155 | if isinstance(other, Odometry): 156 | return Odometry(self.rotation @ other.rotation, 157 | self.translation + self.rotation @ other.translation) 158 | if isinstance(other, np.ndarray): 159 | return self.rotation @ other + self.translation 160 | raise RuntimeError(f'Unsupported datatype `{type(other)}` for multiplying') 161 | 162 | def __matmul__(self, other): 163 | return self.multiply(other) 164 | 165 | def __imatmul__(self, other): 166 | self.rotation @= other.rotation 167 | self.translation += self.rotation @ other.translation 168 | 169 | def __repr__(self): 170 | return str(np.concatenate((self.rotation, np.expand_dims(self.translation, axis=1)), axis=1)) 171 | -------------------------------------------------------------------------------- /qdpgym/utils/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import math 4 | import re 5 | import time 6 | from datetime import datetime 7 | from typing import Sequence, Callable, Optional, Union 8 | 9 | import numpy as np 10 | import yaml 11 | 12 | __all__ = ['make_part', 'Angle', 'replace_is', 'replace_eq', 'colored_str', 'log', 13 | 'MfTimer', 'get_padded', 'PadWrapper', 'get_timestamp', 'YamlLoader', 14 | 'print_return', 'Natural', 'plt_figure_to_numpy'] 15 | 16 | 17 | class make_part(functools.partial): 18 | """ 19 | Predefine some init parameters of a class without creating an instance. 
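    Illustrative example (all names hypothetical):
        LargePool = make_part(ThreadPool, num_workers=8)
        pool = LargePool()          # equivalent to ThreadPool(num_workers=8)
        LargePool.some_class_attr   # attribute lookup is forwarded to ThreadPool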
20 | """ 21 | 22 | def __getattr__(self, item): 23 | return getattr(self.func, item) 24 | 25 | 26 | PI, TAU = math.pi, math.tau 27 | 28 | 29 | class Angle(object): 30 | @staticmethod 31 | def norm(x): 32 | """normalize an angle to [-pi, pi)""" 33 | if isinstance(x, np.ndarray): 34 | return x - ((x + PI) / TAU).astype(int) * TAU 35 | if -PI <= x < PI: 36 | return x 37 | return x - int((x + PI) / TAU) * TAU 38 | 39 | @staticmethod 40 | def mean(lst: Sequence[float]): 41 | _sum = last = lst[0] 42 | for i, a in enumerate(lst): 43 | if i == 0: 44 | continue 45 | last = Angle.near(last, a) 46 | _sum += last 47 | return Angle.norm(_sum / len(lst)) 48 | 49 | @staticmethod 50 | def near(x, y): 51 | if x > y + PI: 52 | y += TAU 53 | elif x < y - PI: 54 | y -= TAU 55 | return y 56 | 57 | @staticmethod 58 | def to_deg(rad): 59 | return rad / math.pi * 180 60 | 61 | 62 | def get_padded(queue: Sequence, idx): 63 | if idx < 0: 64 | if len(queue) >= -idx: 65 | return queue[idx] 66 | else: 67 | return queue[0] 68 | else: 69 | if len(queue) > idx: 70 | return queue[idx] 71 | else: 72 | return queue[-1] 73 | 74 | 75 | class PadWrapper(object): 76 | def __init__(self, seq: Sequence): 77 | self.seq = seq 78 | self.is_empty = not self.seq 79 | 80 | def __getitem__(self, item): 81 | if self.is_empty: 82 | raise IndexError('Empty Sequence') 83 | else: 84 | return get_padded(self.seq, item) 85 | 86 | def __len__(self): 87 | return len(self.seq) 88 | 89 | def __repr__(self): 90 | return f'PadWrapper[{self.seq}]' 91 | 92 | 93 | def replace_is(seq: list, src, dst): 94 | for idx, item in enumerate(seq): 95 | if item is src: 96 | seq[idx] = dst 97 | 98 | 99 | def replace_eq(seq: list, src, dst): 100 | for idx, item in enumerate(seq): 101 | if item == src: 102 | seq[idx] = dst 103 | 104 | 105 | class MfTimer(object): 106 | """ 107 | Multifunctional Timer. 
Typical usage example: 108 | 109 | with MfTimer() as timer: 110 | do_something() 111 | print(timer.time_spent) 112 | """ 113 | 114 | @classmethod 115 | def start_now(cls): 116 | timer = MfTimer() 117 | timer.start() 118 | return timer 119 | 120 | @classmethod 121 | def record(cls, func: Callable, *args, **kwargs): 122 | start_time = time.time() 123 | func(*args, **kwargs) 124 | end_time = time.time() 125 | return end_time - start_time 126 | 127 | def __init__(self): 128 | self._start_time = self._end_time = None 129 | 130 | def start(self): 131 | self._start_time = time.time() 132 | 133 | def __enter__(self): 134 | self._start_time = time.time() 135 | return self 136 | 137 | def end(self, verbose=False): 138 | self._end_time = time.time() 139 | if verbose: 140 | print(self.time_spent) 141 | return self.time_spent 142 | 143 | def __exit__(self, exc_type, exc_val, exc_tb): 144 | self._end_time = time.time() 145 | 146 | @property 147 | def time_spent(self): 148 | return self._end_time - self._start_time 149 | 150 | 151 | BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) 152 | RESET_SEQ = "\033[0m" 153 | COLOR_SEQ = "\033[1;%dm" 154 | 155 | 156 | def colored_str(content: str, color: str): 157 | try: 158 | color = eval(color.upper()) 159 | except NameError: 160 | raise RuntimeError(f'No color named {color}') 161 | return COLOR_SEQ % (30 + color) + content + RESET_SEQ 162 | 163 | 164 | class ColoredFormatter(logging.Formatter): 165 | COLORS = { 166 | 'WARNING': YELLOW, 167 | 'INFO': WHITE, 168 | 'DEBUG': BLUE, 169 | 'ERROR': RED, 170 | 'CRITICAL': MAGENTA, 171 | } 172 | 173 | def __init__(self, *args, **kwargs): 174 | super().__init__(*args, **kwargs) 175 | self.use_color = True 176 | 177 | def format(self, record): 178 | lvl = record.levelname 179 | message = str(record.msg) 180 | funcName = record.funcName 181 | if lvl in self.COLORS: 182 | lvl_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + lvl + RESET_SEQ 183 | msg_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + message + RESET_SEQ 184 | funcName_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + funcName + RESET_SEQ 185 | record.levelname = lvl_colored 186 | record.msg = msg_colored 187 | record.funcName = funcName_colored 188 | return super().format(record) 189 | 190 | 191 | class _Log(object): 192 | logger: Optional[logging.Logger] = None 193 | 194 | @classmethod 195 | def init_logger(cls, name='logger', 196 | log_level: int = logging.INFO, 197 | log_fmt='[%(asctime)s] %(message)s', 198 | date_fmt='%b%d %H:%M:%S', 199 | log_dir=None, 200 | client_ip='127.0.0.1'): 201 | np.set_printoptions(3, linewidth=10000, suppress=True) 202 | # torch.set_printoptions(linewidth=10000, profile='short') 203 | cls.logger = logging.getLogger(name) 204 | cls.logger.setLevel(log_level) 205 | # if log_dir: 206 | # os.makedirs(log_dir) 207 | # fh = logging.FileHandler(os.path.join(log_dir, 'log.txt')) 208 | # fh.setLevel(logging.DEBUG) 209 | # cls.logger.addHandler(fh) 210 | # soh = SocketHandler(client_ip, 19996) 211 | # soh = SocketHandler('10.12.120.120', 19996) 212 | # soh.setFormatter(logging.Formatter()) 213 | # cls.logger.addHandler(soh) 214 | formatter = ColoredFormatter(log_fmt, date_fmt) 215 | sh = logging.StreamHandler() 216 | sh.setLevel(log_level) 217 | sh.setFormatter(formatter) 218 | cls.logger.addHandler(sh) 219 | cls.logger.propagate = False 220 | 221 | @classmethod 222 | def set_logger_level(cls, log_level: str): 223 | level_dict = {'DEBUG': logging.DEBUG, 224 | 'INFO': logging.INFO, 225 | 'WARNING': logging.WARNING, 226 | 'ERROR': 
logging.ERROR, 227 | 'CRITICAL': logging.CRITICAL} 228 | 229 | cls.logger.handlers[-1].setLevel(level_dict[log_level.upper()]) 230 | 231 | @classmethod 232 | def debug(cls, *args, **kwargs): 233 | if cls.logger is None: 234 | cls.init_logger(log_level=logging.DEBUG) 235 | return cls.logger.debug(*args, **kwargs) 236 | 237 | @classmethod 238 | def info(cls, *args, **kwargs): 239 | if cls.logger is None: 240 | cls.init_logger(log_level=logging.DEBUG) 241 | return cls.logger.info(*args, **kwargs) 242 | 243 | @classmethod 244 | def warn(cls, *args, **kwargs): 245 | if cls.logger is None: 246 | cls.init_logger(log_level=logging.DEBUG) 247 | return cls.logger.warning(*args, **kwargs) 248 | 249 | @classmethod 250 | def error(cls, *args, **kwargs): 251 | if cls.logger is None: 252 | cls.init_logger(log_level=logging.DEBUG) 253 | return cls.logger.error(*args, **kwargs) 254 | 255 | @classmethod 256 | def critical(cls, *args, **kwargs): 257 | if cls.logger is None: 258 | cls.init_logger(log_level=logging.DEBUG) 259 | return cls.logger.critical(*args, **kwargs) 260 | 261 | 262 | log = _Log 263 | 264 | 265 | def get_timestamp(timestamp: Union[int, float] = None) -> str: 266 | datetime_ = datetime.fromtimestamp(timestamp) if timestamp else datetime.now() 267 | return datetime_.strftime('%y-%m-%d_%H-%M-%S') 268 | 269 | 270 | class _NamespaceWrapper(object): 271 | def __init__(self, data: dict): 272 | self._data = data 273 | 274 | def __getattr__(self, item): 275 | try: 276 | value = self._data[item] 277 | except KeyError: 278 | value = getattr(self._data, item) 279 | if isinstance(value, dict): 280 | return _NamespaceWrapper(value) 281 | else: 282 | return value 283 | 284 | def __setattr__(self, key, value): 285 | if key.startswith('_'): 286 | super().__setattr__(key, value) 287 | else: 288 | self._data[key] = value 289 | 290 | def __getitem__(self, item): 291 | return self._data[item] 292 | 293 | def __setitem__(self, key, value): 294 | self._data[key] = value 295 | 296 | def __repr__(self): 297 | return f'Wrapper({self._data})' 298 | 299 | 300 | class YamlLoader(_NamespaceWrapper): 301 | def __init__(self, path): 302 | loader = yaml.SafeLoader 303 | # See https://stackoverflow.com/questions/30458977/yaml-loads-5e-6-as-string-and-not-a-number 304 | loader.add_implicit_resolver( 305 | u'tag:yaml.org,2002:float', 306 | re.compile(u'''^(?: 307 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? 308 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) 309 | |\\.[0-9_]+(?:[eE][-+][0-9]+)? 
310 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* 311 | |[-+]?\\.(?:inf|Inf|INF) 312 | |\\.(?:nan|NaN|NAN))$''', re.X), 313 | list(u'-+0123456789.')) 314 | 315 | with open(path, encoding='utf-8') as f: 316 | super().__init__(yaml.load(f, Loader=loader)) 317 | 318 | @property 319 | def data(self): 320 | return self._data 321 | 322 | @property 323 | def args(self): 324 | return () 325 | 326 | @property 327 | def kwargs(self): 328 | return {} 329 | 330 | 331 | def print_return(func): 332 | def wrapper(*args, **kwargs): 333 | ret = func(*args, **kwargs) 334 | print(ret) 335 | return ret 336 | 337 | return wrapper 338 | 339 | 340 | class Natural(object): 341 | idx = -1 342 | 343 | def __iter__(self): 344 | return self 345 | 346 | def __next__(self): 347 | self.idx += 1 348 | return self.idx 349 | 350 | 351 | def plt_figure_to_numpy(fig): 352 | fig.canvas.draw() 353 | arr = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) 354 | arr = arr.reshape(fig.canvas.get_width_height()[::-1] + (3,)) 355 | return arr 356 | -------------------------------------------------------------------------------- /resources/lin.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/resources/lin.gif -------------------------------------------------------------------------------- /resources/rot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/resources/rot.gif --------------------------------------------------------------------------------