├── .deepsource.toml ├── .gitignore ├── README.md ├── example ├── __init__.py ├── lctv0.yaml ├── loct │ ├── __init__.py │ ├── network.py │ ├── test.py │ ├── train.py │ └── utils.py ├── test │ ├── __init__.py │ └── test_tianshou.py └── utils.py ├── qdpgym ├── __init__.py ├── sim │ ├── __init__.py │ ├── abc.py │ ├── app.py │ ├── blt │ │ ├── __init__.py │ │ ├── env.py │ │ ├── hooks.py │ │ ├── quadruped.py │ │ ├── terrain.py │ │ └── utils.py │ ├── common │ │ ├── __init__.py │ │ ├── identify.py │ │ ├── motor.py │ │ ├── noisyhandle.py │ │ └── tg.py │ ├── mjc │ │ ├── __init__.py │ │ ├── env.py │ │ ├── hooks.py │ │ ├── quadruped.py │ │ ├── terrain.py │ │ └── viewer.py │ ├── resources │ │ ├── acnet_220526.pt │ │ ├── actuator_net_with_history.pt │ │ └── aliengo │ │ │ ├── meshes │ │ │ ├── calf.stl │ │ │ ├── hip.stl │ │ │ ├── thigh.stl │ │ │ ├── thigh_mirror.stl │ │ │ └── trunk.stl │ │ │ └── model │ │ │ ├── aliengo.urdf │ │ │ └── aliengo.xml │ └── task.py ├── tasks │ ├── __init__.py │ ├── identify │ │ ├── __init__.py │ │ └── identify.py │ └── loct │ │ ├── __init__.py │ │ ├── app.py │ │ ├── loct.py │ │ ├── reward.py │ │ ├── sr_reward.py │ │ └── utils.py ├── tests │ ├── __init__.py │ ├── common.py │ └── test_pybullet.py ├── thirdparty │ ├── __init__.py │ └── gamepad │ │ ├── __init__.py │ │ ├── controllers.py │ │ └── gamepad.py └── utils │ ├── __init__.py │ ├── tf.py │ └── utils.py └── resources ├── lin.gif └── rot.gif /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | [[analyzers]] 4 | name = "python" 5 | enabled = true 6 | 7 | [analyzers.meta] 8 | runtime_version = "3.x.x" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # .idea folder 10 | .idea/ 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # customize 140 | log/ 141 | log_wuzhen/ 142 | MUJOCO_LOG.TXT 143 | *.pth 144 | .vscode/ 145 | .DS_Store 146 | *.zip 147 | *.pstats 148 | *.swp 149 | *.pkl 150 | *.hdf5 151 | wandb/ 152 | videos/ 153 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quadruped Locomotion Reinforcement Learning 2 | 3 | This is an environment for learning quadrupedal locomotion, built on 4 | either the [bullet physics engine](https://github.com/bulletphysics/bullet3) 5 | or [mujoco](https://github.com/deepmind/mujoco) together with 6 | [dm_control](https://github.com/deepmind/dm_control) 7 | (the latter backend is not implemented yet). It provides 8 | 9 | - Interfaces for quadruped states and environmental states, plus terrain creation from elevation maps 10 | - Motor identification via an actuator network 11 | - Curriculum learning on disturbance and terrain 12 | - Rich visualization of commands, forces, torques, terrain and trajectories 13 | 14 | [tianshou](https://github.com/thu-ml/tianshou) is used for training in 15 | our examples. 16 | 17 | [//]: # (- Imitation Learning from privileged teacher to proprioceptive student) 18 | 19 |
20 | linear 21 | rotate 22 |
 
23 | 24 | Requirements: `python>=3.7`, `pybullet>=3.2`, `torch>=1.10`, `numpy`, `scipy`, `wandb` 25 | 26 | To train, run: 27 | ```bash 28 | PYTHONPATH=./ python example/loct/train.py --task example/lctv0.yaml \ 29 | --batch-size 8192 --repeat-per-collect 8 --step-per-epoch 160000 \ 30 | --norm-adv 1 --run-name example 31 | ``` 32 | -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/__init__.py -------------------------------------------------------------------------------- /example/lctv0.yaml: -------------------------------------------------------------------------------- 1 | terrain: plain 2 | perturb: True 3 | 4 | reward_coeff: 0.05 5 | reward_cfg: 6 | LinearVelocityReward: 0.34 7 | RotationReward: 0.24 8 | # YawRateReward: 0.12 9 | VerticalLinearPenalty: 0.04 10 | RollPitchRatePenalty: 0.04 11 | BodyPosturePenalty: 0.04 12 | FootSlipPenalty: 0.04 13 | BodyCollisionPenalty: 0.04 14 | JointConstraintPenalty: 0.04 15 | TorquePenalty: 0.02 16 | JointMotionPenalty: 0.04 17 | ActionSmoothnessReward: 0.04 18 | ClearanceOverTerrainReward: 0.02 19 | BodyHeightReward: 0.0 20 | 21 | # VelocityReward: 1.5 22 | # RotationReward: 0.75 23 | # BodyMotionPenalty: 1.0 24 | # FootClearanceReward: 0.003 25 | # BodyCollisionPenalty: 0.1 26 | # JointMotionPenalty: 0.001 27 | # TargetSmoothnessReward: 0.003 28 | # TorquePenalty: 1.0e-6 29 | # SlipPenalty: 0.003 -------------------------------------------------------------------------------- /example/loct/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/loct/__init__.py -------------------------------------------------------------------------------- /example/loct/network.py: -------------------------------------------------------------------------------- 1 | from typing import Type, Union, Any, Dict, Tuple 2 | 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class ActorNetMLP(nn.Module): 9 | """ 10 | extero_obs -> extero_layers ->| 11 | |-> concat -> action_layers -> action 12 | proprio_obs -> proprio_layers ->| 13 | """ 14 | 15 | def __init__( 16 | self, 17 | extero_obs_dim, 18 | real_world_obs_dim, 19 | extero_layer_dims=(72, 64), 20 | locomotion_layer_dims=(), 21 | action_layer_dims=(256, 160, 128), 22 | activation: Type[nn.Module] = nn.Tanh, 23 | device='cpu' 24 | ): 25 | super().__init__() 26 | self.input_dim = extero_obs_dim + real_world_obs_dim 27 | self.extero_obs_dim = extero_obs_dim 28 | self.real_world_obs_dim = real_world_obs_dim 29 | extero_layers, locomotion_layers, action_layers = [], [], [] 30 | self.extero_obs_dim = extero_feature_dim = extero_obs_dim 31 | self.device = torch.device(device) 32 | if extero_layer_dims: 33 | for dim in extero_layer_dims: 34 | extero_layers.append(nn.Linear(extero_feature_dim, dim)) 35 | extero_layers.append(activation()) 36 | extero_feature_dim = dim 37 | 38 | self.real_world_obs_dim = locomotion_feature_dim = real_world_obs_dim 39 | if locomotion_layer_dims: 40 | for dim in locomotion_layer_dims: 41 | locomotion_layers.append(nn.Linear(locomotion_feature_dim, dim)) 42 | locomotion_layers.append(activation()) 43 | locomotion_feature_dim = dim 44 | 45 | action_feature_dim = extero_feature_dim + 
locomotion_feature_dim 46 | for dim in action_layer_dims: 47 | action_layers.append(nn.Linear(action_feature_dim, dim)) 48 | action_layers.append(activation()) 49 | action_feature_dim = dim 50 | self.output_dim = action_feature_dim 51 | 52 | self.extero_layers = nn.Sequential(*extero_layers) 53 | self.locomotion_layers = nn.Sequential(*locomotion_layers) 54 | self.action_layers = nn.Sequential(*action_layers) 55 | 56 | def forward(self, x): 57 | if self.device is not None: 58 | x = torch.as_tensor(x, device=self.device, dtype=torch.float32) 59 | extero_obs, real_world_obs = x[..., :self.extero_obs_dim], x[..., self.extero_obs_dim:] 60 | extero_feature, locomotion_feature = self.extero_layers(extero_obs), self.locomotion_layers(real_world_obs) 61 | return self.action_layers(torch.cat((extero_feature, locomotion_feature), dim=-1)) 62 | 63 | 64 | class ActorNet(nn.Module): 65 | def __init__( 66 | self, 67 | extero_obs_dim, 68 | real_world_obs_dim, 69 | extero_layer_dims=(72, 64), 70 | locomotion_layer_dims=(), 71 | action_layer_dims=(256, 160, 128), 72 | activation: Type[nn.Module] = nn.Tanh, 73 | device='cpu' 74 | ) -> None: 75 | super().__init__() 76 | self.device = device 77 | self.model = ActorNetMLP( 78 | extero_obs_dim, 79 | real_world_obs_dim, 80 | extero_layer_dims, 81 | locomotion_layer_dims, 82 | action_layer_dims, 83 | activation, 84 | device, 85 | ) 86 | self.output_dim = self.model.output_dim 87 | 88 | def forward( 89 | self, 90 | obs: Union[np.ndarray, torch.Tensor], 91 | state: Any = None, 92 | info: Dict[str, Any] = {}, 93 | ) -> Tuple[torch.Tensor, Any]: 94 | """Mapping: obs -> flatten (inside MLP)-> logits.""" 95 | logits = self.model(obs) 96 | return logits, state 97 | -------------------------------------------------------------------------------- /example/loct/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | 5 | import numpy as np 6 | import torch 7 | import yaml 8 | from tianshou.data import Collector 9 | from tianshou.policy import PPOPolicy 10 | from tianshou.utils.net.common import Net 11 | from tianshou.utils.net.continuous import ActorProb, Critic 12 | from torch import nn 13 | from torch.distributions import Independent, Normal 14 | 15 | from example.loct.network import ActorNet 16 | from example.utils import NormObsWrapper 17 | from qdpgym import sim 18 | from qdpgym.tasks.loct import LocomotionV0, GamepadCommanderHook 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("--task", type=str, required=True) 24 | parser.add_argument("--seed", type=int, default=0) 25 | parser.add_argument("--render", type=float, default=0.) 
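 # Note: --render is forwarded to Collector.collect(render=...); in tianshou this is the # sleep time between rendered frames (0 renders without an extra delay). 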
26 | parser.add_argument( 27 | "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu" 28 | ) 29 | parser.add_argument("--resume-path", type=str, default=None) 30 | return parser.parse_args() 31 | 32 | 33 | def make_loct_env(cfg): 34 | torch.set_num_threads(1) 35 | robot = sim.Aliengo(500, 'actuator_net', noisy=True) 36 | task = LocomotionV0() 37 | # task = LocomotionPMTG() 38 | 39 | if cfg['terrain'] == 'random': 40 | arena = sim.NullTerrain() 41 | task.add_hook(sim.RandomTerrainHook()) 42 | elif cfg['terrain'] == 'plain': 43 | arena = sim.Plain() 44 | else: 45 | raise ValueError(f'Unknown terrain `{cfg.terrain}`') 46 | if cfg['perturb']: 47 | task.add_hook(sim.RandomPerturbHook()) 48 | 49 | task.add_hook(GamepadCommanderHook()) 50 | # task.add_hook(sim.HeightSampleVisualizer()) 51 | # task.add_hook(RandomCommanderHookV0()) 52 | # task.add_hook(sim.VideoRecorderHook()) 53 | task.add_hook(sim.ExtraViewerHook()) 54 | for reward, weight in cfg['reward_cfg'].items(): 55 | task.add_reward(reward, weight) 56 | 57 | if 'reward_coeff' in cfg: 58 | task.set_reward_coeff(cfg['reward_coeff']) 59 | env = sim.QuadrupedEnv(robot, arena, task) 60 | return env 61 | 62 | 63 | def test_ppo(args): 64 | with open(args.task, encoding='utf-8') as f: 65 | task_cfg = yaml.load(f, Loader=yaml.SafeLoader) 66 | 67 | def make_env(): 68 | return make_loct_env(task_cfg) 69 | 70 | obs_norm = False 71 | env = make_env() 72 | if obs_norm: 73 | env = NormObsWrapper(env) 74 | 75 | # seed 76 | np.random.seed(args.seed) 77 | torch.manual_seed(args.seed) 78 | # model 79 | net_a = ActorNet(78, 132, device=args.device) 80 | actor = ActorProb( 81 | net_a, 82 | env.action_space.shape, 83 | unbounded=True, 84 | device=args.device, 85 | ).to(args.device) 86 | net_c = Net( 87 | env.observation_space.shape, 88 | hidden_sizes=(256, 256, 256), 89 | activation=nn.Tanh, 90 | device=args.device, 91 | ) 92 | critic = Critic(net_c, device=args.device).to(args.device) 93 | optim = torch.optim.AdamW( 94 | list(actor.parameters()) + list(critic.parameters()) 95 | ) 96 | 97 | policy = PPOPolicy( 98 | actor, critic, optim, 99 | lambda *logits: Independent(Normal(*logits), 1) 100 | ) 101 | # load a previous policy 102 | if args.resume_path: 103 | ckpt = torch.load(args.resume_path, map_location=args.device) 104 | policy.load_state_dict(ckpt["model"]) 105 | if obs_norm: 106 | env.set_obs_rms(ckpt['obs_rms']) 107 | print("Loaded agent from: ", args.resume_path) 108 | test_collector = Collector(policy, env) 109 | 110 | # Let's watch its performance! 
111 | policy.eval() 112 | test_collector.reset() 113 | result = test_collector.collect(n_episode=100, render=args.render) 114 | print(f'Final reward: {result["rews"].mean()}, length: {result["lens"].mean()}') 115 | 116 | 117 | if __name__ == "__main__": 118 | args = parse_args() 119 | test_ppo(args) 120 | -------------------------------------------------------------------------------- /example/loct/train.py: -------------------------------------------------------------------------------- 1 | import pybullet 2 | import argparse 3 | import os 4 | import pprint 5 | 6 | import numpy as np 7 | import torch 8 | import wandb 9 | import yaml 10 | from tianshou.data import Collector, ReplayBuffer, VectorReplayBuffer 11 | from tianshou.env import ShmemVectorEnv, VectorEnvNormObs 12 | from tianshou.policy import PPOPolicy 13 | from tianshou.trainer import onpolicy_trainer 14 | from tianshou.utils.net.common import Net 15 | from tianshou.utils.net.continuous import ActorProb, Critic 16 | from torch import nn 17 | from torch.distributions import Independent, Normal 18 | from torch.optim.lr_scheduler import LambdaLR 19 | 20 | from example.loct.network import ActorNet 21 | from example.utils import init_actor_critic, MyWandbLogger 22 | from qdpgym import sim 23 | from qdpgym.tasks import loct 24 | from qdpgym.utils import get_timestamp 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("--task", type=str, required=True) 30 | parser.add_argument("--seed", type=int, default=0) 31 | parser.add_argument("--buffer-size", type=int, default=4096) 32 | parser.add_argument("--lr", type=float, default=1e-4) 33 | parser.add_argument("--gamma", type=float, default=0.99) 34 | parser.add_argument("--epoch", type=int, default=100) 35 | parser.add_argument("--step-per-epoch", type=int, default=30000) 36 | parser.add_argument("--step-per-collect", type=int, default=8192) 37 | parser.add_argument("--repeat-per-collect", type=int, default=8) 38 | parser.add_argument("--batch-size", type=int, default=8192) 39 | parser.add_argument("--training-num", type=int, default=64) 40 | parser.add_argument("--test-num", type=int, default=10) 41 | parser.add_argument("--obs-norm", type=int, default=0) 42 | 43 | # ppo special 44 | parser.add_argument("--rew-norm", type=int, default=1) 45 | parser.add_argument("--vf-coef", type=float, default=1.0) 46 | parser.add_argument("--ent-coef", type=float, default=0.0) 47 | parser.add_argument("--gae-lambda", type=float, default=0.95) 48 | parser.add_argument("--bound-action-method", type=str, default="tanh") 49 | parser.add_argument("--lr-decay", type=int, default=True) 50 | parser.add_argument("--max-grad-norm", type=float, default=1.0) 51 | parser.add_argument("--eps-clip", type=float, default=0.2) 52 | parser.add_argument("--dual-clip", type=float, default=None) 53 | parser.add_argument("--value-clip", type=int, default=0) 54 | parser.add_argument("--norm-adv", type=int, default=1) 55 | parser.add_argument("--recompute-adv", type=int, default=0) 56 | parser.add_argument("--logdir", type=str, default="log") 57 | parser.add_argument( 58 | "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu" 59 | ) 60 | parser.add_argument("--run-name", type=str, required=True) 61 | parser.add_argument("--resume-path", type=str, default=None) 62 | parser.add_argument("--resume-id", type=str, default=None) 63 | parser.add_argument("--wandb-project", type=str, default="loct") 64 | return parser.parse_args() 65 | 66 | 67 | if __name__ == 
"__main__": 68 | args = parse_args() 69 | with open(args.task, encoding='utf-8') as f: 70 | task_cfg = yaml.load(f, Loader=yaml.SafeLoader) 71 | 72 | # commander_core = loct.ISCommanderCore('RotationReward', 100) 73 | commander_core = loct.AlpISCommanderCore('RotationReward') 74 | commander_core.start_process() 75 | 76 | 77 | def make_loct_env(cfg, train=True): 78 | torch.set_num_threads(1) 79 | robot = sim.Aliengo(500, 'actuator_net', noisy=True) 80 | task = loct.LocomotionV0() 81 | 82 | if cfg['terrain'] == 'random': 83 | arena = sim.NullTerrain() 84 | task.add_hook(sim.RandomTerrainHook()) 85 | elif cfg['terrain'] == 'plain': 86 | arena = sim.Plain() 87 | else: 88 | raise ValueError(f'Unknown terrain `{cfg.terrain}`') 89 | if cfg['perturb']: 90 | task.add_hook(sim.RandomPerturbHook()) 91 | if train: 92 | task.add_hook(commander_core.make_collector()) 93 | # task.add_hook(loct.RandomRotationCommanderHook()) 94 | task.add_hook(commander_core.make_commander()) 95 | else: 96 | task.add_hook(loct.RandomRotationCommanderHook()) 97 | for reward, weight in cfg['reward_cfg'].items(): 98 | task.add_reward(reward, weight) 99 | 100 | if 'reward_coeff' in cfg: 101 | task.set_reward_coeff(cfg['reward_coeff']) 102 | env = sim.QuadrupedEnv(robot, arena, task) 103 | return env 104 | 105 | 106 | env = make_loct_env(task_cfg) 107 | train_envs = ShmemVectorEnv([ 108 | lambda: make_loct_env(task_cfg) 109 | for _ in range(args.training_num) 110 | ]) 111 | test_envs = ShmemVectorEnv([ 112 | lambda: make_loct_env(task_cfg, False) 113 | for _ in range(args.test_num) 114 | ]) if args.test_num else None 115 | 116 | if args.obs_norm: 117 | # obs norm wrapper 118 | train_envs = VectorEnvNormObs(train_envs) 119 | test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False) 120 | test_envs.set_obs_rms(train_envs.get_obs_rms()) 121 | 122 | obs_shape = env.observation_space.shape or env.observation_space.n 123 | action_shape = env.action_space.shape or env.action_space.n 124 | print("Observations shape:", obs_shape) 125 | print("Actions shape:", action_shape) 126 | 127 | # seed 128 | np.random.seed(args.seed) 129 | torch.manual_seed(args.seed) 130 | # model 131 | net_a = ActorNet(78, 132, device=args.device) 132 | actor = ActorProb( 133 | net_a, 134 | action_shape, 135 | max_action=1.0, 136 | unbounded=True, 137 | device=args.device, 138 | ).to(args.device) 139 | net_c = Net( 140 | obs_shape, 141 | hidden_sizes=(256, 256, 256), 142 | activation=nn.Tanh, 143 | device=args.device, 144 | ) 145 | critic = Critic(net_c, device=args.device).to(args.device) 146 | torch.nn.init.constant_(actor.sigma_param, -2.3) 147 | optim = torch.optim.AdamW( 148 | list(actor.parameters()) + list(critic.parameters()), lr=args.lr 149 | ) 150 | init_actor_critic(actor, critic) 151 | 152 | lr_scheduler = None 153 | if args.lr_decay: 154 | # decay learning rate to 0 linearly 155 | max_update_num = np.ceil( 156 | args.step_per_epoch / args.step_per_collect 157 | ) * args.epoch 158 | 159 | lr_scheduler = LambdaLR( 160 | optim, lr_lambda=lambda epoch: 1 - epoch / max_update_num 161 | ) 162 | 163 | 164 | def dist(*logits): 165 | return Independent(Normal(*logits), 1) 166 | 167 | 168 | policy = PPOPolicy( 169 | actor, 170 | critic, 171 | optim, 172 | dist, 173 | discount_factor=args.gamma, 174 | gae_lambda=args.gae_lambda, 175 | max_grad_norm=args.max_grad_norm, 176 | vf_coef=args.vf_coef, 177 | ent_coef=args.ent_coef, 178 | reward_normalization=args.rew_norm, 179 | action_scaling=True, 180 | action_bound_method=args.bound_action_method, 181 | 
lr_scheduler=lr_scheduler, 182 | action_space=env.action_space, 183 | eps_clip=args.eps_clip, 184 | value_clip=args.value_clip, 185 | dual_clip=args.dual_clip, 186 | advantage_normalization=args.norm_adv, 187 | recompute_advantage=args.recompute_adv, 188 | ) 189 | 190 | # load a previous policy 191 | if args.resume_path: 192 | ckpt = torch.load(args.resume_path, map_location=args.device) 193 | policy.load_state_dict(ckpt["model"]) 194 | if args.obs_norm: 195 | train_envs.set_obs_rms(ckpt["obs_rms"]) 196 | test_envs.set_obs_rms(ckpt["obs_rms"]) 197 | print("Loaded agent from: ", args.resume_path) 198 | 199 | # logger 200 | logger = MyWandbLogger( 201 | project=args.wandb_project, 202 | name=args.run_name, 203 | run_id=args.resume_id, 204 | config=args, 205 | ) 206 | start_time, run_name, run_id = wandb.run.start_time, wandb.run.name, wandb.run.id 207 | log_path = os.path.join(args.logdir, f'{get_timestamp(start_time)}#{run_name}@{run_id}') 208 | os.makedirs(log_path) 209 | 210 | # collector 211 | if args.training_num > 1: 212 | buffer = VectorReplayBuffer(args.step_per_collect, len(train_envs)) 213 | else: 214 | buffer = ReplayBuffer(args.step_per_collect) 215 | 216 | train_collector = Collector( 217 | policy, train_envs, buffer, 218 | preprocess_fn=logger.collect_reward_info, 219 | exploration_noise=True 220 | ) 221 | test_collector = Collector( 222 | policy, test_envs, 223 | preprocess_fn=logger.collect_reward_info, 224 | ) if test_envs is not None else None 225 | 226 | 227 | def analyse_callback(): 228 | fig1, fig2 = commander_core.analyse() 229 | return { 230 | 'curricula/reward': wandb.Image(fig1, mode='RGB'), 231 | 'curricula/weight': wandb.Image(fig2, mode='RGB'), 232 | } 233 | 234 | 235 | logger.add_callback( 236 | analyse_callback, 'test' 237 | ) 238 | 239 | 240 | def save_best_fn(policy): 241 | state = {"model": policy.state_dict()} 242 | if args.obs_norm: 243 | state["obs_rms"] = train_envs.get_obs_rms() 244 | torch.save(state, os.path.join(log_path, "policy.pth")) 245 | 246 | 247 | def save_checkpoint_fn(epoch: int, env_step: int, gradient_step: int): 248 | state = {"model": policy.state_dict()} 249 | if args.obs_norm: 250 | state["obs_rms"] = train_envs.get_obs_rms() 251 | path = os.path.join(log_path, f"policy_{epoch}.pth") 252 | torch.save(state, path) 253 | return path 254 | 255 | 256 | result = onpolicy_trainer( 257 | policy, 258 | train_collector, 259 | test_collector, 260 | args.epoch, 261 | args.step_per_epoch, 262 | args.repeat_per_collect, 263 | args.test_num, 264 | args.batch_size, 265 | step_per_collect=args.step_per_collect, 266 | save_best_fn=save_best_fn, 267 | save_checkpoint_fn=save_checkpoint_fn, 268 | logger=logger, 269 | test_in_train=False, 270 | ) 271 | pprint.pprint(result) 272 | 273 | # Let's watch its performance! 
274 | policy.eval() 275 | test_envs.seed(args.seed) 276 | test_collector.reset() 277 | result = test_collector.collect(n_episode=args.test_num) 278 | print(f'Final reward: {result["rews"].mean()}, length: {result["lens"].mean()}') 279 | -------------------------------------------------------------------------------- /example/loct/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PolicyWrapper(object): 5 | def __init__(self, net, device): 6 | self.net = net 7 | self.device = torch.device(device) 8 | 9 | def __call__(self, obs): 10 | with torch.inference_mode(): 11 | obs = torch.as_tensor(obs, dtype=torch.float32, device=self.device) 12 | return self.net(obs).detach().cpu().numpy() 13 | 14 | 15 | -------------------------------------------------------------------------------- /example/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/test/__init__.py -------------------------------------------------------------------------------- /example/test/test_tianshou.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from tianshou.env import ShmemVectorEnv 4 | from tianshou.utils.net.continuous import ActorProb 5 | from torch import nn 6 | 7 | from example.loct.network import ActorNet 8 | from qdpgym import sim 9 | from qdpgym.tasks.loct import RandomCommanderHookV0, LocomotionV0 10 | 11 | 12 | def test_actor(): 13 | device = 'cuda' 14 | net_a = ActorNet( 15 | 78, 133, 16 | activation=nn.Tanh, 17 | device=device, 18 | ) 19 | actor = ActorProb( 20 | net_a, 21 | (12,), 22 | max_action=1., 23 | unbounded=True, 24 | device=device, 25 | ).to(device) 26 | 27 | print(actor(torch.randn(size=(1, 211)))) 28 | 29 | 30 | def make_loct_env(): 31 | torch.set_num_threads(1) 32 | robot = sim.Aliengo(500, 'actuator_net', noisy=False) 33 | task = LocomotionV0() 34 | task.add_hook(RandomCommanderHookV0()) 35 | arena = sim.Plain() 36 | # for reward, weight in cfg['reward_cfg'].items(): 37 | task.add_reward('UnifiedLinearReward', 0.1) 38 | env = sim.QuadrupedEnv(robot, arena, task) 39 | return env 40 | 41 | 42 | def test_mp(): 43 | envs = ShmemVectorEnv( 44 | [make_loct_env for _ in range(4)] 45 | ) 46 | 47 | print(envs.reset()) 48 | print(envs.step(np.zeros((4, 12)))) 49 | print(envs.reset()) 50 | -------------------------------------------------------------------------------- /example/utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | from typing import Callable, List, Union, Optional, Any, Dict 4 | 5 | import gym 6 | import numpy as np 7 | import torch 8 | import wandb 9 | from tianshou.data import Batch 10 | from tianshou.env import ShmemVectorEnv, VectorEnvNormObs 11 | from tianshou.utils import RunningMeanStd, BaseLogger 12 | 13 | 14 | class NormObsWrapper(gym.Wrapper): 15 | """An observation normalization wrapper for vectorized environments. 16 | 17 | :param bool update_obs_rms: whether to update obs_rms. Default to True. 18 | :param float clip_obs: the maximum absolute value for observation. Default to 19 | 10.0. 20 | :param float epsilon: To avoid division by zero. 
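 Defaults to the machine epsilon of np.float32. 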
21 | """ 22 | 23 | def __init__( 24 | self, 25 | env: gym.Env, 26 | update_obs_rms: bool = False, 27 | clip_obs: float = 10.0, 28 | epsilon: float = np.finfo(np.float32).eps.item(), 29 | ) -> None: 30 | 31 | super(NormObsWrapper, self).__init__(env) 32 | # initialize observation running mean/std 33 | self.update_obs_rms = update_obs_rms 34 | self.obs_rms = RunningMeanStd() 35 | self.clip_max = clip_obs 36 | self.eps = epsilon 37 | 38 | def reset( 39 | self, *args, **kwargs 40 | ): 41 | obs = self.env.reset(*args, **kwargs) 42 | if self.obs_rms and self.update_obs_rms: 43 | self.obs_rms.update(obs) 44 | return self._norm_obs(obs) 45 | 46 | def step( 47 | self, 48 | action: np.ndarray, 49 | ): 50 | obs, rew, done, info = self.env.step(action) 51 | if self.obs_rms and self.update_obs_rms: 52 | self.obs_rms.update(obs) 53 | return self._norm_obs(obs), rew, done, info 54 | 55 | def _norm_obs(self, obs: np.ndarray) -> np.ndarray: 56 | if self.obs_rms: 57 | obs = (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.eps) 58 | obs = np.clip(obs, -self.clip_max, self.clip_max) 59 | return obs 60 | 61 | def set_obs_rms(self, obs_rms: RunningMeanStd) -> None: 62 | """Set with given observation running mean/std.""" 63 | self.obs_rms = obs_rms 64 | 65 | def get_obs_rms(self) -> RunningMeanStd: 66 | """Return observation running mean/std.""" 67 | return self.obs_rms 68 | 69 | 70 | def make_parallel_env( 71 | make_env: Callable[[], gym.Env], 72 | seed: Union[int, None, List[int]], 73 | training_num: int, 74 | test_num: int, 75 | obs_norm: bool 76 | ): 77 | """Wrapper function for Mujoco env. 78 | 79 | If EnvPool is installed, it will automatically switch to EnvPool's Mujoco env. 80 | 81 | :return: a tuple of (single env, training envs, test envs). 82 | """ 83 | 84 | env = make_env() 85 | train_envs = ShmemVectorEnv( 86 | [make_env for _ in range(training_num)] 87 | ) 88 | test_envs = ShmemVectorEnv( 89 | [make_env for _ in range(test_num)] 90 | ) if test_num else None 91 | train_envs.seed(seed) 92 | test_envs.seed(seed) 93 | if obs_norm: 94 | # obs norm wrapper 95 | train_envs = VectorEnvNormObs(train_envs) 96 | test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False) 97 | test_envs.set_obs_rms(train_envs.get_obs_rms()) 98 | return env, train_envs, test_envs 99 | 100 | 101 | def init_actor_critic(actor, critic): 102 | for m in list(actor.modules()) + list(critic.modules()): 103 | if isinstance(m, torch.nn.Linear): 104 | # orthogonal initialization 105 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 106 | torch.nn.init.zeros_(m.bias) 107 | # do last policy layer scaling, this will make initial actions have (close to) 108 | # 0 mean and std, and will help boost performances, 109 | # see https://arxiv.org/abs/2006.05990, Fig.24 for details 110 | for m in actor.mu.modules(): 111 | if isinstance(m, torch.nn.Linear): 112 | torch.nn.init.zeros_(m.bias) 113 | m.weight.data.copy_(0.01 * m.weight.data) 114 | 115 | 116 | class MyWandbLogger(BaseLogger): 117 | def __init__( 118 | self, 119 | project: str, 120 | train_interval: int = 1000, 121 | test_interval: int = 1, 122 | update_interval: int = 1000, 123 | name: Optional[str] = None, 124 | entity: Optional[str] = None, 125 | run_id: Optional[str] = None, 126 | config: Optional[argparse.Namespace] = None, 127 | ) -> None: 128 | super().__init__(train_interval, test_interval, update_interval) 129 | self.wandb_run = wandb.init( 130 | project=project, 131 | name=name, 132 | id=run_id, 133 | resume="allow", 134 | entity=entity, 135 | monitor_gym=True, 
136 | config=config, # type: ignore 137 | save_code=True, 138 | # mode='disabled', 139 | ) if not wandb.run else wandb.run 140 | 141 | self._reward_info = defaultdict(float) 142 | self._reward_counter = 0 143 | self._callbacks_train: List[Callable[[], dict]] = [] 144 | self._callbacks_test: List[Callable[[], dict]] = [] 145 | 146 | def collect_reward_info(self, **kwargs) -> Batch: 147 | if 'rew' in kwargs: 148 | for k, v in kwargs['info']['reward_info'].items(): 149 | self._reward_info[k] += v.mean() 150 | self._reward_counter += 1 151 | return Batch() 152 | 153 | def write(self, step_type: str, step: int, data: Dict[str, Any]) -> None: 154 | wandb.log(data, step=step) 155 | 156 | def save_data( 157 | self, 158 | epoch: int, 159 | env_step: int, 160 | gradient_step: int, 161 | save_checkpoint_fn: Optional[Callable[[int, int, int], Any]] = None, 162 | ) -> None: 163 | if save_checkpoint_fn: 164 | save_checkpoint_fn(epoch, env_step, gradient_step) 165 | 166 | def add_callback( 167 | self, callback: Callable[[], dict], 168 | period='train' 169 | ) -> None: 170 | if period == 'train': 171 | self._callbacks_train.append(callback) 172 | elif period == 'test': 173 | self._callbacks_test.append(callback) 174 | elif period == 'both': 175 | self._callbacks_train.append(callback) 176 | self._callbacks_test.append(callback) 177 | else: 178 | raise ValueError(f'Unknown period {period}') 179 | 180 | def log_train_data(self, collect_result: dict, step: int) -> None: 181 | """Use writer to log statistics generated during training. 182 | 183 | :param collect_result: a dict containing information of data collected in 184 | training stage, i.e., returns of collector.collect(). 185 | :param int step: stands for the timestep the collect_result being logged. 186 | """ 187 | if collect_result["n/ep"] > 0: 188 | if step - self.last_log_train_step >= self.train_interval: 189 | log_data = { 190 | "train/episode": collect_result["n/ep"], 191 | "train/reward": collect_result["rew"], 192 | "train/length": collect_result["len"], 193 | } 194 | for k, v in self._reward_info.items(): 195 | log_data[f'train/reward_info/{k}'] = v / self._reward_counter 196 | self._reward_info.clear() 197 | self._reward_counter = 0 198 | for callback in self._callbacks_train: 199 | log_data.update(callback()) 200 | self.write("train/env_step", step, log_data) 201 | self.last_log_train_step = step 202 | 203 | def log_test_data(self, collect_result: dict, step: int) -> None: 204 | 205 | """Use writer to log statistics generated during evaluating. 206 | 207 | :param collect_result: a dict containing information of data collected in 208 | evaluating stage, i.e., returns of collector.collect(). 209 | :param int step: stands for the timestep the collect_result being logged. 
210 | """ 211 | assert collect_result["n/ep"] > 0 212 | if step - self.last_log_test_step >= self.test_interval: 213 | log_data = { 214 | "test/env_step": step, 215 | "test/reward": collect_result["rew"], 216 | "test/length": collect_result["len"], 217 | "test/reward_std": collect_result["rew_std"], 218 | "test/length_std": collect_result["len_std"], 219 | } 220 | for k, v in self._reward_info.items(): 221 | log_data[f'test/reward_info/{k}'] = v / self._reward_counter 222 | self._reward_info.clear() 223 | self._reward_counter = 0 224 | for callback in self._callbacks_test: 225 | log_data.update(callback()) 226 | self.write("test/env_step", step, log_data) 227 | self.last_log_test_step = step 228 | -------------------------------------------------------------------------------- /qdpgym/__init__.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | import torch 4 | 5 | from .utils import tf 6 | 7 | try: 8 | from torch import inference_mode as _im 9 | except ImportError: 10 | torch.inference_mode = torch.no_grad 11 | 12 | 13 | class Sim(enum.IntEnum): 14 | BULLET = 0 15 | MUJOCO = 1 16 | 17 | 18 | sim_engine: Sim = Sim.BULLET 19 | -------------------------------------------------------------------------------- /qdpgym/sim/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import qdpgym 4 | 5 | pkg_path = os.path.dirname(os.path.abspath(__file__)) 6 | rsc_dir = os.path.join(pkg_path, 'resources') 7 | 8 | 9 | def is_bullet_available(): 10 | try: 11 | import pybullet 12 | except ModuleNotFoundError: 13 | return False 14 | return True 15 | 16 | 17 | def is_mujoco_available(): 18 | try: 19 | import mujoco 20 | import dm_control 21 | except ModuleNotFoundError: 22 | return False 23 | return True 24 | 25 | 26 | if qdpgym.sim_engine == qdpgym.Sim.MUJOCO: 27 | assert is_mujoco_available(), 'dm_control is not installed' 28 | from .mjc.quadruped import Aliengo 29 | from .mjc.env import QuadrupedEnv 30 | from .mjc.terrain import * 31 | from .mjc.hooks import * 32 | else: 33 | assert is_bullet_available(), 'pybullet is not installed' 34 | from .blt.quadruped import Aliengo 35 | from .blt.env import QuadrupedEnv 36 | from .blt.terrain import * 37 | from .blt.hooks import * 38 | 39 | from .abc import * 40 | from .app import Application 41 | -------------------------------------------------------------------------------- /qdpgym/sim/abc.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from dataclasses import dataclass, field 3 | import multiprocessing as mp 4 | from typing import Union, Type, List, Any, Tuple 5 | 6 | import gym 7 | import numpy as np 8 | 9 | ARRAY_LIKE = Union[np.ndarray, list, tuple] 10 | NUMERIC = Union[int, float] 11 | 12 | 13 | class QuadrupedHandle(metaclass=abc.ABCMeta): 14 | @property 15 | def obs_history(self): 16 | raise NotImplementedError 17 | 18 | @property 19 | def cmd_history(self): 20 | raise NotImplementedError 21 | 22 | def get_base_pos(self) -> np.ndarray: 23 | """Get base position in WORLD frame""" 24 | raise NotImplementedError 25 | 26 | def get_base_orn(self) -> np.ndarray: 27 | """Get base quaternion in [x, y, z, w]""" 28 | raise NotImplementedError 29 | 30 | def get_base_rot(self) -> np.ndarray: 31 | """Get base 3x3 rotation matrix to WORLD frame""" 32 | raise NotImplementedError 33 | 34 | def get_base_rpy(self) -> np.ndarray: 35 | """ 36 | Get base Tait–Bryan angles in z-y-x 37 | @return: 
np.ndarray: [roll, pitch, yaw] 38 | """ 39 | raise NotImplementedError 40 | 41 | def get_base_rpy_rate(self) -> np.ndarray: 42 | """ 43 | Get the rate of base rpy (not angular velocity) 44 | """ 45 | raise NotImplementedError 46 | 47 | def get_base_lin(self) -> np.ndarray: 48 | """Get base linear velocity in WORLD frame""" 49 | raise NotImplementedError 50 | 51 | def get_base_ang(self) -> np.ndarray: 52 | """Get base angular velocity in WORLD frame""" 53 | raise NotImplementedError 54 | 55 | def get_velocimeter(self) -> np.ndarray: 56 | """Get base linear velocity in BASE frame""" 57 | raise NotImplementedError 58 | 59 | def get_gyro(self) -> np.ndarray: 60 | """Get base angular velocity in BASE frame""" 61 | raise NotImplementedError 62 | 63 | def get_accelerometer(self) -> np.ndarray: 64 | """Get base acceleration in BASE frame""" 65 | raise NotImplementedError 66 | 67 | def get_state_history(self, latency): 68 | raise NotImplementedError 69 | 70 | def get_cmd_history(self, latency): 71 | raise NotImplementedError 72 | 73 | def get_torso_contact(self): 74 | raise NotImplementedError 75 | 76 | def get_leg_contacts(self): 77 | raise NotImplementedError 78 | 79 | def get_foot_pos(self): 80 | raise NotImplementedError 81 | 82 | def get_foot_contacts(self): 83 | raise NotImplementedError 84 | 85 | def get_contact_forces(self): 86 | raise NotImplementedError 87 | 88 | def get_force_sensor(self): 89 | raise NotImplementedError 90 | 91 | def get_slip_vel(self): 92 | raise NotImplementedError 93 | 94 | def get_strides(self): 95 | raise NotImplementedError 96 | 97 | def get_clearances(self): 98 | raise NotImplementedError 99 | 100 | def get_joint_pos(self) -> np.ndarray: 101 | raise NotImplementedError 102 | 103 | def get_joint_vel(self) -> np.ndarray: 104 | raise NotImplementedError 105 | 106 | def get_joint_acc(self) -> np.ndarray: 107 | raise NotImplementedError 108 | 109 | def get_last_command(self) -> np.ndarray: 110 | raise NotImplementedError 111 | 112 | def get_last_torque(self) -> np.ndarray: 113 | raise NotImplementedError 114 | 115 | 116 | class Quadruped(QuadrupedHandle, metaclass=abc.ABCMeta): 117 | STANCE_HEIGHT: float 118 | STANCE_CONFIG: tuple 119 | 120 | @property 121 | def noisy(self) -> QuadrupedHandle: 122 | return self 123 | 124 | def set_init_pose(self, x=0., y=0., yaw=0.): 125 | raise NotImplementedError 126 | 127 | def set_random_dynamics(self, flag: bool = True): 128 | raise NotImplementedError 129 | 130 | def set_latency(self, lower: float, upper=None): 131 | raise NotImplementedError 132 | 133 | @classmethod 134 | def inverse_kinematics(cls, leg: int, pos: ARRAY_LIKE): 135 | raise NotImplementedError 136 | 137 | @classmethod 138 | def forward_kinematics(cls, leg: int, angles: ARRAY_LIKE): 139 | raise NotImplementedError 140 | 141 | 142 | class Terrain(metaclass=abc.ABCMeta): 143 | def get_height(self, x, y): 144 | raise NotImplementedError 145 | 146 | def get_normal(self, x, y): 147 | raise NotImplementedError 148 | 149 | def get_peak(self, x_range, y_range): 150 | raise NotImplementedError 151 | 152 | def out_of_range(self, x, y) -> bool: 153 | raise NotImplementedError 154 | 155 | 156 | @dataclass 157 | class Snapshot(object): 158 | position: np.ndarray = None 159 | orientation: np.ndarray = None 160 | rotation: np.ndarray = None 161 | rpy: np.ndarray = None 162 | linear_vel: np.ndarray = None 163 | angular_vel: np.ndarray = None 164 | joint_pos: np.ndarray = None 165 | joint_vel: np.ndarray = None 166 | joint_acc: np.ndarray = None 167 | foot_pos: np.ndarray = None 
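 # The sensor-like fields below (velocimeter, gyro, accelerometer) follow the BASE-frame # convention of the corresponding QuadrupedHandle getters above. 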
168 | velocimeter: np.ndarray = None 169 | gyro: np.ndarray = None 170 | accelerometer: np.ndarray = None 171 | torso_contact: bool = None 172 | leg_contacts: np.ndarray = None 173 | contact_forces: np.ndarray = None 174 | force_sensor: np.ndarray = None 175 | rpy_rate: np.ndarray = None 176 | 177 | 178 | @dataclass 179 | class LocomotionInfo: 180 | time: float = 0. 181 | last_stance_states: Any = field(default_factory=lambda: [None] * 4) 182 | max_foot_heights: np.ndarray = field(default_factory=lambda: np.zeros(4)) 183 | foot_clearances: np.ndarray = field(default_factory=lambda: np.zeros(4)) 184 | strides: List[Tuple[float, float]] = field(default_factory=lambda: [(0., 0.)] * 4) 185 | slips: List[float] = field(default_factory=lambda: [0.] * 4) 186 | 187 | 188 | @dataclass 189 | class Command: 190 | command: np.ndarray = None 191 | torque: np.ndarray = None 192 | 193 | 194 | class ComposedObs(tuple): 195 | pass 196 | 197 | 198 | class Environment(gym.Env, metaclass=abc.ABCMeta): 199 | @property 200 | def robot(self) -> QuadrupedHandle: 201 | raise NotImplementedError 202 | 203 | @property 204 | def arena(self) -> Terrain: 205 | raise NotImplementedError 206 | 207 | @arena.setter 208 | def arena(self, value): 209 | raise NotImplementedError 210 | 211 | @property 212 | def action_history(self): 213 | raise NotImplementedError 214 | 215 | @property 216 | def sim_time(self): 217 | raise NotImplementedError 218 | 219 | @property 220 | def num_substeps(self): 221 | raise NotImplementedError 222 | 223 | @property 224 | def timestep(self): 225 | raise NotImplementedError 226 | 227 | @property 228 | def identifier(self): 229 | raise NotImplementedError 230 | 231 | def get_action_rate(self) -> np.ndarray: 232 | raise NotImplementedError 233 | 234 | def get_action_accel(self) -> np.ndarray: 235 | raise NotImplementedError 236 | 237 | def get_relative_robot_height(self) -> float: 238 | raise NotImplementedError 239 | 240 | def get_interact_terrain_normal(self): 241 | raise NotImplementedError 242 | 243 | def get_interact_terrain_rot(self) -> np.ndarray: 244 | raise NotImplementedError 245 | 246 | def get_perturbation(self, in_robot_frame=False): 247 | raise NotImplementedError 248 | 249 | def set_perturbation(self, value=None): 250 | raise NotImplementedError 251 | 252 | 253 | class Hook(metaclass=abc.ABCMeta): 254 | def register_task(self, task): 255 | pass 256 | 257 | def initialize(self, robot, env): 258 | pass 259 | 260 | def init_episode(self, robot, env): 261 | pass 262 | 263 | def before_step(self, robot, env): 264 | pass 265 | 266 | def before_substep(self, robot, env): 267 | pass 268 | 269 | def after_step(self, robot, env): 270 | pass 271 | 272 | def after_substep(self, robot, env): 273 | pass 274 | 275 | def on_success(self, robot, env): 276 | pass 277 | 278 | def on_fail(self, robot, env): 279 | pass 280 | 281 | 282 | class CommHook(Hook): 283 | def __init__(self, comm: mp.Queue): 284 | self._comm = comm 285 | self._env_id: str = 'anonymous' 286 | 287 | def initialize(self, robot, env): 288 | self._env_id = env.identifier 289 | 290 | def _submit(self, info): 291 | self._comm.put((self._env_id, info)) 292 | 293 | 294 | class CommHookFactory(object): 295 | def __init__(self, cls: Type[CommHook]): 296 | self._cls = cls 297 | self._comm = mp.Queue() 298 | 299 | def __call__(self, *args, **kwargs): 300 | return self._cls(self._comm, *args, **kwargs) 301 | 302 | 303 | class Task(metaclass=abc.ABCMeta): 304 | """ 305 | The task prototype. 
306 | A task is embedded in an environment, whose methods are 307 | automatically called in different periods of an episode; 308 | It should also manage hook callbacks. 309 | Only `before_step` should have return values, which 310 | should turn action into desired joint angles. 311 | """ 312 | 313 | observation_space: gym.Space 314 | action_space: gym.Space 315 | 316 | def init_episode(self): 317 | pass 318 | 319 | def before_step(self, action): 320 | pass 321 | 322 | def before_substep(self): 323 | pass 324 | 325 | def after_step(self): 326 | pass 327 | 328 | def after_substep(self): 329 | pass 330 | 331 | def on_success(self): 332 | pass 333 | 334 | def on_fail(self): 335 | pass 336 | 337 | def register_env(self, robot: Quadruped, env: Environment): 338 | raise NotImplementedError 339 | 340 | def add_hook(self, hook: Hook, name=None): 341 | raise NotImplementedError 342 | 343 | def remove_hook(self, name=None): 344 | raise NotImplementedError 345 | 346 | def get_observation(self): 347 | raise NotImplementedError 348 | 349 | def get_reward(self, detailed=True): 350 | """ 351 | Get the reward sum in a step. 352 | Rewards should be calculated in `after_step` or/and `after_substep`. 353 | :param detailed: returns an extra dict containing all reward terms. 354 | :return: reward(, details) 355 | """ 356 | raise NotImplementedError 357 | 358 | def is_succeeded(self): 359 | raise NotImplementedError 360 | 361 | def is_failed(self): 362 | raise NotImplementedError 363 | -------------------------------------------------------------------------------- /qdpgym/sim/app.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | 5 | from qdpgym.sim.abc import Quadruped, Environment, Task 6 | 7 | 8 | class Application(object): 9 | def __init__(self, robot, env, task, policy): 10 | self.robot: Quadruped = robot 11 | self.env: Environment = env 12 | self.task: Task = task 13 | self.policy = policy 14 | self.callbacks = [] 15 | 16 | def add_callback(self, obj: Callable): 17 | self.callbacks.append(obj) 18 | 19 | def launch(self, allow_reset=True): 20 | with torch.inference_mode(): 21 | obs = self.env.reset() 22 | 23 | for _ in range(20000): 24 | actions = self.policy(obs) 25 | for callback in self.callbacks: 26 | callback(self.robot, self.env, self.task) 27 | obs, _, done, _ = self.env.step(actions) 28 | if done and allow_reset: 29 | self.env.reset() 30 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/blt/__init__.py -------------------------------------------------------------------------------- /qdpgym/sim/blt/env.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Union, Any, Optional, Tuple 3 | 4 | import gym 5 | import numpy as np 6 | import pybullet as pyb 7 | import pybullet_data as pyb_data 8 | from pybullet_utils.bullet_client import BulletClient 9 | 10 | from qdpgym.sim.abc import Task, Environment, ARRAY_LIKE 11 | from .quadruped import Aliengo 12 | from .terrain import TerrainBase 13 | from qdpgym.utils import PadWrapper, tf 14 | 15 | 16 | class QuadrupedEnv(Environment): 17 | _observation_space = None 18 | _action_space = None 19 | 20 | def __init__( 21 | self, 22 | robot: Aliengo, 23 | 
arena: TerrainBase, 24 | task: Task, 25 | timestep: float = 2e-3, 26 | time_limit: float = 20, 27 | num_substeps: int = 10, 28 | identifier: str = None, 29 | ): 30 | self._robot = robot 31 | self._arena = arena 32 | self._task = task 33 | self._timestep = timestep 34 | self._time_limit = time_limit 35 | self._num_substeps = num_substeps 36 | self._identifier = identifier or f'{hex(id(self))[-7:]}' 37 | self._step_freq = 1 / (self.timestep * self._num_substeps) 38 | self._render = False 39 | 40 | self._init = False 41 | self._elapsed_sim_steps = None 42 | 43 | self._sim_env = None if True else pyb # for pylint 44 | self._reset_times, self._debug_reset_param = 0, -1 45 | self._task.register_env(self._robot, self) 46 | self._action_history = collections.deque(maxlen=10) 47 | self._perturbation = None 48 | 49 | self._interact_terrain_samples = [] 50 | self._interact_terrain_normal = None 51 | self._interact_terrain_height = 0.0 52 | 53 | @property 54 | def observation_space(self) -> gym.Space: 55 | if self._observation_space is None: 56 | if hasattr(self._task, 'observation_space'): 57 | self._observation_space = self._task.observation_space 58 | else: 59 | self._observation_space = gym.Space() 60 | return self._observation_space 61 | 62 | @property 63 | def action_space(self) -> gym.Space: 64 | if self._action_space is None: 65 | if hasattr(self._task, 'action_space'): 66 | self._action_space = self._task.action_space 67 | else: 68 | self._action_space = gym.Space((12,), float) 69 | return self._action_space 70 | 71 | def render(self, mode="human"): 72 | if mode == 'human': 73 | self._render = True 74 | if self._sim_env is not None: 75 | self._sim_env.configureDebugVisualizer( 76 | pyb.COV_ENABLE_SINGLE_STEP_RENDERING, True 77 | ) 78 | else: 79 | return super().render(mode) 80 | 81 | def reset( 82 | self, 83 | seed: Optional[int] = None, 84 | return_info: bool = False, 85 | options: Optional[dict] = None, 86 | ) -> Union[Any, Tuple[Any, dict]]: 87 | 88 | super().reset(seed=seed, return_info=return_info, options=options) 89 | 90 | self._init = True 91 | self._elapsed_sim_steps = 0 92 | self._action_history.clear() 93 | 94 | if self._sim_env is None: 95 | if self._render: 96 | fps = int(self._step_freq) 97 | self._sim_env = BulletClient(pyb.GUI, options=f'--width=1024 --height=768 --mp4fps={fps}') 98 | self._debug_reset_param = self._sim_env.addUserDebugParameter('reset', 1, 0, 0) 99 | else: 100 | self._sim_env = BulletClient(pyb.DIRECT) 101 | self._sim_env.setAdditionalSearchPath(pyb_data.getDataPath()) 102 | self._sim_env.setTimeStep(self._timestep) 103 | self._sim_env.setGravity(0, 0, -9.8) 104 | 105 | self._task.init_episode() 106 | self._arena.spawn(self._sim_env) 107 | self._robot.add_to(self._arena) 108 | self._robot.spawn(self._sim_env, self.np_random) 109 | 110 | for i in range(50): 111 | self._robot.update_observation(None, minimal=True) 112 | self._robot.apply_command(self._robot.STANCE_CONFIG) 113 | self._sim_env.stepSimulation() 114 | 115 | self._action_history.append(np.array(self._robot.STANCE_CONFIG)) 116 | self._robot.update_observation(self.np_random) 117 | 118 | return ( 119 | self._task.get_observation() 120 | # if return_info else (self._task.get_observation(), {}) 121 | ) 122 | 123 | def close(self): 124 | if self._sim_env is not None: 125 | self._sim_env.disconnect() 126 | 127 | @property 128 | def robot(self): 129 | return self._robot 130 | 131 | @property 132 | def sim_env(self): 133 | return self._sim_env 134 | 135 | @property 136 | def render_mode(self): 137 | return 
self._render 138 | 139 | @property 140 | def arena(self): 141 | return self._arena 142 | 143 | @arena.setter 144 | def arena(self, value: TerrainBase): 145 | value.replace(self._sim_env, self._arena) 146 | self._arena = value 147 | 148 | @property 149 | def action_history(self): 150 | return PadWrapper(self._action_history) 151 | 152 | @property 153 | def sim_time(self): 154 | return self._elapsed_sim_steps * self._timestep 155 | 156 | @property 157 | def timestep(self): 158 | return self._timestep 159 | 160 | @property 161 | def num_substeps(self): 162 | return self._num_substeps 163 | 164 | @property 165 | def identifier(self) -> str: 166 | return self._identifier 167 | 168 | def step(self, action: ARRAY_LIKE) -> Tuple[Any, float, bool, dict]: 169 | if self._check_debug_reset_param(): 170 | return self.reset(), 0., True, {} 171 | 172 | assert self._init, 'Call `init_episode` before `step`!' 173 | action = self._task.before_step(action) 174 | action = np.asarray(action) 175 | prev_action = self._action_history[-1] 176 | self._action_history.append(action) 177 | 178 | for i in range(self._num_substeps): 179 | weight = (i + 1) / self._num_substeps 180 | current_action = action * weight + prev_action * (1 - weight) 181 | self._robot.apply_command(current_action) 182 | self._task.before_substep() 183 | 184 | self._apply_perturbation() 185 | self._sim_env.stepSimulation() 186 | self._elapsed_sim_steps += 1 187 | self._update_observation() 188 | 189 | self._task.after_substep() 190 | self._task.after_step() 191 | 192 | done = False 193 | info = {} 194 | if self._task.is_failed(): 195 | done = True 196 | self._task.on_fail() 197 | elif ((self._time_limit is not None and self.sim_time >= self._time_limit) 198 | or self._task.is_succeeded()): 199 | done = True 200 | info['TimeLimit.truncated'] = True 201 | self._task.on_success() 202 | 203 | reward, reward_info = self._task.get_reward(True) 204 | info['reward_info'] = reward_info 205 | return ( 206 | self._task.get_observation(), reward, done, info 207 | ) 208 | 209 | def _update_observation(self): 210 | self._robot.update_observation(self.np_random) 211 | 212 | self._interact_terrain_samples.clear() 213 | self._interact_terrain_height = 0. 
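 # Sample the terrain height under each foot: the average gives the local ground height # (used by get_relative_robot_height), and the four (x, y, h) samples are used to # estimate the local terrain normal. 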
214 | xy_points = self._robot.get_foot_pos() 215 | for x, y, _ in xy_points: 216 | h = self._arena.get_height(x, y) 217 | self._interact_terrain_height += h 218 | self._interact_terrain_samples.append((x, y, h)) 219 | self._interact_terrain_height /= 4 220 | self._interact_terrain_normal = tf.estimate_normal(self._interact_terrain_samples) 221 | 222 | def get_action_rate(self) -> np.ndarray: 223 | if len(self._action_history) < 2: 224 | return np.zeros(12) 225 | actions = [self._action_history[-i - 1] for i in range(2)] 226 | return (actions[0] - actions[1]) * self._step_freq 227 | 228 | def get_action_accel(self) -> np.ndarray: 229 | if len(self._action_history) < 3: 230 | return np.zeros(12) 231 | actions = [self._action_history[-i - 1] for i in range(3)] 232 | return (actions[0] - 2 * actions[1] + actions[2]) * self._step_freq ** 2 233 | 234 | def get_relative_robot_height(self) -> float: 235 | return self._robot.get_base_pos()[2] - self._interact_terrain_height 236 | 237 | def get_interact_terrain_normal(self) -> np.ndarray: 238 | return self._interact_terrain_normal 239 | 240 | def get_interact_terrain_rot(self) -> np.ndarray: 241 | return tf.Rotation.from_zaxis(self._interact_terrain_normal) 242 | 243 | def get_perturbation(self, in_robot_frame=False) -> Optional[np.ndarray]: 244 | if self._perturbation is None: 245 | return None 246 | elif in_robot_frame: 247 | rotation = self._robot.get_base_rot() 248 | perturbation = np.concatenate( 249 | [rotation.T @ self._perturbation[i * 3:i * 3 + 3] for i in range(2)] 250 | ) 251 | else: 252 | perturbation = self._perturbation 253 | return perturbation 254 | 255 | def set_perturbation(self, value=None) -> None: 256 | if value is None: 257 | self._perturbation = None 258 | else: 259 | self._perturbation = np.array(value) 260 | 261 | def _apply_perturbation(self): 262 | if self._perturbation is not None: 263 | self._applied_link_id = 0 264 | self._sim_env.applyExternalForce( 265 | objectUniqueId=self._robot.id, 266 | linkIndex=self._applied_link_id, 267 | forceObj=self._perturbation[:3], 268 | posObj=self._robot.get_base_pos(), 269 | flags=pyb.WORLD_FRAME 270 | ) 271 | self._sim_env.applyExternalTorque( 272 | objectUniqueId=self._robot.id, 273 | linkIndex=self._applied_link_id, 274 | torqueObj=self._perturbation[3:], 275 | flags=pyb.WORLD_FRAME 276 | ) 277 | 278 | def _check_debug_reset_param(self): 279 | if self._debug_reset_param != -1: 280 | reset_times = self._sim_env.readUserDebugParameter(self._debug_reset_param) 281 | if reset_times != self._reset_times: 282 | self._reset_times = reset_times 283 | return True 284 | return False 285 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/hooks.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | import json 4 | import math 5 | import os 6 | import socket 7 | import time 8 | 9 | import numpy as np 10 | import pybullet as pyb 11 | 12 | from qdpgym.sim.abc import Hook 13 | from qdpgym.sim.blt.terrain import Hills, Slopes, Steps, PlainHf 14 | from qdpgym.utils import Angle, tf, get_timestamp, log 15 | 16 | 17 | class ViewerHook(Hook): 18 | def __init__(self): 19 | self._pre_vis = False 20 | self._init_vis = False 21 | self._sleep_on = True 22 | 23 | self._robot_yaw_buffer = collections.deque(maxlen=10) 24 | self._cam_state = 0 25 | 26 | self._last_frame_time = time.time() 27 | 28 | def initialize(self, robot, env): 29 | env.render() 30 | 31 | def init_episode(self, robot, 
env): 32 | self._robot_yaw_buffer.clear() 33 | if self._pre_vis: 34 | return 35 | self._pre_vis = True 36 | sim_env = env.sim_env 37 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, False) 38 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_GUI, False) 39 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_TINY_RENDERER, False) 40 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SHADOWS, False) 41 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RGB_BUFFER_PREVIEW, False) 42 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_DEPTH_BUFFER_PREVIEW, False) 43 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SEGMENTATION_MARK_PREVIEW, False) 44 | 45 | def before_step(self, robot, env): 46 | if not self._init_vis: 47 | self._init_vis = True 48 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, True) 49 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_GUI, True) 50 | 51 | def after_step(self, robot, env): 52 | sim_env = env.sim_env 53 | period = env.timestep * env.num_substeps 54 | time_spent = time.time() - self._last_frame_time 55 | if self._sleep_on: 56 | sleep_time = period - time_spent 57 | if sleep_time > 0: 58 | time.sleep(sleep_time) 59 | self._last_frame_time = time.time() 60 | kbd_events = pyb.getKeyboardEvents() 61 | 62 | switch_cam_state = self.is_triggered(ord('`'), kbd_events) 63 | if switch_cam_state: 64 | self._cam_state = (self._cam_state + 1) % 5 65 | 66 | x, y, _ = robot.get_base_pos() 67 | z = robot.STANCE_HEIGHT + env.arena.get_height(x, y) 68 | if self._cam_state == 0: 69 | if switch_cam_state: 70 | yaw = Angle.to_deg(Angle.mean(self._robot_yaw_buffer)) 71 | sim_env.resetDebugVisualizerCamera(1.5, yaw, -30., (x, y, z)) 72 | else: 73 | yaw, pitch, dist = sim_env.getDebugVisualizerCamera()[8:11] 74 | sim_env.resetDebugVisualizerCamera(dist, yaw, pitch, (x, y, z)) 75 | else: 76 | self._robot_yaw_buffer.append(robot.get_base_rpy()[2] - math.pi / 2) 77 | # To avoid carsick :) 78 | yaw = Angle.mean(self._robot_yaw_buffer) 79 | degree = -30. 80 | if self._cam_state == 2: # around robot 81 | yaw = Angle.norm(yaw + math.pi / 2) 82 | degree = 0. 83 | elif self._cam_state == 3: 84 | yaw = Angle.norm(yaw + math.pi) 85 | elif self._cam_state == 4: 86 | yaw = Angle.norm(yaw - math.pi / 2) 87 | degree = 0. 
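            # Orbiting chase camera: states 1-4 differ only in their 90-degree yaw
            # offset around the smoothed robot heading and in the pitch angle.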
88 | sim_env.resetDebugVisualizerCamera(1.5, Angle.to_deg(yaw), degree, (x, y, z)) 89 | env.render() 90 | 91 | KEY_SPACE = ord(' ') 92 | if self.is_triggered(KEY_SPACE, kbd_events): 93 | while True: # PAUSE 94 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SINGLE_STEP_RENDERING, True) 95 | time.sleep(0.01) 96 | if self.is_triggered(KEY_SPACE, pyb.getKeyboardEvents()): 97 | self._cam_state = 0 98 | break 99 | 100 | @staticmethod 101 | def is_triggered(key, keyboard_events): 102 | return key in keyboard_events and keyboard_events[key] & pyb.KEY_WAS_TRIGGERED 103 | 104 | 105 | class _TorqueVisualizerHelper(object): 106 | def __init__(self): 107 | self._axis_x = None 108 | self._axis_y = None 109 | self._mk_start = None 110 | self._mk_phase = 0 111 | 112 | def update(self, torque): 113 | if (torque != 0.).any(): 114 | self._axis_x, self._axis_y, _ = tf.Rotation.from_zaxis(tf.vunit(torque)).T 115 | magnitude = tf.vnorm(torque) 116 | if self._mk_start is None: 117 | self._mk_start = self._axis_x * magnitude / 20 118 | self._mk_phase += math.pi / 36 119 | marker_end = (self._axis_x * math.cos(self._mk_phase) + 120 | self._axis_y * math.sin(self._mk_phase) 121 | ) * magnitude / 20 122 | 123 | marker_info = dict( 124 | lineFromXYZ=copy.deepcopy(self._mk_start), 125 | lineToXYZ=marker_end 126 | ) 127 | self._mk_start = marker_end 128 | return marker_info 129 | 130 | 131 | class ExtraViewerHook(ViewerHook): 132 | def __init__(self, perturb=True): 133 | super().__init__() 134 | self._show_perturb = perturb 135 | self._last_perturb = None 136 | 137 | self._force_marker = -1 138 | self._torque_vis = _TorqueVisualizerHelper() 139 | 140 | def after_step(self, robot, env): 141 | sim_env = env.sim_env 142 | if self._show_perturb: 143 | perturb = env.get_perturbation(in_robot_frame=True) 144 | if perturb is None: 145 | sim_env.removeUserDebugItem(self._force_marker) 146 | else: 147 | if (self._last_perturb != perturb).any(): 148 | self._force_marker = sim_env.addUserDebugLine( 149 | lineFromXYZ=(0., 0., 0.), 150 | lineToXYZ=perturb[:3] / 50, 151 | lineColorRGB=(1., 0., 0.), 152 | lineWidth=3, lifeTime=1, 153 | parentObjectUniqueId=robot.id, 154 | replaceItemUniqueId=self._force_marker 155 | ) 156 | sim_env.addUserDebugLine( 157 | **self._torque_vis.update(perturb[3:]), 158 | lineColorRGB=(0., 0., 1.), 159 | lineWidth=5, lifeTime=0.1, 160 | parentObjectUniqueId=robot.id 161 | ) 162 | 163 | self._last_perturb = perturb 164 | super().after_step(robot, env) 165 | 166 | 167 | class HeightSampleVisualizer(Hook): 168 | def __init__(self): 169 | super().__init__() 170 | self._terrain_visual_shape = -1 171 | self._terrain_markers = None 172 | 173 | def init_episode(self, robot, env): 174 | if self._terrain_markers is None: 175 | sim_env = env.sim_env 176 | self._terrain_visual_shape = sim_env.createVisualShape( 177 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0., 0.8, 0., 0.6)) 178 | self._terrain_markers = [ 179 | sim_env.createMultiBody( 180 | baseVisualShapeIndex=self._terrain_visual_shape 181 | ) for _ in range(121) 182 | ] 183 | 184 | def after_step(self, robot, env): 185 | sim_env = env.sim_env 186 | rx, ry, _ = robot.get_base_pos() 187 | yaw = robot.get_base_rpy()[2] 188 | cy, sy = math.cos(yaw), math.sin(yaw) 189 | dx, dy = np.array(((cy, sy), (-sy, cy))) * 0.15 190 | 191 | marker = iter(self._terrain_markers) 192 | for i in range(-5, 6): 193 | for j in range(-5, 6): 194 | x, y = (rx, ry) + i * dx + j * dy 195 | height = env.arena.get_height(x, y) 196 | sim_env.resetBasePositionAndOrientation( 
197 | next(marker), (x, y, height), (0., 0., 0., 1.) 198 | ) 199 | 200 | 201 | class RandomTerrainHook(Hook): 202 | def __init__(self): 203 | self.max_roughness = 0.2 204 | self.max_slope = 10 / 180 * math.pi 205 | self.max_step_height = 0.1 206 | 207 | def generate_terrain(self, random_gen: np.random.Generator): 208 | """ 209 | Generate a random terrain object of Hills, Slopes, Steps or Plain. 210 | """ 211 | size, resolution = 30, 0.1 212 | terrain_type = random_gen.integers(4) 213 | difficulty = random_gen.random() 214 | terrain = None 215 | if terrain_type == 0: 216 | roughness = self.max_roughness * difficulty 217 | terrain = Hills.make(size, resolution, (roughness, 20), 218 | random_state=random_gen) 219 | elif terrain_type == 1: 220 | slope = self.max_slope * difficulty 221 | axis = random_gen.choice(('x', 'y')) 222 | terrain = Slopes.make(size, resolution, slope, 3., axis) 223 | elif terrain_type == 2: 224 | step_height = self.max_step_height * difficulty 225 | terrain = Steps.make(size, resolution, 1., step_height, random_gen) 226 | elif terrain_type == 3: 227 | terrain = PlainHf.make(size, resolution) 228 | return terrain 229 | 230 | def init_episode(self, robot, env): 231 | env.arena = self.generate_terrain(env.np_random) 232 | 233 | 234 | class RandomPerturbHook(Hook): 235 | def __init__(self): 236 | self.perturb_prob = 0.5 237 | self.force_magnitude = np.array((20., 20.)) 238 | self.torque_magnitude = np.array((2.5, 5., 5.)) 239 | self.interval_range = (0.5, 2.0) 240 | self.interval = 0 241 | self.last_update = 0 242 | 243 | def get_random_perturb(self, random_gen): 244 | horizontal_force = random_gen.uniform(0, self.force_magnitude[0]) 245 | vertical_force = random_gen.uniform(0, self.force_magnitude[1]) 246 | yaw = random_gen.uniform(0, 2 * math.pi) 247 | external_force = np.array(( 248 | horizontal_force * np.cos(yaw), 249 | horizontal_force * np.sin(yaw), 250 | vertical_force * random_gen.choice((-1, 1)) 251 | )) 252 | 253 | external_torque = random_gen.uniform(-self.torque_magnitude, self.torque_magnitude) 254 | return external_force, external_torque 255 | 256 | def before_substep(self, robot, env): 257 | if env.sim_time >= self.last_update + self.interval: 258 | random = env.np_random 259 | if random.random() < self.perturb_prob: 260 | env.set_perturbation(np.concatenate(self.get_random_perturb(random))) 261 | else: 262 | env.set_perturbation(None) 263 | self.interval = random.uniform(*self.interval_range) 264 | self.last_update = env.sim_time 265 | 266 | def init_episode(self, robot, env): 267 | self.last_update = self.interval = 0 268 | 269 | 270 | class VideoRecorderHook(Hook): 271 | def __init__(self, path=''): 272 | if not path: 273 | path = os.path.join('Videos', f'record-{get_timestamp()}.mp4') 274 | os.makedirs(os.path.dirname(path), exist_ok=True) 275 | self._path = path 276 | self._log_id = -1 277 | self._status = True 278 | 279 | def before_step(self, robot, env): 280 | if self._log_id == -1 and self._status: 281 | log.info('start recording') 282 | self._log_id = env.sim_env.startStateLogging( 283 | pyb.STATE_LOGGING_VIDEO_MP4, self._path 284 | ) 285 | self._status = False 286 | 287 | def init_episode(self, robot, env): 288 | if self._log_id != -1: 289 | env.client.stopStateLogging(self._log_id) 290 | self._log_id = -1 291 | 292 | 293 | class _UdpPublisher(object): 294 | """ 295 | Send data stream of locomotion to outer tools such as PlotJuggler. 
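    Messages are JSON dicts sent to 127.0.0.1 on the given port; a UDP/JSON
    listener such as PlotJuggler's streamer can plot the fields live.

    Minimal usage sketch (field names and values are illustrative only):

        pub = _UdpPublisher(9870)
        pub.send({'sim_time': 1.25, 'torque': [0.] * 12})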
296 | """ 297 | 298 | def __init__(self, port): 299 | self.client = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 300 | self.port = port 301 | 302 | def send(self, data: dict): 303 | msg = json.dumps(data) 304 | ip_port = ('127.0.0.1', self.port) 305 | self.client.sendto(msg.encode('utf-8'), ip_port) 306 | 307 | 308 | class StatisticsHook(Hook): 309 | def __init__(self, publish_on=True): 310 | self._torque_sum = 0. 311 | self._torque_abs_sum = 0. 312 | self._torque_pen_sum = 0.0 313 | self._joint_motion_sum = 0.0 314 | self._step_counter = 0 315 | self._publish = publish_on 316 | if self._publish: 317 | self._udp_pub = _UdpPublisher(9870) 318 | self._task = None 319 | 320 | def register_task(self, task): 321 | self._task = task 322 | 323 | def after_step(self, robot, env): 324 | 325 | def wrap(reward_type): 326 | return getattr(self._task.ALL_REWARDS, reward_type)()(robot, env, self._task) 327 | 328 | _, reward_details = self._task.get_reward(detailed=True) 329 | self._step_counter += 1 330 | self._torque_sum += robot.get_last_torque() ** 2 331 | self._torque_abs_sum += abs(robot.get_last_torque()) 332 | self._torque_pen_sum += wrap('TorquePenalty') 333 | self._joint_motion_sum += wrap('JointMotionPenalty') 334 | 335 | # def publish(self, task, rob, env): 336 | 337 | # data = { 338 | # 'obs': env.makeNoisyProprioInfo().standard().tolist(), 339 | # 'action': env._action.tolist(), 340 | # 'joint_states': { 341 | # 'joint_pos': rob.getJointPositions().tolist(), 342 | # 'violence': env.getActionViolence().tolist(), 343 | # 'commands': rob.getLastCommand().tolist(), 344 | # 'joint_vel': rob.getJointVelocities().tolist(), 345 | # 'joint_acc': rob.getJointAccelerations().tolist(), 346 | # # 'kp_part': tuple(rob._motor._kp_part), 347 | # # 'kd_part': tuple(rob._motor._kd_part), 348 | # 'torque': rob.getLastAppliedTorques().tolist(), 349 | # 'contact': rob.getContactStates().tolist() 350 | # }, 351 | # 'body_height': env.getTerrainBasedHeightOfRobot(), 352 | # # 'cot': rob.getCostOfTransport(), 353 | # 'twist': { 354 | # 'linear': rob.getBaseLinearVelocityInBaseFrame().tolist(), 355 | # 'angular': rob.getBaseAngularVelocityInBaseFrame().tolist(), 356 | # }, 357 | # # 'torque_pen': wrap(TorquePenalty) 358 | # } 359 | # self._udp_pub.send(data) 360 | 361 | def init_episode(self, robot, env): 362 | if self._step_counter != 0.: 363 | print('episode len:', self._step_counter) 364 | print('mse torque', np.sqrt(self._torque_sum / self._step_counter)) 365 | print('abs torque', self._torque_abs_sum / self._step_counter) 366 | print('torque pen', self._torque_pen_sum / self._step_counter) 367 | print('joint motion pen', self._joint_motion_sum / self._step_counter) 368 | self._torque_sum = 0. 369 | self._torque_abs_sum = 0. 
370 | self._torque_pen_sum = 0.0 371 | self._joint_motion_sum = 0.0 372 | self._step_counter = 0 373 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/terrain.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import dataclasses 3 | from typing import Union, Tuple, Iterable 4 | 5 | import numpy as np 6 | import pybullet as pyb 7 | from scipy.interpolate import interp2d 8 | 9 | from qdpgym.sim.abc import Terrain, NUMERIC 10 | from qdpgym.utils.tf import vcross, vunit 11 | 12 | __all__ = [ 13 | 'NullTerrain', 'Plain', 'HeightFieldTerrain', 14 | 'Hills', 'PlainHf', 'Slopes', 'Steps', 'TerrainBase' 15 | ] 16 | 17 | 18 | class TerrainBase(Terrain, metaclass=abc.ABCMeta): 19 | def __init__(self): 20 | self._id: int = -1 21 | self._spawned = False 22 | 23 | @property 24 | def id(self): 25 | return self._id 26 | 27 | def spawn(self, sim_env): 28 | raise NotImplementedError 29 | 30 | def replace(self, sim_env, obj: 'TerrainBase'): 31 | obj.remove(sim_env) 32 | self.spawn(sim_env) 33 | 34 | def remove(self, sim_env): 35 | if self._id != -1: 36 | sim_env.removeBody(self._id) 37 | 38 | 39 | class NullTerrain(TerrainBase): 40 | pass 41 | 42 | 43 | class Plain(TerrainBase): 44 | def spawn(self, sim_env): 45 | if self._id != -1: 46 | return 47 | self._id = sim_env.loadURDF("plane.urdf") 48 | sim_env.changeDynamics(self._id, -1, lateralFriction=1.0) 49 | 50 | def get_height(self, x, y): 51 | return 0.0 52 | 53 | def get_normal(self, x, y): 54 | return np.array((0, 0, 1)) 55 | 56 | def get_peak(self, x_range, y_range): 57 | return sum(x_range) / 2, sum(y_range) / 2, 0.0 58 | 59 | def out_of_range(self, x, y): 60 | return False 61 | 62 | 63 | @dataclasses.dataclass 64 | class HeightField: 65 | data: np.ndarray 66 | size: Union[NUMERIC, Tuple[NUMERIC]] 67 | resolution: NUMERIC 68 | 69 | 70 | class HeightFieldTerrain(TerrainBase): 71 | def __init__(self, heightfield: HeightField): 72 | super().__init__() 73 | self.heightfield = heightfield.data 74 | self.y_dim, self.x_dim = self.heightfield.shape 75 | if isinstance(heightfield.size, Iterable): 76 | self.x_size, self.y_size = heightfield.size 77 | else: 78 | self.x_size, self.y_size = heightfield.size, heightfield.size 79 | self.x_rsl = self.y_rsl = heightfield.resolution 80 | self._shape_id = -1 81 | 82 | def spawn(self, sim_env): 83 | if self._id != -1: 84 | return 85 | self._shape_id = sim_env.createCollisionShape( 86 | shapeType=pyb.GEOM_HEIGHTFIELD, 87 | flags=pyb.GEOM_CONCAVE_INTERNAL_EDGE, 88 | meshScale=(self.x_rsl, self.y_rsl, 1.0), 89 | heightfieldTextureScaling=self.x_size, 90 | heightfieldData=self.heightfield.reshape(-1), 91 | numHeightfieldColumns=self.x_dim, 92 | numHeightfieldRows=self.y_dim, 93 | replaceHeightfieldIndex=-1 94 | ) 95 | self._id = sim_env.createMultiBody(0, self._shape_id) 96 | sim_env.changeVisualShape(self._id, -1, rgbaColor=(1, 1, 1, 1)) 97 | sim_env.changeDynamics(self._id, -1, lateralFriction=1.0) 98 | origin_z = (np.max(self.heightfield) + np.min(self.heightfield)) / 2 99 | sim_env.resetBasePositionAndOrientation(self._id, (0, 0, origin_z), 100 | (0., 0., 0., 1.)) 101 | 102 | def replace(self, sim_env, obj): 103 | assert self._id == self._shape_id == -1, f'`{self}` have been spawned' 104 | if obj._id == -1: 105 | return 106 | 107 | if not isinstance(obj, HeightFieldTerrain): 108 | return super().replace(sim_env, obj) 109 | 110 | # Currently, in bullet <= 3.2.4, 111 | # Heightfield replacement may cause collision detection 
failure. 112 | # See https://github.com/bulletphysics/bullet3/issues/4236 113 | # See https://github.com/bulletphysics/bullet3/pull/4253 114 | self._id = obj._id 115 | self._shape_id = sim_env.createCollisionShape( 116 | shapeType=pyb.GEOM_HEIGHTFIELD, flags=pyb.GEOM_CONCAVE_INTERNAL_EDGE, 117 | meshScale=(self.x_rsl, self.y_rsl, 1.0), 118 | heightfieldTextureScaling=self.x_size, 119 | heightfieldData=self.heightfield.reshape(-1), 120 | numHeightfieldColumns=self.x_dim, numHeightfieldRows=self.y_dim, 121 | replaceHeightfieldIndex=obj._shape_id 122 | ) 123 | obj._id = obj._shape_id = -1 124 | 125 | origin_z = (np.max(self.heightfield) + np.min(self.heightfield)) / 2 126 | sim_env.resetBasePositionAndOrientation(self._id, (0, 0, origin_z), 127 | (0., 0., 0., 1.)) 128 | 129 | def remove(self, sim_env): 130 | if self._id != -1: 131 | sim_env.removeBody(self._id) 132 | sim_env.removeCollisionShape(self._shape_id) 133 | self._id = self._shape_id = -1 134 | 135 | @property 136 | def shape_id(self): 137 | return self._shape_id 138 | 139 | def get_disc_x(self, x): 140 | return int((x + self.x_size / 2) / self.x_rsl) 141 | 142 | def get_disc_y(self, y): 143 | return int((y + self.y_size / 2) / self.y_rsl) 144 | 145 | def get_cont_x(self, x_idx): 146 | return x_idx * self.x_rsl - self.x_size / 2 147 | 148 | def get_cont_y(self, y_idx): 149 | return y_idx * self.y_rsl - self.y_size / 2 150 | 151 | def get_nearest_vertices(self, x, y): 152 | x_idx, y_idx = self.get_disc_x(x), self.get_disc_y(y) 153 | x_rnd, y_rnd = self.get_cont_x(x_idx), self.get_cont_y(y_idx) 154 | if (x - x_rnd) / self.x_rsl + (y - y_rnd) / self.y_rsl < 1: 155 | v1 = x_rnd, y_rnd, self.heightfield[y_idx, x_idx] 156 | else: 157 | v1 = x_rnd + self.x_rsl, y_rnd + self.y_rsl, self.heightfield[y_idx + 1, x_idx + 1] 158 | v2 = x_rnd, y_rnd + self.y_rsl, self.heightfield[y_idx + 1, x_idx] 159 | v3 = x_rnd + self.x_rsl, y_rnd, self.heightfield[y_idx, x_idx + 1] 160 | return np.array(v1), np.array(v2), np.array(v3) 161 | 162 | def get_peak(self, x_range, y_range): 163 | (x_lower, x_upper), (y_lower, y_upper) = x_range, y_range 164 | x_lower_idx, x_upper_idx = self.get_disc_x(x_lower), self.get_disc_x(x_upper) + 1 165 | y_lower_idx, y_upper_idx = self.get_disc_y(y_lower), self.get_disc_y(y_upper) + 1 166 | hfield_part = self.heightfield[y_lower_idx:y_upper_idx, x_lower_idx:x_upper_idx] 167 | y_size, x_size = hfield_part.shape 168 | max_idx = np.argmax(hfield_part) 169 | max_x_idx, max_y_idx = max_idx % x_size, max_idx // x_size 170 | max_height = hfield_part[max_y_idx, max_x_idx] 171 | max_x, max_y = x_lower + max_x_idx * self.x_rsl, y_lower + max_y_idx * self.y_rsl 172 | return max_x, max_y, max_height 173 | 174 | def get_height(self, x, y): 175 | try: 176 | v1, v2, v3 = self.get_nearest_vertices(x, y) 177 | except IndexError: 178 | return 0.0 179 | if x == v1[0] and y == v1[1]: 180 | return v1[2] 181 | x1, y1, z1 = v2 - v1 182 | x2, y2, z2 = v3 - v1 183 | x3, y3 = x - v1[0], y - v1[1] 184 | div = (x1 * y2 - x2 * y1) 185 | c1 = (x3 * y2 - x2 * y3) / div 186 | c2 = (x1 * y3 - x3 * y1) / div 187 | return c1 * z1 + c2 * z2 + v1[2] 188 | 189 | def get_normal(self, x, y) -> np.ndarray: 190 | try: 191 | v1, v2, v3 = self.get_nearest_vertices(x, y) 192 | except IndexError: 193 | return np.array((0., 0., 1.)) 194 | normal = vunit(vcross(v1 - v2, v1 - v3)) 195 | return normal if normal[2] > 0 else -normal 196 | 197 | def out_of_range(self, x, y): 198 | return abs(x) > self.x_size / 2 - 1 or abs(y) > self.y_size / 2 - 1 199 | 200 | 201 | class 
Hills(HeightFieldTerrain): 202 | def __init__(self, heightfield): 203 | super().__init__(heightfield) 204 | 205 | @classmethod 206 | def make(cls, size, resolution, *rough_dsp: Tuple[NUMERIC, NUMERIC], random_state): 207 | return cls(cls.make_hfield(size, resolution, *rough_dsp, random_state=random_state)) 208 | 209 | @classmethod 210 | def make_hfield(cls, size, resol, *rough_dsp: Tuple[NUMERIC, NUMERIC], random_state) -> HeightField: 211 | data_size = int(size / resol) + 1 212 | hfield_data = np.zeros((data_size, data_size)) 213 | for roughness, dsp in rough_dsp: 214 | sample_rsl = dsp * resol 215 | x = y = np.arange(-size / 2 - 3 * sample_rsl, size / 2 + 4 * sample_rsl, sample_rsl) 216 | hfield_dsp = random_state.uniform(0, roughness, (x.size, y.size)) 217 | terrain_func = interp2d(x, y, hfield_dsp, kind='cubic') 218 | x_upsampled = y_upsampled = np.linspace(-size / 2, size / 2, data_size) 219 | hfield_data += terrain_func(x_upsampled, y_upsampled) 220 | return HeightField(hfield_data, size, resol) 221 | 222 | @classmethod 223 | def default(cls): 224 | return cls.make(20, 0.1, (0.2, 10), random_state=np.random) 225 | 226 | 227 | class PlainHf(HeightFieldTerrain): 228 | @classmethod 229 | def make(cls, size, resolution): 230 | return cls(cls.make_hfield(size, resolution)) 231 | 232 | @staticmethod 233 | def make_hfield(size, resolution): 234 | data_size = int(size / resolution) + 1 235 | hfield_data = np.zeros((data_size, data_size)) 236 | return HeightField(hfield_data, size, resolution) 237 | 238 | def get_height(self, x, y): 239 | return 0.0 240 | 241 | def get_normal(self, x, y): 242 | return np.array((0., 0., 1.)) 243 | 244 | def get_peak(self, x_range, y_range): 245 | return sum(x_range) / 2, sum(y_range) / 2, 0. 246 | 247 | @classmethod 248 | def default(cls): 249 | return cls.make(20, 0.1) 250 | 251 | 252 | class Slopes(HeightFieldTerrain): 253 | @classmethod 254 | def make(cls, size, resolution, slope, slope_width, axis='x'): 255 | return cls(cls.make_hfield(size, resolution, slope, slope_width, axis)) 256 | 257 | @classmethod 258 | def make_hfield(cls, size, resolution, slope, slope_width, axis): 259 | step = int(slope_width * 2 / resolution) 260 | data_size = int(size / resolution) + 1 261 | num_steps = int(data_size / step) + 1 262 | slope = np.tan(slope) 263 | hfield_data = np.zeros((data_size, data_size)) 264 | for i in range(num_steps): 265 | x_start, x_stop = i * step, int((i + 0.4) * step) 266 | for j in range(x_start, min(x_stop, data_size)): 267 | hfield_data[:, j] = (x_stop - j) * slope * resolution 268 | x_start, x_stop = x_stop, int((i + 0.6) * step) 269 | for j in range(x_start, min(x_stop, data_size)): 270 | hfield_data[:, j] = 0. 
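            # The remaining 40% of the period climbs back up, so each period forms
            # a V-shaped valley: 40% downhill, 20% flat floor, 40% uphill.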
271 | x_start, x_stop = x_stop, (i + 1) * step 272 | for j in range(x_start, min(x_stop, data_size)): 273 | hfield_data[:, j] = (j - x_start) * slope * resolution 274 | return HeightField(cls.rotate(hfield_data, axis), size, resolution) 275 | 276 | @classmethod 277 | def rotate(cls, hfield_data, axis): 278 | if axis == 'x': 279 | return hfield_data 280 | elif axis == 'y': 281 | return hfield_data.T 282 | raise RuntimeError('Unknown axis') 283 | 284 | @classmethod 285 | def default(cls): 286 | return cls.make(20, 0.1, 0.17, 3.0) 287 | 288 | 289 | class Steps(HeightFieldTerrain): 290 | @classmethod 291 | def make(cls, size, resolution, step_width, max_step_height, random_state): 292 | return cls(cls.make_hfield(size, resolution, step_width, max_step_height, random_state)) 293 | 294 | @staticmethod 295 | def make_hfield(size, resolution, step_width, max_step_height, random_state): 296 | step = int(step_width / resolution) 297 | data_size = int(size / resolution) + 1 298 | num_steps = int(data_size / step) + 1 299 | hfield_data = np.zeros((data_size, data_size)) 300 | for i in range(num_steps + 1): 301 | for j in range(num_steps + 1): 302 | x_start, x_stop = int((i - 0.5) * step), int((i + 0.5) * step) 303 | y_start, y_stop = int((j - 0.5) * step), int((j + 0.5) * step) 304 | hfield_data[y_start:y_stop, x_start:x_stop] = random_state.uniform(0., max_step_height) 305 | return HeightField(hfield_data, size, resolution) 306 | 307 | @classmethod 308 | def default(cls): 309 | return cls.make(20, 0.1, 1.0, 0.1, random_state=np.random) 310 | -------------------------------------------------------------------------------- /qdpgym/sim/blt/utils.py: -------------------------------------------------------------------------------- 1 | class JointInfo(object): 2 | def __init__(self, joint_info: tuple): 3 | self._info = joint_info 4 | 5 | idx = property(lambda self: self._info[0]) 6 | name = property(lambda self: self._info[1].decode("UTF-8")) 7 | type = property(lambda self: self._info[2]) 8 | q_idx = property(lambda self: self._info[3]) 9 | u_idx = property(lambda self: self._info[4]) 10 | damping = property(lambda self: self._info[6]) 11 | friction = property(lambda self: self._info[7]) 12 | limits = property(lambda self: (self._info[8], self._info[9])) 13 | max_force = property(lambda self: self._info[10]) 14 | max_vel = property(lambda self: self._info[11]) 15 | link_name = property(lambda self: self._info[12].decode("UTF-8")) 16 | axis = property(lambda self: self._info[13]) 17 | parent_frame_pos = property(lambda self: self._info[14]) 18 | parent_frame_orn = property(lambda self: self._info[15]) 19 | parent_idx = property(lambda self: self._info[16]) 20 | 21 | import pybullet as pyb 22 | joint_types = {pyb.JOINT_REVOLUTE: 'rev', pyb.JOINT_PRISMATIC: 'pri', pyb.JOINT_SPHERICAL: 'sph', 23 | pyb.JOINT_PLANAR: 'pla', pyb.JOINT_FIXED: 'fix'} 24 | 25 | axis_dict = {(1., 0., 0.): '+X', (0., 1., 0.): '+Y', (0., 0., 1.): '+Z', 26 | (-1., 0., 0.): '-X', (0., -1., 0.): '-Y', (0., 0., -1.): '-Z', } 27 | 28 | 29 | class DynamicsInfo(object): 30 | def __init__(self, dynamics_info: tuple): 31 | self._info = dynamics_info 32 | 33 | mass = property(lambda self: self._info[0]) 34 | lateral_fric = property(lambda self: self._info[1]) 35 | inertia = property(lambda self: self._info[2]) 36 | inertial_pos = property(lambda self: self._info[3]) 37 | inertial_orn = property(lambda self: self._info[4]) 38 | restitution = property(lambda self: self._info[5]) 39 | rolling_fric = property(lambda self: self._info[6]) 40 | 
spinning_fric = property(lambda self: self._info[7]) 41 | damping = property(lambda self: self._info[8]) 42 | stiffness = property(lambda self: self._info[9]) 43 | body_type = property(lambda self: self._info[10]) 44 | collision_margin = property(lambda self: self._info[11]) 45 | -------------------------------------------------------------------------------- /qdpgym/sim/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/common/__init__.py -------------------------------------------------------------------------------- /qdpgym/sim/common/identify.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | __all__ = ['ActuatorNet'] 6 | 7 | 8 | class ActuatorNet(nn.Module): 9 | activation = nn.Softsign 10 | 11 | def __init__( 12 | self, 13 | input_dim=6, 14 | output_dim=1, 15 | hidden_dims=(32, 32, 32) 16 | ): 17 | super().__init__() 18 | layers = [] 19 | self.input_dim, self.output_dim, self.hidden_dims = input_dim, output_dim, hidden_dims 20 | for dim in hidden_dims: 21 | layers.append(nn.Linear(input_dim, dim)) 22 | layers.append(self.activation()) 23 | input_dim = dim 24 | layers.append(nn.Linear(input_dim, output_dim)) 25 | self.layers = nn.Sequential(*layers) 26 | self.device = torch.device('cpu') 27 | 28 | def forward(self, state): 29 | return self.layers(state) 30 | 31 | def to(self, device, *args, **kwargs): 32 | self.device = torch.device(device) 33 | return super().to(device, *args, **kwargs) 34 | 35 | def calc_torque(self, err1, err2, err3, vel1, vel2, vel3): 36 | with torch.inference_mode(): 37 | X = np.array( 38 | (err1, err2, err3, vel1, vel2, vel3), 39 | dtype=np.float32 40 | ).transpose() 41 | try: 42 | Y = self(torch.as_tensor(X, device=self.device)) 43 | except RuntimeError: 44 | self.device = next(self.parameters()).device 45 | Y = self(torch.as_tensor(X, device=self.device)) 46 | return Y.double().squeeze().cpu().numpy() 47 | -------------------------------------------------------------------------------- /qdpgym/sim/common/motor.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Iterable 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from qdpgym.sim.common.identify import ActuatorNet 8 | from qdpgym.utils import get_padded, replace_is 9 | 10 | 11 | class MotorSim(object): 12 | def __init__(self, frequency): 13 | self._freq = frequency 14 | self._observe_done = False 15 | self._pos = self._vel = 0. 
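        # The deques below keep short command/observation histories so that
        # input/output latency can be emulated by reading delayed samples.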
16 | self._residue = None 17 | 18 | self._joint_lower = self._joint_upper = None 19 | self._torque_limits = None 20 | self._cmd_clip = None 21 | self._input_delay = self._output_delay = 0 22 | 23 | self._torque_history = collections.deque(maxlen=11) 24 | self._residue_history = collections.deque(maxlen=11) 25 | self._obs_history = collections.deque(maxlen=11) 26 | 27 | def set_latency(self, input_delay: int = 0, output_delay: int = 0): 28 | self._input_delay = input_delay 29 | self._output_delay = output_delay 30 | 31 | def set_joint_limits(self, lower: Iterable = None, upper: Iterable = None): 32 | lower, upper = list(lower), list(upper) 33 | if lower is not None and None in lower: 34 | replace_is(lower, None, -np.inf) 35 | if upper is not None and None in upper: 36 | replace_is(upper, None, np.inf) 37 | self._joint_lower = np.array(lower) 38 | self._joint_upper = np.array(upper) 39 | 40 | def set_torque_limits(self, upper): 41 | self._torque_limits = np.asarray(upper) 42 | 43 | def set_cmd_clip(self, upper): 44 | self._cmd_clip = np.asarray(upper) 45 | 46 | def reset(self): 47 | self._observe_done = False 48 | self._torque_history.clear() 49 | self._residue_history.clear() 50 | 51 | def update_observation(self, pos, vel): 52 | self._observe_done = True 53 | self._obs_history.append((np.array(pos), np.array(vel))) 54 | self._pos, self._vel = get_padded(self._obs_history, -self._input_delay - 1) 55 | 56 | def apply_hybrid(self, des_pos, ff_torque): 57 | if self._joint_lower is not None or self._joint_upper is not None: 58 | des_pos = np.clip(des_pos, self._joint_lower, self._joint_upper) 59 | self._residue = des_pos - self._pos 60 | if self._cmd_clip: 61 | self._residue = np.clip(self._residue, -self._cmd_clip, self._cmd_clip) 62 | self._residue_history.append(self._residue) 63 | return self.apply_torque(self.calc_torque() + ff_torque) 64 | 65 | def apply_position(self, des_pos): 66 | return self.apply_hybrid(des_pos, 0) 67 | 68 | def apply_torque(self, des_torque): 69 | assert self._observe_done, 'Update observation before executing a command' 70 | if self._torque_limits is not None: 71 | des_torque = np.clip(des_torque, -self._torque_limits, self._torque_limits) 72 | self._torque_history.append(des_torque) 73 | self._observe_done = False 74 | return get_padded(self._torque_history, -self._output_delay - 1) 75 | 76 | def calc_torque(self): 77 | raise NotImplementedError 78 | 79 | 80 | class PdMotorSim(MotorSim): 81 | def __init__(self, frequency, kp, kd): 82 | super().__init__(frequency) 83 | self._kp, self._kd = np.asarray(kp), np.asarray(kd) 84 | self._kp_part, self._kd_part = 0., 0. 
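    # PD control law: torque = kp * (desired_pos - pos) - kd * joint_vel,
    # i.e. damping acts on the measured velocity, not on a velocity error.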
85 | 86 | def calc_torque(self): 87 | self._kp_part, self._kd_part = self._kp * self._residue, self._kd * self._vel 88 | return self._kp_part - self._kd_part 89 | 90 | 91 | class ActuatorNetSim(MotorSim): 92 | def __init__(self, frequency, net=None): 93 | super().__init__(frequency) 94 | self._net = net 95 | 96 | def load_params(self, model_path, device='cpu'): 97 | model_info = torch.load(model_path, map_location=device) 98 | self._net = ActuatorNet(hidden_dims=model_info['hidden_dims']).to(device) 99 | self._net.load_state_dict(model_info['model']) 100 | 101 | def calc_torque(self): 102 | return self._net.calc_torque( 103 | self._residue, 104 | get_padded(self._residue_history, -4), 105 | get_padded(self._residue_history, -7), 106 | self._vel, 107 | get_padded(self._obs_history, -4)[1], 108 | get_padded(self._obs_history, -7)[1] 109 | ) 110 | -------------------------------------------------------------------------------- /qdpgym/sim/common/noisyhandle.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Deque, Optional 3 | 4 | import numpy as np 5 | 6 | from qdpgym import tf, utils as ut 7 | from qdpgym.sim.abc import Quadruped, QuadrupedHandle, Snapshot 8 | 9 | 10 | class NoisyHandle(QuadrupedHandle): 11 | def __init__(self, robot: Quadruped, frequency, latency=0): 12 | self._robot = robot 13 | self._freq = frequency 14 | self._latency = latency 15 | 16 | self._obs: Optional[Snapshot] = None 17 | self._obs_buffer: Deque[Snapshot] = collections.deque(maxlen=20) 18 | self._obs_history: Deque[Snapshot] = collections.deque(maxlen=100) 19 | 20 | @property 21 | def raw(self): 22 | return self._robot 23 | 24 | @property 25 | def latency(self): 26 | return self._latency 27 | 28 | @latency.setter 29 | def latency(self, value): 30 | self._latency = value 31 | 32 | @property 33 | def obs_history(self): 34 | return ut.PadWrapper(self._obs_history) 35 | 36 | @property 37 | def cmd_history(self): 38 | return self._robot.cmd_history 39 | 40 | @property 41 | def not_delayed(self): 42 | if self._obs_buffer: 43 | return self._obs_buffer[-1] 44 | return self._obs 45 | 46 | def update_observation( 47 | self, 48 | state: Snapshot, 49 | random_gen: np.random.Generator 50 | ): 51 | """Add noise on observation""" 52 | add_noise = random_gen.normal 53 | obs = Snapshot( 54 | rpy=add_noise(state.rpy, 1e-2), 55 | velocimeter=add_noise(state.velocimeter, 5e-2), 56 | gyro=add_noise(state.gyro, 5e-2), 57 | joint_pos=add_noise(state.joint_pos, 5e-3), 58 | joint_vel=add_noise(state.joint_vel, 1e-1) 59 | ) 60 | obs.orientation = tf.Quaternion.from_rpy(obs.rpy) 61 | self._obs_buffer.append(obs) 62 | if len(self._obs_buffer) <= self._latency * self._freq: 63 | self._obs = self._obs_buffer[0] 64 | else: 65 | self._obs = self._obs_buffer.popleft() 66 | self._obs_history.append(self._obs) 67 | 68 | def get_base_pos(self) -> np.ndarray: 69 | return self._obs.position 70 | 71 | def get_base_orn(self) -> np.ndarray: 72 | return self._obs.orientation 73 | 74 | def get_base_rot(self) -> np.ndarray: 75 | return tf.Rotation.from_quaternion(self._obs.orientation) 76 | 77 | def get_base_rpy(self) -> np.ndarray: 78 | return self._obs.rpy 79 | 80 | def get_base_rpy_rate(self) -> np.ndarray: 81 | return tf.get_rpy_rate_from_ang_vel(self._obs.rpy, self._obs.angular_vel) 82 | 83 | def get_base_lin(self) -> np.ndarray: 84 | return self._obs.linear_vel 85 | 86 | def get_base_ang(self) -> np.ndarray: 87 | return self._obs.angular_vel 88 | 89 | def 
get_joint_pos(self) -> np.ndarray: 90 | return self._obs.joint_pos 91 | 92 | def get_joint_vel(self) -> np.ndarray: 93 | return self._obs.joint_vel 94 | 95 | def get_velocimeter(self): 96 | return self._obs.velocimeter 97 | 98 | def get_gyro(self): 99 | return self._obs.gyro 100 | 101 | def get_accelerometer(self): 102 | return self._obs.accelerometer 103 | 104 | def get_torso_contact(self): 105 | return self._robot.get_torso_contact() 106 | 107 | def get_leg_contacts(self): 108 | return self._robot.get_leg_contacts() 109 | 110 | def get_last_torque(self) -> np.ndarray: 111 | return self._robot.get_last_torque() 112 | 113 | def get_state_history(self, latency): 114 | return ut.get_padded(self._obs_history, -int(latency * self._freq) - 1) 115 | 116 | def get_cmd_history(self, latency): 117 | return self._robot.get_cmd_history(latency) 118 | 119 | def get_foot_contacts(self): 120 | return self._robot.get_foot_contacts() 121 | 122 | def get_contact_forces(self): 123 | return self._robot.get_contact_forces() 124 | 125 | def get_force_sensor(self): 126 | return self._robot.get_force_sensor() 127 | 128 | def get_last_command(self) -> np.ndarray: 129 | return self._robot.get_last_command() 130 | 131 | def reset(self): 132 | self._obs = None 133 | self._obs_buffer.clear() 134 | self._obs_history.clear() 135 | -------------------------------------------------------------------------------- /qdpgym/sim/common/tg.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import random 3 | 4 | import numpy as np 5 | 6 | from qdpgym.utils import Angle 7 | 8 | 9 | def power(x, exp): 10 | _x = 1 11 | for _ in range(exp + 1): 12 | yield _x 13 | _x *= x 14 | 15 | 16 | def vertical_tg(h=0.12): # 0.2 in paper 17 | coeff1 = np.array((0., 0., 3., -2.)) * h 18 | coeff2 = np.array((-4., 12., -9., 2.)) * h 19 | 20 | def _tg(phases): 21 | priori = [] 22 | for phi in phases: 23 | k = phi * 2 / np.pi 24 | if k <= 0 or k >= 2: 25 | priori.append(np.zeros(3)) 26 | k_pow = list(power(k, 3)) 27 | if 0 < k <= 1: 28 | priori.append((0., 0., coeff1 @ k_pow)) 29 | if 1 < k < 2: 30 | priori.append((0., 0., coeff2 @ k_pow)) 31 | return np.array(priori) 32 | 33 | return _tg 34 | 35 | 36 | class PhaseRoller(object): 37 | TROT = (0.0, -np.pi, -np.pi, 0.0) 38 | WALK = (0.0, -np.pi / 2, -np.pi, -np.pi * 3 / 2) 39 | PACE = (0.0, -np.pi, 0.0, -np.pi) 40 | base_frequency = 2.0 # TODO: COMPLETE THE STATE MACHINE 41 | 42 | class InitType(enum.IntEnum): 43 | FIXED = 0 44 | SYMMETRIC = 1 45 | RANDOM = 2 46 | 47 | def __init__(self, time_step, random_state, init_type='random'): 48 | self._time_step = time_step 49 | self._random = random_state 50 | self._init_type = self._check_init_type(init_type) 51 | 52 | self._init_phases = self._get_init_phases() 53 | self._phases = self._init_phases.copy() 54 | self._frequency = np.ones(4) * self.base_frequency 55 | self._cycles = np.zeros(4) 56 | 57 | @staticmethod 58 | def symmetric(phases): 59 | fr, fl, rr, rl = phases 60 | return fl, fr, rl, rr 61 | 62 | def random_symmetric(self, phases): 63 | return phases if self._random.random() > 0.5 else self.symmetric(phases) 64 | 65 | def _check_init_type(self, init_type): 66 | if init_type == 'fixed': 67 | return self.InitType.FIXED 68 | elif init_type == 'symmetric': 69 | return self.InitType.SYMMETRIC 70 | elif init_type == 'random': 71 | return self.InitType.RANDOM 72 | else: 73 | raise RuntimeError(f'Unknown TG Init Mode {init_type}') 74 | 75 | def _get_init_phases(self): 76 | if self._init_type == 
self.InitType.FIXED: 77 | return np.array(self.TROT, dtype=np.float32) 78 | elif self._init_type == self.InitType.SYMMETRIC: 79 | return np.array(self.random_symmetric(self.TROT), dtype=np.float32) 80 | elif self._init_type == self.InitType.RANDOM: 81 | return self._random.uniform(-np.pi, np.pi, 4).astype(np.float32) 82 | 83 | @property 84 | def phases(self): 85 | return self._phases 86 | 87 | @property 88 | def frequency(self): 89 | return self._frequency 90 | 91 | @property 92 | def cycles(self): 93 | return self._cycles 94 | 95 | def reset(self): 96 | self._init_phases = self._get_init_phases() 97 | self._phases = self._init_phases.copy() 98 | self._frequency = np.ones(4) * self.base_frequency 99 | self._cycles = np.zeros(4) 100 | 101 | def update(self): 102 | _phases = self._phases.copy() 103 | self._phases += self._frequency * self._time_step * 2 * np.pi 104 | flags = np.logical_and(Angle.norm(_phases - self._init_phases) < 0, 105 | Angle.norm(self._phases - self._init_phases) >= 0) 106 | self._cycles[(flags.nonzero(),)] += 1 107 | self._phases = Angle.norm(self._phases) 108 | return self._phases 109 | 110 | 111 | class TgStateMachine(PhaseRoller): 112 | def __init__( 113 | self, 114 | time_step, 115 | random_state, 116 | traj_gen, 117 | init_type='symmetric', 118 | freq_bounds=(0.5, 3.0) 119 | ): 120 | super().__init__(time_step, random_state, init_type) 121 | self._freq_bounds = freq_bounds 122 | self._traj_gen = traj_gen 123 | 124 | def update(self, frequency_offsets=None): # FIXME: how freq_offsets works? 125 | if frequency_offsets is not None: 126 | frequency_offsets = np.asarray(frequency_offsets) 127 | self._frequency = self.base_frequency + frequency_offsets 128 | self._frequency = np.clip(self._frequency, *self._freq_bounds) 129 | return super().update() 130 | 131 | def get_priori_trajectory(self): 132 | return np.asarray(self._traj_gen(self._phases)) 133 | -------------------------------------------------------------------------------- /qdpgym/sim/mjc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/mjc/__init__.py -------------------------------------------------------------------------------- /qdpgym/sim/mjc/env.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Optional, Tuple, Any, Union 3 | 4 | import gym 5 | import numpy as np 6 | from dm_control import mjcf 7 | 8 | from qdpgym.sim.abc import Task, Environment, ARRAY_LIKE 9 | from .quadruped import Aliengo 10 | from .terrain import TerrainBase 11 | from qdpgym.utils import PadWrapper, tf 12 | 13 | 14 | class QuadrupedEnv(Environment): 15 | _observation_space = None 16 | _action_space = None 17 | 18 | def __init__( 19 | self, 20 | robot: Aliengo, 21 | arena: TerrainBase, 22 | task: Task, 23 | timestep=2e-3, 24 | time_limit: float = None, 25 | num_substeps: int = 10, 26 | identifier: str = None, 27 | ): 28 | self._robot = robot 29 | self._arena = arena 30 | self._task = task 31 | self._timestep = timestep 32 | self._time_limit = time_limit 33 | self._num_substeps = num_substeps 34 | self._identifier = identifier or f'{hex(id(self))[-7:]}' 35 | self._step_freq = 1 / (self.timestep * self._num_substeps) 36 | 37 | self._init = False 38 | self._elapsed_sim_steps = 0 39 | 40 | self._arena.mjcf_model.option.timestep = timestep 41 | self._physics: Optional[mjcf.Physics] = None 42 | 
self._task.register_env(self._robot, self) 43 | self._action_history = collections.deque(maxlen=10) 44 | self._perturbation = None 45 | 46 | self._interact_terrain_samples = [] 47 | self._interact_terrain_normal: Optional[np.ndarray] = None 48 | self._interact_terrain_height = 0.0 49 | 50 | @property 51 | def observation_space(self) -> gym.Space: 52 | if self._observation_space is None: 53 | if hasattr(self._task, 'observation_space'): 54 | self._observation_space = self._task.observation_space 55 | else: 56 | self._observation_space = gym.Space() 57 | return self._observation_space 58 | 59 | @property 60 | def action_space(self) -> gym.Space: 61 | if self._action_space is None: 62 | if hasattr(self._task, 'action_space'): 63 | self._action_space = self._task.action_space 64 | else: 65 | self._action_space = gym.Space((12,), float) 66 | return self._action_space 67 | 68 | def reset( 69 | self, 70 | seed: Optional[int] = None, 71 | return_info: bool = False, 72 | options: Optional[dict] = None, 73 | ) -> Union[Any, Tuple[Any, dict]]: 74 | 75 | super().reset(seed=seed, return_info=return_info, options=options) 76 | 77 | self._init = True 78 | self._elapsed_sim_steps = 0 79 | self._action_history.clear() 80 | 81 | self._robot.add_to(self._arena) 82 | self._robot.init_mjcf_model(self.np_random) 83 | self._physics = mjcf.Physics.from_mjcf_model(self._arena.mjcf_model) 84 | 85 | self._task.init_episode() 86 | self._robot.init_physics(self._physics, self.np_random) 87 | 88 | for i in range(50): 89 | self._robot.update_observation(None, minimal=True) 90 | self._robot.apply_command(self._robot.STANCE_CONFIG) 91 | self._physics.step() 92 | 93 | self._action_history.append(np.array(self._robot.STANCE_CONFIG)) 94 | self._physics.data.ptr.time = 0. 95 | self._robot.update_observation(self.np_random) 96 | 97 | return ( 98 | self._task.get_observation() 99 | # if return_info else (self._task.get_observation(), {}) 100 | ) 101 | 102 | def close(self): 103 | pass 104 | 105 | @property 106 | def robot(self): 107 | return self._robot 108 | 109 | @property 110 | def physics(self): 111 | return self._physics 112 | 113 | @property 114 | def arena(self): 115 | return self._arena 116 | 117 | @arena.setter 118 | def arena(self, value): 119 | self._init = False 120 | self._arena = value 121 | self._robot.add_to(self._arena) 122 | 123 | @property 124 | def action_history(self): 125 | return PadWrapper(self._action_history) 126 | 127 | @property 128 | def sim_time(self): 129 | return self._physics.time() 130 | 131 | @property 132 | def timestep(self): 133 | return self._timestep 134 | 135 | @property 136 | def num_substeps(self): 137 | return self._num_substeps 138 | 139 | @property 140 | def identifier(self) -> str: 141 | return self._identifier 142 | 143 | def step(self, action: ARRAY_LIKE) -> Tuple[Any, float, bool, dict]: 144 | assert self._init, 'Call `init_episode` before `step`!' 
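        # One env step advances `num_substeps` physics steps, linearly blending
        # from the previous action to the new one so commands change smoothly.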
145 | action = self._task.before_step(action) 146 | action = np.asarray(action) 147 | prev_action = self._action_history[-1] 148 | self._action_history.append(action) 149 | 150 | for i in range(self._num_substeps): 151 | weight = (i + 1) / self._num_substeps 152 | current_action = action * weight + prev_action * (1 - weight) 153 | self._robot.apply_command(current_action) 154 | self._task.before_substep() 155 | 156 | self._apply_perturbation() 157 | self._physics.step() 158 | self._elapsed_sim_steps += 1 159 | self._update_observation() 160 | 161 | self._task.after_substep() 162 | self._task.after_step() 163 | 164 | done = False 165 | info = {} 166 | if self._task.is_failed(): 167 | done = True 168 | self._task.on_fail() 169 | elif ((self._time_limit is not None and self.sim_time >= self._time_limit) 170 | or self._task.is_succeeded()): 171 | done = True 172 | info['TimeLimit.truncated'] = True 173 | self._task.on_success() 174 | 175 | reward, reward_info = self._task.get_reward(True) 176 | info['reward_info'] = reward_info 177 | return ( 178 | self._task.get_observation(), reward, done, info 179 | ) 180 | 181 | def _update_observation(self): 182 | self._robot.update_observation(self.np_random) 183 | 184 | self._interact_terrain_samples.clear() 185 | self._interact_terrain_height = 0. 186 | xy_points = self._robot.get_foot_pos() 187 | for x, y, _ in xy_points: 188 | h = self._arena.get_height(x, y) 189 | self._interact_terrain_height += h 190 | self._interact_terrain_samples.append((x, y, h)) 191 | self._interact_terrain_height /= 4 192 | self._interact_terrain_normal = tf.estimate_normal(self._interact_terrain_samples) 193 | 194 | def get_action_rate(self) -> np.ndarray: 195 | if len(self._action_history) < 2: 196 | return np.zeros(12) 197 | actions = [self._action_history[-i - 1] for i in range(2)] 198 | return (actions[0] - actions[1]) * self._step_freq 199 | 200 | def get_action_accel(self) -> np.ndarray: 201 | if len(self._action_history) < 3: 202 | return np.zeros(12) 203 | actions = [self._action_history[-i - 1] for i in range(3)] 204 | return (actions[0] - 2 * actions[1] + actions[2]) * self._step_freq ** 2 205 | 206 | def get_relative_robot_height(self) -> float: 207 | return self._robot.get_base_pos()[2] - self._interact_terrain_height 208 | 209 | def get_interact_terrain_normal(self) -> np.ndarray: 210 | return self._interact_terrain_normal 211 | 212 | def get_interact_terrain_rot(self) -> np.ndarray: 213 | return tf.Rotation.from_zaxis(self._interact_terrain_normal) 214 | 215 | def get_perturbation(self, in_robot_frame=False): 216 | if self._perturbation is None: 217 | return None 218 | elif in_robot_frame: 219 | rotation = self._robot.get_base_rot() 220 | perturbation = np.concatenate( 221 | [rotation.T @ self._perturbation[i * 3:i * 3 + 3] for i in range(2)] 222 | ) 223 | else: 224 | perturbation = self._perturbation 225 | return perturbation 226 | 227 | def set_perturbation(self, value=None): 228 | self._perturbation = np.array(value) 229 | 230 | def _apply_perturbation(self): 231 | if self._perturbation is not None: 232 | self._physics.bind(self._robot.entity.root_body).xfrc_applied = self._perturbation 233 | -------------------------------------------------------------------------------- /qdpgym/sim/mjc/hooks.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import mujoco as mjlib 4 | 5 | from qdpgym import tf 6 | from qdpgym.sim.abc import Hook 7 | from qdpgym.sim.mjc.viewer import ViewerMj 8 | 9 | 
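# Hooks are attached through a task rather than the environment, e.g. (sketch
# using classes from this repo):
#     task = NullTask()
#     task.add_hook(ViewerHook())
# The task's lifecycle callbacks (init_episode, before_step, after_substep, ...)
# then fan out to every registered hook.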
10 | class ViewerHook(Hook): 11 | def __init__(self): 12 | self._viewer: Optional[ViewerMj] = None 13 | 14 | def init_episode(self, robot, env): 15 | self._viewer = ViewerMj(env.physics.model.ptr, env.physics.data.ptr) 16 | 17 | def after_substep(self, robot, env): 18 | perturb = env.get_perturbation() 19 | if perturb is not None: 20 | force = perturb[:3] 21 | magnitude = tf.vnorm(force) 22 | mat = tf.Rotation.from_zaxis(force / magnitude) 23 | self._viewer.add_marker( 24 | type=mjlib.mjtGeom.mjGEOM_ARROW, 25 | pos=robot.get_base_pos(), mat=mat, 26 | size=[0.01, 0.01, magnitude / 20], 27 | rgba=(1., 0., 0., 1.) 28 | ) 29 | self._viewer.render() 30 | -------------------------------------------------------------------------------- /qdpgym/sim/mjc/terrain.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union, Tuple 3 | 4 | import numpy as np 5 | from dm_control import composer 6 | from scipy.interpolate import interp2d 7 | 8 | from qdpgym.sim.abc import NUMERIC, Terrain 9 | 10 | 11 | def _process_size(size): 12 | if isinstance(size, (int, float)): 13 | return size, size 14 | else: 15 | return tuple(size) 16 | 17 | 18 | class TerrainBase(composer.Arena, Terrain, metaclass=abc.ABCMeta): 19 | pass 20 | 21 | 22 | class TexturedTerrainBase(TerrainBase, metaclass=abc.ABCMeta): 23 | reflectance = 0.2 24 | 25 | def _build(self, size, name=None): 26 | super()._build(name=name) 27 | self._x_size, self._y_size = _process_size(size) 28 | 29 | self._mjcf_root.visual.headlight.set_attributes( 30 | ambient=[.4, .4, .4], diffuse=[.8, .8, .8], specular=[.1, .1, .1]) 31 | 32 | self._ground_texture = self._mjcf_root.asset.add( 33 | 'texture', name='groundplane', rgb1=[.2, .3, .4], rgb2=[.1, .2, .3], 34 | type='2d', builtin='checker', width=200, height=200, 35 | mark='edge', markrgb=[0.8, 0.8, 0.8]) 36 | # Makes white squares exactly 1x1 length units. 37 | self._ground_material = self._mjcf_root.asset.add( 38 | 'material', name='groundplane', texrepeat=[2, 2], 39 | texuniform=True, reflectance=self.reflectance, texture=self._ground_texture) 40 | 41 | self._terrain_geom = None 42 | 43 | @property 44 | def ground_geoms(self): 45 | return self._terrain_geom, 46 | 47 | def regenerate(self, random_state): 48 | pass 49 | 50 | @property 51 | def size(self): 52 | return self._x_size, self._y_size 53 | 54 | def out_of_range(self, x, y): 55 | return abs(x) > self._x_size / 2 - 1 or abs(y) > self._y_size / 2 - 1 56 | 57 | 58 | class Plain(TexturedTerrainBase): 59 | def _build(self, size, name='groundplane'): 60 | super()._build(name=name, size=size) 61 | 62 | # Build groundplane. 
63 | self._terrain_geom = self._mjcf_root.worldbody.add( 64 | 'geom', type='plane', name='terrain', material=self._ground_material, 65 | size=[self._x_size / 2, self._x_size / 2, 0.25]) 66 | self._terrain_geom.friction = [1, 0.005, 0.0001] 67 | 68 | def get_height(self, x, y): 69 | return 0.0 70 | 71 | def get_normal(self, x, y): 72 | return np.array((0., 0., 1.)) 73 | 74 | 75 | class Hills(TexturedTerrainBase): 76 | def _build(self, size, resol: float, 77 | *rough_dsp: Tuple[NUMERIC, NUMERIC], 78 | name='hills'): 79 | super()._build(name=name, size=size) 80 | self._resol = resol 81 | self._ncol = int(self._x_size / resol) + 1 82 | self._nrow = int(self._y_size / resol) + 1 83 | self._rough_dsp = rough_dsp 84 | 85 | self._mjcf_root.asset.add( 86 | 'hfield', name=f'{name}_hfield', 87 | nrow=self._nrow, ncol=self._ncol, 88 | size=[self._x_size / 2, self._y_size / 2, 1, 0.1]) 89 | self._terrain_geom = self._mjcf_root.worldbody.add( 90 | 'geom', name='terrain', type='hfield', material=self._ground_material, 91 | pos=(0, 0, -0.01), hfield=f'{name}_hfield') 92 | 93 | def initialize_episode(self, physics, random_state: np.random.RandomState): 94 | physics.model.hfield_data[...] = self.regenerate(random_state) 95 | 96 | def regenerate(self, random_state): 97 | hfield_data = np.zeros((self._nrow, self._ncol)) 98 | for rough, dsp in self._rough_dsp: 99 | x = np.arange(-3, int(self._ncol / dsp) + 4) 100 | y = np.arange(-3, int(self._nrow / dsp) + 4) 101 | hfield_dsp = random_state.uniform(0, rough, (x.size, y.size)) 102 | terrain_func = interp2d(x, y, hfield_dsp, kind='cubic') 103 | x_usp = np.arange(self._ncol) / dsp 104 | y_usp = np.arange(self._nrow) / dsp 105 | hfield_data += terrain_func(x_usp, y_usp) 106 | return hfield_data.reshape(-1) 107 | -------------------------------------------------------------------------------- /qdpgym/sim/resources/acnet_220526.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/acnet_220526.pt -------------------------------------------------------------------------------- /qdpgym/sim/resources/actuator_net_with_history.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/actuator_net_with_history.pt -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/calf.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/calf.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/hip.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/hip.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/thigh.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/thigh.stl 
-------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/thigh_mirror.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/thigh_mirror.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/meshes/trunk.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/trunk.stl -------------------------------------------------------------------------------- /qdpgym/sim/resources/aliengo/model/aliengo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 173 | -------------------------------------------------------------------------------- /qdpgym/sim/task.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, List, Any 2 | 3 | from .abc import Task, Hook, Quadruped, Environment 4 | 5 | 6 | class NullTask(Task): 7 | """The Simplest Task Structure""" 8 | 9 | def __init__(self): 10 | self._robot: Optional[Quadruped] = None 11 | self._env: Optional[Environment] = None 12 | self._hooks: List[Hook] = [] 13 | self._hook_names = {} 14 | self._options = [] 15 | 16 | def register_env(self, robot, env): 17 | self._robot = robot 18 | self._env = env 19 | for hook in self._hooks: 20 | hook.initialize(self._robot, self._env) 21 | 22 | def add_hook(self, hook: Hook, name=None): 23 | if name is None: 24 | name = hook.__class__.__name__ 25 | if name in self._hook_names: 26 | raise ValueError(f'Duplicated Hook `{name}`') 27 | hook.register_task(self) 28 | self._hooks.append(hook) 29 | self._hook_names[name] = hook 30 | return self 31 | 32 | def remove_hook(self, name=None): 33 | if name not in self._hook_names: 34 | raise RuntimeError(f'Hook `{name}` not found') 35 | obj = self._hook_names[name] 36 | for i, hook in enumerate(self._hooks): 37 | if hook is obj: 38 | self._hooks.pop(i) 39 | 40 | def get_observation(self): 41 | return None 42 | 43 | def get_reward(self, detailed=True): 44 | if detailed: 45 | return 0., {} 46 | else: 47 | return 0. 
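    # Concrete tasks override the trivial observation/reward above; BasicTask
    # below assembles its reward from a RewardRegistry of weighted terms.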
48 | 49 | def is_succeeded(self): 50 | return False 51 | 52 | def is_failed(self): 53 | return False 54 | 55 | def init_episode(self): 56 | for hook in self._hooks: 57 | hook.init_episode(self._robot, self._env) 58 | 59 | def before_step(self, action): 60 | for hook in self._hooks: 61 | hook.before_step(self._robot, self._env) 62 | return action 63 | 64 | def before_substep(self): 65 | for hook in self._hooks: 66 | hook.before_substep(self._robot, self._env) 67 | 68 | def after_step(self): 69 | for hook in self._hooks: 70 | hook.after_step(self._robot, self._env) 71 | 72 | def after_substep(self): 73 | for hook in self._hooks: 74 | hook.after_substep(self._robot, self._env) 75 | 76 | def on_success(self): 77 | for hook in self._hooks: 78 | hook.on_success(self._robot, self._env) 79 | 80 | def on_fail(self): 81 | for hook in self._hooks: 82 | hook.on_fail(self._robot, self._env) 83 | 84 | 85 | class RewardRegistry(object): 86 | def __init__(self): 87 | self._robot = None 88 | self._env = None 89 | self._task = None 90 | 91 | self._rewards_set = set() 92 | self._rewards_weights = [] 93 | self._weight_sum = 0.0 94 | self._coefficient = 1.0 95 | self._reward_details = {} 96 | 97 | def register_task(self, robot, env, task): 98 | self._robot, self._env, self._task = robot, env, task 99 | 100 | def add_reward( 101 | self, 102 | name: str, 103 | reward_obj: Callable[..., float], 104 | weight: float 105 | ): 106 | if name in self._rewards_set: 107 | raise RuntimeError(f'Duplicated Reward Type {name}') 108 | self._rewards_set.add(name) 109 | self._weight_sum += weight 110 | self._rewards_weights.append((reward_obj, weight)) 111 | 112 | def set_coeff(self, coeff): 113 | self._coefficient = coeff 114 | 115 | def report(self): 116 | from qdpgym.utils import colored_str 117 | print(colored_str(f'Got {len(self._rewards_weights)} types of rewards:\n', 'white'), 118 | f"{'Reward Type':<28}Weight * {self._coefficient:.3f}", sep='') 119 | for reward, weight in self._rewards_weights: 120 | reward_name: str = reward.__class__.__name__ 121 | length = len(reward_name) 122 | if reward_name.endswith('Reward'): 123 | reward_name = colored_str(reward_name, 'green') 124 | elif reward_name.endswith('Penalty'): 125 | reward_name = colored_str(reward_name, 'magenta') 126 | print(f'{reward_name}{" " * (28 - length)}{weight:.3f}') 127 | print() 128 | 129 | def calc_reward(self, detailed=True): 130 | self._reward_details.clear() 131 | reward_value = 0.0 132 | for reward, weight in self._rewards_weights: 133 | rew = reward(self._robot, self._env, self._task) 134 | self._reward_details[reward.__class__.__name__] = rew 135 | reward_value += rew * weight 136 | reward_value *= self._coefficient 137 | if detailed: 138 | return reward_value, self._reward_details 139 | else: 140 | return reward_value 141 | 142 | 143 | class BasicTask(NullTask): 144 | ALL_REWARDS: Any 145 | 146 | def __init__(self, substep_reward_on=True): 147 | self._reward_registry = RewardRegistry() 148 | self._substep_reward_on = substep_reward_on 149 | self._reward = 0. 
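        # When substep rewards are enabled, per-substep rewards are accumulated
        # here and averaged over the substep count in after_step().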
150 | self._reward_details = {} 151 | self._substep_cnt = 0 152 | 153 | super().__init__() 154 | 155 | def add_reward(self, name: str, weight: float = 0.): 156 | reward_class = getattr(self.ALL_REWARDS, name) 157 | self._reward_registry.add_reward(name, reward_class(), weight) 158 | 159 | def set_reward_coeff(self, value): 160 | self._reward_registry.set_coeff(value) 161 | 162 | def register_env(self, robot, env): 163 | super().register_env(robot, env) 164 | self._reward_registry.register_task(robot, env, self) 165 | 166 | def before_step(self, action): 167 | self._reward = 0. 168 | self._reward_details.clear() 169 | return super().before_step(action) 170 | 171 | def after_substep(self): 172 | if self._substep_reward_on: 173 | reward, reward_details = self._reward_registry.calc_reward(detailed=True) 174 | self._reward += reward 175 | if self._reward_details: 176 | for k, v in reward_details.items(): 177 | self._reward_details[k] += v 178 | else: 179 | self._reward_details = reward_details.copy() 180 | self._substep_cnt += 1 181 | super().after_substep() 182 | 183 | def after_step(self): 184 | if self._substep_reward_on: 185 | self._reward /= self._substep_cnt 186 | for k in self._reward_details: 187 | self._reward_details[k] /= self._substep_cnt 188 | self._substep_cnt = 0 189 | else: 190 | self._reward, self._reward_details = self._reward_registry.calc_reward(detailed=True) 191 | super().after_step() 192 | 193 | def get_reward(self, detailed=True): 194 | if detailed: 195 | return self._reward, self._reward_details 196 | else: 197 | return self._reward 198 | -------------------------------------------------------------------------------- /qdpgym/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tasks/__init__.py -------------------------------------------------------------------------------- /qdpgym/tasks/identify/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tasks/identify/__init__.py -------------------------------------------------------------------------------- /qdpgym/tasks/identify/identify.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | 5 | import numpy as np 6 | import torch 7 | import wandb 8 | from torch import nn 9 | from torch.utils.data import DataLoader, Dataset, random_split, ConcatDataset 10 | 11 | from qdpgym.sim.common.identify import ActuatorNet 12 | from qdpgym.utils import get_timestamp, log 13 | 14 | 15 | class RobotDatasetWithHistory(Dataset): 16 | def __init__(self, path, history_interval, slices=None): 17 | print(path) 18 | self.data = np.load(path) 19 | if slices: 20 | self.error = self.data['angle_error'][slices, :].astype(np.float32) 21 | self.velocity = self.data['motor_velocity'][slices, :].astype(np.float32) 22 | self.torque = self.data['motor_torque'][slices, :].astype(np.float32) 23 | else: 24 | self.error = self.data['angle_error'].astype(np.float32) 25 | self.velocity = self.data['motor_velocity'].astype(np.float32) 26 | self.torque = self.data['motor_torque'].astype(np.float32) 27 | self.X = np.stack((self.error[2 * history_interval:-1, ].flatten(), 28 | self.error[history_interval:-1 - history_interval, ].flatten(), 29 | self.error[:-1 - 2 * history_interval, 
].flatten(), 30 | self.velocity[2 * history_interval:-1, ].flatten(), 31 | self.velocity[history_interval:-1 - history_interval, ].flatten(), 32 | self.velocity[:-1 - 2 * history_interval, ].flatten()), axis=1) 33 | self.Y = np.expand_dims(self.torque[1 + 2 * history_interval:, ].flatten(), axis=1) 34 | self.size = len(self.error) 35 | 36 | def __len__(self): 37 | return len(self.X) 38 | 39 | def __getitem__(self, idx): 40 | return self.X[idx], self.Y[idx] 41 | 42 | 43 | def train_actuator_net(actuator_net, dataset_class, history_interval, 44 | lr, num_epochs, batch_size, device): 45 | dataset_paths = [os.path.join(RSC_DIR, filename) for filename in os.listdir(RSC_DIR)] 46 | dataset = ConcatDataset([dataset_class(path, history_interval) for path in dataset_paths if path.endswith('npz')]) 47 | train_len = int(0.8 * len(dataset)) 48 | test_len = len(dataset) - train_len 49 | train_data, test_data = random_split(dataset, (train_len, test_len)) 50 | train_loader = DataLoader(train_data, batch_size, shuffle=True) 51 | test_loader = DataLoader(test_data, batch_size, shuffle=True) 52 | net = actuator_net.to(device) 53 | optim = torch.optim.AdamW(net.parameters(), lr=lr) 54 | criterion = nn.MSELoss(reduction='sum') 55 | 56 | wandb.init(project='actuator_net', name=str((actuator_net.hidden_dims, lr)), mode=None) 57 | log_dir = f'ident/{get_timestamp()}' 58 | os.makedirs(log_dir, exist_ok=True) 59 | torch.set_printoptions(linewidth=10000) 60 | for i in range(1, num_epochs + 1): 61 | train_loss, test_loss = 0., 0. 62 | for X, Y in train_loader: 63 | optim.zero_grad() 64 | X, Y = X.to(device), Y.to(device) 65 | loss = criterion(net(X), Y) 66 | train_loss += loss.item() 67 | loss.backward() 68 | optim.step() 69 | train_loss /= train_len 70 | log.info(f'Epoch {i:>4}/{num_epochs} train loss {train_loss:.6f}') 71 | 72 | for X, Y in test_loader: 73 | X, Y = X.to(device), Y.to(device) 74 | loss = criterion(net(X), Y) 75 | test_loss += loss.item() 76 | test_loss /= test_len 77 | log.info(f'Epoch {i:>4}/{num_epochs} test loss {test_loss:.6f}') 78 | wandb.log({'Train/loss': train_loss, 'Test/loss': test_loss}) 79 | if i % 10 == 0: 80 | torch.save({'model': net.state_dict(), 'hidden_dims': net.hidden_dims}, 81 | os.path.join(log_dir, f'{i}.pt')) 82 | 83 | 84 | def get_statistics(model, history_interval): 85 | dataset_paths = [os.path.join(RSC_DIR, filename) for filename in os.listdir(RSC_DIR)] 86 | dataset = ConcatDataset([DatasetClass(path, history_interval) for path in dataset_paths if path.endswith('npz')]) 87 | 88 | criterion = nn.MSELoss(reduction='sum') 89 | test_loader = DataLoader(dataset, 1000, shuffle=True) 90 | test_loss = 0. 
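    # Evaluation loop over the concatenated motor logs. Each input row built by
    # RobotDatasetWithHistory is [err_t, err_{t-h}, err_{t-2h}, vel_t, vel_{t-h},
    # vel_{t-2h}] (h = history interval) and the target is the measured torque at
    # the next sample; the summed squared error is reported as MSE and RMSE below.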
91 | for X, Y in test_loader: 92 | X, Y = X.to(device), Y.to(device) 93 | loss = criterion(model(X), Y) 94 | test_loss += loss.item() 95 | mse = test_loss / (len(dataset) - 1) 96 | print('MSE:', mse) 97 | print('RMSE:', math.sqrt(mse)) 98 | 99 | 100 | if __name__ == '__main__': 101 | parser = argparse.ArgumentParser(description='Train or test quadruped locomotion.') 102 | parser.add_argument('-r', '--rsc', metavar='', 103 | help='motor data directory', required=True) 104 | parser.add_argument('-i', '--interval', type=int, help='history interval', default=3) 105 | parser.add_argument('-b', '--batch-size', type=int, help='batch size', default=1000) 106 | parser.add_argument('-l', '--lr', type=float, help='learning rate', default=1e-4) 107 | parser.add_argument('-e', '--epoch', type=int, help='num epochs', default=2000) 108 | parser.add_argument('--cuda', action='store_true', help='use cuda for training') 109 | parser.add_argument('--train', action='store_true', help='if train else test') 110 | args = parser.parse_args() 111 | 112 | RSC_DIR = args.rsc 113 | if args.cuda or torch.cuda.is_available(): 114 | DEVICE = 'cuda' 115 | else: 116 | DEVICE = 'cpu' 117 | TRAIN = args.train 118 | HISTORY_INTERVAL = args.interval 119 | BATCH_SIZE = args.batch_size 120 | LEARNING_RATE = args.lr 121 | NUM_EPOCHS = args.epoch 122 | 123 | np.set_printoptions(3, linewidth=10000, suppress=True) 124 | DatasetClass = RobotDatasetWithHistory 125 | device = torch.device(DEVICE) 126 | if TRAIN: 127 | actuator_net = ActuatorNet(hidden_dims=(32, 32, 32)) 128 | train_actuator_net( 129 | actuator_net, 130 | DatasetClass, 131 | history_interval=HISTORY_INTERVAL, 132 | batch_size=BATCH_SIZE, 133 | lr=LEARNING_RATE, 134 | num_epochs=NUM_EPOCHS, 135 | device=device 136 | ) 137 | get_statistics(actuator_net, HISTORY_INTERVAL) 138 | else: 139 | model_path = '/home/jewel/Workspaces/QuadrupedRLv2/qdpgym/sim/resources/acnet_220526.pt' 140 | model_info = torch.load(model_path, map_location=device) 141 | actuator_net = ActuatorNet(hidden_dims=model_info['hidden_dims']).to(device) 142 | actuator_net.load_state_dict(model_info['model']) 143 | get_statistics(actuator_net, HISTORY_INTERVAL) 144 | 145 | # dataset = DatasetClass(os.path.join(RSC_DIR, 'state_cmd_data_NoLoadT0.4.npz')) 146 | # motor_idx = 2 147 | # slices = slice(5000, 5500) 148 | # 149 | # error = dataset.error[slices, motor_idx] 150 | # error_his1 = dataset.error[slice(slices.start - 5, slices.stop - 5), motor_idx] 151 | # error_his2 = dataset.error[slice(slices.start - 10, slices.stop - 10), motor_idx] 152 | # velocity = dataset.velocity[slices, motor_idx] 153 | # velocity_his1 = dataset.velocity[slice(slices.start - 5, slices.stop - 5), motor_idx] 154 | # velocity_his2 = dataset.velocity[slice(slices.start - 10, slices.stop - 10), motor_idx] 155 | # error_rate = dataset.data['angle_error_rate'][slices, motor_idx].astype(np.float32) 156 | # torque = dataset.torque[slices.start + 1:slices.stop + 1, motor_idx] 157 | # predicted = actuator_net.calc_torque(error, error_his1, error_his2, velocity, velocity_his1, velocity_his2) 158 | 159 | # import matplotlib.pyplot as plt 160 | # 161 | # fig, ax = plt.subplots(3, 1, figsize=(9.6, 6.4), gridspec_kw={'height_ratios': [1, 1, 2]}) 162 | # ax[0].plot(error) 163 | # ax[0].set_ylabel('error(rad)', rotation=0) 164 | # ax[0].yaxis.set_label_coords(-0.05, 0.99) 165 | # ax[0].set_xticks([]) 166 | # ax[1].plot(velocity) 167 | # ax[1].set_xticks([]) 168 | # ax[1].set_ylabel('velocity(rad$\cdot$s$^{-1}$)', rotation=0) 169 | # 
ax[1].yaxis.set_label_coords(-0.03, 1.03) 170 | # ax[2].set_xlabel('sample point number') 171 | # ax[2].set_ylabel('torque') 172 | # ax[2].set_ylabel('torque(N$\cdot$m)', rotation=0) 173 | # ax[2].yaxis.set_label_coords(-0.03, 0.99) 174 | # ax[2].plot(torque, linewidth=1) 175 | # ax[2].plot(predicted, linewidth=1, linestyle='--') 176 | # ax[2].plot(error * 150 - velocity * 4, linewidth=1, linestyle='dashdot') 177 | # ax[2].set_ylim(-20, 25) 178 | # ax[2].legend(['measured', 'identified', 'vanilla pd']) 179 | # fig.show() 180 | # fig.savefig('acnet.pdf') 181 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/__init__.py: -------------------------------------------------------------------------------- 1 | from .loct import * 2 | from .app import * -------------------------------------------------------------------------------- /qdpgym/tasks/loct/app.py: -------------------------------------------------------------------------------- 1 | import qdpgym 2 | from qdpgym import sim 3 | from qdpgym.tasks import loct 4 | 5 | __all__ = ['LocomotionApp'] 6 | 7 | 8 | class LocomotionApp(sim.Application): 9 | def __init__(self, policy): 10 | robot = sim.Aliengo(500, 'actuator_net', noisy=False) 11 | task = loct.LocomotionV0() 12 | if qdpgym.sim_engine == qdpgym.Sim.BULLET: 13 | arena = sim.TerrainBase() 14 | task.add_hook(sim.ExtraViewerHook()) 15 | task.add_hook(sim.RandomTerrainHook()) 16 | task.add_hook(sim.RandomPerturbHook()) 17 | task.add_hook(loct.GamepadCommanderHook()) 18 | # task.add_hook(sim.StatisticsHook()) 19 | # task.add_hook(hooks.VideoRecorderBtHook()) 20 | else: 21 | raise NotImplementedError 22 | env = sim.QuadrupedEnv(robot, arena, task) 23 | # task.add_reward('UnifiedLinearReward') 24 | super().__init__(robot, env, task, policy) 25 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/reward.py: -------------------------------------------------------------------------------- 1 | import math 2 | from abc import ABC 3 | 4 | import numpy as np 5 | 6 | from qdpgym.utils import tf 7 | 8 | ATANH0_95 = math.atanh(0.95) 9 | ATANH0_9 = math.atanh(0.9) 10 | SQRT_LOG10 = math.sqrt(math.log(10)) 11 | SQRT_LOG20 = math.sqrt(math.log(20)) 12 | 13 | 14 | class Reward(ABC): 15 | def __call__(self, robot, env, task) -> float: 16 | raise NotImplementedError 17 | 18 | 19 | def tanh2_reshape(lower, upper): 20 | w = np.sqrt(ATANH0_95) / (upper - lower) 21 | 22 | def _reshape(v): 23 | return np.tanh(np.power((v - lower) * w, 2)) if v > lower else 0 24 | 25 | return _reshape 26 | 27 | 28 | def tanh2_reverse(lower, upper, value): 29 | w = np.sqrt(ATANH0_95) / (upper - lower) 30 | return np.sqrt(np.arctanh(value)) / w + lower 31 | 32 | 33 | def tanh_reshape(lower, upper): 34 | """ 35 | Zoom tanh so that f(lower) = -0.9 and f(upper) = 0.9 36 | :return: function f 37 | """ 38 | middle = (lower + upper) / 2 39 | w = ATANH0_9 / (upper - middle) 40 | 41 | def _reshape(v): 42 | return np.tanh((v - middle) * w) 43 | 44 | return _reshape 45 | 46 | 47 | def tanh_reverse(lower, upper, value): 48 | middle = (lower + upper) / 2 49 | w = ATANH0_9 / (upper - middle) 50 | return np.arctanh(value) / w + middle 51 | 52 | 53 | def exp_m2_reshape(upper): 54 | w = upper / SQRT_LOG10 55 | 56 | def _reshape(v): 57 | return np.exp(-(v / w) ** 2) 58 | 59 | return _reshape 60 | 61 | 62 | def quadratic_linear_reshape(upper): 63 | k = 1 / upper ** 2 64 | kl = 2 * k * upper 65 | 66 | def _reshape(v): 67 | v = abs(v) 68 | return v ** 2 * 
k if v < upper else kl * (v - upper) + 1 69 | 70 | return _reshape 71 | 72 | 73 | def soft_constrain(thr, upper): 74 | def _reshape(v): 75 | excess = abs(v) - thr 76 | if excess > 0: 77 | return -(excess / upper) ** 2 78 | return 0. 79 | 80 | return _reshape 81 | 82 | 83 | def expm2_resh(upper): 84 | w = upper / SQRT_LOG10 85 | 86 | def _reshape(v): 87 | return np.exp(-(v / w) ** 2) 88 | 89 | return _reshape 90 | 91 | 92 | class LinearVelocityReward(Reward): 93 | def __init__(self, forward=1.5, lateral=1.0, ort_upper=0.3, ort_weight=0.33): 94 | self.forward, self.lateral = forward, lateral 95 | self.coeff = forward * lateral 96 | self.ort_reshape = tanh2_reshape(0, ort_upper) 97 | self.ort_weight = ort_weight 98 | 99 | def __call__(self, robot, env, task): 100 | lin_cmd = task.cmd[:2] 101 | lin_vel = robot.get_velocimeter()[:2] 102 | proj_vel = np.dot(lin_cmd, lin_vel) 103 | ort_vel = tf.vnorm(lin_vel - lin_cmd[:2] * proj_vel) 104 | # print(proj_vel, ort_vel) 105 | ort_pen = 1 - self.ort_reshape(ort_vel) 106 | if (lin_cmd == 0.).all(): 107 | return ort_pen 108 | proj_rew = self.reshape(self.get_desired_velocity(lin_cmd), proj_vel) 109 | return (1 - self.ort_weight) * proj_rew + self.ort_weight * ort_pen 110 | 111 | def get_desired_velocity(self, cmd): 112 | return self.coeff / math.hypot(self.lateral * cmd[0], self.forward * cmd[1]) 113 | 114 | @staticmethod 115 | def reshape(scale, value): 116 | return math.tanh(value * ATANH0_9 / scale) 117 | 118 | 119 | class YawRateReward(Reward): 120 | def __init__(self, upper_pos=1.0, upper_neg=0.45): 121 | self.reshape_pos = tanh_reshape(-upper_pos, upper_pos) 122 | self.reshape_neg = quadratic_linear_reshape(upper_neg) 123 | 124 | def __call__(self, robot, env, task): 125 | yaw_cmd, yaw_rate = task.cmd[2], robot.get_base_rpy_rate()[2] 126 | if yaw_cmd != 0.0: 127 | return self.reshape_pos(yaw_rate / yaw_cmd) 128 | else: 129 | return 1 - self.reshape_neg(abs(yaw_rate)) 130 | 131 | 132 | class RotationReward(Reward): 133 | def __init__(self, max_ang=3.0, perception=1.0): 134 | self.max_ang = max_ang 135 | self.reshape = expm2_resh(perception) 136 | 137 | def __call__(self, robot, env, task): 138 | ang_cmd = task.cmd[2] 139 | ang_vel = robot.get_base_rpy_rate()[2] 140 | ang_rew = self.reshape(ang_vel - ang_cmd * self.max_ang) 141 | # print(ang_cmd, ang_vel, ang_rew) 142 | return ang_rew 143 | 144 | 145 | class RollPitchRatePenalty(Reward): 146 | def __init__(self, dr_upper=1.0, dp_upper=0.5): 147 | self.dr_reshape = tanh2_reshape(0.0, dr_upper) 148 | self.dp_reshape = tanh2_reshape(0.0, dp_upper) 149 | 150 | def __call__(self, robot, env, task): 151 | r_rate, p_rate, _ = robot.get_base_rpy_rate() 152 | return 1 - (self.dr_reshape(abs(r_rate)) + 153 | self.dp_reshape(abs(p_rate))) / 2 154 | 155 | 156 | # class OrthogonalLinearPenalty(Reward): 157 | # def __init__(self, linear_upper=0.3): 158 | # self.reshape = tanh2_reshape(0, linear_upper) 159 | # 160 | # def __call__(self, robot, env, task): 161 | # linear = robot.get_velocimeter()[:2] 162 | # v_o = np.asarray(linear) - np.asarray(cmd[:2]) * np.dot(linear, cmd[:2]) 163 | # return 1 - self.reshape(tf.vnorm(v_o)) 164 | 165 | 166 | class VerticalLinearPenalty(Reward): 167 | def __init__(self, upper=0.4): 168 | self.reshape = quadratic_linear_reshape(upper) 169 | 170 | def __call__(self, robot, env, task): 171 | return 1 - self.reshape(robot.get_velocimeter()[2]) 172 | 173 | 174 | class BodyPosturePenalty(Reward): 175 | def __init__(self, roll_upper=np.pi / 12, pitch_upper=np.pi / 24): 176 | 
self.roll_reshape = quadratic_linear_reshape(roll_upper) 177 | self.pitch_reshape = quadratic_linear_reshape(pitch_upper) 178 | 179 | def __call__(self, robot, env, task): 180 | trnZ = env.get_interact_terrain_normal() 181 | robot_rot = robot.get_base_rot() 182 | trnY = tf.vcross(trnZ, robot_rot[:, 0]) 183 | trnX = tf.vcross(trnY, trnZ) 184 | r, p, _ = tf.Rpy.from_rotation(np.array((trnX, trnY, trnZ)) @ robot_rot) 185 | return 1 - (self.roll_reshape(r) + self.pitch_reshape(p)) / 2 186 | 187 | 188 | class BodyHeightReward(Reward): 189 | def __init__(self, des=0.4, range_=0.03): 190 | self.des = des 191 | self.reshape = quadratic_linear_reshape(range_) 192 | 193 | def __call__(self, robot, env, task): 194 | return env.get_relative_robot_height() 195 | 196 | 197 | class ActionSmoothnessReward(Reward): 198 | def __init__(self, upper=400): 199 | self.reshape = exp_m2_reshape(upper) 200 | 201 | def __call__(self, robot, env, task): 202 | return self.reshape(env.get_action_accel()).sum() / 12 203 | 204 | 205 | class JointMotionPenalty(Reward): 206 | def __init__(self, vel_upper=6, acc_upper=500): 207 | self.vel_reshape = np.vectorize(quadratic_linear_reshape(vel_upper)) 208 | self.acc_reshape = np.vectorize(quadratic_linear_reshape(acc_upper)) 209 | 210 | def __call__(self, robot, env, task): 211 | vel_pen = self.vel_reshape(robot.get_joint_vel()).sum() 212 | acc_pen = self.acc_reshape(robot.get_joint_acc()).sum() 213 | return 1 - (vel_pen + acc_pen) / 12 214 | 215 | 216 | # class TorqueGradientPenalty(Reward): 217 | # def __init__(self, upper=200): 218 | # self.reshape = quadratic_linear_reshape(upper) 219 | # 220 | # def __call__(self, cmd, env, robot): 221 | # torque_grad = robot.getTorqueGradients() 222 | # return -sum(self.reshape(grad) for grad in torque_grad) / 12 223 | 224 | 225 | class FootSlipPenalty(Reward): 226 | def __init__(self, upper=0.5): 227 | self.reshape = np.vectorize(quadratic_linear_reshape(upper)) 228 | 229 | def __call__(self, robot, env, task): 230 | return 1 - sum(self.reshape(robot.get_slip_vel())) 231 | 232 | 233 | # class HipAnglePenalty(Reward): 234 | # def __init__(self, upper=0.3): 235 | # self.reshape = np.vectorize(quadratic_linear_reshape(upper)) 236 | # 237 | # def __call__(self, cmd, env, robot): 238 | # hip_angles = robot.getJointPositions()[(0, 3, 6, 9),] 239 | # return -sum(self.reshape(hip_angles)) / 4 240 | 241 | 242 | class JointConstraintPenalty(Reward): 243 | def __init__(self, constraints=(0., 0., 0.), upper=(0.4, 0.6, 0.6)): 244 | self.hip_reshape = np.vectorize(soft_constrain(constraints[0], upper[0])) 245 | self.thigh_reshape = np.vectorize(soft_constrain(constraints[1], upper[1])) 246 | self.shank_reshape = np.vectorize(soft_constrain(constraints[2], upper[2])) 247 | 248 | def __call__(self, robot, env, task): 249 | joint_angles = robot.get_joint_pos() - robot.STANCE_CONFIG 250 | return (self.hip_reshape(joint_angles[((0, 3, 6, 9),)]).sum() + 251 | self.thigh_reshape(joint_angles[((1, 4, 7, 10),)]).sum() + 252 | self.shank_reshape(joint_angles[((2, 5, 8, 11),)]).sum()) / 12 253 | 254 | 255 | # class TrivialStridePenalty(Reward): 256 | # def __init__(self, lower=-0.2, upper=0.4): 257 | # self.reshape = tanh_reshape(lower, upper) 258 | # 259 | # def __call__(self, robot, env, task): 260 | # strides = [np.dot(s, cmd[:2]) for s in robot.get_strides()] 261 | # return sum(self.reshape(s) for s in strides if s != 0.0) 262 | # # return 1 - sum(1 - self.reshape(s) for s in strides if s != 0.0) 263 | 264 | 265 | class FootClearanceReward(Reward): 266 | def 
__init__(self, upper=0.08): 267 | self.reshape = tanh2_reshape(0., upper) 268 | 269 | def __call__(self, robot, env, task): 270 | foot_clearances = robot.get_clearances() 271 | return sum(self.reshape(c) for c in foot_clearances) 272 | 273 | 274 | class AliveReward(Reward): 275 | def __call__(self, robot, env, task): 276 | return 1.0 277 | 278 | 279 | class ClearanceOverTerrainReward(Reward): 280 | def __call__(self, robot, env, task): 281 | reward = 0. 282 | for x, y, z in robot.get_foot_pos(): 283 | terrain_extremum = env.arena.get_peak((x - 0.1, x + 0.1), 284 | (y - 0.1, y + 0.1))[2] 285 | if z - robot.FOOT_RADIUS > terrain_extremum + 0.01: 286 | reward += 1 / 2 287 | return reward 288 | 289 | 290 | class BodyCollisionPenalty(Reward): 291 | def __call__(self, robot, env, task): 292 | contact_sum = 0 293 | for i, contact in enumerate(robot.get_leg_contacts()): 294 | if contact and i % 3 != 2: 295 | contact_sum += 1 296 | return 1 - contact_sum 297 | 298 | 299 | class TorquePenalty(Reward): 300 | def __init__(self, upper=900): 301 | self.reshape = np.vectorize(quadratic_linear_reshape(upper)) 302 | 303 | def __call__(self, robot, env, task): 304 | return 1 - sum(self.reshape(robot.get_last_torque() ** 2)) 305 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/sr_reward.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | import numpy as np 4 | 5 | from qdpgym.utils import tf 6 | 7 | 8 | class Reward(ABC): 9 | def __call__(self, robot, env, task) -> float: 10 | raise NotImplementedError 11 | 12 | 13 | def exp_m2(v, k=1): 14 | return np.exp(-k * v ** 2) 15 | 16 | 17 | class VelocityReward(Reward): 18 | def __init__(self, max_lin=1.0): 19 | self.max_lin = max_lin 20 | 21 | def __call__(self, robot, env, task): 22 | lin_cmd = task.cmd[:2] 23 | lin_vel = robot.get_velocimeter()[:2] 24 | lin_cmd_mag = tf.vnorm(lin_cmd) * self.max_lin 25 | if lin_cmd_mag != 0.: 26 | lin_cmd /= lin_cmd_mag # unit 27 | 28 | prj_vel = np.dot(lin_cmd, lin_vel) 29 | ort_vel = tf.vnorm(lin_vel - lin_cmd * prj_vel) 30 | if lin_cmd_mag == 0.: 31 | lin_rew = exp_m2(tf.vnorm(lin_vel)) 32 | elif prj_vel >= lin_cmd_mag: 33 | lin_rew = 1. 34 | else: 35 | lin_rew = exp_m2(lin_cmd_mag - prj_vel) 36 | 37 | ort_pen = exp_m2(ort_vel, 3) 38 | # print(lin_cmd_mag, prj_vel, lin_rew) 39 | return (lin_rew + ort_pen) / 2 40 | 41 | 42 | class RotationReward(Reward): 43 | def __init__(self, max_ang=1.0): 44 | self.max_ang = max_ang 45 | 46 | def __call__(self, robot, env, task): 47 | ang_cmd = task.cmd[2] 48 | ang_vel = robot.get_base_rpy_rate()[2] 49 | ang_cmd_mag = abs(ang_cmd) * self.max_ang 50 | if ang_cmd == 0.: 51 | ang_rew = exp_m2(ang_vel) 52 | elif ang_vel * np.sign(ang_cmd) >= ang_cmd_mag: 53 | ang_rew = 1. 
54 | else: 55 | ang_rew = exp_m2(ang_vel - ang_cmd) 56 | # print(ang_cmd_mag, ang_vel, ang_rew) 57 | return ang_rew 58 | 59 | 60 | class BodyMotionPenalty(Reward): 61 | def __call__(self, robot, env, task): 62 | z_vel = robot.get_velocimeter()[2] 63 | r_rate, p_rate, _ = robot.get_base_rpy_rate() 64 | return -1.25 * z_vel ** 2 - 0.4 * abs(r_rate) - 0.4 * abs(p_rate) 65 | 66 | 67 | class BodyCollisionPenalty(Reward): 68 | def __call__(self, robot, env, task): 69 | contact_sum = 0 70 | for i, contact in enumerate(robot.get_leg_contacts()): 71 | if contact and i % 3 != 2: 72 | contact_sum += 1 73 | return 1 - contact_sum 74 | 75 | 76 | class JointMotionPenalty(Reward): 77 | def __call__(self, robot, env, task): 78 | vel_pen = -(robot.get_joint_vel() ** 2).sum() 79 | acc_pen = -(robot.get_joint_acc() ** 2).sum() 80 | return vel_pen + acc_pen * 1e-4 81 | 82 | 83 | class TargetSmoothnessReward(Reward): 84 | def __call__(self, robot, env, task): 85 | action_history = env.action_history 86 | actions = [action_history[-i - 1] for i in range(3)] 87 | return -( 88 | ((actions[0] - actions[1]) ** 2).sum() + 89 | ((actions[0] - 2 * actions[1] + actions[2]) ** 2).sum() 90 | ) 91 | 92 | 93 | class TorquePenalty(Reward): 94 | def __call__(self, robot, env, task): 95 | return -(robot.get_last_torque() ** 2).sum() 96 | 97 | 98 | class SlipPenalty(Reward): 99 | def __call__(self, robot, env, task): 100 | return -robot.get_slip_vel().sum() 101 | 102 | 103 | class FootClearanceReward(Reward): 104 | def __call__(self, robot, env, task): 105 | return 1. 106 | -------------------------------------------------------------------------------- /qdpgym/tasks/loct/utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import Union, Tuple, List, Any, Iterable, Callable 3 | 4 | import numpy as np 5 | import rtree 6 | import torch 7 | from rtree import Rtree 8 | from sklearn.linear_model import LinearRegression 9 | 10 | from qdpgym.utils import tf, Natural 11 | 12 | 13 | class PolicyWrapper(object): 14 | def __init__(self, net, device): 15 | self.net = net 16 | self.device = torch.device(device) 17 | 18 | def __call__(self, obs): 19 | with torch.inference_mode(): 20 | obs = torch.as_tensor(obs, dtype=torch.float32, device=self.device) 21 | return self.net(obs).detach().cpu().numpy() 22 | 23 | 24 | class GradISNaive(object): 25 | """ 26 | Adaptively change the sample weight by the gradient of reward. 27 | May lead to significant sample impoverishment. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | dim: int, 33 | min_key: Union[float, Tuple[float, ...]], 34 | max_key: Union[float, Tuple[float, ...]], 35 | max_len: int, 36 | grad_coef=1.0, 37 | max_weight=5.0, 38 | neighborhood_ratio=0.1, 39 | ): 40 | self._rtree = Rtree() 41 | self._min_key = min_key 42 | self._max_key = max_key 43 | if isinstance(self._min_key, float): 44 | self._interval = max_key - min_key 45 | else: 46 | self._interval = np.asarray(max_key) - np.asarray(min_key) 47 | self._max_len = max_len 48 | self._dim = dim 49 | self._grad_coef = grad_coef 50 | self._max_weight = max_weight 51 | self._nbh_size = neighborhood_ratio * self._interval 52 | self._bbox = self._key_to_bbox(min_key, max_key) 53 | 54 | self._rtree.insert(-1, self._bbox) 55 | 56 | self._cur_len = 0 57 | self._total_len = 0 58 | self._buffer = [] 59 | self._weights = [] # save grad weights 60 | self._weight_sum = 0. 
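    # Illustrative usage sketch (not part of the original file) for a 1-D
    # curriculum variable; `run_episode` and the constants are hypothetical:
    #
    #     sampler = GradISNaive(dim=1, min_key=0.0, max_key=1.0, max_len=1000)
    #     rng = np.random.default_rng()
    #     difficulty = sampler.sample(rng, uniform_prob=0.2, normal_var=0.05)
    #     episode_return = run_episode(difficulty)    # hypothetical rollout
    #     sampler.insert(difficulty, episode_return)  # weight grows where |d return / d key| is large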
61 | 62 | def _key_to_bbox(self, key1, key2=None): 63 | if key2 is None: 64 | key2 = key1 65 | if self._dim == 1: 66 | return key1, key1, key2, key2 67 | elif self._dim == 2: 68 | return (*key1, *key2) 69 | else: 70 | raise NotImplementedError 71 | 72 | def _bbox_to_key(self, bbox): 73 | if self._dim == 1: 74 | return bbox[0] 75 | elif self._dim == 2: 76 | return bbox[:2] 77 | else: 78 | raise NotImplementedError 79 | 80 | @property 81 | def samples(self): 82 | return iter(self) 83 | 84 | @property 85 | def particles(self): 86 | return zip(self._buffer, self._weights) 87 | 88 | def __iter__(self): 89 | for item in self._rtree.intersection( 90 | self._bbox, objects=True 91 | ): 92 | if item.id != -1: 93 | yield self._bbox_to_key(item.bbox), item.object 94 | 95 | def is_full(self): 96 | return self._cur_len == self._max_len 97 | 98 | def __len__(self): 99 | return self._cur_len 100 | 101 | def __repr__(self): 102 | return self._rtree.__repr__() 103 | 104 | def insert(self, key, value): 105 | self._rtree.insert(self._total_len, self._key_to_bbox(key), value) 106 | if self._cur_len < self._max_len: 107 | self._buffer.append(key) 108 | self._weights.append(1.0) 109 | self._weight_sum += 1.0 110 | self._cur_len += 1 111 | else: 112 | idx = self._total_len % self._max_len 113 | prev_key = self._buffer[idx] 114 | self._rtree.delete( 115 | self._total_len - self._max_len, 116 | self._key_to_bbox(prev_key) 117 | ) 118 | self._buffer[idx] = key 119 | grad_weight = self.get_grad_weight(key) 120 | self._weight_sum += grad_weight - self._weights[idx] 121 | self._weights[idx] = grad_weight 122 | 123 | self._total_len += 1 124 | 125 | def get_grad_weight(self, key) -> float: 126 | neighbors = self.get_neighbors(key, self._nbh_size) 127 | if len(neighbors) < 5: 128 | return self._max_weight 129 | else: 130 | x, y = zip(*neighbors) 131 | x, y = np.array(x), np.array(y) 132 | reg = LinearRegression() 133 | reg.fit(x.reshape(-1, 1), y) 134 | return np.exp( 135 | abs(reg.coef_[0]) * self._grad_coef 136 | ).clip(max=self._max_weight) 137 | 138 | def get_neighbors(self, key, radius): 139 | neighbors = [] 140 | for item in self._rtree.intersection( 141 | self._key_to_bbox(key - radius, key + radius), objects=True 142 | ): 143 | if item.id != -1: 144 | neighbors.append((self._bbox_to_key(item.bbox), item.object)) 145 | return neighbors 146 | 147 | def sample( 148 | self, 149 | random_gen: Union[np.random.Generator, np.random.RandomState], 150 | uniform_prob: float = 0., 151 | normal_var: float = None, 152 | ): 153 | if not self.is_full() or random_gen.random() < uniform_prob: 154 | return random_gen.uniform(self._min_key, self._max_key) 155 | else: 156 | # importance sampling 157 | weights = np.array(self._weights) / self._weight_sum 158 | sample = random_gen.choice(self._buffer, p=weights) 159 | return np.clip( 160 | random_gen.normal(sample, normal_var), 161 | self._min_key, self._max_key 162 | ) if normal_var else sample 163 | 164 | 165 | class GradIS(object): 166 | def __init__( 167 | self, 168 | dim: int, 169 | min_key: Union[float, Tuple[float, ...]], 170 | max_key: Union[float, Tuple[float, ...]], 171 | approx_max_len: int, 172 | grad_coef=1.0, 173 | max_weight=5.0, 174 | neighborhood_ratio=0.1, 175 | ): 176 | self._rtree = Rtree() 177 | self._dim = dim 178 | self._min_key = min_key 179 | self._max_key = max_key 180 | if isinstance(self._min_key, float): 181 | self._interval = max_key - min_key 182 | else: 183 | self._interval = np.asarray(max_key) - np.asarray(min_key) 184 | self._approx_max_len = 
approx_max_len 185 | self._max_density = self._approx_max_len * neighborhood_ratio ** dim 186 | self._grad_coef = grad_coef 187 | self._max_weight = max_weight 188 | self._nbh_size = neighborhood_ratio * self._interval 189 | self._bbox = self._key_to_bbox(min_key, max_key) 190 | 191 | self._rtree.insert(-1, self._bbox) 192 | 193 | self._cur_len = 0 194 | self._total_len = 0 195 | self._buffer_weights = collections.defaultdict(float) 196 | self._weight_sum = 0. 197 | 198 | def _key_to_bbox(self, key1, key2=None): 199 | if key2 is None: 200 | key2 = key1 201 | if self._dim == 1: 202 | return key1, key1, key2, key2 203 | elif self._dim == 2: 204 | return (*key1, *key2) 205 | else: 206 | raise NotImplementedError 207 | 208 | def _bbox_to_key(self, bbox): 209 | if self._dim == 1: 210 | return bbox[0] 211 | elif self._dim == 2: 212 | return bbox[:2] 213 | else: 214 | raise NotImplementedError 215 | 216 | def _key_dist(self, key1, key2): 217 | if self._dim == 1: 218 | return abs(key1 - key2) 219 | elif self._dim == 2: 220 | return tf.vnorm(np.asarray(key1) - np.asarray(key2)) 221 | else: 222 | raise NotImplementedError 223 | 224 | @property 225 | def samples(self) -> Iterable[Tuple[Any, float]]: 226 | return iter(self) 227 | 228 | @property 229 | def particles(self): 230 | return self._buffer_weights.items() 231 | 232 | @property 233 | def initialized(self): 234 | return self._total_len >= self._approx_max_len 235 | 236 | def __iter__(self): 237 | for item in self._rtree.intersection( 238 | self._bbox, objects=True 239 | ): 240 | if item.id != -1: 241 | yield self._bbox_to_key(item.bbox), item.object 242 | 243 | def __len__(self): 244 | return self._cur_len 245 | 246 | def __repr__(self): 247 | return self._rtree.__repr__() 248 | 249 | def insert(self, key, value): 250 | raw_neighbors = self.get_neighbors(key, self._nbh_size, raw=True) 251 | 252 | if key in self._buffer_weights: 253 | # remove duplicated key 254 | dup_idx = None 255 | for idx, item in enumerate(raw_neighbors): 256 | item_key = self._bbox_to_key(item.bbox) 257 | if np.array_equal(item_key, key): 258 | # print('duplicated key', key) 259 | self._weight_sum -= self._buffer_weights[key] 260 | self._rtree.delete(item.id, item.bbox) 261 | dup_idx = idx 262 | break 263 | if dup_idx is not None: 264 | raw_neighbors.pop(dup_idx) 265 | elif len(raw_neighbors) > self._max_density: 266 | # delete the obsolete particle 267 | # obsolete: rtree.index.Item = min(raw_neighbors, key=lambda x: x.id) 268 | # or delete the nearest particle? 
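            # The branch below caps local particle density: when a neighborhood already
            # holds more than `_max_density` particles, the one closest to the incoming
            # key is evicted from the R-tree and its weight subtracted from the total.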
269 | obsolete: rtree.index.Item = min( 270 | raw_neighbors, key=lambda x: self._key_dist(self._bbox_to_key(x.bbox), key) 271 | ) 272 | raw_neighbors.remove(obsolete) 273 | self._rtree.delete(obsolete.id, obsolete.bbox) 274 | self._weight_sum -= self._buffer_weights.pop(self._bbox_to_key(obsolete.bbox)) 275 | else: 276 | self._cur_len += 1 277 | 278 | self._total_len += 1 279 | self._rtree.insert(self._total_len, self._key_to_bbox(key), value) 280 | # calculate weight from gradient 281 | neighbors = [(self._bbox_to_key(item.bbox), item.object) for item in raw_neighbors] 282 | neighbors.append((key, value)) 283 | grad_weight = self.get_weight_from_grad(key, neighbors) 284 | self._weight_sum += grad_weight 285 | self._buffer_weights[key] = grad_weight 286 | 287 | def get_weight_from_grad(self, key, neighbors=None) -> float: 288 | if neighbors is None: 289 | neighbors = self.get_neighbors(key, self._nbh_size) 290 | if len(neighbors) < 5: 291 | return 1.0 292 | else: 293 | x, y = zip(*neighbors) 294 | x, y = np.array(x), np.array(y) 295 | reg = LinearRegression() 296 | # TODO: dim >= 2 297 | reg.fit(x.reshape(-1, 1), y) 298 | r = reg.predict(np.reshape(key, (1, 1))).clip(0., 1.).item() 299 | return np.clip( 300 | np.exp(abs(reg.coef_[0]) * self._grad_coef) + 301 | np.exp(-((r - 0.7) / 0.7) ** 2), 302 | 0., self._max_weight 303 | ).item() 304 | 305 | # grad = 2 * r * math.sqrt(-math.log(r)) 306 | # return np.clip(abs(grad), 0., self._max_weight).item() 307 | # # return np.exp( 308 | # # abs(grad) * self._grad_coef 309 | # # ).clip(max=self._max_weight) 310 | 311 | def get_neighbors( 312 | self, key, radius, raw: bool = False 313 | ) -> Union[List[rtree.index.Item], List[Tuple[Any, float]]]: 314 | neighbors = [] 315 | for item in self._rtree.intersection( 316 | self._key_to_bbox(key - radius, key + radius), objects=True 317 | ): 318 | if item.id != -1: 319 | neighbors.append( 320 | item if raw else ( 321 | self._bbox_to_key(item.bbox), item.object 322 | )) 323 | return neighbors 324 | 325 | def sample( 326 | self, 327 | random_gen: Union[np.random.Generator, np.random.RandomState], 328 | uniform_prob: float = 0., 329 | normal_var: float = None, 330 | ): 331 | if not self.initialized or random_gen.random() < uniform_prob: 332 | return random_gen.uniform(self._min_key, self._max_key) 333 | else: 334 | # importance sampling 335 | # TODO: a dict for saving index and two lists 336 | # for saving samples and weights may be better? 
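            # Importance-sampling step: normalize the stored gradient weights into a
            # categorical distribution, draw one stored key, and optionally perturb it
            # with Gaussian noise (`normal_var` is passed as the scale) clipped back
            # into [min_key, max_key].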
337 | buffer, weights = zip(*self._buffer_weights.items()) 338 | weights = np.array(weights) / self._weight_sum 339 | sample = random_gen.choice(buffer, p=weights) 340 | return np.clip( 341 | random_gen.normal(sample, normal_var), 342 | self._min_key, self._max_key 343 | ) if normal_var else sample 344 | 345 | 346 | class AlpIS(object): 347 | def __init__( 348 | self, 349 | dim: int, 350 | min_key: Union[float, Tuple[float, ...], np.ndarray], 351 | max_key: Union[float, Tuple[float, ...], np.ndarray], 352 | window_size: int, 353 | default_distribution: Callable[[Union[np.random.Generator, np.random.RandomState]], Any] = None, 354 | neighborhood_ratio=0.1, 355 | min_lp=1e-4, 356 | ): 357 | self._dim = dim 358 | self._min_key = min_key 359 | self._max_key = max_key 360 | self._interval = np.array(max_key) - np.array(min_key) 361 | self._nbh_size = neighborhood_ratio * self._interval 362 | self._window_size = window_size 363 | self._max_density = self._window_size * neighborhood_ratio ** dim 364 | self._min_lp = min_lp 365 | if default_distribution is None: 366 | self._explore = lambda r: r.uniform(self._min_key, self._max_key) 367 | else: 368 | self._explore = default_distribution 369 | 370 | self._buffer = Rtree() 371 | self._history = Rtree() 372 | self._bbox = self._key_to_bbox(min_key, max_key) 373 | self._history_idx = Natural() 374 | self._window = collections.deque(maxlen=window_size) 375 | 376 | self._total_len = 0 377 | self._samples = [] 378 | self._weights = [] 379 | self._init = False 380 | 381 | def _key_to_bbox(self, key1, key2=None): 382 | if key2 is None: 383 | key2 = key1 384 | if self._dim == 1: 385 | return key1, key1, key2, key2 386 | if self._dim == 2: 387 | return (*key1, *key2) 388 | raise NotImplementedError 389 | 390 | def _bbox_to_key(self, bbox): 391 | if self._dim == 1: 392 | return bbox[0] 393 | if self._dim == 2: 394 | return bbox[:2] 395 | raise NotImplementedError 396 | 397 | def _key_dist(self, key1, key2): 398 | if self._dim == 1: 399 | return abs(key1 - key2) 400 | if self._dim == 2: 401 | return tf.vnorm(np.asarray(key1) - np.asarray(key2)) 402 | raise NotImplementedError 403 | 404 | @property 405 | def progresses(self) -> Iterable[Tuple[Any, float]]: 406 | for item in self._history.intersection(self._bbox, objects=True): 407 | if item.id != -1: 408 | yield self._bbox_to_key(item.bbox), item.object 409 | 410 | @property 411 | def particles(self): 412 | return zip(self._samples, self._weights) 413 | 414 | @property 415 | def is_init(self): 416 | return self._init 417 | 418 | def insert(self, key, value): 419 | self._total_len += 1 420 | self._buffer.insert(self._total_len, self._key_to_bbox(key), value) 421 | self._window.append((key, value)) 422 | 423 | if self._total_len % self._window_size == 0: 424 | self._samples.clear() 425 | self._weights.clear() 426 | if self._init or len(self._history): 427 | self._init = True 428 | for k, _ in self.progresses: 429 | self._samples.append(k) 430 | self._weights.append(self._compute_alp(k)) 431 | weight_sum = np.sum(self._weights) 432 | if weight_sum != 0.: 433 | self._weights = (np.array(self._weights) / weight_sum).tolist() 434 | 435 | self._patch_density_merge(self._window) 436 | self._buffer = Rtree() 437 | 438 | def sample( 439 | self, 440 | random_gen: Union[np.random.Generator, np.random.RandomState], 441 | uniform_prob: float = 0., 442 | normal_var: float = None, 443 | ): 444 | if not self._init or random_gen.random() < uniform_prob: 445 | return self._explore(random_gen) 446 | else: 447 | # importance 
sampling 448 | sample = random_gen.choice(self._samples, p=self._weights) 449 | return np.clip( 450 | random_gen.normal(sample, normal_var), 451 | self._min_key, self._max_key 452 | ) if normal_var else sample 453 | 454 | def _patch_density_merge(self, window): 455 | for k, v in window: 456 | neighbors = self._get_neighbors(self._history, k) 457 | if len(neighbors) > self._max_density: 458 | # delete the nearest particle 459 | obsolete: rtree.index.Item = min( 460 | neighbors, key=lambda x: self._key_dist(self._bbox_to_key(x.bbox), k) 461 | ) 462 | self._history.delete(obsolete.id, obsolete.bbox) 463 | self._history.insert(next(self._history_idx), self._key_to_bbox(k), v) 464 | 465 | def _compute_alp(self, key) -> float: 466 | n_history = self._get_neighbor_objects(self._history, key) 467 | n_buffer = self._get_neighbor_objects(self._buffer, key) 468 | # TODO: Remove Duplicated items 469 | if len(n_history) > 3 and len(n_buffer) > 3: 470 | v_history = np.array(n_history).mean() 471 | v_buffer = np.array(n_buffer).mean() 472 | alp = max(abs(v_buffer - v_history), self._min_lp) 473 | else: 474 | alp = self._min_lp 475 | return alp 476 | 477 | def _get_neighbors(self, rtree_obj, key) -> List[rtree.index.Item]: 478 | neighbors = [] 479 | bbox = self._key_to_bbox(key - self._nbh_size, key + self._nbh_size) 480 | for item in rtree_obj.intersection(bbox, objects=True): 481 | if item.id != -1: 482 | neighbors.append(item) 483 | return neighbors 484 | 485 | def _get_neighbor_objects(self, rtree_obj, key) -> List[float]: 486 | neighbors = [] 487 | bbox = self._key_to_bbox(key - self._nbh_size, key + self._nbh_size) 488 | for item in rtree_obj.intersection(bbox, objects=True): 489 | if item.id != -1: 490 | neighbors.append(item.object) 491 | return neighbors 492 | 493 | 494 | class GradIS1D(GradIS): 495 | def __init__( 496 | self, 497 | min_key: Union[float, Tuple[float, ...]], 498 | max_key: Union[float, Tuple[float, ...]], 499 | max_len: int, 500 | grad_coef=1.0, 501 | max_weight=5.0, 502 | neighborhood_ratio=0.1, 503 | ): 504 | super().__init__( 505 | 1, 506 | min_key, 507 | max_key, 508 | max_len, 509 | grad_coef, 510 | max_weight, 511 | neighborhood_ratio, 512 | ) 513 | -------------------------------------------------------------------------------- /qdpgym/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tests/__init__.py -------------------------------------------------------------------------------- /qdpgym/tests/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | 4 | 5 | def test_reward_reshape(): 6 | x = np.arange(-2, 2, 0.01) 7 | plt.plot(x, np.tanh(x)) 8 | plt.show() 9 | -------------------------------------------------------------------------------- /qdpgym/tests/test_pybullet.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import time 3 | 4 | import numpy as np 5 | import pybullet as pyb 6 | import pybullet_data 7 | 8 | from qdpgym.sim.blt.env import QuadrupedEnv 9 | from qdpgym.sim.blt.hooks import ViewerHook 10 | from qdpgym.sim.blt.quadruped import Aliengo 11 | from qdpgym.sim.blt.terrain import Plain, Hills, Slopes, Steps, PlainHf 12 | from qdpgym.sim.task import NullTask 13 | from qdpgym.tasks.loct import LocomotionV0, LocomotionPMTG 14 | from qdpgym.utils import 
tf 15 | 16 | 17 | def test_robot(): 18 | pyb.connect(pyb.GUI) 19 | pyb.setTimeStep(2e-3) 20 | pyb.setAdditionalSearchPath(pybullet_data.getDataPath()) 21 | Plain().spawn(pyb) 22 | rob = Aliengo(500, 'pd') 23 | rs = np.random.RandomState() 24 | rob.spawn(pyb, rs) 25 | pyb.setGravity(0, 0, -9.8) 26 | 27 | for _ in range(1000): 28 | # with MfTimer() as t: 29 | pyb.stepSimulation() 30 | rob.update_observation(rs) 31 | rob.apply_command(rob.STANCE_CONFIG) 32 | 33 | pyb.disconnect() 34 | 35 | def test_robot_extent(): 36 | pyb.connect(pyb.GUI) 37 | pyb.setTimeStep(2e-3) 38 | pyb.setAdditionalSearchPath(pybullet_data.getDataPath()) 39 | Plain().spawn(pyb) 40 | rob = Aliengo(500, 'pd') 41 | rob.spawn_on_rack(pyb, np.random) 42 | pyb.setGravity(0, 0, -9.8) 43 | 44 | joint_pos = np.concatenate(( 45 | rob.inverse_kinematics(0, (0.3, 0., 0.05)), 46 | rob.inverse_kinematics(0, (0.35, 0., 0.1)), 47 | rob.inverse_kinematics(0, (0.25, 0., 0.)), 48 | rob.inverse_kinematics(0, (-0.25, 0., 0.)), 49 | )) 50 | 51 | for _ in range(1000): 52 | pyb.stepSimulation() 53 | rob.update_observation(np.random) 54 | 55 | rob.apply_command(joint_pos) 56 | time.sleep(0.002) 57 | 58 | pyb.disconnect() 59 | 60 | 61 | def test_env(): 62 | rob = Aliengo(500, 'pd') 63 | arena = Plain() 64 | task = NullTask() 65 | task.add_hook(ViewerHook()) 66 | env = QuadrupedEnv(rob, arena, task) 67 | env.reset() 68 | for _ in range(1000): 69 | env.step(rob.STANCE_CONFIG) 70 | 71 | 72 | def test_tg(): 73 | rob = Aliengo(500, 'actuator_net', True) 74 | arena = Plain() 75 | task = LocomotionPMTG() 76 | task.add_hook(ViewerHook()) 77 | env = QuadrupedEnv(rob, arena, task) 78 | env.reset() 79 | for _ in range(1000): 80 | env.step(np.zeros(16)) 81 | 82 | 83 | def test_replaceHeightfield(): 84 | pyb.connect(pyb.GUI) 85 | pyb.setRealTimeSimulation(True) 86 | terrains = [PlainHf.default(), Hills.default(), 87 | Steps.default(), Slopes.default()] 88 | current = None 89 | for i in range(5): 90 | for terrain in terrains: 91 | if current is None: 92 | terrain.spawn(pyb) 93 | else: 94 | terrain.replace(pyb, current) 95 | current = terrain 96 | time.sleep(0.5) 97 | current.remove(pyb) 98 | time.sleep(1.0) 99 | pyb.disconnect() 100 | 101 | 102 | def test_terrainApi(): 103 | pyb.connect(pyb.GUI) 104 | pyb.setRealTimeSimulation(True) 105 | terrain = Hills.make(20, 0.1, (0.5, 10), random_state=np.random) 106 | # terrain = Steps.make(20, 0.1, 1.0, 0.5, random_state=np.random) 107 | # terrain = Slopes.make(20, 0.1, np.pi / 6, 0.5) 108 | terrain.spawn(pyb) 109 | 110 | sphere_shape = pyb.createVisualShape( 111 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0., 0.8, 0., 0.6) 112 | ) 113 | ray_hit_shape = pyb.createVisualShape( 114 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0.8, 0., 0., 0.6) 115 | ) 116 | cylinder_shape = pyb.createVisualShape( 117 | shapeType=pyb.GEOM_CYLINDER, radius=0.005, length=0.11, 118 | rgbaColor=(0., 0, 0.8, 0.6) 119 | ) 120 | box_shape = pyb.createVisualShape( 121 | shapeType=pyb.GEOM_BOX, halfExtents=(0.03, 0.03, 0.03), 122 | rgbaColor=(0.8, 0., 0., 0.6) 123 | ) 124 | 125 | points, vectors, ray_hits = [], [], [] 126 | box_id = -1 127 | 128 | for i in range(10): 129 | peak = terrain.get_peak((-1, 1), (-1, 1)) 130 | if i == 0: 131 | box_id = pyb.createMultiBody( 132 | baseVisualShapeIndex=box_shape, 133 | basePosition=peak 134 | ) 135 | else: 136 | pyb.resetBasePositionAndOrientation( 137 | box_id, peak, (0., 0., 0., 1.) 
138 | ) 139 | for idx, (x, y) in enumerate( 140 | itertools.product(np.linspace(-1, 1, 10), np.linspace(-1, 1, 10)) 141 | ): 142 | h = terrain.get_height(x, y) 143 | vec_orn = tf.Quaternion.from_rotation( 144 | tf.Rotation.from_zaxis(terrain.get_normal(x, y)) 145 | ) 146 | ray_pos = pyb.rayTest((x, y, 2), (x, y, -1))[0][3] 147 | if i == 0: 148 | points.append(pyb.createMultiBody( 149 | baseVisualShapeIndex=sphere_shape, 150 | basePosition=(x, y, h) 151 | )) 152 | vectors.append(pyb.createMultiBody( 153 | baseVisualShapeIndex=cylinder_shape, 154 | basePosition=(x, y, h), 155 | baseOrientation=vec_orn 156 | )) 157 | ray_hits.append(pyb.createMultiBody( 158 | baseVisualShapeIndex=ray_hit_shape, 159 | basePosition=ray_pos 160 | )) 161 | else: 162 | pyb.resetBasePositionAndOrientation( 163 | points[idx], (x, y, h), (0., 0., 0., 1.) 164 | ) 165 | pyb.resetBasePositionAndOrientation( 166 | ray_hits[idx], ray_pos, (0., 0., 0., 1.) 167 | ) 168 | pyb.resetBasePositionAndOrientation( 169 | vectors[idx], (x, y, h), vec_orn 170 | ) 171 | time.sleep(3) 172 | new = Hills.make(20, 0.1, (np.random.random(), 10), random_state=np.random) 173 | new.replace(pyb, terrain) 174 | terrain = new 175 | 176 | pyb.disconnect() 177 | 178 | 179 | def test_gym_env(): 180 | rob = Aliengo(500, 'pd') 181 | arena = Plain() 182 | task = LocomotionV0() 183 | env = QuadrupedEnv(rob, arena, task) 184 | print(env.observation_space) 185 | print(env.action_space) 186 | 187 | 188 | if __name__ == '__main__': 189 | test_robot_extent() -------------------------------------------------------------------------------- /qdpgym/thirdparty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/thirdparty/__init__.py -------------------------------------------------------------------------------- /qdpgym/thirdparty/gamepad/__init__.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/piborg/Gamepad.git 2 | -------------------------------------------------------------------------------- /qdpgym/thirdparty/gamepad/controllers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Standard gamepad mappings. 4 | 5 | Pulled in to gamepad.py directly. 
6 | """ 7 | import copy 8 | 9 | from .gamepad import Gamepad 10 | 11 | __all__ = ['PS4', 'XboxONE', 'Xbox'] 12 | all_controllers = copy.copy(__all__) 13 | __all__.append(all_controllers) 14 | 15 | 16 | class PS4(Gamepad): 17 | fullName = 'PlayStation 4 controller' 18 | 19 | def __init__(self, joystickNumber=0): 20 | Gamepad.__init__(self, joystickNumber) 21 | self.axisNames = { 22 | 0: 'LAS -X', 23 | 1: 'LAS -Y', 24 | 2: 'LT', 25 | 3: 'RAS -X', 26 | 4: 'RAS -Y', 27 | 5: 'RT', 28 | 6: 'DPAD-X', 29 | 7: 'DPAD-Y' 30 | } 31 | self.buttonNames = { 32 | 0: 'A', 33 | 1: 'B', 34 | 2: 'X', 35 | 3: 'Y', 36 | 4: 'L1', 37 | 5: 'R1', 38 | 6: 'L2', 39 | 7: 'R2', 40 | 8: 'SHARE', 41 | 9: 'LAS', 42 | 10: 'RAS', 43 | 11: 'L3', 44 | 12: 'R3' 45 | } 46 | self._setupReverseMaps() 47 | 48 | 49 | class XboxONE(Gamepad): 50 | fullName = 'Xbox ONE controller' 51 | 52 | def __init__(self, joystickNumber=0): 53 | Gamepad.__init__(self, joystickNumber) 54 | self.axisNames = { 55 | 0: 'LAS -X', # Left Analog Stick Left/Right 56 | 1: 'LAS -Y', # Left Analog Stick Up/Down 57 | 2: 'RAS -X', # Right Analog Stick Left/Right 58 | 3: 'RAS -Y', # Right Analog Stick Up/Down 59 | 4: 'RT', # Right Trigger 60 | 5: 'LT', # Left Trigger 61 | 6: 'DPAD -X', # D-Pad Left/Right 62 | 7: 'DPAD -Y' # D-Pad Up/Down 63 | } 64 | self.buttonNames = { 65 | 0: 'A', # A Button 66 | 1: 'B', # B Button 67 | 3: 'X', # X Button 68 | 4: 'Y', # Y Button 69 | 6: 'LB', # Left Bumper 70 | 7: 'RB', # Right Bumper 71 | 11: 'START', # Hamburger Button 72 | 12: 'HOME', # XBOX Button 73 | 13: 'LAS', # Left Analog Stick button 74 | 14: 'RAS' # Right Analog Stick button 75 | 76 | } 77 | self._setupReverseMaps() 78 | 79 | 80 | Xbox = XboxONE 81 | -------------------------------------------------------------------------------- /qdpgym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /qdpgym/utils/tf.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Tuple, Sequence, Union 3 | 4 | import numpy as np 5 | from scipy.spatial.transform import Rotation as scipyRotation 6 | 7 | __all__ = ['Rotation', 'Quaternion', 'Rpy', 'Odometry', 8 | 'vnorm', 'vunit', 'vcross', 'estimate_normal', 9 | 'get_rpy_rate_from_ang_vel'] 10 | 11 | 12 | def vnorm(vec): 13 | return math.sqrt(sum(x ** 2 for x in vec)) 14 | 15 | 16 | def vunit(x) -> np.ndarray: 17 | return np.asarray(x) / vnorm(x) 18 | 19 | 20 | def vcross(vec3_1, vec3_2) -> np.ndarray: 21 | """ 22 | A much faster alternative for np.cross. 
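    Illustrative check: vcross((1., 0., 0.), (0., 1., 0.)) -> array([0., 0., 1.]).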
23 | """ 24 | (x1, y1, z1), (x2, y2, z2) = vec3_1, vec3_2 25 | return np.array((y1 * z2 - y2 * z1, z1 * x2 - z2 * x1, x1 * y2 - x2 * y1)) 26 | 27 | 28 | class Rotation(object): 29 | THRESHOLD = 1e-5 30 | 31 | @staticmethod 32 | def from_rpy(rpy): # zyx 33 | sr, sp, sy = np.sin(rpy) 34 | cr, cp, cy = np.cos(rpy) 35 | matrix = ((cy * cp, cy * sp * sr - sy * cr, cy * sp * cr + sy * sr), 36 | (sy * cp, sy * sp * sr + cy * cr, sy * sp * cr - cy * sr), 37 | (-sp, cp * sr, cp * cr)) 38 | return np.array(matrix) 39 | 40 | @staticmethod 41 | def from_quaternion(q): 42 | return scipyRotation.from_quat(q).as_matrix() 43 | 44 | @staticmethod 45 | def from_zaxis(z): 46 | if z[2] == 1.0: 47 | ref = np.array((1., 0., 0.)) 48 | else: 49 | ref = np.array((0., 0., 1.)) 50 | x = vcross(z, ref) 51 | x /= vnorm(x) 52 | y = vcross(z, x) 53 | return np.array((x, y, z)).T 54 | 55 | 56 | class Quaternion(object): 57 | """ 58 | q = [x y z w] = w + xi + yj + zk 59 | """ 60 | 61 | @staticmethod 62 | def identity(): 63 | return np.array((0., 0., 0., 1.)) 64 | 65 | @staticmethod 66 | def from_rpy(rpy): 67 | rpy_2 = np.asarray(rpy) / 2 68 | (sr, sp, sy), (cr, cp, cy) = np.sin(rpy_2), np.cos(rpy_2) 69 | return np.array(((sr * cp * cy - cr * sp * sy, 70 | cr * sp * cy + sr * cp * sy, 71 | cr * cp * sy - sr * sp * cy, 72 | cr * cp * cy + sr * sp * sy))) 73 | 74 | @staticmethod 75 | def from_rotation(r): 76 | # scipy api is faster than python implementation 77 | return scipyRotation.from_matrix(r).as_quat() 78 | 79 | @staticmethod 80 | def inverse(q): 81 | x, y, z, w = q 82 | return np.array((-x, -y, -z, w)) 83 | 84 | @staticmethod 85 | def from_wxyz(q): 86 | w, x, y, z = q 87 | return np.array((x, y, z, w)) 88 | 89 | @staticmethod 90 | def to_wxyz(q): 91 | x, y, z, w = q 92 | return np.array((w, x, y, z)) 93 | 94 | 95 | class Rpy(object): 96 | """ZYX intrinsic Euler Angles (roll, pitch, yaw)""" 97 | 98 | @staticmethod 99 | def from_quaternion(q): 100 | x, y, z, w = q 101 | return np.array((np.arctan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y)), 102 | np.arcsin(2 * (w * y - x * z)), 103 | np.arctan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)))) 104 | 105 | @classmethod 106 | def from_rotation(cls, r): 107 | return scipyRotation.from_matrix(r).as_euler('ZYX', degrees=False)[::-1] 108 | 109 | 110 | def get_rpy_rate_from_ang_vel(rpy, angular): 111 | r, p, y = rpy 112 | sp, cp = math.sin(p), math.cos(p) 113 | cr, tr = math.cos(r), math.tan(r) 114 | trans = np.array(((1, sp * tr, cp * tr), 115 | (0, cp, -sp), 116 | (0, sp / cr, cp / cr))) 117 | return np.dot(trans, angular) 118 | 119 | 120 | FLOAT3 = Tuple[float, float, float] 121 | 122 | 123 | def estimate_normal(points: Union[Sequence[FLOAT3], np.ndarray]): 124 | """ 125 | Estimate the normal of terrain from a set of points. 126 | The terrain CANNOT be vertical. 127 | :param points: a set of terrain points 128 | :return: np.ndarray, the normal vector 129 | """ 130 | X, Y, Z = np.array(points).T 131 | A = np.zeros((3, 3)) 132 | A[0, :] = np.sum(X ** 2), X @ Y, np.sum(X) 133 | A[1, :] = A[0, 1], np.sum(Y ** 2), np.sum(Y) 134 | A[2, :] = A[0, 2], A[1, 2], len(X) 135 | b = np.array((X @ Z, Y @ Z, np.sum(Z))) 136 | a, b, _ = np.linalg.solve(A, b) 137 | return vunit((-a, -b, 1)) 138 | 139 | 140 | ARR_ZERO3 = (0., 0., 0.) 
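# Illustrative round trip through the helpers above (not part of the original file),
# as a sanity check of the conventions used here: ZYX intrinsic roll-pitch-yaw and
# quaternions stored as [x, y, z, w].
#
#     rpy = np.array((0.1, -0.2, 0.3))
#     R = Rotation.from_rpy(rpy)
#     q = Quaternion.from_rotation(R)
#     assert np.allclose(Rpy.from_quaternion(q), rpy)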
141 | ARR_EYE3 = (ARR_ZERO3,) * 3 142 | 143 | 144 | class Odometry(object): 145 | """ 146 | Homogeneous coordinate transformation defined by a rotation and a translation: 147 | ((R_3x3, t_1x3) 148 | (0_3x1, 1_1x1)) 149 | """ 150 | 151 | def __init__(self, rotation=ARR_EYE3, translation=ARR_ZERO3): 152 | self.rotation, self.translation = np.asarray(rotation), np.asarray(translation) 153 | 154 | def multiply(self, other): 155 | if isinstance(other, Odometry): 156 | return Odometry(self.rotation @ other.rotation, 157 | self.translation + self.rotation @ other.translation) 158 | if isinstance(other, np.ndarray): 159 | return self.rotation @ other + self.translation 160 | raise RuntimeError(f'Unsupported datatype `{type(other)}` for multiplying') 161 | 162 | def __matmul__(self, other): 163 | return self.multiply(other) 164 | 165 | def __imatmul__(self, other): 166 | self.rotation @= other.rotation 167 | self.translation += self.rotation @ other.translation 168 | 169 | def __repr__(self): 170 | return str(np.concatenate((self.rotation, np.expand_dims(self.translation, axis=1)), axis=1)) 171 | -------------------------------------------------------------------------------- /qdpgym/utils/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import math 4 | import re 5 | import time 6 | from datetime import datetime 7 | from typing import Sequence, Callable, Optional, Union 8 | 9 | import numpy as np 10 | import yaml 11 | 12 | __all__ = ['make_part', 'Angle', 'replace_is', 'replace_eq', 'colored_str', 'log', 13 | 'MfTimer', 'get_padded', 'PadWrapper', 'get_timestamp', 'YamlLoader', 14 | 'print_return', 'Natural', 'plt_figure_to_numpy'] 15 | 16 | 17 | class make_part(functools.partial): 18 | """ 19 | Predefine some init parameters of a class without creating an instance. 
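    Illustrative example (all names hypothetical):
        LargePool = make_part(ThreadPool, num_workers=8)
        pool = LargePool()          # equivalent to ThreadPool(num_workers=8)
        LargePool.some_class_attr   # attribute lookup is forwarded to ThreadPool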
20 | """ 21 | 22 | def __getattr__(self, item): 23 | return getattr(self.func, item) 24 | 25 | 26 | PI, TAU = math.pi, math.tau 27 | 28 | 29 | class Angle(object): 30 | @staticmethod 31 | def norm(x): 32 | """normalize an angle to [-pi, pi)""" 33 | if isinstance(x, np.ndarray): 34 | return x - ((x + PI) / TAU).astype(int) * TAU 35 | if -PI <= x < PI: 36 | return x 37 | return x - int((x + PI) / TAU) * TAU 38 | 39 | @staticmethod 40 | def mean(lst: Sequence[float]): 41 | _sum = last = lst[0] 42 | for i, a in enumerate(lst): 43 | if i == 0: 44 | continue 45 | last = Angle.near(last, a) 46 | _sum += last 47 | return Angle.norm(_sum / len(lst)) 48 | 49 | @staticmethod 50 | def near(x, y): 51 | if x > y + PI: 52 | y += TAU 53 | elif x < y - PI: 54 | y -= TAU 55 | return y 56 | 57 | @staticmethod 58 | def to_deg(rad): 59 | return rad / math.pi * 180 60 | 61 | 62 | def get_padded(queue: Sequence, idx): 63 | if idx < 0: 64 | if len(queue) >= -idx: 65 | return queue[idx] 66 | else: 67 | return queue[0] 68 | else: 69 | if len(queue) > idx: 70 | return queue[idx] 71 | else: 72 | return queue[-1] 73 | 74 | 75 | class PadWrapper(object): 76 | def __init__(self, seq: Sequence): 77 | self.seq = seq 78 | self.is_empty = not self.seq 79 | 80 | def __getitem__(self, item): 81 | if self.is_empty: 82 | raise IndexError('Empty Sequence') 83 | else: 84 | return get_padded(self.seq, item) 85 | 86 | def __len__(self): 87 | return len(self.seq) 88 | 89 | def __repr__(self): 90 | return f'PadWrapper[{self.seq}]' 91 | 92 | 93 | def replace_is(seq: list, src, dst): 94 | for idx, item in enumerate(seq): 95 | if item is src: 96 | seq[idx] = dst 97 | 98 | 99 | def replace_eq(seq: list, src, dst): 100 | for idx, item in enumerate(seq): 101 | if item == src: 102 | seq[idx] = dst 103 | 104 | 105 | class MfTimer(object): 106 | """ 107 | Multifunctional Timer. 
Typical usage example: 108 | 109 | with MfTimer() as timer: 110 | do_something() 111 | print(timer.time_spent) 112 | """ 113 | 114 | @classmethod 115 | def start_now(cls): 116 | timer = MfTimer() 117 | timer.start() 118 | return timer 119 | 120 | @classmethod 121 | def record(cls, func: Callable, *args, **kwargs): 122 | start_time = time.time() 123 | func(*args, **kwargs) 124 | end_time = time.time() 125 | return end_time - start_time 126 | 127 | def __init__(self): 128 | self._start_time = self._end_time = None 129 | 130 | def start(self): 131 | self._start_time = time.time() 132 | 133 | def __enter__(self): 134 | self._start_time = time.time() 135 | return self 136 | 137 | def end(self, verbose=False): 138 | self._end_time = time.time() 139 | if verbose: 140 | print(self.time_spent) 141 | return self.time_spent 142 | 143 | def __exit__(self, exc_type, exc_val, exc_tb): 144 | self._end_time = time.time() 145 | 146 | @property 147 | def time_spent(self): 148 | return self._end_time - self._start_time 149 | 150 | 151 | BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) 152 | RESET_SEQ = "\033[0m" 153 | COLOR_SEQ = "\033[1;%dm" 154 | 155 | 156 | def colored_str(content: str, color: str): 157 | try: 158 | color = eval(color.upper()) 159 | except NameError: 160 | raise RuntimeError(f'No color named {color}') 161 | return COLOR_SEQ % (30 + color) + content + RESET_SEQ 162 | 163 | 164 | class ColoredFormatter(logging.Formatter): 165 | COLORS = { 166 | 'WARNING': YELLOW, 167 | 'INFO': WHITE, 168 | 'DEBUG': BLUE, 169 | 'ERROR': RED, 170 | 'CRITICAL': MAGENTA, 171 | } 172 | 173 | def __init__(self, *args, **kwargs): 174 | super().__init__(*args, **kwargs) 175 | self.use_color = True 176 | 177 | def format(self, record): 178 | lvl = record.levelname 179 | message = str(record.msg) 180 | funcName = record.funcName 181 | if lvl in self.COLORS: 182 | lvl_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + lvl + RESET_SEQ 183 | msg_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + message + RESET_SEQ 184 | funcName_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + funcName + RESET_SEQ 185 | record.levelname = lvl_colored 186 | record.msg = msg_colored 187 | record.funcName = funcName_colored 188 | return super().format(record) 189 | 190 | 191 | class _Log(object): 192 | logger: Optional[logging.Logger] = None 193 | 194 | @classmethod 195 | def init_logger(cls, name='logger', 196 | log_level: int = logging.INFO, 197 | log_fmt='[%(asctime)s] %(message)s', 198 | date_fmt='%b%d %H:%M:%S', 199 | log_dir=None, 200 | client_ip='127.0.0.1'): 201 | np.set_printoptions(3, linewidth=10000, suppress=True) 202 | # torch.set_printoptions(linewidth=10000, profile='short') 203 | cls.logger = logging.getLogger(name) 204 | cls.logger.setLevel(log_level) 205 | # if log_dir: 206 | # os.makedirs(log_dir) 207 | # fh = logging.FileHandler(os.path.join(log_dir, 'log.txt')) 208 | # fh.setLevel(logging.DEBUG) 209 | # cls.logger.addHandler(fh) 210 | # soh = SocketHandler(client_ip, 19996) 211 | # soh = SocketHandler('10.12.120.120', 19996) 212 | # soh.setFormatter(logging.Formatter()) 213 | # cls.logger.addHandler(soh) 214 | formatter = ColoredFormatter(log_fmt, date_fmt) 215 | sh = logging.StreamHandler() 216 | sh.setLevel(log_level) 217 | sh.setFormatter(formatter) 218 | cls.logger.addHandler(sh) 219 | cls.logger.propagate = False 220 | 221 | @classmethod 222 | def set_logger_level(cls, log_level: str): 223 | level_dict = {'DEBUG': logging.DEBUG, 224 | 'INFO': logging.INFO, 225 | 'WARNING': logging.WARNING, 226 | 'ERROR': 
logging.ERROR, 227 | 'CRITICAL': logging.CRITICAL} 228 | 229 | cls.logger.handlers[-1].setLevel(level_dict[log_level.upper()]) 230 | 231 | @classmethod 232 | def debug(cls, *args, **kwargs): 233 | if cls.logger is None: 234 | cls.init_logger(log_level=logging.DEBUG) 235 | return cls.logger.debug(*args, **kwargs) 236 | 237 | @classmethod 238 | def info(cls, *args, **kwargs): 239 | if cls.logger is None: 240 | cls.init_logger(log_level=logging.DEBUG) 241 | return cls.logger.info(*args, **kwargs) 242 | 243 | @classmethod 244 | def warn(cls, *args, **kwargs): 245 | if cls.logger is None: 246 | cls.init_logger(log_level=logging.DEBUG) 247 | return cls.logger.warning(*args, **kwargs) 248 | 249 | @classmethod 250 | def error(cls, *args, **kwargs): 251 | if cls.logger is None: 252 | cls.init_logger(log_level=logging.DEBUG) 253 | return cls.logger.error(*args, **kwargs) 254 | 255 | @classmethod 256 | def critical(cls, *args, **kwargs): 257 | if cls.logger is None: 258 | cls.init_logger(log_level=logging.DEBUG) 259 | return cls.logger.critical(*args, **kwargs) 260 | 261 | 262 | log = _Log 263 | 264 | 265 | def get_timestamp(timestamp: Union[int, float] = None) -> str: 266 | datetime_ = datetime.fromtimestamp(timestamp) if timestamp else datetime.now() 267 | return datetime_.strftime('%y-%m-%d_%H-%M-%S') 268 | 269 | 270 | class _NamespaceWrapper(object): 271 | def __init__(self, data: dict): 272 | self._data = data 273 | 274 | def __getattr__(self, item): 275 | try: 276 | value = self._data[item] 277 | except KeyError: 278 | value = getattr(self._data, item) 279 | if isinstance(value, dict): 280 | return _NamespaceWrapper(value) 281 | else: 282 | return value 283 | 284 | def __setattr__(self, key, value): 285 | if key.startswith('_'): 286 | super().__setattr__(key, value) 287 | else: 288 | self._data[key] = value 289 | 290 | def __getitem__(self, item): 291 | return self._data[item] 292 | 293 | def __setitem__(self, key, value): 294 | self._data[key] = value 295 | 296 | def __repr__(self): 297 | return f'Wrapper({self._data})' 298 | 299 | 300 | class YamlLoader(_NamespaceWrapper): 301 | def __init__(self, path): 302 | loader = yaml.SafeLoader 303 | # See https://stackoverflow.com/questions/30458977/yaml-loads-5e-6-as-string-and-not-a-number 304 | loader.add_implicit_resolver( 305 | u'tag:yaml.org,2002:float', 306 | re.compile(u'''^(?: 307 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? 308 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) 309 | |\\.[0-9_]+(?:[eE][-+][0-9]+)? 
310 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* 311 | |[-+]?\\.(?:inf|Inf|INF) 312 | |\\.(?:nan|NaN|NAN))$''', re.X), 313 | list(u'-+0123456789.')) 314 | 315 | with open(path, encoding='utf-8') as f: 316 | super().__init__(yaml.load(f, Loader=loader)) 317 | 318 | @property 319 | def data(self): 320 | return self._data 321 | 322 | @property 323 | def args(self): 324 | return () 325 | 326 | @property 327 | def kwargs(self): 328 | return {} 329 | 330 | 331 | def print_return(func): 332 | def wrapper(*args, **kwargs): 333 | ret = func(*args, **kwargs) 334 | print(ret) 335 | return ret 336 | 337 | return wrapper 338 | 339 | 340 | class Natural(object): 341 | idx = -1 342 | 343 | def __iter__(self): 344 | return self 345 | 346 | def __next__(self): 347 | self.idx += 1 348 | return self.idx 349 | 350 | 351 | def plt_figure_to_numpy(fig): 352 | fig.canvas.draw() 353 | arr = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) 354 | arr = arr.reshape(fig.canvas.get_width_height()[::-1] + (3,)) 355 | return arr 356 | -------------------------------------------------------------------------------- /resources/lin.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/resources/lin.gif -------------------------------------------------------------------------------- /resources/rot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/resources/rot.gif --------------------------------------------------------------------------------