├── .deepsource.toml
├── .gitignore
├── README.md
├── example
│   ├── __init__.py
│   ├── lctv0.yaml
│   ├── loct
│   │   ├── __init__.py
│   │   ├── network.py
│   │   ├── test.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── test
│   │   ├── __init__.py
│   │   └── test_tianshou.py
│   └── utils.py
├── qdpgym
│   ├── __init__.py
│   ├── sim
│   │   ├── __init__.py
│   │   ├── abc.py
│   │   ├── app.py
│   │   ├── blt
│   │   │   ├── __init__.py
│   │   │   ├── env.py
│   │   │   ├── hooks.py
│   │   │   ├── quadruped.py
│   │   │   ├── terrain.py
│   │   │   └── utils.py
│   │   ├── common
│   │   │   ├── __init__.py
│   │   │   ├── identify.py
│   │   │   ├── motor.py
│   │   │   ├── noisyhandle.py
│   │   │   └── tg.py
│   │   ├── mjc
│   │   │   ├── __init__.py
│   │   │   ├── env.py
│   │   │   ├── hooks.py
│   │   │   ├── quadruped.py
│   │   │   ├── terrain.py
│   │   │   └── viewer.py
│   │   ├── resources
│   │   │   ├── acnet_220526.pt
│   │   │   ├── actuator_net_with_history.pt
│   │   │   └── aliengo
│   │   │       ├── meshes
│   │   │       │   ├── calf.stl
│   │   │       │   ├── hip.stl
│   │   │       │   ├── thigh.stl
│   │   │       │   ├── thigh_mirror.stl
│   │   │       │   └── trunk.stl
│   │   │       └── model
│   │   │           ├── aliengo.urdf
│   │   │           └── aliengo.xml
│   │   └── task.py
│   ├── tasks
│   │   ├── __init__.py
│   │   ├── identify
│   │   │   ├── __init__.py
│   │   │   └── identify.py
│   │   └── loct
│   │       ├── __init__.py
│   │       ├── app.py
│   │       ├── loct.py
│   │       ├── reward.py
│   │       ├── sr_reward.py
│   │       └── utils.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── common.py
│   │   └── test_pybullet.py
│   ├── thirdparty
│   │   ├── __init__.py
│   │   └── gamepad
│   │       ├── __init__.py
│   │       ├── controllers.py
│   │       └── gamepad.py
│   └── utils
│       ├── __init__.py
│       ├── tf.py
│       └── utils.py
└── resources
    ├── lin.gif
    └── rot.gif
/.deepsource.toml:
--------------------------------------------------------------------------------
1 | version = 1
2 |
3 | [[analyzers]]
4 | name = "python"
5 | enabled = true
6 |
7 | [analyzers.meta]
8 | runtime_version = "3.x.x"
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # .idea folder
10 | .idea/
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | # For a library or package, you might want to ignore these files since the code is
90 | # intended to run in multiple environments; otherwise, check them in:
91 | # .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101 | __pypackages__/
102 |
103 | # Celery stuff
104 | celerybeat-schedule
105 | celerybeat.pid
106 |
107 | # SageMath parsed files
108 | *.sage.py
109 |
110 | # Environments
111 | .env
112 | .venv
113 | venv/
114 | ENV/
115 | env.bak/
116 | venv.bak/
117 |
118 | # Spyder project settings
119 | .spyderproject
120 | .spyproject
121 |
122 | # Rope project settings
123 | .ropeproject
124 |
125 | # mkdocs documentation
126 | /site
127 |
128 | # mypy
129 | .mypy_cache/
130 | .dmypy.json
131 | dmypy.json
132 |
133 | # Pyre type checker
134 | .pyre/
135 |
136 | # pytype static type analyzer
137 | .pytype/
138 |
139 | # customize
140 | log/
141 | log_wuzhen/
142 | MUJOCO_LOG.TXT
143 | *.pth
144 | .vscode/
145 | .DS_Store
146 | *.zip
147 | *.pstats
148 | *.swp
149 | *.pkl
150 | *.hdf5
151 | wandb/
152 | videos/
153 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Quadruped Locomotion Reinforcement Learning
2 |
3 | This is an environment for learning quadrupedal locomotion, built on
4 | either the [bullet physics engine](https://github.com/bulletphysics/bullet3)
5 | or [mujoco](https://github.com/deepmind/mujoco) with
6 | [dm_control](https://github.com/deepmind/dm_control)
7 | (not yet implemented). It provides
8 |
9 | - Interfaces for quadruped states, terrain creation from elevation maps, and environmental states
10 | - Motor identification, i.e. an actuator network
11 | - Curriculum learning over disturbances and terrain
12 | - Rich visualization of commands, forces, torques, terrain and trajectories
13 |
14 | [tianshou](https://github.com/thu-ml/tianshou) is used for training in
15 | our examples.
16 |
17 | [//]: # (- Imitation Learning from privileged teacher to proprioceptive student)
18 |
19 |
20 |
21 |
22 |
23 |
24 | Requirements: `python>=3.7`, `pybullet>=3.2`, `torch>=1.10`, `numpy`, `scipy`, `wandb`; the examples additionally use `gym`, `tianshou` and `pyyaml`
25 |
26 | To train, run:
27 | ```bash
28 | PYTHONPATH=./ python example/loct/train.py --task example/lctv0.yaml \
29 | --batch-size 8192 --repeat-per-collect 8 --step-per-epoch 160000 \
30 | --norm-adv 1 --run-name example
31 | ```
32 |
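The environment can also be assembled programmatically. A minimal sketch mirroring
`example/test/test_tianshou.py` (the reward term and weight here are illustrative, not a tuned
configuration):

```python
import numpy as np

from qdpgym import sim
from qdpgym.tasks.loct import LocomotionV0, RandomCommanderHookV0

robot = sim.Aliengo(500, 'actuator_net', noisy=False)  # 500 Hz sim, actuator-net motor model
task = LocomotionV0()
task.add_hook(RandomCommanderHookV0())                 # issues random locomotion commands
task.add_reward('UnifiedLinearReward', 0.1)
env = sim.QuadrupedEnv(robot, sim.Plain(), task)

obs = env.reset()
obs, reward, done, info = env.step(np.zeros(12))       # zero action, just to exercise the loop
```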
--------------------------------------------------------------------------------
/example/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/__init__.py
--------------------------------------------------------------------------------
/example/lctv0.yaml:
--------------------------------------------------------------------------------
1 | terrain: plain
2 | perturb: True
3 |
4 | reward_coeff: 0.05
5 | reward_cfg:
6 | LinearVelocityReward: 0.34
7 | RotationReward: 0.24
8 | # YawRateReward: 0.12
9 | VerticalLinearPenalty: 0.04
10 | RollPitchRatePenalty: 0.04
11 | BodyPosturePenalty: 0.04
12 | FootSlipPenalty: 0.04
13 | BodyCollisionPenalty: 0.04
14 | JointConstraintPenalty: 0.04
15 | TorquePenalty: 0.02
16 | JointMotionPenalty: 0.04
17 | ActionSmoothnessReward: 0.04
18 | ClearanceOverTerrainReward: 0.02
19 | BodyHeightReward: 0.0
20 |
21 | # VelocityReward: 1.5
22 | # RotationReward: 0.75
23 | # BodyMotionPenalty: 1.0
24 | # FootClearanceReward: 0.003
25 | # BodyCollisionPenalty: 0.1
26 | # JointMotionPenalty: 0.001
27 | # TargetSmoothnessReward: 0.003
28 | # TorquePenalty: 1.0e-6
29 | # SlipPenalty: 0.003
--------------------------------------------------------------------------------
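A note on how the configuration above is consumed: `example/loct/train.py` and `example/loct/test.py`
load the YAML, register every entry of `reward_cfg` as a weighted reward term, and pass `reward_coeff`
to the task. A condensed sketch of that loop:

```python
import yaml

from qdpgym.tasks.loct import LocomotionV0

with open('example/lctv0.yaml', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

task = LocomotionV0()
for reward, weight in cfg['reward_cfg'].items():
    task.add_reward(reward, weight)              # register each weighted reward term
if 'reward_coeff' in cfg:
    task.set_reward_coeff(cfg['reward_coeff'])   # overall reward coefficient
```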
/example/loct/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/loct/__init__.py
--------------------------------------------------------------------------------
/example/loct/network.py:
--------------------------------------------------------------------------------
1 | from typing import Type, Union, Any, Dict, Tuple
2 |
3 | import numpy as np
4 | import torch
5 | from torch import nn
6 |
7 |
8 | class ActorNetMLP(nn.Module):
9 | """
10 | extero_obs -> extero_layers ->|
11 | |-> concat -> action_layers -> action
12 | proprio_obs -> proprio_layers ->|
13 | """
14 |
15 | def __init__(
16 | self,
17 | extero_obs_dim,
18 | real_world_obs_dim,
19 | extero_layer_dims=(72, 64),
20 | locomotion_layer_dims=(),
21 | action_layer_dims=(256, 160, 128),
22 | activation: Type[nn.Module] = nn.Tanh,
23 | device='cpu'
24 | ):
25 | super().__init__()
26 | self.input_dim = extero_obs_dim + real_world_obs_dim
27 | self.extero_obs_dim = extero_obs_dim
28 | self.real_world_obs_dim = real_world_obs_dim
29 | extero_layers, locomotion_layers, action_layers = [], [], []
30 | self.extero_obs_dim = extero_feature_dim = extero_obs_dim
31 | self.device = torch.device(device)
32 | if extero_layer_dims:
33 | for dim in extero_layer_dims:
34 | extero_layers.append(nn.Linear(extero_feature_dim, dim))
35 | extero_layers.append(activation())
36 | extero_feature_dim = dim
37 |
38 | self.real_world_obs_dim = locomotion_feature_dim = real_world_obs_dim
39 | if locomotion_layer_dims:
40 | for dim in locomotion_layer_dims:
41 | locomotion_layers.append(nn.Linear(locomotion_feature_dim, dim))
42 | locomotion_layers.append(activation())
43 | locomotion_feature_dim = dim
44 |
45 | action_feature_dim = extero_feature_dim + locomotion_feature_dim
46 | for dim in action_layer_dims:
47 | action_layers.append(nn.Linear(action_feature_dim, dim))
48 | action_layers.append(activation())
49 | action_feature_dim = dim
50 | self.output_dim = action_feature_dim
51 |
52 | self.extero_layers = nn.Sequential(*extero_layers)
53 | self.locomotion_layers = nn.Sequential(*locomotion_layers)
54 | self.action_layers = nn.Sequential(*action_layers)
55 |
56 | def forward(self, x):
57 | if self.device is not None:
58 | x = torch.as_tensor(x, device=self.device, dtype=torch.float32)
59 | extero_obs, real_world_obs = x[..., :self.extero_obs_dim], x[..., self.extero_obs_dim:]
60 | extero_feature, locomotion_feature = self.extero_layers(extero_obs), self.locomotion_layers(real_world_obs)
61 | return self.action_layers(torch.cat((extero_feature, locomotion_feature), dim=-1))
62 |
63 |
64 | class ActorNet(nn.Module):
65 | def __init__(
66 | self,
67 | extero_obs_dim,
68 | real_world_obs_dim,
69 | extero_layer_dims=(72, 64),
70 | locomotion_layer_dims=(),
71 | action_layer_dims=(256, 160, 128),
72 | activation: Type[nn.Module] = nn.Tanh,
73 | device='cpu'
74 | ) -> None:
75 | super().__init__()
76 | self.device = device
77 | self.model = ActorNetMLP(
78 | extero_obs_dim,
79 | real_world_obs_dim,
80 | extero_layer_dims,
81 | locomotion_layer_dims,
82 | action_layer_dims,
83 | activation,
84 | device,
85 | )
86 | self.output_dim = self.model.output_dim
87 |
88 | def forward(
89 | self,
90 | obs: Union[np.ndarray, torch.Tensor],
91 | state: Any = None,
92 | info: Dict[str, Any] = {},
93 | ) -> Tuple[torch.Tensor, Any]:
94 | """Mapping: obs -> flatten (inside MLP)-> logits."""
95 | logits = self.model(obs)
96 | return logits, state
97 |
--------------------------------------------------------------------------------
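A quick shape check for the two-branch actor above; the observation split (78 exteroceptive +
132 remaining, proprioceptive dimensions) follows `example/loct/train.py`, and the batch size is
arbitrary:

```python
import torch

from example.loct.network import ActorNet

net = ActorNet(extero_obs_dim=78, real_world_obs_dim=132, device='cpu')
obs = torch.randn(4, 78 + 132)  # a batch of flat observations
logits, state = net(obs)        # `state` is passed through unchanged
print(logits.shape)             # torch.Size([4, 128]), i.e. (batch, net.output_dim)
```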
/example/loct/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 |
5 | import numpy as np
6 | import torch
7 | import yaml
8 | from tianshou.data import Collector
9 | from tianshou.policy import PPOPolicy
10 | from tianshou.utils.net.common import Net
11 | from tianshou.utils.net.continuous import ActorProb, Critic
12 | from torch import nn
13 | from torch.distributions import Independent, Normal
14 |
15 | from example.loct.network import ActorNet
16 | from example.utils import NormObsWrapper
17 | from qdpgym import sim
18 | from qdpgym.tasks.loct import LocomotionV0, GamepadCommanderHook
19 |
20 |
21 | def parse_args():
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument("--task", type=str, required=True)
24 | parser.add_argument("--seed", type=int, default=0)
25 | parser.add_argument("--render", type=float, default=0.)
26 | parser.add_argument(
27 | "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu"
28 | )
29 | parser.add_argument("--resume-path", type=str, default=None)
30 | return parser.parse_args()
31 |
32 |
33 | def make_loct_env(cfg):
34 | torch.set_num_threads(1)
35 | robot = sim.Aliengo(500, 'actuator_net', noisy=True)
36 | task = LocomotionV0()
37 | # task = LocomotionPMTG()
38 |
39 | if cfg['terrain'] == 'random':
40 | arena = sim.NullTerrain()
41 | task.add_hook(sim.RandomTerrainHook())
42 | elif cfg['terrain'] == 'plain':
43 | arena = sim.Plain()
44 | else:
45 | raise ValueError(f'Unknown terrain `{cfg.terrain}`')
46 | if cfg['perturb']:
47 | task.add_hook(sim.RandomPerturbHook())
48 |
49 | task.add_hook(GamepadCommanderHook())
50 | # task.add_hook(sim.HeightSampleVisualizer())
51 | # task.add_hook(RandomCommanderHookV0())
52 | # task.add_hook(sim.VideoRecorderHook())
53 | task.add_hook(sim.ExtraViewerHook())
54 | for reward, weight in cfg['reward_cfg'].items():
55 | task.add_reward(reward, weight)
56 |
57 | if 'reward_coeff' in cfg:
58 | task.set_reward_coeff(cfg['reward_coeff'])
59 | env = sim.QuadrupedEnv(robot, arena, task)
60 | return env
61 |
62 |
63 | def test_ppo(args):
64 | with open(args.task, encoding='utf-8') as f:
65 | task_cfg = yaml.load(f, Loader=yaml.SafeLoader)
66 |
67 | def make_env():
68 | return make_loct_env(task_cfg)
69 |
70 | obs_norm = False
71 | env = make_env()
72 | if obs_norm:
73 | env = NormObsWrapper(env)
74 |
75 | # seed
76 | np.random.seed(args.seed)
77 | torch.manual_seed(args.seed)
78 | # model
79 | net_a = ActorNet(78, 132, device=args.device)
80 | actor = ActorProb(
81 | net_a,
82 | env.action_space.shape,
83 | unbounded=True,
84 | device=args.device,
85 | ).to(args.device)
86 | net_c = Net(
87 | env.observation_space.shape,
88 | hidden_sizes=(256, 256, 256),
89 | activation=nn.Tanh,
90 | device=args.device,
91 | )
92 | critic = Critic(net_c, device=args.device).to(args.device)
93 | optim = torch.optim.AdamW(
94 | list(actor.parameters()) + list(critic.parameters())
95 | )
96 |
97 | policy = PPOPolicy(
98 | actor, critic, optim,
99 | lambda *logits: Independent(Normal(*logits), 1)
100 | )
101 | # load a previous policy
102 | if args.resume_path:
103 | ckpt = torch.load(args.resume_path, map_location=args.device)
104 | policy.load_state_dict(ckpt["model"])
105 | if obs_norm:
106 | env.set_obs_rms(ckpt['obs_rms'])
107 | print("Loaded agent from: ", args.resume_path)
108 | test_collector = Collector(policy, env)
109 |
110 | # Let's watch its performance!
111 | policy.eval()
112 | test_collector.reset()
113 | result = test_collector.collect(n_episode=100, render=args.render)
114 | print(f'Final reward: {result["rews"].mean()}, length: {result["lens"].mean()}')
115 |
116 |
117 | if __name__ == "__main__":
118 | args = parse_args()
119 | test_ppo(args)
120 |
--------------------------------------------------------------------------------
/example/loct/train.py:
--------------------------------------------------------------------------------
1 | import pybullet
2 | import argparse
3 | import os
4 | import pprint
5 |
6 | import numpy as np
7 | import torch
8 | import wandb
9 | import yaml
10 | from tianshou.data import Collector, ReplayBuffer, VectorReplayBuffer
11 | from tianshou.env import ShmemVectorEnv, VectorEnvNormObs
12 | from tianshou.policy import PPOPolicy
13 | from tianshou.trainer import onpolicy_trainer
14 | from tianshou.utils.net.common import Net
15 | from tianshou.utils.net.continuous import ActorProb, Critic
16 | from torch import nn
17 | from torch.distributions import Independent, Normal
18 | from torch.optim.lr_scheduler import LambdaLR
19 |
20 | from example.loct.network import ActorNet
21 | from example.utils import init_actor_critic, MyWandbLogger
22 | from qdpgym import sim
23 | from qdpgym.tasks import loct
24 | from qdpgym.utils import get_timestamp
25 |
26 |
27 | def parse_args():
28 | parser = argparse.ArgumentParser()
29 | parser.add_argument("--task", type=str, required=True)
30 | parser.add_argument("--seed", type=int, default=0)
31 | parser.add_argument("--buffer-size", type=int, default=4096)
32 | parser.add_argument("--lr", type=float, default=1e-4)
33 | parser.add_argument("--gamma", type=float, default=0.99)
34 | parser.add_argument("--epoch", type=int, default=100)
35 | parser.add_argument("--step-per-epoch", type=int, default=30000)
36 | parser.add_argument("--step-per-collect", type=int, default=8192)
37 | parser.add_argument("--repeat-per-collect", type=int, default=8)
38 | parser.add_argument("--batch-size", type=int, default=8192)
39 | parser.add_argument("--training-num", type=int, default=64)
40 | parser.add_argument("--test-num", type=int, default=10)
41 | parser.add_argument("--obs-norm", type=int, default=0)
42 |
43 | # ppo special
44 | parser.add_argument("--rew-norm", type=int, default=1)
45 | parser.add_argument("--vf-coef", type=float, default=1.0)
46 | parser.add_argument("--ent-coef", type=float, default=0.0)
47 | parser.add_argument("--gae-lambda", type=float, default=0.95)
48 | parser.add_argument("--bound-action-method", type=str, default="tanh")
49 | parser.add_argument("--lr-decay", type=int, default=True)
50 | parser.add_argument("--max-grad-norm", type=float, default=1.0)
51 | parser.add_argument("--eps-clip", type=float, default=0.2)
52 | parser.add_argument("--dual-clip", type=float, default=None)
53 | parser.add_argument("--value-clip", type=int, default=0)
54 | parser.add_argument("--norm-adv", type=int, default=1)
55 | parser.add_argument("--recompute-adv", type=int, default=0)
56 | parser.add_argument("--logdir", type=str, default="log")
57 | parser.add_argument(
58 | "--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu"
59 | )
60 | parser.add_argument("--run-name", type=str, required=True)
61 | parser.add_argument("--resume-path", type=str, default=None)
62 | parser.add_argument("--resume-id", type=str, default=None)
63 | parser.add_argument("--wandb-project", type=str, default="loct")
64 | return parser.parse_args()
65 |
66 |
67 | if __name__ == "__main__":
68 | args = parse_args()
69 | with open(args.task, encoding='utf-8') as f:
70 | task_cfg = yaml.load(f, Loader=yaml.SafeLoader)
71 |
72 | # commander_core = loct.ISCommanderCore('RotationReward', 100)
73 | commander_core = loct.AlpISCommanderCore('RotationReward')
74 | commander_core.start_process()
75 |
76 |
77 | def make_loct_env(cfg, train=True):
78 | torch.set_num_threads(1)
79 | robot = sim.Aliengo(500, 'actuator_net', noisy=True)
80 | task = loct.LocomotionV0()
81 |
82 | if cfg['terrain'] == 'random':
83 | arena = sim.NullTerrain()
84 | task.add_hook(sim.RandomTerrainHook())
85 | elif cfg['terrain'] == 'plain':
86 | arena = sim.Plain()
87 | else:
88 | raise ValueError(f'Unknown terrain `{cfg.terrain}`')
89 | if cfg['perturb']:
90 | task.add_hook(sim.RandomPerturbHook())
91 | if train:
92 | task.add_hook(commander_core.make_collector())
93 | # task.add_hook(loct.RandomRotationCommanderHook())
94 | task.add_hook(commander_core.make_commander())
95 | else:
96 | task.add_hook(loct.RandomRotationCommanderHook())
97 | for reward, weight in cfg['reward_cfg'].items():
98 | task.add_reward(reward, weight)
99 |
100 | if 'reward_coeff' in cfg:
101 | task.set_reward_coeff(cfg['reward_coeff'])
102 | env = sim.QuadrupedEnv(robot, arena, task)
103 | return env
104 |
105 |
106 | env = make_loct_env(task_cfg)
107 | train_envs = ShmemVectorEnv([
108 | lambda: make_loct_env(task_cfg)
109 | for _ in range(args.training_num)
110 | ])
111 | test_envs = ShmemVectorEnv([
112 | lambda: make_loct_env(task_cfg, False)
113 | for _ in range(args.test_num)
114 | ]) if args.test_num else None
115 |
116 | if args.obs_norm:
117 | # obs norm wrapper
118 | train_envs = VectorEnvNormObs(train_envs)
119 | test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False)
120 | test_envs.set_obs_rms(train_envs.get_obs_rms())
121 |
122 | obs_shape = env.observation_space.shape or env.observation_space.n
123 | action_shape = env.action_space.shape or env.action_space.n
124 | print("Observations shape:", obs_shape)
125 | print("Actions shape:", action_shape)
126 |
127 | # seed
128 | np.random.seed(args.seed)
129 | torch.manual_seed(args.seed)
130 | # model
131 | net_a = ActorNet(78, 132, device=args.device)
132 | actor = ActorProb(
133 | net_a,
134 | action_shape,
135 | max_action=1.0,
136 | unbounded=True,
137 | device=args.device,
138 | ).to(args.device)
139 | net_c = Net(
140 | obs_shape,
141 | hidden_sizes=(256, 256, 256),
142 | activation=nn.Tanh,
143 | device=args.device,
144 | )
145 | critic = Critic(net_c, device=args.device).to(args.device)
146 | torch.nn.init.constant_(actor.sigma_param, -2.3)
147 | optim = torch.optim.AdamW(
148 | list(actor.parameters()) + list(critic.parameters()), lr=args.lr
149 | )
150 | init_actor_critic(actor, critic)
151 |
152 | lr_scheduler = None
153 | if args.lr_decay:
154 | # decay learning rate to 0 linearly
155 | max_update_num = np.ceil(
156 | args.step_per_epoch / args.step_per_collect
157 | ) * args.epoch
158 |
159 | lr_scheduler = LambdaLR(
160 | optim, lr_lambda=lambda epoch: 1 - epoch / max_update_num
161 | )
162 |
163 |
164 | def dist(*logits):
165 | return Independent(Normal(*logits), 1)
166 |
167 |
168 | policy = PPOPolicy(
169 | actor,
170 | critic,
171 | optim,
172 | dist,
173 | discount_factor=args.gamma,
174 | gae_lambda=args.gae_lambda,
175 | max_grad_norm=args.max_grad_norm,
176 | vf_coef=args.vf_coef,
177 | ent_coef=args.ent_coef,
178 | reward_normalization=args.rew_norm,
179 | action_scaling=True,
180 | action_bound_method=args.bound_action_method,
181 | lr_scheduler=lr_scheduler,
182 | action_space=env.action_space,
183 | eps_clip=args.eps_clip,
184 | value_clip=args.value_clip,
185 | dual_clip=args.dual_clip,
186 | advantage_normalization=args.norm_adv,
187 | recompute_advantage=args.recompute_adv,
188 | )
189 |
190 | # load a previous policy
191 | if args.resume_path:
192 | ckpt = torch.load(args.resume_path, map_location=args.device)
193 | policy.load_state_dict(ckpt["model"])
194 | if args.obs_norm:
195 | train_envs.set_obs_rms(ckpt["obs_rms"])
196 | test_envs.set_obs_rms(ckpt["obs_rms"])
197 | print("Loaded agent from: ", args.resume_path)
198 |
199 | # logger
200 | logger = MyWandbLogger(
201 | project=args.wandb_project,
202 | name=args.run_name,
203 | run_id=args.resume_id,
204 | config=args,
205 | )
206 | start_time, run_name, run_id = wandb.run.start_time, wandb.run.name, wandb.run.id
207 | log_path = os.path.join(args.logdir, f'{get_timestamp(start_time)}#{run_name}@{run_id}')
208 | os.makedirs(log_path)
209 |
210 | # collector
211 | if args.training_num > 1:
212 | buffer = VectorReplayBuffer(args.step_per_collect, len(train_envs))
213 | else:
214 | buffer = ReplayBuffer(args.step_per_collect)
215 |
216 | train_collector = Collector(
217 | policy, train_envs, buffer,
218 | preprocess_fn=logger.collect_reward_info,
219 | exploration_noise=True
220 | )
221 | test_collector = Collector(
222 | policy, test_envs,
223 | preprocess_fn=logger.collect_reward_info,
224 | ) if test_envs is not None else None
225 |
226 |
227 | def analyse_callback():
228 | fig1, fig2 = commander_core.analyse()
229 | return {
230 | 'curricula/reward': wandb.Image(fig1, mode='RGB'),
231 | 'curricula/weight': wandb.Image(fig2, mode='RGB'),
232 | }
233 |
234 |
235 | logger.add_callback(
236 | analyse_callback, 'test'
237 | )
238 |
239 |
240 | def save_best_fn(policy):
241 | state = {"model": policy.state_dict()}
242 | if args.obs_norm:
243 | state["obs_rms"] = train_envs.get_obs_rms()
244 | torch.save(state, os.path.join(log_path, "policy.pth"))
245 |
246 |
247 | def save_checkpoint_fn(epoch: int, env_step: int, gradient_step: int):
248 | state = {"model": policy.state_dict()}
249 | if args.obs_norm:
250 | state["obs_rms"] = train_envs.get_obs_rms()
251 | path = os.path.join(log_path, f"policy_{epoch}.pth")
252 | torch.save(state, path)
253 | return path
254 |
255 |
256 | result = onpolicy_trainer(
257 | policy,
258 | train_collector,
259 | test_collector,
260 | args.epoch,
261 | args.step_per_epoch,
262 | args.repeat_per_collect,
263 | args.test_num,
264 | args.batch_size,
265 | step_per_collect=args.step_per_collect,
266 | save_best_fn=save_best_fn,
267 | save_checkpoint_fn=save_checkpoint_fn,
268 | logger=logger,
269 | test_in_train=False,
270 | )
271 | pprint.pprint(result)
272 |
273 | # Let's watch its performance!
274 | policy.eval()
275 | test_envs.seed(args.seed)
276 | test_collector.reset()
277 | result = test_collector.collect(n_episode=args.test_num)
278 | print(f'Final reward: {result["rews"].mean()}, length: {result["lens"].mean()}')
279 |
--------------------------------------------------------------------------------
/example/loct/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class PolicyWrapper(object):
5 | def __init__(self, net, device):
6 | self.net = net
7 | self.device = torch.device(device)
8 |
9 | def __call__(self, obs):
10 | with torch.inference_mode():
11 | obs = torch.as_tensor(obs, dtype=torch.float32, device=self.device)
12 | return self.net(obs).detach().cpu().numpy()
13 |
14 |
15 |
--------------------------------------------------------------------------------
/example/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/example/test/__init__.py
--------------------------------------------------------------------------------
/example/test/test_tianshou.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from tianshou.env import ShmemVectorEnv
4 | from tianshou.utils.net.continuous import ActorProb
5 | from torch import nn
6 |
7 | from example.loct.network import ActorNet
8 | from qdpgym import sim
9 | from qdpgym.tasks.loct import RandomCommanderHookV0, LocomotionV0
10 |
11 |
12 | def test_actor():
13 | device = 'cuda'
14 | net_a = ActorNet(
15 | 78, 133,
16 | activation=nn.Tanh,
17 | device=device,
18 | )
19 | actor = ActorProb(
20 | net_a,
21 | (12,),
22 | max_action=1.,
23 | unbounded=True,
24 | device=device,
25 | ).to(device)
26 |
27 | print(actor(torch.randn(size=(1, 211))))
28 |
29 |
30 | def make_loct_env():
31 | torch.set_num_threads(1)
32 | robot = sim.Aliengo(500, 'actuator_net', noisy=False)
33 | task = LocomotionV0()
34 | task.add_hook(RandomCommanderHookV0())
35 | arena = sim.Plain()
36 | # for reward, weight in cfg['reward_cfg'].items():
37 | task.add_reward('UnifiedLinearReward', 0.1)
38 | env = sim.QuadrupedEnv(robot, arena, task)
39 | return env
40 |
41 |
42 | def test_mp():
43 | envs = ShmemVectorEnv(
44 | [make_loct_env for _ in range(4)]
45 | )
46 |
47 | print(envs.reset())
48 | print(envs.step(np.zeros((4, 12))))
49 | print(envs.reset())
50 |
--------------------------------------------------------------------------------
/example/utils.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from collections import defaultdict
3 | from typing import Callable, List, Union, Optional, Any, Dict
4 |
5 | import gym
6 | import numpy as np
7 | import torch
8 | import wandb
9 | from tianshou.data import Batch
10 | from tianshou.env import ShmemVectorEnv, VectorEnvNormObs
11 | from tianshou.utils import RunningMeanStd, BaseLogger
12 |
13 |
14 | class NormObsWrapper(gym.Wrapper):
15 |     """An observation normalization wrapper for a single environment.
16 |
17 |     :param bool update_obs_rms: whether to update obs_rms. Defaults to False.
18 |     :param float clip_obs: the maximum absolute value for observations. Defaults to
19 |         10.0.
20 |     :param float epsilon: a small constant to avoid division by zero.
21 | """
22 |
23 | def __init__(
24 | self,
25 | env: gym.Env,
26 | update_obs_rms: bool = False,
27 | clip_obs: float = 10.0,
28 | epsilon: float = np.finfo(np.float32).eps.item(),
29 | ) -> None:
30 |
31 | super(NormObsWrapper, self).__init__(env)
32 | # initialize observation running mean/std
33 | self.update_obs_rms = update_obs_rms
34 | self.obs_rms = RunningMeanStd()
35 | self.clip_max = clip_obs
36 | self.eps = epsilon
37 |
38 | def reset(
39 | self, *args, **kwargs
40 | ):
41 | obs = self.env.reset(*args, **kwargs)
42 | if self.obs_rms and self.update_obs_rms:
43 | self.obs_rms.update(obs)
44 | return self._norm_obs(obs)
45 |
46 | def step(
47 | self,
48 | action: np.ndarray,
49 | ):
50 | obs, rew, done, info = self.env.step(action)
51 | if self.obs_rms and self.update_obs_rms:
52 | self.obs_rms.update(obs)
53 | return self._norm_obs(obs), rew, done, info
54 |
55 | def _norm_obs(self, obs: np.ndarray) -> np.ndarray:
56 | if self.obs_rms:
57 | obs = (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.eps)
58 | obs = np.clip(obs, -self.clip_max, self.clip_max)
59 | return obs
60 |
61 | def set_obs_rms(self, obs_rms: RunningMeanStd) -> None:
62 | """Set with given observation running mean/std."""
63 | self.obs_rms = obs_rms
64 |
65 | def get_obs_rms(self) -> RunningMeanStd:
66 | """Return observation running mean/std."""
67 | return self.obs_rms
68 |
69 |
70 | def make_parallel_env(
71 | make_env: Callable[[], gym.Env],
72 | seed: Union[int, None, List[int]],
73 | training_num: int,
74 | test_num: int,
75 | obs_norm: bool
76 | ):
77 |     """Build a single env together with vectorized training and test envs.
78 |
79 |     Envs are wrapped in ShmemVectorEnv, with optional observation normalization.
80 |
81 | :return: a tuple of (single env, training envs, test envs).
82 | """
83 |
84 | env = make_env()
85 | train_envs = ShmemVectorEnv(
86 | [make_env for _ in range(training_num)]
87 | )
88 | test_envs = ShmemVectorEnv(
89 | [make_env for _ in range(test_num)]
90 | ) if test_num else None
91 |     train_envs.seed(seed)
92 |     if test_envs is not None:
93 |         test_envs.seed(seed)
94 |     if obs_norm:
95 |         # obs norm wrapper
96 |         train_envs = VectorEnvNormObs(train_envs)
97 |         if test_envs is not None:
98 |             test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False)
99 |             test_envs.set_obs_rms(train_envs.get_obs_rms())
98 | return env, train_envs, test_envs
99 |
100 |
101 | def init_actor_critic(actor, critic):
102 | for m in list(actor.modules()) + list(critic.modules()):
103 | if isinstance(m, torch.nn.Linear):
104 | # orthogonal initialization
105 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2))
106 | torch.nn.init.zeros_(m.bias)
107 | # do last policy layer scaling, this will make initial actions have (close to)
108 | # 0 mean and std, and will help boost performances,
109 | # see https://arxiv.org/abs/2006.05990, Fig.24 for details
110 | for m in actor.mu.modules():
111 | if isinstance(m, torch.nn.Linear):
112 | torch.nn.init.zeros_(m.bias)
113 | m.weight.data.copy_(0.01 * m.weight.data)
114 |
115 |
116 | class MyWandbLogger(BaseLogger):
117 | def __init__(
118 | self,
119 | project: str,
120 | train_interval: int = 1000,
121 | test_interval: int = 1,
122 | update_interval: int = 1000,
123 | name: Optional[str] = None,
124 | entity: Optional[str] = None,
125 | run_id: Optional[str] = None,
126 | config: Optional[argparse.Namespace] = None,
127 | ) -> None:
128 | super().__init__(train_interval, test_interval, update_interval)
129 | self.wandb_run = wandb.init(
130 | project=project,
131 | name=name,
132 | id=run_id,
133 | resume="allow",
134 | entity=entity,
135 | monitor_gym=True,
136 | config=config, # type: ignore
137 | save_code=True,
138 | # mode='disabled',
139 | ) if not wandb.run else wandb.run
140 |
141 | self._reward_info = defaultdict(float)
142 | self._reward_counter = 0
143 | self._callbacks_train: List[Callable[[], dict]] = []
144 | self._callbacks_test: List[Callable[[], dict]] = []
145 |
146 | def collect_reward_info(self, **kwargs) -> Batch:
147 | if 'rew' in kwargs:
148 | for k, v in kwargs['info']['reward_info'].items():
149 | self._reward_info[k] += v.mean()
150 | self._reward_counter += 1
151 | return Batch()
152 |
153 | def write(self, step_type: str, step: int, data: Dict[str, Any]) -> None:
154 | wandb.log(data, step=step)
155 |
156 | def save_data(
157 | self,
158 | epoch: int,
159 | env_step: int,
160 | gradient_step: int,
161 | save_checkpoint_fn: Optional[Callable[[int, int, int], Any]] = None,
162 | ) -> None:
163 | if save_checkpoint_fn:
164 | save_checkpoint_fn(epoch, env_step, gradient_step)
165 |
166 | def add_callback(
167 | self, callback: Callable[[], dict],
168 | period='train'
169 | ) -> None:
170 | if period == 'train':
171 | self._callbacks_train.append(callback)
172 | elif period == 'test':
173 | self._callbacks_test.append(callback)
174 | elif period == 'both':
175 | self._callbacks_train.append(callback)
176 | self._callbacks_test.append(callback)
177 | else:
178 | raise ValueError(f'Unknown period {period}')
179 |
180 | def log_train_data(self, collect_result: dict, step: int) -> None:
181 | """Use writer to log statistics generated during training.
182 |
183 | :param collect_result: a dict containing information of data collected in
184 | training stage, i.e., returns of collector.collect().
185 | :param int step: stands for the timestep the collect_result being logged.
186 | """
187 | if collect_result["n/ep"] > 0:
188 | if step - self.last_log_train_step >= self.train_interval:
189 | log_data = {
190 | "train/episode": collect_result["n/ep"],
191 | "train/reward": collect_result["rew"],
192 | "train/length": collect_result["len"],
193 | }
194 | for k, v in self._reward_info.items():
195 | log_data[f'train/reward_info/{k}'] = v / self._reward_counter
196 | self._reward_info.clear()
197 | self._reward_counter = 0
198 | for callback in self._callbacks_train:
199 | log_data.update(callback())
200 | self.write("train/env_step", step, log_data)
201 | self.last_log_train_step = step
202 |
203 | def log_test_data(self, collect_result: dict, step: int) -> None:
204 |
205 | """Use writer to log statistics generated during evaluating.
206 |
207 | :param collect_result: a dict containing information of data collected in
208 | evaluating stage, i.e., returns of collector.collect().
209 | :param int step: stands for the timestep the collect_result being logged.
210 | """
211 | assert collect_result["n/ep"] > 0
212 | if step - self.last_log_test_step >= self.test_interval:
213 | log_data = {
214 | "test/env_step": step,
215 | "test/reward": collect_result["rew"],
216 | "test/length": collect_result["len"],
217 | "test/reward_std": collect_result["rew_std"],
218 | "test/length_std": collect_result["len_std"],
219 | }
220 | for k, v in self._reward_info.items():
221 | log_data[f'test/reward_info/{k}'] = v / self._reward_counter
222 | self._reward_info.clear()
223 | self._reward_counter = 0
224 | for callback in self._callbacks_test:
225 | log_data.update(callback())
226 | self.write("test/env_step", step, log_data)
227 | self.last_log_test_step = step
228 |
--------------------------------------------------------------------------------
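For reference, `NormObsWrapper._norm_obs` above is a standard running mean/std whitening followed
by clipping. A standalone numeric check (the statistics are made up for illustration):

```python
import numpy as np

mean, var = np.array([1.0, -2.0]), np.array([4.0, 0.25])
eps, clip_max = np.finfo(np.float32).eps.item(), 10.0

obs = np.array([3.0, -2.5])
norm = np.clip((obs - mean) / np.sqrt(var + eps), -clip_max, clip_max)
print(norm)  # approximately [ 1. -1.]
```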
/qdpgym/__init__.py:
--------------------------------------------------------------------------------
1 | import enum
2 |
3 | import torch
4 |
5 | from .utils import tf
6 |
7 | try:
8 | from torch import inference_mode as _im
9 | except ImportError:
10 | torch.inference_mode = torch.no_grad
11 |
12 |
13 | class Sim(enum.IntEnum):
14 | BULLET = 0
15 | MUJOCO = 1
16 |
17 |
18 | sim_engine: Sim = Sim.BULLET
19 |
--------------------------------------------------------------------------------
/qdpgym/sim/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import qdpgym
4 |
5 | pkg_path = os.path.dirname(os.path.abspath(__file__))
6 | rsc_dir = os.path.join(pkg_path, 'resources')
7 |
8 |
9 | def is_bullet_available():
10 | try:
11 | import pybullet
12 | except ModuleNotFoundError:
13 | return False
14 | return True
15 |
16 |
17 | def is_mujoco_available():
18 | try:
19 | import mujoco
20 | import dm_control
21 | except ModuleNotFoundError:
22 | return False
23 | return True
24 |
25 |
26 | if qdpgym.sim_engine == qdpgym.Sim.MUJOCO:
27 |     assert is_mujoco_available(), 'mujoco and dm_control are not installed'
28 | from .mjc.quadruped import Aliengo
29 | from .mjc.env import QuadrupedEnv
30 | from .mjc.terrain import *
31 | from .mjc.hooks import *
32 | else:
33 | assert is_bullet_available(), 'pybullet is not installed'
34 | from .blt.quadruped import Aliengo
35 | from .blt.env import QuadrupedEnv
36 | from .blt.terrain import *
37 | from .blt.hooks import *
38 |
39 | from .abc import *
40 | from .app import Application
41 |
--------------------------------------------------------------------------------
/qdpgym/sim/abc.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from dataclasses import dataclass, field
3 | import multiprocessing as mp
4 | from typing import Union, Type, List, Any, Tuple
5 |
6 | import gym
7 | import numpy as np
8 |
9 | ARRAY_LIKE = Union[np.ndarray, list, tuple]
10 | NUMERIC = Union[int, float]
11 |
12 |
13 | class QuadrupedHandle(metaclass=abc.ABCMeta):
14 | @property
15 | def obs_history(self):
16 | raise NotImplementedError
17 |
18 | @property
19 | def cmd_history(self):
20 | raise NotImplementedError
21 |
22 | def get_base_pos(self) -> np.ndarray:
23 | """Get base position in WORLD frame"""
24 | raise NotImplementedError
25 |
26 | def get_base_orn(self) -> np.ndarray:
27 | """Get base quaternion in [x, y, z, w]"""
28 | raise NotImplementedError
29 |
30 | def get_base_rot(self) -> np.ndarray:
31 | """Get base 3x3 rotation matrix to WORLD frame"""
32 | raise NotImplementedError
33 |
34 | def get_base_rpy(self) -> np.ndarray:
35 | """
36 | Get base Tait–Bryan angles in z-y-x
37 | @return: np.ndarray: [roll, pitch, yaw]
38 | """
39 | raise NotImplementedError
40 |
41 | def get_base_rpy_rate(self) -> np.ndarray:
42 | """
43 | Get the rate of base rpy (not angular velocity)
44 | """
45 | raise NotImplementedError
46 |
47 | def get_base_lin(self) -> np.ndarray:
48 | """Get base linear velocity in WORLD frame"""
49 | raise NotImplementedError
50 |
51 | def get_base_ang(self) -> np.ndarray:
52 | """Get base angular velocity in WORLD frame"""
53 | raise NotImplementedError
54 |
55 | def get_velocimeter(self) -> np.ndarray:
56 | """Get base linear velocity in BASE frame"""
57 | raise NotImplementedError
58 |
59 | def get_gyro(self) -> np.ndarray:
60 | """Get base angular velocity in BASE frame"""
61 | raise NotImplementedError
62 |
63 | def get_accelerometer(self) -> np.ndarray:
64 | """Get base acceleration in BASE frame"""
65 | raise NotImplementedError
66 |
67 | def get_state_history(self, latency):
68 | raise NotImplementedError
69 |
70 | def get_cmd_history(self, latency):
71 | raise NotImplementedError
72 |
73 | def get_torso_contact(self):
74 | raise NotImplementedError
75 |
76 | def get_leg_contacts(self):
77 | raise NotImplementedError
78 |
79 | def get_foot_pos(self):
80 | raise NotImplementedError
81 |
82 | def get_foot_contacts(self):
83 | raise NotImplementedError
84 |
85 | def get_contact_forces(self):
86 | raise NotImplementedError
87 |
88 | def get_force_sensor(self):
89 | raise NotImplementedError
90 |
91 | def get_slip_vel(self):
92 | raise NotImplementedError
93 |
94 | def get_strides(self):
95 | raise NotImplementedError
96 |
97 | def get_clearances(self):
98 | raise NotImplementedError
99 |
100 | def get_joint_pos(self) -> np.ndarray:
101 | raise NotImplementedError
102 |
103 | def get_joint_vel(self) -> np.ndarray:
104 | raise NotImplementedError
105 |
106 | def get_joint_acc(self) -> np.ndarray:
107 | raise NotImplementedError
108 |
109 | def get_last_command(self) -> np.ndarray:
110 | raise NotImplementedError
111 |
112 | def get_last_torque(self) -> np.ndarray:
113 | raise NotImplementedError
114 |
115 |
116 | class Quadruped(QuadrupedHandle, metaclass=abc.ABCMeta):
117 | STANCE_HEIGHT: float
118 | STANCE_CONFIG: tuple
119 |
120 | @property
121 | def noisy(self) -> QuadrupedHandle:
122 | return self
123 |
124 | def set_init_pose(self, x=0., y=0., yaw=0.):
125 | raise NotImplementedError
126 |
127 | def set_random_dynamics(self, flag: bool = True):
128 | raise NotImplementedError
129 |
130 | def set_latency(self, lower: float, upper=None):
131 | raise NotImplementedError
132 |
133 | @classmethod
134 | def inverse_kinematics(cls, leg: int, pos: ARRAY_LIKE):
135 | raise NotImplementedError
136 |
137 | @classmethod
138 | def forward_kinematics(cls, leg: int, angles: ARRAY_LIKE):
139 | raise NotImplementedError
140 |
141 |
142 | class Terrain(metaclass=abc.ABCMeta):
143 | def get_height(self, x, y):
144 | raise NotImplementedError
145 |
146 | def get_normal(self, x, y):
147 | raise NotImplementedError
148 |
149 | def get_peak(self, x_range, y_range):
150 | raise NotImplementedError
151 |
152 | def out_of_range(self, x, y) -> bool:
153 | raise NotImplementedError
154 |
155 |
156 | @dataclass
157 | class Snapshot(object):
158 | position: np.ndarray = None
159 | orientation: np.ndarray = None
160 | rotation: np.ndarray = None
161 | rpy: np.ndarray = None
162 | linear_vel: np.ndarray = None
163 | angular_vel: np.ndarray = None
164 | joint_pos: np.ndarray = None
165 | joint_vel: np.ndarray = None
166 | joint_acc: np.ndarray = None
167 | foot_pos: np.ndarray = None
168 | velocimeter: np.ndarray = None
169 | gyro: np.ndarray = None
170 | accelerometer: np.ndarray = None
171 | torso_contact: bool = None
172 | leg_contacts: np.ndarray = None
173 | contact_forces: np.ndarray = None
174 | force_sensor: np.ndarray = None
175 | rpy_rate: np.ndarray = None
176 |
177 |
178 | @dataclass
179 | class LocomotionInfo:
180 | time: float = 0.
181 | last_stance_states: Any = field(default_factory=lambda: [None] * 4)
182 | max_foot_heights: np.ndarray = field(default_factory=lambda: np.zeros(4))
183 | foot_clearances: np.ndarray = field(default_factory=lambda: np.zeros(4))
184 | strides: List[Tuple[float, float]] = field(default_factory=lambda: [(0., 0.)] * 4)
185 | slips: List[float] = field(default_factory=lambda: [0.] * 4)
186 |
187 |
188 | @dataclass
189 | class Command:
190 | command: np.ndarray = None
191 | torque: np.ndarray = None
192 |
193 |
194 | class ComposedObs(tuple):
195 | pass
196 |
197 |
198 | class Environment(gym.Env, metaclass=abc.ABCMeta):
199 | @property
200 | def robot(self) -> QuadrupedHandle:
201 | raise NotImplementedError
202 |
203 | @property
204 | def arena(self) -> Terrain:
205 | raise NotImplementedError
206 |
207 | @arena.setter
208 | def arena(self, value):
209 | raise NotImplementedError
210 |
211 | @property
212 | def action_history(self):
213 | raise NotImplementedError
214 |
215 | @property
216 | def sim_time(self):
217 | raise NotImplementedError
218 |
219 | @property
220 | def num_substeps(self):
221 | raise NotImplementedError
222 |
223 | @property
224 | def timestep(self):
225 | raise NotImplementedError
226 |
227 | @property
228 | def identifier(self):
229 | raise NotImplementedError
230 |
231 | def get_action_rate(self) -> np.ndarray:
232 | raise NotImplementedError
233 |
234 | def get_action_accel(self) -> np.ndarray:
235 | raise NotImplementedError
236 |
237 | def get_relative_robot_height(self) -> float:
238 | raise NotImplementedError
239 |
240 | def get_interact_terrain_normal(self):
241 | raise NotImplementedError
242 |
243 | def get_interact_terrain_rot(self) -> np.ndarray:
244 | raise NotImplementedError
245 |
246 | def get_perturbation(self, in_robot_frame=False):
247 | raise NotImplementedError
248 |
249 | def set_perturbation(self, value=None):
250 | raise NotImplementedError
251 |
252 |
253 | class Hook(metaclass=abc.ABCMeta):
254 | def register_task(self, task):
255 | pass
256 |
257 | def initialize(self, robot, env):
258 | pass
259 |
260 | def init_episode(self, robot, env):
261 | pass
262 |
263 | def before_step(self, robot, env):
264 | pass
265 |
266 | def before_substep(self, robot, env):
267 | pass
268 |
269 | def after_step(self, robot, env):
270 | pass
271 |
272 | def after_substep(self, robot, env):
273 | pass
274 |
275 | def on_success(self, robot, env):
276 | pass
277 |
278 | def on_fail(self, robot, env):
279 | pass
280 |
281 |
282 | class CommHook(Hook):
283 | def __init__(self, comm: mp.Queue):
284 | self._comm = comm
285 | self._env_id: str = 'anonymous'
286 |
287 | def initialize(self, robot, env):
288 | self._env_id = env.identifier
289 |
290 | def _submit(self, info):
291 | self._comm.put((self._env_id, info))
292 |
293 |
294 | class CommHookFactory(object):
295 | def __init__(self, cls: Type[CommHook]):
296 | self._cls = cls
297 | self._comm = mp.Queue()
298 |
299 | def __call__(self, *args, **kwargs):
300 | return self._cls(self._comm, *args, **kwargs)
301 |
302 |
303 | class Task(metaclass=abc.ABCMeta):
304 | """
305 | The task prototype.
306 |     A task is embedded in an environment; its methods are
307 |     automatically called at different stages of an episode.
308 |     It should also manage hook callbacks.
309 |     Only `before_step` has a return value, which converts the
310 |     policy action into desired joint angles.
311 | """
312 |
313 | observation_space: gym.Space
314 | action_space: gym.Space
315 |
316 | def init_episode(self):
317 | pass
318 |
319 | def before_step(self, action):
320 | pass
321 |
322 | def before_substep(self):
323 | pass
324 |
325 | def after_step(self):
326 | pass
327 |
328 | def after_substep(self):
329 | pass
330 |
331 | def on_success(self):
332 | pass
333 |
334 | def on_fail(self):
335 | pass
336 |
337 | def register_env(self, robot: Quadruped, env: Environment):
338 | raise NotImplementedError
339 |
340 | def add_hook(self, hook: Hook, name=None):
341 | raise NotImplementedError
342 |
343 | def remove_hook(self, name=None):
344 | raise NotImplementedError
345 |
346 | def get_observation(self):
347 | raise NotImplementedError
348 |
349 | def get_reward(self, detailed=True):
350 | """
351 | Get the reward sum in a step.
352 |         Rewards should be calculated in `after_step` and/or `after_substep`.
353 |         :param detailed: if True, also return a dict containing all reward terms.
354 | :return: reward(, details)
355 | """
356 | raise NotImplementedError
357 |
358 | def is_succeeded(self):
359 | raise NotImplementedError
360 |
361 | def is_failed(self):
362 | raise NotImplementedError
363 |
--------------------------------------------------------------------------------
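The `Task`/`Hook` split above keeps optional behaviour (cameras, perturbations, commanders,
recorders) out of the environment itself: hooks are attached to a task and called back at the
stages listed in `Hook`. An illustrative custom hook, not part of the repository, that records
the base velocity after every control step:

```python
import numpy as np

from qdpgym.sim.abc import Hook


class BaseVelocityLogger(Hook):
    """Hypothetical hook: collect the base linear velocity at each control step."""

    def __init__(self):
        self._velocities = []

    def init_episode(self, robot, env):
        self._velocities.clear()

    def after_step(self, robot, env):
        self._velocities.append(np.asarray(robot.get_base_lin()))

    def on_fail(self, robot, env):
        if self._velocities:
            speeds = np.linalg.norm(self._velocities, axis=-1)
            print(f'episode failed at t={env.sim_time:.2f} s, mean speed {speeds.mean():.3f} m/s')
```

It would be registered like the built-in hooks in the examples, e.g. `task.add_hook(BaseVelocityLogger())`.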
/qdpgym/sim/app.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 |
5 | from qdpgym.sim.abc import Quadruped, Environment, Task
6 |
7 |
8 | class Application(object):
9 | def __init__(self, robot, env, task, policy):
10 | self.robot: Quadruped = robot
11 | self.env: Environment = env
12 | self.task: Task = task
13 | self.policy = policy
14 | self.callbacks = []
15 |
16 | def add_callback(self, obj: Callable):
17 | self.callbacks.append(obj)
18 |
19 | def launch(self, allow_reset=True):
20 | with torch.inference_mode():
21 | obs = self.env.reset()
22 |
23 | for _ in range(20000):
24 | actions = self.policy(obs)
25 | for callback in self.callbacks:
26 | callback(self.robot, self.env, self.task)
27 | obs, _, done, _ = self.env.step(actions)
28 | if done and allow_reset:
29 | self.env.reset()
30 |
--------------------------------------------------------------------------------
/qdpgym/sim/blt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/blt/__init__.py
--------------------------------------------------------------------------------
/qdpgym/sim/blt/env.py:
--------------------------------------------------------------------------------
1 | import collections
2 | from typing import Union, Any, Optional, Tuple
3 |
4 | import gym
5 | import numpy as np
6 | import pybullet as pyb
7 | import pybullet_data as pyb_data
8 | from pybullet_utils.bullet_client import BulletClient
9 |
10 | from qdpgym.sim.abc import Task, Environment, ARRAY_LIKE
11 | from .quadruped import Aliengo
12 | from .terrain import TerrainBase
13 | from qdpgym.utils import PadWrapper, tf
14 |
15 |
16 | class QuadrupedEnv(Environment):
17 | _observation_space = None
18 | _action_space = None
19 |
20 | def __init__(
21 | self,
22 | robot: Aliengo,
23 | arena: TerrainBase,
24 | task: Task,
25 | timestep: float = 2e-3,
26 | time_limit: float = 20,
27 | num_substeps: int = 10,
28 | identifier: str = None,
29 | ):
30 | self._robot = robot
31 | self._arena = arena
32 | self._task = task
33 | self._timestep = timestep
34 | self._time_limit = time_limit
35 | self._num_substeps = num_substeps
36 | self._identifier = identifier or f'{hex(id(self))[-7:]}'
37 | self._step_freq = 1 / (self.timestep * self._num_substeps)
38 | self._render = False
39 |
40 | self._init = False
41 | self._elapsed_sim_steps = None
42 |
43 | self._sim_env = None if True else pyb # for pylint
44 | self._reset_times, self._debug_reset_param = 0, -1
45 | self._task.register_env(self._robot, self)
46 | self._action_history = collections.deque(maxlen=10)
47 | self._perturbation = None
48 |
49 | self._interact_terrain_samples = []
50 | self._interact_terrain_normal = None
51 | self._interact_terrain_height = 0.0
52 |
53 | @property
54 | def observation_space(self) -> gym.Space:
55 | if self._observation_space is None:
56 | if hasattr(self._task, 'observation_space'):
57 | self._observation_space = self._task.observation_space
58 | else:
59 | self._observation_space = gym.Space()
60 | return self._observation_space
61 |
62 | @property
63 | def action_space(self) -> gym.Space:
64 | if self._action_space is None:
65 | if hasattr(self._task, 'action_space'):
66 | self._action_space = self._task.action_space
67 | else:
68 | self._action_space = gym.Space((12,), float)
69 | return self._action_space
70 |
71 | def render(self, mode="human"):
72 | if mode == 'human':
73 | self._render = True
74 | if self._sim_env is not None:
75 | self._sim_env.configureDebugVisualizer(
76 | pyb.COV_ENABLE_SINGLE_STEP_RENDERING, True
77 | )
78 | else:
79 | return super().render(mode)
80 |
81 | def reset(
82 | self,
83 | seed: Optional[int] = None,
84 | return_info: bool = False,
85 | options: Optional[dict] = None,
86 | ) -> Union[Any, Tuple[Any, dict]]:
87 |
88 | super().reset(seed=seed, return_info=return_info, options=options)
89 |
90 | self._init = True
91 | self._elapsed_sim_steps = 0
92 | self._action_history.clear()
93 |
94 | if self._sim_env is None:
95 | if self._render:
96 | fps = int(self._step_freq)
97 | self._sim_env = BulletClient(pyb.GUI, options=f'--width=1024 --height=768 --mp4fps={fps}')
98 | self._debug_reset_param = self._sim_env.addUserDebugParameter('reset', 1, 0, 0)
99 | else:
100 | self._sim_env = BulletClient(pyb.DIRECT)
101 | self._sim_env.setAdditionalSearchPath(pyb_data.getDataPath())
102 | self._sim_env.setTimeStep(self._timestep)
103 | self._sim_env.setGravity(0, 0, -9.8)
104 |
105 | self._task.init_episode()
106 | self._arena.spawn(self._sim_env)
107 | self._robot.add_to(self._arena)
108 | self._robot.spawn(self._sim_env, self.np_random)
109 |
110 | for i in range(50):
111 | self._robot.update_observation(None, minimal=True)
112 | self._robot.apply_command(self._robot.STANCE_CONFIG)
113 | self._sim_env.stepSimulation()
114 |
115 | self._action_history.append(np.array(self._robot.STANCE_CONFIG))
116 | self._robot.update_observation(self.np_random)
117 |
118 | return (
119 | self._task.get_observation()
120 | # if return_info else (self._task.get_observation(), {})
121 | )
122 |
123 | def close(self):
124 | if self._sim_env is not None:
125 | self._sim_env.disconnect()
126 |
127 | @property
128 | def robot(self):
129 | return self._robot
130 |
131 | @property
132 | def sim_env(self):
133 | return self._sim_env
134 |
135 | @property
136 | def render_mode(self):
137 | return self._render
138 |
139 | @property
140 | def arena(self):
141 | return self._arena
142 |
143 | @arena.setter
144 | def arena(self, value: TerrainBase):
145 | value.replace(self._sim_env, self._arena)
146 | self._arena = value
147 |
148 | @property
149 | def action_history(self):
150 | return PadWrapper(self._action_history)
151 |
152 | @property
153 | def sim_time(self):
154 | return self._elapsed_sim_steps * self._timestep
155 |
156 | @property
157 | def timestep(self):
158 | return self._timestep
159 |
160 | @property
161 | def num_substeps(self):
162 | return self._num_substeps
163 |
164 | @property
165 | def identifier(self) -> str:
166 | return self._identifier
167 |
168 | def step(self, action: ARRAY_LIKE) -> Tuple[Any, float, bool, dict]:
169 | if self._check_debug_reset_param():
170 | return self.reset(), 0., True, {}
171 |
172 |         assert self._init, 'Call `reset` before `step`!'
173 | action = self._task.before_step(action)
174 | action = np.asarray(action)
175 | prev_action = self._action_history[-1]
176 | self._action_history.append(action)
177 |
178 | for i in range(self._num_substeps):
179 | weight = (i + 1) / self._num_substeps
180 | current_action = action * weight + prev_action * (1 - weight)
181 | self._robot.apply_command(current_action)
182 | self._task.before_substep()
183 |
184 | self._apply_perturbation()
185 | self._sim_env.stepSimulation()
186 | self._elapsed_sim_steps += 1
187 | self._update_observation()
188 |
189 | self._task.after_substep()
190 | self._task.after_step()
191 |
192 | done = False
193 | info = {}
194 | if self._task.is_failed():
195 | done = True
196 | self._task.on_fail()
197 | elif ((self._time_limit is not None and self.sim_time >= self._time_limit)
198 | or self._task.is_succeeded()):
199 | done = True
200 | info['TimeLimit.truncated'] = True
201 | self._task.on_success()
202 |
203 | reward, reward_info = self._task.get_reward(True)
204 | info['reward_info'] = reward_info
205 | return (
206 | self._task.get_observation(), reward, done, info
207 | )
208 |
209 | def _update_observation(self):
210 | self._robot.update_observation(self.np_random)
211 |
212 | self._interact_terrain_samples.clear()
213 | self._interact_terrain_height = 0.
214 | xy_points = self._robot.get_foot_pos()
215 | for x, y, _ in xy_points:
216 | h = self._arena.get_height(x, y)
217 | self._interact_terrain_height += h
218 | self._interact_terrain_samples.append((x, y, h))
219 | self._interact_terrain_height /= 4
220 | self._interact_terrain_normal = tf.estimate_normal(self._interact_terrain_samples)
221 |
222 | def get_action_rate(self) -> np.ndarray:
223 | if len(self._action_history) < 2:
224 | return np.zeros(12)
225 | actions = [self._action_history[-i - 1] for i in range(2)]
226 | return (actions[0] - actions[1]) * self._step_freq
227 |
228 | def get_action_accel(self) -> np.ndarray:
229 | if len(self._action_history) < 3:
230 | return np.zeros(12)
231 | actions = [self._action_history[-i - 1] for i in range(3)]
232 | return (actions[0] - 2 * actions[1] + actions[2]) * self._step_freq ** 2
233 |
234 | def get_relative_robot_height(self) -> float:
235 | return self._robot.get_base_pos()[2] - self._interact_terrain_height
236 |
237 | def get_interact_terrain_normal(self) -> np.ndarray:
238 | return self._interact_terrain_normal
239 |
240 | def get_interact_terrain_rot(self) -> np.ndarray:
241 | return tf.Rotation.from_zaxis(self._interact_terrain_normal)
242 |
243 | def get_perturbation(self, in_robot_frame=False) -> Optional[np.ndarray]:
244 | if self._perturbation is None:
245 | return None
246 | elif in_robot_frame:
247 | rotation = self._robot.get_base_rot()
248 | perturbation = np.concatenate(
249 | [rotation.T @ self._perturbation[i * 3:i * 3 + 3] for i in range(2)]
250 | )
251 | else:
252 | perturbation = self._perturbation
253 | return perturbation
254 |
255 | def set_perturbation(self, value=None) -> None:
256 | if value is None:
257 | self._perturbation = None
258 | else:
259 | self._perturbation = np.array(value)
260 |
261 | def _apply_perturbation(self):
262 | if self._perturbation is not None:
263 | self._applied_link_id = 0
264 | self._sim_env.applyExternalForce(
265 | objectUniqueId=self._robot.id,
266 | linkIndex=self._applied_link_id,
267 | forceObj=self._perturbation[:3],
268 | posObj=self._robot.get_base_pos(),
269 | flags=pyb.WORLD_FRAME
270 | )
271 | self._sim_env.applyExternalTorque(
272 | objectUniqueId=self._robot.id,
273 | linkIndex=self._applied_link_id,
274 | torqueObj=self._perturbation[3:],
275 | flags=pyb.WORLD_FRAME
276 | )
277 |
278 | def _check_debug_reset_param(self):
279 | if self._debug_reset_param != -1:
280 | reset_times = self._sim_env.readUserDebugParameter(self._debug_reset_param)
281 | if reset_times != self._reset_times:
282 | self._reset_times = reset_times
283 | return True
284 | return False
285 |
--------------------------------------------------------------------------------
/qdpgym/sim/blt/hooks.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import copy
3 | import json
4 | import math
5 | import os
6 | import socket
7 | import time
8 |
9 | import numpy as np
10 | import pybullet as pyb
11 |
12 | from qdpgym.sim.abc import Hook
13 | from qdpgym.sim.blt.terrain import Hills, Slopes, Steps, PlainHf
14 | from qdpgym.utils import Angle, tf, get_timestamp, log
15 |
16 |
17 | class ViewerHook(Hook):
18 | def __init__(self):
19 | self._pre_vis = False
20 | self._init_vis = False
21 | self._sleep_on = True
22 |
23 | self._robot_yaw_buffer = collections.deque(maxlen=10)
24 | self._cam_state = 0
25 |
26 | self._last_frame_time = time.time()
27 |
28 | def initialize(self, robot, env):
29 | env.render()
30 |
31 | def init_episode(self, robot, env):
32 | self._robot_yaw_buffer.clear()
33 | if self._pre_vis:
34 | return
35 | self._pre_vis = True
36 | sim_env = env.sim_env
37 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, False)
38 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_GUI, False)
39 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_TINY_RENDERER, False)
40 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SHADOWS, False)
41 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RGB_BUFFER_PREVIEW, False)
42 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_DEPTH_BUFFER_PREVIEW, False)
43 | sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SEGMENTATION_MARK_PREVIEW, False)
44 |
45 | def before_step(self, robot, env):
46 | if not self._init_vis:
47 | self._init_vis = True
48 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_RENDERING, True)
49 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_GUI, True)
50 |
51 | def after_step(self, robot, env):
52 | sim_env = env.sim_env
53 | period = env.timestep * env.num_substeps
54 | time_spent = time.time() - self._last_frame_time
55 | if self._sleep_on:
56 | sleep_time = period - time_spent
57 | if sleep_time > 0:
58 | time.sleep(sleep_time)
59 | self._last_frame_time = time.time()
60 | kbd_events = pyb.getKeyboardEvents()
61 |
62 | switch_cam_state = self.is_triggered(ord('`'), kbd_events)
63 | if switch_cam_state:
64 | self._cam_state = (self._cam_state + 1) % 5
65 |
66 | x, y, _ = robot.get_base_pos()
67 | z = robot.STANCE_HEIGHT + env.arena.get_height(x, y)
68 | if self._cam_state == 0:
69 | if switch_cam_state:
70 | yaw = Angle.to_deg(Angle.mean(self._robot_yaw_buffer))
71 | sim_env.resetDebugVisualizerCamera(1.5, yaw, -30., (x, y, z))
72 | else:
73 | yaw, pitch, dist = sim_env.getDebugVisualizerCamera()[8:11]
74 | sim_env.resetDebugVisualizerCamera(dist, yaw, pitch, (x, y, z))
75 | else:
76 | self._robot_yaw_buffer.append(robot.get_base_rpy()[2] - math.pi / 2)
77 | # To avoid motion sickness :)
78 | yaw = Angle.mean(self._robot_yaw_buffer)
79 | degree = -30.
80 | if self._cam_state == 2: # around robot
81 | yaw = Angle.norm(yaw + math.pi / 2)
82 | degree = 0.
83 | elif self._cam_state == 3:
84 | yaw = Angle.norm(yaw + math.pi)
85 | elif self._cam_state == 4:
86 | yaw = Angle.norm(yaw - math.pi / 2)
87 | degree = 0.
88 | sim_env.resetDebugVisualizerCamera(1.5, Angle.to_deg(yaw), degree, (x, y, z))
89 | env.render()
90 |
91 | KEY_SPACE = ord(' ')
92 | if self.is_triggered(KEY_SPACE, kbd_events):
93 | while True: # PAUSE
94 | env.sim_env.configureDebugVisualizer(pyb.COV_ENABLE_SINGLE_STEP_RENDERING, True)
95 | time.sleep(0.01)
96 | if self.is_triggered(KEY_SPACE, pyb.getKeyboardEvents()):
97 | self._cam_state = 0
98 | break
99 |
100 | @staticmethod
101 | def is_triggered(key, keyboard_events):
102 | return key in keyboard_events and keyboard_events[key] & pyb.KEY_WAS_TRIGGERED
103 |
104 |
105 | class _TorqueVisualizerHelper(object):
106 | def __init__(self):
107 | self._axis_x = None
108 | self._axis_y = None
109 | self._mk_start = None
110 | self._mk_phase = 0
111 |
112 | def update(self, torque):
113 | if (torque != 0.).any():
114 | self._axis_x, self._axis_y, _ = tf.Rotation.from_zaxis(tf.vunit(torque)).T
115 | magnitude = tf.vnorm(torque)
116 | if self._mk_start is None:
117 | self._mk_start = self._axis_x * magnitude / 20
118 | self._mk_phase += math.pi / 36
119 | marker_end = (self._axis_x * math.cos(self._mk_phase) +
120 | self._axis_y * math.sin(self._mk_phase)
121 | ) * magnitude / 20
122 |
123 | marker_info = dict(
124 | lineFromXYZ=copy.deepcopy(self._mk_start),
125 | lineToXYZ=marker_end
126 | )
127 | self._mk_start = marker_end
128 | return marker_info
129 |
130 |
131 | class ExtraViewerHook(ViewerHook):
132 | def __init__(self, perturb=True):
133 | super().__init__()
134 | self._show_perturb = perturb
135 | self._last_perturb = None
136 |
137 | self._force_marker = -1
138 | self._torque_vis = _TorqueVisualizerHelper()
139 |
140 | def after_step(self, robot, env):
141 | sim_env = env.sim_env
142 | if self._show_perturb:
143 | perturb = env.get_perturbation(in_robot_frame=True)
144 | if perturb is None:
145 | sim_env.removeUserDebugItem(self._force_marker)
146 | else:
147 | if (self._last_perturb != perturb).any():
148 | self._force_marker = sim_env.addUserDebugLine(
149 | lineFromXYZ=(0., 0., 0.),
150 | lineToXYZ=perturb[:3] / 50,
151 | lineColorRGB=(1., 0., 0.),
152 | lineWidth=3, lifeTime=1,
153 | parentObjectUniqueId=robot.id,
154 | replaceItemUniqueId=self._force_marker
155 | )
156 | sim_env.addUserDebugLine(
157 | **self._torque_vis.update(perturb[3:]),
158 | lineColorRGB=(0., 0., 1.),
159 | lineWidth=5, lifeTime=0.1,
160 | parentObjectUniqueId=robot.id
161 | )
162 |
163 | self._last_perturb = perturb
164 | super().after_step(robot, env)
165 |
166 |
167 | class HeightSampleVisualizer(Hook):
168 | def __init__(self):
169 | super().__init__()
170 | self._terrain_visual_shape = -1
171 | self._terrain_markers = None
172 |
173 | def init_episode(self, robot, env):
174 | if self._terrain_markers is None:
175 | sim_env = env.sim_env
176 | self._terrain_visual_shape = sim_env.createVisualShape(
177 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0., 0.8, 0., 0.6))
178 | self._terrain_markers = [
179 | sim_env.createMultiBody(
180 | baseVisualShapeIndex=self._terrain_visual_shape
181 | ) for _ in range(121)
182 | ]
183 |
184 | def after_step(self, robot, env):
185 | sim_env = env.sim_env
186 | rx, ry, _ = robot.get_base_pos()
187 | yaw = robot.get_base_rpy()[2]
188 | cy, sy = math.cos(yaw), math.sin(yaw)
189 | dx, dy = np.array(((cy, sy), (-sy, cy))) * 0.15
190 |
191 | marker = iter(self._terrain_markers)
192 | for i in range(-5, 6):
193 | for j in range(-5, 6):
194 | x, y = (rx, ry) + i * dx + j * dy
195 | height = env.arena.get_height(x, y)
196 | sim_env.resetBasePositionAndOrientation(
197 | next(marker), (x, y, height), (0., 0., 0., 1.)
198 | )
199 |
200 |
201 | class RandomTerrainHook(Hook):
202 | def __init__(self):
203 | self.max_roughness = 0.2
204 | self.max_slope = 10 / 180 * math.pi
205 | self.max_step_height = 0.1
206 |
207 | def generate_terrain(self, random_gen: np.random.Generator):
208 | """
209 | Generate a random Hills, Slopes, Steps or PlainHf terrain.
210 | """
211 | size, resolution = 30, 0.1
212 | terrain_type = random_gen.integers(4)
213 | difficulty = random_gen.random()
214 | terrain = None
215 | if terrain_type == 0:
216 | roughness = self.max_roughness * difficulty
217 | terrain = Hills.make(size, resolution, (roughness, 20),
218 | random_state=random_gen)
219 | elif terrain_type == 1:
220 | slope = self.max_slope * difficulty
221 | axis = random_gen.choice(('x', 'y'))
222 | terrain = Slopes.make(size, resolution, slope, 3., axis)
223 | elif terrain_type == 2:
224 | step_height = self.max_step_height * difficulty
225 | terrain = Steps.make(size, resolution, 1., step_height, random_gen)
226 | elif terrain_type == 3:
227 | terrain = PlainHf.make(size, resolution)
228 | return terrain
229 |
230 | def init_episode(self, robot, env):
231 | env.arena = self.generate_terrain(env.np_random)
232 |
233 |
234 | class RandomPerturbHook(Hook):
235 | def __init__(self):
236 | self.perturb_prob = 0.5
237 | self.force_magnitude = np.array((20., 20.))
238 | self.torque_magnitude = np.array((2.5, 5., 5.))
239 | self.interval_range = (0.5, 2.0)
240 | self.interval = 0
241 | self.last_update = 0
242 |
243 | def get_random_perturb(self, random_gen):
244 | horizontal_force = random_gen.uniform(0, self.force_magnitude[0])
245 | vertical_force = random_gen.uniform(0, self.force_magnitude[1])
246 | yaw = random_gen.uniform(0, 2 * math.pi)
247 | external_force = np.array((
248 | horizontal_force * np.cos(yaw),
249 | horizontal_force * np.sin(yaw),
250 | vertical_force * random_gen.choice((-1, 1))
251 | ))
252 |
253 | external_torque = random_gen.uniform(-self.torque_magnitude, self.torque_magnitude)
254 | return external_force, external_torque
255 |
256 | def before_substep(self, robot, env):
257 | if env.sim_time >= self.last_update + self.interval:
258 | random = env.np_random
259 | if random.random() < self.perturb_prob:
260 | env.set_perturbation(np.concatenate(self.get_random_perturb(random)))
261 | else:
262 | env.set_perturbation(None)
263 | self.interval = random.uniform(*self.interval_range)
264 | self.last_update = env.sim_time
265 |
266 | def init_episode(self, robot, env):
267 | self.last_update = self.interval = 0
268 |
269 |
270 | class VideoRecorderHook(Hook):
271 | def __init__(self, path=''):
272 | if not path:
273 | path = os.path.join('Videos', f'record-{get_timestamp()}.mp4')
274 | os.makedirs(os.path.dirname(path), exist_ok=True)
275 | self._path = path
276 | self._log_id = -1
277 | self._status = True
278 |
279 | def before_step(self, robot, env):
280 | if self._log_id == -1 and self._status:
281 | log.info('start recording')
282 | self._log_id = env.sim_env.startStateLogging(
283 | pyb.STATE_LOGGING_VIDEO_MP4, self._path
284 | )
285 | self._status = False
286 |
287 | def init_episode(self, robot, env):
288 | if self._log_id != -1:
289 | env.sim_env.stopStateLogging(self._log_id)
290 | self._log_id = -1
291 |
292 |
293 | class _UdpPublisher(object):
294 | """
295 | Send the locomotion data stream to external tools such as PlotJuggler.
296 | """
297 |
298 | def __init__(self, port):
299 | self.client = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
300 | self.port = port
301 |
302 | def send(self, data: dict):
303 | msg = json.dumps(data)
304 | ip_port = ('127.0.0.1', self.port)
305 | self.client.sendto(msg.encode('utf-8'), ip_port)
306 |
307 |
308 | class StatisticsHook(Hook):
309 | def __init__(self, publish_on=True):
310 | self._torque_sum = 0.
311 | self._torque_abs_sum = 0.
312 | self._torque_pen_sum = 0.0
313 | self._joint_motion_sum = 0.0
314 | self._step_counter = 0
315 | self._publish = publish_on
316 | if self._publish:
317 | self._udp_pub = _UdpPublisher(9870)
318 | self._task = None
319 |
320 | def register_task(self, task):
321 | self._task = task
322 |
323 | def after_step(self, robot, env):
324 |
325 | def wrap(reward_type):
326 | return getattr(self._task.ALL_REWARDS, reward_type)()(robot, env, self._task)
327 |
328 | _, reward_details = self._task.get_reward(detailed=True)
329 | self._step_counter += 1
330 | self._torque_sum += robot.get_last_torque() ** 2
331 | self._torque_abs_sum += abs(robot.get_last_torque())
332 | self._torque_pen_sum += wrap('TorquePenalty')
333 | self._joint_motion_sum += wrap('JointMotionPenalty')
334 |
335 | # def publish(self, task, rob, env):
336 |
337 | # data = {
338 | # 'obs': env.makeNoisyProprioInfo().standard().tolist(),
339 | # 'action': env._action.tolist(),
340 | # 'joint_states': {
341 | # 'joint_pos': rob.getJointPositions().tolist(),
342 | # 'violence': env.getActionViolence().tolist(),
343 | # 'commands': rob.getLastCommand().tolist(),
344 | # 'joint_vel': rob.getJointVelocities().tolist(),
345 | # 'joint_acc': rob.getJointAccelerations().tolist(),
346 | # # 'kp_part': tuple(rob._motor._kp_part),
347 | # # 'kd_part': tuple(rob._motor._kd_part),
348 | # 'torque': rob.getLastAppliedTorques().tolist(),
349 | # 'contact': rob.getContactStates().tolist()
350 | # },
351 | # 'body_height': env.getTerrainBasedHeightOfRobot(),
352 | # # 'cot': rob.getCostOfTransport(),
353 | # 'twist': {
354 | # 'linear': rob.getBaseLinearVelocityInBaseFrame().tolist(),
355 | # 'angular': rob.getBaseAngularVelocityInBaseFrame().tolist(),
356 | # },
357 | # # 'torque_pen': wrap(TorquePenalty)
358 | # }
359 | # self._udp_pub.send(data)
360 |
361 | def init_episode(self, robot, env):
362 | if self._step_counter != 0.:
363 | print('episode len:', self._step_counter)
364 | print('rms torque', np.sqrt(self._torque_sum / self._step_counter))
365 | print('abs torque', self._torque_abs_sum / self._step_counter)
366 | print('torque pen', self._torque_pen_sum / self._step_counter)
367 | print('joint motion pen', self._joint_motion_sum / self._step_counter)
368 | self._torque_sum = 0.
369 | self._torque_abs_sum = 0.
370 | self._torque_pen_sum = 0.0
371 | self._joint_motion_sum = 0.0
372 | self._step_counter = 0
373 |
--------------------------------------------------------------------------------
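These hooks are not used in isolation; a task collects them and fans out the lifecycle callbacks (`initialize`, `init_episode`, `before_step`, `after_step`, ...). A hedged sketch based on `NullTask.add_hook` from qdpgym/sim/task.py further below (hook order is illustrative, and the base `Hook` class is assumed to provide a no-op `register_task`):

    from qdpgym.sim.task import NullTask
    from qdpgym.sim.blt.hooks import ViewerHook, RandomTerrainHook, RandomPerturbHook

    task = NullTask()
    task.add_hook(RandomTerrainHook())   # regenerate a random terrain each episode
    task.add_hook(RandomPerturbHook())   # random external force/torque during the rollout
    task.add_hook(ViewerHook())          # PyBullet GUI with camera follow and pause key
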
/qdpgym/sim/blt/terrain.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import dataclasses
3 | from typing import Union, Tuple, Iterable
4 |
5 | import numpy as np
6 | import pybullet as pyb
7 | from scipy.interpolate import interp2d
8 |
9 | from qdpgym.sim.abc import Terrain, NUMERIC
10 | from qdpgym.utils.tf import vcross, vunit
11 |
12 | __all__ = [
13 | 'NullTerrain', 'Plain', 'HeightFieldTerrain',
14 | 'Hills', 'PlainHf', 'Slopes', 'Steps', 'TerrainBase'
15 | ]
16 |
17 |
18 | class TerrainBase(Terrain, metaclass=abc.ABCMeta):
19 | def __init__(self):
20 | self._id: int = -1
21 | self._spawned = False
22 |
23 | @property
24 | def id(self):
25 | return self._id
26 |
27 | def spawn(self, sim_env):
28 | raise NotImplementedError
29 |
30 | def replace(self, sim_env, obj: 'TerrainBase'):
31 | obj.remove(sim_env)
32 | self.spawn(sim_env)
33 |
34 | def remove(self, sim_env):
35 | if self._id != -1:
36 | sim_env.removeBody(self._id)
37 |
38 |
39 | class NullTerrain(TerrainBase):
40 | pass
41 |
42 |
43 | class Plain(TerrainBase):
44 | def spawn(self, sim_env):
45 | if self._id != -1:
46 | return
47 | self._id = sim_env.loadURDF("plane.urdf")
48 | sim_env.changeDynamics(self._id, -1, lateralFriction=1.0)
49 |
50 | def get_height(self, x, y):
51 | return 0.0
52 |
53 | def get_normal(self, x, y):
54 | return np.array((0, 0, 1))
55 |
56 | def get_peak(self, x_range, y_range):
57 | return sum(x_range) / 2, sum(y_range) / 2, 0.0
58 |
59 | def out_of_range(self, x, y):
60 | return False
61 |
62 |
63 | @dataclasses.dataclass
64 | class HeightField:
65 | data: np.ndarray
66 | size: Union[NUMERIC, Tuple[NUMERIC, NUMERIC]]
67 | resolution: NUMERIC
68 |
69 |
70 | class HeightFieldTerrain(TerrainBase):
71 | def __init__(self, heightfield: HeightField):
72 | super().__init__()
73 | self.heightfield = heightfield.data
74 | self.y_dim, self.x_dim = self.heightfield.shape
75 | if isinstance(heightfield.size, Iterable):
76 | self.x_size, self.y_size = heightfield.size
77 | else:
78 | self.x_size, self.y_size = heightfield.size, heightfield.size
79 | self.x_rsl = self.y_rsl = heightfield.resolution
80 | self._shape_id = -1
81 |
82 | def spawn(self, sim_env):
83 | if self._id != -1:
84 | return
85 | self._shape_id = sim_env.createCollisionShape(
86 | shapeType=pyb.GEOM_HEIGHTFIELD,
87 | flags=pyb.GEOM_CONCAVE_INTERNAL_EDGE,
88 | meshScale=(self.x_rsl, self.y_rsl, 1.0),
89 | heightfieldTextureScaling=self.x_size,
90 | heightfieldData=self.heightfield.reshape(-1),
91 | numHeightfieldColumns=self.x_dim,
92 | numHeightfieldRows=self.y_dim,
93 | replaceHeightfieldIndex=-1
94 | )
95 | self._id = sim_env.createMultiBody(0, self._shape_id)
96 | sim_env.changeVisualShape(self._id, -1, rgbaColor=(1, 1, 1, 1))
97 | sim_env.changeDynamics(self._id, -1, lateralFriction=1.0)
98 | origin_z = (np.max(self.heightfield) + np.min(self.heightfield)) / 2
99 | sim_env.resetBasePositionAndOrientation(self._id, (0, 0, origin_z),
100 | (0., 0., 0., 1.))
101 |
102 | def replace(self, sim_env, obj):
103 | assert self._id == self._shape_id == -1, f'`{self}` has been spawned'
104 | if obj._id == -1:
105 | return
106 |
107 | if not isinstance(obj, HeightFieldTerrain):
108 | return super().replace(sim_env, obj)
109 |
110 | # Currently, in bullet <= 3.2.4,
111 | # Heightfield replacement may cause collision detection failure.
112 | # See https://github.com/bulletphysics/bullet3/issues/4236
113 | # See https://github.com/bulletphysics/bullet3/pull/4253
114 | self._id = obj._id
115 | self._shape_id = sim_env.createCollisionShape(
116 | shapeType=pyb.GEOM_HEIGHTFIELD, flags=pyb.GEOM_CONCAVE_INTERNAL_EDGE,
117 | meshScale=(self.x_rsl, self.y_rsl, 1.0),
118 | heightfieldTextureScaling=self.x_size,
119 | heightfieldData=self.heightfield.reshape(-1),
120 | numHeightfieldColumns=self.x_dim, numHeightfieldRows=self.y_dim,
121 | replaceHeightfieldIndex=obj._shape_id
122 | )
123 | obj._id = obj._shape_id = -1
124 |
125 | origin_z = (np.max(self.heightfield) + np.min(self.heightfield)) / 2
126 | sim_env.resetBasePositionAndOrientation(self._id, (0, 0, origin_z),
127 | (0., 0., 0., 1.))
128 |
129 | def remove(self, sim_env):
130 | if self._id != -1:
131 | sim_env.removeBody(self._id)
132 | sim_env.removeCollisionShape(self._shape_id)
133 | self._id = self._shape_id = -1
134 |
135 | @property
136 | def shape_id(self):
137 | return self._shape_id
138 |
139 | def get_disc_x(self, x):
140 | return int((x + self.x_size / 2) / self.x_rsl)
141 |
142 | def get_disc_y(self, y):
143 | return int((y + self.y_size / 2) / self.y_rsl)
144 |
145 | def get_cont_x(self, x_idx):
146 | return x_idx * self.x_rsl - self.x_size / 2
147 |
148 | def get_cont_y(self, y_idx):
149 | return y_idx * self.y_rsl - self.y_size / 2
150 |
151 | def get_nearest_vertices(self, x, y):
152 | x_idx, y_idx = self.get_disc_x(x), self.get_disc_y(y)
153 | x_rnd, y_rnd = self.get_cont_x(x_idx), self.get_cont_y(y_idx)
154 | if (x - x_rnd) / self.x_rsl + (y - y_rnd) / self.y_rsl < 1:
155 | v1 = x_rnd, y_rnd, self.heightfield[y_idx, x_idx]
156 | else:
157 | v1 = x_rnd + self.x_rsl, y_rnd + self.y_rsl, self.heightfield[y_idx + 1, x_idx + 1]
158 | v2 = x_rnd, y_rnd + self.y_rsl, self.heightfield[y_idx + 1, x_idx]
159 | v3 = x_rnd + self.x_rsl, y_rnd, self.heightfield[y_idx, x_idx + 1]
160 | return np.array(v1), np.array(v2), np.array(v3)
161 |
162 | def get_peak(self, x_range, y_range):
163 | (x_lower, x_upper), (y_lower, y_upper) = x_range, y_range
164 | x_lower_idx, x_upper_idx = self.get_disc_x(x_lower), self.get_disc_x(x_upper) + 1
165 | y_lower_idx, y_upper_idx = self.get_disc_y(y_lower), self.get_disc_y(y_upper) + 1
166 | hfield_part = self.heightfield[y_lower_idx:y_upper_idx, x_lower_idx:x_upper_idx]
167 | y_size, x_size = hfield_part.shape
168 | max_idx = np.argmax(hfield_part)
169 | max_x_idx, max_y_idx = max_idx % x_size, max_idx // x_size
170 | max_height = hfield_part[max_y_idx, max_x_idx]
171 | max_x, max_y = x_lower + max_x_idx * self.x_rsl, y_lower + max_y_idx * self.y_rsl
172 | return max_x, max_y, max_height
173 |
174 | def get_height(self, x, y):
175 | try:
176 | v1, v2, v3 = self.get_nearest_vertices(x, y)
177 | except IndexError:
178 | return 0.0
179 | if x == v1[0] and y == v1[1]:
180 | return v1[2]
181 | x1, y1, z1 = v2 - v1
182 | x2, y2, z2 = v3 - v1
183 | x3, y3 = x - v1[0], y - v1[1]
184 | div = (x1 * y2 - x2 * y1)
185 | c1 = (x3 * y2 - x2 * y3) / div
186 | c2 = (x1 * y3 - x3 * y1) / div
187 | return c1 * z1 + c2 * z2 + v1[2]
188 |
189 | def get_normal(self, x, y) -> np.ndarray:
190 | try:
191 | v1, v2, v3 = self.get_nearest_vertices(x, y)
192 | except IndexError:
193 | return np.array((0., 0., 1.))
194 | normal = vunit(vcross(v1 - v2, v1 - v3))
195 | return normal if normal[2] > 0 else -normal
196 |
197 | def out_of_range(self, x, y):
198 | return abs(x) > self.x_size / 2 - 1 or abs(y) > self.y_size / 2 - 1
199 |
200 |
201 | class Hills(HeightFieldTerrain):
202 | def __init__(self, heightfield):
203 | super().__init__(heightfield)
204 |
205 | @classmethod
206 | def make(cls, size, resolution, *rough_dsp: Tuple[NUMERIC, NUMERIC], random_state):
207 | return cls(cls.make_hfield(size, resolution, *rough_dsp, random_state=random_state))
208 |
209 | @classmethod
210 | def make_hfield(cls, size, resol, *rough_dsp: Tuple[NUMERIC, NUMERIC], random_state) -> HeightField:
211 | data_size = int(size / resol) + 1
212 | hfield_data = np.zeros((data_size, data_size))
213 | for roughness, dsp in rough_dsp:
214 | sample_rsl = dsp * resol
215 | x = y = np.arange(-size / 2 - 3 * sample_rsl, size / 2 + 4 * sample_rsl, sample_rsl)
216 | hfield_dsp = random_state.uniform(0, roughness, (x.size, y.size))
217 | terrain_func = interp2d(x, y, hfield_dsp, kind='cubic')
218 | x_upsampled = y_upsampled = np.linspace(-size / 2, size / 2, data_size)
219 | hfield_data += terrain_func(x_upsampled, y_upsampled)
220 | return HeightField(hfield_data, size, resol)
221 |
222 | @classmethod
223 | def default(cls):
224 | return cls.make(20, 0.1, (0.2, 10), random_state=np.random)
225 |
226 |
227 | class PlainHf(HeightFieldTerrain):
228 | @classmethod
229 | def make(cls, size, resolution):
230 | return cls(cls.make_hfield(size, resolution))
231 |
232 | @staticmethod
233 | def make_hfield(size, resolution):
234 | data_size = int(size / resolution) + 1
235 | hfield_data = np.zeros((data_size, data_size))
236 | return HeightField(hfield_data, size, resolution)
237 |
238 | def get_height(self, x, y):
239 | return 0.0
240 |
241 | def get_normal(self, x, y):
242 | return np.array((0., 0., 1.))
243 |
244 | def get_peak(self, x_range, y_range):
245 | return sum(x_range) / 2, sum(y_range) / 2, 0.
246 |
247 | @classmethod
248 | def default(cls):
249 | return cls.make(20, 0.1)
250 |
251 |
252 | class Slopes(HeightFieldTerrain):
253 | @classmethod
254 | def make(cls, size, resolution, slope, slope_width, axis='x'):
255 | return cls(cls.make_hfield(size, resolution, slope, slope_width, axis))
256 |
257 | @classmethod
258 | def make_hfield(cls, size, resolution, slope, slope_width, axis):
259 | step = int(slope_width * 2 / resolution)
260 | data_size = int(size / resolution) + 1
261 | num_steps = int(data_size / step) + 1
262 | slope = np.tan(slope)
263 | hfield_data = np.zeros((data_size, data_size))
264 | for i in range(num_steps):
265 | x_start, x_stop = i * step, int((i + 0.4) * step)
266 | for j in range(x_start, min(x_stop, data_size)):
267 | hfield_data[:, j] = (x_stop - j) * slope * resolution
268 | x_start, x_stop = x_stop, int((i + 0.6) * step)
269 | for j in range(x_start, min(x_stop, data_size)):
270 | hfield_data[:, j] = 0.
271 | x_start, x_stop = x_stop, (i + 1) * step
272 | for j in range(x_start, min(x_stop, data_size)):
273 | hfield_data[:, j] = (j - x_start) * slope * resolution
274 | return HeightField(cls.rotate(hfield_data, axis), size, resolution)
275 |
276 | @classmethod
277 | def rotate(cls, hfield_data, axis):
278 | if axis == 'x':
279 | return hfield_data
280 | elif axis == 'y':
281 | return hfield_data.T
282 | raise RuntimeError('Unknown axis')
283 |
284 | @classmethod
285 | def default(cls):
286 | return cls.make(20, 0.1, 0.17, 3.0)
287 |
288 |
289 | class Steps(HeightFieldTerrain):
290 | @classmethod
291 | def make(cls, size, resolution, step_width, max_step_height, random_state):
292 | return cls(cls.make_hfield(size, resolution, step_width, max_step_height, random_state))
293 |
294 | @staticmethod
295 | def make_hfield(size, resolution, step_width, max_step_height, random_state):
296 | step = int(step_width / resolution)
297 | data_size = int(size / resolution) + 1
298 | num_steps = int(data_size / step) + 1
299 | hfield_data = np.zeros((data_size, data_size))
300 | for i in range(num_steps + 1):
301 | for j in range(num_steps + 1):
302 | x_start, x_stop = int((i - 0.5) * step), int((i + 0.5) * step)
303 | y_start, y_stop = int((j - 0.5) * step), int((j + 0.5) * step)
304 | hfield_data[y_start:y_stop, x_start:x_stop] = random_state.uniform(0., max_step_height)
305 | return HeightField(hfield_data, size, resolution)
306 |
307 | @classmethod
308 | def default(cls):
309 | return cls.make(20, 0.1, 1.0, 0.1, random_state=np.random)
310 |
--------------------------------------------------------------------------------
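A short usage sketch for the height-field terrains above; the size, resolution and seed are arbitrary, and `sim_env` stands for an already-connected PyBullet client (an assumption here, normally supplied by the environment):

    import numpy as np
    from qdpgym.sim.blt.terrain import Hills, Steps

    rng = np.random.default_rng(0)
    hills = Hills.make(30, 0.1, (0.2, 20), random_state=rng)  # (roughness, downsample) pairs
    steps = Steps.make(30, 0.1, 1.0, 0.1, rng)                # 1 m wide steps up to 0.1 m high

    # hills.spawn(sim_env)              # creates the GEOM_HEIGHTFIELD collision shape
    h = hills.get_height(1.0, -0.5)     # height interpolated on the nearest triangle
    n = hills.get_normal(1.0, -0.5)     # unit surface normal at (x, y)
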
/qdpgym/sim/blt/utils.py:
--------------------------------------------------------------------------------
1 | class JointInfo(object):
2 | def __init__(self, joint_info: tuple):
3 | self._info = joint_info
4 |
5 | idx = property(lambda self: self._info[0])
6 | name = property(lambda self: self._info[1].decode("UTF-8"))
7 | type = property(lambda self: self._info[2])
8 | q_idx = property(lambda self: self._info[3])
9 | u_idx = property(lambda self: self._info[4])
10 | damping = property(lambda self: self._info[6])
11 | friction = property(lambda self: self._info[7])
12 | limits = property(lambda self: (self._info[8], self._info[9]))
13 | max_force = property(lambda self: self._info[10])
14 | max_vel = property(lambda self: self._info[11])
15 | link_name = property(lambda self: self._info[12].decode("UTF-8"))
16 | axis = property(lambda self: self._info[13])
17 | parent_frame_pos = property(lambda self: self._info[14])
18 | parent_frame_orn = property(lambda self: self._info[15])
19 | parent_idx = property(lambda self: self._info[16])
20 |
21 | import pybullet as pyb
22 | joint_types = {pyb.JOINT_REVOLUTE: 'rev', pyb.JOINT_PRISMATIC: 'pri', pyb.JOINT_SPHERICAL: 'sph',
23 | pyb.JOINT_PLANAR: 'pla', pyb.JOINT_FIXED: 'fix'}
24 |
25 | axis_dict = {(1., 0., 0.): '+X', (0., 1., 0.): '+Y', (0., 0., 1.): '+Z',
26 | (-1., 0., 0.): '-X', (0., -1., 0.): '-Y', (0., 0., -1.): '-Z', }
27 |
28 |
29 | class DynamicsInfo(object):
30 | def __init__(self, dynamics_info: tuple):
31 | self._info = dynamics_info
32 |
33 | mass = property(lambda self: self._info[0])
34 | lateral_fric = property(lambda self: self._info[1])
35 | inertia = property(lambda self: self._info[2])
36 | inertial_pos = property(lambda self: self._info[3])
37 | inertial_orn = property(lambda self: self._info[4])
38 | restitution = property(lambda self: self._info[5])
39 | rolling_fric = property(lambda self: self._info[6])
40 | spinning_fric = property(lambda self: self._info[7])
41 | damping = property(lambda self: self._info[8])
42 | stiffness = property(lambda self: self._info[9])
43 | body_type = property(lambda self: self._info[10])
44 | collision_margin = property(lambda self: self._info[11])
45 |
--------------------------------------------------------------------------------
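`JointInfo` and `DynamicsInfo` above only give names to the positional tuples returned by PyBullet. A hedged sketch (the connection setup and the `robot_id`/`joint_id` values are placeholders; a body must have been loaded before querying it):

    import pybullet as pyb
    from qdpgym.sim.blt.utils import JointInfo, DynamicsInfo

    pyb.connect(pyb.DIRECT)
    robot_id, joint_id = 0, 0  # placeholders for an already-loaded body

    joint = JointInfo(pyb.getJointInfo(robot_id, joint_id))
    print(joint.name, joint.limits, joint.max_force)

    dyn = DynamicsInfo(pyb.getDynamicsInfo(robot_id, -1))
    print(dyn.mass, dyn.lateral_fric)
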
/qdpgym/sim/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/common/__init__.py
--------------------------------------------------------------------------------
/qdpgym/sim/common/identify.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 |
5 | __all__ = ['ActuatorNet']
6 |
7 |
8 | class ActuatorNet(nn.Module):
9 | activation = nn.Softsign
10 |
11 | def __init__(
12 | self,
13 | input_dim=6,
14 | output_dim=1,
15 | hidden_dims=(32, 32, 32)
16 | ):
17 | super().__init__()
18 | layers = []
19 | self.input_dim, self.output_dim, self.hidden_dims = input_dim, output_dim, hidden_dims
20 | for dim in hidden_dims:
21 | layers.append(nn.Linear(input_dim, dim))
22 | layers.append(self.activation())
23 | input_dim = dim
24 | layers.append(nn.Linear(input_dim, output_dim))
25 | self.layers = nn.Sequential(*layers)
26 | self.device = torch.device('cpu')
27 |
28 | def forward(self, state):
29 | return self.layers(state)
30 |
31 | def to(self, device, *args, **kwargs):
32 | self.device = torch.device(device)
33 | return super().to(device, *args, **kwargs)
34 |
35 | def calc_torque(self, err1, err2, err3, vel1, vel2, vel3):
36 | with torch.inference_mode():
37 | X = np.array(
38 | (err1, err2, err3, vel1, vel2, vel3),
39 | dtype=np.float32
40 | ).transpose()
41 | try:
42 | Y = self(torch.as_tensor(X, device=self.device))
43 | except RuntimeError:
44 | self.device = next(self.parameters()).device
45 | Y = self(torch.as_tensor(X, device=self.device))
46 | return Y.double().squeeze().cpu().numpy()
47 |
--------------------------------------------------------------------------------
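A hedged sketch of loading and querying the actuator network; the checkpoint keys ('model', 'hidden_dims') mirror what `ActuatorNetSim.load_params` in motor.py below expects, and the bundled resource path is assumed to follow that layout:

    import numpy as np
    import torch
    from qdpgym.sim.common.identify import ActuatorNet

    ckpt = torch.load('qdpgym/sim/resources/acnet_220526.pt', map_location='cpu')
    net = ActuatorNet(hidden_dims=ckpt['hidden_dims'])
    net.load_state_dict(ckpt['model'])

    # position errors and joint velocities at t, t-dt, t-2dt; one row per time step
    err = np.zeros((3, 12), dtype=np.float32)
    vel = np.zeros((3, 12), dtype=np.float32)
    torque = net.calc_torque(err[0], err[1], err[2], vel[0], vel[1], vel[2])  # shape (12,)
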
/qdpgym/sim/common/motor.py:
--------------------------------------------------------------------------------
1 | import collections
2 | from typing import Iterable
3 |
4 | import numpy as np
5 | import torch
6 |
7 | from qdpgym.sim.common.identify import ActuatorNet
8 | from qdpgym.utils import get_padded, replace_is
9 |
10 |
11 | class MotorSim(object):
12 | def __init__(self, frequency):
13 | self._freq = frequency
14 | self._observe_done = False
15 | self._pos = self._vel = 0.
16 | self._residue = None
17 |
18 | self._joint_lower = self._joint_upper = None
19 | self._torque_limits = None
20 | self._cmd_clip = None
21 | self._input_delay = self._output_delay = 0
22 |
23 | self._torque_history = collections.deque(maxlen=11)
24 | self._residue_history = collections.deque(maxlen=11)
25 | self._obs_history = collections.deque(maxlen=11)
26 |
27 | def set_latency(self, input_delay: int = 0, output_delay: int = 0):
28 | self._input_delay = input_delay
29 | self._output_delay = output_delay
30 |
31 | def set_joint_limits(self, lower: Iterable = None, upper: Iterable = None):
32 | lower, upper = list(lower), list(upper)
33 | if lower is not None and None in lower:
34 | replace_is(lower, None, -np.inf)
35 | if upper is not None and None in upper:
36 | replace_is(upper, None, np.inf)
37 | self._joint_lower = np.array(lower)
38 | self._joint_upper = np.array(upper)
39 |
40 | def set_torque_limits(self, upper):
41 | self._torque_limits = np.asarray(upper)
42 |
43 | def set_cmd_clip(self, upper):
44 | self._cmd_clip = np.asarray(upper)
45 |
46 | def reset(self):
47 | self._observe_done = False
48 | self._torque_history.clear()
49 | self._residue_history.clear()
50 |
51 | def update_observation(self, pos, vel):
52 | self._observe_done = True
53 | self._obs_history.append((np.array(pos), np.array(vel)))
54 | self._pos, self._vel = get_padded(self._obs_history, -self._input_delay - 1)
55 |
56 | def apply_hybrid(self, des_pos, ff_torque):
57 | if self._joint_lower is not None or self._joint_upper is not None:
58 | des_pos = np.clip(des_pos, self._joint_lower, self._joint_upper)
59 | self._residue = des_pos - self._pos
60 | if self._cmd_clip is not None:
61 | self._residue = np.clip(self._residue, -self._cmd_clip, self._cmd_clip)
62 | self._residue_history.append(self._residue)
63 | return self.apply_torque(self.calc_torque() + ff_torque)
64 |
65 | def apply_position(self, des_pos):
66 | return self.apply_hybrid(des_pos, 0)
67 |
68 | def apply_torque(self, des_torque):
69 | assert self._observe_done, 'Update observation before executing a command'
70 | if self._torque_limits is not None:
71 | des_torque = np.clip(des_torque, -self._torque_limits, self._torque_limits)
72 | self._torque_history.append(des_torque)
73 | self._observe_done = False
74 | return get_padded(self._torque_history, -self._output_delay - 1)
75 |
76 | def calc_torque(self):
77 | raise NotImplementedError
78 |
79 |
80 | class PdMotorSim(MotorSim):
81 | def __init__(self, frequency, kp, kd):
82 | super().__init__(frequency)
83 | self._kp, self._kd = np.asarray(kp), np.asarray(kd)
84 | self._kp_part, self._kd_part = 0., 0.
85 |
86 | def calc_torque(self):
87 | self._kp_part, self._kd_part = self._kp * self._residue, self._kd * self._vel
88 | return self._kp_part - self._kd_part
89 |
90 |
91 | class ActuatorNetSim(MotorSim):
92 | def __init__(self, frequency, net=None):
93 | super().__init__(frequency)
94 | self._net = net
95 |
96 | def load_params(self, model_path, device='cpu'):
97 | model_info = torch.load(model_path, map_location=device)
98 | self._net = ActuatorNet(hidden_dims=model_info['hidden_dims']).to(device)
99 | self._net.load_state_dict(model_info['model'])
100 |
101 | def calc_torque(self):
102 | return self._net.calc_torque(
103 | self._residue,
104 | get_padded(self._residue_history, -4),
105 | get_padded(self._residue_history, -7),
106 | self._vel,
107 | get_padded(self._obs_history, -4)[1],
108 | get_padded(self._obs_history, -7)[1]
109 | )
110 |
--------------------------------------------------------------------------------
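A minimal sketch of driving the PD motor model above; the gains and torque limit are illustrative values, not the robot's calibrated ones:

    import numpy as np
    from qdpgym.sim.common.motor import PdMotorSim

    motor = PdMotorSim(frequency=500, kp=np.full(12, 80.0), kd=np.full(12, 1.0))
    motor.set_torque_limits(np.full(12, 44.0))

    motor.update_observation(pos=np.zeros(12), vel=np.zeros(12))  # must precede each command
    torque = motor.apply_position(np.full(12, 0.1))  # kp * residue - kd * vel, then clipped
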
/qdpgym/sim/common/noisyhandle.py:
--------------------------------------------------------------------------------
1 | import collections
2 | from typing import Deque, Optional
3 |
4 | import numpy as np
5 |
6 | from qdpgym import tf, utils as ut
7 | from qdpgym.sim.abc import Quadruped, QuadrupedHandle, Snapshot
8 |
9 |
10 | class NoisyHandle(QuadrupedHandle):
11 | def __init__(self, robot: Quadruped, frequency, latency=0):
12 | self._robot = robot
13 | self._freq = frequency
14 | self._latency = latency
15 |
16 | self._obs: Optional[Snapshot] = None
17 | self._obs_buffer: Deque[Snapshot] = collections.deque(maxlen=20)
18 | self._obs_history: Deque[Snapshot] = collections.deque(maxlen=100)
19 |
20 | @property
21 | def raw(self):
22 | return self._robot
23 |
24 | @property
25 | def latency(self):
26 | return self._latency
27 |
28 | @latency.setter
29 | def latency(self, value):
30 | self._latency = value
31 |
32 | @property
33 | def obs_history(self):
34 | return ut.PadWrapper(self._obs_history)
35 |
36 | @property
37 | def cmd_history(self):
38 | return self._robot.cmd_history
39 |
40 | @property
41 | def not_delayed(self):
42 | if self._obs_buffer:
43 | return self._obs_buffer[-1]
44 | return self._obs
45 |
46 | def update_observation(
47 | self,
48 | state: Snapshot,
49 | random_gen: np.random.Generator
50 | ):
51 | """Add noise on observation"""
52 | add_noise = random_gen.normal
53 | obs = Snapshot(
54 | rpy=add_noise(state.rpy, 1e-2),
55 | velocimeter=add_noise(state.velocimeter, 5e-2),
56 | gyro=add_noise(state.gyro, 5e-2),
57 | joint_pos=add_noise(state.joint_pos, 5e-3),
58 | joint_vel=add_noise(state.joint_vel, 1e-1)
59 | )
60 | obs.orientation = tf.Quaternion.from_rpy(obs.rpy)
61 | self._obs_buffer.append(obs)
62 | if len(self._obs_buffer) <= self._latency * self._freq:
63 | self._obs = self._obs_buffer[0]
64 | else:
65 | self._obs = self._obs_buffer.popleft()
66 | self._obs_history.append(self._obs)
67 |
68 | def get_base_pos(self) -> np.ndarray:
69 | return self._obs.position
70 |
71 | def get_base_orn(self) -> np.ndarray:
72 | return self._obs.orientation
73 |
74 | def get_base_rot(self) -> np.ndarray:
75 | return tf.Rotation.from_quaternion(self._obs.orientation)
76 |
77 | def get_base_rpy(self) -> np.ndarray:
78 | return self._obs.rpy
79 |
80 | def get_base_rpy_rate(self) -> np.ndarray:
81 | return tf.get_rpy_rate_from_ang_vel(self._obs.rpy, self._obs.angular_vel)
82 |
83 | def get_base_lin(self) -> np.ndarray:
84 | return self._obs.linear_vel
85 |
86 | def get_base_ang(self) -> np.ndarray:
87 | return self._obs.angular_vel
88 |
89 | def get_joint_pos(self) -> np.ndarray:
90 | return self._obs.joint_pos
91 |
92 | def get_joint_vel(self) -> np.ndarray:
93 | return self._obs.joint_vel
94 |
95 | def get_velocimeter(self):
96 | return self._obs.velocimeter
97 |
98 | def get_gyro(self):
99 | return self._obs.gyro
100 |
101 | def get_accelerometer(self):
102 | return self._obs.accelerometer
103 |
104 | def get_torso_contact(self):
105 | return self._robot.get_torso_contact()
106 |
107 | def get_leg_contacts(self):
108 | return self._robot.get_leg_contacts()
109 |
110 | def get_last_torque(self) -> np.ndarray:
111 | return self._robot.get_last_torque()
112 |
113 | def get_state_history(self, latency):
114 | return ut.get_padded(self._obs_history, -int(latency * self._freq) - 1)
115 |
116 | def get_cmd_history(self, latency):
117 | return self._robot.get_cmd_history(latency)
118 |
119 | def get_foot_contacts(self):
120 | return self._robot.get_foot_contacts()
121 |
122 | def get_contact_forces(self):
123 | return self._robot.get_contact_forces()
124 |
125 | def get_force_sensor(self):
126 | return self._robot.get_force_sensor()
127 |
128 | def get_last_command(self) -> np.ndarray:
129 | return self._robot.get_last_command()
130 |
131 | def reset(self):
132 | self._obs = None
133 | self._obs_buffer.clear()
134 | self._obs_history.clear()
135 |
--------------------------------------------------------------------------------
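A hedged sketch of the noisy handle in isolation; `Snapshot` is constructed with the same fields the handle itself fills in, and `robot=None` only works here because no pass-through getter is called:

    import numpy as np
    from qdpgym.sim.abc import Snapshot
    from qdpgym.sim.common.noisyhandle import NoisyHandle

    handle = NoisyHandle(robot=None, frequency=50, latency=0.02)
    state = Snapshot(rpy=np.zeros(3), velocimeter=np.zeros(3), gyro=np.zeros(3),
                     joint_pos=np.zeros(12), joint_vel=np.zeros(12))
    handle.update_observation(state, np.random.default_rng(0))
    noisy_pos = handle.get_joint_pos()  # delayed by about `latency` seconds, with Gaussian noise
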
/qdpgym/sim/common/tg.py:
--------------------------------------------------------------------------------
1 | import enum
2 | import random
3 |
4 | import numpy as np
5 |
6 | from qdpgym.utils import Angle
7 |
8 |
9 | def power(x, exp):
10 | _x = 1
11 | for _ in range(exp + 1):
12 | yield _x
13 | _x *= x
14 |
15 |
16 | def vertical_tg(h=0.12): # 0.2 in paper
17 | coeff1 = np.array((0., 0., 3., -2.)) * h
18 | coeff2 = np.array((-4., 12., -9., 2.)) * h
19 |
20 | def _tg(phases):
21 | priori = []
22 | for phi in phases:
23 | k = phi * 2 / np.pi
24 | if k <= 0 or k >= 2:
25 | priori.append(np.zeros(3))
26 | k_pow = list(power(k, 3))
27 | if 0 < k <= 1:
28 | priori.append((0., 0., coeff1 @ k_pow))
29 | if 1 < k < 2:
30 | priori.append((0., 0., coeff2 @ k_pow))
31 | return np.array(priori)
32 |
33 | return _tg
34 |
35 |
36 | class PhaseRoller(object):
37 | TROT = (0.0, -np.pi, -np.pi, 0.0)
38 | WALK = (0.0, -np.pi / 2, -np.pi, -np.pi * 3 / 2)
39 | PACE = (0.0, -np.pi, 0.0, -np.pi)
40 | base_frequency = 2.0 # TODO: COMPLETE THE STATE MACHINE
41 |
42 | class InitType(enum.IntEnum):
43 | FIXED = 0
44 | SYMMETRIC = 1
45 | RANDOM = 2
46 |
47 | def __init__(self, time_step, random_state, init_type='random'):
48 | self._time_step = time_step
49 | self._random = random_state
50 | self._init_type = self._check_init_type(init_type)
51 |
52 | self._init_phases = self._get_init_phases()
53 | self._phases = self._init_phases.copy()
54 | self._frequency = np.ones(4) * self.base_frequency
55 | self._cycles = np.zeros(4)
56 |
57 | @staticmethod
58 | def symmetric(phases):
59 | fr, fl, rr, rl = phases
60 | return fl, fr, rl, rr
61 |
62 | def random_symmetric(self, phases):
63 | return phases if self._random.random() > 0.5 else self.symmetric(phases)
64 |
65 | def _check_init_type(self, init_type):
66 | if init_type == 'fixed':
67 | return self.InitType.FIXED
68 | elif init_type == 'symmetric':
69 | return self.InitType.SYMMETRIC
70 | elif init_type == 'random':
71 | return self.InitType.RANDOM
72 | else:
73 | raise RuntimeError(f'Unknown TG Init Mode {init_type}')
74 |
75 | def _get_init_phases(self):
76 | if self._init_type == self.InitType.FIXED:
77 | return np.array(self.TROT, dtype=np.float32)
78 | elif self._init_type == self.InitType.SYMMETRIC:
79 | return np.array(self.random_symmetric(self.TROT), dtype=np.float32)
80 | elif self._init_type == self.InitType.RANDOM:
81 | return self._random.uniform(-np.pi, np.pi, 4).astype(np.float32)
82 |
83 | @property
84 | def phases(self):
85 | return self._phases
86 |
87 | @property
88 | def frequency(self):
89 | return self._frequency
90 |
91 | @property
92 | def cycles(self):
93 | return self._cycles
94 |
95 | def reset(self):
96 | self._init_phases = self._get_init_phases()
97 | self._phases = self._init_phases.copy()
98 | self._frequency = np.ones(4) * self.base_frequency
99 | self._cycles = np.zeros(4)
100 |
101 | def update(self):
102 | _phases = self._phases.copy()
103 | self._phases += self._frequency * self._time_step * 2 * np.pi
104 | flags = np.logical_and(Angle.norm(_phases - self._init_phases) < 0,
105 | Angle.norm(self._phases - self._init_phases) >= 0)
106 | self._cycles[flags] += 1
107 | self._phases = Angle.norm(self._phases)
108 | return self._phases
109 |
110 |
111 | class TgStateMachine(PhaseRoller):
112 | def __init__(
113 | self,
114 | time_step,
115 | random_state,
116 | traj_gen,
117 | init_type='symmetric',
118 | freq_bounds=(0.5, 3.0)
119 | ):
120 | super().__init__(time_step, random_state, init_type)
121 | self._freq_bounds = freq_bounds
122 | self._traj_gen = traj_gen
123 |
124 | def update(self, frequency_offsets=None): # FIXME: how freq_offsets works?
125 | if frequency_offsets is not None:
126 | frequency_offsets = np.asarray(frequency_offsets)
127 | self._frequency = self.base_frequency + frequency_offsets
128 | self._frequency = np.clip(self._frequency, *self._freq_bounds)
129 | return super().update()
130 |
131 | def get_priori_trajectory(self):
132 | return np.asarray(self._traj_gen(self._phases))
133 |
--------------------------------------------------------------------------------
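A hedged sketch of stepping the trajectory-generator state machine with the vertical foot trajectory; the control timestep and frequency offsets are arbitrary:

    import numpy as np
    from qdpgym.sim.common.tg import TgStateMachine, vertical_tg

    tg = TgStateMachine(
        time_step=0.02,
        random_state=np.random.default_rng(0),
        traj_gen=vertical_tg(h=0.12),
    )
    phases = tg.update(frequency_offsets=np.zeros(4))  # advance all four legs by one step
    foot_targets = tg.get_priori_trajectory()          # (4, 3) priori foot positions
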
/qdpgym/sim/mjc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/mjc/__init__.py
--------------------------------------------------------------------------------
/qdpgym/sim/mjc/env.py:
--------------------------------------------------------------------------------
1 | import collections
2 | from typing import Optional, Tuple, Any, Union
3 |
4 | import gym
5 | import numpy as np
6 | from dm_control import mjcf
7 |
8 | from qdpgym.sim.abc import Task, Environment, ARRAY_LIKE
9 | from .quadruped import Aliengo
10 | from .terrain import TerrainBase
11 | from qdpgym.utils import PadWrapper, tf
12 |
13 |
14 | class QuadrupedEnv(Environment):
15 | _observation_space = None
16 | _action_space = None
17 |
18 | def __init__(
19 | self,
20 | robot: Aliengo,
21 | arena: TerrainBase,
22 | task: Task,
23 | timestep=2e-3,
24 | time_limit: float = None,
25 | num_substeps: int = 10,
26 | identifier: str = None,
27 | ):
28 | self._robot = robot
29 | self._arena = arena
30 | self._task = task
31 | self._timestep = timestep
32 | self._time_limit = time_limit
33 | self._num_substeps = num_substeps
34 | self._identifier = identifier or f'{hex(id(self))[-7:]}'
35 | self._step_freq = 1 / (self.timestep * self._num_substeps)
36 |
37 | self._init = False
38 | self._elapsed_sim_steps = 0
39 |
40 | self._arena.mjcf_model.option.timestep = timestep
41 | self._physics: Optional[mjcf.Physics] = None
42 | self._task.register_env(self._robot, self)
43 | self._action_history = collections.deque(maxlen=10)
44 | self._perturbation = None
45 |
46 | self._interact_terrain_samples = []
47 | self._interact_terrain_normal: Optional[np.ndarray] = None
48 | self._interact_terrain_height = 0.0
49 |
50 | @property
51 | def observation_space(self) -> gym.Space:
52 | if self._observation_space is None:
53 | if hasattr(self._task, 'observation_space'):
54 | self._observation_space = self._task.observation_space
55 | else:
56 | self._observation_space = gym.Space()
57 | return self._observation_space
58 |
59 | @property
60 | def action_space(self) -> gym.Space:
61 | if self._action_space is None:
62 | if hasattr(self._task, 'action_space'):
63 | self._action_space = self._task.action_space
64 | else:
65 | self._action_space = gym.Space((12,), float)
66 | return self._action_space
67 |
68 | def reset(
69 | self,
70 | seed: Optional[int] = None,
71 | return_info: bool = False,
72 | options: Optional[dict] = None,
73 | ) -> Union[Any, Tuple[Any, dict]]:
74 |
75 | super().reset(seed=seed, return_info=return_info, options=options)
76 |
77 | self._init = True
78 | self._elapsed_sim_steps = 0
79 | self._action_history.clear()
80 |
81 | self._robot.add_to(self._arena)
82 | self._robot.init_mjcf_model(self.np_random)
83 | self._physics = mjcf.Physics.from_mjcf_model(self._arena.mjcf_model)
84 |
85 | self._task.init_episode()
86 | self._robot.init_physics(self._physics, self.np_random)
87 |
88 | for i in range(50):
89 | self._robot.update_observation(None, minimal=True)
90 | self._robot.apply_command(self._robot.STANCE_CONFIG)
91 | self._physics.step()
92 |
93 | self._action_history.append(np.array(self._robot.STANCE_CONFIG))
94 | self._physics.data.ptr.time = 0.
95 | self._robot.update_observation(self.np_random)
96 |
97 | return (
98 | self._task.get_observation()
99 | # if return_info else (self._task.get_observation(), {})
100 | )
101 |
102 | def close(self):
103 | pass
104 |
105 | @property
106 | def robot(self):
107 | return self._robot
108 |
109 | @property
110 | def physics(self):
111 | return self._physics
112 |
113 | @property
114 | def arena(self):
115 | return self._arena
116 |
117 | @arena.setter
118 | def arena(self, value):
119 | self._init = False
120 | self._arena = value
121 | self._robot.add_to(self._arena)
122 |
123 | @property
124 | def action_history(self):
125 | return PadWrapper(self._action_history)
126 |
127 | @property
128 | def sim_time(self):
129 | return self._physics.time()
130 |
131 | @property
132 | def timestep(self):
133 | return self._timestep
134 |
135 | @property
136 | def num_substeps(self):
137 | return self._num_substeps
138 |
139 | @property
140 | def identifier(self) -> str:
141 | return self._identifier
142 |
143 | def step(self, action: ARRAY_LIKE) -> Tuple[Any, float, bool, dict]:
144 | assert self._init, 'Call `reset` before `step`!'
145 | action = self._task.before_step(action)
146 | action = np.asarray(action)
147 | prev_action = self._action_history[-1]
148 | self._action_history.append(action)
149 |
150 | for i in range(self._num_substeps):
151 | weight = (i + 1) / self._num_substeps
152 | current_action = action * weight + prev_action * (1 - weight)
153 | self._robot.apply_command(current_action)
154 | self._task.before_substep()
155 |
156 | self._apply_perturbation()
157 | self._physics.step()
158 | self._elapsed_sim_steps += 1
159 | self._update_observation()
160 |
161 | self._task.after_substep()
162 | self._task.after_step()
163 |
164 | done = False
165 | info = {}
166 | if self._task.is_failed():
167 | done = True
168 | self._task.on_fail()
169 | elif ((self._time_limit is not None and self.sim_time >= self._time_limit)
170 | or self._task.is_succeeded()):
171 | done = True
172 | info['TimeLimit.truncated'] = True
173 | self._task.on_success()
174 |
175 | reward, reward_info = self._task.get_reward(True)
176 | info['reward_info'] = reward_info
177 | return (
178 | self._task.get_observation(), reward, done, info
179 | )
180 |
181 | def _update_observation(self):
182 | self._robot.update_observation(self.np_random)
183 |
184 | self._interact_terrain_samples.clear()
185 | self._interact_terrain_height = 0.
186 | xy_points = self._robot.get_foot_pos()
187 | for x, y, _ in xy_points:
188 | h = self._arena.get_height(x, y)
189 | self._interact_terrain_height += h
190 | self._interact_terrain_samples.append((x, y, h))
191 | self._interact_terrain_height /= 4
192 | self._interact_terrain_normal = tf.estimate_normal(self._interact_terrain_samples)
193 |
194 | def get_action_rate(self) -> np.ndarray:
195 | if len(self._action_history) < 2:
196 | return np.zeros(12)
197 | actions = [self._action_history[-i - 1] for i in range(2)]
198 | return (actions[0] - actions[1]) * self._step_freq
199 |
200 | def get_action_accel(self) -> np.ndarray:
201 | if len(self._action_history) < 3:
202 | return np.zeros(12)
203 | actions = [self._action_history[-i - 1] for i in range(3)]
204 | return (actions[0] - 2 * actions[1] + actions[2]) * self._step_freq ** 2
205 |
206 | def get_relative_robot_height(self) -> float:
207 | return self._robot.get_base_pos()[2] - self._interact_terrain_height
208 |
209 | def get_interact_terrain_normal(self) -> np.ndarray:
210 | return self._interact_terrain_normal
211 |
212 | def get_interact_terrain_rot(self) -> np.ndarray:
213 | return tf.Rotation.from_zaxis(self._interact_terrain_normal)
214 |
215 | def get_perturbation(self, in_robot_frame=False):
216 | if self._perturbation is None:
217 | return None
218 | elif in_robot_frame:
219 | rotation = self._robot.get_base_rot()
220 | perturbation = np.concatenate(
221 | [rotation.T @ self._perturbation[i * 3:i * 3 + 3] for i in range(2)]
222 | )
223 | else:
224 | perturbation = self._perturbation
225 | return perturbation
226 |
227 | def set_perturbation(self, value=None):
228 | self._perturbation = None if value is None else np.array(value)
229 |
230 | def _apply_perturbation(self):
231 | if self._perturbation is not None:
232 | self._physics.bind(self._robot.entity.root_body).xfrc_applied = self._perturbation
233 |
--------------------------------------------------------------------------------
/qdpgym/sim/mjc/hooks.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import mujoco as mjlib
4 |
5 | from qdpgym import tf
6 | from qdpgym.sim.abc import Hook
7 | from qdpgym.sim.mjc.viewer import ViewerMj
8 |
9 |
10 | class ViewerHook(Hook):
11 | def __init__(self):
12 | self._viewer: Optional[ViewerMj] = None
13 |
14 | def init_episode(self, robot, env):
15 | self._viewer = ViewerMj(env.physics.model.ptr, env.physics.data.ptr)
16 |
17 | def after_substep(self, robot, env):
18 | perturb = env.get_perturbation()
19 | if perturb is not None:
20 | force = perturb[:3]
21 | magnitude = tf.vnorm(force)
22 | mat = tf.Rotation.from_zaxis(force / magnitude)
23 | self._viewer.add_marker(
24 | type=mjlib.mjtGeom.mjGEOM_ARROW,
25 | pos=robot.get_base_pos(), mat=mat,
26 | size=[0.01, 0.01, magnitude / 20],
27 | rgba=(1., 0., 0., 1.)
28 | )
29 | self._viewer.render()
30 |
--------------------------------------------------------------------------------
/qdpgym/sim/mjc/terrain.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import Union, Tuple
3 |
4 | import numpy as np
5 | from dm_control import composer
6 | from scipy.interpolate import interp2d
7 |
8 | from qdpgym.sim.abc import NUMERIC, Terrain
9 |
10 |
11 | def _process_size(size):
12 | if isinstance(size, (int, float)):
13 | return size, size
14 | else:
15 | return tuple(size)
16 |
17 |
18 | class TerrainBase(composer.Arena, Terrain, metaclass=abc.ABCMeta):
19 | pass
20 |
21 |
22 | class TexturedTerrainBase(TerrainBase, metaclass=abc.ABCMeta):
23 | reflectance = 0.2
24 |
25 | def _build(self, size, name=None):
26 | super()._build(name=name)
27 | self._x_size, self._y_size = _process_size(size)
28 |
29 | self._mjcf_root.visual.headlight.set_attributes(
30 | ambient=[.4, .4, .4], diffuse=[.8, .8, .8], specular=[.1, .1, .1])
31 |
32 | self._ground_texture = self._mjcf_root.asset.add(
33 | 'texture', name='groundplane', rgb1=[.2, .3, .4], rgb2=[.1, .2, .3],
34 | type='2d', builtin='checker', width=200, height=200,
35 | mark='edge', markrgb=[0.8, 0.8, 0.8])
36 | # Makes white squares exactly 1x1 length units.
37 | self._ground_material = self._mjcf_root.asset.add(
38 | 'material', name='groundplane', texrepeat=[2, 2],
39 | texuniform=True, reflectance=self.reflectance, texture=self._ground_texture)
40 |
41 | self._terrain_geom = None
42 |
43 | @property
44 | def ground_geoms(self):
45 | return self._terrain_geom,
46 |
47 | def regenerate(self, random_state):
48 | pass
49 |
50 | @property
51 | def size(self):
52 | return self._x_size, self._y_size
53 |
54 | def out_of_range(self, x, y):
55 | return abs(x) > self._x_size / 2 - 1 or abs(y) > self._y_size / 2 - 1
56 |
57 |
58 | class Plain(TexturedTerrainBase):
59 | def _build(self, size, name='groundplane'):
60 | super()._build(name=name, size=size)
61 |
62 | # Build groundplane.
63 | self._terrain_geom = self._mjcf_root.worldbody.add(
64 | 'geom', type='plane', name='terrain', material=self._ground_material,
65 | size=[self._x_size / 2, self._y_size / 2, 0.25])
66 | self._terrain_geom.friction = [1, 0.005, 0.0001]
67 |
68 | def get_height(self, x, y):
69 | return 0.0
70 |
71 | def get_normal(self, x, y):
72 | return np.array((0., 0., 1.))
73 |
74 |
75 | class Hills(TexturedTerrainBase):
76 | def _build(self, size, resol: float,
77 | *rough_dsp: Tuple[NUMERIC, NUMERIC],
78 | name='hills'):
79 | super()._build(name=name, size=size)
80 | self._resol = resol
81 | self._ncol = int(self._x_size / resol) + 1
82 | self._nrow = int(self._y_size / resol) + 1
83 | self._rough_dsp = rough_dsp
84 |
85 | self._mjcf_root.asset.add(
86 | 'hfield', name=f'{name}_hfield',
87 | nrow=self._nrow, ncol=self._ncol,
88 | size=[self._x_size / 2, self._y_size / 2, 1, 0.1])
89 | self._terrain_geom = self._mjcf_root.worldbody.add(
90 | 'geom', name='terrain', type='hfield', material=self._ground_material,
91 | pos=(0, 0, -0.01), hfield=f'{name}_hfield')
92 |
93 | def initialize_episode(self, physics, random_state: np.random.RandomState):
94 | physics.model.hfield_data[...] = self.regenerate(random_state)
95 |
96 | def regenerate(self, random_state):
97 | hfield_data = np.zeros((self._nrow, self._ncol))
98 | for rough, dsp in self._rough_dsp:
99 | x = np.arange(-3, int(self._ncol / dsp) + 4)
100 | y = np.arange(-3, int(self._nrow / dsp) + 4)
101 | hfield_dsp = random_state.uniform(0, rough, (x.size, y.size))
102 | terrain_func = interp2d(x, y, hfield_dsp, kind='cubic')
103 | x_usp = np.arange(self._ncol) / dsp
104 | y_usp = np.arange(self._nrow) / dsp
105 | hfield_data += terrain_func(x_usp, y_usp)
106 | return hfield_data.reshape(-1)
107 |
--------------------------------------------------------------------------------
/qdpgym/sim/resources/acnet_220526.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/acnet_220526.pt
--------------------------------------------------------------------------------
/qdpgym/sim/resources/actuator_net_with_history.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/actuator_net_with_history.pt
--------------------------------------------------------------------------------
/qdpgym/sim/resources/aliengo/meshes/calf.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/calf.stl
--------------------------------------------------------------------------------
/qdpgym/sim/resources/aliengo/meshes/hip.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/hip.stl
--------------------------------------------------------------------------------
/qdpgym/sim/resources/aliengo/meshes/thigh.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/thigh.stl
--------------------------------------------------------------------------------
/qdpgym/sim/resources/aliengo/meshes/thigh_mirror.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/thigh_mirror.stl
--------------------------------------------------------------------------------
/qdpgym/sim/resources/aliengo/meshes/trunk.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/sim/resources/aliengo/meshes/trunk.stl
--------------------------------------------------------------------------------
/qdpgym/sim/resources/aliengo/model/aliengo.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/qdpgym/sim/task.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Callable, List, Any
2 |
3 | from .abc import Task, Hook, Quadruped, Environment
4 |
5 |
6 | class NullTask(Task):
7 | """The Simplest Task Structure"""
8 |
9 | def __init__(self):
10 | self._robot: Optional[Quadruped] = None
11 | self._env: Optional[Environment] = None
12 | self._hooks: List[Hook] = []
13 | self._hook_names = {}
14 | self._options = []
15 |
16 | def register_env(self, robot, env):
17 | self._robot = robot
18 | self._env = env
19 | for hook in self._hooks:
20 | hook.initialize(self._robot, self._env)
21 |
22 | def add_hook(self, hook: Hook, name=None):
23 | if name is None:
24 | name = hook.__class__.__name__
25 | if name in self._hook_names:
26 | raise ValueError(f'Duplicated Hook `{name}`')
27 | hook.register_task(self)
28 | self._hooks.append(hook)
29 | self._hook_names[name] = hook
30 | return self
31 |
32 | def remove_hook(self, name=None):
33 | if name not in self._hook_names:
34 | raise RuntimeError(f'Hook `{name}` not found')
35 |         obj = self._hook_names.pop(name)
36 | for i, hook in enumerate(self._hooks):
37 | if hook is obj:
38 | self._hooks.pop(i)
39 |
40 | def get_observation(self):
41 | return None
42 |
43 | def get_reward(self, detailed=True):
44 | if detailed:
45 | return 0., {}
46 | else:
47 | return 0.
48 |
49 | def is_succeeded(self):
50 | return False
51 |
52 | def is_failed(self):
53 | return False
54 |
55 | def init_episode(self):
56 | for hook in self._hooks:
57 | hook.init_episode(self._robot, self._env)
58 |
59 | def before_step(self, action):
60 | for hook in self._hooks:
61 | hook.before_step(self._robot, self._env)
62 | return action
63 |
64 | def before_substep(self):
65 | for hook in self._hooks:
66 | hook.before_substep(self._robot, self._env)
67 |
68 | def after_step(self):
69 | for hook in self._hooks:
70 | hook.after_step(self._robot, self._env)
71 |
72 | def after_substep(self):
73 | for hook in self._hooks:
74 | hook.after_substep(self._robot, self._env)
75 |
76 | def on_success(self):
77 | for hook in self._hooks:
78 | hook.on_success(self._robot, self._env)
79 |
80 | def on_fail(self):
81 | for hook in self._hooks:
82 | hook.on_fail(self._robot, self._env)
83 |
84 |
85 | class RewardRegistry(object):
86 | def __init__(self):
87 | self._robot = None
88 | self._env = None
89 | self._task = None
90 |
91 | self._rewards_set = set()
92 | self._rewards_weights = []
93 | self._weight_sum = 0.0
94 | self._coefficient = 1.0
95 | self._reward_details = {}
96 |
97 | def register_task(self, robot, env, task):
98 | self._robot, self._env, self._task = robot, env, task
99 |
100 | def add_reward(
101 | self,
102 | name: str,
103 | reward_obj: Callable[..., float],
104 | weight: float
105 | ):
106 | if name in self._rewards_set:
107 | raise RuntimeError(f'Duplicated Reward Type {name}')
108 | self._rewards_set.add(name)
109 | self._weight_sum += weight
110 | self._rewards_weights.append((reward_obj, weight))
111 |
112 | def set_coeff(self, coeff):
113 | self._coefficient = coeff
114 |
115 | def report(self):
116 | from qdpgym.utils import colored_str
117 | print(colored_str(f'Got {len(self._rewards_weights)} types of rewards:\n', 'white'),
118 | f"{'Reward Type':<28}Weight * {self._coefficient:.3f}", sep='')
119 | for reward, weight in self._rewards_weights:
120 | reward_name: str = reward.__class__.__name__
121 | length = len(reward_name)
122 | if reward_name.endswith('Reward'):
123 | reward_name = colored_str(reward_name, 'green')
124 | elif reward_name.endswith('Penalty'):
125 | reward_name = colored_str(reward_name, 'magenta')
126 | print(f'{reward_name}{" " * (28 - length)}{weight:.3f}')
127 | print()
128 |
129 | def calc_reward(self, detailed=True):
130 | self._reward_details.clear()
131 | reward_value = 0.0
132 | for reward, weight in self._rewards_weights:
133 | rew = reward(self._robot, self._env, self._task)
134 | self._reward_details[reward.__class__.__name__] = rew
135 | reward_value += rew * weight
136 | reward_value *= self._coefficient
137 | if detailed:
138 | return reward_value, self._reward_details
139 | else:
140 | return reward_value
141 |
142 |
143 | class BasicTask(NullTask):
144 | ALL_REWARDS: Any
145 |
146 | def __init__(self, substep_reward_on=True):
147 | self._reward_registry = RewardRegistry()
148 | self._substep_reward_on = substep_reward_on
149 | self._reward = 0.
150 | self._reward_details = {}
151 | self._substep_cnt = 0
152 |
153 | super().__init__()
154 |
155 | def add_reward(self, name: str, weight: float = 0.):
156 | reward_class = getattr(self.ALL_REWARDS, name)
157 | self._reward_registry.add_reward(name, reward_class(), weight)
158 |
159 | def set_reward_coeff(self, value):
160 | self._reward_registry.set_coeff(value)
161 |
162 | def register_env(self, robot, env):
163 | super().register_env(robot, env)
164 | self._reward_registry.register_task(robot, env, self)
165 |
166 | def before_step(self, action):
167 | self._reward = 0.
168 | self._reward_details.clear()
169 | return super().before_step(action)
170 |
171 | def after_substep(self):
172 | if self._substep_reward_on:
173 | reward, reward_details = self._reward_registry.calc_reward(detailed=True)
174 | self._reward += reward
175 | if self._reward_details:
176 | for k, v in reward_details.items():
177 | self._reward_details[k] += v
178 | else:
179 | self._reward_details = reward_details.copy()
180 | self._substep_cnt += 1
181 | super().after_substep()
182 |
183 | def after_step(self):
184 | if self._substep_reward_on:
185 | self._reward /= self._substep_cnt
186 | for k in self._reward_details:
187 | self._reward_details[k] /= self._substep_cnt
188 | self._substep_cnt = 0
189 | else:
190 | self._reward, self._reward_details = self._reward_registry.calc_reward(detailed=True)
191 | super().after_step()
192 |
193 | def get_reward(self, detailed=True):
194 | if detailed:
195 | return self._reward, self._reward_details
196 | else:
197 | return self._reward
198 |
--------------------------------------------------------------------------------
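
A minimal usage sketch (not part of the repository) of how `RewardRegistry` above composes weighted reward terms. The two toy reward classes and the stand-in `None` arguments are invented for illustration; real terms follow the same `(robot, env, task)` call signature and are registered against live simulator objects. It assumes `qdpgym` and its dependencies are importable.

```python
from qdpgym.sim.task import RewardRegistry


class ConstantReward:
    """Toy reward term, for illustration only."""
    def __call__(self, robot, env, task):
        return 1.0


class ConstantPenalty:
    """Toy penalty term, for illustration only."""
    def __call__(self, robot, env, task):
        return -0.5


registry = RewardRegistry()
registry.register_task(robot=None, env=None, task=None)  # stand-ins; no simulator needed here
registry.add_reward('ConstantReward', ConstantReward(), weight=0.8)
registry.add_reward('ConstantPenalty', ConstantPenalty(), weight=0.2)
registry.set_coeff(0.1)
registry.report()  # prints the table of reward terms and weights
total, details = registry.calc_reward(detailed=True)
print(total)  # 0.1 * (1.0 * 0.8 + (-0.5) * 0.2) = 0.07
```
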
/qdpgym/tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tasks/__init__.py
--------------------------------------------------------------------------------
/qdpgym/tasks/identify/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tasks/identify/__init__.py
--------------------------------------------------------------------------------
/qdpgym/tasks/identify/identify.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import math
3 | import os
4 |
5 | import numpy as np
6 | import torch
7 | import wandb
8 | from torch import nn
9 | from torch.utils.data import DataLoader, Dataset, random_split, ConcatDataset
10 |
11 | from qdpgym.sim.common.identify import ActuatorNet
12 | from qdpgym.utils import get_timestamp, log
13 |
14 |
15 | class RobotDatasetWithHistory(Dataset):
16 | def __init__(self, path, history_interval, slices=None):
17 | print(path)
18 | self.data = np.load(path)
19 | if slices:
20 | self.error = self.data['angle_error'][slices, :].astype(np.float32)
21 | self.velocity = self.data['motor_velocity'][slices, :].astype(np.float32)
22 | self.torque = self.data['motor_torque'][slices, :].astype(np.float32)
23 | else:
24 | self.error = self.data['angle_error'].astype(np.float32)
25 | self.velocity = self.data['motor_velocity'].astype(np.float32)
26 | self.torque = self.data['motor_torque'].astype(np.float32)
27 | self.X = np.stack((self.error[2 * history_interval:-1, ].flatten(),
28 | self.error[history_interval:-1 - history_interval, ].flatten(),
29 | self.error[:-1 - 2 * history_interval, ].flatten(),
30 | self.velocity[2 * history_interval:-1, ].flatten(),
31 | self.velocity[history_interval:-1 - history_interval, ].flatten(),
32 | self.velocity[:-1 - 2 * history_interval, ].flatten()), axis=1)
33 | self.Y = np.expand_dims(self.torque[1 + 2 * history_interval:, ].flatten(), axis=1)
34 | self.size = len(self.error)
35 |
36 | def __len__(self):
37 | return len(self.X)
38 |
39 | def __getitem__(self, idx):
40 | return self.X[idx], self.Y[idx]
41 |
42 |
43 | def train_actuator_net(actuator_net, dataset_class, history_interval,
44 | lr, num_epochs, batch_size, device):
45 | dataset_paths = [os.path.join(RSC_DIR, filename) for filename in os.listdir(RSC_DIR)]
46 | dataset = ConcatDataset([dataset_class(path, history_interval) for path in dataset_paths if path.endswith('npz')])
47 | train_len = int(0.8 * len(dataset))
48 | test_len = len(dataset) - train_len
49 | train_data, test_data = random_split(dataset, (train_len, test_len))
50 | train_loader = DataLoader(train_data, batch_size, shuffle=True)
51 | test_loader = DataLoader(test_data, batch_size, shuffle=True)
52 | net = actuator_net.to(device)
53 | optim = torch.optim.AdamW(net.parameters(), lr=lr)
54 | criterion = nn.MSELoss(reduction='sum')
55 |
56 | wandb.init(project='actuator_net', name=str((actuator_net.hidden_dims, lr)), mode=None)
57 | log_dir = f'ident/{get_timestamp()}'
58 | os.makedirs(log_dir, exist_ok=True)
59 | torch.set_printoptions(linewidth=10000)
60 | for i in range(1, num_epochs + 1):
61 | train_loss, test_loss = 0., 0.
62 | for X, Y in train_loader:
63 | optim.zero_grad()
64 | X, Y = X.to(device), Y.to(device)
65 | loss = criterion(net(X), Y)
66 | train_loss += loss.item()
67 | loss.backward()
68 | optim.step()
69 | train_loss /= train_len
70 | log.info(f'Epoch {i:>4}/{num_epochs} train loss {train_loss:.6f}')
71 |
72 | for X, Y in test_loader:
73 | X, Y = X.to(device), Y.to(device)
74 | loss = criterion(net(X), Y)
75 | test_loss += loss.item()
76 | test_loss /= test_len
77 | log.info(f'Epoch {i:>4}/{num_epochs} test loss {test_loss:.6f}')
78 | wandb.log({'Train/loss': train_loss, 'Test/loss': test_loss})
79 | if i % 10 == 0:
80 | torch.save({'model': net.state_dict(), 'hidden_dims': net.hidden_dims},
81 | os.path.join(log_dir, f'{i}.pt'))
82 |
83 |
84 | def get_statistics(model, history_interval):
85 | dataset_paths = [os.path.join(RSC_DIR, filename) for filename in os.listdir(RSC_DIR)]
86 | dataset = ConcatDataset([DatasetClass(path, history_interval) for path in dataset_paths if path.endswith('npz')])
87 |
88 | criterion = nn.MSELoss(reduction='sum')
89 | test_loader = DataLoader(dataset, 1000, shuffle=True)
90 | test_loss = 0.
91 | for X, Y in test_loader:
92 | X, Y = X.to(device), Y.to(device)
93 | loss = criterion(model(X), Y)
94 | test_loss += loss.item()
95 | mse = test_loss / (len(dataset) - 1)
96 | print('MSE:', mse)
97 | print('RMSE:', math.sqrt(mse))
98 |
99 |
100 | if __name__ == '__main__':
101 | parser = argparse.ArgumentParser(description='Train or test quadruped locomotion.')
102 | parser.add_argument('-r', '--rsc', metavar='',
103 | help='motor data directory', required=True)
104 | parser.add_argument('-i', '--interval', type=int, help='history interval', default=3)
105 | parser.add_argument('-b', '--batch-size', type=int, help='batch size', default=1000)
106 | parser.add_argument('-l', '--lr', type=float, help='learning rate', default=1e-4)
107 | parser.add_argument('-e', '--epoch', type=int, help='num epochs', default=2000)
108 | parser.add_argument('--cuda', action='store_true', help='use cuda for training')
109 | parser.add_argument('--train', action='store_true', help='if train else test')
110 | args = parser.parse_args()
111 |
112 | RSC_DIR = args.rsc
113 |     if args.cuda and torch.cuda.is_available():
114 | DEVICE = 'cuda'
115 | else:
116 | DEVICE = 'cpu'
117 | TRAIN = args.train
118 | HISTORY_INTERVAL = args.interval
119 | BATCH_SIZE = args.batch_size
120 | LEARNING_RATE = args.lr
121 | NUM_EPOCHS = args.epoch
122 |
123 | np.set_printoptions(3, linewidth=10000, suppress=True)
124 | DatasetClass = RobotDatasetWithHistory
125 | device = torch.device(DEVICE)
126 | if TRAIN:
127 | actuator_net = ActuatorNet(hidden_dims=(32, 32, 32))
128 | train_actuator_net(
129 | actuator_net,
130 | DatasetClass,
131 | history_interval=HISTORY_INTERVAL,
132 | batch_size=BATCH_SIZE,
133 | lr=LEARNING_RATE,
134 | num_epochs=NUM_EPOCHS,
135 | device=device
136 | )
137 | get_statistics(actuator_net, HISTORY_INTERVAL)
138 | else:
139 | model_path = '/home/jewel/Workspaces/QuadrupedRLv2/qdpgym/sim/resources/acnet_220526.pt'
140 | model_info = torch.load(model_path, map_location=device)
141 | actuator_net = ActuatorNet(hidden_dims=model_info['hidden_dims']).to(device)
142 | actuator_net.load_state_dict(model_info['model'])
143 | get_statistics(actuator_net, HISTORY_INTERVAL)
144 |
145 | # dataset = DatasetClass(os.path.join(RSC_DIR, 'state_cmd_data_NoLoadT0.4.npz'))
146 | # motor_idx = 2
147 | # slices = slice(5000, 5500)
148 | #
149 | # error = dataset.error[slices, motor_idx]
150 | # error_his1 = dataset.error[slice(slices.start - 5, slices.stop - 5), motor_idx]
151 | # error_his2 = dataset.error[slice(slices.start - 10, slices.stop - 10), motor_idx]
152 | # velocity = dataset.velocity[slices, motor_idx]
153 | # velocity_his1 = dataset.velocity[slice(slices.start - 5, slices.stop - 5), motor_idx]
154 | # velocity_his2 = dataset.velocity[slice(slices.start - 10, slices.stop - 10), motor_idx]
155 | # error_rate = dataset.data['angle_error_rate'][slices, motor_idx].astype(np.float32)
156 | # torque = dataset.torque[slices.start + 1:slices.stop + 1, motor_idx]
157 | # predicted = actuator_net.calc_torque(error, error_his1, error_his2, velocity, velocity_his1, velocity_his2)
158 |
159 | # import matplotlib.pyplot as plt
160 | #
161 | # fig, ax = plt.subplots(3, 1, figsize=(9.6, 6.4), gridspec_kw={'height_ratios': [1, 1, 2]})
162 | # ax[0].plot(error)
163 | # ax[0].set_ylabel('error(rad)', rotation=0)
164 | # ax[0].yaxis.set_label_coords(-0.05, 0.99)
165 | # ax[0].set_xticks([])
166 | # ax[1].plot(velocity)
167 | # ax[1].set_xticks([])
168 | # ax[1].set_ylabel('velocity(rad$\cdot$s$^{-1}$)', rotation=0)
169 | # ax[1].yaxis.set_label_coords(-0.03, 1.03)
170 | # ax[2].set_xlabel('sample point number')
171 | # ax[2].set_ylabel('torque')
172 | # ax[2].set_ylabel('torque(N$\cdot$m)', rotation=0)
173 | # ax[2].yaxis.set_label_coords(-0.03, 0.99)
174 | # ax[2].plot(torque, linewidth=1)
175 | # ax[2].plot(predicted, linewidth=1, linestyle='--')
176 | # ax[2].plot(error * 150 - velocity * 4, linewidth=1, linestyle='dashdot')
177 | # ax[2].set_ylim(-20, 25)
178 | # ax[2].legend(['measured', 'identified', 'vanilla pd'])
179 | # fig.show()
180 | # fig.savefig('acnet.pdf')
181 |
--------------------------------------------------------------------------------
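
For reference, a small shape check (illustrative only, not part of the repository) of how `RobotDatasetWithHistory` stacks current and delayed error/velocity samples into 6-feature inputs with 1-dimensional torque targets. It writes a fake `.npz` with randomly generated motor data; importing the module above also pulls in `torch` and `wandb`.

```python
import os
import tempfile

import numpy as np

from qdpgym.tasks.identify.identify import RobotDatasetWithHistory

# Fake log: 1000 time steps of 12 motors, using the same keys as the real recordings.
tmp = os.path.join(tempfile.mkdtemp(), 'fake.npz')
np.savez(
    tmp,
    angle_error=np.random.randn(1000, 12),
    motor_velocity=np.random.randn(1000, 12),
    motor_torque=np.random.randn(1000, 12),
)

ds = RobotDatasetWithHistory(tmp, history_interval=3)
x, y = ds[0]
print(x.shape, y.shape)  # (6,) (1,)
print(len(ds))           # (1000 - 1 - 2 * 3) * 12 = 11916 per-motor samples
```
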
/qdpgym/tasks/loct/__init__.py:
--------------------------------------------------------------------------------
1 | from .loct import *
2 | from .app import *
--------------------------------------------------------------------------------
/qdpgym/tasks/loct/app.py:
--------------------------------------------------------------------------------
1 | import qdpgym
2 | from qdpgym import sim
3 | from qdpgym.tasks import loct
4 |
5 | __all__ = ['LocomotionApp']
6 |
7 |
8 | class LocomotionApp(sim.Application):
9 | def __init__(self, policy):
10 | robot = sim.Aliengo(500, 'actuator_net', noisy=False)
11 | task = loct.LocomotionV0()
12 | if qdpgym.sim_engine == qdpgym.Sim.BULLET:
13 | arena = sim.TerrainBase()
14 | task.add_hook(sim.ExtraViewerHook())
15 | task.add_hook(sim.RandomTerrainHook())
16 | task.add_hook(sim.RandomPerturbHook())
17 | task.add_hook(loct.GamepadCommanderHook())
18 | # task.add_hook(sim.StatisticsHook())
19 | # task.add_hook(hooks.VideoRecorderBtHook())
20 | else:
21 | raise NotImplementedError
22 | env = sim.QuadrupedEnv(robot, arena, task)
23 | # task.add_reward('UnifiedLinearReward')
24 | super().__init__(robot, env, task, policy)
25 |
--------------------------------------------------------------------------------
/qdpgym/tasks/loct/reward.py:
--------------------------------------------------------------------------------
1 | import math
2 | from abc import ABC
3 |
4 | import numpy as np
5 |
6 | from qdpgym.utils import tf
7 |
8 | ATANH0_95 = math.atanh(0.95)
9 | ATANH0_9 = math.atanh(0.9)
10 | SQRT_LOG10 = math.sqrt(math.log(10))
11 | SQRT_LOG20 = math.sqrt(math.log(20))
12 |
13 |
14 | class Reward(ABC):
15 | def __call__(self, robot, env, task) -> float:
16 | raise NotImplementedError
17 |
18 |
19 | def tanh2_reshape(lower, upper):
20 | w = np.sqrt(ATANH0_95) / (upper - lower)
21 |
22 | def _reshape(v):
23 | return np.tanh(np.power((v - lower) * w, 2)) if v > lower else 0
24 |
25 | return _reshape
26 |
27 |
28 | def tanh2_reverse(lower, upper, value):
29 | w = np.sqrt(ATANH0_95) / (upper - lower)
30 | return np.sqrt(np.arctanh(value)) / w + lower
31 |
32 |
33 | def tanh_reshape(lower, upper):
34 | """
35 | Zoom tanh so that f(lower) = -0.9 and f(upper) = 0.9
36 | :return: function f
37 | """
38 | middle = (lower + upper) / 2
39 | w = ATANH0_9 / (upper - middle)
40 |
41 | def _reshape(v):
42 | return np.tanh((v - middle) * w)
43 |
44 | return _reshape
45 |
46 |
47 | def tanh_reverse(lower, upper, value):
48 | middle = (lower + upper) / 2
49 | w = ATANH0_9 / (upper - middle)
50 | return np.arctanh(value) / w + middle
51 |
52 |
53 | def exp_m2_reshape(upper):
54 | w = upper / SQRT_LOG10
55 |
56 | def _reshape(v):
57 | return np.exp(-(v / w) ** 2)
58 |
59 | return _reshape
60 |
61 |
62 | def quadratic_linear_reshape(upper):
63 | k = 1 / upper ** 2
64 | kl = 2 * k * upper
65 |
66 | def _reshape(v):
67 | v = abs(v)
68 | return v ** 2 * k if v < upper else kl * (v - upper) + 1
69 |
70 | return _reshape
71 |
72 |
73 | def soft_constrain(thr, upper):
74 | def _reshape(v):
75 | excess = abs(v) - thr
76 | if excess > 0:
77 | return -(excess / upper) ** 2
78 | return 0.
79 |
80 | return _reshape
81 |
82 |
83 | def expm2_resh(upper):
84 | w = upper / SQRT_LOG10
85 |
86 | def _reshape(v):
87 | return np.exp(-(v / w) ** 2)
88 |
89 | return _reshape
90 |
91 |
92 | class LinearVelocityReward(Reward):
93 | def __init__(self, forward=1.5, lateral=1.0, ort_upper=0.3, ort_weight=0.33):
94 | self.forward, self.lateral = forward, lateral
95 | self.coeff = forward * lateral
96 | self.ort_reshape = tanh2_reshape(0, ort_upper)
97 | self.ort_weight = ort_weight
98 |
99 | def __call__(self, robot, env, task):
100 | lin_cmd = task.cmd[:2]
101 | lin_vel = robot.get_velocimeter()[:2]
102 | proj_vel = np.dot(lin_cmd, lin_vel)
103 | ort_vel = tf.vnorm(lin_vel - lin_cmd[:2] * proj_vel)
104 | # print(proj_vel, ort_vel)
105 | ort_pen = 1 - self.ort_reshape(ort_vel)
106 | if (lin_cmd == 0.).all():
107 | return ort_pen
108 | proj_rew = self.reshape(self.get_desired_velocity(lin_cmd), proj_vel)
109 | return (1 - self.ort_weight) * proj_rew + self.ort_weight * ort_pen
110 |
111 | def get_desired_velocity(self, cmd):
112 | return self.coeff / math.hypot(self.lateral * cmd[0], self.forward * cmd[1])
113 |
114 | @staticmethod
115 | def reshape(scale, value):
116 | return math.tanh(value * ATANH0_9 / scale)
117 |
118 |
119 | class YawRateReward(Reward):
120 | def __init__(self, upper_pos=1.0, upper_neg=0.45):
121 | self.reshape_pos = tanh_reshape(-upper_pos, upper_pos)
122 | self.reshape_neg = quadratic_linear_reshape(upper_neg)
123 |
124 | def __call__(self, robot, env, task):
125 | yaw_cmd, yaw_rate = task.cmd[2], robot.get_base_rpy_rate()[2]
126 | if yaw_cmd != 0.0:
127 | return self.reshape_pos(yaw_rate / yaw_cmd)
128 | else:
129 | return 1 - self.reshape_neg(abs(yaw_rate))
130 |
131 |
132 | class RotationReward(Reward):
133 | def __init__(self, max_ang=3.0, perception=1.0):
134 | self.max_ang = max_ang
135 | self.reshape = expm2_resh(perception)
136 |
137 | def __call__(self, robot, env, task):
138 | ang_cmd = task.cmd[2]
139 | ang_vel = robot.get_base_rpy_rate()[2]
140 | ang_rew = self.reshape(ang_vel - ang_cmd * self.max_ang)
141 | # print(ang_cmd, ang_vel, ang_rew)
142 | return ang_rew
143 |
144 |
145 | class RollPitchRatePenalty(Reward):
146 | def __init__(self, dr_upper=1.0, dp_upper=0.5):
147 | self.dr_reshape = tanh2_reshape(0.0, dr_upper)
148 | self.dp_reshape = tanh2_reshape(0.0, dp_upper)
149 |
150 | def __call__(self, robot, env, task):
151 | r_rate, p_rate, _ = robot.get_base_rpy_rate()
152 | return 1 - (self.dr_reshape(abs(r_rate)) +
153 | self.dp_reshape(abs(p_rate))) / 2
154 |
155 |
156 | # class OrthogonalLinearPenalty(Reward):
157 | # def __init__(self, linear_upper=0.3):
158 | # self.reshape = tanh2_reshape(0, linear_upper)
159 | #
160 | # def __call__(self, robot, env, task):
161 | # linear = robot.get_velocimeter()[:2]
162 | # v_o = np.asarray(linear) - np.asarray(cmd[:2]) * np.dot(linear, cmd[:2])
163 | # return 1 - self.reshape(tf.vnorm(v_o))
164 |
165 |
166 | class VerticalLinearPenalty(Reward):
167 | def __init__(self, upper=0.4):
168 | self.reshape = quadratic_linear_reshape(upper)
169 |
170 | def __call__(self, robot, env, task):
171 | return 1 - self.reshape(robot.get_velocimeter()[2])
172 |
173 |
174 | class BodyPosturePenalty(Reward):
175 | def __init__(self, roll_upper=np.pi / 12, pitch_upper=np.pi / 24):
176 | self.roll_reshape = quadratic_linear_reshape(roll_upper)
177 | self.pitch_reshape = quadratic_linear_reshape(pitch_upper)
178 |
179 | def __call__(self, robot, env, task):
180 | trnZ = env.get_interact_terrain_normal()
181 | robot_rot = robot.get_base_rot()
182 | trnY = tf.vcross(trnZ, robot_rot[:, 0])
183 | trnX = tf.vcross(trnY, trnZ)
184 | r, p, _ = tf.Rpy.from_rotation(np.array((trnX, trnY, trnZ)) @ robot_rot)
185 | return 1 - (self.roll_reshape(r) + self.pitch_reshape(p)) / 2
186 |
187 |
188 | class BodyHeightReward(Reward):
189 | def __init__(self, des=0.4, range_=0.03):
190 | self.des = des
191 | self.reshape = quadratic_linear_reshape(range_)
192 |
193 | def __call__(self, robot, env, task):
194 | return env.get_relative_robot_height()
195 |
196 |
197 | class ActionSmoothnessReward(Reward):
198 | def __init__(self, upper=400):
199 | self.reshape = exp_m2_reshape(upper)
200 |
201 | def __call__(self, robot, env, task):
202 | return self.reshape(env.get_action_accel()).sum() / 12
203 |
204 |
205 | class JointMotionPenalty(Reward):
206 | def __init__(self, vel_upper=6, acc_upper=500):
207 | self.vel_reshape = np.vectorize(quadratic_linear_reshape(vel_upper))
208 | self.acc_reshape = np.vectorize(quadratic_linear_reshape(acc_upper))
209 |
210 | def __call__(self, robot, env, task):
211 | vel_pen = self.vel_reshape(robot.get_joint_vel()).sum()
212 | acc_pen = self.acc_reshape(robot.get_joint_acc()).sum()
213 | return 1 - (vel_pen + acc_pen) / 12
214 |
215 |
216 | # class TorqueGradientPenalty(Reward):
217 | # def __init__(self, upper=200):
218 | # self.reshape = quadratic_linear_reshape(upper)
219 | #
220 | # def __call__(self, cmd, env, robot):
221 | # torque_grad = robot.getTorqueGradients()
222 | # return -sum(self.reshape(grad) for grad in torque_grad) / 12
223 |
224 |
225 | class FootSlipPenalty(Reward):
226 | def __init__(self, upper=0.5):
227 | self.reshape = np.vectorize(quadratic_linear_reshape(upper))
228 |
229 | def __call__(self, robot, env, task):
230 | return 1 - sum(self.reshape(robot.get_slip_vel()))
231 |
232 |
233 | # class HipAnglePenalty(Reward):
234 | # def __init__(self, upper=0.3):
235 | # self.reshape = np.vectorize(quadratic_linear_reshape(upper))
236 | #
237 | # def __call__(self, cmd, env, robot):
238 | # hip_angles = robot.getJointPositions()[(0, 3, 6, 9),]
239 | # return -sum(self.reshape(hip_angles)) / 4
240 |
241 |
242 | class JointConstraintPenalty(Reward):
243 | def __init__(self, constraints=(0., 0., 0.), upper=(0.4, 0.6, 0.6)):
244 | self.hip_reshape = np.vectorize(soft_constrain(constraints[0], upper[0]))
245 | self.thigh_reshape = np.vectorize(soft_constrain(constraints[1], upper[1]))
246 | self.shank_reshape = np.vectorize(soft_constrain(constraints[2], upper[2]))
247 |
248 | def __call__(self, robot, env, task):
249 | joint_angles = robot.get_joint_pos() - robot.STANCE_CONFIG
250 | return (self.hip_reshape(joint_angles[((0, 3, 6, 9),)]).sum() +
251 | self.thigh_reshape(joint_angles[((1, 4, 7, 10),)]).sum() +
252 | self.shank_reshape(joint_angles[((2, 5, 8, 11),)]).sum()) / 12
253 |
254 |
255 | # class TrivialStridePenalty(Reward):
256 | # def __init__(self, lower=-0.2, upper=0.4):
257 | # self.reshape = tanh_reshape(lower, upper)
258 | #
259 | # def __call__(self, robot, env, task):
260 | # strides = [np.dot(s, cmd[:2]) for s in robot.get_strides()]
261 | # return sum(self.reshape(s) for s in strides if s != 0.0)
262 | # # return 1 - sum(1 - self.reshape(s) for s in strides if s != 0.0)
263 |
264 |
265 | class FootClearanceReward(Reward):
266 | def __init__(self, upper=0.08):
267 | self.reshape = tanh2_reshape(0., upper)
268 |
269 | def __call__(self, robot, env, task):
270 | foot_clearances = robot.get_clearances()
271 | return sum(self.reshape(c) for c in foot_clearances)
272 |
273 |
274 | class AliveReward(Reward):
275 | def __call__(self, robot, env, task):
276 | return 1.0
277 |
278 |
279 | class ClearanceOverTerrainReward(Reward):
280 | def __call__(self, robot, env, task):
281 | reward = 0.
282 | for x, y, z in robot.get_foot_pos():
283 | terrain_extremum = env.arena.get_peak((x - 0.1, x + 0.1),
284 | (y - 0.1, y + 0.1))[2]
285 | if z - robot.FOOT_RADIUS > terrain_extremum + 0.01:
286 | reward += 1 / 2
287 | return reward
288 |
289 |
290 | class BodyCollisionPenalty(Reward):
291 | def __call__(self, robot, env, task):
292 | contact_sum = 0
293 | for i, contact in enumerate(robot.get_leg_contacts()):
294 | if contact and i % 3 != 2:
295 | contact_sum += 1
296 | return 1 - contact_sum
297 |
298 |
299 | class TorquePenalty(Reward):
300 | def __init__(self, upper=900):
301 | self.reshape = np.vectorize(quadratic_linear_reshape(upper))
302 |
303 | def __call__(self, robot, env, task):
304 | return 1 - sum(self.reshape(robot.get_last_torque() ** 2))
305 |
--------------------------------------------------------------------------------
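
A quick numeric sanity check (illustrative only) of the reshaping helpers defined above: `tanh_reshape(lower, upper)` maps `lower`/`upper` to roughly -0.9/0.9, `tanh2_reshape(lower, upper)` rises from 0 at `lower` to about 0.95 at `upper`, and `quadratic_linear_reshape(upper)` is quadratic up to `upper` (where it equals 1) and linear beyond.

```python
from qdpgym.tasks.loct.reward import (
    quadratic_linear_reshape,
    tanh2_reshape,
    tanh_reshape,
)

resh = tanh_reshape(-1.0, 1.0)
print(resh(-1.0), resh(0.0), resh(1.0))  # ~ -0.9, 0.0, 0.9

resh2 = tanh2_reshape(0.0, 0.3)
print(resh2(0.0), resh2(0.3))            # 0, ~0.95

quad = quadratic_linear_reshape(0.5)
print(quad(0.25), quad(0.5), quad(1.0))  # 0.25, 1.0, 3.0
```
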
/qdpgym/tasks/loct/sr_reward.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 |
3 | import numpy as np
4 |
5 | from qdpgym.utils import tf
6 |
7 |
8 | class Reward(ABC):
9 | def __call__(self, robot, env, task) -> float:
10 | raise NotImplementedError
11 |
12 |
13 | def exp_m2(v, k=1):
14 | return np.exp(-k * v ** 2)
15 |
16 |
17 | class VelocityReward(Reward):
18 | def __init__(self, max_lin=1.0):
19 | self.max_lin = max_lin
20 |
21 | def __call__(self, robot, env, task):
22 | lin_cmd = task.cmd[:2]
23 | lin_vel = robot.get_velocimeter()[:2]
24 | lin_cmd_mag = tf.vnorm(lin_cmd) * self.max_lin
25 | if lin_cmd_mag != 0.:
26 | lin_cmd /= lin_cmd_mag # unit
27 |
28 | prj_vel = np.dot(lin_cmd, lin_vel)
29 | ort_vel = tf.vnorm(lin_vel - lin_cmd * prj_vel)
30 | if lin_cmd_mag == 0.:
31 | lin_rew = exp_m2(tf.vnorm(lin_vel))
32 | elif prj_vel >= lin_cmd_mag:
33 | lin_rew = 1.
34 | else:
35 | lin_rew = exp_m2(lin_cmd_mag - prj_vel)
36 |
37 | ort_pen = exp_m2(ort_vel, 3)
38 | # print(lin_cmd_mag, prj_vel, lin_rew)
39 | return (lin_rew + ort_pen) / 2
40 |
41 |
42 | class RotationReward(Reward):
43 | def __init__(self, max_ang=1.0):
44 | self.max_ang = max_ang
45 |
46 | def __call__(self, robot, env, task):
47 | ang_cmd = task.cmd[2]
48 | ang_vel = robot.get_base_rpy_rate()[2]
49 | ang_cmd_mag = abs(ang_cmd) * self.max_ang
50 | if ang_cmd == 0.:
51 | ang_rew = exp_m2(ang_vel)
52 | elif ang_vel * np.sign(ang_cmd) >= ang_cmd_mag:
53 | ang_rew = 1.
54 | else:
55 | ang_rew = exp_m2(ang_vel - ang_cmd)
56 | # print(ang_cmd_mag, ang_vel, ang_rew)
57 | return ang_rew
58 |
59 |
60 | class BodyMotionPenalty(Reward):
61 | def __call__(self, robot, env, task):
62 | z_vel = robot.get_velocimeter()[2]
63 | r_rate, p_rate, _ = robot.get_base_rpy_rate()
64 | return -1.25 * z_vel ** 2 - 0.4 * abs(r_rate) - 0.4 * abs(p_rate)
65 |
66 |
67 | class BodyCollisionPenalty(Reward):
68 | def __call__(self, robot, env, task):
69 | contact_sum = 0
70 | for i, contact in enumerate(robot.get_leg_contacts()):
71 | if contact and i % 3 != 2:
72 | contact_sum += 1
73 | return 1 - contact_sum
74 |
75 |
76 | class JointMotionPenalty(Reward):
77 | def __call__(self, robot, env, task):
78 | vel_pen = -(robot.get_joint_vel() ** 2).sum()
79 | acc_pen = -(robot.get_joint_acc() ** 2).sum()
80 | return vel_pen + acc_pen * 1e-4
81 |
82 |
83 | class TargetSmoothnessReward(Reward):
84 | def __call__(self, robot, env, task):
85 | action_history = env.action_history
86 | actions = [action_history[-i - 1] for i in range(3)]
87 | return -(
88 | ((actions[0] - actions[1]) ** 2).sum() +
89 | ((actions[0] - 2 * actions[1] + actions[2]) ** 2).sum()
90 | )
91 |
92 |
93 | class TorquePenalty(Reward):
94 | def __call__(self, robot, env, task):
95 | return -(robot.get_last_torque() ** 2).sum()
96 |
97 |
98 | class SlipPenalty(Reward):
99 | def __call__(self, robot, env, task):
100 | return -robot.get_slip_vel().sum()
101 |
102 |
103 | class FootClearanceReward(Reward):
104 | def __call__(self, robot, env, task):
105 | return 1.
106 |
--------------------------------------------------------------------------------
/qdpgym/tasks/loct/utils.py:
--------------------------------------------------------------------------------
1 | import collections
2 | from typing import Union, Tuple, List, Any, Iterable, Callable
3 |
4 | import numpy as np
5 | import rtree
6 | import torch
7 | from rtree import Rtree
8 | from sklearn.linear_model import LinearRegression
9 |
10 | from qdpgym.utils import tf, Natural
11 |
12 |
13 | class PolicyWrapper(object):
14 | def __init__(self, net, device):
15 | self.net = net
16 | self.device = torch.device(device)
17 |
18 | def __call__(self, obs):
19 | with torch.inference_mode():
20 | obs = torch.as_tensor(obs, dtype=torch.float32, device=self.device)
21 | return self.net(obs).detach().cpu().numpy()
22 |
23 |
24 | class GradISNaive(object):
25 | """
26 | Adaptively change the sample weight by the gradient of reward.
27 | May lead to significant sample impoverishment.
28 | """
29 |
30 | def __init__(
31 | self,
32 | dim: int,
33 | min_key: Union[float, Tuple[float, ...]],
34 | max_key: Union[float, Tuple[float, ...]],
35 | max_len: int,
36 | grad_coef=1.0,
37 | max_weight=5.0,
38 | neighborhood_ratio=0.1,
39 | ):
40 | self._rtree = Rtree()
41 | self._min_key = min_key
42 | self._max_key = max_key
43 | if isinstance(self._min_key, float):
44 | self._interval = max_key - min_key
45 | else:
46 | self._interval = np.asarray(max_key) - np.asarray(min_key)
47 | self._max_len = max_len
48 | self._dim = dim
49 | self._grad_coef = grad_coef
50 | self._max_weight = max_weight
51 | self._nbh_size = neighborhood_ratio * self._interval
52 | self._bbox = self._key_to_bbox(min_key, max_key)
53 |
54 | self._rtree.insert(-1, self._bbox)
55 |
56 | self._cur_len = 0
57 | self._total_len = 0
58 | self._buffer = []
59 | self._weights = [] # save grad weights
60 | self._weight_sum = 0.
61 |
62 | def _key_to_bbox(self, key1, key2=None):
63 | if key2 is None:
64 | key2 = key1
65 | if self._dim == 1:
66 | return key1, key1, key2, key2
67 | elif self._dim == 2:
68 | return (*key1, *key2)
69 | else:
70 | raise NotImplementedError
71 |
72 | def _bbox_to_key(self, bbox):
73 | if self._dim == 1:
74 | return bbox[0]
75 | elif self._dim == 2:
76 | return bbox[:2]
77 | else:
78 | raise NotImplementedError
79 |
80 | @property
81 | def samples(self):
82 | return iter(self)
83 |
84 | @property
85 | def particles(self):
86 | return zip(self._buffer, self._weights)
87 |
88 | def __iter__(self):
89 | for item in self._rtree.intersection(
90 | self._bbox, objects=True
91 | ):
92 | if item.id != -1:
93 | yield self._bbox_to_key(item.bbox), item.object
94 |
95 | def is_full(self):
96 | return self._cur_len == self._max_len
97 |
98 | def __len__(self):
99 | return self._cur_len
100 |
101 | def __repr__(self):
102 | return self._rtree.__repr__()
103 |
104 | def insert(self, key, value):
105 | self._rtree.insert(self._total_len, self._key_to_bbox(key), value)
106 | if self._cur_len < self._max_len:
107 | self._buffer.append(key)
108 | self._weights.append(1.0)
109 | self._weight_sum += 1.0
110 | self._cur_len += 1
111 | else:
112 | idx = self._total_len % self._max_len
113 | prev_key = self._buffer[idx]
114 | self._rtree.delete(
115 | self._total_len - self._max_len,
116 | self._key_to_bbox(prev_key)
117 | )
118 | self._buffer[idx] = key
119 | grad_weight = self.get_grad_weight(key)
120 | self._weight_sum += grad_weight - self._weights[idx]
121 | self._weights[idx] = grad_weight
122 |
123 | self._total_len += 1
124 |
125 | def get_grad_weight(self, key) -> float:
126 | neighbors = self.get_neighbors(key, self._nbh_size)
127 | if len(neighbors) < 5:
128 | return self._max_weight
129 | else:
130 | x, y = zip(*neighbors)
131 | x, y = np.array(x), np.array(y)
132 | reg = LinearRegression()
133 | reg.fit(x.reshape(-1, 1), y)
134 | return np.exp(
135 | abs(reg.coef_[0]) * self._grad_coef
136 | ).clip(max=self._max_weight)
137 |
138 | def get_neighbors(self, key, radius):
139 | neighbors = []
140 | for item in self._rtree.intersection(
141 | self._key_to_bbox(key - radius, key + radius), objects=True
142 | ):
143 | if item.id != -1:
144 | neighbors.append((self._bbox_to_key(item.bbox), item.object))
145 | return neighbors
146 |
147 | def sample(
148 | self,
149 | random_gen: Union[np.random.Generator, np.random.RandomState],
150 | uniform_prob: float = 0.,
151 | normal_var: float = None,
152 | ):
153 | if not self.is_full() or random_gen.random() < uniform_prob:
154 | return random_gen.uniform(self._min_key, self._max_key)
155 | else:
156 | # importance sampling
157 | weights = np.array(self._weights) / self._weight_sum
158 | sample = random_gen.choice(self._buffer, p=weights)
159 | return np.clip(
160 | random_gen.normal(sample, normal_var),
161 | self._min_key, self._max_key
162 | ) if normal_var else sample
163 |
164 |
165 | class GradIS(object):
166 | def __init__(
167 | self,
168 | dim: int,
169 | min_key: Union[float, Tuple[float, ...]],
170 | max_key: Union[float, Tuple[float, ...]],
171 | approx_max_len: int,
172 | grad_coef=1.0,
173 | max_weight=5.0,
174 | neighborhood_ratio=0.1,
175 | ):
176 | self._rtree = Rtree()
177 | self._dim = dim
178 | self._min_key = min_key
179 | self._max_key = max_key
180 | if isinstance(self._min_key, float):
181 | self._interval = max_key - min_key
182 | else:
183 | self._interval = np.asarray(max_key) - np.asarray(min_key)
184 | self._approx_max_len = approx_max_len
185 | self._max_density = self._approx_max_len * neighborhood_ratio ** dim
186 | self._grad_coef = grad_coef
187 | self._max_weight = max_weight
188 | self._nbh_size = neighborhood_ratio * self._interval
189 | self._bbox = self._key_to_bbox(min_key, max_key)
190 |
191 | self._rtree.insert(-1, self._bbox)
192 |
193 | self._cur_len = 0
194 | self._total_len = 0
195 | self._buffer_weights = collections.defaultdict(float)
196 | self._weight_sum = 0.
197 |
198 | def _key_to_bbox(self, key1, key2=None):
199 | if key2 is None:
200 | key2 = key1
201 | if self._dim == 1:
202 | return key1, key1, key2, key2
203 | elif self._dim == 2:
204 | return (*key1, *key2)
205 | else:
206 | raise NotImplementedError
207 |
208 | def _bbox_to_key(self, bbox):
209 | if self._dim == 1:
210 | return bbox[0]
211 | elif self._dim == 2:
212 | return bbox[:2]
213 | else:
214 | raise NotImplementedError
215 |
216 | def _key_dist(self, key1, key2):
217 | if self._dim == 1:
218 | return abs(key1 - key2)
219 | elif self._dim == 2:
220 | return tf.vnorm(np.asarray(key1) - np.asarray(key2))
221 | else:
222 | raise NotImplementedError
223 |
224 | @property
225 | def samples(self) -> Iterable[Tuple[Any, float]]:
226 | return iter(self)
227 |
228 | @property
229 | def particles(self):
230 | return self._buffer_weights.items()
231 |
232 | @property
233 | def initialized(self):
234 | return self._total_len >= self._approx_max_len
235 |
236 | def __iter__(self):
237 | for item in self._rtree.intersection(
238 | self._bbox, objects=True
239 | ):
240 | if item.id != -1:
241 | yield self._bbox_to_key(item.bbox), item.object
242 |
243 | def __len__(self):
244 | return self._cur_len
245 |
246 | def __repr__(self):
247 | return self._rtree.__repr__()
248 |
249 | def insert(self, key, value):
250 | raw_neighbors = self.get_neighbors(key, self._nbh_size, raw=True)
251 |
252 | if key in self._buffer_weights:
253 | # remove duplicated key
254 | dup_idx = None
255 | for idx, item in enumerate(raw_neighbors):
256 | item_key = self._bbox_to_key(item.bbox)
257 | if np.array_equal(item_key, key):
258 | # print('duplicated key', key)
259 | self._weight_sum -= self._buffer_weights[key]
260 | self._rtree.delete(item.id, item.bbox)
261 | dup_idx = idx
262 | break
263 | if dup_idx is not None:
264 | raw_neighbors.pop(dup_idx)
265 | elif len(raw_neighbors) > self._max_density:
266 | # delete the obsolete particle
267 | # obsolete: rtree.index.Item = min(raw_neighbors, key=lambda x: x.id)
268 | # or delete the nearest particle?
269 | obsolete: rtree.index.Item = min(
270 | raw_neighbors, key=lambda x: self._key_dist(self._bbox_to_key(x.bbox), key)
271 | )
272 | raw_neighbors.remove(obsolete)
273 | self._rtree.delete(obsolete.id, obsolete.bbox)
274 | self._weight_sum -= self._buffer_weights.pop(self._bbox_to_key(obsolete.bbox))
275 | else:
276 | self._cur_len += 1
277 |
278 | self._total_len += 1
279 | self._rtree.insert(self._total_len, self._key_to_bbox(key), value)
280 | # calculate weight from gradient
281 | neighbors = [(self._bbox_to_key(item.bbox), item.object) for item in raw_neighbors]
282 | neighbors.append((key, value))
283 | grad_weight = self.get_weight_from_grad(key, neighbors)
284 | self._weight_sum += grad_weight
285 | self._buffer_weights[key] = grad_weight
286 |
287 | def get_weight_from_grad(self, key, neighbors=None) -> float:
288 | if neighbors is None:
289 | neighbors = self.get_neighbors(key, self._nbh_size)
290 | if len(neighbors) < 5:
291 | return 1.0
292 | else:
293 | x, y = zip(*neighbors)
294 | x, y = np.array(x), np.array(y)
295 | reg = LinearRegression()
296 | # TODO: dim >= 2
297 | reg.fit(x.reshape(-1, 1), y)
298 | r = reg.predict(np.reshape(key, (1, 1))).clip(0., 1.).item()
299 | return np.clip(
300 | np.exp(abs(reg.coef_[0]) * self._grad_coef) +
301 | np.exp(-((r - 0.7) / 0.7) ** 2),
302 | 0., self._max_weight
303 | ).item()
304 |
305 | # grad = 2 * r * math.sqrt(-math.log(r))
306 | # return np.clip(abs(grad), 0., self._max_weight).item()
307 | # # return np.exp(
308 | # # abs(grad) * self._grad_coef
309 | # # ).clip(max=self._max_weight)
310 |
311 | def get_neighbors(
312 | self, key, radius, raw: bool = False
313 | ) -> Union[List[rtree.index.Item], List[Tuple[Any, float]]]:
314 | neighbors = []
315 | for item in self._rtree.intersection(
316 | self._key_to_bbox(key - radius, key + radius), objects=True
317 | ):
318 | if item.id != -1:
319 | neighbors.append(
320 | item if raw else (
321 | self._bbox_to_key(item.bbox), item.object
322 | ))
323 | return neighbors
324 |
325 | def sample(
326 | self,
327 | random_gen: Union[np.random.Generator, np.random.RandomState],
328 | uniform_prob: float = 0.,
329 | normal_var: float = None,
330 | ):
331 | if not self.initialized or random_gen.random() < uniform_prob:
332 | return random_gen.uniform(self._min_key, self._max_key)
333 | else:
334 | # importance sampling
335 | # TODO: a dict for saving index and two lists
336 | # for saving samples and weights may be better?
337 | buffer, weights = zip(*self._buffer_weights.items())
338 | weights = np.array(weights) / self._weight_sum
339 | sample = random_gen.choice(buffer, p=weights)
340 | return np.clip(
341 | random_gen.normal(sample, normal_var),
342 | self._min_key, self._max_key
343 | ) if normal_var else sample
344 |
345 |
346 | class AlpIS(object):
347 | def __init__(
348 | self,
349 | dim: int,
350 | min_key: Union[float, Tuple[float, ...], np.ndarray],
351 | max_key: Union[float, Tuple[float, ...], np.ndarray],
352 | window_size: int,
353 | default_distribution: Callable[[Union[np.random.Generator, np.random.RandomState]], Any] = None,
354 | neighborhood_ratio=0.1,
355 | min_lp=1e-4,
356 | ):
357 | self._dim = dim
358 | self._min_key = min_key
359 | self._max_key = max_key
360 | self._interval = np.array(max_key) - np.array(min_key)
361 | self._nbh_size = neighborhood_ratio * self._interval
362 | self._window_size = window_size
363 | self._max_density = self._window_size * neighborhood_ratio ** dim
364 | self._min_lp = min_lp
365 | if default_distribution is None:
366 | self._explore = lambda r: r.uniform(self._min_key, self._max_key)
367 | else:
368 | self._explore = default_distribution
369 |
370 | self._buffer = Rtree()
371 | self._history = Rtree()
372 | self._bbox = self._key_to_bbox(min_key, max_key)
373 | self._history_idx = Natural()
374 | self._window = collections.deque(maxlen=window_size)
375 |
376 | self._total_len = 0
377 | self._samples = []
378 | self._weights = []
379 | self._init = False
380 |
381 | def _key_to_bbox(self, key1, key2=None):
382 | if key2 is None:
383 | key2 = key1
384 | if self._dim == 1:
385 | return key1, key1, key2, key2
386 | if self._dim == 2:
387 | return (*key1, *key2)
388 | raise NotImplementedError
389 |
390 | def _bbox_to_key(self, bbox):
391 | if self._dim == 1:
392 | return bbox[0]
393 | if self._dim == 2:
394 | return bbox[:2]
395 | raise NotImplementedError
396 |
397 | def _key_dist(self, key1, key2):
398 | if self._dim == 1:
399 | return abs(key1 - key2)
400 | if self._dim == 2:
401 | return tf.vnorm(np.asarray(key1) - np.asarray(key2))
402 | raise NotImplementedError
403 |
404 | @property
405 | def progresses(self) -> Iterable[Tuple[Any, float]]:
406 | for item in self._history.intersection(self._bbox, objects=True):
407 | if item.id != -1:
408 | yield self._bbox_to_key(item.bbox), item.object
409 |
410 | @property
411 | def particles(self):
412 | return zip(self._samples, self._weights)
413 |
414 | @property
415 | def is_init(self):
416 | return self._init
417 |
418 | def insert(self, key, value):
419 | self._total_len += 1
420 | self._buffer.insert(self._total_len, self._key_to_bbox(key), value)
421 | self._window.append((key, value))
422 |
423 | if self._total_len % self._window_size == 0:
424 | self._samples.clear()
425 | self._weights.clear()
426 | if self._init or len(self._history):
427 | self._init = True
428 | for k, _ in self.progresses:
429 | self._samples.append(k)
430 | self._weights.append(self._compute_alp(k))
431 | weight_sum = np.sum(self._weights)
432 | if weight_sum != 0.:
433 | self._weights = (np.array(self._weights) / weight_sum).tolist()
434 |
435 | self._patch_density_merge(self._window)
436 | self._buffer = Rtree()
437 |
438 | def sample(
439 | self,
440 | random_gen: Union[np.random.Generator, np.random.RandomState],
441 | uniform_prob: float = 0.,
442 | normal_var: float = None,
443 | ):
444 | if not self._init or random_gen.random() < uniform_prob:
445 | return self._explore(random_gen)
446 | else:
447 | # importance sampling
448 | sample = random_gen.choice(self._samples, p=self._weights)
449 | return np.clip(
450 | random_gen.normal(sample, normal_var),
451 | self._min_key, self._max_key
452 | ) if normal_var else sample
453 |
454 | def _patch_density_merge(self, window):
455 | for k, v in window:
456 | neighbors = self._get_neighbors(self._history, k)
457 | if len(neighbors) > self._max_density:
458 | # delete the nearest particle
459 | obsolete: rtree.index.Item = min(
460 | neighbors, key=lambda x: self._key_dist(self._bbox_to_key(x.bbox), k)
461 | )
462 | self._history.delete(obsolete.id, obsolete.bbox)
463 | self._history.insert(next(self._history_idx), self._key_to_bbox(k), v)
464 |
465 | def _compute_alp(self, key) -> float:
466 | n_history = self._get_neighbor_objects(self._history, key)
467 | n_buffer = self._get_neighbor_objects(self._buffer, key)
468 | # TODO: Remove Duplicated items
469 | if len(n_history) > 3 and len(n_buffer) > 3:
470 | v_history = np.array(n_history).mean()
471 | v_buffer = np.array(n_buffer).mean()
472 | alp = max(abs(v_buffer - v_history), self._min_lp)
473 | else:
474 | alp = self._min_lp
475 | return alp
476 |
477 | def _get_neighbors(self, rtree_obj, key) -> List[rtree.index.Item]:
478 | neighbors = []
479 | bbox = self._key_to_bbox(key - self._nbh_size, key + self._nbh_size)
480 | for item in rtree_obj.intersection(bbox, objects=True):
481 | if item.id != -1:
482 | neighbors.append(item)
483 | return neighbors
484 |
485 | def _get_neighbor_objects(self, rtree_obj, key) -> List[float]:
486 | neighbors = []
487 | bbox = self._key_to_bbox(key - self._nbh_size, key + self._nbh_size)
488 | for item in rtree_obj.intersection(bbox, objects=True):
489 | if item.id != -1:
490 | neighbors.append(item.object)
491 | return neighbors
492 |
493 |
494 | class GradIS1D(GradIS):
495 | def __init__(
496 | self,
497 | min_key: Union[float, Tuple[float, ...]],
498 | max_key: Union[float, Tuple[float, ...]],
499 | max_len: int,
500 | grad_coef=1.0,
501 | max_weight=5.0,
502 | neighborhood_ratio=0.1,
503 | ):
504 | super().__init__(
505 | 1,
506 | min_key,
507 | max_key,
508 | max_len,
509 | grad_coef,
510 | max_weight,
511 | neighborhood_ratio,
512 | )
513 |
--------------------------------------------------------------------------------
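
A sketch (not from the repository) of how the gradient-based importance sampler above could be driven: commands are drawn from `GradIS1D`, rolled out, and re-inserted with their normalized returns (hand-made here), so that sampling concentrates where the return gradient is steep. It assumes `rtree` and `scikit-learn` are installed, as the module requires.

```python
import numpy as np

from qdpgym.tasks.loct.utils import GradIS1D

rng = np.random.default_rng(0)
sampler = GradIS1D(min_key=0.0, max_key=1.0, max_len=200)

for _ in range(600):
    cmd = float(sampler.sample(rng, uniform_prob=0.2, normal_var=0.05))
    # Stand-in for an episode rollout: a made-up return peaking at cmd = 0.3.
    ret = max(0.0, 1.0 - abs(cmd - 0.3) * 2.0)
    sampler.insert(cmd, ret)

print(len(sampler), sampler.initialized)
```
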
/qdpgym/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/tests/__init__.py
--------------------------------------------------------------------------------
/qdpgym/tests/common.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from matplotlib import pyplot as plt
3 |
4 |
5 | def test_reward_reshape():
6 | x = np.arange(-2, 2, 0.01)
7 | plt.plot(x, np.tanh(x))
8 | plt.show()
9 |
--------------------------------------------------------------------------------
/qdpgym/tests/test_pybullet.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import time
3 |
4 | import numpy as np
5 | import pybullet as pyb
6 | import pybullet_data
7 |
8 | from qdpgym.sim.blt.env import QuadrupedEnv
9 | from qdpgym.sim.blt.hooks import ViewerHook
10 | from qdpgym.sim.blt.quadruped import Aliengo
11 | from qdpgym.sim.blt.terrain import Plain, Hills, Slopes, Steps, PlainHf
12 | from qdpgym.sim.task import NullTask
13 | from qdpgym.tasks.loct import LocomotionV0, LocomotionPMTG
14 | from qdpgym.utils import tf
15 |
16 |
17 | def test_robot():
18 | pyb.connect(pyb.GUI)
19 | pyb.setTimeStep(2e-3)
20 | pyb.setAdditionalSearchPath(pybullet_data.getDataPath())
21 | Plain().spawn(pyb)
22 | rob = Aliengo(500, 'pd')
23 | rs = np.random.RandomState()
24 | rob.spawn(pyb, rs)
25 | pyb.setGravity(0, 0, -9.8)
26 |
27 | for _ in range(1000):
28 | # with MfTimer() as t:
29 | pyb.stepSimulation()
30 | rob.update_observation(rs)
31 | rob.apply_command(rob.STANCE_CONFIG)
32 |
33 | pyb.disconnect()
34 |
35 | def test_robot_extent():
36 | pyb.connect(pyb.GUI)
37 | pyb.setTimeStep(2e-3)
38 | pyb.setAdditionalSearchPath(pybullet_data.getDataPath())
39 | Plain().spawn(pyb)
40 | rob = Aliengo(500, 'pd')
41 | rob.spawn_on_rack(pyb, np.random)
42 | pyb.setGravity(0, 0, -9.8)
43 |
44 | joint_pos = np.concatenate((
45 | rob.inverse_kinematics(0, (0.3, 0., 0.05)),
46 | rob.inverse_kinematics(0, (0.35, 0., 0.1)),
47 | rob.inverse_kinematics(0, (0.25, 0., 0.)),
48 | rob.inverse_kinematics(0, (-0.25, 0., 0.)),
49 | ))
50 |
51 | for _ in range(1000):
52 | pyb.stepSimulation()
53 | rob.update_observation(np.random)
54 |
55 | rob.apply_command(joint_pos)
56 | time.sleep(0.002)
57 |
58 | pyb.disconnect()
59 |
60 |
61 | def test_env():
62 | rob = Aliengo(500, 'pd')
63 | arena = Plain()
64 | task = NullTask()
65 | task.add_hook(ViewerHook())
66 | env = QuadrupedEnv(rob, arena, task)
67 | env.reset()
68 | for _ in range(1000):
69 | env.step(rob.STANCE_CONFIG)
70 |
71 |
72 | def test_tg():
73 | rob = Aliengo(500, 'actuator_net', True)
74 | arena = Plain()
75 | task = LocomotionPMTG()
76 | task.add_hook(ViewerHook())
77 | env = QuadrupedEnv(rob, arena, task)
78 | env.reset()
79 | for _ in range(1000):
80 | env.step(np.zeros(16))
81 |
82 |
83 | def test_replaceHeightfield():
84 | pyb.connect(pyb.GUI)
85 | pyb.setRealTimeSimulation(True)
86 | terrains = [PlainHf.default(), Hills.default(),
87 | Steps.default(), Slopes.default()]
88 | current = None
89 | for i in range(5):
90 | for terrain in terrains:
91 | if current is None:
92 | terrain.spawn(pyb)
93 | else:
94 | terrain.replace(pyb, current)
95 | current = terrain
96 | time.sleep(0.5)
97 | current.remove(pyb)
98 | time.sleep(1.0)
99 | pyb.disconnect()
100 |
101 |
102 | def test_terrainApi():
103 | pyb.connect(pyb.GUI)
104 | pyb.setRealTimeSimulation(True)
105 | terrain = Hills.make(20, 0.1, (0.5, 10), random_state=np.random)
106 | # terrain = Steps.make(20, 0.1, 1.0, 0.5, random_state=np.random)
107 | # terrain = Slopes.make(20, 0.1, np.pi / 6, 0.5)
108 | terrain.spawn(pyb)
109 |
110 | sphere_shape = pyb.createVisualShape(
111 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0., 0.8, 0., 0.6)
112 | )
113 | ray_hit_shape = pyb.createVisualShape(
114 | shapeType=pyb.GEOM_SPHERE, radius=0.01, rgbaColor=(0.8, 0., 0., 0.6)
115 | )
116 | cylinder_shape = pyb.createVisualShape(
117 | shapeType=pyb.GEOM_CYLINDER, radius=0.005, length=0.11,
118 | rgbaColor=(0., 0, 0.8, 0.6)
119 | )
120 | box_shape = pyb.createVisualShape(
121 | shapeType=pyb.GEOM_BOX, halfExtents=(0.03, 0.03, 0.03),
122 | rgbaColor=(0.8, 0., 0., 0.6)
123 | )
124 |
125 | points, vectors, ray_hits = [], [], []
126 | box_id = -1
127 |
128 | for i in range(10):
129 | peak = terrain.get_peak((-1, 1), (-1, 1))
130 | if i == 0:
131 | box_id = pyb.createMultiBody(
132 | baseVisualShapeIndex=box_shape,
133 | basePosition=peak
134 | )
135 | else:
136 | pyb.resetBasePositionAndOrientation(
137 | box_id, peak, (0., 0., 0., 1.)
138 | )
139 | for idx, (x, y) in enumerate(
140 | itertools.product(np.linspace(-1, 1, 10), np.linspace(-1, 1, 10))
141 | ):
142 | h = terrain.get_height(x, y)
143 | vec_orn = tf.Quaternion.from_rotation(
144 | tf.Rotation.from_zaxis(terrain.get_normal(x, y))
145 | )
146 | ray_pos = pyb.rayTest((x, y, 2), (x, y, -1))[0][3]
147 | if i == 0:
148 | points.append(pyb.createMultiBody(
149 | baseVisualShapeIndex=sphere_shape,
150 | basePosition=(x, y, h)
151 | ))
152 | vectors.append(pyb.createMultiBody(
153 | baseVisualShapeIndex=cylinder_shape,
154 | basePosition=(x, y, h),
155 | baseOrientation=vec_orn
156 | ))
157 | ray_hits.append(pyb.createMultiBody(
158 | baseVisualShapeIndex=ray_hit_shape,
159 | basePosition=ray_pos
160 | ))
161 | else:
162 | pyb.resetBasePositionAndOrientation(
163 | points[idx], (x, y, h), (0., 0., 0., 1.)
164 | )
165 | pyb.resetBasePositionAndOrientation(
166 | ray_hits[idx], ray_pos, (0., 0., 0., 1.)
167 | )
168 | pyb.resetBasePositionAndOrientation(
169 | vectors[idx], (x, y, h), vec_orn
170 | )
171 | time.sleep(3)
172 | new = Hills.make(20, 0.1, (np.random.random(), 10), random_state=np.random)
173 | new.replace(pyb, terrain)
174 | terrain = new
175 |
176 | pyb.disconnect()
177 |
178 |
179 | def test_gym_env():
180 | rob = Aliengo(500, 'pd')
181 | arena = Plain()
182 | task = LocomotionV0()
183 | env = QuadrupedEnv(rob, arena, task)
184 | print(env.observation_space)
185 | print(env.action_space)
186 |
187 |
188 | if __name__ == '__main__':
189 | test_robot_extent()
--------------------------------------------------------------------------------
/qdpgym/thirdparty/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/qdpgym/thirdparty/__init__.py
--------------------------------------------------------------------------------
/qdpgym/thirdparty/gamepad/__init__.py:
--------------------------------------------------------------------------------
1 | # modified from https://github.com/piborg/Gamepad.git
2 |
--------------------------------------------------------------------------------
/qdpgym/thirdparty/gamepad/controllers.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Standard gamepad mappings.
4 |
5 | Pulled in to gamepad.py directly.
6 | """
7 | import copy
8 |
9 | from .gamepad import Gamepad
10 |
11 | __all__ = ['PS4', 'XboxONE', 'Xbox']
12 | all_controllers = copy.copy(__all__)
13 | __all__.append('all_controllers')
14 |
15 |
16 | class PS4(Gamepad):
17 | fullName = 'PlayStation 4 controller'
18 |
19 | def __init__(self, joystickNumber=0):
20 | Gamepad.__init__(self, joystickNumber)
21 | self.axisNames = {
22 | 0: 'LAS -X',
23 | 1: 'LAS -Y',
24 | 2: 'LT',
25 | 3: 'RAS -X',
26 | 4: 'RAS -Y',
27 | 5: 'RT',
28 | 6: 'DPAD-X',
29 | 7: 'DPAD-Y'
30 | }
31 | self.buttonNames = {
32 | 0: 'A',
33 | 1: 'B',
34 | 2: 'X',
35 | 3: 'Y',
36 | 4: 'L1',
37 | 5: 'R1',
38 | 6: 'L2',
39 | 7: 'R2',
40 | 8: 'SHARE',
41 | 9: 'LAS',
42 | 10: 'RAS',
43 | 11: 'L3',
44 | 12: 'R3'
45 | }
46 | self._setupReverseMaps()
47 |
48 |
49 | class XboxONE(Gamepad):
50 | fullName = 'Xbox ONE controller'
51 |
52 | def __init__(self, joystickNumber=0):
53 | Gamepad.__init__(self, joystickNumber)
54 | self.axisNames = {
55 | 0: 'LAS -X', # Left Analog Stick Left/Right
56 | 1: 'LAS -Y', # Left Analog Stick Up/Down
57 | 2: 'RAS -X', # Right Analog Stick Left/Right
58 | 3: 'RAS -Y', # Right Analog Stick Up/Down
59 | 4: 'RT', # Right Trigger
60 | 5: 'LT', # Left Trigger
61 | 6: 'DPAD -X', # D-Pad Left/Right
62 | 7: 'DPAD -Y' # D-Pad Up/Down
63 | }
64 | self.buttonNames = {
65 | 0: 'A', # A Button
66 | 1: 'B', # B Button
67 | 3: 'X', # X Button
68 | 4: 'Y', # Y Button
69 | 6: 'LB', # Left Bumper
70 | 7: 'RB', # Right Bumper
71 | 11: 'START', # Hamburger Button
72 | 12: 'HOME', # XBOX Button
73 | 13: 'LAS', # Left Analog Stick button
74 | 14: 'RAS' # Right Analog Stick button
75 |
76 | }
77 | self._setupReverseMaps()
78 |
79 |
80 | Xbox = XboxONE
81 |
--------------------------------------------------------------------------------
/qdpgym/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import *
2 |
--------------------------------------------------------------------------------
/qdpgym/utils/tf.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Tuple, Sequence, Union
3 |
4 | import numpy as np
5 | from scipy.spatial.transform import Rotation as scipyRotation
6 |
7 | __all__ = ['Rotation', 'Quaternion', 'Rpy', 'Odometry',
8 | 'vnorm', 'vunit', 'vcross', 'estimate_normal',
9 | 'get_rpy_rate_from_ang_vel']
10 |
11 |
12 | def vnorm(vec):
13 | return math.sqrt(sum(x ** 2 for x in vec))
14 |
15 |
16 | def vunit(x) -> np.ndarray:
17 | return np.asarray(x) / vnorm(x)
18 |
19 |
20 | def vcross(vec3_1, vec3_2) -> np.ndarray:
21 | """
22 | A much faster alternative for np.cross.
23 | """
24 | (x1, y1, z1), (x2, y2, z2) = vec3_1, vec3_2
25 | return np.array((y1 * z2 - y2 * z1, z1 * x2 - z2 * x1, x1 * y2 - x2 * y1))
26 |
27 |
28 | class Rotation(object):
29 | THRESHOLD = 1e-5
30 |
31 | @staticmethod
32 | def from_rpy(rpy): # zyx
33 | sr, sp, sy = np.sin(rpy)
34 | cr, cp, cy = np.cos(rpy)
35 | matrix = ((cy * cp, cy * sp * sr - sy * cr, cy * sp * cr + sy * sr),
36 | (sy * cp, sy * sp * sr + cy * cr, sy * sp * cr - cy * sr),
37 | (-sp, cp * sr, cp * cr))
38 | return np.array(matrix)
39 |
40 | @staticmethod
41 | def from_quaternion(q):
42 | return scipyRotation.from_quat(q).as_matrix()
43 |
44 | @staticmethod
45 | def from_zaxis(z):
46 | if z[2] == 1.0:
47 | ref = np.array((1., 0., 0.))
48 | else:
49 | ref = np.array((0., 0., 1.))
50 | x = vcross(z, ref)
51 | x /= vnorm(x)
52 | y = vcross(z, x)
53 | return np.array((x, y, z)).T
54 |
55 |
56 | class Quaternion(object):
57 | """
58 | q = [x y z w] = w + xi + yj + zk
59 | """
60 |
61 | @staticmethod
62 | def identity():
63 | return np.array((0., 0., 0., 1.))
64 |
65 | @staticmethod
66 | def from_rpy(rpy):
67 | rpy_2 = np.asarray(rpy) / 2
68 | (sr, sp, sy), (cr, cp, cy) = np.sin(rpy_2), np.cos(rpy_2)
69 | return np.array(((sr * cp * cy - cr * sp * sy,
70 | cr * sp * cy + sr * cp * sy,
71 | cr * cp * sy - sr * sp * cy,
72 | cr * cp * cy + sr * sp * sy)))
73 |
74 | @staticmethod
75 | def from_rotation(r):
76 | # scipy api is faster than python implementation
77 | return scipyRotation.from_matrix(r).as_quat()
78 |
79 | @staticmethod
80 | def inverse(q):
81 | x, y, z, w = q
82 | return np.array((-x, -y, -z, w))
83 |
84 | @staticmethod
85 | def from_wxyz(q):
86 | w, x, y, z = q
87 | return np.array((x, y, z, w))
88 |
89 | @staticmethod
90 | def to_wxyz(q):
91 | x, y, z, w = q
92 | return np.array((w, x, y, z))
93 |
94 |
95 | class Rpy(object):
96 | """ZYX intrinsic Euler Angles (roll, pitch, yaw)"""
97 |
98 | @staticmethod
99 | def from_quaternion(q):
100 | x, y, z, w = q
101 | return np.array((np.arctan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y)),
102 | np.arcsin(2 * (w * y - x * z)),
103 | np.arctan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z))))
104 |
105 | @classmethod
106 | def from_rotation(cls, r):
107 | return scipyRotation.from_matrix(r).as_euler('ZYX', degrees=False)[::-1]
108 |
109 |
110 | def get_rpy_rate_from_ang_vel(rpy, angular):
111 | r, p, y = rpy
112 |     sr, cr = math.sin(r), math.cos(r)
113 |     cp, tp = math.cos(p), math.tan(p)
114 |     trans = np.array(((1, sr * tp, cr * tp),
115 |                       (0, cr, -sr),
116 |                       (0, sr / cp, cr / cp)))
117 | return np.dot(trans, angular)
118 |
119 |
120 | FLOAT3 = Tuple[float, float, float]
121 |
122 |
123 | def estimate_normal(points: Union[Sequence[FLOAT3], np.ndarray]):
124 | """
125 | Estimate the normal of terrain from a set of points.
126 | The terrain CANNOT be vertical.
127 | :param points: a set of terrain points
128 | :return: np.ndarray, the normal vector
129 | """
130 | X, Y, Z = np.array(points).T
131 | A = np.zeros((3, 3))
132 | A[0, :] = np.sum(X ** 2), X @ Y, np.sum(X)
133 | A[1, :] = A[0, 1], np.sum(Y ** 2), np.sum(Y)
134 | A[2, :] = A[0, 2], A[1, 2], len(X)
135 | b = np.array((X @ Z, Y @ Z, np.sum(Z)))
136 | a, b, _ = np.linalg.solve(A, b)
137 | return vunit((-a, -b, 1))
138 |
139 |
140 | ARR_ZERO3 = (0., 0., 0.)
141 | ARR_EYE3 = ((1., 0., 0.), (0., 1., 0.), (0., 0., 1.))
142 |
143 |
144 | class Odometry(object):
145 | """
146 | Homogeneous coordinate transformation defined by a rotation and a translation:
147 |     ((R_3x3, t_3x1)
148 |      (0_1x3, 1_1x1))
149 | """
150 |
151 | def __init__(self, rotation=ARR_EYE3, translation=ARR_ZERO3):
152 | self.rotation, self.translation = np.asarray(rotation), np.asarray(translation)
153 |
154 | def multiply(self, other):
155 | if isinstance(other, Odometry):
156 | return Odometry(self.rotation @ other.rotation,
157 | self.translation + self.rotation @ other.translation)
158 | if isinstance(other, np.ndarray):
159 | return self.rotation @ other + self.translation
160 | raise RuntimeError(f'Unsupported datatype `{type(other)}` for multiplying')
161 |
162 | def __matmul__(self, other):
163 | return self.multiply(other)
164 |
165 | def __imatmul__(self, other):
166 |         # Update the translation first so the current (old) rotation is used, as in `multiply`.
167 |         self.translation = self.translation + self.rotation @ other.translation
168 |         self.rotation = self.rotation @ other.rotation
169 |         return self
170 | 
171 |     def __repr__(self):
172 |         return str(np.concatenate((self.rotation, np.expand_dims(self.translation, axis=1)), axis=1))
173 | 
--------------------------------------------------------------------------------
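Usage sketch (illustrative, not part of the repository) for qdpgym/utils/tf.py: it round-trips a roll-pitch-yaw triple through the quaternion and rotation-matrix helpers and composes two Odometry transforms. It assumes the qdpgym package is importable; the variable names are made up for the example.

import numpy as np

from qdpgym.utils.tf import Rotation, Quaternion, Rpy, Odometry

rpy = np.array([0.1, -0.2, 0.3])              # roll, pitch, yaw in radians
q = Quaternion.from_rpy(rpy)                  # [x, y, z, w]
R = Rotation.from_quaternion(q)               # 3x3 rotation matrix
assert np.allclose(Rpy.from_quaternion(q), rpy)
assert np.allclose(Rotation.from_rpy(rpy), R)

# Compose a body-to-world pose with a sensor offset expressed in the body frame.
body_in_world = Odometry(R, (1.0, 2.0, 0.5))
sensor_in_body = Odometry(translation=(0.2, 0.0, 0.1))   # default rotation: identity
sensor_in_world = body_in_world @ sensor_in_body
print(sensor_in_world @ np.zeros(3))          # sensor origin in world coordinates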
/qdpgym/utils/utils.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import logging
3 | import math
4 | import re
5 | import time
6 | from datetime import datetime
7 | from typing import Sequence, Callable, Optional, Union
8 |
9 | import numpy as np
10 | import yaml
11 |
12 | __all__ = ['make_part', 'Angle', 'replace_is', 'replace_eq', 'colored_str', 'log',
13 | 'MfTimer', 'get_padded', 'PadWrapper', 'get_timestamp', 'YamlLoader',
14 | 'print_return', 'Natural', 'plt_figure_to_numpy']
15 |
16 |
17 | class make_part(functools.partial):
18 | """
19 | Predefine some init parameters of a class without creating an instance.
20 | """
21 |
22 | def __getattr__(self, item):
23 | return getattr(self.func, item)
24 |
25 |
26 | PI, TAU = math.pi, math.tau
27 |
28 |
29 | class Angle(object):
30 | @staticmethod
31 | def norm(x):
32 | """normalize an angle to [-pi, pi)"""
33 | if isinstance(x, np.ndarray):
34 |             return x - np.floor((x + PI) / TAU) * TAU
35 | if -PI <= x < PI:
36 | return x
37 |         return x - math.floor((x + PI) / TAU) * TAU
38 |
39 | @staticmethod
40 | def mean(lst: Sequence[float]):
41 | _sum = last = lst[0]
42 | for i, a in enumerate(lst):
43 | if i == 0:
44 | continue
45 | last = Angle.near(last, a)
46 | _sum += last
47 | return Angle.norm(_sum / len(lst))
48 |
49 | @staticmethod
50 | def near(x, y):
51 | if x > y + PI:
52 | y += TAU
53 | elif x < y - PI:
54 | y -= TAU
55 | return y
56 |
57 | @staticmethod
58 | def to_deg(rad):
59 | return rad / math.pi * 180
60 |
61 |
62 | def get_padded(queue: Sequence, idx):
63 | if idx < 0:
64 | if len(queue) >= -idx:
65 | return queue[idx]
66 | else:
67 | return queue[0]
68 | else:
69 | if len(queue) > idx:
70 | return queue[idx]
71 | else:
72 | return queue[-1]
73 |
74 |
75 | class PadWrapper(object):
76 | def __init__(self, seq: Sequence):
77 | self.seq = seq
78 | self.is_empty = not self.seq
79 |
80 | def __getitem__(self, item):
81 | if self.is_empty:
82 | raise IndexError('Empty Sequence')
83 | else:
84 | return get_padded(self.seq, item)
85 |
86 | def __len__(self):
87 | return len(self.seq)
88 |
89 | def __repr__(self):
90 | return f'PadWrapper[{self.seq}]'
91 |
92 |
93 | def replace_is(seq: list, src, dst):
94 | for idx, item in enumerate(seq):
95 | if item is src:
96 | seq[idx] = dst
97 |
98 |
99 | def replace_eq(seq: list, src, dst):
100 | for idx, item in enumerate(seq):
101 | if item == src:
102 | seq[idx] = dst
103 |
104 |
105 | class MfTimer(object):
106 | """
107 | Multifunctional Timer. Typical usage example:
108 |
109 | with MfTimer() as timer:
110 | do_something()
111 | print(timer.time_spent)
112 | """
113 |
114 | @classmethod
115 | def start_now(cls):
116 | timer = MfTimer()
117 | timer.start()
118 | return timer
119 |
120 | @classmethod
121 | def record(cls, func: Callable, *args, **kwargs):
122 | start_time = time.time()
123 | func(*args, **kwargs)
124 | end_time = time.time()
125 | return end_time - start_time
126 |
127 | def __init__(self):
128 | self._start_time = self._end_time = None
129 |
130 | def start(self):
131 | self._start_time = time.time()
132 |
133 | def __enter__(self):
134 | self._start_time = time.time()
135 | return self
136 |
137 | def end(self, verbose=False):
138 | self._end_time = time.time()
139 | if verbose:
140 | print(self.time_spent)
141 | return self.time_spent
142 |
143 | def __exit__(self, exc_type, exc_val, exc_tb):
144 | self._end_time = time.time()
145 |
146 | @property
147 | def time_spent(self):
148 | return self._end_time - self._start_time
149 |
150 |
151 | BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
152 | RESET_SEQ = "\033[0m"
153 | COLOR_SEQ = "\033[1;%dm"
154 |
155 |
156 | def colored_str(content: str, color: str):
157 | try:
158 |         color = globals()[color.upper()]
159 |     except KeyError:
160 | raise RuntimeError(f'No color named {color}')
161 | return COLOR_SEQ % (30 + color) + content + RESET_SEQ
162 |
163 |
164 | class ColoredFormatter(logging.Formatter):
165 | COLORS = {
166 | 'WARNING': YELLOW,
167 | 'INFO': WHITE,
168 | 'DEBUG': BLUE,
169 | 'ERROR': RED,
170 | 'CRITICAL': MAGENTA,
171 | }
172 |
173 | def __init__(self, *args, **kwargs):
174 | super().__init__(*args, **kwargs)
175 | self.use_color = True
176 |
177 | def format(self, record):
178 | lvl = record.levelname
179 | message = str(record.msg)
180 | funcName = record.funcName
181 | if lvl in self.COLORS:
182 | lvl_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + lvl + RESET_SEQ
183 | msg_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + message + RESET_SEQ
184 | funcName_colored = COLOR_SEQ % (30 + self.COLORS[lvl]) + funcName + RESET_SEQ
185 | record.levelname = lvl_colored
186 | record.msg = msg_colored
187 | record.funcName = funcName_colored
188 | return super().format(record)
189 |
190 |
191 | class _Log(object):
192 | logger: Optional[logging.Logger] = None
193 |
194 | @classmethod
195 | def init_logger(cls, name='logger',
196 | log_level: int = logging.INFO,
197 | log_fmt='[%(asctime)s] %(message)s',
198 | date_fmt='%b%d %H:%M:%S',
199 | log_dir=None,
200 | client_ip='127.0.0.1'):
201 | np.set_printoptions(3, linewidth=10000, suppress=True)
202 | # torch.set_printoptions(linewidth=10000, profile='short')
203 | cls.logger = logging.getLogger(name)
204 | cls.logger.setLevel(log_level)
205 | # if log_dir:
206 | # os.makedirs(log_dir)
207 | # fh = logging.FileHandler(os.path.join(log_dir, 'log.txt'))
208 | # fh.setLevel(logging.DEBUG)
209 | # cls.logger.addHandler(fh)
210 | # soh = SocketHandler(client_ip, 19996)
211 | # soh = SocketHandler('10.12.120.120', 19996)
212 | # soh.setFormatter(logging.Formatter())
213 | # cls.logger.addHandler(soh)
214 | formatter = ColoredFormatter(log_fmt, date_fmt)
215 | sh = logging.StreamHandler()
216 | sh.setLevel(log_level)
217 | sh.setFormatter(formatter)
218 | cls.logger.addHandler(sh)
219 | cls.logger.propagate = False
220 |
221 | @classmethod
222 | def set_logger_level(cls, log_level: str):
223 | level_dict = {'DEBUG': logging.DEBUG,
224 | 'INFO': logging.INFO,
225 | 'WARNING': logging.WARNING,
226 | 'ERROR': logging.ERROR,
227 | 'CRITICAL': logging.CRITICAL}
228 |
229 | cls.logger.handlers[-1].setLevel(level_dict[log_level.upper()])
230 |
231 | @classmethod
232 | def debug(cls, *args, **kwargs):
233 | if cls.logger is None:
234 | cls.init_logger(log_level=logging.DEBUG)
235 | return cls.logger.debug(*args, **kwargs)
236 |
237 | @classmethod
238 | def info(cls, *args, **kwargs):
239 | if cls.logger is None:
240 | cls.init_logger(log_level=logging.DEBUG)
241 | return cls.logger.info(*args, **kwargs)
242 |
243 | @classmethod
244 | def warn(cls, *args, **kwargs):
245 | if cls.logger is None:
246 | cls.init_logger(log_level=logging.DEBUG)
247 | return cls.logger.warning(*args, **kwargs)
248 |
249 | @classmethod
250 | def error(cls, *args, **kwargs):
251 | if cls.logger is None:
252 | cls.init_logger(log_level=logging.DEBUG)
253 | return cls.logger.error(*args, **kwargs)
254 |
255 | @classmethod
256 | def critical(cls, *args, **kwargs):
257 | if cls.logger is None:
258 | cls.init_logger(log_level=logging.DEBUG)
259 | return cls.logger.critical(*args, **kwargs)
260 |
261 |
262 | log = _Log
263 |
264 |
265 | def get_timestamp(timestamp: Optional[Union[int, float]] = None) -> str:
266 | datetime_ = datetime.fromtimestamp(timestamp) if timestamp else datetime.now()
267 | return datetime_.strftime('%y-%m-%d_%H-%M-%S')
268 |
269 |
270 | class _NamespaceWrapper(object):
271 | def __init__(self, data: dict):
272 | self._data = data
273 |
274 | def __getattr__(self, item):
275 | try:
276 | value = self._data[item]
277 | except KeyError:
278 | value = getattr(self._data, item)
279 | if isinstance(value, dict):
280 | return _NamespaceWrapper(value)
281 | else:
282 | return value
283 |
284 | def __setattr__(self, key, value):
285 | if key.startswith('_'):
286 | super().__setattr__(key, value)
287 | else:
288 | self._data[key] = value
289 |
290 | def __getitem__(self, item):
291 | return self._data[item]
292 |
293 | def __setitem__(self, key, value):
294 | self._data[key] = value
295 |
296 | def __repr__(self):
297 | return f'Wrapper({self._data})'
298 |
299 |
300 | class YamlLoader(_NamespaceWrapper):
301 | def __init__(self, path):
302 | loader = yaml.SafeLoader
303 | # See https://stackoverflow.com/questions/30458977/yaml-loads-5e-6-as-string-and-not-a-number
304 | loader.add_implicit_resolver(
305 | u'tag:yaml.org,2002:float',
306 | re.compile(u'''^(?:
307 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
308 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
309 | |\\.[0-9_]+(?:[eE][-+][0-9]+)?
310 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
311 | |[-+]?\\.(?:inf|Inf|INF)
312 | |\\.(?:nan|NaN|NAN))$''', re.X),
313 | list(u'-+0123456789.'))
314 |
315 | with open(path, encoding='utf-8') as f:
316 | super().__init__(yaml.load(f, Loader=loader))
317 |
318 | @property
319 | def data(self):
320 | return self._data
321 |
322 | @property
323 | def args(self):
324 | return ()
325 |
326 | @property
327 | def kwargs(self):
328 | return {}
329 |
330 |
331 | def print_return(func):
332 | def wrapper(*args, **kwargs):
333 | ret = func(*args, **kwargs)
334 | print(ret)
335 | return ret
336 |
337 | return wrapper
338 |
339 |
340 | class Natural(object):
341 | idx = -1
342 |
343 | def __iter__(self):
344 | return self
345 |
346 | def __next__(self):
347 | self.idx += 1
348 | return self.idx
349 |
350 |
351 | def plt_figure_to_numpy(fig):
352 | fig.canvas.draw()
353 | arr = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
354 | arr = arr.reshape(fig.canvas.get_width_height()[::-1] + (3,))
355 | return arr
356 |
--------------------------------------------------------------------------------
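Usage sketch (illustrative, not part of the repository) for a few helpers in qdpgym/utils/utils.py: make_part pre-binds constructor arguments, Angle.norm wraps an angle into [-pi, pi), MfTimer times a block, and PadWrapper clamps out-of-range indices. It assumes the qdpgym package is importable.

import collections
import math
import time

from qdpgym.utils import Angle, MfTimer, PadWrapper, get_timestamp, make_part

make_deque5 = make_part(collections.deque, maxlen=5)   # pre-bound constructor
print(make_deque5((1, 2, 3)))                          # deque([1, 2, 3], maxlen=5)

print(Angle.norm(3 * math.pi / 2))                     # -> -pi/2
print(Angle.to_deg(math.pi))                           # -> 180.0

with MfTimer() as timer:                               # context-manager timing
    time.sleep(0.05)
print(f'{timer.time_spent:.3f}s elapsed at {get_timestamp()}')

history = PadWrapper([1.0, 2.0, 3.0])                  # clamps instead of raising IndexError
print(history[10], history[-10])                       # -> 3.0 1.0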
/resources/lin.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/resources/lin.gif
--------------------------------------------------------------------------------
/resources/rot.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengruiz/QuadrupedRL/689d83fe84ed513aaa3e228628ae6d2bea7f054e/resources/rot.gif
--------------------------------------------------------------------------------