├── .gitignore
├── image
│   └── figure.png
├── model
│   ├── modules
│   │   ├── __init__.py
│   │   ├── base_nn.py
│   │   └── actor.py
│   ├── config.yaml
│   ├── utils
│   │   └── color.py
│   ├── storage.py
│   ├── main.py
│   └── agent.py
├── noise_generator
│   ├── CMakeLists.txt
│   ├── main.cpp
│   └── TrainManager.hpp
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | __pycache__/
3 | .idea/*
4 | cmake-*/*
--------------------------------------------------------------------------------
/image/figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomericky/quadruped-robot-belief-encoder/HEAD/image/figure.png
--------------------------------------------------------------------------------
/model/modules/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.realpath(__file__)))
--------------------------------------------------------------------------------
/noise_generator/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 | project(noise_example)
3 |
4 | set(CMAKE_CXX_STANDARD 17)
5 |
6 | set(EXEC_NAME ${PROJECT_NAME})
7 | add_executable(${EXEC_NAME} main.cpp)
8 | target_include_directories(${EXEC_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
--------------------------------------------------------------------------------
/model/config.yaml:
--------------------------------------------------------------------------------
1 | student_model:
2 | policy:
3 | MLP:
4 | extero_encoder:
5 | shape: [ 80, 60 ]
6 | activation: leakyrelu
7 | output: 24
8 | base_net:
9 | shape: [ 256, 160, 128 ]
10 | activation: leakyrelu
11 |
12 | belief_encoder:
13 | GRU:
14 | recurrent_encoder:
15 | hidden: 50
16 | num_layers: 2
17 | batch_first: False
18 | dropout: 0.
19 | MLP:
20 | attention_encoder:
21 | shape: [ 64, 64 ]
22 | activation: leakyrelu
23 | state_encoder:
24 | shape: [ 64, 64 ]
25 | activation: leakyrelu
26 | output: 24
27 |
28 | belief_decoder:
29 | MLP:
30 | attention_encoder:
31 | shape: [64, 64]
32 | activation: leakyrelu
33 | extero_decoder:
34 | shape: [64, 64]
35 | activation: leakyrelu
--------------------------------------------------------------------------------
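
Note that the `input`/`output` sizes absent from this config are filled in at runtime by `RecurrentAttentionPolicy.adapt_model()` in `/model/modules/actor.py`. A minimal sketch of the derived dimensions, assuming the placeholder sizes from `main.py` (the division into 4 groups presumably corresponds to the four feet):

```
# Dimensions adapt_model() derives at construction time (sketch, not repo code)
proprio_obs_dim, extero_obs_dim, action_dim = 10, 20, 12  # main.py placeholders

extero_encoder_input = extero_obs_dim // 4   # 5: extero data is split into 4 groups
base_net_input = proprio_obs_dim + 24 * 4    # 106: proprio + tiled belief state
base_net_output = action_dim                 # 12
recurrent_encoder_input = base_net_input     # 106: the same fused state feeds the GRU
attention_encoder_output = 24 * 4            # 96: one gate per encoded extero feature
```
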
/model/utils/color.py:
--------------------------------------------------------------------------------
1 | class bcolors:
2 | HEADER = '\033[95m'
3 | OKBLUE = '\033[94m'
4 | OKCYAN = '\033[96m'
5 | OKGREEN = '\033[92m'
6 | WARNING = '\033[93m'
7 | FAIL = '\033[91m'
8 | ENDC = '\033[0m'
9 | BOLD = '\033[1m'
10 | UNDERLINE = '\033[4m'
11 |
12 | def cprint(msg, bold=False, underline=False, color=''):
13 | color_encoding = ''
14 | if color == 'blue':
15 | color_encoding = bcolors.OKBLUE
16 | elif color == 'cyan':
17 | color_encoding = bcolors.OKCYAN
18 | elif color == "orange":
19 | color_encoding = bcolors.WARNING
20 | elif color == 'red':
21 | color_encoding = bcolors.FAIL
22 |
23 | bold_encoding = bcolors.BOLD if bold else ''
24 | underline_encoding = bcolors.UNDERLINE if underline else ''
25 | print(bold_encoding + underline_encoding + color_encoding + msg + bcolors.ENDC)
--------------------------------------------------------------------------------
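
A quick usage sketch (run from the `model` directory, as `main.py` and `agent.py` assume):

```
from utils.color import cprint

cprint("plain message")
cprint("checkpoint saved", bold=True, color="blue")
cprint("load fail", bold=True, underline=True, color="red")
```
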
/README.md:
--------------------------------------------------------------------------------
1 | # Quadruped robot belief encoder
2 | Implementation of some core elements of "Learning robust perceptive locomotion for quadrupedal robots in the wild" [[Paper](https://www.science.org/doi/10.1126/scirobotics.abk2822)]
3 |
4 |
5 |
6 | ## Description
7 | This repository includes implementations of two components.
8 | 1. Student policy network
9 | 2. Heightmap noise generator
10 |
11 | The student policy network is composed of a **belief encoder** and a **belief decoder** that fuse proprioceptive and exteroceptive sensor data. It is implemented in *Python*.
12 | The privileged information decoder described in the paper is excluded because it was not critical in our experiments.
13 |
14 | The heightmap noise generator is composed of **three noise models** that account for errors encountered in real-world use, such as depth camera noise and state estimation error/drift.
15 | It is implemented in *C++* because the [Raisim](https://raisim.com/) simulator we use implements its environments in *C++* for fast simulation.
16 |
17 | ## Dependencies
18 | - numpy
19 | - pytorch
20 | - ruamel.yaml
21 |
22 | ## Run example
23 | 1. Student policy network
24 | ```
25 | cd model
26 | python main.py
27 | ```
28 |
29 | 2. Heightmap noise generator
30 | ```
31 | cd noise_generator
32 | mkdir build && cd build
33 | cmake ..
34 | make
35 | # After build is finished
36 | ./noise_example
37 | ```
38 |
39 | ## Contributors
40 | - [Yunho Kim](https://github.com/awesomericky)
41 | - [Jinhyeok Choi](https://github.com/Triangle2022)
42 |
--------------------------------------------------------------------------------
/model/storage.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | class VisionRolloutBuffer:
5 | def __init__(self, args):
6 | self.proprio_dim = args.proprio_obs_dim
7 | self.extero_dim = args.extero_obs_dim
8 | self.action_dim = args.action_dim
9 | self.n_envs = args.n_envs
10 | self.n_steps = args.n_steps
11 | self.n_steps_per_env = int(self.n_steps/self.n_envs)
12 | self.device = args.device
13 |
14 | self.cnt = 0
15 | self.proprio_states = np.zeros((self.n_steps_per_env, self.n_envs, self.proprio_dim), dtype=np.float32)
16 | self.noisy_extero_states = np.zeros((self.n_steps_per_env, self.n_envs, self.extero_dim), dtype=np.float32)
17 | self.extero_states = np.zeros((self.n_steps_per_env, self.n_envs, self.extero_dim), dtype=np.float32)
18 | self.actions = np.zeros((self.n_steps_per_env, self.n_envs, self.action_dim), dtype=np.float32)
19 |
20 | def addTransition(self, proprio_states, noisy_extero_states, extero_states, actions):
21 | """
22 | :param proprio_states: proprioceptive sensor data [numpy.float32]
23 | :param noisy_extero_states: noisy exteroceptive sensor data [numpy.float32]
24 | :param extero_states: (teacher) exteroceptive sensor data [numpy.float32]
25 | :param actions: (teacher) action [numpy.float32]
26 | :return:
27 | """
28 | assert self.cnt < self.n_steps_per_env
29 | self.proprio_states[self.cnt] = proprio_states
30 | self.noisy_extero_states[self.cnt] = noisy_extero_states
31 | self.extero_states[self.cnt] = extero_states
32 | self.actions[self.cnt] = actions
33 | self.cnt += 1
34 |
35 | def getBatches(self):
36 | """
37 | :return: (L, N, D)
38 | """
39 | self.cnt = 0
40 | proprio_states_tensor = torch.from_numpy(self.proprio_states).to(self.device)
41 | noisy_extero_states_tensor = torch.from_numpy(self.noisy_extero_states).to(self.device)
42 | extero_states_tensor = torch.from_numpy(self.extero_states).to(self.device)
43 | actions_tensor = torch.from_numpy(self.actions).to(self.device)
44 | return proprio_states_tensor, noisy_extero_states_tensor, extero_states_tensor, actions_tensor
45 |
--------------------------------------------------------------------------------
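
A self-contained fill/drain sketch with toy dimensions (`argparse.Namespace` stands in for the `args` object that `main.py` builds; run from the `model` directory):

```
from argparse import Namespace
import numpy as np
from storage import VisionRolloutBuffer

args = Namespace(proprio_obs_dim=10, extero_obs_dim=20, action_dim=12,
                 n_envs=4, n_steps=8, device="cpu")
buf = VisionRolloutBuffer(args)
for _ in range(buf.n_steps_per_env):  # 8 / 4 = 2 steps per env
    buf.addTransition(*[np.zeros((4, d), np.float32) for d in (10, 20, 20, 12)])
p, ne, e, a = buf.getBatches()  # (2, 4, D) tensors on "cpu"; also resets buf.cnt
```
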
/noise_generator/main.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <random>
3 | #include <array>
4 | #include "TrainManager.hpp"
5 |
6 | namespace raisim {
7 |
8 | extern "C"
9 | int main(int argc, char * argv[]) {
10 | static std::mt19937 gen;
11 | static std::uniform_real_distribution<double> uniDist(0., 1.);
12 | static std::normal_distribution<double> normDist(0., 1.);
13 |
14 | HeightNoiseGenerator heightNoiseGenerator;
15 | HeightNoiseGenerator::Noise heightNoise;
16 |
17 | // initialize container of the true height scan data
18 | const int nFoots = 4;
19 | const int nScansPerFoot = 10;
20 | std::array<std::array<double, nScansPerFoot>, nFoots> heightScan;
21 | std::array<std::array<double, nScansPerFoot>, nFoots> heightScanNoisy;
22 | for (int i = 0; i < nFoots; i++)
23 | for (int j = 0; j < nScansPerFoot; j++)
24 | heightScan[i][j] = 0;
25 |
26 | // call when the episode starts
27 | heightNoiseGenerator.sampleNoiseType(gen, uniDist);
28 | heightNoiseGenerator.sampleNoise(
29 | heightNoise, HeightNoiseGenerator::SampleType::INIT, gen, uniDist, normDist);
30 |
31 | // simulate
32 | int nSteps = 400;
33 | for (int t = 0; t < nSteps; t++) {
34 | for (int i = 0; i < nFoots; i++) {
35 | // call when the foot changes
36 | heightNoiseGenerator.sampleNoise(
37 | heightNoise, HeightNoiseGenerator::SampleType::FOOT_CHANGE, gen, uniDist, normDist);
38 |
39 | for (int j = 0; j < nScansPerFoot; j++) {
40 | // call for every height scan point
41 | heightNoiseGenerator.sampleNoise(
42 | heightNoise, HeightNoiseGenerator::SampleType::POINT_CHANGE, gen, uniDist, normDist);
43 |
44 | double xOffset = heightNoise.x;
45 | double yOffset = heightNoise.y;
46 | double zOffset = heightNoise.z;
47 |
48 | /// Read height scan with x, y, z offset from the true value as below.
49 | /// heightScanNoisy[i][j] = heightmap->getHeight(default_x_ij + xOffset, default_y_ij + yOffset) + zOffset
50 | /// However, in this code, there is no heightmap generated.
51 | /// Thus, only zOffset is added to the default height value to show the example use case.
52 | heightScanNoisy[i][j] = heightScan[i][j] + zOffset;
53 |
54 | std::cout << xOffset << " " << yOffset << " " << zOffset << "\n";
55 | }
56 | }
57 | }
58 |
59 | return 0;
60 | }
61 |
62 | }
--------------------------------------------------------------------------------
/model/modules/base_nn.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class BaseNet(nn.Module):
5 | def __init__(self, model_config):
6 | super(BaseNet, self).__init__()
7 | self.model_config = model_config
8 | self.activation_map = {"relu": nn.ReLU, "tanh": nn.Tanh, "leakyrelu": nn.LeakyReLU, "gelu": nn.GELU}
9 |
10 | used_model_archs = self.model_config.keys()
11 |
12 | if "LSTM" in used_model_archs:
13 | for model_name, model_arch in self.model_config["LSTM"].items():
14 | self.add_module(model_name, nn.LSTM(
15 | input_size=model_arch["input"],
16 | hidden_size=model_arch["hidden"],
17 | num_layers=model_arch["num_layers"],
18 | batch_first=model_arch["batch_first"],
19 | dropout=model_arch["dropout"]
20 | ))
21 |
22 | if "GRU" in used_model_archs:
23 | for model_name, model_arch in self.model_config["GRU"].items():
24 | self.add_module(model_name, nn.GRU(
25 | input_size=model_arch["input"],
26 | hidden_size=model_arch["hidden"],
27 | num_layers=model_arch["num_layers"],
28 | batch_first=model_arch["batch_first"],
29 | dropout=model_arch["dropout"]
30 | ))
31 |
32 | if "MLP" in used_model_archs:
33 | for model_name, model_arch in self.model_config["MLP"].items():
34 | assert model_arch["activation"] in list(self.activation_map.keys()), "Unavailable activation."
35 | self.add_module(model_name, MLP(
36 | input_size=model_arch["input"],
37 | output_size=model_arch["output"],
38 | shape=model_arch["shape"],
39 | activation=self.activation_map[model_arch["activation"]],
40 | dropout=model_arch["dropout"] if "dropout" in model_arch.keys() else 0.,
41 | batchnorm=model_arch["batchnorm"] if "batchnorm" in model_arch.keys() else False
42 | ))
43 |
44 | if "Linear" in used_model_archs:
45 | for model_name, model_arch in self.model_config["Linear"].items():
46 | self.add_module(model_name, nn.Linear(
47 | in_features=model_arch["input"],
48 | out_features=model_arch["output"]
49 | ))
50 |
51 |
52 | class MLP(nn.Module):
53 | def __init__(self, input_size, output_size, shape, activation, dropout=0.0, batchnorm=False):
54 | super(MLP, self).__init__()
55 | self.activation_fn = activation
56 |
57 | modules = [nn.Linear(input_size, shape[0]), self.activation_fn()]
58 |
59 | for idx in range(len(shape)-1):
60 | modules.append(nn.Linear(shape[idx], shape[idx+1]))
61 | if batchnorm:
62 | modules.append(nn.BatchNorm1d(shape[idx+1]))
63 | modules.append(self.activation_fn())
64 | if dropout != 0.0:
65 | modules.append(nn.Dropout(dropout))
66 |
67 | modules.append(nn.Linear(shape[-1], output_size))
68 | self.architecture = nn.Sequential(*modules)
69 |
70 | self.input_shape = [input_size]
71 | self.output_shape = [output_size]
72 |
73 | def forward(self, input):
74 | return self.architecture(input)
--------------------------------------------------------------------------------
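
A minimal construction sketch (run from the `model` directory; the `input`/`output` keys shown are the ones `adapt_model()` would normally supply):

```
import torch
from modules.base_nn import BaseNet

cfg = {"MLP": {"extero_encoder": {"input": 5, "output": 24,
                                  "shape": [80, 60], "activation": "leakyrelu"}}}
net = BaseNet(cfg)                           # registers net.extero_encoder via add_module
out = net.extero_encoder(torch.randn(7, 5))
print(out.shape)                             # torch.Size([7, 24])
```
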
/model/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from ruamel.yaml import YAML
3 | import numpy as np
4 | import torch
5 |
6 | from agent import VisionStudentAgent
7 |
8 |
9 | class Environment:
10 | """
11 | Change to the environment you are using
12 | """
13 | def __init__(self, args):
14 | self.obs_dim = args.proprio_obs_dim + args.extero_obs_dim
15 | self.action_dim = args.action_dim
16 | self.n_envs = args.n_envs
17 |
18 | def observe(self):
19 | observations = np.random.normal(size=(self.n_envs, self.obs_dim)).astype(np.float32)
20 | return observations
21 |
22 | def observe_noisy(self):
23 | noisy_observations = np.random.normal(size=(self.n_envs, self.obs_dim)).astype(np.float32)
24 | return noisy_observations
25 |
26 | def step(self, action):
27 | rewards = np.random.normal(size=self.n_envs).astype(np.float32)
28 | dones = np.zeros(shape=self.n_envs).astype(np.bool_)
29 | return rewards, dones
30 |
31 | class TeacherAgent:
32 | """
33 | Change to the teacher agent you are using
34 | """
35 | def __init__(self, args):
36 | self.action_dim = args.action_dim
37 | self.n_envs = args.n_envs
38 |
39 | def getAction(self, observations):
40 | actions = np.random.normal(size=(self.n_envs, self.action_dim)).astype(np.float32)
41 | return actions
42 |
43 | def getParser():
44 | parser = argparse.ArgumentParser(description='RL')
45 | parser.add_argument('--name', type=str, default='example')
46 | parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu.')
47 | parser.add_argument('--save_dir', type=str, default='example', help='directory name to save weights')
48 | return parser
49 |
50 | if __name__ == "__main__":
51 | parser = getParser()
52 | args = parser.parse_args()
53 |
54 | # parameters to be set from the environment you are running
55 | args.student_model_num = 0
56 | args.student_lr = 3e-4
57 | args.student_epochs = 1
58 | args.max_grad_norm = 1.
59 | args.student_policy_type = "vision_recurrent"
60 | args.n_envs = 100
61 | args.n_steps = 40000
62 | args.n_steps_per_env = int(args.n_steps / args.n_envs)
63 |
64 | args.proprio_obs_dim = 10 #100
65 | args.extero_obs_dim = 20 #200
66 | args.action_dim = 12
67 |
68 | # config
69 | cfg = YAML().load(open("config.yaml", 'r'))
70 |
71 | # define teacher agent (pretrained)
72 | teacher = TeacherAgent(args)
73 |
74 | # define student agent
75 | student = VisionStudentAgent(args, cfg["student_model"])
76 | hidden_state_tensor = None
77 |
78 | # define environment
79 | env = Environment(args)
80 |
81 | max_update = 10
82 |
83 | for update in range(max_update):
84 | for _ in range(args.n_steps_per_env):
85 | obs = env.observe()
86 | noisy_obs = env.observe_noisy()
87 |
88 | proprio_obs = obs[:, :args.proprio_obs_dim]
89 | extero_obs = obs[:, -args.extero_obs_dim:]
90 | noisy_extero_obs = noisy_obs[:, -args.extero_obs_dim:]
91 | proprio_obs_tensor = torch.from_numpy(proprio_obs).to(args.device)
92 | noisy_extero_obs_tensor = torch.from_numpy(noisy_extero_obs).to(args.device)
93 |
94 | with torch.no_grad():
95 | # get student action
96 | actions_tensor, hidden_state_tensor = student.getAction(
97 | proprio_state=proprio_obs_tensor,
98 | extero_state=noisy_extero_obs_tensor,
99 | hidden_state=hidden_state_tensor
100 | )
101 |
102 | # get teacher action
103 | teacher_actions = teacher.getAction(obs)
104 |
105 | actions = actions_tensor.detach().cpu().numpy()
106 | rewards, dones = env.step(actions)
107 |
108 | # add data to the buffer
109 | student.step(proprio_obs, noisy_extero_obs, extero_obs, teacher_actions)
110 |
111 | # train model
112 | loss, reconstruction_loss, action_loss = student.train()
113 |
114 | # save model
115 | if update % 5 == 0:
116 | student.save(update)
117 |
118 | print('----------------------------------------------------')
119 | print('{:>6}th iteration'.format(update))
120 | print('{:<40} {:>6}'.format("total loss: ", '{:6.4f}'.format(loss)))
121 | print('{:<40} {:>6}'.format("reconstruction loss: ", '{:6.4f}'.format(reconstruction_loss)))
122 | print('{:<40} {:>6}'.format("action loss: ", '{:6.4f}'.format(action_loss)))
123 | print('----------------------------------------------------\n')
124 |
125 |
--------------------------------------------------------------------------------
/model/agent.py:
--------------------------------------------------------------------------------
1 | from torch.optim import Adam
2 | import torch
3 | import os
4 |
5 | from modules.actor import RecurrentAttentionPolicy
6 | from storage import VisionRolloutBuffer
7 | from utils.color import cprint
8 |
9 | policy_modules = {"vision_recurrent": RecurrentAttentionPolicy}
10 | storage_modules = {"vision_recurrent": VisionRolloutBuffer}
11 |
12 |
13 | class VisionStudentAgent:
14 | def __init__(self, args, model_cfg):
15 | # base
16 | self.device = args.device
17 | self.name = args.name
18 | self.model_num = args.student_model_num
19 | self.checkpoint_dir = f'{args.save_dir}/student_checkpoint'
20 |
21 | # for regression
22 | self.student_lr = args.student_lr
23 | self.student_epochs = args.student_epochs
24 | self.max_grad_norm = args.max_grad_norm
25 |
26 | # for models
27 | assert args.student_policy_type in policy_modules.keys()
28 | Policy = policy_modules[args.student_policy_type]
29 | self.actor = Policy(args, model_cfg).to(self.device)
30 | self.actor_optimizer = Adam(self.actor.parameters(), lr=self.student_lr)
31 |
32 | # for data storage
33 | Storage = storage_modules[args.student_policy_type]
34 | self.rollout_buffer = Storage(args)
35 |
36 | self.epoch = self.load()
37 |
38 | def getAction(self, proprio_state, extero_state, hidden_state):
39 | """
40 | :param proprio_state: [(N, H)]
41 | :param extero_state: [(N, H)]
42 | :param hidden_state: [(N_layer, N, H)]
43 | :return:
44 | action: [(N, H)]
45 | next_hidden_state: [(N_layer, N, H)]
46 | """
47 | return self.actor.getAction(proprio_state, extero_state, hidden_state)
48 |
49 | def step(self, proprio_state, noisy_extero_state, extero_state, action):
50 | self.rollout_buffer.addTransition(proprio_state, noisy_extero_state, extero_state, action)
51 |
52 | def train(self):
53 | proprio_states_tensor, noisy_extero_states_tensor, extero_states_tensor, actions_tensor \
54 | = self.rollout_buffer.getBatches()
55 |
56 | total_loss = 0
57 | total_reconstruction_loss = 0
58 | total_action_loss = 0
59 |
60 | for _ in range(self.student_epochs):
61 | # forward pass
62 | output = self.actor(proprio_states_tensor, noisy_extero_states_tensor)
63 | student_action = output["action"]
64 | estimated_extero_state = output["estimated_extero_state"]
65 |
66 | # compute loss
67 | reconstruction_loss = torch.mean(torch.pow(estimated_extero_state - extero_states_tensor, 2))
68 | action_loss = torch.mean(torch.pow(student_action - actions_tensor, 2))
69 | loss = 0.5 * reconstruction_loss + action_loss
70 |
71 | # optimize
72 | self.actor_optimizer.zero_grad()
73 | loss.backward()
74 | torch.nn.utils.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)
75 | self.actor_optimizer.step()
76 |
77 | # logging
78 | total_loss += loss.item()
79 | total_reconstruction_loss += reconstruction_loss.item()
80 | total_action_loss += action_loss.item()
81 |
82 | total_loss /= self.student_epochs
83 | total_reconstruction_loss /= self.student_epochs
84 | total_action_loss /= self.student_epochs
85 |
86 | return total_loss, total_reconstruction_loss, total_action_loss
87 |
88 | def load_exteroceptive_encoder(self, checkpoint):
89 | """
90 | :param checkpoint: teacher actor state dict (pass only the "actor" entry of the torch checkpoint)
91 | """
92 | loaded_checkpoint = dict()
93 | for k, v in checkpoint.items():
94 | if k.split('.')[0] == "extero_encoder":
95 | loaded_checkpoint['.'.join(k.split('.')[1:])] = v
96 |
97 | if len(loaded_checkpoint.keys()) != 0:
98 | self.actor.extero_encoder.load_state_dict(loaded_checkpoint)
99 | cprint("Exteroceptive encoder load success", bold=True, color="blue")
100 | else:
101 | cprint("Exteroceptive encoder load fail", bold=True, color="blue")
102 |
103 | def save(self, model_name):
104 | save_dict = {
105 | 'actor': self.actor.state_dict(),
106 | 'actor_optimizer': self.actor_optimizer.state_dict()
107 | }
108 | torch.save(save_dict, f"{self.checkpoint_dir}/full_{model_name}.pt")
109 | cprint(f'[{self.name} - full_{model_name}.pt] save success.', bold=True, color="blue")
110 |
111 | def load(self):
112 | if not os.path.isdir(self.checkpoint_dir):
113 | os.makedirs(self.checkpoint_dir)
114 | checkpoint_file = f"{self.checkpoint_dir}/full_{self.model_num}.pt"
115 |
116 | if os.path.isfile(checkpoint_file):
117 | checkpoint = torch.load(checkpoint_file, map_location=self.device)
118 | self.actor.load_state_dict(checkpoint['actor'])
119 | self.actor_optimizer.load_state_dict(checkpoint['actor_optimizer'])
120 | cprint(f'[{self.name} - full_{self.model_num}.pt] load success.', bold=True, color="blue")
121 | return int(self.model_num)
122 | else:
123 | cprint(f'[{self.name} - full_{self.model_num}.pt] load fail.', bold=True, color="red")
124 | return 0
125 |
--------------------------------------------------------------------------------
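
A hypothetical warm-start sketch (`teacher.pt` is a placeholder path, and `student` is a `VisionStudentAgent` built as in `main.py`); only keys prefixed `extero_encoder.` are copied:

```
import torch

teacher_checkpoint = torch.load("teacher.pt", map_location=student.device)
student.load_exteroceptive_encoder(teacher_checkpoint["actor"])  # reports success/fail via cprint
```
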
/noise_generator/TrainManager.hpp:
--------------------------------------------------------------------------------
1 | #ifndef TRAIN_MANAGER_HPP
2 | #define TRAIN_MANAGER_HPP
3 | #include <random>
4 |
5 | namespace raisim {
6 | class HeightNoiseGenerator {
7 | public:
8 | enum class NoiseType : int {
9 | NOMINAL = 0,
10 | OFFSET,
11 | NOISY
12 | };
13 |
14 | // INIT (episode start) --> FOOT_CHANGE (foot change) --> POINT_CHANGE (point change)
15 | enum class SampleType : int {
16 | INIT = 0,
17 | FOOT_CHANGE,
18 | POINT_CHANGE
19 | };
20 |
21 | struct Noise {
22 | double x;
23 | double y;
24 | double z;
25 | };
26 |
27 | struct NoiseParam {
28 | Noise pointAndTime; // sampled for each point every time step
29 | Noise footAndTime; // sampled for each foot every time step
30 | Noise foot; // sampled for each foot at the beginning of the episode
31 | double zOutlier;
32 | double constantOffsetProb;
33 | double zOutlierProb;
34 | };
35 |
36 | HeightNoiseGenerator() = default;
37 |
38 | ~HeightNoiseGenerator() = default;
39 |
40 | void sampleNoiseType(std::mt19937 &gen,
41 | std::uniform_real_distribution<double> &uniDist) {
42 | double val = uniDist(gen);
43 |
44 | if (val < 0.6)
45 | noiseType = NoiseType::NOMINAL;
46 | else if (val < 0.6 + 0.3)
47 | noiseType = NoiseType::OFFSET;
48 | else
49 | noiseType = NoiseType::NOISY;
50 |
51 | switch (noiseType) {
52 | case NoiseType::NOMINAL:
53 | defaultNoiseParam.pointAndTime.x = 0.004;
54 | defaultNoiseParam.pointAndTime.y = 0.004;
55 | defaultNoiseParam.pointAndTime.z = 0.005;
56 | defaultNoiseParam.footAndTime.x = 0.01;
57 | defaultNoiseParam.footAndTime.y = 0.01;
58 | defaultNoiseParam.footAndTime.z = 0.04;
59 | defaultNoiseParam.zOutlierProb = 0.02;
60 | defaultNoiseParam.zOutlier = 0.03;
61 | defaultNoiseParam.constantOffsetProb = 0.05;
62 | defaultNoiseParam.foot.x = 0.1;
63 | defaultNoiseParam.foot.y = 0.1;
64 | defaultNoiseParam.foot.z = 0.1;
65 | break;
66 | case NoiseType::OFFSET:
67 | defaultNoiseParam.pointAndTime.x = 0.004;
68 | defaultNoiseParam.pointAndTime.y = 0.004;
69 | defaultNoiseParam.pointAndTime.z = 0.005;
70 | defaultNoiseParam.footAndTime.x = 0.01;
71 | defaultNoiseParam.footAndTime.y = 0.01;
72 | defaultNoiseParam.footAndTime.z = 0.1;
73 | defaultNoiseParam.zOutlierProb = 0.02;
74 | defaultNoiseParam.zOutlier = 0.1;
75 | defaultNoiseParam.constantOffsetProb = 0.02;
76 | defaultNoiseParam.foot.x = 0.1;
77 | defaultNoiseParam.foot.y = 0.1;
78 | defaultNoiseParam.foot.z = 0.1;
79 | break;
80 | case NoiseType::NOISY:
81 | defaultNoiseParam.pointAndTime.x = 0.004;
82 | defaultNoiseParam.pointAndTime.y = 0.004;
83 | defaultNoiseParam.pointAndTime.z = 0.1;
84 | defaultNoiseParam.footAndTime.x = 0.1;
85 | defaultNoiseParam.footAndTime.y = 0.1;
86 | defaultNoiseParam.footAndTime.z = 0.3;
87 | defaultNoiseParam.zOutlierProb = 0.05;
88 | defaultNoiseParam.zOutlier = 0.3;
89 | defaultNoiseParam.constantOffsetProb = 0.3;
90 | defaultNoiseParam.foot.x = 0.1;
91 | defaultNoiseParam.foot.y = 0.1;
92 | defaultNoiseParam.foot.z = 0.1;
93 | break;
94 | }
95 | }
96 |
97 | void sampleNoise(Noise &noise,
98 | const SampleType &sampleType,
99 | std::mt19937 &gen,
100 | std::uniform_real_distribution<double> &uniDist,
101 | std::normal_distribution<double> &normDist) {
102 | switch (sampleType) {
103 | case SampleType::INIT:
104 | if (uniDist(gen) < defaultNoiseParam.constantOffsetProb) {
105 | sampledNoiseParam.foot.x = defaultNoiseParam.foot.x * normDist(gen);
106 | sampledNoiseParam.foot.y = defaultNoiseParam.foot.y * normDist(gen);
107 | sampledNoiseParam.foot.z = defaultNoiseParam.foot.z * normDist(gen);
108 | } else {
109 | sampledNoiseParam.foot.x = 0.;
110 | sampledNoiseParam.foot.y = 0.;
111 | sampledNoiseParam.foot.z = 0.;
112 | }
113 | return;
114 | case SampleType::FOOT_CHANGE:
115 | sampledNoiseParam.footAndTime.x = defaultNoiseParam.footAndTime.x * normDist(gen);
116 | sampledNoiseParam.footAndTime.y = defaultNoiseParam.footAndTime.y * normDist(gen);
117 | sampledNoiseParam.footAndTime.z = defaultNoiseParam.footAndTime.z * normDist(gen);
118 | return;
119 | case SampleType::POINT_CHANGE:
120 | sampledNoiseParam.pointAndTime.x = defaultNoiseParam.pointAndTime.x * normDist(gen);
121 | sampledNoiseParam.pointAndTime.y = defaultNoiseParam.pointAndTime.y * normDist(gen);
122 | sampledNoiseParam.pointAndTime.z = defaultNoiseParam.pointAndTime.z * normDist(gen);
123 | if (uniDist(gen) < defaultNoiseParam.zOutlierProb)
124 | sampledNoiseParam.zOutlier = defaultNoiseParam.zOutlier * normDist(gen);
125 | else
126 | sampledNoiseParam.zOutlier = 0.;
127 | break;
128 | }
129 |
130 | noise.x = sampledNoiseParam.foot.x + sampledNoiseParam.footAndTime.x + sampledNoiseParam.pointAndTime.x;
131 | noise.y = sampledNoiseParam.foot.y + sampledNoiseParam.footAndTime.y + sampledNoiseParam.pointAndTime.y;
132 | noise.z = sampledNoiseParam.foot.z + sampledNoiseParam.footAndTime.z +
133 | sampledNoiseParam.pointAndTime.z + sampledNoiseParam.zOutlier;
134 | }
135 |
136 | private:
137 | NoiseType noiseType;
138 | NoiseParam defaultNoiseParam, sampledNoiseParam;
139 | };
140 |
141 | } // namespace raisim
142 |
143 | #endif // TRAIN_MANAGER_HPP
--------------------------------------------------------------------------------
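
For illustration only (a Python mirror, not part of the C++ build): each offset composes additively from a per-episode foot term, a per-foot-per-step term, and a per-point-per-step term, with the z component occasionally receiving an outlier. Using the NOMINAL parameters above:

```
import random

def compose_z(foot_z, foot_time_z, point_time_z, outlier_z):
    # mirrors the tail of HeightNoiseGenerator::sampleNoise for the z offset
    return foot_z + foot_time_z + point_time_z + outlier_z

z = compose_z(0.0,                        # no constant offset sampled this episode
              0.04 * random.gauss(0, 1),  # footAndTime.z
              0.005 * random.gauss(0, 1), # pointAndTime.z
              0.03 * random.gauss(0, 1) if random.random() < 0.02 else 0.0)  # zOutlier
```
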
/model/modules/actor.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 |
4 | from base_nn import BaseNet
5 |
6 |
7 | class RecurrentAttentionPolicy(BaseNet):
8 | class BeliefEncoder(BaseNet):
9 | def __init__(self, model_cfg):
10 | super().__init__(model_config=model_cfg)
11 |
12 | def forward(self, proprio_state, encoded_extero_state, hidden_state=None):
13 | """
14 | :param proprio_state: proprioceptive sensor data [(L, H) / (L, N, H)]
15 | :param encoded_extero_state: encoded exteroceptive sensor data [(L, H) / (L, N, H)]
16 | :param hidden_state: hidden state of the recurrent layer [(N_layer, H) / (N_layer, N, H)]
17 | :return:
18 | recurrent_output: output state of the recurrent layer [(L, H) / (L, N, H)]
19 | recurrent_hidden: next hidden state of the recurrent layer [(N_layer, H) / (N_layer, N, H)]
20 | belief_state: next belief state of the belief encoder [(L, H) / (L, N, H)]
21 | """
22 | output = dict()
23 |
24 | fused_state = torch.cat((proprio_state, encoded_extero_state), dim=-1) # [(L, H) / (L, N, H)]
25 | if hidden_state is None:
26 | output["recurrent_output"], output["recurrent_hidden"] = self.recurrent_encoder(fused_state) # (L, N, H)
27 | else:
28 | output["recurrent_output"], output["recurrent_hidden"] = self.recurrent_encoder(fused_state, hidden_state) # (L, N, H)
29 |
30 | tensor_shape = torch.Size(torch.ones_like(torch.tensor(output["recurrent_output"].shape)[:-1])) + (4,) # (1, ..., 1, 4): tile 4x along the feature dim
31 | output["belief_state"] = \
32 | torch.tile(self.state_encoder(output["recurrent_output"]), dims=tensor_shape) + \
33 | torch.sigmoid(self.attention_encoder(output["recurrent_output"])) * encoded_extero_state
34 | return output
35 |
36 | class BeliefDecoder(BaseNet):
37 | def __init__(self, model_cfg):
38 | super().__init__(model_config=model_cfg)
39 |
40 | def forward(self, extero_state, hidden_state):
41 | """
42 | :param extero_state: exteroceptive sensor data [(L, H) / (L, N, H)]
43 | :param hidden_state: output state of the recurrent layer [(L, H) / (L, N, H)]
44 | (cf: in a GRU, the last output state equals the final hidden state)
45 | :return:
46 | estimated_extero_state: estimated exteroceptive sensor data [(L, H) / (L, N, H)]
47 | """
48 | estimated_extero_state = \
49 | self.extero_decoder(hidden_state) + \
50 | torch.sigmoid(self.attention_encoder(hidden_state)) * extero_state
51 | return estimated_extero_state
52 |
53 | def __init__(self, args, model_cfg):
54 | self.proprio_dim = args.proprio_obs_dim
55 | self.extero_dim = args.extero_obs_dim
56 | self.action_dim = args.action_dim
57 |
58 | self.device = args.device
59 | self.args = args
60 | self.model_cfg = model_cfg
61 | self.adapt_model()
62 |
63 | super(RecurrentAttentionPolicy, self).__init__(model_config=model_cfg["policy"])
64 | self.belief_encoder = self.BeliefEncoder(model_cfg["belief_encoder"])
65 | self.belief_decoder = self.BeliefDecoder(model_cfg["belief_decoder"])
66 |
67 | def adapt_model(self):
68 | assert self.extero_dim % 4 == 0
69 | self.model_cfg["policy"]["MLP"]["extero_encoder"]["input"] = self.extero_dim // 4
70 | self.model_cfg["policy"]["MLP"]["base_net"]["input"] = \
71 | self.proprio_dim + self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
72 | self.model_cfg["policy"]["MLP"]["base_net"]["output"] = self.action_dim
73 |
74 | self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["input"] = \
75 | self.proprio_dim + self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
76 |
77 | self.model_cfg["belief_encoder"]["MLP"]["attention_encoder"]["input"] \
78 | = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
79 | self.model_cfg["belief_encoder"]["MLP"]["attention_encoder"]["output"] \
80 | = self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
81 | self.model_cfg["belief_encoder"]["MLP"]["state_encoder"]["input"] \
82 | = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
83 |
84 | self.model_cfg["belief_decoder"]["MLP"]["attention_encoder"]["input"] \
85 | = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
86 | self.model_cfg["belief_decoder"]["MLP"]["attention_encoder"]["output"] \
87 | = self.extero_dim
88 | self.model_cfg["belief_decoder"]["MLP"]["extero_decoder"]["input"] \
89 | = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
90 | self.model_cfg["belief_decoder"]["MLP"]["extero_decoder"]["output"] \
91 | = self.extero_dim
92 |
93 | def forward(self, proprio_state, extero_state, hidden_state=None, use_decoder=True):
94 | """
95 | :param proprio_state: proprioceptive sensor data [(L, H) / (L, N, H)]
96 | :param extero_state: exteroceptive sensor data [(L, H) / (L, N, H)]
97 | :param hidden_state: hidden state of the recurrent layer in the belief encoder [(N_layer, H) / (N_layer, N, H)]
98 | :param use_decoder: whether to use the belief decoder to estimate the exteroceptive data
99 | :return:
100 | action: [(L, H) / (L, N, H)]
101 | recurrent_hidden: [(N_layer, H) / (N_layer, N, H)]
102 | estimated_extero_state: estimated exteroceptive sensor data [(L, H) / (L, N, H)]
103 | """
104 | output = dict()
105 |
106 | length_and_batch = proprio_state.shape[:-1]
107 | encoded_extero_state = extero_state.view(*length_and_batch, 4, self.extero_dim // 4)
108 | encoded_extero_state = self.extero_encoder(encoded_extero_state).view(*length_and_batch, -1)
109 |
110 | belief_encoder_output = self.belief_encoder(proprio_state, encoded_extero_state, hidden_state)
111 | fused_state = torch.cat((proprio_state, belief_encoder_output["belief_state"]), dim=-1)
112 | output["action"] = self.base_net(fused_state)
113 | output["recurrent_hidden"] = belief_encoder_output["recurrent_hidden"]
114 |
115 | if use_decoder:
116 | output["estimated_extero_state"] = \
117 | self.belief_decoder(extero_state, belief_encoder_output["recurrent_output"])
118 | return output
119 |
120 | def getAction(self, proprio_state, extero_state, hidden_state):
121 | """
122 | :param proprio_state: [(N, H)]
123 | :param extero_state: [(N, H)]
124 | :param hidden_state: [(N_layer, N, H)]
125 | :return:
126 | action: [(N, H)]
127 | hidden_state: [(N_layer, N, H)]
128 | """
129 | assert len(proprio_state.shape) == 2 and proprio_state.shape[0] == self.args.n_envs
130 | proprio_state = proprio_state.unsqueeze(0)
131 | extero_state = extero_state.unsqueeze(0)
132 | output = self.forward(proprio_state, extero_state, hidden_state, use_decoder=False)
133 | action = output["action"].squeeze(0)
134 | next_hidden_state = output["recurrent_hidden"] # keep (N_layer, N, H); squeezing would break the num_layers == 1 case
135 | return action, next_hidden_state
--------------------------------------------------------------------------------
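
A standalone sketch of the gated fusion used twice above: the belief encoder adds a sigmoid-gated copy of the encoded exteroceptive features to the tiled recurrent-state encoding, and the belief decoder applies the same pattern to the raw exteroceptive input:

```
import torch

def gated_fusion(base, gate_logits, features):
    # base + sigmoid-gate * features, as in BeliefEncoder/BeliefDecoder.forward
    return base + torch.sigmoid(gate_logits) * features

h = torch.randn(5, 3, 96)       # tiled state_encoder output, (L, N, 24 * 4)
g = torch.randn(5, 3, 96)       # attention_encoder output (gate logits)
e = torch.randn(5, 3, 96)       # encoded exteroceptive features
belief = gated_fusion(h, g, e)  # (L, N, 96)
```
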