├── .gitignore
├── image
│   └── figure.png
├── model
│   ├── modules
│   │   ├── __init__.py
│   │   ├── base_nn.py
│   │   └── actor.py
│   ├── config.yaml
│   ├── utils
│   │   └── color.py
│   ├── storage.py
│   ├── main.py
│   └── agent.py
├── noise_generator
│   ├── CMakeLists.txt
│   ├── main.cpp
│   └── TrainManager.hpp
└── README.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
build/
__pycache__/
.idea/*
cmake-*/*
--------------------------------------------------------------------------------
/image/figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomericky/quadruped-robot-belief-encoder/HEAD/image/figure.png
--------------------------------------------------------------------------------
/model/modules/__init__.py:
--------------------------------------------------------------------------------
import os
import sys

# Make sibling modules (base_nn, actor) importable without package-relative imports.
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
--------------------------------------------------------------------------------
/noise_generator/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
project(noise_example)

set(CMAKE_CXX_STANDARD 17)

set(EXEC_NAME ${PROJECT_NAME})
add_executable(${EXEC_NAME} main.cpp)
target_include_directories(${EXEC_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
--------------------------------------------------------------------------------
/model/config.yaml:
--------------------------------------------------------------------------------
student_model:
  policy:
    MLP:
      extero_encoder:
        shape: [80, 60]
        activation: leakyrelu
        output: 24
      base_net:
        shape: [256, 160, 128]
        activation: leakyrelu

  belief_encoder:
    GRU:
      recurrent_encoder:
        hidden: 50
        num_layers: 2
        batch_first: False
        dropout: 0.
    MLP:
      attention_encoder:
        shape: [64, 64]
        activation: leakyrelu
      state_encoder:
        shape: [64, 64]
        activation: leakyrelu
        output: 24

  belief_decoder:
    MLP:
      attention_encoder:
        shape: [64, 64]
        activation: leakyrelu
      extero_decoder:
        shape: [64, 64]
        activation: leakyrelu
--------------------------------------------------------------------------------
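Note that the `input` sizes (and `base_net`'s `output`) are deliberately absent from config.yaml: `RecurrentAttentionPolicy.adapt_model` in model/modules/actor.py fills them in at runtime from the observation and action dimensions. A minimal sketch of loading the file, mirroring model/main.py (run from the model/ directory):

```
from ruamel.yaml import YAML

cfg = YAML().load(open("config.yaml", 'r'))
print(cfg["student_model"]["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"])  # 50
```
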
/model/utils/color.py:
--------------------------------------------------------------------------------
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

def cprint(text, bold=False, underline=False, color=''):
    """Print text with optional ANSI bold/underline/color escape codes."""
    color_encoding = ''
    if color == 'blue':
        color_encoding = bcolors.OKBLUE
    elif color == 'cyan':
        color_encoding = bcolors.OKCYAN
    elif color == 'orange':
        color_encoding = bcolors.WARNING
    elif color == 'red':
        color_encoding = bcolors.FAIL

    bold_encoding = bcolors.BOLD if bold else ''
    underline_encoding = bcolors.UNDERLINE if underline else ''
    print(bold_encoding + underline_encoding + color_encoding + text + bcolors.ENDC)
--------------------------------------------------------------------------------
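A quick usage sketch of the helper above, matching how model/agent.py reports checkpoint status:

```
from utils.color import cprint  # run from the model/ directory

cprint("load success.", bold=True, color="blue")
cprint("load fail.", bold=True, color="red")
```
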
/README.md:
--------------------------------------------------------------------------------
# Quadruped robot belief encoder
Implementation of some core elements of "Learning robust perceptive locomotion for quadrupedal robots in the wild" [[Paper](https://www.science.org/doi/10.1126/scirobotics.abk2822)]

![figure](image/figure.png)

## Description
This repository implements two components:
1. Student policy network
2. Heightmap noise generator

The student policy network is composed of a **belief encoder** and a **belief decoder** that appropriately fuse proprioceptive and exteroceptive sensor data. It is implemented in *Python*.
The privileged-information decoder described in the paper is excluded because it was not critical in our experiments.

The heightmap noise generator is composed of **three noise models** that reproduce the errors arising in real-world use cases due to depth camera noise, state estimation error/drift, etc.
It is implemented in *C++* because the [Raisim](https://raisim.com/) simulator that we actively use implements environments in *C++* for fast simulation.

## Dependencies
- numpy
- pytorch
- ruamel.yaml

## Run example
1. Student policy network
```
cd model
python main.py
```

2. Heightmap noise generator
```
cd noise_generator
mkdir build && cd build
cmake ..
make
# After the build is finished
./noise_example
```

## Contributor
- [Yunho Kim](https://github.com/awesomericky)
- [Jinhyeok Choi](https://github.com/Triangle2022)
--------------------------------------------------------------------------------
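As a conceptual supplement to the README's description: the belief encoder tiles a state encoding of the GRU output across the four legs and adds attention-gated exteroceptive features. Below is a simplified sketch of the computation implemented in model/modules/actor.py, with single linear layers standing in for the configured MLPs; the hidden size (50) and per-leg feature size (24) are taken from model/config.yaml.

```
import torch

h = torch.randn(50)                     # GRU output state
extero = torch.randn(4 * 24)            # encoded exteroceptive features (4 legs x 24)
state_encoder = torch.nn.Linear(50, 24)
attention_encoder = torch.nn.Linear(50, 4 * 24)

# belief = tiled state encoding + attention-gated exteroceptive features
belief = torch.tile(state_encoder(h), (4,)) + torch.sigmoid(attention_encoder(h)) * extero
print(belief.shape)  # torch.Size([96])
```
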
/model/storage.py:
--------------------------------------------------------------------------------
import numpy as np
import torch

class VisionRolloutBuffer:
    def __init__(self, args):
        self.proprio_dim = args.proprio_obs_dim
        self.extero_dim = args.extero_obs_dim
        self.action_dim = args.action_dim
        self.n_envs = args.n_envs
        self.n_steps = args.n_steps
        self.n_steps_per_env = self.n_steps // self.n_envs
        self.device = args.device

        self.cnt = 0
        self.proprio_states = np.zeros((self.n_steps_per_env, self.n_envs, self.proprio_dim), dtype=np.float32)
        self.noisy_extero_states = np.zeros((self.n_steps_per_env, self.n_envs, self.extero_dim), dtype=np.float32)
        self.extero_states = np.zeros((self.n_steps_per_env, self.n_envs, self.extero_dim), dtype=np.float32)
        self.actions = np.zeros((self.n_steps_per_env, self.n_envs, self.action_dim), dtype=np.float32)

    def addTransition(self, proprio_states, noisy_extero_states, extero_states, actions):
        """
        :param proprio_states: proprioceptive sensor data [numpy.float32]
        :param noisy_extero_states: noisy exteroceptive sensor data [numpy.float32]
        :param extero_states: (teacher) exteroceptive sensor data [numpy.float32]
        :param actions: (teacher) action [numpy.float32]
        """
        assert self.cnt < self.n_steps_per_env
        self.proprio_states[self.cnt] = proprio_states
        self.noisy_extero_states[self.cnt] = noisy_extero_states
        self.extero_states[self.cnt] = extero_states
        self.actions[self.cnt] = actions
        self.cnt += 1

    def getBatches(self):
        """
        :return: (L, N, D) tensors; also resets the write cursor for the next rollout.
        """
        self.cnt = 0
        proprio_states_tensor = torch.from_numpy(self.proprio_states).to(self.device)
        noisy_extero_states_tensor = torch.from_numpy(self.noisy_extero_states).to(self.device)
        extero_states_tensor = torch.from_numpy(self.extero_states).to(self.device)
        actions_tensor = torch.from_numpy(self.actions).to(self.device)
        return proprio_states_tensor, noisy_extero_states_tensor, extero_states_tensor, actions_tensor
--------------------------------------------------------------------------------
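A minimal fill-then-drain sketch of the buffer above, using the dummy dimensions from model/main.py; `argparse.Namespace` simply stands in for the parsed args:

```
import argparse
import numpy as np
from storage import VisionRolloutBuffer  # run from the model/ directory

args = argparse.Namespace(proprio_obs_dim=10, extero_obs_dim=20, action_dim=12,
                          n_envs=100, n_steps=40000, device="cpu")
buffer = VisionRolloutBuffer(args)

for _ in range(buffer.n_steps_per_env):  # 400 steps per environment
    buffer.addTransition(np.zeros((100, 10), dtype=np.float32),
                         np.zeros((100, 20), dtype=np.float32),
                         np.zeros((100, 20), dtype=np.float32),
                         np.zeros((100, 12), dtype=np.float32))

# (L, N, D) tensors: (400, 100, 10), (400, 100, 20), (400, 100, 20), (400, 100, 12)
proprio, noisy_extero, extero, actions = buffer.getBatches()
```
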
/noise_generator/main.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include <random>
#include <array>
#include "TrainManager.hpp"

namespace raisim {

extern "C"
int main(int argc, char * argv[]) {
  static std::mt19937 gen;
  static std::uniform_real_distribution<double> uniDist(0., 1.);
  static std::normal_distribution<double> normDist(0., 1.);

  HeightNoiseGenerator heightNoiseGenerator;
  HeightNoiseGenerator::Noise heightNoise;

  // initialize container of the true height scan data
  const int nFoots = 4;
  const int nScansPerFoot = 10;
  std::array<std::array<double, nScansPerFoot>, nFoots> heightScan;
  std::array<std::array<double, nScansPerFoot>, nFoots> heightScanNoisy;
  for (int i = 0; i < nFoots; i++)
    for (int j = 0; j < nScansPerFoot; j++)
      heightScan[i][j] = 0;

  // call when the episode starts
  heightNoiseGenerator.sampleNoiseType(gen, uniDist);
  heightNoiseGenerator.sampleNoise(
      heightNoise, HeightNoiseGenerator::SampleType::INIT, gen, uniDist, normDist);

  // simulate
  int nSteps = 400;
  for (int t = 0; t < nSteps; t++) {
    for (int i = 0; i < nFoots; i++) {
      // call when the foot changes
      heightNoiseGenerator.sampleNoise(
          heightNoise, HeightNoiseGenerator::SampleType::FOOT_CHANGE, gen, uniDist, normDist);

      for (int j = 0; j < nScansPerFoot; j++) {
        // call for every height scan point
        heightNoiseGenerator.sampleNoise(
            heightNoise, HeightNoiseGenerator::SampleType::POINT_CHANGE, gen, uniDist, normDist);

        double xOffset = heightNoise.x;
        double yOffset = heightNoise.y;
        double zOffset = heightNoise.z;

        /// Read the height scan with x, y, z offsets from the true value as below:
        ///   heightScanNoisy[i][j] = heightmap->getHeight(default_x_ij + xOffset, default_y_ij + yOffset) + zOffset
        /// However, no heightmap is generated in this example,
        /// so only zOffset is added to the default height value to show the intended use.
        heightScanNoisy[i][j] = heightScan[i][j] + zOffset;

        std::cout << xOffset << " " << yOffset << " " << zOffset << "\n";
      }
    }
  }

  return 0;
}

}  // namespace raisim
--------------------------------------------------------------------------------
/model/modules/base_nn.py:
--------------------------------------------------------------------------------
import torch.nn as nn


class BaseNet(nn.Module):
    """Builds named submodules (LSTM/GRU/MLP/Linear) from a nested config dict."""
    def __init__(self, model_config):
        super(BaseNet, self).__init__()
        self.model_config = model_config
        self.activation_map = {"relu": nn.ReLU, "tanh": nn.Tanh, "leakyrelu": nn.LeakyReLU, "gelu": nn.GELU}

        used_model_archs = self.model_config.keys()

        if "LSTM" in used_model_archs:
            for model_name, model_arch in self.model_config["LSTM"].items():
                self.add_module(model_name, nn.LSTM(
                    input_size=model_arch["input"],
                    hidden_size=model_arch["hidden"],
                    num_layers=model_arch["num_layers"],
                    batch_first=model_arch["batch_first"],
                    dropout=model_arch["dropout"]
                ))

        if "GRU" in used_model_archs:
            for model_name, model_arch in self.model_config["GRU"].items():
                self.add_module(model_name, nn.GRU(
                    input_size=model_arch["input"],
                    hidden_size=model_arch["hidden"],
                    num_layers=model_arch["num_layers"],
                    batch_first=model_arch["batch_first"],
                    dropout=model_arch["dropout"]
                ))

        if "MLP" in used_model_archs:
            for model_name, model_arch in self.model_config["MLP"].items():
                assert model_arch["activation"] in self.activation_map, "Unavailable activation."
                self.add_module(model_name, MLP(
                    input_size=model_arch["input"],
                    output_size=model_arch["output"],
                    shape=model_arch["shape"],
                    activation=self.activation_map[model_arch["activation"]],
                    dropout=model_arch.get("dropout", 0.),
                    batchnorm=model_arch.get("batchnorm", False)
                ))

        if "Linear" in used_model_archs:
            for model_name, model_arch in self.model_config["Linear"].items():
                self.add_module(model_name, nn.Linear(
                    in_features=model_arch["input"],
                    out_features=model_arch["output"]
                ))


class MLP(nn.Module):
    def __init__(self, input_size, output_size, shape, activation, dropout=0.0, batchnorm=False):
        super(MLP, self).__init__()
        self.activation_fn = activation

        modules = [nn.Linear(input_size, shape[0]), self.activation_fn()]

        for idx in range(len(shape)-1):
            modules.append(nn.Linear(shape[idx], shape[idx+1]))
            if batchnorm:
                modules.append(nn.BatchNorm1d(shape[idx+1]))
            modules.append(self.activation_fn())
            if dropout != 0.0:
                modules.append(nn.Dropout(dropout))

        modules.append(nn.Linear(shape[-1], output_size))
        self.architecture = nn.Sequential(*modules)

        self.input_shape = [input_size]
        self.output_shape = [output_size]

    def forward(self, x):
        return self.architecture(x)
--------------------------------------------------------------------------------
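For reference, `MLP` can also be constructed standalone with hypothetical sizes; note that `activation` takes the class, not an instance:

```
import torch
from modules.base_nn import MLP  # run from the model/ directory

net = MLP(input_size=10, output_size=4, shape=[64, 64], activation=torch.nn.LeakyReLU)
out = net(torch.randn(8, 10))  # -> shape (8, 4)
```
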
/model/main.py:
--------------------------------------------------------------------------------
import argparse
from ruamel.yaml import YAML
import numpy as np
import torch

from agent import VisionStudentAgent


class Environment:
    """
    Change to the environment you are using
    """
    def __init__(self, args):
        self.obs_dim = args.proprio_obs_dim + args.extero_obs_dim
        self.action_dim = args.action_dim
        self.n_envs = args.n_envs

    def observe(self):
        observations = np.random.normal(size=(self.n_envs, self.obs_dim)).astype(np.float32)
        return observations

    def observe_noisy(self):
        noisy_observations = np.random.normal(size=(self.n_envs, self.obs_dim)).astype(np.float32)
        return noisy_observations

    def step(self, action):
        rewards = np.random.normal(size=self.n_envs).astype(np.float32)
        dones = np.zeros(shape=self.n_envs).astype(np.bool_)
        return rewards, dones


class TeacherAgent:
    """
    Change to the teacher agent you are using
    """
    def __init__(self, args):
        self.action_dim = args.action_dim
        self.n_envs = args.n_envs

    def getAction(self, observations):
        actions = np.random.normal(size=(self.n_envs, self.action_dim)).astype(np.float32)
        return actions


def getParser():
    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--name', type=str, default='example')
    parser.add_argument('--device', type=str, default='cuda', help='gpu or cpu.')
    parser.add_argument('--save_dir', type=str, default='example', help='directory name to save weights')
    return parser


if __name__ == "__main__":
    parser = getParser()
    args = parser.parse_args()

    # parameters to be set from the environment you are running
    args.student_model_num = 0
    args.student_lr = 3e-4
    args.student_epochs = 1
    args.max_grad_norm = 1.
    args.student_policy_type = "vision_recurrent"
    args.n_envs = 100
    args.n_steps = 40000
    args.n_steps_per_env = int(args.n_steps / args.n_envs)

    args.proprio_obs_dim = 10   # 100
    args.extero_obs_dim = 20    # 200
    args.action_dim = 12

    # config
    cfg = YAML().load(open("config.yaml", 'r'))

    # define teacher agent (pretrained)
    teacher = TeacherAgent(args)

    # define student agent
    student = VisionStudentAgent(args, cfg["student_model"])
    hidden_state_tensor = None

    # define environment
    env = Environment(args)

    max_update = 10

    for update in range(max_update):
        for _ in range(args.n_steps_per_env):
            obs = env.observe()
            noisy_obs = env.observe_noisy()

            proprio_obs = obs[:, :args.proprio_obs_dim]
            extero_obs = obs[:, -args.extero_obs_dim:]
            noisy_extero_obs = noisy_obs[:, -args.extero_obs_dim:]
            proprio_obs_tensor = torch.from_numpy(proprio_obs).to(args.device)
            noisy_extero_obs_tensor = torch.from_numpy(noisy_extero_obs).to(args.device)

            with torch.no_grad():
                # get student action
                actions_tensor, hidden_state_tensor = student.getAction(
                    proprio_state=proprio_obs_tensor,
                    extero_state=noisy_extero_obs_tensor,
                    hidden_state=hidden_state_tensor
                )

                # get teacher action
                teacher_actions = teacher.getAction(obs)

            actions = actions_tensor.detach().cpu().numpy()
            rewards, dones = env.step(actions)
            # NOTE: the recurrent hidden state is carried across steps; with a real
            # environment it should be reset for environments whose dones flag is True.

            # add data to the buffer
            student.step(proprio_obs, noisy_extero_obs, extero_obs, teacher_actions)

        # train model
        loss, reconstruction_loss, action_loss = student.train()

        # save model
        if update % 5 == 0:
            student.save(update)

        print('----------------------------------------------------')
        print('{:>6}th iteration'.format(update))
        print('{:<40} {:>6}'.format("total loss: ", '{:6.4f}'.format(loss)))
        print('{:<40} {:>6}'.format("reconstruction loss: ", '{:6.4f}'.format(reconstruction_loss)))
        print('{:<40} {:>6}'.format("action loss: ", '{:6.4f}'.format(action_loss)))
        print('----------------------------------------------------\n')
--------------------------------------------------------------------------------
91 | """ 92 | loaded_checkpoint = dict() 93 | for k, v in checkpoint.items(): 94 | if k.split('.')[0] == "extero_encoder": 95 | loaded_checkpoint['.'.join(k.split('.')[1:])] = v 96 | 97 | if len(loaded_checkpoint.keys()) != 0: 98 | self.actor.extero_encoder.load_state_dict(loaded_checkpoint) 99 | cprint("Exteroceptive encoder load success", bold=True, color="blue") 100 | else: 101 | cprint("Exteroceptive encoder load fail", bold=True, color="blue") 102 | 103 | def save(self, model_name): 104 | save_dict = { 105 | 'actor': self.actor.state_dict(), 106 | 'actor_optimizer': self.actor_optimizer.state_dict() 107 | } 108 | torch.save(save_dict, f"{self.checkpoint_dir}/full_{model_name}.pt") 109 | cprint(f'[{self.name} - full_{model_name}.pt] save success.', bold=True, color="blue") 110 | 111 | def load(self): 112 | if not os.path.isdir(self.checkpoint_dir): 113 | os.makedirs(self.checkpoint_dir) 114 | checkpoint_file = f"{self.checkpoint_dir}/full_{self.model_num}.pt" 115 | 116 | if os.path.isfile(checkpoint_file): 117 | checkpoint = torch.load(checkpoint_file) 118 | self.actor.load_state_dict(checkpoint['actor']) 119 | self.actor_optimizer.load_state_dict(checkpoint['actor_optimizer']) 120 | cprint(f'[{self.name} - full_{self.model_num}.pt] load success.', bold=True, color="blue") 121 | return int(self.model_num) 122 | else: 123 | cprint(f'[{self.name} - full_{self.model_num}.pt] load fail.', bold=True, color="red") 124 | return 0 125 | -------------------------------------------------------------------------------- /noise_generator/TrainManager.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TRAIN_MANAGER_HPP 2 | #define TRAIN_MANAGER_HPP 3 | 4 | namespace raisim { 5 | 6 | class HeightNoiseGenerator { 7 | public: 8 | enum class NoiseType : int { 9 | NOMINAL = 0, 10 | OFFSET, 11 | NOISY 12 | }; 13 | 14 | // INIT (episode start) --> FOOT_CHANGE (foot change) --> POINT_CHANGE (point change) 15 | enum class SampleType : int { 16 | INIT = 0, 17 | FOOT_CHANGE, 18 | POINT_CHANGE 19 | }; 20 | 21 | struct Noise { 22 | double x; 23 | double y; 24 | double z; 25 | }; 26 | 27 | struct NoiseParam { 28 | Noise pointAndTime; // sampled for each point every time step 29 | Noise footAndTime; // sampled for each foot every time step 30 | Noise foot; // sampled for each foot at the beginning of the episode 31 | double zOutlier; 32 | double constantOffsetProb; 33 | double zOutlierProb; 34 | }; 35 | 36 | HeightNoiseGenerator() = default; 37 | 38 | ~HeightNoiseGenerator() = default; 39 | 40 | void sampleNoiseType(std::mt19937 &gen, 41 | std::uniform_real_distribution &uniDist) { 42 | double val = uniDist(gen); 43 | 44 | if (val < 0.6) 45 | noiseType = NoiseType::NOMINAL; 46 | else if (val < 0.6 + 0.3) 47 | noiseType = NoiseType::OFFSET; 48 | else 49 | noiseType = NoiseType::NOISY; 50 | 51 | switch (noiseType) { 52 | case NoiseType::NOMINAL: 53 | defaultNoiseParam.pointAndTime.x = 0.004; 54 | defaultNoiseParam.pointAndTime.y = 0.004; 55 | defaultNoiseParam.pointAndTime.z = 0.005; 56 | defaultNoiseParam.footAndTime.x = 0.01; 57 | defaultNoiseParam.footAndTime.y = 0.01; 58 | defaultNoiseParam.footAndTime.z = 0.04; 59 | defaultNoiseParam.zOutlierProb = 0.02; 60 | defaultNoiseParam.zOutlier = 0.03; 61 | defaultNoiseParam.constantOffsetProb = 0.05; 62 | defaultNoiseParam.foot.x = 0.1; 63 | defaultNoiseParam.foot.y = 0.1; 64 | defaultNoiseParam.foot.z = 0.1; 65 | break; 66 | case NoiseType::OFFSET: 67 | defaultNoiseParam.pointAndTime.x = 0.004; 68 | 
/noise_generator/TrainManager.hpp:
--------------------------------------------------------------------------------
#ifndef TRAIN_MANAGER_HPP
#define TRAIN_MANAGER_HPP

#include <random>

namespace raisim {

class HeightNoiseGenerator {
public:
  enum class NoiseType : int {
    NOMINAL = 0,
    OFFSET,
    NOISY
  };

  // INIT (episode start) --> FOOT_CHANGE (foot change) --> POINT_CHANGE (point change)
  enum class SampleType : int {
    INIT = 0,
    FOOT_CHANGE,
    POINT_CHANGE
  };

  struct Noise {
    double x;
    double y;
    double z;
  };

  struct NoiseParam {
    Noise pointAndTime;  // sampled for each point every time step
    Noise footAndTime;   // sampled for each foot every time step
    Noise foot;          // sampled for each foot at the beginning of the episode
    double zOutlier;
    double constantOffsetProb;
    double zOutlierProb;
  };

  HeightNoiseGenerator() = default;

  ~HeightNoiseGenerator() = default;

  // Sample one of the three noise models (60% NOMINAL, 30% OFFSET, 10% NOISY)
  // and set its parameters. Call once at the beginning of every episode.
  void sampleNoiseType(std::mt19937 &gen,
                       std::uniform_real_distribution<double> &uniDist) {
    double val = uniDist(gen);

    if (val < 0.6)
      noiseType = NoiseType::NOMINAL;
    else if (val < 0.6 + 0.3)
      noiseType = NoiseType::OFFSET;
    else
      noiseType = NoiseType::NOISY;

    switch (noiseType) {
      case NoiseType::NOMINAL:
        defaultNoiseParam.pointAndTime.x = 0.004;
        defaultNoiseParam.pointAndTime.y = 0.004;
        defaultNoiseParam.pointAndTime.z = 0.005;
        defaultNoiseParam.footAndTime.x = 0.01;
        defaultNoiseParam.footAndTime.y = 0.01;
        defaultNoiseParam.footAndTime.z = 0.04;
        defaultNoiseParam.zOutlierProb = 0.02;
        defaultNoiseParam.zOutlier = 0.03;
        defaultNoiseParam.constantOffsetProb = 0.05;
        defaultNoiseParam.foot.x = 0.1;
        defaultNoiseParam.foot.y = 0.1;
        defaultNoiseParam.foot.z = 0.1;
        break;
      case NoiseType::OFFSET:
        defaultNoiseParam.pointAndTime.x = 0.004;
        defaultNoiseParam.pointAndTime.y = 0.004;
        defaultNoiseParam.pointAndTime.z = 0.005;
        defaultNoiseParam.footAndTime.x = 0.01;
        defaultNoiseParam.footAndTime.y = 0.01;
        defaultNoiseParam.footAndTime.z = 0.1;
        defaultNoiseParam.zOutlierProb = 0.02;
        defaultNoiseParam.zOutlier = 0.1;
        defaultNoiseParam.constantOffsetProb = 0.02;
        defaultNoiseParam.foot.x = 0.1;
        defaultNoiseParam.foot.y = 0.1;
        defaultNoiseParam.foot.z = 0.1;
        break;
      case NoiseType::NOISY:
        defaultNoiseParam.pointAndTime.x = 0.004;
        defaultNoiseParam.pointAndTime.y = 0.004;
        defaultNoiseParam.pointAndTime.z = 0.1;
        defaultNoiseParam.footAndTime.x = 0.1;
        defaultNoiseParam.footAndTime.y = 0.1;
        defaultNoiseParam.footAndTime.z = 0.3;
        defaultNoiseParam.zOutlierProb = 0.05;
        defaultNoiseParam.zOutlier = 0.3;
        defaultNoiseParam.constantOffsetProb = 0.3;
        defaultNoiseParam.foot.x = 0.1;
        defaultNoiseParam.foot.y = 0.1;
        defaultNoiseParam.foot.z = 0.1;
        break;
    }
  }

  // Accumulate the per-episode, per-foot, and per-point noise components into `noise`.
  void sampleNoise(Noise &noise,
                   const SampleType &sampleType,
                   std::mt19937 &gen,
                   std::uniform_real_distribution<double> &uniDist,
                   std::normal_distribution<double> &normDist) {
    switch (sampleType) {
      case SampleType::INIT:
        if (uniDist(gen) < defaultNoiseParam.constantOffsetProb) {
          sampledNoiseParam.foot.x = defaultNoiseParam.foot.x * normDist(gen);
          sampledNoiseParam.foot.y = defaultNoiseParam.foot.y * normDist(gen);
          sampledNoiseParam.foot.z = defaultNoiseParam.foot.z * normDist(gen);
        } else {
          sampledNoiseParam.foot.x = 0.;
          sampledNoiseParam.foot.y = 0.;
          sampledNoiseParam.foot.z = 0.;
        }
        return;
      case SampleType::FOOT_CHANGE:
        sampledNoiseParam.footAndTime.x = defaultNoiseParam.footAndTime.x * normDist(gen);
        sampledNoiseParam.footAndTime.y = defaultNoiseParam.footAndTime.y * normDist(gen);
        sampledNoiseParam.footAndTime.z = defaultNoiseParam.footAndTime.z * normDist(gen);
        return;
      case SampleType::POINT_CHANGE:
        sampledNoiseParam.pointAndTime.x = defaultNoiseParam.pointAndTime.x * normDist(gen);
        sampledNoiseParam.pointAndTime.y = defaultNoiseParam.pointAndTime.y * normDist(gen);
        sampledNoiseParam.pointAndTime.z = defaultNoiseParam.pointAndTime.z * normDist(gen);
        if (uniDist(gen) < defaultNoiseParam.zOutlierProb)
          sampledNoiseParam.zOutlier = defaultNoiseParam.zOutlier * normDist(gen);
        else
          sampledNoiseParam.zOutlier = 0.;
        break;
    }

    noise.x = sampledNoiseParam.foot.x + sampledNoiseParam.footAndTime.x + sampledNoiseParam.pointAndTime.x;
    noise.y = sampledNoiseParam.foot.y + sampledNoiseParam.footAndTime.y + sampledNoiseParam.pointAndTime.y;
    noise.z = sampledNoiseParam.foot.z + sampledNoiseParam.footAndTime.z +
              sampledNoiseParam.pointAndTime.z + sampledNoiseParam.zOutlier;
  }

private:
  NoiseType noiseType;
  NoiseParam defaultNoiseParam, sampledNoiseParam;
};

}  // namespace raisim

#endif  // TRAIN_MANAGER_HPP
--------------------------------------------------------------------------------
/model/modules/actor.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch

from base_nn import BaseNet


class RecurrentAttentionPolicy(BaseNet):
    class BeliefEncoder(BaseNet):
        def __init__(self, model_cfg):
            super().__init__(model_config=model_cfg)

        def forward(self, proprio_state, encoded_extero_state, hidden_state=None):
            """
            :param proprio_state: proprioceptive sensor data [(L, H) / (L, N, H)]
            :param encoded_extero_state: encoded exteroceptive sensor data [(L, H) / (L, N, H)]
            :param hidden_state: hidden state of the recurrent layer [(N_layer, H) / (N_layer, N, H)]
            :return:
                recurrent_output: output state of the recurrent layer [(L, H) / (L, N, H)]
                recurrent_hidden: next hidden state of the recurrent layer [(N_layer, H) / (N_layer, N, H)]
                belief_state: next belief state of the belief encoder [(L, H) / (L, N, H)]
            """
            output = dict()

            fused_state = torch.cat((proprio_state, encoded_extero_state), dim=-1)  # (L, H) / (L, N, H)
            if hidden_state is None:
                output["recurrent_output"], output["recurrent_hidden"] = self.recurrent_encoder(fused_state)
            else:
                output["recurrent_output"], output["recurrent_hidden"] = self.recurrent_encoder(fused_state, hidden_state)

            # tile the state encoding once per leg (4x along the last dim) and add the
            # attention-gated exteroceptive features
            tile_shape = torch.Size(torch.ones_like(torch.tensor(output["recurrent_output"].shape)[:-1])) + (4,)
            output["belief_state"] = \
                torch.tile(self.state_encoder(output["recurrent_output"]), dims=tile_shape) + \
                torch.sigmoid(self.attention_encoder(output["recurrent_output"])) * encoded_extero_state
            return output
    class BeliefDecoder(BaseNet):
        def __init__(self, model_cfg):
            super().__init__(model_config=model_cfg)

        def forward(self, extero_state, hidden_state):
            """
            :param extero_state: exteroceptive sensor data [(L, H) / (L, N, H)]
            :param hidden_state: output state of the recurrent layer [(L, H) / (L, N, H)]
                (cf: in a GRU, the last output state equals the hidden state)
            :return:
                estimated_extero_state: estimated exteroceptive sensor data [(L, H) / (L, N, H)]
            """
            estimated_extero_state = \
                self.extero_decoder(hidden_state) + \
                torch.sigmoid(self.attention_encoder(hidden_state)) * extero_state
            return estimated_extero_state

    def __init__(self, args, model_cfg):
        self.proprio_dim = args.proprio_obs_dim
        self.extero_dim = args.extero_obs_dim
        self.action_dim = args.action_dim

        self.device = args.device
        self.args = args
        self.model_cfg = model_cfg
        self.adapt_model()

        super(RecurrentAttentionPolicy, self).__init__(model_config=model_cfg["policy"])
        self.belief_encoder = self.BeliefEncoder(model_cfg["belief_encoder"])
        self.belief_decoder = self.BeliefDecoder(model_cfg["belief_decoder"])

    def adapt_model(self):
        """Fill in the model input/output sizes that config.yaml leaves unspecified."""
        # the exteroceptive input is split into 4 per-leg chunks before encoding
        assert self.extero_dim % 4 == 0
        self.model_cfg["policy"]["MLP"]["extero_encoder"]["input"] = self.extero_dim // 4
        self.model_cfg["policy"]["MLP"]["base_net"]["input"] = \
            self.proprio_dim + self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
        self.model_cfg["policy"]["MLP"]["base_net"]["output"] = self.action_dim

        self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["input"] = \
            self.proprio_dim + self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4

        self.model_cfg["belief_encoder"]["MLP"]["attention_encoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
        self.model_cfg["belief_encoder"]["MLP"]["attention_encoder"]["output"] \
            = self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
        self.model_cfg["belief_encoder"]["MLP"]["state_encoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]

        self.model_cfg["belief_decoder"]["MLP"]["attention_encoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
        self.model_cfg["belief_decoder"]["MLP"]["attention_encoder"]["output"] \
            = self.extero_dim
        self.model_cfg["belief_decoder"]["MLP"]["extero_decoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
        self.model_cfg["belief_decoder"]["MLP"]["extero_decoder"]["output"] \
            = self.extero_dim
    def forward(self, proprio_state, extero_state, hidden_state=None, use_decoder=True):
        """
        :param proprio_state: proprioceptive sensor data [(L, H) / (L, N, H)]
        :param extero_state: exteroceptive sensor data [(L, H) / (L, N, H)]
        :param hidden_state: hidden state of the recurrent layer in the belief encoder [(N_layer, H) / (N_layer, N, H)]
        :param use_decoder: whether to use the belief decoder to estimate exteroceptive data
        :return:
            action: [(L, H) / (L, N, H)]
            recurrent_hidden: [(N_layer, H) / (N_layer, N, H)]
            estimated_extero_state: estimated exteroceptive sensor data [(L, H) / (L, N, H)]
        """
        output = dict()

        # encode each of the 4 per-leg exteroceptive chunks, then flatten back
        length_and_batch = proprio_state.shape[:-1]
        encoded_extero_state = extero_state.view(*length_and_batch, 4, self.extero_dim // 4)
        encoded_extero_state = self.extero_encoder(encoded_extero_state).view(*length_and_batch, -1)

        belief_encoder_output = self.belief_encoder(proprio_state, encoded_extero_state, hidden_state)
        fused_state = torch.cat((proprio_state, belief_encoder_output["belief_state"]), dim=-1)
        output["action"] = self.base_net(fused_state)
        output["recurrent_hidden"] = belief_encoder_output["recurrent_hidden"]

        if use_decoder:
            output["estimated_extero_state"] = \
                self.belief_decoder(extero_state, belief_encoder_output["recurrent_output"])
        return output

    def getAction(self, proprio_state, extero_state, hidden_state):
        """
        :param proprio_state: [(N, H)]
        :param extero_state: [(N, H)]
        :param hidden_state: [(N_layer, N, H)]
        :return:
            action: [(N, H)]
            hidden_state: [(N_layer, N, H)]
        """
        assert len(proprio_state.shape) == 2 and proprio_state.shape[0] == self.args.n_envs
        proprio_state = proprio_state.unsqueeze(0)  # add a length-1 sequence dim -> (1, N, H)
        extero_state = extero_state.unsqueeze(0)
        output = self.forward(proprio_state, extero_state, hidden_state, use_decoder=False)
        action = output["action"].squeeze(0)
        # the GRU hidden state is already (N_layer, N, H); pass it back unchanged
        next_hidden_state = output["recurrent_hidden"]
        return action, next_hidden_state
--------------------------------------------------------------------------------
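Finally, a minimal shape check for `RecurrentAttentionPolicy`, assuming the dummy dimensions from model/main.py (run from the model/ directory):

```
import argparse
import torch
from ruamel.yaml import YAML
from modules.actor import RecurrentAttentionPolicy

args = argparse.Namespace(proprio_obs_dim=10, extero_obs_dim=20, action_dim=12,
                          n_envs=4, device="cpu")
cfg = YAML().load(open("config.yaml", 'r'))
policy = RecurrentAttentionPolicy(args, cfg["student_model"])

action, hidden = policy.getAction(torch.randn(4, 10), torch.randn(4, 20), hidden_state=None)
print(action.shape)  # torch.Size([4, 12])
print(hidden.shape)  # torch.Size([2, 4, 50]): (num_layers, n_envs, hidden)
```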