├── .gitignore
├── image
│   └── figure.png
├── model
│   ├── modules
│   │   ├── __init__.py
│   │   ├── base_nn.py
│   │   └── actor.py
│   ├── config.yaml
│   ├── utils
│   │   └── color.py
│   ├── storage.py
│   ├── main.py
│   └── agent.py
├── noise_generator
│   ├── CMakeLists.txt
│   ├── main.cpp
│   └── TrainManager.hpp
└── README.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
build/
__pycache__/
.idea/*
cmake-*/*
--------------------------------------------------------------------------------
/image/figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomericky/quadruped-robot-belief-encoder/HEAD/image/figure.png
--------------------------------------------------------------------------------
/model/modules/__init__.py:
--------------------------------------------------------------------------------
import os
import sys

# Make sibling modules (base_nn, actor) importable without package-relative imports.
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
--------------------------------------------------------------------------------
/noise_generator/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
project(noise_example)

set(CMAKE_CXX_STANDARD 17)

set(EXEC_NAME ${PROJECT_NAME})
add_executable(${EXEC_NAME} main.cpp)
target_include_directories(${EXEC_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
--------------------------------------------------------------------------------
/model/config.yaml:
--------------------------------------------------------------------------------
student_model:
  policy:
    MLP:
      extero_encoder:
        shape: [80, 60]
        activation: leakyrelu
        output: 24
      base_net:
        shape: [256, 160, 128]
        activation: leakyrelu

  belief_encoder:
    GRU:
      recurrent_encoder:
        hidden: 50
        num_layers: 2
        batch_first: False
        dropout: 0.
    MLP:
      attention_encoder:
        shape: [64, 64]
        activation: leakyrelu
      state_encoder:
        shape: [64, 64]
        activation: leakyrelu
        output: 24

  belief_decoder:
    MLP:
      attention_encoder:
        shape: [64, 64]
        activation: leakyrelu
      extero_decoder:
        shape: [64, 64]
        activation: leakyrelu
--------------------------------------------------------------------------------
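Note that the `input` sizes (and `base_net`'s `output`) are deliberately absent from config.yaml: `RecurrentAttentionPolicy.adapt_model` in model/modules/actor.py fills them in at runtime from the observation and action dimensions. A minimal sketch of loading the file, mirroring model/main.py (run from the model/ directory):

```
from ruamel.yaml import YAML

cfg = YAML().load(open("config.yaml", 'r'))
print(cfg["student_model"]["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"])  # 50
```
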
/model/utils/color.py:
--------------------------------------------------------------------------------
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

def cprint(text, bold=False, underline=False, color=''):
    """Print text with optional ANSI bold/underline/color escape codes."""
    color_encoding = ''
    if color == 'blue':
        color_encoding = bcolors.OKBLUE
    elif color == 'cyan':
        color_encoding = bcolors.OKCYAN
    elif color == 'orange':
        color_encoding = bcolors.WARNING
    elif color == 'red':
        color_encoding = bcolors.FAIL

    bold_encoding = bcolors.BOLD if bold else ''
    underline_encoding = bcolors.UNDERLINE if underline else ''
    print(bold_encoding + underline_encoding + color_encoding + text + bcolors.ENDC)
--------------------------------------------------------------------------------
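A quick usage sketch of the helper above, matching how model/agent.py reports checkpoint status:

```
from utils.color import cprint  # run from the model/ directory

cprint("load success.", bold=True, color="blue")
cprint("load fail.", bold=True, color="red")
```
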
/README.md:
--------------------------------------------------------------------------------
# Quadruped robot belief encoder
Implementation of some core elements of "Learning robust perceptive locomotion for quadrupedal robots in the wild" [[Paper](https://www.science.org/doi/10.1126/scirobotics.abk2822)]

![figure](image/figure.png)

## Description
This repository implements two components:
1. Student policy network
2. Heightmap noise generator

The student policy network is composed of a **belief encoder** and a **belief decoder** that appropriately fuse proprioceptive and exteroceptive sensor data. It is implemented in *Python*.
The privileged-information decoder described in the paper is excluded because it was not critical in our experiments.

The heightmap noise generator is composed of **three noise models** that reproduce the errors arising in real-world use cases due to depth camera noise, state estimation error/drift, etc.
It is implemented in *C++* because the [Raisim](https://raisim.com/) simulator that we actively use implements environments in *C++* for fast simulation.

## Dependencies
- numpy
- pytorch
- ruamel.yaml

## Run example
1. Student policy network
```
cd model
python main.py
```

2. Heightmap noise generator
```
cd noise_generator
mkdir build && cd build
cmake ..
make
# After the build is finished
./noise_example
```

## Contributor
- [Yunho Kim](https://github.com/awesomericky)
- [Jinhyeok Choi](https://github.com/Triangle2022)
--------------------------------------------------------------------------------
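As a conceptual supplement to the README's description: the belief encoder tiles a state encoding of the GRU output across the four legs and adds attention-gated exteroceptive features. Below is a simplified sketch of the computation implemented in model/modules/actor.py, with single linear layers standing in for the configured MLPs; the hidden size (50) and per-leg feature size (24) are taken from model/config.yaml.

```
import torch

h = torch.randn(50)                     # GRU output state
extero = torch.randn(4 * 24)            # encoded exteroceptive features (4 legs x 24)
state_encoder = torch.nn.Linear(50, 24)
attention_encoder = torch.nn.Linear(50, 4 * 24)

# belief = tiled state encoding + attention-gated exteroceptive features
belief = torch.tile(state_encoder(h), (4,)) + torch.sigmoid(attention_encoder(h)) * extero
print(belief.shape)  # torch.Size([96])
```
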
/model/storage.py:
--------------------------------------------------------------------------------
import numpy as np
import torch

class VisionRolloutBuffer:
    def __init__(self, args):
        self.proprio_dim = args.proprio_obs_dim
        self.extero_dim = args.extero_obs_dim
        self.action_dim = args.action_dim
        self.n_envs = args.n_envs
        self.n_steps = args.n_steps
        self.n_steps_per_env = self.n_steps // self.n_envs
        self.device = args.device

        self.cnt = 0
        self.proprio_states = np.zeros((self.n_steps_per_env, self.n_envs, self.proprio_dim), dtype=np.float32)
        self.noisy_extero_states = np.zeros((self.n_steps_per_env, self.n_envs, self.extero_dim), dtype=np.float32)
        self.extero_states = np.zeros((self.n_steps_per_env, self.n_envs, self.extero_dim), dtype=np.float32)
        self.actions = np.zeros((self.n_steps_per_env, self.n_envs, self.action_dim), dtype=np.float32)

    def addTransition(self, proprio_states, noisy_extero_states, extero_states, actions):
        """
        :param proprio_states: proprioceptive sensor data [numpy.float32]
        :param noisy_extero_states: noisy exteroceptive sensor data [numpy.float32]
        :param extero_states: (teacher) exteroceptive sensor data [numpy.float32]
        :param actions: (teacher) action [numpy.float32]
        """
        assert self.cnt < self.n_steps_per_env
        self.proprio_states[self.cnt] = proprio_states
        self.noisy_extero_states[self.cnt] = noisy_extero_states
        self.extero_states[self.cnt] = extero_states
        self.actions[self.cnt] = actions
        self.cnt += 1

    def getBatches(self):
        """
        :return: (L, N, D) tensors; also resets the write cursor for the next rollout.
        """
        self.cnt = 0
        proprio_states_tensor = torch.from_numpy(self.proprio_states).to(self.device)
        noisy_extero_states_tensor = torch.from_numpy(self.noisy_extero_states).to(self.device)
        extero_states_tensor = torch.from_numpy(self.extero_states).to(self.device)
        actions_tensor = torch.from_numpy(self.actions).to(self.device)
        return proprio_states_tensor, noisy_extero_states_tensor, extero_states_tensor, actions_tensor
--------------------------------------------------------------------------------
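A minimal fill-then-drain sketch of the buffer above, using the dummy dimensions from model/main.py; `argparse.Namespace` simply stands in for the parsed args:

```
import argparse
import numpy as np
from storage import VisionRolloutBuffer  # run from the model/ directory

args = argparse.Namespace(proprio_obs_dim=10, extero_obs_dim=20, action_dim=12,
                          n_envs=100, n_steps=40000, device="cpu")
buffer = VisionRolloutBuffer(args)

for _ in range(buffer.n_steps_per_env):  # 400 steps per environment
    buffer.addTransition(np.zeros((100, 10), dtype=np.float32),
                         np.zeros((100, 20), dtype=np.float32),
                         np.zeros((100, 20), dtype=np.float32),
                         np.zeros((100, 12), dtype=np.float32))

# (L, N, D) tensors: (400, 100, 10), (400, 100, 20), (400, 100, 20), (400, 100, 12)
proprio, noisy_extero, extero, actions = buffer.getBatches()
```
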
/noise_generator/main.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include <random>
#include <array>
#include "TrainManager.hpp"

namespace raisim {

extern "C"
int main(int argc, char * argv[]) {
  static std::mt19937 gen;
  static std::uniform_real_distribution<double> uniDist(0., 1.);
  static std::normal_distribution<double> normDist(0., 1.);

  HeightNoiseGenerator heightNoiseGenerator;
  HeightNoiseGenerator::Noise heightNoise;

  // initialize container of the true height scan data
  const int nFoots = 4;
  const int nScansPerFoot = 10;
  std::array<std::array<double, nScansPerFoot>, nFoots> heightScan;
  std::array<std::array<double, nScansPerFoot>, nFoots> heightScanNoisy;
  for (int i = 0; i < nFoots; i++)
    for (int j = 0; j < nScansPerFoot; j++)
      heightScan[i][j] = 0;

  // call when the episode starts
  heightNoiseGenerator.sampleNoiseType(gen, uniDist);
  heightNoiseGenerator.sampleNoise(
      heightNoise, HeightNoiseGenerator::SampleType::INIT, gen, uniDist, normDist);

  // simulate
  int nSteps = 400;
  for (int t = 0; t < nSteps; t++) {
    for (int i = 0; i < nFoots; i++) {
      // call when the foot changes
      heightNoiseGenerator.sampleNoise(
          heightNoise, HeightNoiseGenerator::SampleType::FOOT_CHANGE, gen, uniDist, normDist);

      for (int j = 0; j < nScansPerFoot; j++) {
        // call for every height scan point
        heightNoiseGenerator.sampleNoise(
            heightNoise, HeightNoiseGenerator::SampleType::POINT_CHANGE, gen, uniDist, normDist);

        double xOffset = heightNoise.x;
        double yOffset = heightNoise.y;
        double zOffset = heightNoise.z;

        /// Read the height scan with x, y, z offsets from the true value as below:
        ///   heightScanNoisy[i][j] = heightmap->getHeight(default_x_ij + xOffset, default_y_ij + yOffset) + zOffset
        /// However, no heightmap is generated in this example,
        /// so only zOffset is added to the default height value to show the intended use.
        heightScanNoisy[i][j] = heightScan[i][j] + zOffset;

        std::cout << xOffset << " " << yOffset << " " << zOffset << "\n";
      }
    }
  }

  return 0;
}

}  // namespace raisim
--------------------------------------------------------------------------------
/model/modules/base_nn.py:
--------------------------------------------------------------------------------
import torch.nn as nn


class BaseNet(nn.Module):
    """Builds named submodules (LSTM/GRU/MLP/Linear) from a nested config dict."""
    def __init__(self, model_config):
        super(BaseNet, self).__init__()
        self.model_config = model_config
        self.activation_map = {"relu": nn.ReLU, "tanh": nn.Tanh, "leakyrelu": nn.LeakyReLU, "gelu": nn.GELU}

        used_model_archs = self.model_config.keys()

        if "LSTM" in used_model_archs:
            for model_name, model_arch in self.model_config["LSTM"].items():
                self.add_module(model_name, nn.LSTM(
                    input_size=model_arch["input"],
                    hidden_size=model_arch["hidden"],
                    num_layers=model_arch["num_layers"],
                    batch_first=model_arch["batch_first"],
                    dropout=model_arch["dropout"]
                ))

        if "GRU" in used_model_archs:
            for model_name, model_arch in self.model_config["GRU"].items():
                self.add_module(model_name, nn.GRU(
                    input_size=model_arch["input"],
                    hidden_size=model_arch["hidden"],
                    num_layers=model_arch["num_layers"],
                    batch_first=model_arch["batch_first"],
                    dropout=model_arch["dropout"]
                ))

        if "MLP" in used_model_archs:
            for model_name, model_arch in self.model_config["MLP"].items():
                assert model_arch["activation"] in self.activation_map, "Unavailable activation."
                self.add_module(model_name, MLP(
                    input_size=model_arch["input"],
                    output_size=model_arch["output"],
                    shape=model_arch["shape"],
                    activation=self.activation_map[model_arch["activation"]],
                    dropout=model_arch.get("dropout", 0.),
                    batchnorm=model_arch.get("batchnorm", False)
                ))

        if "Linear" in used_model_archs:
            for model_name, model_arch in self.model_config["Linear"].items():
                self.add_module(model_name, nn.Linear(
                    in_features=model_arch["input"],
                    out_features=model_arch["output"]
                ))


class MLP(nn.Module):
    def __init__(self, input_size, output_size, shape, activation, dropout=0.0, batchnorm=False):
        super(MLP, self).__init__()
        self.activation_fn = activation

        modules = [nn.Linear(input_size, shape[0]), self.activation_fn()]

        for idx in range(len(shape)-1):
            modules.append(nn.Linear(shape[idx], shape[idx+1]))
            if batchnorm:
                modules.append(nn.BatchNorm1d(shape[idx+1]))
            modules.append(self.activation_fn())
            if dropout != 0.0:
                modules.append(nn.Dropout(dropout))

        modules.append(nn.Linear(shape[-1], output_size))
        self.architecture = nn.Sequential(*modules)

        self.input_shape = [input_size]
        self.output_shape = [output_size]

    def forward(self, x):
        return self.architecture(x)
--------------------------------------------------------------------------------
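For reference, `MLP` can also be constructed standalone with hypothetical sizes; note that `activation` takes the class, not an instance:

```
import torch
from modules.base_nn import MLP  # run from the model/ directory

net = MLP(input_size=10, output_size=4, shape=[64, 64], activation=torch.nn.LeakyReLU)
out = net(torch.randn(8, 10))  # -> shape (8, 4)
```
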
/model/main.py:
--------------------------------------------------------------------------------
import argparse
from ruamel.yaml import YAML
import numpy as np
import torch

from agent import VisionStudentAgent


class Environment:
    """
    Change to the environment you are using
    """
    def __init__(self, args):
        self.obs_dim = args.proprio_obs_dim + args.extero_obs_dim
        self.action_dim = args.action_dim
        self.n_envs = args.n_envs

    def observe(self):
        observations = np.random.normal(size=(self.n_envs, self.obs_dim)).astype(np.float32)
        return observations

    def observe_noisy(self):
        noisy_observations = np.random.normal(size=(self.n_envs, self.obs_dim)).astype(np.float32)
        return noisy_observations

    def step(self, action):
        rewards = np.random.normal(size=self.n_envs).astype(np.float32)
        dones = np.zeros(shape=self.n_envs).astype(np.bool_)
        return rewards, dones


class TeacherAgent:
    """
    Change to the teacher agent you are using
    """
    def __init__(self, args):
        self.action_dim = args.action_dim
        self.n_envs = args.n_envs

    def getAction(self, observations):
        actions = np.random.normal(size=(self.n_envs, self.action_dim)).astype(np.float32)
        return actions


def getParser():
    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--name', type=str, default='example')
    parser.add_argument('--device', type=str, default='cuda', help='gpu or cpu.')
    parser.add_argument('--save_dir', type=str, default='example', help='directory name to save weights')
    return parser


if __name__ == "__main__":
    parser = getParser()
    args = parser.parse_args()

    # parameters to be set from the environment you are running
    args.student_model_num = 0
    args.student_lr = 3e-4
    args.student_epochs = 1
    args.max_grad_norm = 1.
    args.student_policy_type = "vision_recurrent"
    args.n_envs = 100
    args.n_steps = 40000
    args.n_steps_per_env = int(args.n_steps / args.n_envs)

    args.proprio_obs_dim = 10   # 100
    args.extero_obs_dim = 20    # 200
    args.action_dim = 12

    # config
    cfg = YAML().load(open("config.yaml", 'r'))

    # define teacher agent (pretrained)
    teacher = TeacherAgent(args)

    # define student agent
    student = VisionStudentAgent(args, cfg["student_model"])
    hidden_state_tensor = None

    # define environment
    env = Environment(args)

    max_update = 10

    for update in range(max_update):
        for _ in range(args.n_steps_per_env):
            obs = env.observe()
            noisy_obs = env.observe_noisy()

            proprio_obs = obs[:, :args.proprio_obs_dim]
            extero_obs = obs[:, -args.extero_obs_dim:]
            noisy_extero_obs = noisy_obs[:, -args.extero_obs_dim:]
            proprio_obs_tensor = torch.from_numpy(proprio_obs).to(args.device)
            noisy_extero_obs_tensor = torch.from_numpy(noisy_extero_obs).to(args.device)

            with torch.no_grad():
                # get student action
                actions_tensor, hidden_state_tensor = student.getAction(
                    proprio_state=proprio_obs_tensor,
                    extero_state=noisy_extero_obs_tensor,
                    hidden_state=hidden_state_tensor
                )

                # get teacher action
                teacher_actions = teacher.getAction(obs)

            actions = actions_tensor.detach().cpu().numpy()
            rewards, dones = env.step(actions)
            # NOTE: the recurrent hidden state is carried across steps; with a real
            # environment it should be reset for environments whose dones flag is True.

            # add data to the buffer
            student.step(proprio_obs, noisy_extero_obs, extero_obs, teacher_actions)

        # train model
        loss, reconstruction_loss, action_loss = student.train()

        # save model
        if update % 5 == 0:
            student.save(update)

        print('----------------------------------------------------')
        print('{:>6}th iteration'.format(update))
        print('{:<40} {:>6}'.format("total loss: ", '{:6.4f}'.format(loss)))
        print('{:<40} {:>6}'.format("reconstruction loss: ", '{:6.4f}'.format(reconstruction_loss)))
        print('{:<40} {:>6}'.format("action loss: ", '{:6.4f}'.format(action_loss)))
        print('----------------------------------------------------\n')
--------------------------------------------------------------------------------
91 | """ 92 | loaded_checkpoint = dict() 93 | for k, v in checkpoint.items(): 94 | if k.split('.')[0] == "extero_encoder": 95 | loaded_checkpoint['.'.join(k.split('.')[1:])] = v 96 | 97 | if len(loaded_checkpoint.keys()) != 0: 98 | self.actor.extero_encoder.load_state_dict(loaded_checkpoint) 99 | cprint("Exteroceptive encoder load success", bold=True, color="blue") 100 | else: 101 | cprint("Exteroceptive encoder load fail", bold=True, color="blue") 102 | 103 | def save(self, model_name): 104 | save_dict = { 105 | 'actor': self.actor.state_dict(), 106 | 'actor_optimizer': self.actor_optimizer.state_dict() 107 | } 108 | torch.save(save_dict, f"{self.checkpoint_dir}/full_{model_name}.pt") 109 | cprint(f'[{self.name} - full_{model_name}.pt] save success.', bold=True, color="blue") 110 | 111 | def load(self): 112 | if not os.path.isdir(self.checkpoint_dir): 113 | os.makedirs(self.checkpoint_dir) 114 | checkpoint_file = f"{self.checkpoint_dir}/full_{self.model_num}.pt" 115 | 116 | if os.path.isfile(checkpoint_file): 117 | checkpoint = torch.load(checkpoint_file) 118 | self.actor.load_state_dict(checkpoint['actor']) 119 | self.actor_optimizer.load_state_dict(checkpoint['actor_optimizer']) 120 | cprint(f'[{self.name} - full_{self.model_num}.pt] load success.', bold=True, color="blue") 121 | return int(self.model_num) 122 | else: 123 | cprint(f'[{self.name} - full_{self.model_num}.pt] load fail.', bold=True, color="red") 124 | return 0 125 | -------------------------------------------------------------------------------- /noise_generator/TrainManager.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TRAIN_MANAGER_HPP 2 | #define TRAIN_MANAGER_HPP 3 | 4 | namespace raisim { 5 | 6 | class HeightNoiseGenerator { 7 | public: 8 | enum class NoiseType : int { 9 | NOMINAL = 0, 10 | OFFSET, 11 | NOISY 12 | }; 13 | 14 | // INIT (episode start) --> FOOT_CHANGE (foot change) --> POINT_CHANGE (point change) 15 | enum class SampleType : int { 16 | INIT = 0, 17 | FOOT_CHANGE, 18 | POINT_CHANGE 19 | }; 20 | 21 | struct Noise { 22 | double x; 23 | double y; 24 | double z; 25 | }; 26 | 27 | struct NoiseParam { 28 | Noise pointAndTime; // sampled for each point every time step 29 | Noise footAndTime; // sampled for each foot every time step 30 | Noise foot; // sampled for each foot at the beginning of the episode 31 | double zOutlier; 32 | double constantOffsetProb; 33 | double zOutlierProb; 34 | }; 35 | 36 | HeightNoiseGenerator() = default; 37 | 38 | ~HeightNoiseGenerator() = default; 39 | 40 | void sampleNoiseType(std::mt19937 &gen, 41 | std::uniform_real_distribution &uniDist) { 42 | double val = uniDist(gen); 43 | 44 | if (val < 0.6) 45 | noiseType = NoiseType::NOMINAL; 46 | else if (val < 0.6 + 0.3) 47 | noiseType = NoiseType::OFFSET; 48 | else 49 | noiseType = NoiseType::NOISY; 50 | 51 | switch (noiseType) { 52 | case NoiseType::NOMINAL: 53 | defaultNoiseParam.pointAndTime.x = 0.004; 54 | defaultNoiseParam.pointAndTime.y = 0.004; 55 | defaultNoiseParam.pointAndTime.z = 0.005; 56 | defaultNoiseParam.footAndTime.x = 0.01; 57 | defaultNoiseParam.footAndTime.y = 0.01; 58 | defaultNoiseParam.footAndTime.z = 0.04; 59 | defaultNoiseParam.zOutlierProb = 0.02; 60 | defaultNoiseParam.zOutlier = 0.03; 61 | defaultNoiseParam.constantOffsetProb = 0.05; 62 | defaultNoiseParam.foot.x = 0.1; 63 | defaultNoiseParam.foot.y = 0.1; 64 | defaultNoiseParam.foot.z = 0.1; 65 | break; 66 | case NoiseType::OFFSET: 67 | defaultNoiseParam.pointAndTime.x = 0.004; 68 | 
/noise_generator/TrainManager.hpp:
--------------------------------------------------------------------------------
#ifndef TRAIN_MANAGER_HPP
#define TRAIN_MANAGER_HPP

#include <random>

namespace raisim {

class HeightNoiseGenerator {
public:
  enum class NoiseType : int {
    NOMINAL = 0,
    OFFSET,
    NOISY
  };

  // INIT (episode start) --> FOOT_CHANGE (foot change) --> POINT_CHANGE (point change)
  enum class SampleType : int {
    INIT = 0,
    FOOT_CHANGE,
    POINT_CHANGE
  };

  struct Noise {
    double x;
    double y;
    double z;
  };

  struct NoiseParam {
    Noise pointAndTime;  // sampled for each point every time step
    Noise footAndTime;   // sampled for each foot every time step
    Noise foot;          // sampled for each foot at the beginning of the episode
    double zOutlier;
    double constantOffsetProb;
    double zOutlierProb;
  };

  HeightNoiseGenerator() = default;

  ~HeightNoiseGenerator() = default;

  // Sample one of the three noise models (60% NOMINAL, 30% OFFSET, 10% NOISY)
  // and set its parameters. Call once at the beginning of every episode.
  void sampleNoiseType(std::mt19937 &gen,
                       std::uniform_real_distribution<double> &uniDist) {
    double val = uniDist(gen);

    if (val < 0.6)
      noiseType = NoiseType::NOMINAL;
    else if (val < 0.6 + 0.3)
      noiseType = NoiseType::OFFSET;
    else
      noiseType = NoiseType::NOISY;

    switch (noiseType) {
      case NoiseType::NOMINAL:
        defaultNoiseParam.pointAndTime.x = 0.004;
        defaultNoiseParam.pointAndTime.y = 0.004;
        defaultNoiseParam.pointAndTime.z = 0.005;
        defaultNoiseParam.footAndTime.x = 0.01;
        defaultNoiseParam.footAndTime.y = 0.01;
        defaultNoiseParam.footAndTime.z = 0.04;
        defaultNoiseParam.zOutlierProb = 0.02;
        defaultNoiseParam.zOutlier = 0.03;
        defaultNoiseParam.constantOffsetProb = 0.05;
        defaultNoiseParam.foot.x = 0.1;
        defaultNoiseParam.foot.y = 0.1;
        defaultNoiseParam.foot.z = 0.1;
        break;
      case NoiseType::OFFSET:
        defaultNoiseParam.pointAndTime.x = 0.004;
        defaultNoiseParam.pointAndTime.y = 0.004;
        defaultNoiseParam.pointAndTime.z = 0.005;
        defaultNoiseParam.footAndTime.x = 0.01;
        defaultNoiseParam.footAndTime.y = 0.01;
        defaultNoiseParam.footAndTime.z = 0.1;
        defaultNoiseParam.zOutlierProb = 0.02;
        defaultNoiseParam.zOutlier = 0.1;
        defaultNoiseParam.constantOffsetProb = 0.02;
        defaultNoiseParam.foot.x = 0.1;
        defaultNoiseParam.foot.y = 0.1;
        defaultNoiseParam.foot.z = 0.1;
        break;
      case NoiseType::NOISY:
        defaultNoiseParam.pointAndTime.x = 0.004;
        defaultNoiseParam.pointAndTime.y = 0.004;
        defaultNoiseParam.pointAndTime.z = 0.1;
        defaultNoiseParam.footAndTime.x = 0.1;
        defaultNoiseParam.footAndTime.y = 0.1;
        defaultNoiseParam.footAndTime.z = 0.3;
        defaultNoiseParam.zOutlierProb = 0.05;
        defaultNoiseParam.zOutlier = 0.3;
        defaultNoiseParam.constantOffsetProb = 0.3;
        defaultNoiseParam.foot.x = 0.1;
        defaultNoiseParam.foot.y = 0.1;
        defaultNoiseParam.foot.z = 0.1;
        break;
    }
  }

  // Accumulate the per-episode, per-foot, and per-point noise components into `noise`.
  void sampleNoise(Noise &noise,
                   const SampleType &sampleType,
                   std::mt19937 &gen,
                   std::uniform_real_distribution<double> &uniDist,
                   std::normal_distribution<double> &normDist) {
    switch (sampleType) {
      case SampleType::INIT:
        if (uniDist(gen) < defaultNoiseParam.constantOffsetProb) {
          sampledNoiseParam.foot.x = defaultNoiseParam.foot.x * normDist(gen);
          sampledNoiseParam.foot.y = defaultNoiseParam.foot.y * normDist(gen);
          sampledNoiseParam.foot.z = defaultNoiseParam.foot.z * normDist(gen);
        } else {
          sampledNoiseParam.foot.x = 0.;
          sampledNoiseParam.foot.y = 0.;
          sampledNoiseParam.foot.z = 0.;
        }
        return;
      case SampleType::FOOT_CHANGE:
        sampledNoiseParam.footAndTime.x = defaultNoiseParam.footAndTime.x * normDist(gen);
        sampledNoiseParam.footAndTime.y = defaultNoiseParam.footAndTime.y * normDist(gen);
        sampledNoiseParam.footAndTime.z = defaultNoiseParam.footAndTime.z * normDist(gen);
        return;
      case SampleType::POINT_CHANGE:
        sampledNoiseParam.pointAndTime.x = defaultNoiseParam.pointAndTime.x * normDist(gen);
        sampledNoiseParam.pointAndTime.y = defaultNoiseParam.pointAndTime.y * normDist(gen);
        sampledNoiseParam.pointAndTime.z = defaultNoiseParam.pointAndTime.z * normDist(gen);
        if (uniDist(gen) < defaultNoiseParam.zOutlierProb)
          sampledNoiseParam.zOutlier = defaultNoiseParam.zOutlier * normDist(gen);
        else
          sampledNoiseParam.zOutlier = 0.;
        break;
    }

    noise.x = sampledNoiseParam.foot.x + sampledNoiseParam.footAndTime.x + sampledNoiseParam.pointAndTime.x;
    noise.y = sampledNoiseParam.foot.y + sampledNoiseParam.footAndTime.y + sampledNoiseParam.pointAndTime.y;
    noise.z = sampledNoiseParam.foot.z + sampledNoiseParam.footAndTime.z +
              sampledNoiseParam.pointAndTime.z + sampledNoiseParam.zOutlier;
  }

private:
  NoiseType noiseType;
  NoiseParam defaultNoiseParam, sampledNoiseParam;
};

}  // namespace raisim

#endif  // TRAIN_MANAGER_HPP
--------------------------------------------------------------------------------
/model/modules/actor.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch

from base_nn import BaseNet


class RecurrentAttentionPolicy(BaseNet):
    class BeliefEncoder(BaseNet):
        def __init__(self, model_cfg):
            super().__init__(model_config=model_cfg)

        def forward(self, proprio_state, encoded_extero_state, hidden_state=None):
            """
            :param proprio_state: proprioceptive sensor data [(L, H) / (L, N, H)]
            :param encoded_extero_state: encoded exteroceptive sensor data [(L, H) / (L, N, H)]
            :param hidden_state: hidden state of the recurrent layer [(N_layer, H) / (N_layer, N, H)]
            :return:
                recurrent_output: output state of the recurrent layer [(L, H) / (L, N, H)]
                recurrent_hidden: next hidden state of the recurrent layer [(N_layer, H) / (N_layer, N, H)]
                belief_state: next belief state of the belief encoder [(L, H) / (L, N, H)]
            """
            output = dict()

            fused_state = torch.cat((proprio_state, encoded_extero_state), dim=-1)  # (L, H) / (L, N, H)
            if hidden_state is None:
                output["recurrent_output"], output["recurrent_hidden"] = self.recurrent_encoder(fused_state)
            else:
                output["recurrent_output"], output["recurrent_hidden"] = self.recurrent_encoder(fused_state, hidden_state)

            # tile the state encoding once per leg (4x along the last dim) and add the
            # attention-gated exteroceptive features
            tile_shape = torch.Size(torch.ones_like(torch.tensor(output["recurrent_output"].shape)[:-1])) + (4,)
            output["belief_state"] = \
                torch.tile(self.state_encoder(output["recurrent_output"]), dims=tile_shape) + \
                torch.sigmoid(self.attention_encoder(output["recurrent_output"])) * encoded_extero_state
            return output
    class BeliefDecoder(BaseNet):
        def __init__(self, model_cfg):
            super().__init__(model_config=model_cfg)

        def forward(self, extero_state, hidden_state):
            """
            :param extero_state: exteroceptive sensor data [(L, H) / (L, N, H)]
            :param hidden_state: output state of the recurrent layer [(L, H) / (L, N, H)]
                (cf: in a GRU, the last output state equals the hidden state)
            :return:
                estimated_extero_state: estimated exteroceptive sensor data [(L, H) / (L, N, H)]
            """
            estimated_extero_state = \
                self.extero_decoder(hidden_state) + \
                torch.sigmoid(self.attention_encoder(hidden_state)) * extero_state
            return estimated_extero_state

    def __init__(self, args, model_cfg):
        self.proprio_dim = args.proprio_obs_dim
        self.extero_dim = args.extero_obs_dim
        self.action_dim = args.action_dim

        self.device = args.device
        self.args = args
        self.model_cfg = model_cfg
        self.adapt_model()

        super(RecurrentAttentionPolicy, self).__init__(model_config=model_cfg["policy"])
        self.belief_encoder = self.BeliefEncoder(model_cfg["belief_encoder"])
        self.belief_decoder = self.BeliefDecoder(model_cfg["belief_decoder"])

    def adapt_model(self):
        """Fill in the model input/output sizes that config.yaml leaves unspecified."""
        # the exteroceptive input is split into 4 per-leg chunks before encoding
        assert self.extero_dim % 4 == 0
        self.model_cfg["policy"]["MLP"]["extero_encoder"]["input"] = self.extero_dim // 4
        self.model_cfg["policy"]["MLP"]["base_net"]["input"] = \
            self.proprio_dim + self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
        self.model_cfg["policy"]["MLP"]["base_net"]["output"] = self.action_dim

        self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["input"] = \
            self.proprio_dim + self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4

        self.model_cfg["belief_encoder"]["MLP"]["attention_encoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
        self.model_cfg["belief_encoder"]["MLP"]["attention_encoder"]["output"] \
            = self.model_cfg["policy"]["MLP"]["extero_encoder"]["output"] * 4
        self.model_cfg["belief_encoder"]["MLP"]["state_encoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]

        self.model_cfg["belief_decoder"]["MLP"]["attention_encoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
        self.model_cfg["belief_decoder"]["MLP"]["attention_encoder"]["output"] \
            = self.extero_dim
        self.model_cfg["belief_decoder"]["MLP"]["extero_decoder"]["input"] \
            = self.model_cfg["belief_encoder"]["GRU"]["recurrent_encoder"]["hidden"]
        self.model_cfg["belief_decoder"]["MLP"]["extero_decoder"]["output"] \
            = self.extero_dim
    def forward(self, proprio_state, extero_state, hidden_state=None, use_decoder=True):
        """
        :param proprio_state: proprioceptive sensor data [(L, H) / (L, N, H)]
        :param extero_state: exteroceptive sensor data [(L, H) / (L, N, H)]
        :param hidden_state: hidden state of the recurrent layer in the belief encoder [(N_layer, H) / (N_layer, N, H)]
        :param use_decoder: whether to use the belief decoder to estimate exteroceptive data
        :return:
            action: [(L, H) / (L, N, H)]
            recurrent_hidden: [(N_layer, H) / (N_layer, N, H)]
            estimated_extero_state: estimated exteroceptive sensor data [(L, H) / (L, N, H)]
        """
        output = dict()

        # encode each of the 4 per-leg exteroceptive chunks, then flatten back
        length_and_batch = proprio_state.shape[:-1]
        encoded_extero_state = extero_state.view(*length_and_batch, 4, self.extero_dim // 4)
        encoded_extero_state = self.extero_encoder(encoded_extero_state).view(*length_and_batch, -1)

        belief_encoder_output = self.belief_encoder(proprio_state, encoded_extero_state, hidden_state)
        fused_state = torch.cat((proprio_state, belief_encoder_output["belief_state"]), dim=-1)
        output["action"] = self.base_net(fused_state)
        output["recurrent_hidden"] = belief_encoder_output["recurrent_hidden"]

        if use_decoder:
            output["estimated_extero_state"] = \
                self.belief_decoder(extero_state, belief_encoder_output["recurrent_output"])
        return output

    def getAction(self, proprio_state, extero_state, hidden_state):
        """
        :param proprio_state: [(N, H)]
        :param extero_state: [(N, H)]
        :param hidden_state: [(N_layer, N, H)]
        :return:
            action: [(N, H)]
            hidden_state: [(N_layer, N, H)]
        """
        assert len(proprio_state.shape) == 2 and proprio_state.shape[0] == self.args.n_envs
        proprio_state = proprio_state.unsqueeze(0)  # add a length-1 sequence dim -> (1, N, H)
        extero_state = extero_state.unsqueeze(0)
        output = self.forward(proprio_state, extero_state, hidden_state, use_decoder=False)
        action = output["action"].squeeze(0)
        # the GRU hidden state is already (N_layer, N, H); pass it back unchanged
        next_hidden_state = output["recurrent_hidden"]
        return action, next_hidden_state
--------------------------------------------------------------------------------
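Finally, a minimal shape check for `RecurrentAttentionPolicy`, assuming the dummy dimensions from model/main.py (run from the model/ directory):

```
import argparse
import torch
from ruamel.yaml import YAML
from modules.actor import RecurrentAttentionPolicy

args = argparse.Namespace(proprio_obs_dim=10, extero_obs_dim=20, action_dim=12,
                          n_envs=4, device="cpu")
cfg = YAML().load(open("config.yaml", 'r'))
policy = RecurrentAttentionPolicy(args, cfg["student_model"])

action, hidden = policy.getAction(torch.randn(4, 10), torch.randn(4, 20), hidden_state=None)
print(action.shape)  # torch.Size([4, 12])
print(hidden.shape)  # torch.Size([2, 4, 50]): (num_layers, n_envs, hidden)
```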