├── .gitiginore ├── .gitignore ├── README.md ├── algo ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── imagenet_depth_encoder.cpython-37.pyc │ │ ├── models.cpython-37.pyc │ │ ├── models.cpython-38.pyc │ │ ├── models_priv.cpython-37.pyc │ │ ├── models_priv.cpython-38.pyc │ │ ├── observation_encoder.cpython-37.pyc │ │ ├── proprio_depth_transformer.cpython-37.pyc │ │ ├── proprio_depth_transformer.cpython-38.pyc │ │ ├── proprio_embd_transformer.cpython-37.pyc │ │ ├── proprio_mvp_rgb_transformer.cpython-37.pyc │ │ ├── proprio_r3m_rgb_transformer.cpython-37.pyc │ │ ├── proprio_vip_transformer.cpython-37.pyc │ │ ├── proprio_vit_transformer.cpython-37.pyc │ │ ├── pt_actor_critic.cpython-37.pyc │ │ ├── rt_actor_critic.cpython-37.pyc │ │ ├── rt_embed_actor_critic.cpython-37.pyc │ │ ├── running_mean_std.cpython-37.pyc │ │ ├── running_mean_std.cpython-38.pyc │ │ ├── vision_encoder.cpython-37.pyc │ │ └── vision_encoder.cpython-38.pyc │ ├── models.py │ ├── models_priv.py │ ├── rt_actor_critic.py │ └── running_mean_std.py ├── ppo_transformer │ ├── __pycache__ │ │ ├── experience.cpython-37.pyc │ │ ├── mem_eff_experience.cpython-37.pyc │ │ ├── ppo_transformer.cpython-37.pyc │ │ └── ppobc_transformer.cpython-37.pyc │ ├── experience.py │ └── ppo_transformer.py └── pretrained │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── policy_transformer.cpython-37.pyc │ ├── robot_transformer.cpython-37.pyc │ ├── robot_transformer_ar.cpython-37.pyc │ └── transformer.cpython-37.pyc │ ├── dataset.py │ ├── depth_trainer.py │ ├── depth_trainer_multigpu.py │ ├── robot_dataset.py │ ├── robot_transformer_ar.py │ ├── trainer.py │ └── transformer.py ├── cfg ├── config.yaml ├── launcher │ └── default.yaml ├── pretrain │ ├── AllegroXarmCabinet.yaml │ ├── AllegroXarmNew.yaml │ └── AllegroXarmThrowing.yaml ├── task │ ├── AllegroXarmCabinet.yaml │ ├── AllegroXarmNew.yaml │ └── AllegroXarmThrowing.yaml └── train │ ├── AllegroXarmCabinetPPO.yaml │ ├── AllegroXarmNewPPO.yaml │ └── AllegroXarmThrowingPPO.yaml ├── env.yml ├── imgs └── approach.png ├── scripts ├── finetune.py ├── finetune │ ├── finetune_cabinet.sh │ ├── finetune_grasp.sh │ └── finetune_throw.sh ├── pretrain.py ├── pretrain.sh └── run_policy.sh ├── tasks ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── allegro_kuka_grasping.cpython-37.pyc │ ├── allegro_kuka_grasping.cpython-38.pyc │ ├── torch_jit_utils.cpython-37.pyc │ ├── torch_jit_utils.cpython-38.pyc │ ├── xarm_cabinet.cpython-37.pyc │ ├── xarm_cabinet.cpython-38.pyc │ ├── xarm_grasping.cpython-37.pyc │ ├── xarm_grasping.cpython-38.pyc │ ├── xarm_grasping_debug.cpython-37.pyc │ ├── xarm_grasping_debug.cpython-38.pyc │ ├── xarm_grasping_new.cpython-37.pyc │ ├── xarm_grasping_new.cpython-38.pyc │ ├── xarm_grasping_real.cpython-37.pyc │ ├── xarm_throwing.cpython-37.pyc │ └── xarm_throwing.cpython-38.pyc ├── base │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── vec_task.cpython-37.pyc │ │ └── vec_task.cpython-38.pyc │ └── vec_task.py ├── torch_jit_utils.py ├── xarm7_utils.py ├── xarm_cabinet.py ├── xarm_grasping_new.py └── xarm_throwing.py └── utils ├── __init__.py ├── __pycache__ ├── __init__.cpython-37.pyc ├── __init__.cpython-38.pyc ├── allegro_kuka_utils.cpython-37.pyc ├── allegro_kuka_utils.cpython-38.pyc ├── hand_arm_utils.cpython-37.pyc ├── hand_arm_utils.cpython-38.pyc ├── logger.cpython-37.pyc ├── logger.cpython-38.pyc 
├── misc.cpython-37.pyc ├── misc.cpython-38.pyc ├── pytorch_utils.cpython-37.pyc ├── pytorch_utils.cpython-38.pyc ├── randomization_utils.cpython-37.pyc ├── randomization_utils.cpython-38.pyc ├── reformat.cpython-37.pyc ├── reformat.cpython-38.pyc ├── torch_jit_utils.cpython-37.pyc ├── urdf_utils.cpython-37.pyc ├── urdf_utils.cpython-38.pyc ├── utils.cpython-37.pyc ├── utils.cpython-38.pyc ├── warmup_scheduler.cpython-37.pyc └── warmup_scheduler.cpython-38.pyc ├── allegro_kuka_utils.py ├── camera.json ├── camera2.json ├── dr_utils.py ├── hand_arm_utils.py ├── logger.py ├── misc.py ├── pytorch_utils.py ├── randomization_utils.py ├── reformat.py ├── rlgames_utils.py ├── rna_util.py ├── torch_jit_utils.py ├── urdf_utils.py ├── utils.py ├── wandb_utils.py └── warmup_scheduler.py /.gitiginore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | outputs/ 3 | assets/ 4 | */*/.pyc 5 | *.pyc 6 | __pycache__/ 7 | */__pycache__/ 8 | wandb/ 9 | *.log 10 | algo/pretrained/models/* 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hand-object Interaction Pretraining from Videos 2 | 3 | This repo contains code for the paper [Hand-object interaction Pretraining from Videos](https://hgaurav2k.github.io/hop/pdf/manuscript.pdf) 4 | 5 | 6 | 7 | For a brief overview, check out the project [webpage](https://hgaurav2k.github.io/hop)! 8 | 9 | 10 | 11 | 12 | For any questions, please contact [Himanshu Gaurav Singh](https://hgaurav2k.github.io/). 13 | 14 | 15 | ## Setup 16 | 17 | * Create conda environment using `conda env create -f env.yml` 18 | * Install [IsaacGym](https://developer.nvidia.com/isaac-gym) in this environment. 19 | * Download the [asset](https://drive.google.com/drive/folders/1BE3lg8k1kssGxojtL0OkQLscSAkbpNzS?usp=sharing) folder and put them in the root directory. 20 | 21 | ## Running the code 22 | 23 | ### Pretraining 24 | 25 | 26 | * Download the hand-object interaction dataset from [here](https://drive.google.com/file/d/12-xghxt0rf_0xDo5SMdrRBnNr7LWJ02Y/view?usp=drive_link). Extract using `tar -xf hoi_pretraining_data.tar.xz`. Put it under the root directory. 27 | * Run `bash scripts/pretrain.sh ` 28 | 29 | ### Finetuning 30 | 31 | 32 | * Download pretrained checkpoint from [here](https://drive.google.com/file/d/10zYrzPK8T-1zB8dqB5o2MfK_iF0Uda_f/view?usp=sharing). You can also use your own trained checkpoint. 33 | * For your choice of `task`, run `bash scripts/finetune/finetune_{task}.sh`. 34 | 36 | 37 | 38 | ### Visualising trained policies 39 | 40 | * Run `bash scripts/run_policy.sh `. 41 | 42 | 43 | ## Citation 44 | 45 | 46 | ## Acknowledgment 47 | This work was supported by the DARPA Machine Common Sense program, the DARPA Transfer from Imprecise and Abstract Models to Autonomous Technologies (TIAMAT) program, and by the ONR MURI award N00014-21-1-2801. This work was also funded by ONR MURI N00014-22-1-2773. We thank Adhithya Iyer for assistance with teleoperation systems, Phillip Wu for setting-up the real robot, and Raven Huang, Jathushan Rajasegaran and Yutong Bai for helpful discussions. 
48 | -------------------------------------------------------------------------------- /algo/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__init__.py -------------------------------------------------------------------------------- /algo/models/__pycache__/running_mean_std.cpython-37.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/running_mean_std.cpython-37.pyc -------------------------------------------------------------------------------- /algo/models/__pycache__/running_mean_std.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/running_mean_std.cpython-38.pyc -------------------------------------------------------------------------------- /algo/models/__pycache__/vision_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/vision_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /algo/models/__pycache__/vision_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/vision_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /algo/models/models.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import copy 13 | 14 | 15 | class SavingModel(nn.Module): 16 | "Saves the two models (runnig_mean_std and actor_critic) required for infence and simplifies TT code" 17 | def __init__(self, actor_critic_model, running_std_model): 18 | super(SavingModel, self).__init__() 19 | self.actor_critic_model = copy.deepcopy(actor_critic_model) 20 | self.running_std_model = copy.deepcopy(running_std_model) 21 | self.running_std_model.eval() 22 | 23 | def forward(self, x): 24 | x = self.running_std_model(x) 25 | input_dict = {'obs': x} 26 | mu = self.actor_critic_model.infer_action(input_dict) 27 | return mu 28 | 29 | class MLP(nn.Module): 30 | def __init__(self, units, input_size): 31 | super(MLP, self).__init__() 32 | layers = [] 33 | for output_size in units: 34 | layers.append(nn.Linear(input_size, output_size)) 35 | layers.append(nn.ELU()) 36 | input_size = output_size 37 | self.mlp = nn.Sequential(*layers) 38 | 39 | def forward(self, x): 40 | return self.mlp(x) 41 | 42 | 43 | class ProprioAdaptTConv(nn.Module): 44 | def __init__(self): 45 | super(ProprioAdaptTConv, self).__init__() 46 | self.channel_transform = nn.Sequential( 47 | nn.Linear(16 + 16, 32), 48 | nn.ReLU(inplace=True), 49 | nn.Linear(32, 32), 50 | nn.ReLU(inplace=True), 51 | ) 52 | self.temporal_aggregation = nn.Sequential( 53 | nn.Conv1d(32, 32, (9,), stride=(2,)), 54 | nn.ReLU(inplace=True), 55 | nn.Conv1d(32, 32, (5,), stride=(1,)), 56 | nn.ReLU(inplace=True), 57 | nn.Conv1d(32, 32, (5,), stride=(1,)), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.low_dim_proj = nn.Linear(32 * 3, 8) 61 | 62 | def forward(self, x): 63 | x = 
self.channel_transform(x) # (N, 50, 32) 64 | x = x.permute((0, 2, 1)) # (N, 32, 50) 65 | x = self.temporal_aggregation(x) # (N, 32, 3) 66 | x = self.low_dim_proj(x.flatten(1)) 67 | return x 68 | 69 | 70 | class ActorCritic(nn.Module): 71 | def __init__(self, kwargs): 72 | nn.Module.__init__(self) 73 | actions_num = kwargs.pop('actions_num') 74 | input_shape = kwargs.pop('input_shape') 75 | self.units = kwargs.pop('actor_units') 76 | mlp_input_shape = input_shape 77 | 78 | out_size = self.units[-1] 79 | 80 | self.actor_mlp = MLP(units=self.units, input_size=mlp_input_shape) 81 | self.value = torch.nn.Linear(out_size, 1) 82 | self.mu = torch.nn.Linear(out_size, actions_num) 83 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 84 | 85 | for m in self.modules(): 86 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): 87 | fan_out = m.kernel_size[0] * m.out_channels 88 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) 89 | if getattr(m, 'bias', None) is not None: 90 | torch.nn.init.zeros_(m.bias) 91 | if isinstance(m, nn.Linear): 92 | if getattr(m, 'bias', None) is not None: 93 | torch.nn.init.zeros_(m.bias) 94 | nn.init.constant_(self.sigma, 0) 95 | 96 | @torch.no_grad() 97 | def get_action(self, obs_dict): 98 | # used specifically to collection samples during training 99 | # it contains exploration so needs to sample from distribution 100 | mu, logstd, value = self._actor_critic(obs_dict) 101 | sigma = torch.exp(logstd) 102 | distr = torch.distributions.Normal(mu, sigma) 103 | selected_action = distr.sample() 104 | result = { 105 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 106 | 'values': value, 107 | 'actions': selected_action, 108 | 'mus': mu, 109 | 'sigmas': sigma, 110 | } 111 | return result 112 | 113 | @torch.no_grad() 114 | def infer_action(self, obs_dict): 115 | # used during inference 116 | mu, _, _= self._actor_critic(obs_dict) 117 | return mu 118 | 119 | def _actor_critic(self, obs_dict): 120 | obs = obs_dict['obs'] 121 | x = self.actor_mlp(obs) 122 | value = self.value(x) 123 | mu = self.mu(x) 124 | sigma = self.sigma 125 | return mu, mu * 0 + sigma, value 126 | 127 | def forward(self, input_dict): 128 | mu,logstd,value = self._actor_critic(input_dict) 129 | sigma = torch.exp(logstd) 130 | prev_actions = input_dict.get('prev_actions', mu.clone()) 131 | distr = torch.distributions.Normal(mu, sigma) 132 | entropy = distr.entropy().sum(dim=-1) 133 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 134 | 135 | result = { 136 | 'prev_neglogp': torch.squeeze(prev_neglogp), 137 | 'values': value, 138 | 'entropy': entropy, 139 | 'mus': mu, 140 | 'sigmas': sigma 141 | } 142 | 143 | return result 144 | 145 | 146 | 147 | class PointNetActorCritic(nn.Module): 148 | 149 | def __init__(self, kwargs): 150 | nn.Module.__init__(self) 151 | actions_num = kwargs.pop('actions_num') 152 | input_shape = kwargs.pop('input_shape') 153 | self.units = kwargs.pop('actor_units') 154 | self.pc_out_dim = kwargs.pop('point_cloud_out_dim') 155 | self.pc_begin, self.pc_end = kwargs.pop('point_cloud_index') 156 | self.pc_num = kwargs.pop('point_cloud_num') 157 | 158 | mlp_input_shape = input_shape 159 | out_size = self.units[-1] 160 | 161 | self.point_net = nn.Sequential( 162 | nn.Linear(3,self.pc_out_dim), 163 | nn.ELU(inplace=True), 164 | nn.Linear(self.pc_out_dim,self.pc_out_dim), 165 | nn.ELU(inplace=True), 166 | nn.Linear(self.pc_out_dim,self.pc_out_dim), 167 | 
nn.MaxPool2d((self.pc_num,1)) 168 | ) 169 | 170 | self.actor_mlp = MLP(units=self.units, input_size=self.pc_begin + self.pc_out_dim) 171 | self.obs_end_actor = self.pc_begin + self.pc_out_dim 172 | self.value = MLP(units=self.units, input_size=mlp_input_shape) 173 | self.value_final = nn.Linear(out_size, 1) 174 | # self.value = nn.Linear(out_size, 1) 175 | self.mu = nn.Linear(out_size, actions_num) 176 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 177 | 178 | for m in self.modules(): 179 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): 180 | fan_out = m.kernel_size[0] * m.out_channels 181 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) 182 | if getattr(m, 'bias', None) is not None: 183 | torch.nn.init.zeros_(m.bias) 184 | if isinstance(m, nn.Linear): 185 | if getattr(m, 'bias', None) is not None: 186 | torch.nn.init.zeros_(m.bias) 187 | nn.init.constant_(self.sigma, 0) 188 | 189 | @torch.no_grad() 190 | def get_action(self, obs_dict): 191 | # used specifically to collection samples during training 192 | # it contains exploration so needs to sample from distribution 193 | mu, logstd, value = self._actor_critic(obs_dict) 194 | sigma = torch.exp(logstd) 195 | distr = torch.distributions.Normal(mu, sigma) 196 | selected_action = distr.sample() 197 | result = { 198 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 199 | 'values': value, 200 | 'actions': selected_action, 201 | 'mus': mu, 202 | 'sigmas': sigma, 203 | } 204 | return result 205 | 206 | @torch.no_grad() 207 | def infer_action(self, obs_dict): 208 | # used during inference 209 | mu, _, _= self._actor_critic(obs_dict) 210 | return mu 211 | 212 | def _actor_critic(self, obs_dict): 213 | 214 | obs = obs_dict['obs'] 215 | pc_info = obs[:,self.pc_begin:self.pc_end].reshape(-1,self.pc_num,3) 216 | pc_rep = self.point_net(pc_info).squeeze(1) 217 | obs = torch.cat([obs[:,:self.pc_begin],pc_rep,obs[:,self.pc_end:]],dim=1) 218 | x = self.actor_mlp(obs[:,:self.obs_end_actor]) 219 | value_h = self.value(obs) 220 | value = self.value_final(value_h) 221 | mu = self.mu(x) 222 | sigma = self.sigma 223 | return mu, mu * 0 + sigma, value 224 | 225 | def forward(self, input_dict): 226 | prev_actions = input_dict.get('prev_actions', None) 227 | mu,logstd,value = self._actor_critic(input_dict) 228 | sigma = torch.exp(logstd) 229 | distr = torch.distributions.Normal(mu, sigma) 230 | entropy = distr.entropy().sum(dim=-1) 231 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 232 | result = { 233 | 'prev_neglogp': torch.squeeze(prev_neglogp), 234 | 'values': value, 235 | 'entropy': entropy, 236 | 'mus': mu, 237 | 'sigmas': sigma 238 | } 239 | return result 240 | -------------------------------------------------------------------------------- /algo/models/models_priv.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class MLP(nn.Module): 15 | def __init__(self, units, input_size): 16 | super(MLP, self).__init__() 17 | layers = [] 18 | 
for output_size in units: 19 | layers.append(nn.Linear(input_size, output_size)) 20 | layers.append(nn.ELU()) 21 | input_size = output_size 22 | self.mlp = nn.Sequential(*layers) 23 | 24 | def forward(self, x): 25 | return self.mlp(x) 26 | 27 | 28 | class ProprioAdaptTConv(nn.Module): 29 | def __init__(self): 30 | super(ProprioAdaptTConv, self).__init__() 31 | self.channel_transform = nn.Sequential( 32 | nn.Linear(16 + 16, 32), 33 | nn.ReLU(inplace=True), 34 | nn.Linear(32, 32), 35 | nn.ReLU(inplace=True), 36 | ) 37 | self.temporal_aggregation = nn.Sequential( 38 | nn.Conv1d(32, 32, (9,), stride=(2,)), 39 | nn.ReLU(inplace=True), 40 | nn.Conv1d(32, 32, (5,), stride=(1,)), 41 | nn.ReLU(inplace=True), 42 | nn.Conv1d(32, 32, (5,), stride=(1,)), 43 | nn.ReLU(inplace=True), 44 | ) 45 | self.low_dim_proj = nn.Linear(32 * 3, 8) 46 | 47 | def forward(self, x): 48 | x = self.channel_transform(x) # (N, 50, 32) 49 | x = x.permute((0, 2, 1)) # (N, 32, 50) 50 | x = self.temporal_aggregation(x) # (N, 32, 3) 51 | x = self.low_dim_proj(x.flatten(1)) 52 | return x 53 | 54 | 55 | class ActorCritic(nn.Module): 56 | def __init__(self, kwargs): 57 | nn.Module.__init__(self) 58 | actions_num = kwargs.pop('actions_num') 59 | input_shape = kwargs.pop('input_shape') 60 | self.units = kwargs.pop('actor_units') 61 | self.priv_mlp = kwargs.pop('priv_mlp_units') 62 | mlp_input_shape = input_shape[0] 63 | 64 | out_size = self.units[-1] 65 | self.priv_info = kwargs['priv_info'] 66 | self.priv_info_stage2 = kwargs['proprio_adapt'] 67 | if self.priv_info: 68 | mlp_input_shape += self.priv_mlp[-1] 69 | self.env_mlp = MLP(units=self.priv_mlp, input_size=kwargs['priv_info_dim']) 70 | 71 | if self.priv_info_stage2: 72 | self.adapt_tconv = ProprioAdaptTConv() 73 | 74 | self.actor_mlp = MLP(units=self.units, input_size=mlp_input_shape) 75 | self.value = torch.nn.Linear(out_size, 1) 76 | self.mu = torch.nn.Linear(out_size, actions_num) 77 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 78 | 79 | for m in self.modules(): 80 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): 81 | fan_out = m.kernel_size[0] * m.out_channels 82 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) 83 | if getattr(m, 'bias', None) is not None: 84 | torch.nn.init.zeros_(m.bias) 85 | if isinstance(m, nn.Linear): 86 | if getattr(m, 'bias', None) is not None: 87 | torch.nn.init.zeros_(m.bias) 88 | nn.init.constant_(self.sigma, 0) 89 | 90 | @torch.no_grad() 91 | def get_action(self, obs_dict): 92 | # used specifically to collection samples during training 93 | # it contains exploration so needs to sample from distribution 94 | mu, logstd, value, _, _ = self._actor_critic(obs_dict) 95 | sigma = torch.exp(logstd) 96 | distr = torch.distributions.Normal(mu, sigma) 97 | selected_action = distr.sample() 98 | result = { 99 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 100 | 'values': value, 101 | 'actions': selected_action, 102 | 'mus': mu, 103 | 'sigmas': sigma, 104 | } 105 | return result 106 | 107 | @torch.no_grad() 108 | def get_action_sample(self, obs_dict): 109 | # used for testing 110 | mu, logstd, value, _, _ = self._actor_critic(obs_dict) 111 | return mu 112 | 113 | def _actor_critic(self, obs_dict): 114 | obs = obs_dict['obs'] 115 | extrin, extrin_gt = None, None 116 | if self.priv_info: 117 | if self.priv_info_stage2: 118 | extrin = self.adapt_tconv(obs_dict['proprio_hist']) 119 | # during supervised 
training, extrin has gt label 120 | extrin_gt = self.env_mlp(obs_dict['priv_info']) if 'priv_info' in obs_dict else extrin 121 | extrin_gt = torch.tanh(extrin_gt) 122 | extrin = torch.tanh(extrin) 123 | obs = torch.cat([obs, extrin], dim=-1) 124 | else: 125 | extrin = self.env_mlp(obs_dict['priv_info']) 126 | extrin = torch.tanh(extrin) 127 | obs = torch.cat([obs, extrin], dim=-1) 128 | 129 | x = self.actor_mlp(obs) 130 | value = self.value(x) 131 | mu = self.mu(x) 132 | sigma = self.sigma 133 | return mu, mu * 0 + sigma, value, extrin, extrin_gt 134 | 135 | def forward(self, input_dict): 136 | prev_actions = input_dict.get('prev_actions', None) 137 | rst = self._actor_critic(input_dict) 138 | mu, logstd, value, extrin, extrin_gt = rst 139 | sigma = torch.exp(logstd) 140 | distr = torch.distributions.Normal(mu, sigma) 141 | entropy = distr.entropy().sum(dim=-1) 142 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 143 | result = { 144 | 'prev_neglogp': torch.squeeze(prev_neglogp), 145 | 'values': value, 146 | 'entropy': entropy, 147 | 'mus': mu, 148 | 'sigmas': sigma, 149 | 'extrin': extrin, 150 | 'extrin_gt': extrin_gt, 151 | } 152 | return result 153 | -------------------------------------------------------------------------------- /algo/models/rt_actor_critic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import copy 6 | from algo.pretrained.robot_transformer_ar import RobotTransformerAR 7 | from algo.models.models import MLP 8 | 9 | class RTActorCritic(nn.Module): 10 | 11 | def __init__(self, config, network_config, device, kwargs): 12 | 13 | nn.Module.__init__(self) 14 | self.network_config = network_config 15 | self.device = device 16 | actions_num =self.network_config.action_dim 17 | input_shape = kwargs.pop('value_input_shape') 18 | 19 | self.pc_to_value = config.train.ppo.point_cloud_input_to_value 20 | if config.get('pc_input', False) and self.pc_to_value: 21 | self.pc_begin, self.pc_end = kwargs.pop('point_cloud_index') 22 | 23 | self.value_grads_to_pointnet = config.train.ppo.value_grads_to_pointnet 24 | self.pc_num = self.network_config.pc_num 25 | self.scale_proprio = self.network_config.scale_proprio 26 | self.scale_action = self.network_config.scale_action 27 | 28 | 29 | mlp_input_shape = input_shape 30 | 31 | 32 | self.limits = {'upper': torch.tensor([6.2832, 2.0944, 6.2832, 3.9270, 6.2832, 3.1416, 6.2832, 0.4700, 1.6100, 1.7090, 1.6180, 1.3960, 33 | 1.1630, 1.6440, 1.7190, 0.4700, 1.6100, 1.7090, 1.6180, 0.4700, 1.6100, 1.7090, 1.6180], 34 | requires_grad=False, dtype=torch.float32, device=self.device), 35 | 'lower': torch.tensor([-6.2832, -2.0590, -6.2832, -0.1920, -6.2832, -1.6930, -6.2832, -0.4700, -0.1960, -0.1740, -0.2270, 36 | 0.2630, -0.1050, -0.1890, -0.1620, -0.4700, -0.1960, -0.1740, -0.2270, -0.4700, -0.1960, -0.1740, -0.2270] 37 | ,requires_grad=False, dtype=torch.float32, device=self.device)} 38 | 39 | 40 | self.actor = RobotTransformerAR( 41 | cfg= config) 42 | 43 | 44 | self.value_fn = nn.Sequential( 45 | nn.Linear(mlp_input_shape,512), 46 | nn.ELU(inplace=True), 47 | nn.Linear(512,256), 48 | nn.ELU(inplace=True), 49 | nn.Linear(256,128), 50 | nn.ELU(inplace=True), 51 | nn.Linear(128, 1) 52 | ) #check this 53 | 54 | self.logstd = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32)) 55 | #backbone sharing between value and critic? can this be implemented here in some way? 
56 | #not doing for now 57 | nn.init.constant_(self.logstd[:7], torch.log(torch.tensor(kwargs['init_eps_arm']))) 58 | nn.init.constant_(self.logstd[7:], torch.log(torch.tensor(kwargs['init_eps_hand']))) 59 | 60 | def scale_q(self, q): 61 | """ 62 | Scale the proprioceptive data to be between -1 and 1. 63 | """ 64 | q = (q - self.limits['lower'].view((1,-1))) / (self.limits['upper'] - self.limits['lower']) 65 | q = 2 * q - 1 66 | return q 67 | 68 | @torch.no_grad() 69 | def get_action(self, obs_dict): 70 | # used specifically to collection samples during training 71 | # it contains exploration so needs to sample from distribution 72 | mu, value = self._actor_critic(obs_dict) 73 | sigma = torch.exp(self.logstd) 74 | distr = torch.distributions.Normal(mu, sigma) 75 | selected_action = distr.sample() 76 | result = { 77 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 78 | 'values': value, 79 | 'actions': selected_action, 80 | 'mus': mu, 81 | 'sigmas': sigma, 82 | } 83 | return result 84 | 85 | @torch.no_grad() 86 | def infer_action(self, obs_dict): 87 | # used during inference 88 | mu, _ = self._actor_critic(obs_dict) 89 | return mu 90 | 91 | def _actor_critic(self, obs_dict): 92 | 93 | #what to do with the value network? 94 | obs = obs_dict['obs'] 95 | 96 | proprio_hist = obs_dict['proprio_buf'] 97 | 98 | if self.scale_proprio: 99 | proprio_hist = self.scale_q(proprio_hist) #scale proprio hist 100 | 101 | pc_hist = obs_dict['pc_buf'] #this is normalized 102 | 103 | 104 | attention_mask = obs_dict['attn_mask'] 105 | timesteps = obs_dict['timesteps'] 106 | 107 | if self.actor.cfg: 108 | action_hist = obs_dict['action_buf'] 109 | action_hist = torch.cat((action_hist, torch.zeros_like(action_hist[:,:1,:])), dim=1) 110 | else: 111 | action_hist=None 112 | 113 | res_dict, pc_embed = self.actor(proprio_hist, pc_hist, action_hist, timesteps.long(), attention_mask) 114 | 115 | # Value function should reuse features? 
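# Note on the question above: the critic currently reuses only the actor's point-cloud features.
# When pc_to_value is set, the last-step pc_embed returned by the transformer replaces the raw
# point-cloud slice of `obs` before value_fn is applied, and pc_embed is detached unless
# value_grads_to_pointnet is enabled, so value-loss gradients do not reach the PointNet by default.
# The transformer's proprio/action features are not shared with the value head.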
116 | 117 | if not self.value_grads_to_pointnet: 118 | pc_embed = pc_embed.detach() 119 | 120 | if self.pc_to_value: 121 | obs = torch.cat([obs[:,:self.pc_begin],pc_embed[:,-1],obs[:,self.pc_end:]],dim=1) 122 | value = self.value_fn(obs) 123 | 124 | mu = res_dict['action'][:,-1] #sigma in previous policy was independent of observations..F 125 | 126 | if not self.scale_action: 127 | mu = self.scale_q(mu) 128 | 129 | return mu, value 130 | 131 | def forward(self, input_dict): 132 | 133 | prev_actions = input_dict.get('prev_actions', None) 134 | mu, value = self._actor_critic(input_dict) 135 | sigma = torch.exp(self.logstd) 136 | distr = torch.distributions.Normal(mu, sigma) 137 | entropy = distr.entropy().sum(dim=-1) 138 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 139 | result = { 140 | 'prev_neglogp': torch.squeeze(prev_neglogp), 141 | 'values': value, 142 | 'entropy': entropy, 143 | 'mus': mu, 144 | 'sigmas': sigma 145 | } 146 | return result 147 | -------------------------------------------------------------------------------- /algo/models/running_mean_std.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | # Based on: IsaacGymEnvs 8 | # Copyright (c) 2018-2022, NVIDIA Corporation 9 | # Licence under BSD 3-Clause License 10 | # https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/ 11 | # -------------------------------------------------------- 12 | 13 | import torch 14 | import torch.nn as nn 15 | import numpy as np 16 | 17 | class RunningMeanStd(nn.Module): 18 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False): 19 | super(RunningMeanStd, self).__init__() 20 | print('RunningMeanStd: ', insize) 21 | self.insize = insize 22 | self.epsilon = epsilon 23 | 24 | self.norm_only = norm_only 25 | self.per_channel = per_channel 26 | if per_channel: 27 | if len(self.insize) == 3: 28 | self.axis = [0,1,2] 29 | if len(self.insize) == 2: 30 | self.axis = [0,1] #make this 0 and 1? 
31 | if len(self.insize) == 1: 32 | self.axis = [0] 33 | self.in_size = self.insize[-1] 34 | else: 35 | self.axis = [0] 36 | self.in_size = insize 37 | 38 | self.register_buffer('running_mean', torch.zeros(self.in_size, dtype = torch.float64)) 39 | self.register_buffer('running_var', torch.ones(self.in_size, dtype = torch.float64)) 40 | self.register_buffer('count', torch.ones((), dtype = torch.float64)) 41 | 42 | def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count): 43 | delta = batch_mean - mean 44 | tot_count = count + batch_count 45 | 46 | new_mean = mean + delta * batch_count / tot_count 47 | m_a = var * count 48 | m_b = batch_var * batch_count 49 | M2 = m_a + m_b + delta**2 * count * batch_count / tot_count 50 | new_var = M2 / tot_count 51 | new_count = tot_count 52 | return new_mean, new_var, new_count 53 | 54 | def forward(self, input, unnorm=False): 55 | if self.training: 56 | mean = input.mean(self.axis) # along channel axis 57 | var = input.var(self.axis) 58 | self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(self.running_mean, self.running_var, self.count, 59 | mean, var, input.size()[0] ) 60 | 61 | # change shape 62 | if self.per_channel: 63 | if len(self.insize) == 3: 64 | current_mean = self.running_mean.view([1, 1, 1, self.in_size]).expand_as(input) 65 | current_var = self.running_var.view([1, 1, 1, self.in_size]).expand_as(input) 66 | if len(self.insize) == 2: 67 | current_mean = self.running_mean.view([1, 1, self.in_size]).expand_as(input) 68 | current_var = self.running_var.view([1, 1, self.in_size]).expand_as(input) 69 | if len(self.insize) == 1: 70 | current_mean = self.running_mean.view([1, self.in_size]).expand_as(input) 71 | current_var = self.running_var.view([1, self.in_size]).expand_as(input) 72 | else: 73 | current_mean = self.running_mean 74 | current_var = self.running_var 75 | # get output 76 | 77 | 78 | if unnorm: 79 | y = torch.clamp(input, min=-5.0, max=5.0) 80 | y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float() 81 | else: 82 | if self.norm_only: 83 | y = input/ torch.sqrt(current_var.float() + self.epsilon) 84 | else: 85 | y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon) 86 | y = torch.clamp(y, min=-5.0, max=5.0) 87 | return y 88 | -------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/experience.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/experience.cpython-37.pyc -------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/mem_eff_experience.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/mem_eff_experience.cpython-37.pyc -------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/ppo_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/ppo_transformer.cpython-37.pyc 
-------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/ppobc_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/ppobc_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/ppo_transformer/experience.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | # Based on: RLGames 8 | # Copyright (c) 2019 Denys88 9 | # Licence under MIT License 10 | # https://github.com/Denys88/rl_games/ 11 | # -------------------------------------------------------- 12 | 13 | import gym 14 | import torch 15 | from torch.utils.data import Dataset 16 | import utils.pytorch_utils as ptu 17 | from termcolor import cprint 18 | 19 | def transform_op(arr): 20 | """ 21 | swap and then flatten axes 0 and 1 22 | """ 23 | if arr is None: 24 | return arr 25 | s = arr.size() 26 | return arr.transpose(0, 1).reshape(s[0] * s[1], *s[2:]) 27 | 28 | 29 | class ExperienceBuffer(Dataset): 30 | def __init__(self, num_envs, 31 | horizon_length, 32 | batch_size, 33 | minibatch_size, 34 | num_gradient_steps, 35 | obs_dim, 36 | proprio_dim, 37 | act_dim, 38 | pc_num, 39 | ctx_len, 40 | device): 41 | 42 | self.device = device 43 | self.num_envs = num_envs 44 | self.max_ep_len = horizon_length 45 | 46 | self.data_dict = None 47 | self.obs_dim = obs_dim 48 | self.proprio_dim = proprio_dim 49 | self.act_dim = act_dim 50 | self.ctx_len = ctx_len 51 | self.pc_num = pc_num 52 | self.storage_dict = { 53 | 'obses': torch.zeros((self.max_ep_len, self.num_envs, self.obs_dim), dtype=torch.float32, device=self.device), 54 | 'proprio_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len, self.proprio_dim),dtype=torch.float32, device=self.device), 55 | 'pc_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len, self.pc_num,3),dtype=torch.float32, device=self.device), 56 | 'action_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len-1, self.act_dim),dtype=torch.float32, device=self.device), 57 | # 'priv_info': torch.zeros((self.self.max_ep_len, self.num_envs, self.priv_dim), dtype=torch.float32, device=self.device), 58 | 'attn_mask': torch.zeros((self.max_ep_len, self.num_envs, self.ctx_len), dtype=torch.float32, device=self.device), 59 | 'timesteps': -1*torch.ones((self.max_ep_len, self.num_envs, self.ctx_len), dtype=torch.float32, device=self.device), 60 | 'rewards': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device), 61 | 'values': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device), 62 | 'neglogpacs': torch.zeros((self.max_ep_len, self.num_envs), dtype=torch.float32, device=self.device), 63 | 'dones': torch.zeros((self.max_ep_len, self.num_envs), dtype=torch.uint8, device=self.device), 64 | 'actions': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device), 65 | 'mus': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, 
device=self.device), 66 | 'sigmas': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device), 67 | 'returns': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device), 68 | } 69 | 70 | self.batch_size = batch_size 71 | self.length = self.num_gradient_steps = num_gradient_steps 72 | 73 | if self.length < self.max_ep_len: 74 | cprint('Warning: length of buffer is less than max_ep_len, full data is not getting used', 'red') 75 | self.minibatch_size = minibatch_size 76 | 77 | def __len__(self): 78 | return self.length 79 | 80 | def __getitem__(self, idx): 81 | start = idx * self.minibatch_size 82 | end = (idx + 1) * self.minibatch_size 83 | 84 | self.last_range = (start, end) 85 | input_dict = {} 86 | for k, v in self.data_dict.items(): 87 | if type(v) is dict: 88 | v_dict = {kd: vd[start:end] for kd, vd in v.items()} 89 | input_dict[k] = v_dict 90 | else: 91 | input_dict[k] = v[start:end] 92 | 93 | return input_dict['values'], input_dict['neglogpacs'], input_dict['advantages'], input_dict['mus'], \ 94 | input_dict['sigmas'], input_dict['returns'], input_dict['actions'], \ 95 | input_dict['obses'], input_dict['proprio_buf'], input_dict['pc_buf'], input_dict['action_buf'], \ 96 | input_dict['attn_mask'], input_dict['timesteps'] 97 | 98 | 99 | def update_mu_sigma(self, mu, sigma): 100 | start = self.last_range[0] 101 | end = self.last_range[1] 102 | self.data_dict['mus'][start:end] = mu 103 | self.data_dict['sigmas'][start:end] = sigma 104 | 105 | def update_data(self, name, index, val): 106 | if type(val) is dict: 107 | for k, v in val.items(): 108 | self.storage_dict[name][k][index,:] = v 109 | else: 110 | self.storage_dict[name][index,:] = val 111 | 112 | def compute_return(self, last_values, gamma, tau): 113 | last_gae_lam = 0 114 | mb_advs = torch.zeros_like(self.storage_dict['rewards']) 115 | for t in reversed(range(self.max_ep_len)): 116 | if t == self.max_ep_len - 1: 117 | next_values = last_values 118 | else: 119 | next_values = self.storage_dict['values'][t + 1] 120 | next_nonterminal = 1.0 - self.storage_dict['dones'].float()[t] 121 | next_nonterminal = next_nonterminal.unsqueeze(1) 122 | delta = self.storage_dict['rewards'][t] + gamma * next_values * next_nonterminal - self.storage_dict['values'][t] 123 | mb_advs[t] = last_gae_lam = delta + gamma * tau * next_nonterminal * last_gae_lam 124 | self.storage_dict['returns'][t, :] = mb_advs[t] + self.storage_dict['values'][t] #why? 
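# The recursion above is generalized advantage estimation:
#   delta_t = r_t + gamma * V_{t+1} * (1 - done_t) - V_t
#   A_t     = delta_t + gamma * tau * (1 - done_t) * A_{t+1}
# Adding V_t back converts the advantage into the lambda-return, which is what 'returns' stores and
# what the value function is trained to predict; prepare_training() below recovers the advantages
# as returns - values and normalizes them before the PPO update.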
125 | 126 | def prepare_training(self): 127 | self.data_dict = {} 128 | for k, v in self.storage_dict.items(): 129 | self.data_dict[k] = transform_op(v) 130 | advantages = self.data_dict['returns'] - self.data_dict['values'] 131 | self.data_dict['advantages'] = ((advantages - advantages.mean()) / (advantages.std() + 1e-8)).squeeze(1) 132 | return self.data_dict 133 | 134 | 135 | 136 | def get_info(self): 137 | buffer_info = { 138 | 'AverageReward' : ptu.to_numpy(self.storage_dict['rewards'].mean()), 139 | 'AverageReturn' : ptu.to_numpy(self.storage_dict['returns'].mean()), 140 | } 141 | 142 | return buffer_info 143 | -------------------------------------------------------------------------------- /algo/pretrained/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__init__.py -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/policy_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/policy_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/robot_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/robot_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/robot_transformer_ar.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/robot_transformer_ar.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import os 4 | import pickle as pkl 5 | from termcolor import cprint 6 | class TrajectoryDataset(Dataset): 7 | 8 | def __init__(self, root,ctx_length=64,device='cuda'): 9 | """ 10 | Args: 11 | data (Any): Your dataset (e.g., images, files, tensors). 12 | targets (Any): The labels or targets associated with your data. 13 | transform (callable, optional): Optional transform to be applied on a sample. 
14 | """ 15 | super(TrajectoryDataset, self).__init__() 16 | self.root = root 17 | self.device = device 18 | #assuming not many files in the directory 19 | self.episodes = [pkl.load(open(os.path.join(root,episode),'rb')) for episode in os.listdir(root)] 20 | self.ctx = ctx_length 21 | self.ep_lens = torch.tensor([(len(episode)- self.ctx+1) for episode in self.episodes]) 22 | self.cumsum = torch.cumsum(self.ep_lens,0) 23 | self.visualise() 24 | 25 | def visualise(self): 26 | """ 27 | Visualise the dataset. 28 | """ 29 | cprint(f"Number of episodes: {len(self.episodes)}",color='green',attrs=['bold']) 30 | cprint(f"Number of examples: {torch.sum(self.ep_lens)}",color='green',attrs=['bold']) 31 | cprint(f"Proprio dimension: {len(self.episodes[0]['robot_state'][0])}",color='green',attrs=['bold']) 32 | cprint(f"Action dimension: {len(self.episodes[0]['action'][0])}",color='green',attrs=['bold']) 33 | 34 | def __len__(self): 35 | """Returns the size of the dataset.""" 36 | return torch.sum(self.ep_lens).item() 37 | 38 | def __getitem__(self, index): 39 | """ 40 | Generates one sample of data. 41 | 42 | Args: 43 | index (int): The index of the item in the dataset 44 | 45 | Returns: 46 | sample (Any): The data sample corresponding to the given index. 47 | target (Any): The target corresponding to the given data sample. 48 | """ 49 | 50 | ep_idx = torch.searchsorted(self.cumsum, index, right=True) 51 | ep = self.episodes[ep_idx] 52 | idx = index - torch.sum(self.ep_lens[:ep_idx]) 53 | return { 54 | 'state': torch.tensor(ep['robot_state'][idx:idx+self.ctx]).to(self.device), 55 | 'action': torch.tensor(ep['action'][idx:idx+self.ctx]).to(self.device), 56 | 'timesteps': torch.tensor(torch.arange(idx,idx+self.ctx)).to(self.device), 57 | } 58 | 59 | 60 | 61 | def collate_fn(batch): 62 | 63 | state = torch.stack([torch.tensor(item['state']) for item in batch]) 64 | action = torch.stack([torch.tensor(item['action']) for item in batch]) 65 | timesteps = torch.stack([torch.tensor(item['timesteps']) for item in batch]) 66 | attention_mask = None 67 | 68 | return state, action, timesteps, attention_mask 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /algo/pretrained/depth_trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import time 5 | from torch.utils.data import DataLoader 6 | import os 7 | from datetime import datetime 8 | import wandb 9 | import tqdm 10 | from torch.nn.parallel import DistributedDataParallel as DDP 11 | from termcolor import cprint 12 | class DepthTrainer: 13 | 14 | def __init__(self, 15 | model, 16 | collate_fn, 17 | optimizer, 18 | loss_fn, 19 | model_save_dir, 20 | train_dataloader, 21 | val_dataset=None, 22 | config=None, 23 | scheduler=None, 24 | eval_fns=None, 25 | logger=None, 26 | rank=0, 27 | world_size=1, 28 | device='cuda'): 29 | 30 | self.model = model 31 | self.device = device 32 | self.optimizer = optimizer 33 | self.batch_size = config.pretrain.training.batch_size 34 | self.val_dataset = val_dataset 35 | self.collate_fn = collate_fn 36 | self.loss_fn = loss_fn 37 | self.scheduler = scheduler 38 | self.save_dir = model_save_dir 39 | self.rank = rank 40 | self.world_size = world_size 41 | self.eval_fns = [] if eval_fns is None else eval_fns 42 | self.diagnostics = dict() 43 | self.logger = logger 44 | self.saved_model_number = 0 45 | self.add_proprio_noise = config.pretrain.training.add_proprio_noise 46 | self.add_action_noise = 
config.pretrain.training.add_action_noise 47 | num_workers = config.pretrain.training.num_workers #add this to bash file 48 | self.log_freq = config.pretrain.training.log_freq 49 | self.model_save_freq = config.pretrain.training.model_save_freq 50 | # create a dataloader 51 | self.train_dataloader = train_dataloader 52 | 53 | self.start_time = time.time() 54 | 55 | def train_epoch(self, iter_num=0, print_logs=False): 56 | 57 | train_losses = [] 58 | train_losses_action = [] 59 | logs = dict() 60 | 61 | train_start = time.time() 62 | 63 | self.model.train() 64 | 65 | for i, batch in enumerate(tqdm.tqdm(self.train_dataloader)): 66 | 67 | proprio, depth , actions, timesteps, attention_mask = batch 68 | batch = proprio.to(self.device), depth.to(self.device), \ 69 | actions.to(self.device), timesteps.to(self.device), \ 70 | attention_mask.to(self.device) if attention_mask is not None else None 71 | 72 | train_loss = self.train_step(batch) 73 | 74 | train_losses_action.append(train_loss['action']) 75 | train_losses.append(train_loss['full']) 76 | 77 | if self.scheduler is not None: 78 | self.scheduler.step() 79 | 80 | if self.logger is not None and i % self.log_freq == 0: 81 | logs['time/training'] = time.time() - train_start 82 | logs['time/total'] = time.time() - self.start_time 83 | logs['optimizer/lr'] = self.optimizer.param_groups[0]['lr'] 84 | global_step = iter_num * len(self.train_dataloader) + i 85 | self.logger.log_dict(logs, global_step) 86 | logs['training/train_loss_mean'] = np.mean(train_losses) 87 | logs['training/train_loss_std'] = np.std(train_losses) 88 | logs['training/train_loss_action_mean'] = np.mean(train_losses_action) 89 | logs['training/train_loss_action_std'] = np.std(train_losses_action) 90 | 91 | global_step = iter_num * len(self.train_dataloader) + i 92 | if self.save_dir is not None and global_step % self.model_save_freq == 0: 93 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt')) 94 | self.saved_model_number += 1 95 | 96 | #if self.save_dir is not None and global_step % self.model_save_freq == 0: 97 | #torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt')) 98 | 99 | if print_logs and i % self.log_freq == 0: 100 | for k in self.diagnostics: 101 | logs[k] = self.diagnostics[k] 102 | print('=' * 80) 103 | print(f'Iteration {iter_num}') 104 | for k, v in logs.items(): 105 | print(f'{k}: {v}') 106 | 107 | return logs 108 | 109 | def train_step(self,batch): 110 | 111 | proprio, depth, actions, timesteps, attention_mask = batch 112 | 113 | 114 | 115 | action_target = torch.clone(actions) 116 | 117 | if self.add_proprio_noise: 118 | noise = torch.zeros_like(proprio) 119 | noise[...,:7] = torch.randn_like(proprio[...,:7])*0.1 #self.noise_arm 120 | noise[...,7:] = torch.randn_like(proprio[...,7:])*0.1 #self.noise_hand 121 | proprio = proprio + noise 122 | 123 | 124 | action_preds, _ = self.model.forward(proprio,depth,timesteps,attention_mask) 125 | 126 | act_dim = action_preds.shape[2] 127 | 128 | if attention_mask is not None: 129 | action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 130 | action_target = action_target.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 131 | 132 | 133 | loss_action = self.loss_fn(action_preds, action_target) 134 | 135 | loss = loss_action 136 | 137 | self.optimizer.zero_grad() 138 | loss.backward() 139 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), .25) 140 | self.optimizer.step() 141 | 142 | with 
torch.no_grad(): 143 | self.diagnostics['training/action_error'] = loss_action.detach().cpu().item() 144 | 145 | return_dict = {'action': loss_action.detach().cpu().item(), 146 | 'full': loss.detach().cpu().item() 147 | } 148 | 149 | return return_dict 150 | 151 | -------------------------------------------------------------------------------- /algo/pretrained/depth_trainer_multigpu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import time 5 | from torch.utils.data import DataLoader 6 | import os 7 | from datetime import datetime 8 | import wandb 9 | import tqdm 10 | from torch.nn.parallel import DistributedDataParallel as DDP 11 | from termcolor import cprint 12 | 13 | class MultiGPUTrainer: 14 | 15 | def __init__(self, 16 | model, 17 | train_dataset, 18 | collate_fn, 19 | loss_fn, 20 | model_save_dir, 21 | rank, 22 | world_size, 23 | val_dataset=None, 24 | config=None, 25 | scheduler=None, 26 | eval_fns=None, 27 | logger=None, 28 | device='cuda'): 29 | 30 | self.model = model 31 | self.rank = rank 32 | self.world_size = world_size 33 | if self.world_size > 1: 34 | self.device = f'cuda:{self.rank}' 35 | self.model = self.model.to(self.device) 36 | self.ddp_model = DDP(self.model, device_ids=[self.rank], output_device=self.rank) 37 | self.optimizer = torch.optim.Adam(self.ddp_model.parameters(), lr=config.pretrain.training.lr*config.num_gpus,weight_decay=config.pretrain.training.weight_decay) 38 | else: 39 | self.device = device 40 | self.model = self.model.to(self.device) 41 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.pretrain.training.lr,weight_decay=config.pretrain.training.weight_decay) 42 | 43 | self.batch_size = config.pretrain.training.batch_size 44 | self.train_dataset = train_dataset 45 | self.val_dataset = val_dataset 46 | self.collate_fn = collate_fn 47 | self.loss_fn = loss_fn 48 | self.scheduler = scheduler 49 | self.save_dir = model_save_dir 50 | self.eval_fns = [] if eval_fns is None else eval_fns 51 | self.diagnostics = dict() 52 | self.logger = logger 53 | 54 | self.saved_model_number = 0 55 | self.action_input = config.pretrain.model.action_input 56 | self.add_proprio_noise = config.pretrain.training.add_proprio_noise 57 | self.add_action_noise = config.pretrain.training.add_action_noise 58 | self.num_workers = config.pretrain.training.num_workers 59 | self.log_freq = config.pretrain.training.log_freq 60 | self.noise_arm = config.pretrain.training.noise_arm 61 | self.noise_hand = config.pretrain.training.noise_hand 62 | self.model_save_freq = config.pretrain.training.model_save_freq 63 | 64 | 65 | if self.world_size > 1: 66 | sampler = torch.utils.data.distributed.DistributedSampler(self.train_dataset, num_replicas=world_size, rank=rank) 67 | self.train_dataloader = DataLoader(self.train_dataset, 68 | batch_size=self.batch_size, 69 | num_workers=self.num_workers, 70 | collate_fn=self.collate_fn, 71 | sampler=sampler) 72 | if self.val_dataset is not None: 73 | sampler = torch.utils.data.distributed.DistributedSampler(self.val_dataset, 74 | num_replicas=world_size, 75 | rank=rank) 76 | 77 | self.val_dataloader = DataLoader(self.val_dataset, 78 | batch_size=self.batch_size, 79 | num_workers=self.num_workers, 80 | collate_fn=self.collate_fn, 81 | sampler=sampler) 82 | else: 83 | # create a dataloader 84 | print('Creating dataloader') 85 | self.train_dataloader = DataLoader(self.train_dataset, 86 | batch_size=self.batch_size, 87 | num_workers=self.num_workers, 88 | 
shuffle=True, 89 | collate_fn=self.collate_fn) 90 | 91 | if self.val_dataset is not None: 92 | self.val_dataloader = DataLoader(self.val_dataset, 93 | batch_size=self.batch_size, 94 | num_workers=self.num_workers, 95 | shuffle=False, 96 | collate_fn=self.collate_fn) 97 | 98 | self.start_time = time.time() 99 | 100 | def train_epoch(self, iter_num=0, print_logs=False): 101 | 102 | train_losses, train_losses_action = [], [] 103 | logs = dict() 104 | 105 | train_start = time.time() 106 | 107 | if self.world_size > 1: 108 | self.ddp_model.train() 109 | self.model.train() 110 | 111 | if self.world_size > 1: 112 | self.train_dataloader.sampler.set_epoch(iter_num) 113 | 114 | for i, batch in enumerate(tqdm.tqdm(self.train_dataloader)): 115 | 116 | proprio, depth, actions, timesteps, attention_mask = batch 117 | batch = proprio.to(self.device), depth.to(self.device), \ 118 | actions.to(self.device), timesteps.to(self.device), \ 119 | attention_mask.to(self.device) if attention_mask is not None else None 120 | 121 | 122 | 123 | train_loss = self.train_step(batch) 124 | 125 | 126 | 127 | train_losses_action.append(train_loss['action']) 128 | train_losses.append(train_loss['full']) 129 | 130 | if self.scheduler is not None: 131 | self.scheduler.step() 132 | 133 | 134 | if self.world_size > 1: 135 | torch.distributed.barrier() 136 | 137 | if self.logger is not None and i % self.log_freq == 0 and (self.world_size == 1 or self.rank==0): 138 | logs['time/training'] = time.time() - train_start 139 | logs['time/total'] = time.time() - self.start_time 140 | logs['optimizer/lr'] = self.optimizer.param_groups[0]['lr'] 141 | logs['training/train_loss_mean'] = np.mean(train_losses) 142 | logs['training/train_loss_std'] = np.std(train_losses) 143 | logs['training/train_loss_action_mean'] = np.mean(train_losses_action) 144 | logs['training/train_loss_action_std'] = np.std(train_losses_action) 145 | global_step = iter_num * len(self.train_dataloader) + i 146 | self.logger.log_dict(logs, global_step) 147 | 148 | if self.save_dir is not None and i % self.model_save_freq == 0 and (self.world_size == 1 or self.rank==0): 149 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, 'last.pt')) 150 | self.saved_model_number += 1 151 | 152 | if self.save_dir is not None and i % 5000 == 0 and (self.world_size == 1 or self.rank==0): 153 | global_step = iter_num * len(self.train_dataloader) + i 154 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt')) 155 | 156 | if print_logs and i % self.log_freq == 0 and (self.world_size == 1 or self.rank==0): 157 | for k in self.diagnostics: 158 | logs[k] = self.diagnostics[k] 159 | print('=' * 80) 160 | print(f'Iteration {iter_num}') 161 | for k, v in logs.items(): 162 | print(f'{k}: {v}') 163 | return logs 164 | 165 | def train_step(self, batch): 166 | 167 | proprio, depth, actions, timesteps, attention_mask = batch 168 | 169 | action_target = torch.clone(actions) 170 | 171 | if self.add_proprio_noise: 172 | noise = torch.zeros_like(proprio) 173 | noise[...,:7] = torch.randn_like(proprio[...,:7])*self.noise_arm 174 | noise[...,7:] = torch.randn_like(proprio[...,7:])*self.noise_hand 175 | proprio = proprio + noise 176 | 177 | 178 | if self.world_size > 1: 179 | action_preds, _ = self.ddp_model.forward( 180 | proprio, depth, timesteps=timesteps, attention_mask=attention_mask,) 181 | 182 | else: 183 | action_preds, _ = self.model.forward( 184 | proprio, depth, timesteps=timesteps,
attention_mask=attention_mask,) 185 | 186 | 187 | act_dim = action_preds.shape[2] 188 | 189 | if attention_mask is not None: 190 | action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 191 | action_target = action_target.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 192 | 193 | 194 | loss_action = self.loss_fn(action_preds, action_target) 195 | loss = loss_action 196 | 197 | 198 | self.optimizer.zero_grad() 199 | loss.backward() 200 | if self.world_size > 1: 201 | torch.nn.utils.clip_grad_norm_(self.ddp_model.parameters(), .25) 202 | else: 203 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), .25) 204 | 205 | self.optimizer.step() 206 | 207 | with torch.no_grad(): 208 | self.diagnostics['training/action_error'] = loss_action.detach().cpu().item() 209 | 210 | return_dict = {'action': loss_action.detach().cpu().item(), 211 | 'full': loss.detach().cpu().item() 212 | } 213 | 214 | return return_dict 215 | -------------------------------------------------------------------------------- /algo/pretrained/robot_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import os 4 | import pickle as pkl 5 | from termcolor import cprint 6 | import numpy as np 7 | class RobotDataset(Dataset): 8 | 9 | def __init__(self, root=None, cfg=None): 10 | """ 11 | Args: 12 | root (str): Root directory containing the retargeted trajectory episodes. 13 | cfg (DictConfig): Hydra config; the pretrain section supplies the device, 14 | context length, scaling flags and training options used below. 15 | """ 16 | assert root is not None, "Please provide the root directory of the dataset" 17 | assert os.path.exists(root), f"The directory {root} does not exist" 18 | super(RobotDataset, self).__init__() 19 | self.root = root 20 | print(f"Loading dataset from {root}") 21 | self.device = cfg.pretrain.device 22 | self.ctx = cfg.pretrain.model.context_length 23 | self.scale_action = cfg.pretrain.model.scale_action 24 | self.scale_proprio = cfg.pretrain.model.scale_proprio 25 | # set variable to store the episodes 26 | self.episodes_npy = [] 27 | self.ep_lens = [] 28 | # control timestep used to scale residual actions in __getitem__ (cfg default: 0.05 s, i.e. 20 Hz) 29 | self.dt = np.float32(cfg.pretrain.training.dt) 30 | self.use_residuals = cfg.pretrain.training.use_residuals 31 | # get all subject folders directly under the root 32 | subjects_dir = [os.path.join(root,episode) for episode in os.listdir(root) if os.path.isdir(os.path.join(root,episode))] 33 | # get all episode folders (depth 2 from the root) inside each subject folder 34 | self.episodes_dir = [os.path.join(subject,episode) for subject in subjects_dir for episode in os.listdir(subject) if os.path.isdir(os.path.join(subject,episode))] 35 | self.episodes_dir = sorted(self.episodes_dir) 36 | 37 | assert len(self.episodes_dir) > 0, f"No episodes found in the directory {root}" 38 | # load all the episodes 39 | for episode in self.episodes_dir: 40 | self.load_episode_fnames(episode) 41 | 42 | assert len(self.episodes_npy) > 0, f"No trajectories found in the directory {root}" 43 | # save the min, max, and mean of the episode lengths 44 | self.min_ep_len = np.min(self.ep_lens) 45 | self.max_ep_len = np.max(self.ep_lens) 46 | self.mean_ep_len = np.mean(self.ep_lens) 47 | cprint(f"Min episode length: {self.min_ep_len}, Max episode length: {self.max_ep_len}, Mean episode length: {self.mean_ep_len}",color='cyan',attrs=['bold']) 48 | self.ep_lens = torch.tensor(self.ep_lens) 49 | self.cumsum = torch.cumsum(self.ep_lens,0) 50 |
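# NOTE: ep_lens/cumsum let __getitem__ map a flat sample index back to an (episode, offset) pair via torch.searchsorted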
self.visualise() 51 | 52 | # IG lower and upper limits 53 | self.limits = {'upper': [6.2832, 2.0944, 6.2832, 3.9270, 6.2832, 3.1416, 6.2832, 0.4700, 1.6100, 1.7090, 1.6180, 1.3960, 54 | 1.1630, 1.6440, 1.7190, 0.4700, 1.6100, 1.7090, 1.6180, 0.4700, 1.6100, 1.7090, 1.6180], 55 | 'lower': [-6.2832, -2.0590, -6.2832, -0.1920, -6.2832, -1.6930, -6.2832, -0.4700, -0.1960, -0.1740, -0.2270, 56 | 0.2630, -0.1050, -0.1890, -0.1620, -0.4700, -0.1960, -0.1740, -0.2270, -0.4700, -0.1960, -0.1740, -0.2270]} 57 | 58 | 59 | self.limits['upper'] = np.array(self.limits['upper']).astype(np.float32) 60 | self.limits['lower'] = np.array(self.limits['lower']).astype(np.float32) 61 | 62 | 63 | def load_episode_fnames(self, episode_dir:str): 64 | """ 65 | Load the episodes filenames. 66 | """ 67 | for episode_fname in sorted(os.listdir(episode_dir)): 68 | # continue if the file is not a npy file 69 | if not episode_fname.endswith('.npy'): 70 | continue 71 | ep = np.load(os.path.join(episode_dir,episode_fname), allow_pickle=True).item() 72 | self.episodes_npy.append(ep) 73 | # load the file and get the length 74 | eplen = len(ep['robot_qpos']) - self.ctx + 1 75 | 76 | assert eplen > 0, f"Episode length is less than the context length {self.ctx}" 77 | 78 | self.ep_lens.append(eplen) 79 | 80 | def scale_q(self, q): 81 | """ 82 | Scale the proprioceptive data to be between -1 and 1. 83 | """ 84 | q = (q - self.limits['lower']) / (self.limits['upper'] - self.limits['lower']) 85 | q = 2 * q - 1 86 | return q 87 | 88 | def change_order(self, q): 89 | IG_mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 20, 21, 22, 11, 12, 13, 14, 15, 16, 17, 18] 90 | return q[:,IG_mapping] 91 | 92 | def visualise(self): 93 | """ 94 | Visualise the dataset. 95 | """ 96 | cprint(f"Number of episodes: {len(self.episodes_npy)}",color='green',attrs=['bold']) 97 | cprint(f"Number of examples: {torch.sum(self.ep_lens)}",color='green',attrs=['bold']) 98 | # Load the first episode to get the dimension of the proprio and action 99 | ep = self.episodes_npy[0] 100 | cprint(f"Proprio dimension: {len(ep['robot_qpos'][0])}",color='green',attrs=['bold']) 101 | cprint(f"Action dimension: {len(ep['target_qpos'][0])}",color='green',attrs=['bold']) 102 | 103 | def __len__(self): 104 | """Returns the size of the dataset.""" 105 | return torch.sum(self.ep_lens).item() 106 | 107 | def __getitem__(self, index): 108 | """ 109 | Generates one sample of data. 110 | 111 | Args: 112 | index (int): The index of the item in the dataset 113 | 114 | Returns: 115 | sample (Any): The data sample corresponding to the given index. 116 | target (Any): The target corresponding to the given data sample. 
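For this dataset the sample is a dict with 'proprio', 'action' and 'obj_pc' arrays spanning context_length steps plus a matching 'timesteps' array; no separate target is returned because the action sequence itself is the supervision.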
117 | """ 118 | 119 | ep_idx = torch.searchsorted(self.cumsum, index, right=True) 120 | # open the pickle file 121 | idx = index - torch.sum(self.ep_lens[:ep_idx]) 122 | ep = self.episodes_npy[ep_idx] 123 | action_npy = np.stack(ep['target_qpos'][idx:idx+self.ctx]) 124 | proprio_npy = np.stack(ep['robot_qpos'][idx:idx+self.ctx]) 125 | # Put in IG order 126 | action = self.change_order(action_npy) 127 | proprio = self.change_order(proprio_npy) 128 | # Scale the proprioceptive data in [-1,1] 129 | # For the first 7 elements of the action vector, predict the residual with respect to the previous action 130 | if self.use_residuals: 131 | action_res = np.concatenate([np.zeros((1,action.shape[1])), np.diff(action, axis=0)], axis=0) 132 | action_res[0] = action[0] - proprio[0] 133 | action_res = action_res.astype(np.float32) 134 | action = action_res / self.dt 135 | 136 | if self.scale_proprio: 137 | proprio = self.scale_q(proprio) 138 | if self.scale_action: 139 | action = self.scale_q(action) 140 | 141 | obj_pc = np.stack(ep['object_pc'][idx:idx+self.ctx]) 142 | 143 | return { 144 | 'proprio': proprio, 145 | 'action': action, 146 | 'obj_pc': obj_pc, 147 | 'timesteps': np.arange(self.ctx), 148 | } 149 | 150 | 151 | def collate_fn(batch): 152 | 153 | proprio = np.stack([item['proprio'] for item in batch]) 154 | object_pc = np.stack([item['obj_pc'] for item in batch]) 155 | action = np.stack([item['action'] for item in batch]) 156 | timesteps = np.stack([item['timesteps'] for item in batch]) 157 | attention_mask = None 158 | 159 | proprio = torch.tensor(proprio, dtype=torch.float32, requires_grad=False) 160 | object_pc = torch.tensor(object_pc, dtype=torch.float32, requires_grad=False) 161 | action = torch.tensor(action, dtype=torch.float32, requires_grad=False) 162 | timesteps = torch.tensor(timesteps, dtype=torch.long, requires_grad=False) 163 | 164 | return proprio, object_pc, action, timesteps, attention_mask 165 | -------------------------------------------------------------------------------- /cfg/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Task name - used to pick the class to load 3 | task_name: ${task.name} 4 | teacher_mode: False 5 | pc_input: True 6 | #shape 7 | shape: "" 8 | # if set to positive integer, overrides the default number of environments 9 | num_envs: 4096 10 | # seed - set to -1 to choose random seed 11 | seed: 0 12 | # set to True for deterministic performance 13 | torch_deterministic: False 14 | 15 | # set the maximum number of learning iterations to train for. overrides default per-environment setting 16 | max_iterations: '' 17 | 18 | ## Device config 19 | # 'physx' or 'flex' 20 | physics_engine: 'physx' 21 | # whether to use cpu or gpu pipeline 22 | pipeline: 'cpu' 23 | num_gpus: 1 # if 1, it will only use the gpu indicated below. Otherwise it will use num_gpus in order starting from zero (ignoring the gpu config below) 24 | # device for running physics simulation 25 | sim_device: 'cpu' 26 | # device to run RL 27 | rl_device: 'cpu' 28 | graphics_device_id: 0 29 | 30 | ## PhysX arguments 31 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only. 
32 | solver_type: 1 # 0: pgs, 1: tgs 33 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread 34 | 35 | 36 | # RLGames Arguments 37 | # test - if set, run policy in inference mode (requires setting checkpoint to load) 38 | test: False 39 | track_pose: False 40 | get_target_reference: False 41 | get_target_traj: False 42 | # save_jit - if Yes, it will save the Jit for execution on a real robot 43 | save_jit: False 44 | # used to set checkpoint path 45 | checkpoint: '' 46 | dagger_checkpoint: '' 47 | # set sigma when restoring network 48 | sigma: '' 49 | # set to True to use multi-gpu training 50 | multi_gpu: False 51 | 52 | wandb_activate: False 53 | wandb_group: '' 54 | wandb_name: AllegroKukaGraspingTest 55 | wandb_entity: 'himanshu_singh' 56 | wandb_project: 'isaacgym' 57 | wandb_tags: [] 58 | wandb_logcode_dir: '' 59 | 60 | capture_video: False 61 | capture_video_freq: 1464 62 | capture_video_len: 100 63 | force_render: True 64 | 65 | # disables rendering 66 | headless: True 67 | 68 | # set default task and default training config based on task 69 | defaults: 70 | - _self_ 71 | - task: AllegroXarmNew 72 | - train: ${task}PPO 73 | - pretrain: ${task} 74 | 75 | # set the directory where the output files get saved 76 | hydra: 77 | output_subdir: null 78 | run: 79 | dir: . 80 | -------------------------------------------------------------------------------- /cfg/launcher/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/cfg/launcher/default.yaml -------------------------------------------------------------------------------- /cfg/pretrain/AllegroXarmCabinet.yaml: -------------------------------------------------------------------------------- 1 | device: cuda:0 2 | wandb_name: "Pretrain_residuals_16ctx" 3 | model: 4 | proprio_dim: 23 5 | action_dim: 23 6 | pc_num: 100 7 | hidden_dim: 192 8 | max_ep_len: 4096 9 | max_length: null 10 | action_tanh: false 11 | context_length: 16 12 | n_layer: 4 13 | n_head: 4 14 | attn_pdrop: 0.0 15 | resid_pdrop: 0.0 16 | embd_pdrop: 0.0 17 | action_input: False 18 | scale_proprio: True 19 | full_autoregressive: True 20 | scale_action: True #these settings are for the working PolicyTransformer model 21 | test: False 22 | wandb_activate: False 23 | checkpoint: '' 24 | groundtruth_policy: '' 25 | load_trajectory: '' 26 | training: 27 | batch_size: 512 28 | modality_aligned: False 29 | use_pc_loss: False 30 | use_proprio_loss: False 31 | num_epochs: 100 32 | use_residuals: False 33 | num_workers: 16 34 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated) 35 | lr: 0.0001 36 | add_proprio_noise: True 37 | add_action_noise: True 38 | noise_arm: 0.1 39 | noise_hand: 0.1 40 | add_data_driven_noise: False 41 | weight_decay: 0.01 42 | log_freq: 1000 43 | model_save_freq: 1000 44 | time_shift: 0 45 | model_save_dir: algo/pretrained/models 46 | root_dir: retarget_data/train 47 | load_checkpoint: False 48 | checkpoint_path: '' 49 | validation: 50 | root_dir: retarget_data/val 51 | -------------------------------------------------------------------------------- /cfg/pretrain/AllegroXarmNew.yaml: -------------------------------------------------------------------------------- 1 | device: cuda:0 2 | wandb_name: "Pretrain_residuals_16ctx" 3 | model: 4 | proprio_dim: 23 5 | action_dim: 23 6 | pc_num: 100 7 | hidden_dim: 192 8 | max_ep_len: 4096 9 | max_length: null 10 | 
action_tanh: false 11 | context_length: 16 12 | n_layer: 4 13 | n_head: 4 14 | attn_pdrop: 0.0 15 | resid_pdrop: 0.0 16 | embd_pdrop: 0.0 17 | action_input: False 18 | scale_proprio: True 19 | full_autoregressive: True 20 | use_imagenet: False 21 | use_vit: False 22 | use_diffusion_policy: False 23 | use_r3m: False 24 | use_mvp_rgb: False 25 | use_r3m_depth: False 26 | use_vip: False 27 | diffusion_policy_horizon: None 28 | cache_all: False 29 | scale_action: True #these settings are for the working PolicyTransformer model 30 | test: False 31 | wandb_activate: False 32 | checkpoint: '' 33 | groundtruth_policy: '' 34 | load_trajectory: '' 35 | training: 36 | finetune_layernorm: False 37 | finetune_lastlayer: False 38 | batch_size: 256 39 | modality_aligned: False 40 | use_pc_loss: False 41 | use_proprio_loss: False 42 | num_epochs: 100 43 | use_residuals: False 44 | num_workers: 16 45 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated) 46 | lr: 0.0001 47 | add_proprio_noise: True 48 | add_action_noise: True 49 | noise_arm: 0.1 50 | noise_hand: 0.1 51 | add_data_driven_noise: False 52 | weight_decay: 0.01 53 | log_freq: 3000 54 | model_save_freq: 10000 55 | time_shift: 0 56 | model_save_dir: algo/pretrained/models 57 | root_dir: retarget_data/train 58 | load_checkpoint: False 59 | checkpoint_path: '' 60 | validation: 61 | root_dir: retarget_data/val 62 | 63 | diffusion: 64 | num_inference_steps: 100 65 | -------------------------------------------------------------------------------- /cfg/pretrain/AllegroXarmThrowing.yaml: -------------------------------------------------------------------------------- 1 | device: cuda:0 2 | wandb_name: "Pretrain_residuals_16ctx" 3 | model: 4 | proprio_dim: 23 5 | action_dim: 23 6 | pc_num: 100 7 | hidden_dim: 192 8 | max_ep_len: 4096 9 | max_length: null 10 | action_tanh: false 11 | context_length: 16 12 | n_layer: 4 13 | n_head: 4 14 | attn_pdrop: 0.0 15 | resid_pdrop: 0.0 16 | embd_pdrop: 0.0 17 | action_input: False 18 | scale_proprio: True 19 | full_autoregressive: True 20 | scale_action: True #these settings are for the working PolicyTransformer model 21 | test: False 22 | wandb_activate: False 23 | checkpoint: '' 24 | groundtruth_policy: '' 25 | load_trajectory: '' 26 | training: 27 | batch_size: 512 28 | modality_aligned: False 29 | use_pc_loss: False 30 | use_proprio_loss: False 31 | num_epochs: 100 32 | use_residuals: False 33 | num_workers: 16 34 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated) 35 | lr: 0.0001 36 | add_proprio_noise: True 37 | add_action_noise: True 38 | noise_arm: 0.1 39 | noise_hand: 0.1 40 | add_data_driven_noise: False 41 | weight_decay: 0.01 42 | log_freq: 1000 43 | model_save_freq: 1000 44 | time_shift: 0 45 | model_save_dir: algo/pretrained/models 46 | root_dir: retarget_data/train 47 | load_checkpoint: False 48 | checkpoint_path: '' 49 | validation: 50 | root_dir: retarget_data/val 51 | -------------------------------------------------------------------------------- /cfg/task/AllegroXarmCabinet.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | name: AllegroXarmCabinet 5 | 6 | physics_engine: ${..physics_engine} 7 | asset_root: '../assets' 8 | 9 | 10 | env: 11 | subtask: "" 12 | throw_far: False 13 | bucket_in_front: False 14 | use_leap: False 15 | use_allegro: True 16 | urdfFolder: "ycb_real_inertia" 17 | # if given, will override the device setting in gym. 
18 | #numEnvs: ${resolve_default:8192,${...num_envs}} 19 | numEnvs: ${...num_envs} 20 | envSpacing: 1.2 21 | episodeLength: 600 #change 22 | tablePosey: -0.15 23 | tablePosez: 0.023 24 | enableDebugVis: False 25 | enableVideoLog: False 26 | videoLogIdx: 0 27 | videoLogFreq: 20 28 | evalStats: False # extra evaluation-time statistics 29 | doSimpleObjects: True 30 | doVerySimpleObjects: False 31 | doDexYcbObjects: False 32 | useSavedInitPose: False 33 | limitArmDeltaTarget: True 34 | useRandomInitRot: False 35 | addZerosInPrivBuf: False 36 | usePoseRewardUnlifted: False 37 | usePoseRewardLifted: False 38 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"] 39 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"] 40 | initPoseVersion: v16 41 | useDIPFinger: False 42 | lowmem: False 43 | input_priv: True 44 | enableVhacd: True 45 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026'] 46 | simpleObjects: ['002', '011', '036', '010', '025', '024', '005', '007'] 47 | 48 | verysimpleObjects: ['002'] 49 | DexYcbObjects: ['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061'] 50 | 51 | clampAbsObservations: 10.0 52 | useOldActionSpace: False 53 | clampArmTarget: False 54 | 55 | stiffnessScale: 1.0 56 | forceLimitScale: 1.0 57 | useRelativeControl: False 58 | dofSpeedScale: 1.0 59 | actionsMovingAverage: 1.0 60 | controlFrequencyInv: 6 # 20 Hz 61 | jointVelocityLimit: 0.5 62 | 63 | resetPositionNoiseX: 0.1 64 | resetPositionNoiseY: 0.1 65 | resetPositionNoiseZ: 0.02 66 | resetRotationNoise: 1.0 67 | resetDofPosRandomIntervalFingers: 0.1 68 | resetDofPosRandomIntervalArm: 0.1 69 | resetDofVelRandomInterval: 0. 70 | 71 | 72 | pointCloudScale: 0.01 73 | # Random forces applied to the 74 | forceScale: 0.0 75 | forceProbRange: [0.001, 0.1] 76 | forceDecay: 0.99 77 | forceDecayInterval: 0.08 78 | 79 | resetOnArmCollision: False 80 | ArmTableCollisionThreshold: 10 81 | resetOnCollision: False 82 | ContactForceThreshold: 50 83 | resetOnFingerCrash: False 84 | FingerClearanceThreshold: 0.050 85 | 86 | liftingRewScale: 20.0 87 | goalHeight: 0.45 88 | handJointRewCoeff: 1 #work on this 89 | liftingBonus: 300.0 90 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus 91 | keypointRewScale: 200.0 92 | useFingertipReward: True 93 | usePalmReward: False 94 | useLiftingReward: True 95 | useKeypointReward: True 96 | distanceDeltaRewScale: 50.0 97 | useFingertipShapeDistReward: False 98 | useHandJointPoseRew: False 99 | 100 | 101 | handleDistRewardScale: 0.0 102 | aroundHandleRewardScale: 0.0 103 | openBonusRewardScale: 2.0 104 | goalDistRewardScale: 6.0 105 | openPoseRewardScale: 0.0 106 | goalBonusRewardScale: 2.0 107 | actionPenaltyScale: 0.01 108 | fingerDistRewardScale: 0.04 109 | thumbDistRewardScale: 0.08 110 | 111 | reachGoalBonus: 1000.0 112 | kukaActionsPenaltyScale: 0.003 113 | allegroActionsPenaltyScale: 0.0003 114 | fallDistance: 0.24 115 | fallPenalty: 0.0 116 | 117 | privilegedActions: False 118 | privilegedActionsTorque: 0.02 119 | 120 | # Physics v1, pretty much default settings we used from the start of the project 121 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used 122 | 123 | # gain of PD controller. 
124 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements 125 | armStiffness: 1000 #40.0 126 | handVelocity: 10.0 127 | armVelocity: 10.0 128 | 129 | handEffort: 0.35 # this is what was used in sim-to-real experiment. Motor torque in Newton*meters 130 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2 131 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2 132 | 133 | handDamping: 5 #increasing damping leads to less local oscillatory moment 134 | armDamping: 100 #5 135 | 136 | handArmature: 0 137 | armArmature: 0 138 | 139 | keypointScale: 1.5 140 | objectBaseSize: 0.05 141 | numPointCloud: 100 142 | 143 | randomizeObjectDimensions: True 144 | withSmallCuboids: True 145 | withBigCuboids: True 146 | withSticks: True 147 | 148 | objectType: "" #changing to ball only for now 149 | observationType: "full_state" 150 | successTolerance: 0.075 151 | targetSuccessTolerance: 0.01 152 | toleranceCurriculumIncrement: 0.9 # multiplicative 153 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps 154 | maxConsecutiveSuccesses: 2 155 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success 156 | 157 | saveStates: False 158 | saveStatesFile: "rootTensorsDofStates.bin" 159 | 160 | loadInitialStates: False 161 | loadStatesFile: "rootTensorsDofStates.bin" 162 | enableProprioHistory: True 163 | useObsAsProp: False 164 | enableActionHistory: True 165 | enableAttnMask: True 166 | enablePointCloud: True 167 | enableCameraSensors: False 168 | # set to True if you use camera sensors in the environment 169 | rgbd_camera: 170 | enable_depth: False 171 | enable_rgb: False 172 | render_slowness: 1 173 | camera_width: 60 174 | camera_height: 60 175 | buffer_width: 60 176 | buffer_height: 60 177 | fov: 60 178 | ss: 2 179 | num_cameras: 1 180 | intrinsics: 'utils/camera.json' 181 | randomize_camera_pose: 0.04 #in meters 182 | randomize_camera_rot: 5 #in degrees 183 | cam0: 184 | #pos: [0.20, -0.55, 0.65] 185 | #pos: [0.0, -0.31, 0.49] 186 | #pos: [0.12, -0.31, 0.55] 187 | pos: [0.12, -0.35, 0.60] 188 | target: [0.10, -0.25, 0.45] 189 | cam1: 190 | pos: [0.50, -0.15, 0.65] 191 | target: [0.0, -0.15, 0.6] 192 | wrist_camera: False 193 | 194 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet 195 | 196 | asset: 197 | # Whis was the original kuka_allegro asset. 198 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too 199 | # high in general. But in turn this leads to smoother movements and better looking behaviors. 200 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which 201 | # gives a bit more FPS. 202 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf" 203 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf" 204 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 205 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 206 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf" 207 | # This is the URDF which has more accurate collision shapes and weights. 208 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and 209 | # fingers which leads to faster training (better sample efficiency). 
But overall the resulting 210 | # behaviors look too fast and a bit unrealistic. 211 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not 212 | # lead to behaviors that would be worse for sim-to-real experiments. Most likely the problem is elsewhere, 213 | # for example the max torques might be too high, or the armature of the motors is too low. 214 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project. 215 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf" 216 | 217 | task: 218 | 219 | do_random_resets: False 220 | 221 | domain_randomization: 222 | randomize_friction: False 223 | friction_lower_limit: 0.6 224 | friction_upper_limit: 1.2 225 | 226 | randomize_object_mass: False 227 | mass_lower_limit: 0.8 228 | mass_upper_limit: 1.2 229 | 230 | randomize_object_com: False 231 | com_lower_limit: -0.05 232 | com_upper_limit: 0.05 233 | 234 | randomize_table_position: False 235 | table_y_lower: 0.45 236 | table_y_upper: 0.55 237 | table_z_lower: 0.01 238 | table_z_upper: 0.05 239 | 240 | randomize_table_friction: False 241 | table_friction_lower_limit: 0.6 242 | table_friction_upper_limit: 1.2 243 | 244 | 245 | sim: 246 | substeps: 2 247 | dt: 0.00833 # 1/120 248 | up_axis: "z" 249 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"} 250 | num_client_threads: 8 251 | 252 | gravity: [0.0, 0.0, -9.81] 253 | physx: 254 | num_threads: 6 255 | solver_type: 1 # 0: pgs, 1: tgs 256 | num_position_iterations: 8 257 | num_velocity_iterations: 0 258 | 259 | max_gpu_contact_pairs: 8388608 # 8*1024*1024 260 | num_subscenes: ${....num_subscenes} 261 | contact_offset: 0.002 262 | rest_offset: 0.0 263 | bounce_threshold_velocity: 0.2 264 | max_depenetration_velocity: 1000.0 265 | default_buffer_size_multiplier: 25.0 266 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 267 | -------------------------------------------------------------------------------- /cfg/task/AllegroXarmNew.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | name: AllegroXarmNew 5 | 6 | physics_engine: ${..physics_engine} 7 | asset_root: '../assets' 8 | 9 | 10 | env: 11 | subtask: "" 12 | use_leap: False 13 | use_allegro: True 14 | urdfFolder: "ycb_real_inertia" 15 | # if given, will override the device setting in gym. 
16 | #numEnvs: ${resolve_default:8192,${...num_envs}} 17 | numEnvs: ${...num_envs} 18 | envSpacing: 1.2 19 | episodeLength: 600 #change 20 | tablePosey: -0.15 21 | tablePosez: 0.023 22 | enableDebugVis: False 23 | enableVideoLog: False 24 | videoLogIdx: 0 25 | videoLogFreq: 20 26 | evalStats: False # extra evaluation-time statistics 27 | doSimpleObjects: True 28 | doVerySimpleObjects: False 29 | doDexYcbObjects: False 30 | useSavedInitPose: False 31 | limitArmDeltaTarget: True 32 | useRandomInitRot: False 33 | addZerosInPrivBuf: False 34 | usePoseRewardUnlifted: False 35 | usePoseRewardLifted: False 36 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"] 37 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"] 38 | initPoseVersion: v16 39 | useDIPFinger: False 40 | lowmem: False 41 | input_priv: True 42 | enableVhacd: True 43 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026'] 44 | simpleObjects: ['002', '036', '010', '025', '024', '005'] #['021', '035', '036', '019'] # 45 | 46 | verysimpleObjects: ['002'] 47 | DexYcbObjects: ['035','003','004','007','008','009','011', '021','037','040','051','052','061'] #['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061'] 48 | 49 | clampAbsObservations: 10.0 50 | useOldActionSpace: False 51 | clampArmTarget: False 52 | 53 | stiffnessScale: 1.0 54 | forceLimitScale: 1.0 55 | useRelativeControl: False 56 | dofSpeedScale: 1.0 57 | actionsMovingAverage: 1.0 58 | controlFrequencyInv: 6 # 20 Hz 59 | jointVelocityLimit: 0.5 60 | 61 | resetPositionNoiseX: 0.1 62 | resetPositionNoiseY: 0.1 63 | resetPositionNoiseZ: 0.02 64 | resetRotationNoise: 1.0 65 | resetDofPosRandomIntervalFingers: 0.1 66 | resetDofPosRandomIntervalArm: 0.1 67 | resetDofVelRandomInterval: 0. 68 | 69 | 70 | pointCloudScale: 0.01 71 | # Random forces applied to the 72 | forceScale: 0.0 73 | forceProbRange: [0.8, 0.8] 74 | forceDecay: 0.99 75 | forceDecayInterval: 0.08 76 | 77 | resetOnArmCollision: False 78 | ArmTableCollisionThreshold: 10 79 | resetOnCollision: False 80 | ContactForceThreshold: 50 81 | resetOnFingerCrash: False 82 | FingerClearanceThreshold: 0.050 83 | 84 | liftingRewScale: 20.0 85 | goalHeight: 0.45 86 | handJointRewCoeff: 1 #work on this 87 | liftingBonus: 300.0 88 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus 89 | keypointRewScale: 200.0 90 | useFingertipReward: True 91 | usePalmReward: False 92 | useLiftingReward: True 93 | useKeypointReward: True 94 | distanceDeltaRewScale: 50.0 95 | useFingertipShapeDistReward: False 96 | useHandJointPoseRew: False 97 | 98 | reachGoalBonus: 1000.0 99 | kukaActionsPenaltyScale: 0.003 100 | allegroActionsPenaltyScale: 0.0003 101 | fallDistance: 0.24 102 | fallPenalty: 0.0 103 | 104 | privilegedActions: False 105 | privilegedActionsTorque: 0.02 106 | 107 | # Physics v1, pretty much default settings we used from the start of the project 108 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used 109 | 110 | # gain of PD controller. 111 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements 112 | armStiffness: 1000 #40.0 113 | handVelocity: 10.0 114 | armVelocity: 10.0 115 | 116 | handEffort: 0.35 # this is what was used in sim-to-real experiment. 
Motor torque in Newton*meters 117 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2 118 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2 119 | 120 | handDamping: 5 #increasing damping leads to less local oscillatory moment 121 | armDamping: 100 #5 122 | 123 | handArmature: 0 124 | armArmature: 0 125 | 126 | keypointScale: 1.5 127 | objectBaseSize: 0.05 128 | numPointCloud: 100 129 | 130 | randomizeObjectDimensions: True 131 | withSmallCuboids: True 132 | withBigCuboids: True 133 | withSticks: True 134 | 135 | objectType: "" #changing to ball only for now 136 | observationType: "full_state" 137 | successTolerance: 0.075 138 | targetSuccessTolerance: 0.01 139 | toleranceCurriculumIncrement: 0.9 # multiplicative 140 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps 141 | maxConsecutiveSuccesses: 2 142 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success 143 | 144 | saveStates: False 145 | saveStatesFile: "rootTensorsDofStates.bin" 146 | 147 | loadInitialStates: False 148 | loadStatesFile: "rootTensorsDofStates.bin" 149 | enableProprioHistory: True 150 | useObsAsProp: False 151 | enableActionHistory: True 152 | enableAttnMask: True 153 | enablePointCloud: True 154 | enableCameraSensors: False 155 | # set to True if you use camera sensors in the environment 156 | rgbd_camera: 157 | enable_depth: False 158 | enable_rgb: False 159 | render_slowness: 1 160 | camera_width: 60 161 | camera_height: 60 162 | buffer_width: 60 163 | buffer_height: 60 164 | fov: 60 165 | ss: 2 166 | num_cameras: 1 167 | intrinsics: 'utils/camera2.json' 168 | randomize_camera_pose: 0.04 #in meters 169 | randomize_camera_rot: 5 #in degrees 170 | cam0: 171 | #pos: [0.20, -0.55, 0.65] 172 | #pos: [0.0, -0.31, 0.49] 173 | #pos: [0.12, -0.31, 0.55] 174 | pos: [0.12, -0.35, 0.60] 175 | target: [0.10, -0.25, 0.45] 176 | cam1: 177 | pos: [0.50, -0.15, 0.65] 178 | target: [0.0, -0.15, 0.6] 179 | wrist_camera: False 180 | 181 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet 182 | 183 | asset: 184 | # Whis was the original kuka_allegro asset. 185 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too 186 | # high in general. But in turn this leads to smoother movements and better looking behaviors. 187 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which 188 | # gives a bit more FPS. 189 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf" 190 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf" 191 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 192 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 193 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf" 194 | # This is the URDF which has more accurate collision shapes and weights. 195 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and 196 | # fingers which leads to faster training (better sample efficiency). But overall the resulting 197 | # behaviors look too fast and a bit unrealistic. 198 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not 199 | # lead to behaviors that would be worse for sim-to-real experiments. 
Most likely the problem is elsewhere, 200 | # for example the max torques might be too high, or the armature of the motors is too low. 201 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project. 202 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf" 203 | 204 | task: 205 | 206 | do_random_resets: False 207 | 208 | domain_randomization: 209 | randomize_friction: False 210 | friction_lower_limit: 0.6 211 | friction_upper_limit: 1.2 212 | 213 | randomize_object_mass: False 214 | mass_lower_limit: 0.8 215 | mass_upper_limit: 1.2 216 | 217 | randomize_object_com: False 218 | com_lower_limit: -0.05 219 | com_upper_limit: 0.05 220 | 221 | randomize_table_position: False 222 | table_rnd_y: 0.02 223 | table_rnd_z: 0.02 224 | table_rnd_x: 0.02 225 | 226 | randomize_table_friction: False 227 | table_friction_lower_limit: 0.6 228 | table_friction_upper_limit: 1.2 229 | 230 | 231 | sim: 232 | substeps: 2 233 | dt: 0.00833 # 1/120 234 | up_axis: "z" 235 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"} 236 | num_client_threads: 8 237 | 238 | gravity: [0.0, 0.0, -9.81] 239 | physx: 240 | num_threads: 6 241 | solver_type: 1 # 0: pgs, 1: tgs 242 | num_position_iterations: 8 243 | num_velocity_iterations: 0 244 | 245 | max_gpu_contact_pairs: 8388608 # 8*1024*1024 246 | num_subscenes: ${....num_subscenes} 247 | contact_offset: 0.002 248 | rest_offset: 0.0 249 | bounce_threshold_velocity: 0.2 250 | max_depenetration_velocity: 1000.0 251 | default_buffer_size_multiplier: 25.0 252 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 253 | -------------------------------------------------------------------------------- /cfg/task/AllegroXarmThrowing.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | name: AllegroXarmThrowing 5 | 6 | physics_engine: ${..physics_engine} 7 | asset_root: '../assets' 8 | 9 | 10 | env: 11 | subtask: "" 12 | throw_far: False 13 | bucket_in_front: False 14 | use_leap: False 15 | use_allegro: True 16 | urdfFolder: "ycb_real_inertia" 17 | # if given, will override the device setting in gym. 
18 | #numEnvs: ${resolve_default:8192,${...num_envs}} 19 | numEnvs: ${...num_envs} 20 | envSpacing: 1.2 21 | episodeLength: 600 #change 22 | tablePosey: -0.15 23 | tablePosez: 0.023 24 | enableDebugVis: False 25 | enableVideoLog: False 26 | videoLogIdx: 0 27 | videoLogFreq: 20 28 | evalStats: False # extra evaluation-time statistics 29 | doSimpleObjects: True 30 | doVerySimpleObjects: False 31 | doDexYcbObjects: False 32 | useSavedInitPose: False 33 | limitArmDeltaTarget: True 34 | useRandomInitRot: False 35 | addZerosInPrivBuf: False 36 | usePoseRewardUnlifted: False 37 | usePoseRewardLifted: False 38 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"] 39 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"] 40 | initPoseVersion: v16 41 | useDIPFinger: False 42 | lowmem: False 43 | input_priv: True 44 | enableVhacd: True 45 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026'] 46 | simpleObjects: ['002', '011', '036', '010', '025', '024', '005', '007'] 47 | 48 | verysimpleObjects: ['002'] 49 | DexYcbObjects: ['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061'] 50 | 51 | clampAbsObservations: 10.0 52 | useOldActionSpace: False 53 | clampArmTarget: False 54 | 55 | stiffnessScale: 1.0 56 | forceLimitScale: 1.0 57 | useRelativeControl: False 58 | dofSpeedScale: 1.0 59 | actionsMovingAverage: 1.0 60 | controlFrequencyInv: 6 # 20 Hz 61 | jointVelocityLimit: 0.5 62 | 63 | resetPositionNoiseX: 0.1 64 | resetPositionNoiseY: 0.1 65 | resetPositionNoiseZ: 0.02 66 | resetRotationNoise: 1.0 67 | resetDofPosRandomIntervalFingers: 0.1 68 | resetDofPosRandomIntervalArm: 0.1 69 | resetDofVelRandomInterval: 0. 70 | 71 | 72 | pointCloudScale: 0.01 73 | # Random forces applied to the 74 | forceScale: 0.0 75 | forceProbRange: [0.001, 0.1] 76 | forceDecay: 0.99 77 | forceDecayInterval: 0.08 78 | 79 | resetOnArmCollision: False 80 | ArmTableCollisionThreshold: 10 81 | resetOnCollision: False 82 | ContactForceThreshold: 50 83 | resetOnFingerCrash: False 84 | FingerClearanceThreshold: 0.050 85 | 86 | liftingRewScale: 20.0 87 | goalHeight: 0.45 88 | handJointRewCoeff: 1 #work on this 89 | liftingBonus: 300.0 90 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus 91 | keypointRewScale: 200.0 92 | useFingertipReward: True 93 | usePalmReward: False 94 | useLiftingReward: True 95 | useKeypointReward: True 96 | distanceDeltaRewScale: 50.0 97 | useFingertipShapeDistReward: False 98 | useHandJointPoseRew: False 99 | 100 | reachGoalBonus: 1000.0 101 | kukaActionsPenaltyScale: 0.003 102 | allegroActionsPenaltyScale: 0.0003 103 | fallDistance: 0.24 104 | fallPenalty: 0.0 105 | 106 | privilegedActions: False 107 | privilegedActionsTorque: 0.02 108 | 109 | # Physics v1, pretty much default settings we used from the start of the project 110 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used 111 | 112 | # gain of PD controller. 113 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements 114 | armStiffness: 1000 #40.0 115 | handVelocity: 10.0 116 | armVelocity: 10.0 117 | 118 | handEffort: 0.35 # this is what was used in sim-to-real experiment. 
Motor torque in Newton*meters 119 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2 120 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2 121 | 122 | handDamping: 5 #increasing damping leads to less local oscillatory moment 123 | armDamping: 100 #5 124 | 125 | handArmature: 0 126 | armArmature: 0 127 | 128 | keypointScale: 1.5 129 | objectBaseSize: 0.05 130 | numPointCloud: 100 131 | 132 | randomizeObjectDimensions: True 133 | withSmallCuboids: True 134 | withBigCuboids: True 135 | withSticks: True 136 | 137 | objectType: "" #changing to ball only for now 138 | observationType: "full_state" 139 | successTolerance: 0.075 140 | targetSuccessTolerance: 0.01 141 | toleranceCurriculumIncrement: 0.9 # multiplicative 142 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps 143 | maxConsecutiveSuccesses: 2 144 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success 145 | 146 | saveStates: False 147 | saveStatesFile: "rootTensorsDofStates.bin" 148 | 149 | loadInitialStates: False 150 | loadStatesFile: "rootTensorsDofStates.bin" 151 | enableProprioHistory: True 152 | useObsAsProp: False 153 | enableActionHistory: True 154 | enableAttnMask: True 155 | enablePointCloud: True 156 | enableCameraSensors: False 157 | # set to True if you use camera sensors in the environment 158 | rgbd_camera: 159 | enable_depth: False 160 | enable_rgb: False 161 | render_slowness: 1 162 | camera_width: 60 163 | camera_height: 60 164 | buffer_width: 60 165 | buffer_height: 60 166 | fov: 60 167 | ss: 2 168 | num_cameras: 1 169 | intrinsics: 'utils/camera.json' 170 | randomize_camera_pose: 0.04 #in meters 171 | randomize_camera_rot: 5 #in degrees 172 | cam0: 173 | #pos: [0.20, -0.55, 0.65] 174 | #pos: [0.0, -0.31, 0.49] 175 | #pos: [0.12, -0.31, 0.55] 176 | pos: [0.12, -0.35, 0.60] 177 | target: [0.10, -0.25, 0.45] 178 | cam1: 179 | pos: [0.50, -0.15, 0.65] 180 | target: [0.0, -0.15, 0.6] 181 | wrist_camera: False 182 | 183 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet 184 | 185 | asset: 186 | # Whis was the original kuka_allegro asset. 187 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too 188 | # high in general. But in turn this leads to smoother movements and better looking behaviors. 189 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which 190 | # gives a bit more FPS. 191 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf" 192 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf" 193 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 194 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 195 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf" 196 | # This is the URDF which has more accurate collision shapes and weights. 197 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and 198 | # fingers which leads to faster training (better sample efficiency). But overall the resulting 199 | # behaviors look too fast and a bit unrealistic. 200 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not 201 | # lead to behaviors that would be worse for sim-to-real experiments. 
Most likely the problem is elsewhere, 202 | # for example the max torques might be too high, or the armature of the motors is too low. 203 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project. 204 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf" 205 | 206 | task: 207 | 208 | do_random_resets: False 209 | 210 | domain_randomization: 211 | randomize_friction: False 212 | friction_lower_limit: 0.6 213 | friction_upper_limit: 1.2 214 | 215 | randomize_object_mass: False 216 | mass_lower_limit: 0.8 217 | mass_upper_limit: 1.2 218 | 219 | randomize_object_com: False 220 | com_lower_limit: -0.05 221 | com_upper_limit: 0.05 222 | 223 | randomize_table_position: False 224 | table_y_lower: 0.45 225 | table_y_upper: 0.55 226 | table_z_lower: 0.01 227 | table_z_upper: 0.05 228 | 229 | randomize_table_friction: False 230 | table_friction_lower_limit: 0.6 231 | table_friction_upper_limit: 1.2 232 | 233 | 234 | sim: 235 | substeps: 2 236 | dt: 0.00833 # 1/120 237 | up_axis: "z" 238 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"} 239 | num_client_threads: 8 240 | 241 | gravity: [0.0, 0.0, -9.81] 242 | physx: 243 | num_threads: 6 244 | solver_type: 1 # 0: pgs, 1: tgs 245 | num_position_iterations: 8 246 | num_velocity_iterations: 0 247 | 248 | max_gpu_contact_pairs: 8388608 # 8*1024*1024 249 | num_subscenes: ${....num_subscenes} 250 | contact_offset: 0.002 251 | rest_offset: 0.0 252 | bounce_threshold_velocity: 0.2 253 | max_depenetration_velocity: 1000.0 254 | default_buffer_size_multiplier: 25.0 255 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 256 | -------------------------------------------------------------------------------- /cfg/train/AllegroXarmCabinetPPO.yaml: -------------------------------------------------------------------------------- 1 | seed: ${..seed} 2 | algo: PPO 3 | network: 4 | mlp: 5 | units: [512, 256, 128] 6 | priv_mlp: 7 | units: [256, 128, 8] 8 | 9 | pc_mlp: 10 | out_dim: 64 11 | units: [64,64] 12 | 13 | load_path: ${..checkpoint} # path to the checkpoint to load 14 | 15 | ppo: 16 | output_name: 'debug' 17 | normalize_input: True 18 | normalize_value: True 19 | normalize_pc: False 20 | normalize_proprio_hist: False 21 | value_bootstrap: True 22 | num_actors: ${...task.env.numEnvs} 23 | num_gradient_steps: ${...train.ppo.horizon_length} 24 | normalize_advantage: True 25 | gamma: 0.99 26 | tau: 0.95 27 | initEpsArm: 1.0 28 | initEpsHand: 1.0 29 | value_grads_to_pointnet: True 30 | point_cloud_input_to_value: False 31 | learning_rate: 1e-4 32 | kl_threshold: 0.02 33 | min_lr: 1e-6 34 | max_lr: 1e-4 35 | # PPO batch collection 36 | horizon_length: 10 37 | minibatch_size: 32768 38 | mini_epochs: 1 39 | # PPO loss setting 40 | clip_value: True 41 | critic_coef: 4 42 | entropy_coef: 0.0 43 | e_clip: 0.2 44 | bounds_loss_coef: 0.0001 45 | # grad clipping 46 | truncate_grads: True 47 | grad_norm: 1.0 48 | # snapshot setting 49 | save_best_after: 0 50 | save_frequency: 1250 51 | max_agent_steps: 5000000000 52 | critic_warmup_steps: -1 53 | # hora setting 54 | priv_info: False 55 | priv_info_dim: 9 56 | priv_info_embed_dim: 8 57 | proprio_adapt: False 58 | useMemoryEfficientBuffer: False 59 | dapg: 60 | l1: 0.1 61 | l2: 0.999 62 | dapg_threshold: 0.002 63 | 64 | wandb: 65 | activate: True 66 | entity: himanshu_singh 67 | project: grasping 68 | 
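The learning_rate, kl_threshold, min_lr and max_lr fields above (also present in the other two train configs) point to a KL-adaptive learning-rate schedule. A minimal sketch of the usual rule, assuming these exact field names; the constants (the 1.5 factor and the 2x/0.5x bands) are illustrative and the repo's PPO implementation under algo/ppo_transformer may use different ones:

def adapt_lr(current_lr, kl, kl_threshold=0.02, min_lr=1e-6, max_lr=1e-4):
    # shrink the step when the policy update overshoots the KL target,
    # grow it when the update is overly conservative, clamp to [min_lr, max_lr]
    if kl > 2.0 * kl_threshold:
        current_lr /= 1.5
    elif kl < 0.5 * kl_threshold:
        current_lr *= 1.5
    return min(max_lr, max(min_lr, current_lr))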
-------------------------------------------------------------------------------- /cfg/train/AllegroXarmNewPPO.yaml: -------------------------------------------------------------------------------- 1 | # params: 2 | # seed: ${...seed} 3 | 4 | # algo: 5 | # name: a2c_continuous 6 | 7 | # model: 8 | # name: continuous_a2c_logstd 9 | 10 | # network: 11 | # name: a2c_pointnet 12 | # separate: False 13 | 14 | # space: 15 | # continuous: 16 | # mu_activation: None 17 | # sigma_activation: None 18 | # mu_init: 19 | # name: default 20 | # sigma_init: 21 | # name: const_initializer 22 | # val: 0 23 | # fixed_sigma: True 24 | 25 | # mlp: 26 | # units: [1024, 1024, 512, 512] 27 | # activation: elu 28 | # d2rl: False 29 | # initializer: 30 | # name: default 31 | # regularizer: 32 | # name: None 33 | 34 | # load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 35 | # load_path: ${...checkpoint} # path to the checkpoint to load 36 | 37 | # config: 38 | # name: ${resolve_default:AllegroKukaPPO,${....experiment}} 39 | # # full_experiment_name: ${.name} 40 | # env_name: rlgpu 41 | # multi_gpu: ${....multi_gpu} 42 | # ppo: True 43 | # mixed_precision: True 44 | # normalize_input: True 45 | # normalize_value: True 46 | # normalize_advantage: True 47 | # reward_shaper: 48 | # scale_value: 0.01 49 | 50 | # num_actors: ${....task.env.numEnvs} 51 | # gamma: 0.99 52 | # tau: 0.95 53 | # learning_rate: 1e-4 54 | # lr_schedule: adaptive 55 | # schedule_type: standard 56 | # kl_threshold: 0.016 57 | # score_to_win: 1000000 58 | # max_epochs: 100000 59 | # max_frames: 10_000_000_000 60 | # save_best_after: 100 61 | # save_frequency: 5000 62 | # print_stats: True 63 | # grad_norm: 1.0 64 | # entropy_coef: 0.0 65 | # truncate_grads: True 66 | # e_clip: 0.1 67 | # minibatch_size: 8192 68 | # mini_epochs: 4 69 | # critic_coef: 4.0 70 | # clip_value: True 71 | # horizon_length: 16 72 | # seq_length: 16 73 | 74 | # # SampleFactory currently gives better results without bounds loss but I don't think this loss matters too much 75 | # # bounds_loss_coef: 0.0 76 | # bounds_loss_coef: 0.0001 77 | 78 | # # optimize summaries to prevent tf.event files from growing to gigabytes 79 | # defer_summaries_sec: 5 80 | # summaries_interval_sec_min: 5 81 | # summaries_interval_sec_max: 300 82 | 83 | # player: 84 | # #render: True 85 | # deterministic: False # be careful there's a typo in older versions of rl_games in this parameter name ("determenistic") 86 | # games_num: 100000 87 | # print_stats: False 88 | seed: ${..seed} 89 | algo: PPOTransformer 90 | network: 91 | mlp: 92 | units: [512, 256, 128] 93 | priv_mlp: 94 | units: [256, 128, 8] 95 | 96 | pc_mlp: 97 | out_dim: 64 98 | units: [64,64] 99 | 100 | load_path: ${..checkpoint} # path to the checkpoint to load 101 | 102 | ppo: 103 | output_name: 'debug' 104 | normalize_input: True 105 | normalize_value: True 106 | normalize_pc: False 107 | normalize_proprio_hist: False 108 | value_bootstrap: True 109 | num_actors: ${...task.env.numEnvs} 110 | num_gradient_steps: ${...train.ppo.horizon_length} 111 | normalize_advantage: True 112 | gamma: 0.99 113 | tau: 0.95 114 | initEpsArm: 1.0 115 | initEpsHand: 1.0 116 | value_grads_to_pointnet: True 117 | point_cloud_input_to_value: True 118 | learning_rate: 1e-4 119 | kl_threshold: 0.02 120 | min_lr: 1e-6 121 | max_lr: 1e-4 122 | # PPO batch collection 123 | horizon_length: 10 124 | minibatch_size: 4096 125 | mini_epochs: 1 126 | # PPO loss setting 127 | clip_value: True 128 | critic_coef: 4 129 | 
entropy_coef: 0.0 130 | e_clip: 0.2 131 | bounds_loss_coef: 0.0001 132 | # grad clipping 133 | truncate_grads: True 134 | grad_norm: 1.0 135 | # snapshot setting 136 | save_best_after: 0 137 | save_frequency: 1250 138 | max_agent_steps: 5000000000 139 | critic_warmup_steps: -1 140 | # hora setting 141 | priv_info: False 142 | priv_info_dim: 9 143 | priv_info_embed_dim: 8 144 | proprio_adapt: False 145 | useMemoryEfficientBuffer: False 146 | dapg: 147 | l1: 0.1 148 | l2: 0.999 149 | dapg_threshold: 0.002 150 | 151 | wandb: 152 | activate: True 153 | entity: himanshu_singh 154 | project: grasping 155 | -------------------------------------------------------------------------------- /cfg/train/AllegroXarmThrowingPPO.yaml: -------------------------------------------------------------------------------- 1 | # params: 2 | # seed: ${...seed} 3 | 4 | # algo: 5 | # name: a2c_continuous 6 | 7 | # model: 8 | # name: continuous_a2c_logstd 9 | 10 | # network: 11 | # name: a2c_pointnet 12 | # separate: False 13 | 14 | # space: 15 | # continuous: 16 | # mu_activation: None 17 | # sigma_activation: None 18 | # mu_init: 19 | # name: default 20 | # sigma_init: 21 | # name: const_initializer 22 | # val: 0 23 | # fixed_sigma: True 24 | 25 | # mlp: 26 | # units: [1024, 1024, 512, 512] 27 | # activation: elu 28 | # d2rl: False 29 | # initializer: 30 | # name: default 31 | # regularizer: 32 | # name: None 33 | 34 | # load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 35 | # load_path: ${...checkpoint} # path to the checkpoint to load 36 | 37 | # config: 38 | # name: ${resolve_default:AllegroKukaPPO,${....experiment}} 39 | # # full_experiment_name: ${.name} 40 | # env_name: rlgpu 41 | # multi_gpu: ${....multi_gpu} 42 | # ppo: True 43 | # mixed_precision: True 44 | # normalize_input: True 45 | # normalize_value: True 46 | # normalize_advantage: True 47 | # reward_shaper: 48 | # scale_value: 0.01 49 | 50 | # num_actors: ${....task.env.numEnvs} 51 | # gamma: 0.99 52 | # tau: 0.95 53 | # learning_rate: 1e-4 54 | # lr_schedule: adaptive 55 | # schedule_type: standard 56 | # kl_threshold: 0.016 57 | # score_to_win: 1000000 58 | # max_epochs: 100000 59 | # max_frames: 10_000_000_000 60 | # save_best_after: 100 61 | # save_frequency: 5000 62 | # print_stats: True 63 | # grad_norm: 1.0 64 | # entropy_coef: 0.0 65 | # truncate_grads: True 66 | # e_clip: 0.1 67 | # minibatch_size: 8192 68 | # mini_epochs: 4 69 | # critic_coef: 4.0 70 | # clip_value: True 71 | # horizon_length: 16 72 | # seq_length: 16 73 | 74 | # # SampleFactory currently gives better results without bounds loss but I don't think this loss matters too much 75 | # # bounds_loss_coef: 0.0 76 | # bounds_loss_coef: 0.0001 77 | 78 | # # optimize summaries to prevent tf.event files from growing to gigabytes 79 | # defer_summaries_sec: 5 80 | # summaries_interval_sec_min: 5 81 | # summaries_interval_sec_max: 300 82 | 83 | # player: 84 | # #render: True 85 | # deterministic: False # be careful there's a typo in older versions of rl_games in this parameter name ("determenistic") 86 | # games_num: 100000 87 | # print_stats: False 88 | seed: ${..seed} 89 | algo: PPO 90 | network: 91 | mlp: 92 | units: [512, 256, 128] 93 | priv_mlp: 94 | units: [256, 128, 8] 95 | 96 | pc_mlp: 97 | out_dim: 64 98 | units: [64,64] 99 | 100 | load_path: ${..checkpoint} # path to the checkpoint to load 101 | 102 | ppo: 103 | output_name: 'debug' 104 | normalize_input: True 105 | normalize_value: True 106 | normalize_pc: False 107 | 
normalize_proprio_hist: False 108 | value_bootstrap: True 109 | num_actors: ${...task.env.numEnvs} 110 | num_gradient_steps: ${...train.ppo.horizon_length} 111 | normalize_advantage: True 112 | gamma: 0.99 113 | tau: 0.95 114 | initEpsArm: 1.0 115 | initEpsHand: 1.0 116 | value_grads_to_pointnet: True 117 | point_cloud_input_to_value: True 118 | learning_rate: 1e-4 119 | kl_threshold: 0.02 120 | min_lr: 1e-6 121 | max_lr: 1e-4 122 | # PPO batch collection 123 | horizon_length: 10 124 | minibatch_size: 32768 125 | mini_epochs: 1 126 | # PPO loss setting 127 | clip_value: True 128 | critic_coef: 4 129 | entropy_coef: 0.0 130 | e_clip: 0.2 131 | bounds_loss_coef: 0.0001 132 | # grad clipping 133 | truncate_grads: True 134 | grad_norm: 1.0 135 | # snapshot setting 136 | save_best_after: 0 137 | save_frequency: 1250 138 | max_agent_steps: 5000000000 139 | critic_warmup_steps: -1 140 | # hora setting 141 | priv_info: False 142 | priv_info_dim: 9 143 | priv_info_embed_dim: 8 144 | proprio_adapt: False 145 | useMemoryEfficientBuffer: False 146 | dapg: 147 | l1: 0.1 148 | l2: 0.999 149 | dapg_threshold: 0.002 150 | 151 | wandb: 152 | activate: True 153 | entity: himanshu_singh 154 | project: grasping 155 | -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | name: rlgpu 2 | channels: 3 | - pytorch3d 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=conda_forge 9 | - _openmp_mutex=4.5=2_kmp_llvm 10 | - absl-py=2.1.0=pyhd8ed1ab_0 11 | - aiohttp=3.7.4.post0=py37h5e8e339_1 12 | - antlr-python-runtime=4.9.3=pyhd8ed1ab_1 13 | - appdirs=1.4.4=pyhd3eb1b0_0 14 | - async-timeout=3.0.1=py_1000 15 | - attrs=23.2.0=pyh71513ae_0 16 | - backcall=0.2.0=pyh9f0ad1d_0 17 | - backports=1.0=pyhd8ed1ab_3 18 | - backports.functools_lru_cache=2.0.0=pyhd8ed1ab_0 19 | - blas=1.0=mkl 20 | - blas-devel=3.9.0=16_linux64_mkl 21 | - blinker=1.6.3=pyhd8ed1ab_0 22 | - brotli=1.0.9=h5eee18b_7 23 | - brotli-bin=1.0.9=h5eee18b_7 24 | - brotli-python=1.0.9=py37hd23a5d3_7 25 | - bzip2=1.0.8=hd590300_5 26 | - c-ares=1.28.1=hd590300_0 27 | - ca-certificates=2024.7.2=h06a4308_0 28 | - cachetools=5.3.3=pyhd8ed1ab_0 29 | - certifi=2024.2.2=pyhd8ed1ab_0 30 | - cffi=1.15.1=py37h43b0acd_1 31 | - chardet=4.0.0=py37h89c1867_3 32 | - charset-normalizer=3.3.2=pyhd8ed1ab_0 33 | - click=8.1.3=py37h89c1867_0 34 | - cloudpickle=2.0.0=pyhd3eb1b0_0 35 | - cryptography=38.0.2=py37h5994e8b_1 36 | - cudatoolkit=11.1.1=hb139c0e_13 37 | - cycler=0.11.0=pyhd3eb1b0_0 38 | - dataclasses=0.8=pyh6d0b6a4_7 39 | - dbus=1.13.18=hb2f20db_0 40 | - debugpy=1.6.3=py37hd23a5d3_0 41 | - docker-pycreds=0.4.0=pyhd3eb1b0_0 42 | - einops=0.6.1=pyhd8ed1ab_0 43 | - entrypoints=0.4=pyhd8ed1ab_0 44 | - expat=2.5.0=h6a678d5_0 45 | - filelock=3.9.0=py37h06a4308_0 46 | - fontconfig=2.14.1=h52c9d5c_1 47 | - fonttools=4.25.0=pyhd3eb1b0_0 48 | - freetype=2.12.1=h267a509_2 49 | - fvcore=0.1.5.post20221221=pyhd8ed1ab_0 50 | - giflib=5.2.1=h5eee18b_3 51 | - gitdb=4.0.7=pyhd3eb1b0_0 52 | - gitpython=3.1.30=py37h06a4308_0 53 | - glib=2.78.4=h6a678d5_0 54 | - glib-tools=2.78.4=h6a678d5_0 55 | - gmp=6.3.0=h59595ed_1 56 | - gnutls=3.6.13=h85f3911_1 57 | - google-auth=2.23.0=pyh1a96a4e_0 58 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 59 | - grpc-cpp=1.48.1=h30feacc_1 60 | - grpcio=1.48.1=py37he7b19e7_1 61 | - gst-plugins-base=1.14.1=h6a678d5_1 62 | - gstreamer=1.14.1=h5eee18b_1 63 | - hydra-core=1.3.2=pyhd8ed1ab_0 64 | - 
icu=58.2=he6710b0_3 65 | - idna=3.6=pyhd8ed1ab_0 66 | - importlib-metadata=4.11.4=py37h89c1867_0 67 | - importlib_metadata=4.11.4=hd8ed1ab_0 68 | - importlib_resources=5.2.0=pyhd3eb1b0_1 69 | - intel-openmp=2022.1.0=h9e868ea_3769 70 | - iopath=0.1.9=pyhd8ed1ab_0 71 | - ipykernel=6.16.2=pyh210e3f2_0 72 | - ipython=7.33.0=py37h89c1867_0 73 | - jedi=0.19.1=pyhd8ed1ab_0 74 | - joblib=1.1.1=py37h06a4308_0 75 | - jpeg=9b=h024ee3a_2 76 | - jupyter_client=7.4.9=pyhd8ed1ab_0 77 | - jupyter_core=4.11.1=py37h89c1867_0 78 | - kiwisolver=1.4.4=py37h6a678d5_0 79 | - lame=3.100=h166bdaf_1003 80 | - lcms2=2.12=h3be6417_0 81 | - ld_impl_linux-64=2.40=h41732ed_0 82 | - libabseil=20220623.0=cxx17_h05df665_6 83 | - libblas=3.9.0=16_linux64_mkl 84 | - libbrotlicommon=1.0.9=h5eee18b_7 85 | - libbrotlidec=1.0.9=h5eee18b_7 86 | - libbrotlienc=1.0.9=h5eee18b_7 87 | - libcblas=3.9.0=16_linux64_mkl 88 | - libffi=3.4.2=h7f98852_5 89 | - libgcc-ng=13.2.0=h807b86a_5 90 | - libgfortran-ng=13.2.0=h69a702a_5 91 | - libgfortran5=13.2.0=ha4646dd_5 92 | - libglib=2.78.4=hdc74915_0 93 | - libhwloc=2.8.0=h32351e8_1 94 | - libiconv=1.17=hd590300_2 95 | - liblapack=3.9.0=16_linux64_mkl 96 | - liblapacke=3.9.0=16_linux64_mkl 97 | - libnsl=2.0.1=hd590300_0 98 | - libpng=1.6.43=h2797004_0 99 | - libprotobuf=3.21.8=h6239696_0 100 | - libsodium=1.0.18=h36c2ea0_1 101 | - libsqlite=3.45.2=h2797004_0 102 | - libstdcxx-ng=13.2.0=h7e041cc_5 103 | - libtiff=4.2.0=h85742a9_0 104 | - libuuid=1.41.5=h5eee18b_0 105 | - libuv=1.48.0=hd590300_0 106 | - libwebp=1.2.0=h89dd481_0 107 | - libwebp-base=1.2.0=h27cfd23_0 108 | - libxcb=1.15=h7f8727e_0 109 | - libxml2=2.9.14=h74e7548_0 110 | - libzlib=1.2.13=hd590300_5 111 | - llvm-openmp=14.0.6=h9e868ea_0 112 | - lz4-c=1.9.4=h6a678d5_0 113 | - markdown=3.6=pyhd8ed1ab_0 114 | - markupsafe=2.1.1=py37h540881e_1 115 | - matplotlib=3.5.3=py37h06a4308_0 116 | - matplotlib-base=3.5.3=py37hf590b9c_0 117 | - matplotlib-inline=0.1.7=pyhd8ed1ab_0 118 | - mkl=2022.1.0=hc2b9512_224 119 | - mkl-devel=2022.1.0=h66538d2_224 120 | - mkl-include=2022.1.0=h06a4308_224 121 | - multidict=6.0.2=py37h540881e_1 122 | - munkres=1.1.4=py_0 123 | - ncurses=6.4.20240210=h59595ed_0 124 | - nest-asyncio=1.6.0=pyhd8ed1ab_0 125 | - nettle=3.6=he412f7d_0 126 | - numpy=1.21.6=py37h976b520_0 127 | - oauthlib=3.2.2=pyhd8ed1ab_0 128 | - olefile=0.47=pyhd8ed1ab_0 129 | - omegaconf=2.3.0=pyhd8ed1ab_0 130 | - openh264=2.1.1=h780b84a_0 131 | - openssl=3.2.1=hd590300_1 132 | - packaging=22.0=py37h06a4308_0 133 | - parso=0.8.4=pyhd8ed1ab_0 134 | - pathtools=0.1.2=pyhd3eb1b0_1 135 | - pcre2=10.42=hebb0a14_0 136 | - pexpect=4.9.0=pyhd8ed1ab_0 137 | - pickleshare=0.7.5=py_1003 138 | - pip=24.0=pyhd8ed1ab_0 139 | - portalocker=2.3.0=py37h06a4308_0 140 | - prompt-toolkit=3.0.42=pyha770c72_0 141 | - psutil=5.9.0=py37h5eee18b_0 142 | - ptyprocess=0.7.0=pyhd3deb0d_0 143 | - pyasn1=0.5.1=pyhd8ed1ab_0 144 | - pyasn1-modules=0.3.0=pyhd8ed1ab_0 145 | - pycparser=2.21=pyhd8ed1ab_0 146 | - pygments=2.17.2=pyhd8ed1ab_0 147 | - pyjwt=2.8.0=pyhd8ed1ab_1 148 | - pyopenssl=23.2.0=pyhd8ed1ab_1 149 | - pyparsing=3.0.9=py37h06a4308_0 150 | - pyqt=5.6.0=py37h22d08a2_6 151 | - pysocks=1.7.1=py37h89c1867_5 152 | - python=3.7.12=hf930737_100_cpython 153 | - python-dateutil=2.8.2=pyhd3eb1b0_0 154 | - python_abi=3.7=4_cp37m 155 | - pytorch=1.8.1=py3.7_cuda11.1_cudnn8.0.5_0 156 | - pytorch3d=0.7.0=py37_cu111_pyt181 157 | - pyu2f=0.1.5=pyhd8ed1ab_0 158 | - pyyaml=6.0=py37h540881e_4 159 | - pyzmq=24.0.1=py37h0c0c2a8_0 160 | - qt=5.6.3=h8bf5577_3 161 | - 
re2=2022.06.01=h27087fc_1 162 | - readline=8.2=h8228510_1 163 | - regex=2022.7.9=py37h5eee18b_0 164 | - requests=2.31.0=pyhd8ed1ab_0 165 | - requests-oauthlib=2.0.0=pyhd8ed1ab_0 166 | - rsa=4.9=pyhd8ed1ab_0 167 | - ruamel=1.0=py37h06a4308_2 168 | - ruamel.yaml=0.17.21=py37h5eee18b_0 169 | - ruamel.yaml.clib=0.2.6=py37h5eee18b_1 170 | - scipy=1.7.3=py37hf2a6cf1_0 171 | - sentry-sdk=1.9.0=py37h06a4308_0 172 | - setproctitle=1.2.2=py37h27cfd23_1004 173 | - setuptools=69.0.3=pyhd8ed1ab_0 174 | - sip=4.18.1=py37h295c915_2 175 | - six=1.16.0=pyh6c4a22f_0 176 | - smmap=4.0.0=pyhd3eb1b0_0 177 | - sqlite=3.45.2=h2c6b66d_0 178 | - tabulate=0.8.10=py37h06a4308_0 179 | - tbb=2021.8.0=hdb19cb5_0 180 | - tensorboard=2.11.2=pyhd8ed1ab_0 181 | - tensorboard-data-server=0.6.1=py37h52d8a92_0 182 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0 183 | - termcolor=2.1.0=py37h06a4308_0 184 | - tk=8.6.13=noxft_h4845f30_101 185 | - tokenizers=0.13.1=py37hfb4b0a8_0 186 | - torchvision=0.9.1=py37_cu111 187 | - tornado=6.2=py37h5eee18b_0 188 | - tqdm=4.64.1=py37h06a4308_0 189 | - traitlets=5.9.0=pyhd8ed1ab_0 190 | - transformers=4.24.0=py37h06a4308_0 191 | - typing-extensions=4.7.1=hd8ed1ab_0 192 | - typing_extensions=4.7.1=pyha770c72_0 193 | - urllib3=1.26.18=pyhd8ed1ab_0 194 | - wcwidth=0.2.10=pyhd8ed1ab_0 195 | - werkzeug=2.2.3=pyhd8ed1ab_0 196 | - wheel=0.42.0=pyhd8ed1ab_0 197 | - xlrd=2.0.1=pyhd3eb1b0_1 198 | - xz=5.2.6=h166bdaf_0 199 | - yacs=0.1.6=pyhd3eb1b0_1 200 | - yaml=0.2.5=h7f98852_2 201 | - yarl=1.7.2=py37h540881e_2 202 | - zeromq=4.3.5=h59595ed_1 203 | - zipp=3.15.0=pyhd8ed1ab_0 204 | - zlib=1.2.13=hd590300_5 205 | - zstd=1.4.9=haebb681_0 206 | - pip: 207 | - backports-cached-property==1.0.2 208 | - decorator==4.4.2 209 | - diffusers==0.21.4 210 | - docstring-parser==0.16 211 | - eval-type-backport==0.1.3 212 | - ffmpeg==1.4 213 | - freetype-py==2.4.0 214 | - fsspec==2023.1.0 215 | - gym==0.23.1 216 | - gym-notices==0.0.8 217 | - h5py==3.8.0 218 | - huggingface-hub==0.16.4 219 | - imageio==2.19.2 220 | - imageio-ffmpeg==0.4.9 221 | - jinja2==3.1.4 222 | - lxml==5.2.2 223 | - markdown-it-py==2.2.0 224 | - mdurl==0.1.2 225 | - mediapy==1.1.2 226 | - moviepy==1.0.3 227 | - natsort==8.4.0 228 | - networkx==2.2 229 | - ninja==1.11.1.1 230 | - nltk==3.8.1 231 | - numexpr==2.8.6 232 | - opencv-python==4.9.0.80 233 | - palettable==3.3.3 234 | - pandas==1.3.5 235 | - pillow==9.5.0 236 | - platformdirs==4.0.0 237 | - proglog==0.1.10 238 | - promise==2.3 239 | - protobuf==3.20.3 240 | - pycollada==0.6 241 | - pyglet==2.0.10 242 | - pyopengl==3.1.0 243 | - pyrender==0.1.45 244 | - pysdf==0.1.9 245 | - pytz==2024.1 246 | - pyvirtualdisplay==3.0 247 | - rich==13.7.1 248 | - rl-games==1.6.1 249 | - safetensors==0.4.3 250 | - scikit-learn==1.0.2 251 | - seaborn==0.12.2 252 | - sentence-transformers==2.2.2 253 | - sentencepiece==0.2.0 254 | - shortuuid==1.0.13 255 | - shtab==1.7.1 256 | - tables==3.7.0 257 | - tensorboardx==2.6.2.2 258 | - threadpoolctl==3.1.0 259 | - transforms3d==0.4.1 260 | - trimesh==3.23.5 261 | - tyro==0.8.4 262 | - urdfpy==0.0.22 263 | - wandb==0.17.0 264 | - warp-lang==0.10.1 265 | prefix: /home/himanshu/anaconda3/envs/rlgpu 266 | -------------------------------------------------------------------------------- /imgs/approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/imgs/approach.png -------------------------------------------------------------------------------- 
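Note on the environment file above: env.yml defines a conda environment named rlgpu. It can usually be recreated with "conda env create -f env.yml" followed by "conda activate rlgpu"; Isaac Gym itself is not listed in the file and has to be installed separately into that environment from NVIDIA's Isaac Gym preview release (the scripts below import isaacgym before torch). The trailing prefix: line records the original author's install path and may need to be edited or removed on other machines.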
/scripts/finetune.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import os 3 | import hydra 4 | import datetime 5 | from termcolor import cprint 6 | from omegaconf import DictConfig, OmegaConf 7 | from hydra.utils import to_absolute_path 8 | import wandb 9 | from algo.ppo_transformer.ppo_transformer import PPOTransformer 10 | from tasks import isaacgym_task_map 11 | from utils.reformat import omegaconf_to_dict, print_dict 12 | from utils.utils import set_np_formatting, set_seed, git_hash, git_diff_config 13 | from utils.logger import Logger 14 | import torch 15 | import torch.distributed as dist 16 | import torch.multiprocessing as mp 17 | 18 | def main(rank, world_size, config): 19 | 20 | print(config.task_name) 21 | if world_size > 1: 22 | dist.init_process_group("nccl", rank=rank, world_size=world_size) 23 | global_rank = rank 24 | seed = config.seed + global_rank 25 | else: 26 | global_rank = rank 27 | seed = config.seed 28 | 29 | if config.checkpoint: 30 | config.checkpoint = to_absolute_path(config.checkpoint) 31 | 32 | # set numpy formatting for printing only 33 | set_np_formatting() 34 | 35 | # sets seed. if seed is -1 will pick a random one 36 | _ = set_seed(seed) 37 | 38 | print(f"global_rank = {global_rank} seed = {seed}") 39 | 40 | if config.wandb_activate and not config.test and (global_rank == 0 or world_size ==1): 41 | wandb_logger = wandb.init(project=config.wandb_project, name=config.wandb_name, config=omegaconf_to_dict(config)) 42 | else: 43 | wandb_logger=None 44 | 45 | if (global_rank == 0 or world_size == 1): 46 | output_dif = os.path.join('outputs', config.wandb_name) 47 | logger = Logger(output_dif, summary_writer=wandb_logger) 48 | else: 49 | logger = None 50 | 51 | cprint('Start Building the Environment', 'green', attrs=['bold']) 52 | 53 | 54 | if config.num_gpus > 1: 55 | rl_device = f'cuda:{global_rank}' 56 | sim_device = f'cuda:{global_rank}' 57 | graphics_id = global_rank 58 | else: 59 | rl_device = config.rl_device 60 | sim_device = config.sim_device 61 | graphics_id = config.graphics_device_id 62 | 63 | env = isaacgym_task_map[config.task_name]( 64 | cfg=omegaconf_to_dict(config.task), 65 | rl_device = rl_device, 66 | sim_device=sim_device, 67 | graphics_device_id=graphics_id, 68 | headless=config.headless, 69 | virtual_screen_capture=config.capture_video, 70 | force_render=config.force_render, 71 | ) 72 | 73 | #for debugging 74 | if config.train.algo == 'PPOTransformer': 75 | if env.use_obs_as_prop: 76 | config.pretrain.model.proprio_dim = env.full_state_size 77 | config.train.network = config.pretrain.model 78 | config.task.env.stage2_hist_len = config.pretrain.model.context_length 79 | # Load the model to finetune 80 | 81 | 82 | agent = eval(config.train.algo)(env, config=config,logger=logger, rank=global_rank) 83 | 84 | if config.test: 85 | # agent.restore_test(config.train.load_path) 86 | assert config.checkpoint is not None 87 | print(config.checkpoint) 88 | #agent.model.actor.load_state_dict(torch.load(config.checkpoint)) 89 | agent.restore_test(config.checkpoint) 90 | #breakpoint() 91 | agent.test(name=config.wandb_name) 92 | else: 93 | if rank <= 0: 94 | date = str(datetime.datetime.now().strftime('%m%d%H')) 95 | if config.wandb_activate: 96 | pid = os.getpid() 97 | wandb.log({'pid': pid}) 98 | #cprint(git_diff_config('./'),color='green',attrs=['bold']) 99 | #os.system(f'git diff HEAD > {output_dif}/gitdiff.patch') 100 | #with open(os.path.join(output_dif, 
f'config_{date}_{git_hash()}.yaml'), 'w') as f: 101 | # f.write(OmegaConf.to_yaml(config)) 102 | 103 | if config.train.load_path == '': 104 | cprint("Train model from scratch", 'green', attrs=['bold']) 105 | agent.train() 106 | else: 107 | agent.restore_train(config.train.load_path) 108 | cprint("Loaded actor model from: " + config.train.load_path, 'green', attrs=['bold']) 109 | agent.train() 110 | 111 | if config.wandb_activate and (global_rank==0 or world_size==1): 112 | wandb.finish() 113 | 114 | 115 | @hydra.main(config_name='config', config_path='../cfg/') 116 | def main_multi_gpu(config: DictConfig): 117 | if config.test: 118 | # single gpu testing only! 119 | config.num_gpus = 1 120 | world_size = config.num_gpus 121 | if world_size > 1: 122 | mp.spawn(main, 123 | args=(world_size, config), 124 | nprocs=world_size, 125 | join=True) 126 | else: 127 | rank = 0 #config.sim_device.split(":")[1] 128 | main(rank, 1, config) 129 | 130 | 131 | if __name__ == '__main__': 132 | os.environ["MASTER_ADDR"] = "localhost" 133 | #randomize port address 134 | 135 | os.environ["MASTER_PORT"] = "29435" 136 | main_multi_gpu() 137 | -------------------------------------------------------------------------------- /scripts/finetune/finetune_cabinet.sh: -------------------------------------------------------------------------------- 1 | cmd="python scripts/finetune.py num_gpus=8 \ 2 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 3 | task=AllegroXarmCabinet \ 4 | train.algo=PPOTransformer \ 5 | train.ppo.initEpsHand=0.5 \ 6 | train.ppo.initEpsArm=0.5 \ 7 | train.ppo.value_grads_to_pointnet=False \ 8 | train.ppo.critic_warmup_steps=200 \ 9 | train.ppo.learning_rate=1e-5 \ 10 | wandb_activate=True wandb_name=AllegroXarmCabinet_finetune_datamix_pretraining_initeps_0.5 \ 11 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 12 | train.ppo.minibatch_size=512 num_envs=512 \ 13 | seed=-1" 14 | 15 | echo $cmd 16 | eval $cmd 17 | -------------------------------------------------------------------------------- /scripts/finetune/finetune_grasp.sh: -------------------------------------------------------------------------------- 1 | cmd="python scripts/finetune.py num_gpus=4 \ 2 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 3 | task=AllegroXarmNew \ 4 | train.algo=PPOTransformer \ 5 | train.ppo.initEpsHand=0.1 \ 6 | train.ppo.initEpsArm=0.1 \ 7 | train.ppo.learning_rate=1e-5 \ 8 | train.ppo.value_grads_to_pointnet=False \ 9 | train.ppo.critic_warmup_steps=200 \ 10 | wandb_activate=True wandb_name=AllegroXarmGrasping_finetune_datamix_pretraining\ 11 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 12 | train.ppo.minibatch_size=512 num_envs=512 \ 13 | seed=-1" 14 | 15 | echo $cmd 16 | eval $cmd 17 | #algo/pretrained/models/Policy_noise01_l4h4_ctx_16_shift0_scaled_inputs_new_setup/dt_17-04-2024_23-42-00/model_step_711071.pt 18 | -------------------------------------------------------------------------------- /scripts/finetune/finetune_throw.sh: -------------------------------------------------------------------------------- 1 | # cmd="python scripts/finetune.py num_gpus=8 \ 2 | # checkpoint="algo/pretrained/models/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 3 | # task=AllegroXarmThrowing \ 4 | # train.algo=PPOTransformer \ 5 | # train.ppo.value_grads_to_pointnet=False \ 6 | # train.ppo.critic_warmup_steps=200 \ 7 | # train.ppo.learning_rate=1e-5 \ 
8 | # train.ppo.initEpsHand=0.1 \ 9 | # train.ppo.initEpsArm=0.1 \ 10 | # wandb_activate=True wandb_name=AllegroXarmThrowing_finetune_datamix_pretraining_eps_20 \ 11 | # pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 12 | # train.ppo.minibatch_size=512 num_envs=512 \ 13 | # seed=20" 14 | 15 | # echo $cmd 16 | # eval $cmd 17 | cmd="python scripts/finetune.py num_gpus=3 \ 18 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 19 | task=AllegroXarmThrowing \ 20 | train.algo=PPOTransformer \ 21 | train.ppo.value_grads_to_pointnet=False \ 22 | train.ppo.critic_warmup_steps=200 \ 23 | train.ppo.learning_rate=1e-5 \ 24 | train.ppo.initEpsHand=0.1 \ 25 | train.ppo.initEpsArm=0.1 \ 26 | wandb_activate=True wandb_name=AllegroXarmThrowing_noobj_pretraining \ 27 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 28 | train.ppo.minibatch_size=1365 num_envs=1365 \ 29 | seed=-1" 30 | 31 | echo $cmd 32 | eval $cmd 33 | -------------------------------------------------------------------------------- /scripts/pretrain.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | from tasks import isaacgym_task_map 3 | import torch 4 | from torch.utils.data import DataLoader 5 | from omegaconf import DictConfig, OmegaConf 6 | from termcolor import cprint 7 | import wandb 8 | from torch.optim import Adam, AdamW 9 | from algo.pretrained.trainer import RobotTrainer 10 | import wandb 11 | from algo.pretrained.robot_transformer_ar import RobotTransformerAR 12 | from algo.pretrained.robot_dataset import RobotDataset , collate_fn 13 | import os 14 | from datetime import datetime 15 | import json 16 | import hydra 17 | from utils.reformat import omegaconf_to_dict, print_dict 18 | from utils.utils import set_np_formatting, set_seed 19 | from utils.logger import Logger 20 | import random 21 | import numpy as np 22 | from torch.optim.lr_scheduler import CosineAnnealingLR 23 | import imageio 24 | 25 | @hydra.main(config_name='config', config_path='../cfg/') 26 | def main(config: DictConfig): 27 | 28 | 29 | device = config.pretrain.device 30 | config.seed = set_seed(config.seed) 31 | 32 | capture_video = config.task.env.enableVideoLog 33 | 34 | if config.pretrain.wandb_activate: 35 | wandb.init(project="manipulation-pretraining", 36 | name=config.pretrain.wandb_name, 37 | config=omegaconf_to_dict(config)) 38 | 39 | tmodel = RobotTransformerAR 40 | 41 | if config.pretrain.test: 42 | 43 | model = tmodel( 44 | cfg=config 45 | ) 46 | 47 | model = model.to(device) 48 | 49 | model.eval() 50 | 51 | assert config.pretrain.checkpoint != '' 52 | # set numpy formatting for printing only 53 | set_np_formatting() 54 | 55 | 56 | if config.pretrain.wandb_activate: 57 | wandb_logger = wandb.init(project=config.wandb_project, 58 | name=config.pretrain.wandb_name, 59 | entity=config.wandb_entity, 60 | config=omegaconf_to_dict(config), 61 | sync_tensorboard=True) 62 | else: 63 | wandb_logger=None 64 | 65 | output_dif = os.path.join('outputs', config.wandb_name) 66 | logger = Logger(output_dif, summary_writer=wandb_logger) 67 | 68 | cprint('Start Building the Environment', 'green', attrs=['bold']) 69 | 70 | env = isaacgym_task_map[config.task_name]( 71 | cfg=omegaconf_to_dict(config.task), 72 | pretrain_cfg=omegaconf_to_dict(config.pretrain), 73 | rl_device = config.rl_device, 74 | sim_device=config.sim_device, 75 | graphics_device_id=config.graphics_device_id, 76 | headless=config.headless, 77 | 
virtual_screen_capture=config.capture_video, 78 | force_render=config.force_render 79 | ) 80 | 81 | model.load_state_dict(torch.load(config.pretrain.checkpoint,map_location=device)) 82 | 83 | cprint(f"Model loaded from {config.pretrain.checkpoint}", color='green', attrs=['bold']) 84 | 85 | model.run_multi_env(env, cfg=config) 86 | 87 | return 88 | 89 | else: 90 | 91 | if config.pretrain.wandb_activate: 92 | wandb_logger = wandb.init(project=config.wandb_project, name=config.wandb_name, 93 | entity=config.wandb_entity, config=omegaconf_to_dict(config)) 94 | else: 95 | wandb_logger=None 96 | 97 | train_dataset = RobotDataset(cfg=config, root=config.pretrain.training.root_dir) 98 | val_dataset = RobotDataset(cfg=config, root=config.pretrain.validation.root_dir) 99 | 100 | max_ep_len = max(train_dataset.max_ep_len, val_dataset.max_ep_len) 101 | 102 | cprint(f"Dataloader built", color='green', attrs=['bold']) 103 | 104 | model = tmodel( 105 | cfg=config, 106 | max_ep_len=max_ep_len 107 | ) 108 | 109 | model = model.to(device) 110 | 111 | if config.pretrain.training.model_save_dir is not None: 112 | save_dir = config.pretrain.training.model_save_dir 113 | # Create the saving directory using the wandb name and the date and time 114 | os.makedirs(save_dir, exist_ok=True) 115 | #get date and time 116 | now = datetime.now() 117 | dt_string = now.strftime("%d-%m-%Y_%H-%M-%S") 118 | experiment_folder = os.path.join(save_dir, f'{config.pretrain.wandb_name}', f'dt_{dt_string}') 119 | # create the experiment folder if not exists 120 | os.makedirs(experiment_folder, exist_ok=True) 121 | json.dump(OmegaConf.to_container(config), open(os.path.join(experiment_folder, 'config.json'), 'w')) 122 | logger = Logger(experiment_folder, summary_writer=wandb_logger) 123 | 124 | else: 125 | save_dir = None 126 | logger = None 127 | 128 | cprint(f"Model built", color='green', attrs=['bold']) 129 | 130 | if config.pretrain.training.load_checkpoint: 131 | assert os.path.exists(config.pretrain.checkpoint), f"Checkpoint {config.pretrain.checkpoint} does not exist" 132 | model.load_state_dict(torch.load(config.pretrain.checkpoint,map_location=device)) 133 | model.train() 134 | cprint(f"Model loaded from {config.pretrain.checkpoint}", color='green', attrs=['bold']) 135 | 136 | scheduler = None #CosineAnnealingLR(optimizer, T_max=10000, eta_min=1e-6) 137 | optimizer = AdamW(model.parameters(), lr=config.pretrain.training.lr, weight_decay=config.pretrain.training.weight_decay) 138 | loss_fn = torch.nn.L1Loss() #torch.nn.MSELoss() 139 | 140 | trainer = RobotTrainer( 141 | model = model, 142 | optimizer = optimizer, 143 | scheduler = scheduler, 144 | train_dataset = train_dataset, 145 | val_dataset = val_dataset, 146 | collate_fn=collate_fn, 147 | loss_fn = loss_fn, 148 | model_save_dir = experiment_folder, 149 | logger = logger, 150 | config=config 151 | ) 152 | 153 | if capture_video: 154 | assert config.pretrain.wandb_activate, "Video capture requires wandb activation" 155 | # create the environment to capture the video 156 | env = isaacgym_task_map[config.task_name]( 157 | cfg=omegaconf_to_dict(config.task), 158 | pretrain_cfg=omegaconf_to_dict(config.pretrain), 159 | rl_device = config.pretrain.device, 160 | sim_device=config.pretrain.device, 161 | graphics_device_id=config.graphics_device_id, 162 | headless=config.headless, 163 | virtual_screen_capture=config.capture_video, 164 | force_render=config.force_render 165 | ) 166 | 167 | for i in range(config.pretrain.training.num_epochs): 168 | cprint("Training iteration 
{}".format(i), color='magenta', attrs=['bold']) 169 | outputs = trainer.train_epoch(iter_num=i, 170 | print_logs=True) 171 | if config.pretrain.wandb_activate: 172 | wandb.log(outputs, commit=True) 173 | 174 | 175 | if capture_video: 176 | fps = int(1/(config.task.sim.dt*config.task.env.controlFrequencyInv)) 177 | print(f"Capturing video from simulation") 178 | env.start_video_recording() 179 | info_dict = model.run_multi_env(env, cfg=config) 180 | video_frames = env.stop_video_recording() 181 | video_path = os.path.join(experiment_folder, f'{config.pretrain.wandb_name}_video.mp4') 182 | video_frames = [np.array(frame.detach().cpu()).astype(np.uint8) for frame in video_frames] 183 | imageio.mimsave(video_path, video_frames, fps=fps) 184 | env.video_frames = [] 185 | 186 | 187 | 188 | if __name__ == '__main__': 189 | main() 190 | -------------------------------------------------------------------------------- /scripts/pretrain.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | DATADIR=$1 4 | CMD="python scripts/pretrain.py num_gpus=4 headless=True \ 5 | track_pose=False get_target_reference=False num_envs=25 \ 6 | pc_input=True pipeline=cuda rl_device=cuda:0 sim_device=cuda:0 \ 7 | pretrain.training.root_dir=$DATADIR/train \ 8 | pretrain.validation.root_dir=$DATADIR/val pretrain.wandb_activate=True \ 9 | pretrain.wandb_name=Policy_noise01_l4h4_ctx_16_data_mix_simrob seed=-1 \ 10 | task.env.enableVideoLog=True \ 11 | task.env.episodeLength=400" 12 | 13 | echo $CMD 14 | eval $CMD 15 | -------------------------------------------------------------------------------- /scripts/run_policy.sh: -------------------------------------------------------------------------------- 1 | POLICY=$1 #"outputs/AllegroXarmGrasping_scratch_vel_control/2024-05-29_00-49/stage1_nn/ep_41700_step_1708M_reward_1876.28.pth" 2 | cmd="python scripts/finetune.py num_gpus=1 \ 3 | task=AllegroXarmNew test=True headless=False \ 4 | checkpoint=$POLICY \ 5 | train.algo=PPOTransformer \ 6 | wandb_activate=False wandb_name=AllegroXarmGrasping_Finetuned \ 7 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 8 | train.ppo.minibatch_size=16 num_envs=16 \ 9 | task.env.episodeLength=600 \ 10 | task.env.maxConsecutiveSuccesses=1 \ 11 | pc_input=True \ 12 | seed=-1" 13 | 14 | echo $cmd 15 | eval $cmd 16 | -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 
17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | 31 | from tasks.xarm_grasping_new import AllegroXarmGraspingNew 32 | from tasks.xarm_throwing import AllegroXarmThrowing 33 | from tasks.xarm_cabinet import AllegroXarmCabinet 34 | 35 | 36 | # Mappings from strings to environments 37 | isaacgym_task_map = { 38 | "AllegroXarmNew": AllegroXarmGraspingNew, 39 | "AllegroXarmThrowing": AllegroXarmThrowing, 40 | "AllegroXarmCabinet": AllegroXarmCabinet 41 | # "XarmReaching" : XarmReaching 42 | } 43 | -------------------------------------------------------------------------------- /tasks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/allegro_kuka_grasping.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/allegro_kuka_grasping.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/allegro_kuka_grasping.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/allegro_kuka_grasping.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/torch_jit_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/torch_jit_utils.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/torch_jit_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/torch_jit_utils.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_cabinet.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_cabinet.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_cabinet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_cabinet.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_debug.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_debug.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_debug.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_debug.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_new.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_new.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_new.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_new.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_real.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_real.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_throwing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_throwing.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_throwing.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_throwing.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
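Referring back to the task registry in tasks/__init__.py above: each entry of isaacgym_task_map maps a task name from the Hydra config onto a task class. The sketch below shows how the training scripts construct an environment from that map (the constructor arguments are copied from scripts/finetune.py; the config paths are assumptions based on cfg/config.yaml, so treat this as illustrative rather than a supported entry point).

import isaacgym  # the scripts import isaacgym before torch
import hydra
from omegaconf import DictConfig
from tasks import isaacgym_task_map
from utils.reformat import omegaconf_to_dict

@hydra.main(config_name='config', config_path='../cfg/')
def make_env(config: DictConfig):
    # Look up the task class by name and build the vectorized Isaac Gym task,
    # exactly as scripts/finetune.py does before handing it to PPOTransformer.
    env = isaacgym_task_map[config.task_name](
        cfg=omegaconf_to_dict(config.task),
        rl_device=config.rl_device,
        sim_device=config.sim_device,
        graphics_device_id=config.graphics_device_id,
        headless=config.headless,
        virtual_screen_capture=config.capture_video,
        force_render=config.force_render,
    )
    return env

if __name__ == '__main__':
    make_env()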
28 | -------------------------------------------------------------------------------- /tasks/base/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/base/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/base/__pycache__/vec_task.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/vec_task.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/base/__pycache__/vec_task.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/vec_task.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/xarm7_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
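tasks/xarm7_utils.py below bundles the xArm DOF parameters with a success-tolerance curriculum. As a quick worked example of the interp_0_1 helper defined further down (the numbers here are made up purely for illustration): with initial_tolerance=0.10 and target_tolerance=0.01, a current tolerance of 0.055 maps to (0.10 - 0.055) / (0.10 - 0.01) = 0.5, i.e. the curriculum is halfway from the starting tolerance to the target.

# Made-up tolerances, only to illustrate interp_0_1 as defined later in this file:
# interp_0_1(x_curr, x_initial, x_target) = (x_initial - x_curr) / (x_initial - x_target)
initial, target = 0.10, 0.01
print(round((initial - 0.10) / (initial - target), 3))   # 0.0 -> curriculum just started
print(round((initial - 0.055) / (initial - target), 3))  # 0.5 -> halfway to the target tolerance
print(round((initial - 0.01) / (initial - target), 3))   # 1.0 -> curriculum completed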
29 | 30 | from __future__ import annotations 31 | 32 | from dataclasses import dataclass 33 | from typing import Tuple, Dict, List 34 | 35 | from torch import Tensor 36 | 37 | 38 | @dataclass 39 | class DofParameters: 40 | """Joint/dof parameters.""" 41 | xarm_stiffness: float 42 | xarm_effort: List[float] # separate per DOF 43 | xarm_damping: float 44 | xarm_velocity: float 45 | xarm_armature: float 46 | dof_friction: float 47 | 48 | @staticmethod 49 | def from_cfg(cfg: Dict) -> DofParameters: 50 | return DofParameters( 51 | xarm_stiffness=cfg["env"]["kukaStiffness"], 52 | xarm_effort=cfg["env"]["kukaEffort"], 53 | xarm_damping=cfg["env"]["kukaDamping"], 54 | xarm_velocity=cfg["env"]["kukaVelocity"], 55 | xarm_armature=cfg["env"]["kukaArmature"], 56 | dof_friction=cfg["env"]["dofFriction"], 57 | ) 58 | 59 | 60 | def populate_dof_properties(arm_dof_props, params: DofParameters, arm_dofs: int) -> None: 61 | assert len(arm_dof_props["stiffness"]) == arm_dofs 62 | 63 | arm_dof_props["stiffness"].fill(params.xarm_stiffness) 64 | 65 | assert len(params.xarm_effort) == arm_dofs 66 | arm_dof_props["effort"] = params.xarm_effort 67 | arm_dof_props["velocity"] = params.xarm_velocity 68 | arm_dof_props["damping"].fill(params.xarm_damping) 69 | 70 | if params.dof_friction >= 0: 71 | arm_dof_props["friction"].fill(params.dof_friction) 72 | 73 | arm_dof_props["armature"].fill(params.xarm_armature) 74 | 75 | def tolerance_curriculum( 76 | last_curriculum_update: int, 77 | frames_since_restart: int, 78 | curriculum_interval: int, 79 | prev_episode_successes: Tensor, 80 | success_tolerance: float, 81 | initial_tolerance: float, 82 | target_tolerance: float, 83 | tolerance_curriculum_increment: float, 84 | ) -> Tuple[float, int]: 85 | """ 86 | Returns: new tolerance, new last_curriculum_update 87 | """ 88 | if frames_since_restart - last_curriculum_update < curriculum_interval: 89 | return success_tolerance, last_curriculum_update 90 | 91 | mean_successes_per_episode = prev_episode_successes.mean() 92 | if mean_successes_per_episode < 3.0: 93 | # this policy is not good enough with the previous tolerance value, keep training for now... 94 | return success_tolerance, last_curriculum_update 95 | 96 | # decrease the tolerance now 97 | success_tolerance *= tolerance_curriculum_increment 98 | success_tolerance = min(success_tolerance, initial_tolerance) 99 | success_tolerance = max(success_tolerance, target_tolerance) 100 | 101 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}") 102 | 103 | last_curriculum_update = frames_since_restart 104 | return success_tolerance, last_curriculum_update 105 | 106 | 107 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float: 108 | """ 109 | Outputs 1 when x_curr == x_target (curriculum completed) 110 | Outputs 0 when x_curr == x_initial (just started training) 111 | Interpolates value in between. 112 | """ 113 | span = x_initial - x_target 114 | return (x_initial - x_curr) / span 115 | 116 | 117 | def tolerance_successes_objective( 118 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor 119 | ) -> Tensor: 120 | """ 121 | Objective for the PBT. This basically prioritizes tolerance over everything else when we 122 | execute the curriculum, after that it's just #successes. 
123 | """ 124 | # this grows from 0 to 1 as we reach the target tolerance 125 | if initial_tolerance > target_tolerance: 126 | # makeshift unit tests: 127 | eps = 1e-5 128 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps 129 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps 130 | mid_tolerance = (initial_tolerance + target_tolerance) / 2 131 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps 132 | 133 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance) 134 | else: 135 | tolerance_objective = 1.0 136 | 137 | if success_tolerance > target_tolerance: 138 | # add succeses with a small coefficient to differentiate between policies at the beginning of training 139 | # increment in tolerance improvement should always give higher value than higher successes with the 140 | # previous tolerance, that's why this coefficient is very small 141 | true_objective = (successes * 0.01) + tolerance_objective 142 | else: 143 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross 144 | # the threshold 145 | true_objective = successes + tolerance_objective 146 | 147 | return true_objective 148 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__init__.py -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/allegro_kuka_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/allegro_kuka_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/allegro_kuka_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/allegro_kuka_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/hand_arm_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/hand_arm_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/hand_arm_utils.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/hand_arm_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/misc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/misc.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pytorch_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/pytorch_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pytorch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/pytorch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/randomization_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/randomization_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/randomization_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/randomization_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/reformat.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/reformat.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/reformat.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/reformat.cpython-38.pyc 
-------------------------------------------------------------------------------- /utils/__pycache__/torch_jit_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/torch_jit_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/urdf_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/urdf_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/urdf_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/urdf_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/warmup_scheduler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/warmup_scheduler.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/warmup_scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/warmup_scheduler.cpython-38.pyc -------------------------------------------------------------------------------- /utils/allegro_kuka_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 
18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | from __future__ import annotations 31 | 32 | from dataclasses import dataclass 33 | from typing import Tuple, Dict, List 34 | 35 | from torch import Tensor 36 | 37 | 38 | @dataclass 39 | class DofParameters: 40 | """Joint/dof parameters.""" 41 | allegro_stiffness: float 42 | kuka_stiffness: float 43 | allegro_effort: float 44 | allegro_velocity: float 45 | kuka_effort: List[float] # separate per DOF 46 | allegro_damping: float 47 | kuka_damping: float 48 | kuka_velocity: float 49 | dof_friction: float 50 | allegro_armature: float 51 | kuka_armature: float 52 | 53 | @staticmethod 54 | def from_cfg(cfg: Dict) -> DofParameters: 55 | return DofParameters( 56 | allegro_stiffness=cfg["env"]["allegroStiffness"], 57 | kuka_stiffness=cfg["env"]["kukaStiffness"], 58 | allegro_effort=cfg["env"]["allegroEffort"], 59 | allegro_velocity=cfg["env"]["allegroVelocity"], 60 | kuka_effort=cfg["env"]["kukaEffort"], 61 | allegro_damping=cfg["env"]["allegroDamping"], 62 | kuka_damping=cfg["env"]["kukaDamping"], 63 | kuka_velocity=cfg["env"]["kukaVelocity"], 64 | dof_friction=cfg["env"]["dofFriction"], 65 | allegro_armature=cfg["env"]["allegroArmature"], 66 | kuka_armature=cfg["env"]["kukaArmature"], 67 | ) 68 | 69 | 70 | def populate_dof_properties(hand_arm_dof_props, params: DofParameters, arm_dofs: int, hand_dofs: int) -> None: 71 | assert len(hand_arm_dof_props["stiffness"]) == arm_dofs + hand_dofs 72 | 73 | hand_arm_dof_props["stiffness"][0:arm_dofs].fill(params.kuka_stiffness) 74 | hand_arm_dof_props["stiffness"][arm_dofs:].fill(params.allegro_stiffness) 75 | 76 | assert len(params.kuka_effort) == arm_dofs 77 | hand_arm_dof_props["effort"][0:arm_dofs] = params.kuka_effort 78 | hand_arm_dof_props["effort"][arm_dofs:].fill(params.allegro_effort) 79 | 80 | hand_arm_dof_props["velocity"][0:arm_dofs] = params.kuka_velocity 81 | hand_arm_dof_props["velocity"][arm_dofs:].fill(params.allegro_velocity) 82 | 83 | hand_arm_dof_props["damping"][0:arm_dofs].fill(params.kuka_damping) 84 | hand_arm_dof_props["damping"][arm_dofs:].fill(params.allegro_damping) 85 | 86 | if params.dof_friction >= 0: 87 | hand_arm_dof_props["friction"].fill(params.dof_friction) 88 | 89 | hand_arm_dof_props["armature"][0:arm_dofs].fill(params.kuka_armature) 90 | hand_arm_dof_props["armature"][arm_dofs:].fill(params.allegro_armature) 91 | 92 | 93 | def tolerance_curriculum( 94 | last_curriculum_update: int, 95 | frames_since_restart: int, 96 | curriculum_interval: int, 97 | prev_episode_successes: Tensor, 98 | success_tolerance: float, 99 | initial_tolerance: float, 100 | target_tolerance: float, 101 | tolerance_curriculum_increment: float, 102 | ) -> Tuple[float, int]: 103 | """ 104 | Returns: new tolerance, new 
last_curriculum_update 105 | """ 106 | if frames_since_restart - last_curriculum_update < curriculum_interval: 107 | return success_tolerance, last_curriculum_update 108 | 109 | mean_successes_per_episode = prev_episode_successes.mean() 110 | if mean_successes_per_episode < 3.0: 111 | # this policy is not good enough with the previous tolerance value, keep training for now... 112 | return success_tolerance, last_curriculum_update 113 | 114 | # decrease the tolerance now 115 | success_tolerance *= tolerance_curriculum_increment 116 | success_tolerance = min(success_tolerance, initial_tolerance) 117 | success_tolerance = max(success_tolerance, target_tolerance) 118 | 119 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}") 120 | 121 | last_curriculum_update = frames_since_restart 122 | return success_tolerance, last_curriculum_update 123 | 124 | 125 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float: 126 | """ 127 | Outputs 1 when x_curr == x_target (curriculum completed) 128 | Outputs 0 when x_curr == x_initial (just started training) 129 | Interpolates value in between. 130 | """ 131 | span = x_initial - x_target 132 | return (x_initial - x_curr) / span 133 | 134 | 135 | def tolerance_successes_objective( 136 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor 137 | ) -> Tensor: 138 | """ 139 | Objective for the PBT. This basically prioritizes tolerance over everything else when we 140 | execute the curriculum, after that it's just #successes. 141 | """ 142 | # this grows from 0 to 1 as we reach the target tolerance 143 | if initial_tolerance > target_tolerance: 144 | # makeshift unit tests: 145 | eps = 1e-5 146 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps 147 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps 148 | mid_tolerance = (initial_tolerance + target_tolerance) / 2 149 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps 150 | 151 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance) 152 | else: 153 | tolerance_objective = 1.0 154 | 155 | if success_tolerance > target_tolerance: 156 | # add succeses with a small coefficient to differentiate between policies at the beginning of training 157 | # increment in tolerance improvement should always give higher value than higher successes with the 158 | # previous tolerance, that's why this coefficient is very small 159 | true_objective = (successes * 0.01) + tolerance_objective 160 | else: 161 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross 162 | # the threshold 163 | true_objective = successes + tolerance_objective 164 | 165 | return true_objective 166 | -------------------------------------------------------------------------------- /utils/camera.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 300, 3 | "height": 300, 4 | "fx": 267.9263610839844, 5 | "fy": 267.9263610839844, 6 | "pose": [ 7 | 0.5531, 8 | -0.0643, 9 | 0.4484 10 | ], 11 | "R": [ 12 | [ 13 | 0.0347, 14 | 0.4223, 15 | -0.9058 16 | ], 17 | [ 18 | 0.9993, 19 | -0.0294, 20 | 0.0245 21 | ], 22 | [ 23 | -0.0163, 24 | -0.9060, 25 | -0.4231 26 | ] 27 | ] 28 | } -------------------------------------------------------------------------------- /utils/camera2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "width": 300, 3 | "height": 300, 4 | "fx": 267.5335, 5 | "fy": 267.5335, 6 | "pose": [ 7 | 0.5545, 8 | -0.0563, 9 | 0.4281 10 | ], 11 | "R": [ 12 | [ 13 | 0.0285, 14 | 0.4060, 15 | -0.9134 16 | ], 17 | [ 18 | 0.9993, 19 | -0.0355, 20 | 0.0154 21 | ], 22 | [ 23 | -0.0261, 24 | -0.9132, 25 | -0.4068 26 | ] 27 | ] 28 | } -------------------------------------------------------------------------------- /utils/hand_arm_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | 30 | from __future__ import annotations 31 | 32 | from dataclasses import dataclass 33 | from typing import Tuple, Dict, List 34 | 35 | from torch import Tensor 36 | 37 | 38 | @dataclass 39 | class DofParameters: 40 | """Joint/dof parameters.""" 41 | hand_stiffness: float 42 | arm_stiffness: float 43 | hand_effort: float 44 | hand_velocity: float 45 | arm_effort: List[float] # separate per DOF 46 | hand_damping: float 47 | arm_damping: float 48 | arm_velocity: float 49 | dof_friction: float 50 | hand_armature: float 51 | arm_armature: float 52 | 53 | @staticmethod 54 | def from_cfg(cfg: Dict) -> DofParameters: 55 | return DofParameters( 56 | hand_stiffness=cfg["env"]["handStiffness"], 57 | arm_stiffness=cfg["env"]["armStiffness"], 58 | hand_effort=cfg["env"]["handEffort"], 59 | hand_velocity=cfg["env"]["handVelocity"], 60 | arm_effort=cfg["env"]["armEffort"], 61 | hand_damping=cfg["env"]["handDamping"], 62 | arm_damping=cfg["env"]["armDamping"], 63 | arm_velocity=cfg["env"]["armVelocity"], 64 | dof_friction=cfg["env"]["dofFriction"], 65 | hand_armature=cfg["env"]["handArmature"], 66 | arm_armature=cfg["env"]["armArmature"], 67 | ) 68 | 69 | 70 | def populate_dof_properties(hand_arm_dof_props, params: DofParameters, arm_dofs: int, hand_dofs: int) -> None: 71 | assert len(hand_arm_dof_props["stiffness"]) == arm_dofs + hand_dofs 72 | 73 | hand_arm_dof_props["stiffness"][0:arm_dofs].fill(params.arm_stiffness) 74 | hand_arm_dof_props["stiffness"][arm_dofs:].fill(params.hand_stiffness) 75 | 76 | assert len(params.arm_effort) == arm_dofs 77 | hand_arm_dof_props["effort"][0:arm_dofs] = params.arm_effort 78 | hand_arm_dof_props["effort"][arm_dofs:].fill(params.hand_effort) 79 | 80 | hand_arm_dof_props["velocity"][0:arm_dofs] = params.arm_velocity 81 | hand_arm_dof_props["velocity"][arm_dofs:].fill(params.hand_velocity) 82 | 83 | hand_arm_dof_props["damping"][0:arm_dofs].fill(params.arm_damping) 84 | hand_arm_dof_props["damping"][arm_dofs:].fill(params.hand_damping) 85 | 86 | if params.dof_friction >= 0: 87 | hand_arm_dof_props["friction"].fill(params.dof_friction) 88 | 89 | hand_arm_dof_props["armature"][0:arm_dofs].fill(params.arm_armature) 90 | hand_arm_dof_props["armature"][arm_dofs:].fill(params.hand_armature) 91 | 92 | 93 | def tolerance_curriculum( 94 | last_curriculum_update: int, 95 | frames_since_restart: int, 96 | curriculum_interval: int, 97 | prev_episode_successes: Tensor, 98 | success_tolerance: float, 99 | initial_tolerance: float, 100 | target_tolerance: float, 101 | tolerance_curriculum_increment: float, 102 | ) -> Tuple[float, int]: 103 | """ 104 | Returns: new tolerance, new last_curriculum_update 105 | """ 106 | if frames_since_restart - last_curriculum_update < curriculum_interval: 107 | return success_tolerance, last_curriculum_update 108 | 109 | mean_successes_per_episode = prev_episode_successes.mean() 110 | if mean_successes_per_episode < 3.0: 111 | # this policy is not good enough with the previous tolerance value, keep training for now... 
112 | return success_tolerance, last_curriculum_update 113 | 114 | # decrease the tolerance now 115 | success_tolerance *= tolerance_curriculum_increment 116 | success_tolerance = min(success_tolerance, initial_tolerance) 117 | success_tolerance = max(success_tolerance, target_tolerance) 118 | 119 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}") 120 | 121 | last_curriculum_update = frames_since_restart 122 | return success_tolerance, last_curriculum_update 123 | 124 | 125 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float: 126 | """ 127 | Outputs 1 when x_curr == x_target (curriculum completed) 128 | Outputs 0 when x_curr == x_initial (just started training) 129 | Interpolates value in between. 130 | """ 131 | span = x_initial - x_target 132 | return (x_initial - x_curr) / span 133 | 134 | 135 | def tolerance_successes_objective( 136 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor 137 | ) -> Tensor: 138 | """ 139 | Objective for the PBT. This basically prioritizes tolerance over everything else when we 140 | execute the curriculum, after that it's just #successes. 141 | """ 142 | # this grows from 0 to 1 as we reach the target tolerance 143 | if initial_tolerance > target_tolerance: 144 | # makeshift unit tests: 145 | eps = 1e-5 146 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps 147 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps 148 | mid_tolerance = (initial_tolerance + target_tolerance) / 2 149 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps 150 | 151 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance) 152 | else: 153 | tolerance_objective = 1.0 154 | 155 | if success_tolerance > target_tolerance: 156 | # add succeses with a small coefficient to differentiate between policies at the beginning of training 157 | # increment in tolerance improvement should always give higher value than higher successes with the 158 | # previous tolerance, that's why this coefficient is very small 159 | true_objective = (successes * 0.01) + tolerance_objective 160 | else: 161 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross 162 | # the threshold 163 | true_objective = successes + tolerance_objective 164 | 165 | return true_objective 166 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tkinter import commondialog 3 | import numpy as np 4 | import wandb 5 | from PIL import Image 6 | import cv2 7 | class Logger: 8 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 9 | self._log_dir = log_dir 10 | print('########################') 11 | print('logging outputs to ', log_dir) 12 | print('########################') 13 | self._n_logged_samples = n_logged_samples 14 | self._summ_writer = summary_writer 15 | 16 | def flush(self): 17 | self._summ_writer.flush() 18 | return 19 | 20 | def log_scalar(self, scalar, name, step_, commit=False): 21 | if self._summ_writer: 22 | self._summ_writer.log({'{}'.format(name): scalar}, step=step_) #, commit=commit) 23 | 24 | def log_scalars(self, scalar_dict, group_name, step, phase, commit=True): 25 | """Will log all scalars in the same plot.""" 26 | if 
self._summ_writer: 27 | self._summ_writer.log({'{}/{}'.format(group_name, phase): scalar_dict}, step=step) # Not sure if this will work! 28 | #self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 29 | 30 | def log_image(self, image, name, step, commit=False): 31 | assert(len(image.shape) == 3) # [C, H, W] 32 | image = wandb.Image(image, caption=f"{name}", step=step, commit=commit) 33 | #self._summ_writer.add_image('{}'.format(name), image, step) 34 | 35 | # TODO: Add more logging as needed 36 | def log_gifs(self,imgs,name="gif",commit=False): 37 | 38 | images = [Image.fromarray(image.cpu().numpy().astype(np.uint8)) for image in imgs] 39 | wandb.log({name: [wandb.Image(image) for image in images]}) 40 | 41 | def log_video(self,imgs,name="video", step=0, commit=False, fps=15): 42 | 43 | frames = [img.cpu().numpy().astype(np.uint8) for img in imgs] 44 | frames = np.array(frames) # [T, H, W, C] 45 | frames = np.transpose(frames, (0, 3, 1, 2)) # [T, C, H, W] 46 | 47 | print("here") 48 | wandb.log({ 49 | name: wandb.Video(frames, fps=fps, format='mp4'), 50 | }, step=step) 51 | 52 | print("here2") 53 | 54 | #def log_video(self, video_frames, name, step, fps=10): 55 | # assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 56 | # self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 57 | 58 | #def log_trajs_as_videos(self, trajs, step, max_videos_to_save=2, fps=10, video_title='video'): 59 | 60 | # # reshape the rollouts 61 | # videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in trajs] 62 | 63 | # # max rollout length 64 | # max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 65 | # max_length = videos[0].shape[0] 66 | # for i in range(max_videos_to_save): 67 | # if videos[i].shape[0]>max_length: 68 | # max_length = videos[i].shape[0] 69 | 70 | # # pad rollouts to all be same length 71 | # for i in range(max_videos_to_save): 72 | # if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 
83 | # self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 84 | 85 | #def log_figure(self, figure, name, step, phase): 86 | # """figure: matplotlib.pyplot figure handle""" 87 | # self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 88 | 89 | #def log_graph(self, array, name, step, phase): 90 | # """figure: matplotlib.pyplot figure handle""" 91 | # im = plot_graph(array) 92 | # self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 93 | 94 | #def dump_scalars(self, log_path=None): 95 | # log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 96 | # self._summ_writer.export_scalars_to_json(log_path) 97 | 98 | def log_dict(self, logs, itr, verbose=True): 99 | if self._summ_writer: 100 | for key, value in logs.items(): 101 | if verbose: 102 | print("{} : {}".format(key, value)) 103 | self.log_scalar(value, key, itr) 104 | -------------------------------------------------------------------------------- /utils/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from copy import deepcopy 4 | 5 | device=None 6 | 7 | 8 | def from_numpy(*args, **kwargs): 9 | return torch.from_numpy(*args, **kwargs).float().to(device) 10 | 11 | 12 | def to_numpy(tensor): 13 | return tensor.to('cpu').detach().numpy() 14 | 15 | 16 | def to_torch(element,device): 17 | 18 | if isinstance(element,dict): 19 | 20 | new_element = deepcopy(element) 21 | for key in element: 22 | new_element[key] = to_torch(element[key],device) 23 | return new_element 24 | 25 | elif isinstance(element,list): 26 | try: 27 | return torch.tensor(element).float().to(device) 28 | except: 29 | return element 30 | 31 | elif isinstance(element,np.ndarray): 32 | return torch.from_numpy(element).float().to(device) 33 | 34 | else: 35 | return element 36 | 37 | -------------------------------------------------------------------------------- /utils/randomization_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from termcolor import cprint 3 | 4 | 5 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 6 | def randomize_table_z(gym,env_ptr,table_handle,table_rand_config): 7 | #does not work. need to change table position differently. 
8 | fr_z = np.random.uniform(table_rand_config['lower'],table_rand_config['upper']) 9 | prop = gym.get_actor_rigid_body_properties(env_ptr, table_handle) 10 | assert len(prop) == 1 11 | print(fr_z) 12 | obj_com = prop[0].com.z*fr_z 13 | prop[0].com.z = obj_com 14 | gym.set_actor_rigid_body_properties(env_ptr, table_handle, prop) 15 | 16 | 17 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 18 | def randomize_object_scale(gym,env_ptr,object_handle,object_rand_config): 19 | 20 | scale = np.random.uniform(object_rand_config['lower'], object_rand_config['upper']) 21 | gym.set_actor_scale(env_ptr, object_handle,scale) 22 | return scale 23 | 24 | 25 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 26 | def randomize_object_mass(gym,env_ptr,object_handle,objmass_rand_config): 27 | 28 | prop = gym.get_actor_rigid_body_properties(env_ptr, object_handle) 29 | ret = [] 30 | for p in prop: 31 | fr = np.random.uniform(objmass_rand_config['lower'], objmass_rand_config['upper']) 32 | p.mass = p.mass*fr 33 | p.inertia.x = p.inertia.x*fr 34 | p.inertia.y = p.inertia.y*fr 35 | p.inertia.z = p.inertia.z*fr 36 | ret.append(p.mass) 37 | 38 | gym.set_actor_rigid_body_properties(env_ptr, object_handle, prop) 39 | 40 | return ret 41 | 42 | 43 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 44 | def randomize_friction(gym,env_ptr,handle,rand_friction_config): 45 | 46 | rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 47 | rest = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 48 | props = gym.get_actor_rigid_shape_properties(env_ptr, handle) 49 | friction = [] 50 | restitution = [] 51 | for p in props: 52 | p.friction = rand_friction*p.friction 53 | p.restitution = rest*p.restitution 54 | friction.append(p.friction) 55 | restitution.append(p.restitution) 56 | 57 | gym.set_actor_rigid_shape_properties(env_ptr, handle, props) 58 | 59 | return friction,restitution 60 | 61 | # def randomize_friction(gym,env_ptr,hand_handle,object_handle,rand_friction_config): 62 | 63 | # rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 64 | # obj_restitution = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 65 | # hand_props = gym.get_actor_rigid_shape_properties(env_ptr, hand_handle) 66 | # hand_friction = [] 67 | # hand_restitution = [] 68 | # for p in hand_props: 69 | # p.friction = rand_friction 70 | # p.restitution = obj_restitution 71 | # hand_friction.append(p.friction) 72 | # hand_restitution.append(p.restitution) 73 | 74 | # gym.set_actor_rigid_shape_properties(env_ptr, hand_handle, hand_props) 75 | 76 | 77 | # rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 78 | # obj_rest = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 79 | # obj_friction = [] 80 | # obj_restitution = [] 81 | # obj_props = gym.get_actor_rigid_shape_properties(env_ptr, object_handle) 82 | # for p in obj_props: 83 | # p.friction = rand_friction*p.friction 84 | # p.restitution = obj_rest*p.restitution 85 | # obj_friction.append(p.friction) 86 | # obj_restitution.append(p.restitution) 87 | 88 | # gym.set_actor_rigid_shape_properties(env_ptr, object_handle, obj_props) 89 | 90 | # return hand_friction, hand_restitution, obj_friction, obj_restitution #not sure if just one value can influence the full policy but okay for now. 
91 | 92 | 93 | # def randomize_object_position(env): 94 | # "already randomized in code" 95 | # pass 96 | 97 | # def randomize_robot_damping(env): 98 | # pass 99 | 100 | # def randomize_robot_stiffness(env): 101 | # pass 102 | 103 | -------------------------------------------------------------------------------- /utils/reformat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | from omegaconf import DictConfig, OmegaConf 30 | from typing import Dict 31 | 32 | def omegaconf_to_dict(d: DictConfig)->Dict: 33 | """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation.""" 34 | ret = {} 35 | for k, v in d.items(): 36 | if isinstance(v, DictConfig): 37 | ret[k] = omegaconf_to_dict(v) 38 | else: 39 | ret[k] = v 40 | return ret 41 | 42 | def print_dict(val, nesting: int = -4, start: bool = True): 43 | """Outputs a nested dictionory.""" 44 | if type(val) == dict: 45 | if not start: 46 | print('') 47 | nesting += 4 48 | for k in val: 49 | print(nesting * ' ', end='') 50 | print(k, end=': ') 51 | print_dict(val[k], nesting, start=False) 52 | else: 53 | print(val) 54 | 55 | # EOF 56 | -------------------------------------------------------------------------------- /utils/rna_util.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | from __future__ import print_function 31 | 32 | import torch 33 | import torch.nn as nn 34 | import torch.nn.functional as F 35 | 36 | 37 | class RandomNetworkAdversary(nn.Module): 38 | 39 | def __init__(self, num_envs, in_dims, out_dims, softmax_bins, device): 40 | super(RandomNetworkAdversary, self).__init__() 41 | 42 | """ 43 | Class to add random action to the action generated by the policy. 44 | The output is binned to 32 bins per channel and we do softmax over 45 | these bins to figure out the most likely joint angle. 46 | 47 | Note: OpenAI et al. 2019 found out that if they used a continuous space 48 | and a tanh non-linearity, actions would always be close to 0. 49 | Section B.3 https://arxiv.org/abs/1910.07113 50 | 51 | Q: Why do we need dropouts here? 52 | 53 | A: If we were using a CPU-based simulator as in OpenAI et al. 2019, we 54 | will use a different RNA network for different CPU. However, 55 | this is not feasible for a GPU-based simulator as that would mean 56 | creating N_envs RNA networks which will overwhelm the GPU-memory. 57 | Therefore, dropout is a nice approximation of this by re-sampling 58 | weights of the same neural network for each different env on the GPU. 
59 | """ 60 | 61 | self.in_dims = in_dims 62 | self.out_dims = out_dims 63 | self.softmax_bins = softmax_bins 64 | self.num_envs = num_envs 65 | 66 | self.device = device 67 | 68 | self.num_feats1 = 512 69 | self.num_feats2 = 1024 70 | 71 | # Sampling random probablities for dropout masks 72 | dropout_probs = torch.rand((2, )) 73 | 74 | # Setting up the RNA neural network here 75 | 76 | # First layer 77 | 78 | self.fc1 = nn.Linear(in_dims, self.num_feats1).to(self.device) 79 | 80 | self.dropout_masks1 = torch.bernoulli(torch.ones((self.num_envs, \ 81 | self.num_feats1)), p=dropout_probs[0]).to(self.device) 82 | 83 | self.fc1_1 = nn.Linear(self.num_feats1, self.num_feats1).to(self.device) 84 | 85 | # Second layer 86 | self.fc2 = nn.Linear(self.num_feats1, self.num_feats2).to(self.device) 87 | 88 | self.dropout_masks2 = torch.bernoulli(torch.ones((self.num_envs, \ 89 | self.num_feats2)), p=dropout_probs[1]).to(self.device) 90 | 91 | self.fc2_1 = nn.Linear(self.num_feats2, self.num_feats2).to(self.device) 92 | 93 | # Last layer 94 | self.fc3 = nn.Linear(self.num_feats2, out_dims*softmax_bins).to(self.device) 95 | 96 | # This is needed to reset weights and dropout masks 97 | self._refresh() 98 | 99 | def _refresh(self): 100 | 101 | self._init_weights() 102 | self.eval() 103 | self.refresh_dropout_masks() 104 | 105 | def _init_weights(self): 106 | 107 | print('initialising weights for random network') 108 | 109 | nn.init.kaiming_uniform_(self.fc1.weight) 110 | nn.init.kaiming_uniform_(self.fc1_1.weight) 111 | nn.init.kaiming_uniform_(self.fc2.weight) 112 | nn.init.kaiming_uniform_(self.fc2_1.weight) 113 | nn.init.kaiming_uniform_(self.fc3.weight) 114 | 115 | return 116 | 117 | def refresh_dropout_masks(self): 118 | 119 | dropout_probs = torch.rand((2, )) 120 | 121 | self.dropout_masks1 = torch.bernoulli(torch.ones((self.num_envs, self.num_feats1)), \ 122 | p=dropout_probs[0]).to(self.dropout_masks1.device) 123 | 124 | self.dropout_masks2 = torch.bernoulli(torch.ones((self.num_envs, self.num_feats2)), \ 125 | p=dropout_probs[1]).to(self.dropout_masks2.device) 126 | 127 | return 128 | 129 | def forward(self, x): 130 | 131 | x = self.fc1(x) 132 | x = F.relu(x) 133 | x = self.fc1_1(x) 134 | x = self.dropout_masks1 * x 135 | 136 | x = self.fc2(x) 137 | x = F.relu(x) 138 | x = self.fc2_1(x) 139 | x = self.dropout_masks2 * x 140 | 141 | x = self.fc3(x) 142 | 143 | x = x.view(-1, self.out_dims, self.softmax_bins) 144 | output = F.softmax(x, dim=-1) 145 | 146 | # We have discretised the joint angles into bins 147 | # Now we pick up the bin for each joint angle 148 | # corresponding to the highest softmax value / prob. 
149 | 150 | return output 151 | 152 | 153 | if __name__ == "__main__": 154 | 155 | num_envs = 1024 156 | RNA = RandomNetworkAdversary(num_envs=num_envs, in_dims=16, out_dims=16, softmax_bins=32, device='cuda') 157 | 158 | x = torch.tensor(torch.randn(num_envs, 16).to(RNA.device)) 159 | y = RNA(x) 160 | import ipdb; ipdb.set_trace() 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /utils/urdf_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def read_xml(filename): 3 | import xml.etree.ElementTree as Et 4 | root = Et.parse(filename).getroot() 5 | return root 6 | 7 | 8 | def get_link_meshes_from_urdf(urdf_file,link_names): 9 | root = read_xml(urdf_file) 10 | link_meshfiles =[] 11 | for link_name in link_names: 12 | for link in root.findall('link'): 13 | if link.attrib['name'] == link_name: 14 | for mesh in link.findall('visual/geometry/mesh'): 15 | link_meshfiles.append(mesh.attrib['filename']) 16 | 17 | assert len(link_meshfiles) == len(link_names) 18 | return link_meshfiles 19 | 20 | 21 | def load_asset_files_public(asset_root): 22 | import os 23 | folder_name = 'pybullet-URDF-models/urdf_models/models' 24 | asset_files = {} 25 | 26 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)): 27 | 28 | for file in files: 29 | if file.endswith("model.urdf"): 30 | obj_name = root.split('/')[-1] 31 | dir = root[len(asset_root)+1:] 32 | asset_files[obj_name]=os.path.join(dir, file) 33 | 34 | return asset_files 35 | 36 | 37 | 38 | 39 | def load_asset_files_ycb(asset_root,folder_name='ycb_real_inertia'): 40 | 41 | import os 42 | asset_files = {} 43 | 44 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)): 45 | 46 | for file in files: 47 | if file.endswith(".urdf"): 48 | obj_name = file.split('.')[0] 49 | dir = root[len(asset_root)+1:] 50 | asset_files[obj_name]={} 51 | asset_files[obj_name]['urdf']=os.path.join(dir, file) 52 | asset_files[obj_name]['mesh']=os.path.join(dir, file.split('.')[0]+'/google_16k/textured.obj') 53 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['mesh'])) 54 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['urdf'])) 55 | 56 | return asset_files 57 | 58 | def load_asset_files_ycb_lowmem(asset_root,folder_name='ycb_real_inertia'): 59 | import os 60 | asset_files = {} 61 | 62 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)): 63 | 64 | for file in files: 65 | if file.endswith(".urdf"): 66 | obj_name = file.split('.')[0] 67 | number = obj_name.split('_')[0] 68 | print(obj_name,number) 69 | if number in ['070-a','070-b','072','036','032','029','048','027','019','032','026']: 70 | dir = root[len(asset_root)+1:] 71 | asset_files[obj_name]={} 72 | asset_files[obj_name]['urdf']=os.path.join(dir, file) 73 | asset_files[obj_name]['mesh']=os.path.join(dir, file.split('.')[0]+'/google_16k/textured.obj') 74 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['mesh'])) 75 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['urdf'])) 76 | 77 | return asset_files 78 | 79 | 80 | def fix_ycb_scale(asset_root): 81 | import os 82 | import shutil 83 | import xml.etree.ElementTree as Et 84 | folder_name = 'ycb' 85 | new_folder_name = 'ycb_scaled' 86 | if not os.path.exists(os.path.join(asset_root,new_folder_name)): 87 | shutil.copytree(os.path.join(asset_root,folder_name), os.path.join(asset_root,new_folder_name)) 88 | 89 | for root, 
dirs, files in os.walk(os.path.join(asset_root,new_folder_name)): 90 | for file in files: 91 | if file.endswith(".urdf"): 92 | filepath = os.path.join(root, file) 93 | urdf = read_xml(filepath) 94 | for mesh in urdf.findall(f'.//collision/geometry/'): 95 | mesh.attrib['scale']='1 1 1' 96 | for mesh in urdf.findall(f'.//visual/geometry/'): 97 | mesh.attrib['scale']='1 1 1' 98 | 99 | new_xml = Et.ElementTree() 100 | new_xml._setroot(urdf) 101 | with open(filepath, "wb") as f: 102 | new_xml.write(f) 103 | 104 | return 105 | 106 | 107 | 108 | 109 | 110 | def get_vol_ratio(scale1,scale2): 111 | nums1 = [float(s) for s in scale1.split(' ')] 112 | nums2 = [float(s) for s in scale2.split(' ')] 113 | nums1 = np.array(nums1) 114 | nums2 = np.array(nums2) 115 | return np.prod(nums1)/np.prod(nums2) 116 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | # python 30 | #import pwd 31 | import getpass 32 | import tempfile 33 | import time 34 | from collections import OrderedDict 35 | from os.path import join 36 | 37 | import numpy as np 38 | import torch 39 | import random 40 | import os 41 | import subprocess 42 | import shlex 43 | 44 | def retry(times, exceptions): 45 | """ 46 | Retry Decorator https://stackoverflow.com/a/64030200/1645784 47 | Retries the wrapped function/method `times` times if the exceptions listed 48 | in ``exceptions`` are thrown 49 | :param times: The number of times to repeat the wrapped function/method 50 | :type times: Int 51 | :param exceptions: Lists of exceptions that trigger a retry attempt 52 | :type exceptions: Tuple of Exceptions 53 | """ 54 | def decorator(func): 55 | def newfn(*args, **kwargs): 56 | attempt = 0 57 | while attempt < times: 58 | try: 59 | return func(*args, **kwargs) 60 | except exceptions: 61 | print(f'Exception thrown when attempting to run {func}, attempt {attempt} out of {times}') 62 | time.sleep(min(2 ** attempt, 30)) 63 | attempt += 1 64 | 65 | return func(*args, **kwargs) 66 | return newfn 67 | return decorator 68 | 69 | 70 | def flatten_dict(d, prefix='', separator='.'): 71 | res = dict() 72 | for key, value in d.items(): 73 | if isinstance(value, (dict, OrderedDict)): 74 | res.update(flatten_dict(value, prefix + key + separator, separator)) 75 | else: 76 | res[prefix + key] = value 77 | 78 | return res 79 | 80 | 81 | def set_np_formatting(): 82 | """ formats numpy print """ 83 | np.set_printoptions(edgeitems=30, infstr='inf', 84 | linewidth=4000, nanstr='nan', precision=2, 85 | suppress=False, threshold=10000, formatter=None) 86 | 87 | 88 | def set_seed(seed, torch_deterministic=False, rank=0): 89 | """ set seed across modules """ 90 | if seed == -1 and torch_deterministic: 91 | seed = 42 + rank 92 | elif seed == -1: 93 | seed = np.random.randint(0, 10000) 94 | else: 95 | seed = seed + rank 96 | 97 | print("Setting seed: {}".format(seed)) 98 | 99 | random.seed(seed) 100 | np.random.seed(seed) 101 | torch.manual_seed(seed) 102 | os.environ['PYTHONHASHSEED'] = str(seed) 103 | torch.cuda.manual_seed(seed) 104 | torch.cuda.manual_seed_all(seed) 105 | 106 | if torch_deterministic: 107 | # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility 108 | os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' 109 | torch.backends.cudnn.benchmark = False 110 | torch.backends.cudnn.deterministic = True 111 | torch.use_deterministic_algorithms(True) 112 | else: 113 | torch.backends.cudnn.benchmark = True 114 | torch.backends.cudnn.deterministic = False 115 | 116 | return seed 117 | 118 | def nested_dict_set_attr(d, key, val): 119 | pre, _, post = key.partition('.') 120 | if post: 121 | nested_dict_set_attr(d[pre], post, val) 122 | else: 123 | d[key] = val 124 | 125 | def nested_dict_get_attr(d, key): 126 | pre, _, post = key.partition('.') 127 | if post: 128 | return nested_dict_get_attr(d[pre], post) 129 | else: 130 | return d[key] 131 | 132 | def ensure_dir_exists(path): 133 | if not os.path.exists(path): 134 | os.makedirs(path) 135 | return path 136 | 137 | 138 | def safe_ensure_dir_exists(path): 139 | """Should be safer in multi-treaded environment.""" 140 | try: 141 | return ensure_dir_exists(path) 142 | except FileExistsError: 143 | return path 144 | 145 | 146 | def get_username(): 147 | uid = os.getuid() 148 | try: 149 | return getpass.getuser() 150 | except KeyError: 151 | # worst case scenario - let's just use uid 152 | return str(uid) 153 | 154 | 155 | def 
project_tmp_dir(): 156 | tmp_dir_name = f'ige_{get_username()}' 157 | return safe_ensure_dir_exists(join(tempfile.gettempdir(), tmp_dir_name)) 158 | 159 | # EOF 160 | 161 | 162 | def git_hash(): 163 | cmd = 'git log -n 1 --pretty="%h"' 164 | ret = subprocess.check_output(shlex.split(cmd)).strip() 165 | if isinstance(ret, bytes): 166 | ret = ret.decode() 167 | return ret 168 | 169 | 170 | def git_diff_config(name): 171 | cmd = f'git diff --unified=0 {name}' 172 | ret = subprocess.check_output(shlex.split(cmd)).strip() 173 | if isinstance(ret, bytes): 174 | ret = ret.decode() 175 | return ret 176 | 177 | 178 | -------------------------------------------------------------------------------- /utils/wandb_utils.py: -------------------------------------------------------------------------------- 1 | from rl_games.common.algo_observer import AlgoObserver 2 | 3 | from utils.utils import retry 4 | from utils.reformat import omegaconf_to_dict 5 | 6 | 7 | class WandbAlgoObserver(AlgoObserver): 8 | """Need this to propagate the correct experiment name after initialization.""" 9 | 10 | def __init__(self, cfg): 11 | super().__init__() 12 | self.cfg = cfg 13 | 14 | def before_init(self, base_name, config, experiment_name): 15 | """ 16 | Must call initialization of Wandb before RL-games summary writer is initialized, otherwise 17 | sync_tensorboard does not work. 18 | """ 19 | 20 | import wandb 21 | 22 | wandb_unique_id = f"uid_{experiment_name}" 23 | print(f"Wandb using unique id {wandb_unique_id}") 24 | 25 | cfg = self.cfg 26 | 27 | # this can fail occasionally, so we try a couple more times 28 | @retry(3, exceptions=(Exception,)) 29 | def init_wandb(): 30 | wandb.init( 31 | project=cfg.wandb_project, 32 | entity=cfg.wandb_entity, 33 | group=cfg.wandb_group, 34 | tags=cfg.wandb_tags, 35 | sync_tensorboard=True, 36 | id=wandb_unique_id, 37 | name=experiment_name, 38 | resume=True, 39 | settings=wandb.Settings(start_method='fork'), 40 | ) 41 | 42 | if cfg.wandb_logcode_dir: 43 | wandb.run.log_code(root=cfg.wandb_logcode_dir) 44 | print('wandb running directory........', wandb.run.dir) 45 | 46 | print('Initializing WandB...') 47 | try: 48 | init_wandb() 49 | except Exception as exc: 50 | print(f'Could not initialize WandB! {exc}') 51 | 52 | if isinstance(self.cfg, dict): 53 | wandb.config.update(self.cfg, allow_val_change=True) 54 | else: 55 | wandb.config.update(omegaconf_to_dict(self.cfg), allow_val_change=True) 56 | -------------------------------------------------------------------------------- /utils/warmup_scheduler.py: -------------------------------------------------------------------------------- 1 | class WarmupScheduler: 2 | def __init__(self, optimizer, target_lr,initial_lr=1e-7,warmup_steps=25): 3 | self.optimizer = optimizer 4 | self.warmup_steps = warmup_steps 5 | self.initial_lr = initial_lr 6 | self.target_lr = target_lr 7 | self.current_step = 0 8 | 9 | def step(self): 10 | if self.current_step < self.warmup_steps: 11 | # Linearly increase the learning rate 12 | lr = (self.target_lr - self.initial_lr) * (self.current_step / self.warmup_steps) + self.initial_lr 13 | # Apply the learning rate to the optimizer 14 | for param_group in self.optimizer.param_groups: 15 | param_group['lr'] = lr 16 | # Increment the step count 17 | self.current_step += 1 18 | --------------------------------------------------------------------------------
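A minimal usage sketch for the WarmupScheduler defined above (the toy model, optimizer, target learning rate, and step counts are illustrative assumptions, not values taken from this repository's training scripts; the import path is assumed from the file location utils/warmup_scheduler.py):

import torch

from utils.warmup_scheduler import WarmupScheduler  # assumed import path for the class above

# Toy model and optimizer; start at the warmup's initial learning rate.
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-7)

# Ramp the learning rate linearly from 1e-7 to 3e-4 over the first 25 calls to step().
scheduler = WarmupScheduler(optimizer, target_lr=3e-4, initial_lr=1e-7, warmup_steps=25)

for step in range(100):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 10)).pow(2).mean()  # dummy objective
    loss.backward()
    optimizer.step()
    scheduler.step()  # becomes a no-op once the 25 warmup steps are exhausted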