├── .gitiginore ├── .gitignore ├── README.md ├── algo ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── imagenet_depth_encoder.cpython-37.pyc │ │ ├── models.cpython-37.pyc │ │ ├── models.cpython-38.pyc │ │ ├── models_priv.cpython-37.pyc │ │ ├── models_priv.cpython-38.pyc │ │ ├── observation_encoder.cpython-37.pyc │ │ ├── proprio_depth_transformer.cpython-37.pyc │ │ ├── proprio_depth_transformer.cpython-38.pyc │ │ ├── proprio_embd_transformer.cpython-37.pyc │ │ ├── proprio_mvp_rgb_transformer.cpython-37.pyc │ │ ├── proprio_r3m_rgb_transformer.cpython-37.pyc │ │ ├── proprio_vip_transformer.cpython-37.pyc │ │ ├── proprio_vit_transformer.cpython-37.pyc │ │ ├── pt_actor_critic.cpython-37.pyc │ │ ├── rt_actor_critic.cpython-37.pyc │ │ ├── rt_embed_actor_critic.cpython-37.pyc │ │ ├── running_mean_std.cpython-37.pyc │ │ ├── running_mean_std.cpython-38.pyc │ │ ├── vision_encoder.cpython-37.pyc │ │ └── vision_encoder.cpython-38.pyc │ ├── models.py │ ├── models_priv.py │ ├── rt_actor_critic.py │ └── running_mean_std.py ├── ppo_transformer │ ├── __pycache__ │ │ ├── experience.cpython-37.pyc │ │ ├── mem_eff_experience.cpython-37.pyc │ │ ├── ppo_transformer.cpython-37.pyc │ │ └── ppobc_transformer.cpython-37.pyc │ ├── experience.py │ └── ppo_transformer.py └── pretrained │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── policy_transformer.cpython-37.pyc │ ├── robot_transformer.cpython-37.pyc │ ├── robot_transformer_ar.cpython-37.pyc │ └── transformer.cpython-37.pyc │ ├── dataset.py │ ├── depth_trainer.py │ ├── depth_trainer_multigpu.py │ ├── robot_dataset.py │ ├── robot_transformer_ar.py │ ├── trainer.py │ └── transformer.py ├── cfg ├── config.yaml ├── launcher │ └── default.yaml ├── pretrain │ ├── AllegroXarmCabinet.yaml │ ├── AllegroXarmNew.yaml │ └── AllegroXarmThrowing.yaml ├── task │ ├── AllegroXarmCabinet.yaml │ ├── AllegroXarmNew.yaml │ └── AllegroXarmThrowing.yaml └── train │ ├── AllegroXarmCabinetPPO.yaml │ ├── AllegroXarmNewPPO.yaml │ └── AllegroXarmThrowingPPO.yaml ├── env.yml ├── imgs └── approach.png ├── scripts ├── finetune.py ├── finetune │ ├── finetune_cabinet.sh │ ├── finetune_grasp.sh │ └── finetune_throw.sh ├── pretrain.py ├── pretrain.sh └── run_policy.sh ├── tasks ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── allegro_kuka_grasping.cpython-37.pyc │ ├── allegro_kuka_grasping.cpython-38.pyc │ ├── torch_jit_utils.cpython-37.pyc │ ├── torch_jit_utils.cpython-38.pyc │ ├── xarm_cabinet.cpython-37.pyc │ ├── xarm_cabinet.cpython-38.pyc │ ├── xarm_grasping.cpython-37.pyc │ ├── xarm_grasping.cpython-38.pyc │ ├── xarm_grasping_debug.cpython-37.pyc │ ├── xarm_grasping_debug.cpython-38.pyc │ ├── xarm_grasping_new.cpython-37.pyc │ ├── xarm_grasping_new.cpython-38.pyc │ ├── xarm_grasping_real.cpython-37.pyc │ ├── xarm_throwing.cpython-37.pyc │ └── xarm_throwing.cpython-38.pyc ├── base │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── vec_task.cpython-37.pyc │ │ └── vec_task.cpython-38.pyc │ └── vec_task.py ├── torch_jit_utils.py ├── xarm7_utils.py ├── xarm_cabinet.py ├── xarm_grasping_new.py └── xarm_throwing.py └── utils ├── __init__.py ├── __pycache__ ├── __init__.cpython-37.pyc ├── __init__.cpython-38.pyc ├── allegro_kuka_utils.cpython-37.pyc ├── allegro_kuka_utils.cpython-38.pyc ├── hand_arm_utils.cpython-37.pyc ├── hand_arm_utils.cpython-38.pyc ├── logger.cpython-37.pyc ├── logger.cpython-38.pyc 
├── misc.cpython-37.pyc ├── misc.cpython-38.pyc ├── pytorch_utils.cpython-37.pyc ├── pytorch_utils.cpython-38.pyc ├── randomization_utils.cpython-37.pyc ├── randomization_utils.cpython-38.pyc ├── reformat.cpython-37.pyc ├── reformat.cpython-38.pyc ├── torch_jit_utils.cpython-37.pyc ├── urdf_utils.cpython-37.pyc ├── urdf_utils.cpython-38.pyc ├── utils.cpython-37.pyc ├── utils.cpython-38.pyc ├── warmup_scheduler.cpython-37.pyc └── warmup_scheduler.cpython-38.pyc ├── allegro_kuka_utils.py ├── camera.json ├── camera2.json ├── dr_utils.py ├── hand_arm_utils.py ├── logger.py ├── misc.py ├── pytorch_utils.py ├── randomization_utils.py ├── reformat.py ├── rlgames_utils.py ├── rna_util.py ├── torch_jit_utils.py ├── urdf_utils.py ├── utils.py ├── wandb_utils.py └── warmup_scheduler.py /.gitiginore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | outputs/ 3 | assets/ 4 | */*/.pyc 5 | *.pyc 6 | __pycache__/ 7 | */__pycache__/ 8 | wandb/ 9 | *.log 10 | algo/pretrained/models/* 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hand-object Interaction Pretraining from Videos 2 | 3 | This repo contains code for the paper [Hand-object interaction Pretraining from Videos](https://hgaurav2k.github.io/hop/pdf/manuscript.pdf) 4 | 5 | 6 | 7 | For a brief overview, check out the project [webpage](https://hgaurav2k.github.io/hop)! 8 | 9 | 10 | 11 | 12 | For any questions, please contact [Himanshu Gaurav Singh](https://hgaurav2k.github.io/). 13 | 14 | 15 | ## Setup 16 | 17 | * Create conda environment using `conda env create -f env.yml` 18 | * Install [IsaacGym](https://developer.nvidia.com/isaac-gym) in this environment. 19 | * Download the [asset](https://drive.google.com/drive/folders/1BE3lg8k1kssGxojtL0OkQLscSAkbpNzS?usp=sharing) folder and put them in the root directory. 20 | 21 | ## Running the code 22 | 23 | ### Pretraining 24 | 25 | 26 | * Download the hand-object interaction dataset from [here](https://drive.google.com/file/d/12-xghxt0rf_0xDo5SMdrRBnNr7LWJ02Y/view?usp=drive_link). Extract using `tar -xf hoi_pretraining_data.tar.xz`. Put it under the root directory. 27 | * Run `bash scripts/pretrain.sh ` 28 | 29 | ### Finetuning 30 | 31 | 32 | * Download pretrained checkpoint from [here](https://drive.google.com/file/d/10zYrzPK8T-1zB8dqB5o2MfK_iF0Uda_f/view?usp=sharing). You can also use your own trained checkpoint. 33 | * For your choice of `task`, run `bash scripts/finetune/finetune_{task}.sh`. 34 | 36 | 37 | 38 | ### Visualising trained policies 39 | 40 | * Run `bash scripts/run_policy.sh `. 41 | 42 | 43 | ## Citation 44 | 45 | 46 | ## Acknowledgment 47 | This work was supported by the DARPA Machine Common Sense program, the DARPA Transfer from Imprecise and Abstract Models to Autonomous Technologies (TIAMAT) program, and by the ONR MURI award N00014-21-1-2801. This work was also funded by ONR MURI N00014-22-1-2773. We thank Adhithya Iyer for assistance with teleoperation systems, Phillip Wu for setting-up the real robot, and Raven Huang, Jathushan Rajasegaran and Yutong Bai for helpful discussions. 
48 | -------------------------------------------------------------------------------- /algo/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__init__.py -------------------------------------------------------------------------------- /algo/models/__pycache__/running_mean_std.cpython-37.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/running_mean_std.cpython-37.pyc -------------------------------------------------------------------------------- /algo/models/__pycache__/running_mean_std.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/running_mean_std.cpython-38.pyc -------------------------------------------------------------------------------- /algo/models/__pycache__/vision_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/vision_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /algo/models/__pycache__/vision_encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/vision_encoder.cpython-38.pyc -------------------------------------------------------------------------------- /algo/models/models.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import copy 13 | 14 | 15 | class SavingModel(nn.Module): 16 | "Saves the two models (runnig_mean_std and actor_critic) required for infence and simplifies TT code" 17 | def __init__(self, actor_critic_model, running_std_model): 18 | super(SavingModel, self).__init__() 19 | self.actor_critic_model = copy.deepcopy(actor_critic_model) 20 | self.running_std_model = copy.deepcopy(running_std_model) 21 | self.running_std_model.eval() 22 | 23 | def forward(self, x): 24 | x = self.running_std_model(x) 25 | input_dict = {'obs': x} 26 | mu = self.actor_critic_model.infer_action(input_dict) 27 | return mu 28 | 29 | class MLP(nn.Module): 30 | def __init__(self, units, input_size): 31 | super(MLP, self).__init__() 32 | layers = [] 33 | for output_size in units: 34 | layers.append(nn.Linear(input_size, output_size)) 35 | layers.append(nn.ELU()) 36 | input_size = output_size 37 | self.mlp = nn.Sequential(*layers) 38 | 39 | def forward(self, x): 40 | return self.mlp(x) 41 | 42 | 43 | class ProprioAdaptTConv(nn.Module): 44 | def __init__(self): 45 | super(ProprioAdaptTConv, self).__init__() 46 | self.channel_transform = nn.Sequential( 47 | nn.Linear(16 + 16, 32), 48 | nn.ReLU(inplace=True), 49 | nn.Linear(32, 32), 50 | nn.ReLU(inplace=True), 51 | ) 52 | self.temporal_aggregation = nn.Sequential( 53 | nn.Conv1d(32, 32, (9,), stride=(2,)), 54 | nn.ReLU(inplace=True), 55 | nn.Conv1d(32, 32, (5,), stride=(1,)), 56 | nn.ReLU(inplace=True), 57 | nn.Conv1d(32, 32, (5,), stride=(1,)), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.low_dim_proj = nn.Linear(32 * 3, 8) 61 | 62 | def forward(self, x): 63 | x = 
self.channel_transform(x) # (N, 50, 32) 64 | x = x.permute((0, 2, 1)) # (N, 32, 50) 65 | x = self.temporal_aggregation(x) # (N, 32, 3) 66 | x = self.low_dim_proj(x.flatten(1)) 67 | return x 68 | 69 | 70 | class ActorCritic(nn.Module): 71 | def __init__(self, kwargs): 72 | nn.Module.__init__(self) 73 | actions_num = kwargs.pop('actions_num') 74 | input_shape = kwargs.pop('input_shape') 75 | self.units = kwargs.pop('actor_units') 76 | mlp_input_shape = input_shape 77 | 78 | out_size = self.units[-1] 79 | 80 | self.actor_mlp = MLP(units=self.units, input_size=mlp_input_shape) 81 | self.value = torch.nn.Linear(out_size, 1) 82 | self.mu = torch.nn.Linear(out_size, actions_num) 83 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 84 | 85 | for m in self.modules(): 86 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): 87 | fan_out = m.kernel_size[0] * m.out_channels 88 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) 89 | if getattr(m, 'bias', None) is not None: 90 | torch.nn.init.zeros_(m.bias) 91 | if isinstance(m, nn.Linear): 92 | if getattr(m, 'bias', None) is not None: 93 | torch.nn.init.zeros_(m.bias) 94 | nn.init.constant_(self.sigma, 0) 95 | 96 | @torch.no_grad() 97 | def get_action(self, obs_dict): 98 | # used specifically to collection samples during training 99 | # it contains exploration so needs to sample from distribution 100 | mu, logstd, value = self._actor_critic(obs_dict) 101 | sigma = torch.exp(logstd) 102 | distr = torch.distributions.Normal(mu, sigma) 103 | selected_action = distr.sample() 104 | result = { 105 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 106 | 'values': value, 107 | 'actions': selected_action, 108 | 'mus': mu, 109 | 'sigmas': sigma, 110 | } 111 | return result 112 | 113 | @torch.no_grad() 114 | def infer_action(self, obs_dict): 115 | # used during inference 116 | mu, _, _= self._actor_critic(obs_dict) 117 | return mu 118 | 119 | def _actor_critic(self, obs_dict): 120 | obs = obs_dict['obs'] 121 | x = self.actor_mlp(obs) 122 | value = self.value(x) 123 | mu = self.mu(x) 124 | sigma = self.sigma 125 | return mu, mu * 0 + sigma, value 126 | 127 | def forward(self, input_dict): 128 | mu,logstd,value = self._actor_critic(input_dict) 129 | sigma = torch.exp(logstd) 130 | prev_actions = input_dict.get('prev_actions', mu.clone()) 131 | distr = torch.distributions.Normal(mu, sigma) 132 | entropy = distr.entropy().sum(dim=-1) 133 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 134 | 135 | result = { 136 | 'prev_neglogp': torch.squeeze(prev_neglogp), 137 | 'values': value, 138 | 'entropy': entropy, 139 | 'mus': mu, 140 | 'sigmas': sigma 141 | } 142 | 143 | return result 144 | 145 | 146 | 147 | class PointNetActorCritic(nn.Module): 148 | 149 | def __init__(self, kwargs): 150 | nn.Module.__init__(self) 151 | actions_num = kwargs.pop('actions_num') 152 | input_shape = kwargs.pop('input_shape') 153 | self.units = kwargs.pop('actor_units') 154 | self.pc_out_dim = kwargs.pop('point_cloud_out_dim') 155 | self.pc_begin, self.pc_end = kwargs.pop('point_cloud_index') 156 | self.pc_num = kwargs.pop('point_cloud_num') 157 | 158 | mlp_input_shape = input_shape 159 | out_size = self.units[-1] 160 | 161 | self.point_net = nn.Sequential( 162 | nn.Linear(3,self.pc_out_dim), 163 | nn.ELU(inplace=True), 164 | nn.Linear(self.pc_out_dim,self.pc_out_dim), 165 | nn.ELU(inplace=True), 166 | nn.Linear(self.pc_out_dim,self.pc_out_dim), 167 | 
nn.MaxPool2d((self.pc_num,1)) 168 | ) 169 | 170 | self.actor_mlp = MLP(units=self.units, input_size=self.pc_begin + self.pc_out_dim) 171 | self.obs_end_actor = self.pc_begin + self.pc_out_dim 172 | self.value = MLP(units=self.units, input_size=mlp_input_shape) 173 | self.value_final = nn.Linear(out_size, 1) 174 | # self.value = nn.Linear(out_size, 1) 175 | self.mu = nn.Linear(out_size, actions_num) 176 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 177 | 178 | for m in self.modules(): 179 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): 180 | fan_out = m.kernel_size[0] * m.out_channels 181 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) 182 | if getattr(m, 'bias', None) is not None: 183 | torch.nn.init.zeros_(m.bias) 184 | if isinstance(m, nn.Linear): 185 | if getattr(m, 'bias', None) is not None: 186 | torch.nn.init.zeros_(m.bias) 187 | nn.init.constant_(self.sigma, 0) 188 | 189 | @torch.no_grad() 190 | def get_action(self, obs_dict): 191 | # used specifically to collection samples during training 192 | # it contains exploration so needs to sample from distribution 193 | mu, logstd, value = self._actor_critic(obs_dict) 194 | sigma = torch.exp(logstd) 195 | distr = torch.distributions.Normal(mu, sigma) 196 | selected_action = distr.sample() 197 | result = { 198 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 199 | 'values': value, 200 | 'actions': selected_action, 201 | 'mus': mu, 202 | 'sigmas': sigma, 203 | } 204 | return result 205 | 206 | @torch.no_grad() 207 | def infer_action(self, obs_dict): 208 | # used during inference 209 | mu, _, _= self._actor_critic(obs_dict) 210 | return mu 211 | 212 | def _actor_critic(self, obs_dict): 213 | 214 | obs = obs_dict['obs'] 215 | pc_info = obs[:,self.pc_begin:self.pc_end].reshape(-1,self.pc_num,3) 216 | pc_rep = self.point_net(pc_info).squeeze(1) 217 | obs = torch.cat([obs[:,:self.pc_begin],pc_rep,obs[:,self.pc_end:]],dim=1) 218 | x = self.actor_mlp(obs[:,:self.obs_end_actor]) 219 | value_h = self.value(obs) 220 | value = self.value_final(value_h) 221 | mu = self.mu(x) 222 | sigma = self.sigma 223 | return mu, mu * 0 + sigma, value 224 | 225 | def forward(self, input_dict): 226 | prev_actions = input_dict.get('prev_actions', None) 227 | mu,logstd,value = self._actor_critic(input_dict) 228 | sigma = torch.exp(logstd) 229 | distr = torch.distributions.Normal(mu, sigma) 230 | entropy = distr.entropy().sum(dim=-1) 231 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 232 | result = { 233 | 'prev_neglogp': torch.squeeze(prev_neglogp), 234 | 'values': value, 235 | 'entropy': entropy, 236 | 'mus': mu, 237 | 'sigmas': sigma 238 | } 239 | return result 240 | -------------------------------------------------------------------------------- /algo/models/models_priv.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class MLP(nn.Module): 15 | def __init__(self, units, input_size): 16 | super(MLP, self).__init__() 17 | layers = [] 18 | 
for output_size in units: 19 | layers.append(nn.Linear(input_size, output_size)) 20 | layers.append(nn.ELU()) 21 | input_size = output_size 22 | self.mlp = nn.Sequential(*layers) 23 | 24 | def forward(self, x): 25 | return self.mlp(x) 26 | 27 | 28 | class ProprioAdaptTConv(nn.Module): 29 | def __init__(self): 30 | super(ProprioAdaptTConv, self).__init__() 31 | self.channel_transform = nn.Sequential( 32 | nn.Linear(16 + 16, 32), 33 | nn.ReLU(inplace=True), 34 | nn.Linear(32, 32), 35 | nn.ReLU(inplace=True), 36 | ) 37 | self.temporal_aggregation = nn.Sequential( 38 | nn.Conv1d(32, 32, (9,), stride=(2,)), 39 | nn.ReLU(inplace=True), 40 | nn.Conv1d(32, 32, (5,), stride=(1,)), 41 | nn.ReLU(inplace=True), 42 | nn.Conv1d(32, 32, (5,), stride=(1,)), 43 | nn.ReLU(inplace=True), 44 | ) 45 | self.low_dim_proj = nn.Linear(32 * 3, 8) 46 | 47 | def forward(self, x): 48 | x = self.channel_transform(x) # (N, 50, 32) 49 | x = x.permute((0, 2, 1)) # (N, 32, 50) 50 | x = self.temporal_aggregation(x) # (N, 32, 3) 51 | x = self.low_dim_proj(x.flatten(1)) 52 | return x 53 | 54 | 55 | class ActorCritic(nn.Module): 56 | def __init__(self, kwargs): 57 | nn.Module.__init__(self) 58 | actions_num = kwargs.pop('actions_num') 59 | input_shape = kwargs.pop('input_shape') 60 | self.units = kwargs.pop('actor_units') 61 | self.priv_mlp = kwargs.pop('priv_mlp_units') 62 | mlp_input_shape = input_shape[0] 63 | 64 | out_size = self.units[-1] 65 | self.priv_info = kwargs['priv_info'] 66 | self.priv_info_stage2 = kwargs['proprio_adapt'] 67 | if self.priv_info: 68 | mlp_input_shape += self.priv_mlp[-1] 69 | self.env_mlp = MLP(units=self.priv_mlp, input_size=kwargs['priv_info_dim']) 70 | 71 | if self.priv_info_stage2: 72 | self.adapt_tconv = ProprioAdaptTConv() 73 | 74 | self.actor_mlp = MLP(units=self.units, input_size=mlp_input_shape) 75 | self.value = torch.nn.Linear(out_size, 1) 76 | self.mu = torch.nn.Linear(out_size, actions_num) 77 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) 78 | 79 | for m in self.modules(): 80 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): 81 | fan_out = m.kernel_size[0] * m.out_channels 82 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) 83 | if getattr(m, 'bias', None) is not None: 84 | torch.nn.init.zeros_(m.bias) 85 | if isinstance(m, nn.Linear): 86 | if getattr(m, 'bias', None) is not None: 87 | torch.nn.init.zeros_(m.bias) 88 | nn.init.constant_(self.sigma, 0) 89 | 90 | @torch.no_grad() 91 | def get_action(self, obs_dict): 92 | # used specifically to collection samples during training 93 | # it contains exploration so needs to sample from distribution 94 | mu, logstd, value, _, _ = self._actor_critic(obs_dict) 95 | sigma = torch.exp(logstd) 96 | distr = torch.distributions.Normal(mu, sigma) 97 | selected_action = distr.sample() 98 | result = { 99 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 100 | 'values': value, 101 | 'actions': selected_action, 102 | 'mus': mu, 103 | 'sigmas': sigma, 104 | } 105 | return result 106 | 107 | @torch.no_grad() 108 | def get_action_sample(self, obs_dict): 109 | # used for testing 110 | mu, logstd, value, _, _ = self._actor_critic(obs_dict) 111 | return mu 112 | 113 | def _actor_critic(self, obs_dict): 114 | obs = obs_dict['obs'] 115 | extrin, extrin_gt = None, None 116 | if self.priv_info: 117 | if self.priv_info_stage2: 118 | extrin = self.adapt_tconv(obs_dict['proprio_hist']) 119 | # during supervised 
training, extrin has gt label 120 | extrin_gt = self.env_mlp(obs_dict['priv_info']) if 'priv_info' in obs_dict else extrin 121 | extrin_gt = torch.tanh(extrin_gt) 122 | extrin = torch.tanh(extrin) 123 | obs = torch.cat([obs, extrin], dim=-1) 124 | else: 125 | extrin = self.env_mlp(obs_dict['priv_info']) 126 | extrin = torch.tanh(extrin) 127 | obs = torch.cat([obs, extrin], dim=-1) 128 | 129 | x = self.actor_mlp(obs) 130 | value = self.value(x) 131 | mu = self.mu(x) 132 | sigma = self.sigma 133 | return mu, mu * 0 + sigma, value, extrin, extrin_gt 134 | 135 | def forward(self, input_dict): 136 | prev_actions = input_dict.get('prev_actions', None) 137 | rst = self._actor_critic(input_dict) 138 | mu, logstd, value, extrin, extrin_gt = rst 139 | sigma = torch.exp(logstd) 140 | distr = torch.distributions.Normal(mu, sigma) 141 | entropy = distr.entropy().sum(dim=-1) 142 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 143 | result = { 144 | 'prev_neglogp': torch.squeeze(prev_neglogp), 145 | 'values': value, 146 | 'entropy': entropy, 147 | 'mus': mu, 148 | 'sigmas': sigma, 149 | 'extrin': extrin, 150 | 'extrin_gt': extrin_gt, 151 | } 152 | return result 153 | -------------------------------------------------------------------------------- /algo/models/rt_actor_critic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import copy 6 | from algo.pretrained.robot_transformer_ar import RobotTransformerAR 7 | from algo.models.models import MLP 8 | 9 | class RTActorCritic(nn.Module): 10 | 11 | def __init__(self, config, network_config, device, kwargs): 12 | 13 | nn.Module.__init__(self) 14 | self.network_config = network_config 15 | self.device = device 16 | actions_num =self.network_config.action_dim 17 | input_shape = kwargs.pop('value_input_shape') 18 | 19 | self.pc_to_value = config.train.ppo.point_cloud_input_to_value 20 | if config.get('pc_input', False) and self.pc_to_value: 21 | self.pc_begin, self.pc_end = kwargs.pop('point_cloud_index') 22 | 23 | self.value_grads_to_pointnet = config.train.ppo.value_grads_to_pointnet 24 | self.pc_num = self.network_config.pc_num 25 | self.scale_proprio = self.network_config.scale_proprio 26 | self.scale_action = self.network_config.scale_action 27 | 28 | 29 | mlp_input_shape = input_shape 30 | 31 | 32 | self.limits = {'upper': torch.tensor([6.2832, 2.0944, 6.2832, 3.9270, 6.2832, 3.1416, 6.2832, 0.4700, 1.6100, 1.7090, 1.6180, 1.3960, 33 | 1.1630, 1.6440, 1.7190, 0.4700, 1.6100, 1.7090, 1.6180, 0.4700, 1.6100, 1.7090, 1.6180], 34 | requires_grad=False, dtype=torch.float32, device=self.device), 35 | 'lower': torch.tensor([-6.2832, -2.0590, -6.2832, -0.1920, -6.2832, -1.6930, -6.2832, -0.4700, -0.1960, -0.1740, -0.2270, 36 | 0.2630, -0.1050, -0.1890, -0.1620, -0.4700, -0.1960, -0.1740, -0.2270, -0.4700, -0.1960, -0.1740, -0.2270] 37 | ,requires_grad=False, dtype=torch.float32, device=self.device)} 38 | 39 | 40 | self.actor = RobotTransformerAR( 41 | cfg= config) 42 | 43 | 44 | self.value_fn = nn.Sequential( 45 | nn.Linear(mlp_input_shape,512), 46 | nn.ELU(inplace=True), 47 | nn.Linear(512,256), 48 | nn.ELU(inplace=True), 49 | nn.Linear(256,128), 50 | nn.ELU(inplace=True), 51 | nn.Linear(128, 1) 52 | ) #check this 53 | 54 | self.logstd = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32)) 55 | #backbone sharing between value and critic? can this be implemented here in some way? 
56 | #not doing for now 57 | nn.init.constant_(self.logstd[:7], torch.log(torch.tensor(kwargs['init_eps_arm']))) 58 | nn.init.constant_(self.logstd[7:], torch.log(torch.tensor(kwargs['init_eps_hand']))) 59 | 60 | def scale_q(self, q): 61 | """ 62 | Scale the proprioceptive data to be between -1 and 1. 63 | """ 64 | q = (q - self.limits['lower'].view((1,-1))) / (self.limits['upper'] - self.limits['lower']) 65 | q = 2 * q - 1 66 | return q 67 | 68 | @torch.no_grad() 69 | def get_action(self, obs_dict): 70 | # used specifically to collection samples during training 71 | # it contains exploration so needs to sample from distribution 72 | mu, value = self._actor_critic(obs_dict) 73 | sigma = torch.exp(self.logstd) 74 | distr = torch.distributions.Normal(mu, sigma) 75 | selected_action = distr.sample() 76 | result = { 77 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd), 78 | 'values': value, 79 | 'actions': selected_action, 80 | 'mus': mu, 81 | 'sigmas': sigma, 82 | } 83 | return result 84 | 85 | @torch.no_grad() 86 | def infer_action(self, obs_dict): 87 | # used during inference 88 | mu, _ = self._actor_critic(obs_dict) 89 | return mu 90 | 91 | def _actor_critic(self, obs_dict): 92 | 93 | #what to do with the value network? 94 | obs = obs_dict['obs'] 95 | 96 | proprio_hist = obs_dict['proprio_buf'] 97 | 98 | if self.scale_proprio: 99 | proprio_hist = self.scale_q(proprio_hist) #scale proprio hist 100 | 101 | pc_hist = obs_dict['pc_buf'] #this is normalized 102 | 103 | 104 | attention_mask = obs_dict['attn_mask'] 105 | timesteps = obs_dict['timesteps'] 106 | 107 | if self.actor.cfg: 108 | action_hist = obs_dict['action_buf'] 109 | action_hist = torch.cat((action_hist, torch.zeros_like(action_hist[:,:1,:])), dim=1) 110 | else: 111 | action_hist=None 112 | 113 | res_dict, pc_embed = self.actor(proprio_hist, pc_hist, action_hist, timesteps.long(), attention_mask) 114 | 115 | # Value function should reuse features? 
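# Note on the question above: the critic currently reuses only the actor's point-cloud features.
# When pc_to_value is set, the last-step pc_embed returned by the transformer replaces the raw
# point-cloud slice of `obs` before value_fn is applied, and pc_embed is detached unless
# value_grads_to_pointnet is enabled, so value-loss gradients do not reach the PointNet by default.
# The transformer's proprio/action features are not shared with the value head.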
116 | 117 | if not self.value_grads_to_pointnet: 118 | pc_embed = pc_embed.detach() 119 | 120 | if self.pc_to_value: 121 | obs = torch.cat([obs[:,:self.pc_begin],pc_embed[:,-1],obs[:,self.pc_end:]],dim=1) 122 | value = self.value_fn(obs) 123 | 124 | mu = res_dict['action'][:,-1] #sigma in previous policy was independent of observations..F 125 | 126 | if not self.scale_action: 127 | mu = self.scale_q(mu) 128 | 129 | return mu, value 130 | 131 | def forward(self, input_dict): 132 | 133 | prev_actions = input_dict.get('prev_actions', None) 134 | mu, value = self._actor_critic(input_dict) 135 | sigma = torch.exp(self.logstd) 136 | distr = torch.distributions.Normal(mu, sigma) 137 | entropy = distr.entropy().sum(dim=-1) 138 | prev_neglogp = -distr.log_prob(prev_actions).sum(1) 139 | result = { 140 | 'prev_neglogp': torch.squeeze(prev_neglogp), 141 | 'values': value, 142 | 'entropy': entropy, 143 | 'mus': mu, 144 | 'sigmas': sigma 145 | } 146 | return result 147 | -------------------------------------------------------------------------------- /algo/models/running_mean_std.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | # Based on: IsaacGymEnvs 8 | # Copyright (c) 2018-2022, NVIDIA Corporation 9 | # Licence under BSD 3-Clause License 10 | # https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/ 11 | # -------------------------------------------------------- 12 | 13 | import torch 14 | import torch.nn as nn 15 | import numpy as np 16 | 17 | class RunningMeanStd(nn.Module): 18 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False): 19 | super(RunningMeanStd, self).__init__() 20 | print('RunningMeanStd: ', insize) 21 | self.insize = insize 22 | self.epsilon = epsilon 23 | 24 | self.norm_only = norm_only 25 | self.per_channel = per_channel 26 | if per_channel: 27 | if len(self.insize) == 3: 28 | self.axis = [0,1,2] 29 | if len(self.insize) == 2: 30 | self.axis = [0,1] #make this 0 and 1? 
31 | if len(self.insize) == 1: 32 | self.axis = [0] 33 | self.in_size = self.insize[-1] 34 | else: 35 | self.axis = [0] 36 | self.in_size = insize 37 | 38 | self.register_buffer('running_mean', torch.zeros(self.in_size, dtype = torch.float64)) 39 | self.register_buffer('running_var', torch.ones(self.in_size, dtype = torch.float64)) 40 | self.register_buffer('count', torch.ones((), dtype = torch.float64)) 41 | 42 | def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count): 43 | delta = batch_mean - mean 44 | tot_count = count + batch_count 45 | 46 | new_mean = mean + delta * batch_count / tot_count 47 | m_a = var * count 48 | m_b = batch_var * batch_count 49 | M2 = m_a + m_b + delta**2 * count * batch_count / tot_count 50 | new_var = M2 / tot_count 51 | new_count = tot_count 52 | return new_mean, new_var, new_count 53 | 54 | def forward(self, input, unnorm=False): 55 | if self.training: 56 | mean = input.mean(self.axis) # along channel axis 57 | var = input.var(self.axis) 58 | self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(self.running_mean, self.running_var, self.count, 59 | mean, var, input.size()[0] ) 60 | 61 | # change shape 62 | if self.per_channel: 63 | if len(self.insize) == 3: 64 | current_mean = self.running_mean.view([1, 1, 1, self.in_size]).expand_as(input) 65 | current_var = self.running_var.view([1, 1, 1, self.in_size]).expand_as(input) 66 | if len(self.insize) == 2: 67 | current_mean = self.running_mean.view([1, 1, self.in_size]).expand_as(input) 68 | current_var = self.running_var.view([1, 1, self.in_size]).expand_as(input) 69 | if len(self.insize) == 1: 70 | current_mean = self.running_mean.view([1, self.in_size]).expand_as(input) 71 | current_var = self.running_var.view([1, self.in_size]).expand_as(input) 72 | else: 73 | current_mean = self.running_mean 74 | current_var = self.running_var 75 | # get output 76 | 77 | 78 | if unnorm: 79 | y = torch.clamp(input, min=-5.0, max=5.0) 80 | y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float() 81 | else: 82 | if self.norm_only: 83 | y = input/ torch.sqrt(current_var.float() + self.epsilon) 84 | else: 85 | y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon) 86 | y = torch.clamp(y, min=-5.0, max=5.0) 87 | return y 88 | -------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/experience.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/experience.cpython-37.pyc -------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/mem_eff_experience.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/mem_eff_experience.cpython-37.pyc -------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/ppo_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/ppo_transformer.cpython-37.pyc 
-------------------------------------------------------------------------------- /algo/ppo_transformer/__pycache__/ppobc_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/ppobc_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/ppo_transformer/experience.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # In-Hand Object Rotation via Rapid Motor Adaptation 3 | # https://arxiv.org/abs/2210.04887 4 | # Copyright (c) 2022 Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | # Based on: RLGames 8 | # Copyright (c) 2019 Denys88 9 | # Licence under MIT License 10 | # https://github.com/Denys88/rl_games/ 11 | # -------------------------------------------------------- 12 | 13 | import gym 14 | import torch 15 | from torch.utils.data import Dataset 16 | import utils.pytorch_utils as ptu 17 | from termcolor import cprint 18 | 19 | def transform_op(arr): 20 | """ 21 | swap and then flatten axes 0 and 1 22 | """ 23 | if arr is None: 24 | return arr 25 | s = arr.size() 26 | return arr.transpose(0, 1).reshape(s[0] * s[1], *s[2:]) 27 | 28 | 29 | class ExperienceBuffer(Dataset): 30 | def __init__(self, num_envs, 31 | horizon_length, 32 | batch_size, 33 | minibatch_size, 34 | num_gradient_steps, 35 | obs_dim, 36 | proprio_dim, 37 | act_dim, 38 | pc_num, 39 | ctx_len, 40 | device): 41 | 42 | self.device = device 43 | self.num_envs = num_envs 44 | self.max_ep_len = horizon_length 45 | 46 | self.data_dict = None 47 | self.obs_dim = obs_dim 48 | self.proprio_dim = proprio_dim 49 | self.act_dim = act_dim 50 | self.ctx_len = ctx_len 51 | self.pc_num = pc_num 52 | self.storage_dict = { 53 | 'obses': torch.zeros((self.max_ep_len, self.num_envs, self.obs_dim), dtype=torch.float32, device=self.device), 54 | 'proprio_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len, self.proprio_dim),dtype=torch.float32, device=self.device), 55 | 'pc_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len, self.pc_num,3),dtype=torch.float32, device=self.device), 56 | 'action_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len-1, self.act_dim),dtype=torch.float32, device=self.device), 57 | # 'priv_info': torch.zeros((self.self.max_ep_len, self.num_envs, self.priv_dim), dtype=torch.float32, device=self.device), 58 | 'attn_mask': torch.zeros((self.max_ep_len, self.num_envs, self.ctx_len), dtype=torch.float32, device=self.device), 59 | 'timesteps': -1*torch.ones((self.max_ep_len, self.num_envs, self.ctx_len), dtype=torch.float32, device=self.device), 60 | 'rewards': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device), 61 | 'values': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device), 62 | 'neglogpacs': torch.zeros((self.max_ep_len, self.num_envs), dtype=torch.float32, device=self.device), 63 | 'dones': torch.zeros((self.max_ep_len, self.num_envs), dtype=torch.uint8, device=self.device), 64 | 'actions': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device), 65 | 'mus': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, 
device=self.device), 66 | 'sigmas': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device), 67 | 'returns': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device), 68 | } 69 | 70 | self.batch_size = batch_size 71 | self.length = self.num_gradient_steps = num_gradient_steps 72 | 73 | if self.length < self.max_ep_len: 74 | cprint('Warning: length of buffer is less than max_ep_len, full data is not getting used', 'red') 75 | self.minibatch_size = minibatch_size 76 | 77 | def __len__(self): 78 | return self.length 79 | 80 | def __getitem__(self, idx): 81 | start = idx * self.minibatch_size 82 | end = (idx + 1) * self.minibatch_size 83 | 84 | self.last_range = (start, end) 85 | input_dict = {} 86 | for k, v in self.data_dict.items(): 87 | if type(v) is dict: 88 | v_dict = {kd: vd[start:end] for kd, vd in v.items()} 89 | input_dict[k] = v_dict 90 | else: 91 | input_dict[k] = v[start:end] 92 | 93 | return input_dict['values'], input_dict['neglogpacs'], input_dict['advantages'], input_dict['mus'], \ 94 | input_dict['sigmas'], input_dict['returns'], input_dict['actions'], \ 95 | input_dict['obses'], input_dict['proprio_buf'], input_dict['pc_buf'], input_dict['action_buf'], \ 96 | input_dict['attn_mask'], input_dict['timesteps'] 97 | 98 | 99 | def update_mu_sigma(self, mu, sigma): 100 | start = self.last_range[0] 101 | end = self.last_range[1] 102 | self.data_dict['mus'][start:end] = mu 103 | self.data_dict['sigmas'][start:end] = sigma 104 | 105 | def update_data(self, name, index, val): 106 | if type(val) is dict: 107 | for k, v in val.items(): 108 | self.storage_dict[name][k][index,:] = v 109 | else: 110 | self.storage_dict[name][index,:] = val 111 | 112 | def compute_return(self, last_values, gamma, tau): 113 | last_gae_lam = 0 114 | mb_advs = torch.zeros_like(self.storage_dict['rewards']) 115 | for t in reversed(range(self.max_ep_len)): 116 | if t == self.max_ep_len - 1: 117 | next_values = last_values 118 | else: 119 | next_values = self.storage_dict['values'][t + 1] 120 | next_nonterminal = 1.0 - self.storage_dict['dones'].float()[t] 121 | next_nonterminal = next_nonterminal.unsqueeze(1) 122 | delta = self.storage_dict['rewards'][t] + gamma * next_values * next_nonterminal - self.storage_dict['values'][t] 123 | mb_advs[t] = last_gae_lam = delta + gamma * tau * next_nonterminal * last_gae_lam 124 | self.storage_dict['returns'][t, :] = mb_advs[t] + self.storage_dict['values'][t] #why? 
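# The recursion above is generalized advantage estimation:
#   delta_t = r_t + gamma * V_{t+1} * (1 - done_t) - V_t
#   A_t     = delta_t + gamma * tau * (1 - done_t) * A_{t+1}
# Adding V_t back converts the advantage into the lambda-return, which is what 'returns' stores and
# what the value function is trained to predict; prepare_training() below recovers the advantages
# as returns - values and normalizes them before the PPO update.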
125 | 126 | def prepare_training(self): 127 | self.data_dict = {} 128 | for k, v in self.storage_dict.items(): 129 | self.data_dict[k] = transform_op(v) 130 | advantages = self.data_dict['returns'] - self.data_dict['values'] 131 | self.data_dict['advantages'] = ((advantages - advantages.mean()) / (advantages.std() + 1e-8)).squeeze(1) 132 | return self.data_dict 133 | 134 | 135 | 136 | def get_info(self): 137 | buffer_info = { 138 | 'AverageReward' : ptu.to_numpy(self.storage_dict['rewards'].mean()), 139 | 'AverageReturn' : ptu.to_numpy(self.storage_dict['returns'].mean()), 140 | } 141 | 142 | return buffer_info 143 | -------------------------------------------------------------------------------- /algo/pretrained/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__init__.py -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/policy_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/policy_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/robot_transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/robot_transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/robot_transformer_ar.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/robot_transformer_ar.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/__pycache__/transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/transformer.cpython-37.pyc -------------------------------------------------------------------------------- /algo/pretrained/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import os 4 | import pickle as pkl 5 | from termcolor import cprint 6 | class TrajectoryDataset(Dataset): 7 | 8 | def __init__(self, root,ctx_length=64,device='cuda'): 9 | """ 10 | Args: 11 | data (Any): Your dataset (e.g., images, files, tensors). 12 | targets (Any): The labels or targets associated with your data. 13 | transform (callable, optional): Optional transform to be applied on a sample. 
14 | """ 15 | super(TrajectoryDataset, self).__init__() 16 | self.root = root 17 | self.device = device 18 | #assuming not many files in the directory 19 | self.episodes = [pkl.load(open(os.path.join(root,episode),'rb')) for episode in os.listdir(root)] 20 | self.ctx = ctx_length 21 | self.ep_lens = torch.tensor([(len(episode)- self.ctx+1) for episode in self.episodes]) 22 | self.cumsum = torch.cumsum(self.ep_lens,0) 23 | self.visualise() 24 | 25 | def visualise(self): 26 | """ 27 | Visualise the dataset. 28 | """ 29 | cprint(f"Number of episodes: {len(self.episodes)}",color='green',attrs=['bold']) 30 | cprint(f"Number of examples: {torch.sum(self.ep_lens)}",color='green',attrs=['bold']) 31 | cprint(f"Proprio dimension: {len(self.episodes[0]['robot_state'][0])}",color='green',attrs=['bold']) 32 | cprint(f"Action dimension: {len(self.episodes[0]['action'][0])}",color='green',attrs=['bold']) 33 | 34 | def __len__(self): 35 | """Returns the size of the dataset.""" 36 | return torch.sum(self.ep_lens).item() 37 | 38 | def __getitem__(self, index): 39 | """ 40 | Generates one sample of data. 41 | 42 | Args: 43 | index (int): The index of the item in the dataset 44 | 45 | Returns: 46 | sample (Any): The data sample corresponding to the given index. 47 | target (Any): The target corresponding to the given data sample. 48 | """ 49 | 50 | ep_idx = torch.searchsorted(self.cumsum, index, right=True) 51 | ep = self.episodes[ep_idx] 52 | idx = index - torch.sum(self.ep_lens[:ep_idx]) 53 | return { 54 | 'state': torch.tensor(ep['robot_state'][idx:idx+self.ctx]).to(self.device), 55 | 'action': torch.tensor(ep['action'][idx:idx+self.ctx]).to(self.device), 56 | 'timesteps': torch.tensor(torch.arange(idx,idx+self.ctx)).to(self.device), 57 | } 58 | 59 | 60 | 61 | def collate_fn(batch): 62 | 63 | state = torch.stack([torch.tensor(item['state']) for item in batch]) 64 | action = torch.stack([torch.tensor(item['action']) for item in batch]) 65 | timesteps = torch.stack([torch.tensor(item['timesteps']) for item in batch]) 66 | attention_mask = None 67 | 68 | return state, action, timesteps, attention_mask 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /algo/pretrained/depth_trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import time 5 | from torch.utils.data import DataLoader 6 | import os 7 | from datetime import datetime 8 | import wandb 9 | import tqdm 10 | from torch.nn.parallel import DistributedDataParallel as DDP 11 | from termcolor import cprint 12 | class DepthTrainer: 13 | 14 | def __init__(self, 15 | model, 16 | collate_fn, 17 | optimizer, 18 | loss_fn, 19 | model_save_dir, 20 | train_dataloader, 21 | val_dataset=None, 22 | config=None, 23 | scheduler=None, 24 | eval_fns=None, 25 | logger=None, 26 | rank=0, 27 | world_size=1, 28 | device='cuda'): 29 | 30 | self.model = model 31 | self.device = device 32 | self.optimizer = optimizer 33 | self.batch_size = config.pretrain.training.batch_size 34 | self.val_dataset = val_dataset 35 | self.collate_fn = collate_fn 36 | self.loss_fn = loss_fn 37 | self.scheduler = scheduler 38 | self.save_dir = model_save_dir 39 | self.rank = rank 40 | self.world_size = world_size 41 | self.eval_fns = [] if eval_fns is None else eval_fns 42 | self.diagnostics = dict() 43 | self.logger = logger 44 | self.saved_model_number = 0 45 | self.add_proprio_noise = config.pretrain.training.add_proprio_noise 46 | self.add_action_noise = 
config.pretrain.training.add_action_noise 47 | num_workers = config.pretrain.training.num_workers #add this to bash file 48 | self.log_freq = config.pretrain.training.log_freq 49 | self.model_save_freq = config.pretrain.training.model_save_freq 50 | # create a dataloader 51 | self.train_dataloader = train_dataloader 52 | 53 | self.start_time = time.time() 54 | 55 | def train_epoch(self, iter_num=0, print_logs=False): 56 | 57 | train_losses = [] 58 | train_losses_action = [] 59 | logs = dict() 60 | 61 | train_start = time.time() 62 | 63 | self.model.train() 64 | 65 | for i, batch in enumerate(tqdm.tqdm(self.train_dataloader)): 66 | 67 | proprio, depth , actions, timesteps, attention_mask = batch 68 | batch = proprio.to(self.device), depth.to(self.device), \ 69 | actions.to(self.device), timesteps.to(self.device), \ 70 | attention_mask.to(self.device) if attention_mask is not None else None 71 | 72 | train_loss = self.train_step(batch) 73 | 74 | train_losses_action.append(train_loss['action']) 75 | train_losses.append(train_loss['full']) 76 | 77 | if self.scheduler is not None: 78 | self.scheduler.step() 79 | 80 | if self.logger is not None and i % self.log_freq == 0: 81 | logs['time/training'] = time.time() - train_start 82 | logs['time/total'] = time.time() - self.start_time 83 | logs['optimizer/lr'] = self.optimizer.param_groups[0]['lr'] 84 | global_step = iter_num * len(self.train_dataloader) + i 85 | self.logger.log_dict(logs, global_step) 86 | logs['training/train_loss_mean'] = np.mean(train_losses) 87 | logs['training/train_loss_std'] = np.std(train_losses) 88 | logs['training/train_loss_action_mean'] = np.mean(train_losses_action) 89 | logs['training/train_loss_action_std'] = np.std(train_losses_action) 90 | 91 | global_step = iter_num * len(self.train_dataloader) + i 92 | if self.save_dir is not None and global_step % self.model_save_freq == 0: 93 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt')) 94 | self.saved_model_number += 1 95 | 96 | #if self.save_dir is not None and global_step % self.model_save_freq == 0: 97 | #torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt')) 98 | 99 | if print_logs and i % self.log_freq == 0: 100 | for k in self.diagnostics: 101 | logs[k] = self.diagnostics[k] 102 | print('=' * 80) 103 | print(f'Iteration {iter_num}') 104 | for k, v in logs.items(): 105 | print(f'{k}: {v}') 106 | 107 | return logs 108 | 109 | def train_step(self,batch): 110 | 111 | proprio, depth, actions, timesteps, attention_mask = batch 112 | 113 | 114 | 115 | action_target = torch.clone(actions) 116 | 117 | if self.add_proprio_noise: 118 | noise = torch.zeros_like(proprio) 119 | noise[...,:7] = torch.randn_like(proprio[...,:7])*0.1 #self.noise_arm 120 | noise[...,7:] = torch.randn_like(proprio[...,7:])*0.1 #self.noise_hand 121 | proprio = proprio + noise 122 | 123 | 124 | action_preds, _ = self.model.forward(proprio,depth,timesteps,attention_mask) 125 | 126 | act_dim = action_preds.shape[2] 127 | 128 | if attention_mask is not None: 129 | action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 130 | action_target = action_target.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 131 | 132 | 133 | loss_action = self.loss_fn(action_preds, action_target) 134 | 135 | loss = loss_action 136 | 137 | self.optimizer.zero_grad() 138 | loss.backward() 139 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), .25) 140 | self.optimizer.step() 141 | 142 | with 
torch.no_grad(): 143 | self.diagnostics['training/action_error'] = loss_action.detach().cpu().item() 144 | 145 | return_dict = {'action': loss_action.detach().cpu().item(), 146 | 'full': loss.detach().cpu().item() 147 | } 148 | 149 | return return_dict 150 | 151 | -------------------------------------------------------------------------------- /algo/pretrained/depth_trainer_multigpu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import time 5 | from torch.utils.data import DataLoader 6 | import os 7 | from datetime import datetime 8 | import wandb 9 | import tqdm 10 | from torch.nn.parallel import DistributedDataParallel as DDP 11 | from termcolor import cprint 12 | 13 | class MultiGPUTrainer: 14 | 15 | def __init__(self, 16 | model, 17 | train_dataset, 18 | collate_fn, 19 | loss_fn, 20 | model_save_dir, 21 | rank, 22 | world_size, 23 | val_dataset=None, 24 | config=None, 25 | scheduler=None, 26 | eval_fns=None, 27 | logger=None, 28 | device='cuda'): 29 | 30 | self.model = model 31 | self.rank = rank 32 | self.world_size = world_size 33 | if self.world_size > 1: 34 | self.device = f'cuda:{self.rank}' 35 | self.model = self.model.to(self.device) 36 | self.ddp_model = DDP(self.model, device_ids=[self.rank], output_device=self.rank) 37 | self.optimizer = torch.optim.Adam(self.ddp_model.parameters(), lr=config.pretrain.training.lr*config.num_gpus,weight_decay=config.pretrain.training.weight_decay) 38 | else: 39 | self.device = device 40 | self.model = self.model.to(self.device) 41 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.pretrain.training.lr,weight_decay=config.pretrain.training.weight_decay) 42 | 43 | self.batch_size = config.pretrain.training.batch_size 44 | self.train_dataset = train_dataset 45 | self.val_dataset = val_dataset 46 | self.collate_fn = collate_fn 47 | self.loss_fn = loss_fn 48 | self.scheduler = scheduler 49 | self.save_dir = model_save_dir 50 | self.eval_fns = [] if eval_fns is None else eval_fns 51 | self.diagnostics = dict() 52 | self.logger = logger 53 | 54 | self.saved_model_number = 0 55 | self.action_input = config.pretrain.model.action_input 56 | self.add_proprio_noise = config.pretrain.training.add_proprio_noise 57 | self.add_action_noise = config.pretrain.training.add_action_noise 58 | self.num_workers = config.pretrain.training.num_workers 59 | self.log_freq = config.pretrain.training.log_freq 60 | self.noise_arm = config.pretrain.training.noise_arm 61 | self.noise_hand = config.pretrain.training.noise_hand 62 | self.model_save_freq = config.pretrain.training.model_save_freq 63 | 64 | 65 | if self.world_size > 1: 66 | sampler = torch.utils.data.distributed.DistributedSampler(self.train_dataset, num_replicas=world_size, rank=rank) 67 | self.train_dataloader = DataLoader(self.train_dataset, 68 | batch_size=self.batch_size, 69 | num_workers=self.num_workers, 70 | collate_fn=self.collate_fn, 71 | sampler=sampler) 72 | if self.val_dataset is not None: 73 | sampler = torch.utils.data.distributed.DistributedSampler(self.val_dataset, 74 | num_replicas=world_size, 75 | rank=rank) 76 | 77 | self.val_dataloader = DataLoader(self.val_dataset, 78 | batch_size=self.batch_size, 79 | num_workers=self.num_workers, 80 | collate_fn=self.collate_fn, 81 | sampler=sampler) 82 | else: 83 | # create a dataloader 84 | print('Creating dataloader') 85 | self.train_dataloader = DataLoader(self.train_dataset, 86 | batch_size=self.batch_size, 87 | num_workers=self.num_workers, 88 | 
shuffle=True, 89 | collate_fn=self.collate_fn) 90 | 91 | if self.val_dataset is not None: 92 | self.val_dataloader = DataLoader(self.val_dataset, 93 | batch_size=self.batch_size, 94 | num_workers=self.num_workers, 95 | shuffle=False, 96 | collate_fn=self.collate_fn) 97 | 98 | self.start_time = time.time() 99 | 100 | def train_epoch(self, iter_num=0, print_logs=False): 101 | 102 | train_losses, train_losses_action = [], [] 103 | logs = dict() 104 | 105 | train_start = time.time() 106 | 107 | if self.world_size > 1: 108 | self.ddp_model.train() 109 | self.model.train() 110 | 111 | if self.world_size > 1: 112 | self.train_dataloader.sampler.set_epoch(iter_num) 113 | 114 | for i, batch in enumerate(tqdm.tqdm(self.train_dataloader)): 115 | 116 | proprio, depth, actions, timesteps, attention_mask = batch 117 | batch = proprio.to(self.device), depth.to(self.device), \ 118 | actions.to(self.device), timesteps.to(self.device), \ 119 | attention_mask.to(self.device) if attention_mask is not None else None 120 | 121 | 122 | 123 | train_loss = self.train_step(batch) 124 | 125 | 126 | 127 | train_losses_action.append(train_loss['action']) 128 | train_losses.append(train_loss['full']) 129 | 130 | if self.scheduler is not None: 131 | self.scheduler.step() 132 | 133 | 134 | if self.world_size > 1: 135 | torch.distributed.barrier() 136 | 137 | if self.logger is not None and i % self.log_freq == 0 and (self.world_size == 1 or self.rank==0): 138 | logs['time/training'] = time.time() - train_start 139 | logs['time/total'] = time.time() - self.start_time 140 | logs['optimizer/lr'] = self.optimizer.param_groups[0]['lr'] 141 | logs['training/train_loss_mean'] = np.mean(train_losses) 142 | logs['training/train_loss_std'] = np.std(train_losses) 143 | logs['training/train_loss_action_mean'] = np.mean(train_losses_action) 144 | logs['training/train_loss_action_std'] = np.std(train_losses_action) 145 | global_step = iter_num * len(self.train_dataloader) + i 146 | self.logger.log_dict(logs, global_step) 147 | 148 | if self.save_dir is not None and i % self.model_save_freq == 0 and (self.world_size == 1 or self.rank==0): 149 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, 'last.pt')) 150 | self.saved_model_number += 1 151 | 152 | if self.save_dir is not None and i % 5000 == 0 and (self.world_size == 1 or self.rank==0): 153 | global_step = iter_num * len(self.train_dataloader) + i 154 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt')) 155 | 156 | if print_logs and i % self.log_freq == 0 and (self.world_size == 1 or self.rank==0): 157 | for k in self.diagnostics: 158 | logs[k] = self.diagnostics[k] 159 | print('=' * 80) 160 | print(f'Iteration {iter_num}') 161 | for k, v in logs.items(): 162 | print(f'{k}: {v}') 163 | return logs 164 | 165 | def train_step(self, batch): 166 | 167 | proprio, depth, actions, timesteps, attention_mask = batch 168 | 169 | action_target = torch.clone(actions) 170 | 171 | if self.add_proprio_noise: 172 | noise = torch.zeros_like(proprio) 173 | noise[...,:7] = torch.randn_like(proprio[...,:7])*self.noise_arm 174 | noise[...,7:] = torch.randn_like(proprio[...,7:])*self.noise_hand 175 | proprio = proprio + noise 176 | 177 | 178 | if self.world_size > 1: 179 | action_preds, _ = self.ddp_model.forward( 180 | proprio, depth, timesteps=timesteps, attention_mask=attention_mask,) 181 | 182 | else: 183 | action_preds, _ = self.model.forward( 184 | proprio, depth, timesteps=timesteps,
attention_mask=attention_mask,) 185 | 186 | 187 | act_dim = action_preds.shape[2] 188 | 189 | if attention_mask is not None: 190 | action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 191 | action_target = action_target.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0] 192 | 193 | 194 | loss_action = self.loss_fn(action_preds, action_target) 195 | loss = loss_action 196 | 197 | 198 | self.optimizer.zero_grad() 199 | loss.backward() 200 | if self.world_size > 1: 201 | torch.nn.utils.clip_grad_norm_(self.ddp_model.parameters(), .25) 202 | else: 203 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), .25) 204 | 205 | self.optimizer.step() 206 | 207 | with torch.no_grad(): 208 | self.diagnostics['training/action_error'] = loss_action.detach().cpu().item() 209 | 210 | return_dict = {'action': loss_action.detach().cpu().item(), 211 | 'full': loss.detach().cpu().item() 212 | } 213 | 214 | return return_dict 215 | -------------------------------------------------------------------------------- /algo/pretrained/robot_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import os 4 | import pickle as pkl 5 | from termcolor import cprint 6 | import numpy as np 7 | class RobotDataset(Dataset): 8 | 9 | def __init__(self, root=None, cfg=None): 10 | """ 11 | Args: 12 | root (str): Root directory containing the retargeted trajectory episodes. 13 | cfg (DictConfig): Hydra config; the pretrain section supplies the device, 14 | context length, scaling flags and training options used below. 15 | """ 16 | assert root is not None, "Please provide the root directory of the dataset" 17 | assert os.path.exists(root), f"The directory {root} does not exist" 18 | super(RobotDataset, self).__init__() 19 | self.root = root 20 | print(f"Loading dataset from {root}") 21 | self.device = cfg.pretrain.device 22 | self.ctx = cfg.pretrain.model.context_length 23 | self.scale_action = cfg.pretrain.model.scale_action 24 | self.scale_proprio = cfg.pretrain.model.scale_proprio 25 | # set variable to store the episodes 26 | self.episodes_npy = [] 27 | self.ep_lens = [] 28 | # control timestep used to scale residual actions in __getitem__ (cfg default: 0.05 s, i.e. 20 Hz) 29 | self.dt = np.float32(cfg.pretrain.training.dt) 30 | self.use_residuals = cfg.pretrain.training.use_residuals 31 | # get all subject folders directly under the root 32 | subjects_dir = [os.path.join(root,episode) for episode in os.listdir(root) if os.path.isdir(os.path.join(root,episode))] 33 | # get all episode folders (depth 2 from the root) inside each subject folder 34 | self.episodes_dir = [os.path.join(subject,episode) for subject in subjects_dir for episode in os.listdir(subject) if os.path.isdir(os.path.join(subject,episode))] 35 | self.episodes_dir = sorted(self.episodes_dir) 36 | 37 | assert len(self.episodes_dir) > 0, f"No episodes found in the directory {root}" 38 | # load all the episodes 39 | for episode in self.episodes_dir: 40 | self.load_episode_fnames(episode) 41 | 42 | assert len(self.episodes_npy) > 0, f"No trajectories found in the directory {root}" 43 | # save the min, max, and mean of the episode lengths 44 | self.min_ep_len = np.min(self.ep_lens) 45 | self.max_ep_len = np.max(self.ep_lens) 46 | self.mean_ep_len = np.mean(self.ep_lens) 47 | cprint(f"Min episode length: {self.min_ep_len}, Max episode length: {self.max_ep_len}, Mean episode length: {self.mean_ep_len}",color='cyan',attrs=['bold']) 48 | self.ep_lens = torch.tensor(self.ep_lens) 49 | self.cumsum = torch.cumsum(self.ep_lens,0) 50 |
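# NOTE: ep_lens/cumsum let __getitem__ map a flat sample index back to an (episode, offset) pair via torch.searchsorted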
self.visualise() 51 | 52 | # IG lower and upper limits 53 | self.limits = {'upper': [6.2832, 2.0944, 6.2832, 3.9270, 6.2832, 3.1416, 6.2832, 0.4700, 1.6100, 1.7090, 1.6180, 1.3960, 54 | 1.1630, 1.6440, 1.7190, 0.4700, 1.6100, 1.7090, 1.6180, 0.4700, 1.6100, 1.7090, 1.6180], 55 | 'lower': [-6.2832, -2.0590, -6.2832, -0.1920, -6.2832, -1.6930, -6.2832, -0.4700, -0.1960, -0.1740, -0.2270, 56 | 0.2630, -0.1050, -0.1890, -0.1620, -0.4700, -0.1960, -0.1740, -0.2270, -0.4700, -0.1960, -0.1740, -0.2270]} 57 | 58 | 59 | self.limits['upper'] = np.array(self.limits['upper']).astype(np.float32) 60 | self.limits['lower'] = np.array(self.limits['lower']).astype(np.float32) 61 | 62 | 63 | def load_episode_fnames(self, episode_dir:str): 64 | """ 65 | Load the episodes filenames. 66 | """ 67 | for episode_fname in sorted(os.listdir(episode_dir)): 68 | # continue if the file is not a npy file 69 | if not episode_fname.endswith('.npy'): 70 | continue 71 | ep = np.load(os.path.join(episode_dir,episode_fname), allow_pickle=True).item() 72 | self.episodes_npy.append(ep) 73 | # load the file and get the length 74 | eplen = len(ep['robot_qpos']) - self.ctx + 1 75 | 76 | assert eplen > 0, f"Episode length is less than the context length {self.ctx}" 77 | 78 | self.ep_lens.append(eplen) 79 | 80 | def scale_q(self, q): 81 | """ 82 | Scale the proprioceptive data to be between -1 and 1. 83 | """ 84 | q = (q - self.limits['lower']) / (self.limits['upper'] - self.limits['lower']) 85 | q = 2 * q - 1 86 | return q 87 | 88 | def change_order(self, q): 89 | IG_mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 20, 21, 22, 11, 12, 13, 14, 15, 16, 17, 18] 90 | return q[:,IG_mapping] 91 | 92 | def visualise(self): 93 | """ 94 | Visualise the dataset. 95 | """ 96 | cprint(f"Number of episodes: {len(self.episodes_npy)}",color='green',attrs=['bold']) 97 | cprint(f"Number of examples: {torch.sum(self.ep_lens)}",color='green',attrs=['bold']) 98 | # Load the first episode to get the dimension of the proprio and action 99 | ep = self.episodes_npy[0] 100 | cprint(f"Proprio dimension: {len(ep['robot_qpos'][0])}",color='green',attrs=['bold']) 101 | cprint(f"Action dimension: {len(ep['target_qpos'][0])}",color='green',attrs=['bold']) 102 | 103 | def __len__(self): 104 | """Returns the size of the dataset.""" 105 | return torch.sum(self.ep_lens).item() 106 | 107 | def __getitem__(self, index): 108 | """ 109 | Generates one sample of data. 110 | 111 | Args: 112 | index (int): The index of the item in the dataset 113 | 114 | Returns: 115 | sample (Any): The data sample corresponding to the given index. 116 | target (Any): The target corresponding to the given data sample. 
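For this dataset the sample is a dict with 'proprio', 'action' and 'obj_pc' arrays spanning context_length steps plus a matching 'timesteps' array; no separate target is returned because the action sequence itself is the supervision.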
117 | """ 118 | 119 | ep_idx = torch.searchsorted(self.cumsum, index, right=True) 120 | # open the pickle file 121 | idx = index - torch.sum(self.ep_lens[:ep_idx]) 122 | ep = self.episodes_npy[ep_idx] 123 | action_npy = np.stack(ep['target_qpos'][idx:idx+self.ctx]) 124 | proprio_npy = np.stack(ep['robot_qpos'][idx:idx+self.ctx]) 125 | # Put in IG order 126 | action = self.change_order(action_npy) 127 | proprio = self.change_order(proprio_npy) 128 | # Scale the proprioceptive data in [-1,1] 129 | # For the first 7 elements of the action vector, predict the residual with respect to the previous action 130 | if self.use_residuals: 131 | action_res = np.concatenate([np.zeros((1,action.shape[1])), np.diff(action, axis=0)], axis=0) 132 | action_res[0] = action[0] - proprio[0] 133 | action_res = action_res.astype(np.float32) 134 | action = action_res / self.dt 135 | 136 | if self.scale_proprio: 137 | proprio = self.scale_q(proprio) 138 | if self.scale_action: 139 | action = self.scale_q(action) 140 | 141 | obj_pc = np.stack(ep['object_pc'][idx:idx+self.ctx]) 142 | 143 | return { 144 | 'proprio': proprio, 145 | 'action': action, 146 | 'obj_pc': obj_pc, 147 | 'timesteps': np.arange(self.ctx), 148 | } 149 | 150 | 151 | def collate_fn(batch): 152 | 153 | proprio = np.stack([item['proprio'] for item in batch]) 154 | object_pc = np.stack([item['obj_pc'] for item in batch]) 155 | action = np.stack([item['action'] for item in batch]) 156 | timesteps = np.stack([item['timesteps'] for item in batch]) 157 | attention_mask = None 158 | 159 | proprio = torch.tensor(proprio, dtype=torch.float32, requires_grad=False) 160 | object_pc = torch.tensor(object_pc, dtype=torch.float32, requires_grad=False) 161 | action = torch.tensor(action, dtype=torch.float32, requires_grad=False) 162 | timesteps = torch.tensor(timesteps, dtype=torch.long, requires_grad=False) 163 | 164 | return proprio, object_pc, action, timesteps, attention_mask 165 | -------------------------------------------------------------------------------- /cfg/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Task name - used to pick the class to load 3 | task_name: ${task.name} 4 | teacher_mode: False 5 | pc_input: True 6 | #shape 7 | shape: "" 8 | # if set to positive integer, overrides the default number of environments 9 | num_envs: 4096 10 | # seed - set to -1 to choose random seed 11 | seed: 0 12 | # set to True for deterministic performance 13 | torch_deterministic: False 14 | 15 | # set the maximum number of learning iterations to train for. overrides default per-environment setting 16 | max_iterations: '' 17 | 18 | ## Device config 19 | # 'physx' or 'flex' 20 | physics_engine: 'physx' 21 | # whether to use cpu or gpu pipeline 22 | pipeline: 'cpu' 23 | num_gpus: 1 # if 1, it will only use the gpu indicated below. Otherwise it will use num_gpus in order starting from zero (ignoring the gpu config below) 24 | # device for running physics simulation 25 | sim_device: 'cpu' 26 | # device to run RL 27 | rl_device: 'cpu' 28 | graphics_device_id: 0 29 | 30 | ## PhysX arguments 31 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only. 
32 | solver_type: 1 # 0: pgs, 1: tgs 33 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread 34 | 35 | 36 | # RLGames Arguments 37 | # test - if set, run policy in inference mode (requires setting checkpoint to load) 38 | test: False 39 | track_pose: False 40 | get_target_reference: False 41 | get_target_traj: False 42 | # save_jit - if Yes, it will save the Jit for execution on a real robot 43 | save_jit: False 44 | # used to set checkpoint path 45 | checkpoint: '' 46 | dagger_checkpoint: '' 47 | # set sigma when restoring network 48 | sigma: '' 49 | # set to True to use multi-gpu training 50 | multi_gpu: False 51 | 52 | wandb_activate: False 53 | wandb_group: '' 54 | wandb_name: AllegroKukaGraspingTest 55 | wandb_entity: 'himanshu_singh' 56 | wandb_project: 'isaacgym' 57 | wandb_tags: [] 58 | wandb_logcode_dir: '' 59 | 60 | capture_video: False 61 | capture_video_freq: 1464 62 | capture_video_len: 100 63 | force_render: True 64 | 65 | # disables rendering 66 | headless: True 67 | 68 | # set default task and default training config based on task 69 | defaults: 70 | - _self_ 71 | - task: AllegroXarmNew 72 | - train: ${task}PPO 73 | - pretrain: ${task} 74 | 75 | # set the directory where the output files get saved 76 | hydra: 77 | output_subdir: null 78 | run: 79 | dir: . 80 | -------------------------------------------------------------------------------- /cfg/launcher/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/cfg/launcher/default.yaml -------------------------------------------------------------------------------- /cfg/pretrain/AllegroXarmCabinet.yaml: -------------------------------------------------------------------------------- 1 | device: cuda:0 2 | wandb_name: "Pretrain_residuals_16ctx" 3 | model: 4 | proprio_dim: 23 5 | action_dim: 23 6 | pc_num: 100 7 | hidden_dim: 192 8 | max_ep_len: 4096 9 | max_length: null 10 | action_tanh: false 11 | context_length: 16 12 | n_layer: 4 13 | n_head: 4 14 | attn_pdrop: 0.0 15 | resid_pdrop: 0.0 16 | embd_pdrop: 0.0 17 | action_input: False 18 | scale_proprio: True 19 | full_autoregressive: True 20 | scale_action: True #these settings are for the working PolicyTransformer model 21 | test: False 22 | wandb_activate: False 23 | checkpoint: '' 24 | groundtruth_policy: '' 25 | load_trajectory: '' 26 | training: 27 | batch_size: 512 28 | modality_aligned: False 29 | use_pc_loss: False 30 | use_proprio_loss: False 31 | num_epochs: 100 32 | use_residuals: False 33 | num_workers: 16 34 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated) 35 | lr: 0.0001 36 | add_proprio_noise: True 37 | add_action_noise: True 38 | noise_arm: 0.1 39 | noise_hand: 0.1 40 | add_data_driven_noise: False 41 | weight_decay: 0.01 42 | log_freq: 1000 43 | model_save_freq: 1000 44 | time_shift: 0 45 | model_save_dir: algo/pretrained/models 46 | root_dir: retarget_data/train 47 | load_checkpoint: False 48 | checkpoint_path: '' 49 | validation: 50 | root_dir: retarget_data/val 51 | -------------------------------------------------------------------------------- /cfg/pretrain/AllegroXarmNew.yaml: -------------------------------------------------------------------------------- 1 | device: cuda:0 2 | wandb_name: "Pretrain_residuals_16ctx" 3 | model: 4 | proprio_dim: 23 5 | action_dim: 23 6 | pc_num: 100 7 | hidden_dim: 192 8 | max_ep_len: 4096 9 | max_length: null 10 | 
action_tanh: false 11 | context_length: 16 12 | n_layer: 4 13 | n_head: 4 14 | attn_pdrop: 0.0 15 | resid_pdrop: 0.0 16 | embd_pdrop: 0.0 17 | action_input: False 18 | scale_proprio: True 19 | full_autoregressive: True 20 | use_imagenet: False 21 | use_vit: False 22 | use_diffusion_policy: False 23 | use_r3m: False 24 | use_mvp_rgb: False 25 | use_r3m_depth: False 26 | use_vip: False 27 | diffusion_policy_horizon: None 28 | cache_all: False 29 | scale_action: True #these settings are for the working PolicyTransformer model 30 | test: False 31 | wandb_activate: False 32 | checkpoint: '' 33 | groundtruth_policy: '' 34 | load_trajectory: '' 35 | training: 36 | finetune_layernorm: False 37 | finetune_lastlayer: False 38 | batch_size: 256 39 | modality_aligned: False 40 | use_pc_loss: False 41 | use_proprio_loss: False 42 | num_epochs: 100 43 | use_residuals: False 44 | num_workers: 16 45 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated) 46 | lr: 0.0001 47 | add_proprio_noise: True 48 | add_action_noise: True 49 | noise_arm: 0.1 50 | noise_hand: 0.1 51 | add_data_driven_noise: False 52 | weight_decay: 0.01 53 | log_freq: 3000 54 | model_save_freq: 10000 55 | time_shift: 0 56 | model_save_dir: algo/pretrained/models 57 | root_dir: retarget_data/train 58 | load_checkpoint: False 59 | checkpoint_path: '' 60 | validation: 61 | root_dir: retarget_data/val 62 | 63 | diffusion: 64 | num_inference_steps: 100 65 | -------------------------------------------------------------------------------- /cfg/pretrain/AllegroXarmThrowing.yaml: -------------------------------------------------------------------------------- 1 | device: cuda:0 2 | wandb_name: "Pretrain_residuals_16ctx" 3 | model: 4 | proprio_dim: 23 5 | action_dim: 23 6 | pc_num: 100 7 | hidden_dim: 192 8 | max_ep_len: 4096 9 | max_length: null 10 | action_tanh: false 11 | context_length: 16 12 | n_layer: 4 13 | n_head: 4 14 | attn_pdrop: 0.0 15 | resid_pdrop: 0.0 16 | embd_pdrop: 0.0 17 | action_input: False 18 | scale_proprio: True 19 | full_autoregressive: True 20 | scale_action: True #these settings are for the working PolicyTransformer model 21 | test: False 22 | wandb_activate: False 23 | checkpoint: '' 24 | groundtruth_policy: '' 25 | load_trajectory: '' 26 | training: 27 | batch_size: 512 28 | modality_aligned: False 29 | use_pc_loss: False 30 | use_proprio_loss: False 31 | num_epochs: 100 32 | use_residuals: False 33 | num_workers: 16 34 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated) 35 | lr: 0.0001 36 | add_proprio_noise: True 37 | add_action_noise: True 38 | noise_arm: 0.1 39 | noise_hand: 0.1 40 | add_data_driven_noise: False 41 | weight_decay: 0.01 42 | log_freq: 1000 43 | model_save_freq: 1000 44 | time_shift: 0 45 | model_save_dir: algo/pretrained/models 46 | root_dir: retarget_data/train 47 | load_checkpoint: False 48 | checkpoint_path: '' 49 | validation: 50 | root_dir: retarget_data/val 51 | -------------------------------------------------------------------------------- /cfg/task/AllegroXarmCabinet.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | name: AllegroXarmCabinet 5 | 6 | physics_engine: ${..physics_engine} 7 | asset_root: '../assets' 8 | 9 | 10 | env: 11 | subtask: "" 12 | throw_far: False 13 | bucket_in_front: False 14 | use_leap: False 15 | use_allegro: True 16 | urdfFolder: "ycb_real_inertia" 17 | # if given, will override the device setting in gym. 
18 | #numEnvs: ${resolve_default:8192,${...num_envs}} 19 | numEnvs: ${...num_envs} 20 | envSpacing: 1.2 21 | episodeLength: 600 #change 22 | tablePosey: -0.15 23 | tablePosez: 0.023 24 | enableDebugVis: False 25 | enableVideoLog: False 26 | videoLogIdx: 0 27 | videoLogFreq: 20 28 | evalStats: False # extra evaluation-time statistics 29 | doSimpleObjects: True 30 | doVerySimpleObjects: False 31 | doDexYcbObjects: False 32 | useSavedInitPose: False 33 | limitArmDeltaTarget: True 34 | useRandomInitRot: False 35 | addZerosInPrivBuf: False 36 | usePoseRewardUnlifted: False 37 | usePoseRewardLifted: False 38 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"] 39 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"] 40 | initPoseVersion: v16 41 | useDIPFinger: False 42 | lowmem: False 43 | input_priv: True 44 | enableVhacd: True 45 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026'] 46 | simpleObjects: ['002', '011', '036', '010', '025', '024', '005', '007'] 47 | 48 | verysimpleObjects: ['002'] 49 | DexYcbObjects: ['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061'] 50 | 51 | clampAbsObservations: 10.0 52 | useOldActionSpace: False 53 | clampArmTarget: False 54 | 55 | stiffnessScale: 1.0 56 | forceLimitScale: 1.0 57 | useRelativeControl: False 58 | dofSpeedScale: 1.0 59 | actionsMovingAverage: 1.0 60 | controlFrequencyInv: 6 # 20 Hz 61 | jointVelocityLimit: 0.5 62 | 63 | resetPositionNoiseX: 0.1 64 | resetPositionNoiseY: 0.1 65 | resetPositionNoiseZ: 0.02 66 | resetRotationNoise: 1.0 67 | resetDofPosRandomIntervalFingers: 0.1 68 | resetDofPosRandomIntervalArm: 0.1 69 | resetDofVelRandomInterval: 0. 70 | 71 | 72 | pointCloudScale: 0.01 73 | # Random forces applied to the 74 | forceScale: 0.0 75 | forceProbRange: [0.001, 0.1] 76 | forceDecay: 0.99 77 | forceDecayInterval: 0.08 78 | 79 | resetOnArmCollision: False 80 | ArmTableCollisionThreshold: 10 81 | resetOnCollision: False 82 | ContactForceThreshold: 50 83 | resetOnFingerCrash: False 84 | FingerClearanceThreshold: 0.050 85 | 86 | liftingRewScale: 20.0 87 | goalHeight: 0.45 88 | handJointRewCoeff: 1 #work on this 89 | liftingBonus: 300.0 90 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus 91 | keypointRewScale: 200.0 92 | useFingertipReward: True 93 | usePalmReward: False 94 | useLiftingReward: True 95 | useKeypointReward: True 96 | distanceDeltaRewScale: 50.0 97 | useFingertipShapeDistReward: False 98 | useHandJointPoseRew: False 99 | 100 | 101 | handleDistRewardScale: 0.0 102 | aroundHandleRewardScale: 0.0 103 | openBonusRewardScale: 2.0 104 | goalDistRewardScale: 6.0 105 | openPoseRewardScale: 0.0 106 | goalBonusRewardScale: 2.0 107 | actionPenaltyScale: 0.01 108 | fingerDistRewardScale: 0.04 109 | thumbDistRewardScale: 0.08 110 | 111 | reachGoalBonus: 1000.0 112 | kukaActionsPenaltyScale: 0.003 113 | allegroActionsPenaltyScale: 0.0003 114 | fallDistance: 0.24 115 | fallPenalty: 0.0 116 | 117 | privilegedActions: False 118 | privilegedActionsTorque: 0.02 119 | 120 | # Physics v1, pretty much default settings we used from the start of the project 121 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used 122 | 123 | # gain of PD controller. 
124 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements 125 | armStiffness: 1000 #40.0 126 | handVelocity: 10.0 127 | armVelocity: 10.0 128 | 129 | handEffort: 0.35 # this is what was used in sim-to-real experiment. Motor torque in Newton*meters 130 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2 131 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2 132 | 133 | handDamping: 5 #increasing damping leads to less local oscillatory moment 134 | armDamping: 100 #5 135 | 136 | handArmature: 0 137 | armArmature: 0 138 | 139 | keypointScale: 1.5 140 | objectBaseSize: 0.05 141 | numPointCloud: 100 142 | 143 | randomizeObjectDimensions: True 144 | withSmallCuboids: True 145 | withBigCuboids: True 146 | withSticks: True 147 | 148 | objectType: "" #changing to ball only for now 149 | observationType: "full_state" 150 | successTolerance: 0.075 151 | targetSuccessTolerance: 0.01 152 | toleranceCurriculumIncrement: 0.9 # multiplicative 153 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps 154 | maxConsecutiveSuccesses: 2 155 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success 156 | 157 | saveStates: False 158 | saveStatesFile: "rootTensorsDofStates.bin" 159 | 160 | loadInitialStates: False 161 | loadStatesFile: "rootTensorsDofStates.bin" 162 | enableProprioHistory: True 163 | useObsAsProp: False 164 | enableActionHistory: True 165 | enableAttnMask: True 166 | enablePointCloud: True 167 | enableCameraSensors: False 168 | # set to True if you use camera sensors in the environment 169 | rgbd_camera: 170 | enable_depth: False 171 | enable_rgb: False 172 | render_slowness: 1 173 | camera_width: 60 174 | camera_height: 60 175 | buffer_width: 60 176 | buffer_height: 60 177 | fov: 60 178 | ss: 2 179 | num_cameras: 1 180 | intrinsics: 'utils/camera.json' 181 | randomize_camera_pose: 0.04 #in meters 182 | randomize_camera_rot: 5 #in degrees 183 | cam0: 184 | #pos: [0.20, -0.55, 0.65] 185 | #pos: [0.0, -0.31, 0.49] 186 | #pos: [0.12, -0.31, 0.55] 187 | pos: [0.12, -0.35, 0.60] 188 | target: [0.10, -0.25, 0.45] 189 | cam1: 190 | pos: [0.50, -0.15, 0.65] 191 | target: [0.0, -0.15, 0.6] 192 | wrist_camera: False 193 | 194 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet 195 | 196 | asset: 197 | # Whis was the original kuka_allegro asset. 198 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too 199 | # high in general. But in turn this leads to smoother movements and better looking behaviors. 200 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which 201 | # gives a bit more FPS. 202 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf" 203 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf" 204 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 205 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 206 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf" 207 | # This is the URDF which has more accurate collision shapes and weights. 208 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and 209 | # fingers which leads to faster training (better sample efficiency). 
But overall the resulting 210 | # behaviors look too fast and a bit unrealistic. 211 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not 212 | # lead to behaviors that would be worse for sim-to-real experiments. Most likely the problem is elsewhere, 213 | # for example the max torques might be too high, or the armature of the motors is too low. 214 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project. 215 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf" 216 | 217 | task: 218 | 219 | do_random_resets: False 220 | 221 | domain_randomization: 222 | randomize_friction: False 223 | friction_lower_limit: 0.6 224 | friction_upper_limit: 1.2 225 | 226 | randomize_object_mass: False 227 | mass_lower_limit: 0.8 228 | mass_upper_limit: 1.2 229 | 230 | randomize_object_com: False 231 | com_lower_limit: -0.05 232 | com_upper_limit: 0.05 233 | 234 | randomize_table_position: False 235 | table_y_lower: 0.45 236 | table_y_upper: 0.55 237 | table_z_lower: 0.01 238 | table_z_upper: 0.05 239 | 240 | randomize_table_friction: False 241 | table_friction_lower_limit: 0.6 242 | table_friction_upper_limit: 1.2 243 | 244 | 245 | sim: 246 | substeps: 2 247 | dt: 0.00833 # 1/120 248 | up_axis: "z" 249 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"} 250 | num_client_threads: 8 251 | 252 | gravity: [0.0, 0.0, -9.81] 253 | physx: 254 | num_threads: 6 255 | solver_type: 1 # 0: pgs, 1: tgs 256 | num_position_iterations: 8 257 | num_velocity_iterations: 0 258 | 259 | max_gpu_contact_pairs: 8388608 # 8*1024*1024 260 | num_subscenes: ${....num_subscenes} 261 | contact_offset: 0.002 262 | rest_offset: 0.0 263 | bounce_threshold_velocity: 0.2 264 | max_depenetration_velocity: 1000.0 265 | default_buffer_size_multiplier: 25.0 266 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 267 | -------------------------------------------------------------------------------- /cfg/task/AllegroXarmNew.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | name: AllegroXarmNew 5 | 6 | physics_engine: ${..physics_engine} 7 | asset_root: '../assets' 8 | 9 | 10 | env: 11 | subtask: "" 12 | use_leap: False 13 | use_allegro: True 14 | urdfFolder: "ycb_real_inertia" 15 | # if given, will override the device setting in gym. 
16 | #numEnvs: ${resolve_default:8192,${...num_envs}} 17 | numEnvs: ${...num_envs} 18 | envSpacing: 1.2 19 | episodeLength: 600 #change 20 | tablePosey: -0.15 21 | tablePosez: 0.023 22 | enableDebugVis: False 23 | enableVideoLog: False 24 | videoLogIdx: 0 25 | videoLogFreq: 20 26 | evalStats: False # extra evaluation-time statistics 27 | doSimpleObjects: True 28 | doVerySimpleObjects: False 29 | doDexYcbObjects: False 30 | useSavedInitPose: False 31 | limitArmDeltaTarget: True 32 | useRandomInitRot: False 33 | addZerosInPrivBuf: False 34 | usePoseRewardUnlifted: False 35 | usePoseRewardLifted: False 36 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"] 37 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"] 38 | initPoseVersion: v16 39 | useDIPFinger: False 40 | lowmem: False 41 | input_priv: True 42 | enableVhacd: True 43 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026'] 44 | simpleObjects: ['002', '036', '010', '025', '024', '005'] #['021', '035', '036', '019'] # 45 | 46 | verysimpleObjects: ['002'] 47 | DexYcbObjects: ['035','003','004','007','008','009','011', '021','037','040','051','052','061'] #['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061'] 48 | 49 | clampAbsObservations: 10.0 50 | useOldActionSpace: False 51 | clampArmTarget: False 52 | 53 | stiffnessScale: 1.0 54 | forceLimitScale: 1.0 55 | useRelativeControl: False 56 | dofSpeedScale: 1.0 57 | actionsMovingAverage: 1.0 58 | controlFrequencyInv: 6 # 20 Hz 59 | jointVelocityLimit: 0.5 60 | 61 | resetPositionNoiseX: 0.1 62 | resetPositionNoiseY: 0.1 63 | resetPositionNoiseZ: 0.02 64 | resetRotationNoise: 1.0 65 | resetDofPosRandomIntervalFingers: 0.1 66 | resetDofPosRandomIntervalArm: 0.1 67 | resetDofVelRandomInterval: 0. 68 | 69 | 70 | pointCloudScale: 0.01 71 | # Random forces applied to the 72 | forceScale: 0.0 73 | forceProbRange: [0.8, 0.8] 74 | forceDecay: 0.99 75 | forceDecayInterval: 0.08 76 | 77 | resetOnArmCollision: False 78 | ArmTableCollisionThreshold: 10 79 | resetOnCollision: False 80 | ContactForceThreshold: 50 81 | resetOnFingerCrash: False 82 | FingerClearanceThreshold: 0.050 83 | 84 | liftingRewScale: 20.0 85 | goalHeight: 0.45 86 | handJointRewCoeff: 1 #work on this 87 | liftingBonus: 300.0 88 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus 89 | keypointRewScale: 200.0 90 | useFingertipReward: True 91 | usePalmReward: False 92 | useLiftingReward: True 93 | useKeypointReward: True 94 | distanceDeltaRewScale: 50.0 95 | useFingertipShapeDistReward: False 96 | useHandJointPoseRew: False 97 | 98 | reachGoalBonus: 1000.0 99 | kukaActionsPenaltyScale: 0.003 100 | allegroActionsPenaltyScale: 0.0003 101 | fallDistance: 0.24 102 | fallPenalty: 0.0 103 | 104 | privilegedActions: False 105 | privilegedActionsTorque: 0.02 106 | 107 | # Physics v1, pretty much default settings we used from the start of the project 108 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used 109 | 110 | # gain of PD controller. 111 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements 112 | armStiffness: 1000 #40.0 113 | handVelocity: 10.0 114 | armVelocity: 10.0 115 | 116 | handEffort: 0.35 # this is what was used in sim-to-real experiment. 
Motor torque in Newton*meters 117 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2 118 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2 119 | 120 | handDamping: 5 #increasing damping leads to less local oscillatory moment 121 | armDamping: 100 #5 122 | 123 | handArmature: 0 124 | armArmature: 0 125 | 126 | keypointScale: 1.5 127 | objectBaseSize: 0.05 128 | numPointCloud: 100 129 | 130 | randomizeObjectDimensions: True 131 | withSmallCuboids: True 132 | withBigCuboids: True 133 | withSticks: True 134 | 135 | objectType: "" #changing to ball only for now 136 | observationType: "full_state" 137 | successTolerance: 0.075 138 | targetSuccessTolerance: 0.01 139 | toleranceCurriculumIncrement: 0.9 # multiplicative 140 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps 141 | maxConsecutiveSuccesses: 2 142 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success 143 | 144 | saveStates: False 145 | saveStatesFile: "rootTensorsDofStates.bin" 146 | 147 | loadInitialStates: False 148 | loadStatesFile: "rootTensorsDofStates.bin" 149 | enableProprioHistory: True 150 | useObsAsProp: False 151 | enableActionHistory: True 152 | enableAttnMask: True 153 | enablePointCloud: True 154 | enableCameraSensors: False 155 | # set to True if you use camera sensors in the environment 156 | rgbd_camera: 157 | enable_depth: False 158 | enable_rgb: False 159 | render_slowness: 1 160 | camera_width: 60 161 | camera_height: 60 162 | buffer_width: 60 163 | buffer_height: 60 164 | fov: 60 165 | ss: 2 166 | num_cameras: 1 167 | intrinsics: 'utils/camera2.json' 168 | randomize_camera_pose: 0.04 #in meters 169 | randomize_camera_rot: 5 #in degrees 170 | cam0: 171 | #pos: [0.20, -0.55, 0.65] 172 | #pos: [0.0, -0.31, 0.49] 173 | #pos: [0.12, -0.31, 0.55] 174 | pos: [0.12, -0.35, 0.60] 175 | target: [0.10, -0.25, 0.45] 176 | cam1: 177 | pos: [0.50, -0.15, 0.65] 178 | target: [0.0, -0.15, 0.6] 179 | wrist_camera: False 180 | 181 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet 182 | 183 | asset: 184 | # Whis was the original kuka_allegro asset. 185 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too 186 | # high in general. But in turn this leads to smoother movements and better looking behaviors. 187 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which 188 | # gives a bit more FPS. 189 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf" 190 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf" 191 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 192 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 193 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf" 194 | # This is the URDF which has more accurate collision shapes and weights. 195 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and 196 | # fingers which leads to faster training (better sample efficiency). But overall the resulting 197 | # behaviors look too fast and a bit unrealistic. 198 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not 199 | # lead to behaviors that would be worse for sim-to-real experiments. 
Most likely the problem is elsewhere, 200 | # for example the max torques might be too high, or the armature of the motors is too low. 201 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project. 202 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf" 203 | 204 | task: 205 | 206 | do_random_resets: False 207 | 208 | domain_randomization: 209 | randomize_friction: False 210 | friction_lower_limit: 0.6 211 | friction_upper_limit: 1.2 212 | 213 | randomize_object_mass: False 214 | mass_lower_limit: 0.8 215 | mass_upper_limit: 1.2 216 | 217 | randomize_object_com: False 218 | com_lower_limit: -0.05 219 | com_upper_limit: 0.05 220 | 221 | randomize_table_position: False 222 | table_rnd_y: 0.02 223 | table_rnd_z: 0.02 224 | table_rnd_x: 0.02 225 | 226 | randomize_table_friction: False 227 | table_friction_lower_limit: 0.6 228 | table_friction_upper_limit: 1.2 229 | 230 | 231 | sim: 232 | substeps: 2 233 | dt: 0.00833 # 1/120 234 | up_axis: "z" 235 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"} 236 | num_client_threads: 8 237 | 238 | gravity: [0.0, 0.0, -9.81] 239 | physx: 240 | num_threads: 6 241 | solver_type: 1 # 0: pgs, 1: tgs 242 | num_position_iterations: 8 243 | num_velocity_iterations: 0 244 | 245 | max_gpu_contact_pairs: 8388608 # 8*1024*1024 246 | num_subscenes: ${....num_subscenes} 247 | contact_offset: 0.002 248 | rest_offset: 0.0 249 | bounce_threshold_velocity: 0.2 250 | max_depenetration_velocity: 1000.0 251 | default_buffer_size_multiplier: 25.0 252 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 253 | -------------------------------------------------------------------------------- /cfg/task/AllegroXarmThrowing.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | name: AllegroXarmThrowing 5 | 6 | physics_engine: ${..physics_engine} 7 | asset_root: '../assets' 8 | 9 | 10 | env: 11 | subtask: "" 12 | throw_far: False 13 | bucket_in_front: False 14 | use_leap: False 15 | use_allegro: True 16 | urdfFolder: "ycb_real_inertia" 17 | # if given, will override the device setting in gym. 
18 | #numEnvs: ${resolve_default:8192,${...num_envs}} 19 | numEnvs: ${...num_envs} 20 | envSpacing: 1.2 21 | episodeLength: 600 #change 22 | tablePosey: -0.15 23 | tablePosez: 0.023 24 | enableDebugVis: False 25 | enableVideoLog: False 26 | videoLogIdx: 0 27 | videoLogFreq: 20 28 | evalStats: False # extra evaluation-time statistics 29 | doSimpleObjects: True 30 | doVerySimpleObjects: False 31 | doDexYcbObjects: False 32 | useSavedInitPose: False 33 | limitArmDeltaTarget: True 34 | useRandomInitRot: False 35 | addZerosInPrivBuf: False 36 | usePoseRewardUnlifted: False 37 | usePoseRewardLifted: False 38 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"] 39 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"] 40 | initPoseVersion: v16 41 | useDIPFinger: False 42 | lowmem: False 43 | input_priv: True 44 | enableVhacd: True 45 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026'] 46 | simpleObjects: ['002', '011', '036', '010', '025', '024', '005', '007'] 47 | 48 | verysimpleObjects: ['002'] 49 | DexYcbObjects: ['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061'] 50 | 51 | clampAbsObservations: 10.0 52 | useOldActionSpace: False 53 | clampArmTarget: False 54 | 55 | stiffnessScale: 1.0 56 | forceLimitScale: 1.0 57 | useRelativeControl: False 58 | dofSpeedScale: 1.0 59 | actionsMovingAverage: 1.0 60 | controlFrequencyInv: 6 # 20 Hz 61 | jointVelocityLimit: 0.5 62 | 63 | resetPositionNoiseX: 0.1 64 | resetPositionNoiseY: 0.1 65 | resetPositionNoiseZ: 0.02 66 | resetRotationNoise: 1.0 67 | resetDofPosRandomIntervalFingers: 0.1 68 | resetDofPosRandomIntervalArm: 0.1 69 | resetDofVelRandomInterval: 0. 70 | 71 | 72 | pointCloudScale: 0.01 73 | # Random forces applied to the 74 | forceScale: 0.0 75 | forceProbRange: [0.001, 0.1] 76 | forceDecay: 0.99 77 | forceDecayInterval: 0.08 78 | 79 | resetOnArmCollision: False 80 | ArmTableCollisionThreshold: 10 81 | resetOnCollision: False 82 | ContactForceThreshold: 50 83 | resetOnFingerCrash: False 84 | FingerClearanceThreshold: 0.050 85 | 86 | liftingRewScale: 20.0 87 | goalHeight: 0.45 88 | handJointRewCoeff: 1 #work on this 89 | liftingBonus: 300.0 90 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus 91 | keypointRewScale: 200.0 92 | useFingertipReward: True 93 | usePalmReward: False 94 | useLiftingReward: True 95 | useKeypointReward: True 96 | distanceDeltaRewScale: 50.0 97 | useFingertipShapeDistReward: False 98 | useHandJointPoseRew: False 99 | 100 | reachGoalBonus: 1000.0 101 | kukaActionsPenaltyScale: 0.003 102 | allegroActionsPenaltyScale: 0.0003 103 | fallDistance: 0.24 104 | fallPenalty: 0.0 105 | 106 | privilegedActions: False 107 | privilegedActionsTorque: 0.02 108 | 109 | # Physics v1, pretty much default settings we used from the start of the project 110 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used 111 | 112 | # gain of PD controller. 113 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements 114 | armStiffness: 1000 #40.0 115 | handVelocity: 10.0 116 | armVelocity: 10.0 117 | 118 | handEffort: 0.35 # this is what was used in sim-to-real experiment. 
Motor torque in Newton*meters 119 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2 120 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2 121 | 122 | handDamping: 5 #increasing damping leads to less local oscillatory moment 123 | armDamping: 100 #5 124 | 125 | handArmature: 0 126 | armArmature: 0 127 | 128 | keypointScale: 1.5 129 | objectBaseSize: 0.05 130 | numPointCloud: 100 131 | 132 | randomizeObjectDimensions: True 133 | withSmallCuboids: True 134 | withBigCuboids: True 135 | withSticks: True 136 | 137 | objectType: "" #changing to ball only for now 138 | observationType: "full_state" 139 | successTolerance: 0.075 140 | targetSuccessTolerance: 0.01 141 | toleranceCurriculumIncrement: 0.9 # multiplicative 142 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps 143 | maxConsecutiveSuccesses: 2 144 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success 145 | 146 | saveStates: False 147 | saveStatesFile: "rootTensorsDofStates.bin" 148 | 149 | loadInitialStates: False 150 | loadStatesFile: "rootTensorsDofStates.bin" 151 | enableProprioHistory: True 152 | useObsAsProp: False 153 | enableActionHistory: True 154 | enableAttnMask: True 155 | enablePointCloud: True 156 | enableCameraSensors: False 157 | # set to True if you use camera sensors in the environment 158 | rgbd_camera: 159 | enable_depth: False 160 | enable_rgb: False 161 | render_slowness: 1 162 | camera_width: 60 163 | camera_height: 60 164 | buffer_width: 60 165 | buffer_height: 60 166 | fov: 60 167 | ss: 2 168 | num_cameras: 1 169 | intrinsics: 'utils/camera.json' 170 | randomize_camera_pose: 0.04 #in meters 171 | randomize_camera_rot: 5 #in degrees 172 | cam0: 173 | #pos: [0.20, -0.55, 0.65] 174 | #pos: [0.0, -0.31, 0.49] 175 | #pos: [0.12, -0.31, 0.55] 176 | pos: [0.12, -0.35, 0.60] 177 | target: [0.10, -0.25, 0.45] 178 | cam1: 179 | pos: [0.50, -0.15, 0.65] 180 | target: [0.0, -0.15, 0.6] 181 | wrist_camera: False 182 | 183 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet 184 | 185 | asset: 186 | # Whis was the original kuka_allegro asset. 187 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too 188 | # high in general. But in turn this leads to smoother movements and better looking behaviors. 189 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which 190 | # gives a bit more FPS. 191 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf" 192 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf" 193 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 194 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf" 195 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf" 196 | # This is the URDF which has more accurate collision shapes and weights. 197 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and 198 | # fingers which leads to faster training (better sample efficiency). But overall the resulting 199 | # behaviors look too fast and a bit unrealistic. 200 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not 201 | # lead to behaviors that would be worse for sim-to-real experiments. 
Most likely the problem is elsewhere, 202 | # for example the max torques might be too high, or the armature of the motors is too low. 203 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project. 204 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf" 205 | 206 | task: 207 | 208 | do_random_resets: False 209 | 210 | domain_randomization: 211 | randomize_friction: False 212 | friction_lower_limit: 0.6 213 | friction_upper_limit: 1.2 214 | 215 | randomize_object_mass: False 216 | mass_lower_limit: 0.8 217 | mass_upper_limit: 1.2 218 | 219 | randomize_object_com: False 220 | com_lower_limit: -0.05 221 | com_upper_limit: 0.05 222 | 223 | randomize_table_position: False 224 | table_y_lower: 0.45 225 | table_y_upper: 0.55 226 | table_z_lower: 0.01 227 | table_z_upper: 0.05 228 | 229 | randomize_table_friction: False 230 | table_friction_lower_limit: 0.6 231 | table_friction_upper_limit: 1.2 232 | 233 | 234 | sim: 235 | substeps: 2 236 | dt: 0.00833 # 1/120 237 | up_axis: "z" 238 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"} 239 | num_client_threads: 8 240 | 241 | gravity: [0.0, 0.0, -9.81] 242 | physx: 243 | num_threads: 6 244 | solver_type: 1 # 0: pgs, 1: tgs 245 | num_position_iterations: 8 246 | num_velocity_iterations: 0 247 | 248 | max_gpu_contact_pairs: 8388608 # 8*1024*1024 249 | num_subscenes: ${....num_subscenes} 250 | contact_offset: 0.002 251 | rest_offset: 0.0 252 | bounce_threshold_velocity: 0.2 253 | max_depenetration_velocity: 1000.0 254 | default_buffer_size_multiplier: 25.0 255 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 256 | -------------------------------------------------------------------------------- /cfg/train/AllegroXarmCabinetPPO.yaml: -------------------------------------------------------------------------------- 1 | seed: ${..seed} 2 | algo: PPO 3 | network: 4 | mlp: 5 | units: [512, 256, 128] 6 | priv_mlp: 7 | units: [256, 128, 8] 8 | 9 | pc_mlp: 10 | out_dim: 64 11 | units: [64,64] 12 | 13 | load_path: ${..checkpoint} # path to the checkpoint to load 14 | 15 | ppo: 16 | output_name: 'debug' 17 | normalize_input: True 18 | normalize_value: True 19 | normalize_pc: False 20 | normalize_proprio_hist: False 21 | value_bootstrap: True 22 | num_actors: ${...task.env.numEnvs} 23 | num_gradient_steps: ${...train.ppo.horizon_length} 24 | normalize_advantage: True 25 | gamma: 0.99 26 | tau: 0.95 27 | initEpsArm: 1.0 28 | initEpsHand: 1.0 29 | value_grads_to_pointnet: True 30 | point_cloud_input_to_value: False 31 | learning_rate: 1e-4 32 | kl_threshold: 0.02 33 | min_lr: 1e-6 34 | max_lr: 1e-4 35 | # PPO batch collection 36 | horizon_length: 10 37 | minibatch_size: 32768 38 | mini_epochs: 1 39 | # PPO loss setting 40 | clip_value: True 41 | critic_coef: 4 42 | entropy_coef: 0.0 43 | e_clip: 0.2 44 | bounds_loss_coef: 0.0001 45 | # grad clipping 46 | truncate_grads: True 47 | grad_norm: 1.0 48 | # snapshot setting 49 | save_best_after: 0 50 | save_frequency: 1250 51 | max_agent_steps: 5000000000 52 | critic_warmup_steps: -1 53 | # hora setting 54 | priv_info: False 55 | priv_info_dim: 9 56 | priv_info_embed_dim: 8 57 | proprio_adapt: False 58 | useMemoryEfficientBuffer: False 59 | dapg: 60 | l1: 0.1 61 | l2: 0.999 62 | dapg_threshold: 0.002 63 | 64 | wandb: 65 | activate: True 66 | entity: himanshu_singh 67 | project: grasping 68 | 
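The learning_rate, kl_threshold, min_lr and max_lr fields above (also present in the other two train configs) point to a KL-adaptive learning-rate schedule. A minimal sketch of the usual rule, assuming these exact field names; the constants (the 1.5 factor and the 2x/0.5x bands) are illustrative and the repo's PPO implementation under algo/ppo_transformer may use different ones:

def adapt_lr(current_lr, kl, kl_threshold=0.02, min_lr=1e-6, max_lr=1e-4):
    # shrink the step when the policy update overshoots the KL target,
    # grow it when the update is overly conservative, clamp to [min_lr, max_lr]
    if kl > 2.0 * kl_threshold:
        current_lr /= 1.5
    elif kl < 0.5 * kl_threshold:
        current_lr *= 1.5
    return min(max_lr, max(min_lr, current_lr))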
-------------------------------------------------------------------------------- /cfg/train/AllegroXarmNewPPO.yaml: -------------------------------------------------------------------------------- 1 | # params: 2 | # seed: ${...seed} 3 | 4 | # algo: 5 | # name: a2c_continuous 6 | 7 | # model: 8 | # name: continuous_a2c_logstd 9 | 10 | # network: 11 | # name: a2c_pointnet 12 | # separate: False 13 | 14 | # space: 15 | # continuous: 16 | # mu_activation: None 17 | # sigma_activation: None 18 | # mu_init: 19 | # name: default 20 | # sigma_init: 21 | # name: const_initializer 22 | # val: 0 23 | # fixed_sigma: True 24 | 25 | # mlp: 26 | # units: [1024, 1024, 512, 512] 27 | # activation: elu 28 | # d2rl: False 29 | # initializer: 30 | # name: default 31 | # regularizer: 32 | # name: None 33 | 34 | # load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 35 | # load_path: ${...checkpoint} # path to the checkpoint to load 36 | 37 | # config: 38 | # name: ${resolve_default:AllegroKukaPPO,${....experiment}} 39 | # # full_experiment_name: ${.name} 40 | # env_name: rlgpu 41 | # multi_gpu: ${....multi_gpu} 42 | # ppo: True 43 | # mixed_precision: True 44 | # normalize_input: True 45 | # normalize_value: True 46 | # normalize_advantage: True 47 | # reward_shaper: 48 | # scale_value: 0.01 49 | 50 | # num_actors: ${....task.env.numEnvs} 51 | # gamma: 0.99 52 | # tau: 0.95 53 | # learning_rate: 1e-4 54 | # lr_schedule: adaptive 55 | # schedule_type: standard 56 | # kl_threshold: 0.016 57 | # score_to_win: 1000000 58 | # max_epochs: 100000 59 | # max_frames: 10_000_000_000 60 | # save_best_after: 100 61 | # save_frequency: 5000 62 | # print_stats: True 63 | # grad_norm: 1.0 64 | # entropy_coef: 0.0 65 | # truncate_grads: True 66 | # e_clip: 0.1 67 | # minibatch_size: 8192 68 | # mini_epochs: 4 69 | # critic_coef: 4.0 70 | # clip_value: True 71 | # horizon_length: 16 72 | # seq_length: 16 73 | 74 | # # SampleFactory currently gives better results without bounds loss but I don't think this loss matters too much 75 | # # bounds_loss_coef: 0.0 76 | # bounds_loss_coef: 0.0001 77 | 78 | # # optimize summaries to prevent tf.event files from growing to gigabytes 79 | # defer_summaries_sec: 5 80 | # summaries_interval_sec_min: 5 81 | # summaries_interval_sec_max: 300 82 | 83 | # player: 84 | # #render: True 85 | # deterministic: False # be careful there's a typo in older versions of rl_games in this parameter name ("determenistic") 86 | # games_num: 100000 87 | # print_stats: False 88 | seed: ${..seed} 89 | algo: PPOTransformer 90 | network: 91 | mlp: 92 | units: [512, 256, 128] 93 | priv_mlp: 94 | units: [256, 128, 8] 95 | 96 | pc_mlp: 97 | out_dim: 64 98 | units: [64,64] 99 | 100 | load_path: ${..checkpoint} # path to the checkpoint to load 101 | 102 | ppo: 103 | output_name: 'debug' 104 | normalize_input: True 105 | normalize_value: True 106 | normalize_pc: False 107 | normalize_proprio_hist: False 108 | value_bootstrap: True 109 | num_actors: ${...task.env.numEnvs} 110 | num_gradient_steps: ${...train.ppo.horizon_length} 111 | normalize_advantage: True 112 | gamma: 0.99 113 | tau: 0.95 114 | initEpsArm: 1.0 115 | initEpsHand: 1.0 116 | value_grads_to_pointnet: True 117 | point_cloud_input_to_value: True 118 | learning_rate: 1e-4 119 | kl_threshold: 0.02 120 | min_lr: 1e-6 121 | max_lr: 1e-4 122 | # PPO batch collection 123 | horizon_length: 10 124 | minibatch_size: 4096 125 | mini_epochs: 1 126 | # PPO loss setting 127 | clip_value: True 128 | critic_coef: 4 129 | 
entropy_coef: 0.0 130 | e_clip: 0.2 131 | bounds_loss_coef: 0.0001 132 | # grad clipping 133 | truncate_grads: True 134 | grad_norm: 1.0 135 | # snapshot setting 136 | save_best_after: 0 137 | save_frequency: 1250 138 | max_agent_steps: 5000000000 139 | critic_warmup_steps: -1 140 | # hora setting 141 | priv_info: False 142 | priv_info_dim: 9 143 | priv_info_embed_dim: 8 144 | proprio_adapt: False 145 | useMemoryEfficientBuffer: False 146 | dapg: 147 | l1: 0.1 148 | l2: 0.999 149 | dapg_threshold: 0.002 150 | 151 | wandb: 152 | activate: True 153 | entity: himanshu_singh 154 | project: grasping 155 | -------------------------------------------------------------------------------- /cfg/train/AllegroXarmThrowingPPO.yaml: -------------------------------------------------------------------------------- 1 | # params: 2 | # seed: ${...seed} 3 | 4 | # algo: 5 | # name: a2c_continuous 6 | 7 | # model: 8 | # name: continuous_a2c_logstd 9 | 10 | # network: 11 | # name: a2c_pointnet 12 | # separate: False 13 | 14 | # space: 15 | # continuous: 16 | # mu_activation: None 17 | # sigma_activation: None 18 | # mu_init: 19 | # name: default 20 | # sigma_init: 21 | # name: const_initializer 22 | # val: 0 23 | # fixed_sigma: True 24 | 25 | # mlp: 26 | # units: [1024, 1024, 512, 512] 27 | # activation: elu 28 | # d2rl: False 29 | # initializer: 30 | # name: default 31 | # regularizer: 32 | # name: None 33 | 34 | # load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 35 | # load_path: ${...checkpoint} # path to the checkpoint to load 36 | 37 | # config: 38 | # name: ${resolve_default:AllegroKukaPPO,${....experiment}} 39 | # # full_experiment_name: ${.name} 40 | # env_name: rlgpu 41 | # multi_gpu: ${....multi_gpu} 42 | # ppo: True 43 | # mixed_precision: True 44 | # normalize_input: True 45 | # normalize_value: True 46 | # normalize_advantage: True 47 | # reward_shaper: 48 | # scale_value: 0.01 49 | 50 | # num_actors: ${....task.env.numEnvs} 51 | # gamma: 0.99 52 | # tau: 0.95 53 | # learning_rate: 1e-4 54 | # lr_schedule: adaptive 55 | # schedule_type: standard 56 | # kl_threshold: 0.016 57 | # score_to_win: 1000000 58 | # max_epochs: 100000 59 | # max_frames: 10_000_000_000 60 | # save_best_after: 100 61 | # save_frequency: 5000 62 | # print_stats: True 63 | # grad_norm: 1.0 64 | # entropy_coef: 0.0 65 | # truncate_grads: True 66 | # e_clip: 0.1 67 | # minibatch_size: 8192 68 | # mini_epochs: 4 69 | # critic_coef: 4.0 70 | # clip_value: True 71 | # horizon_length: 16 72 | # seq_length: 16 73 | 74 | # # SampleFactory currently gives better results without bounds loss but I don't think this loss matters too much 75 | # # bounds_loss_coef: 0.0 76 | # bounds_loss_coef: 0.0001 77 | 78 | # # optimize summaries to prevent tf.event files from growing to gigabytes 79 | # defer_summaries_sec: 5 80 | # summaries_interval_sec_min: 5 81 | # summaries_interval_sec_max: 300 82 | 83 | # player: 84 | # #render: True 85 | # deterministic: False # be careful there's a typo in older versions of rl_games in this parameter name ("determenistic") 86 | # games_num: 100000 87 | # print_stats: False 88 | seed: ${..seed} 89 | algo: PPO 90 | network: 91 | mlp: 92 | units: [512, 256, 128] 93 | priv_mlp: 94 | units: [256, 128, 8] 95 | 96 | pc_mlp: 97 | out_dim: 64 98 | units: [64,64] 99 | 100 | load_path: ${..checkpoint} # path to the checkpoint to load 101 | 102 | ppo: 103 | output_name: 'debug' 104 | normalize_input: True 105 | normalize_value: True 106 | normalize_pc: False 107 | 
normalize_proprio_hist: False 108 | value_bootstrap: True 109 | num_actors: ${...task.env.numEnvs} 110 | num_gradient_steps: ${...train.ppo.horizon_length} 111 | normalize_advantage: True 112 | gamma: 0.99 113 | tau: 0.95 114 | initEpsArm: 1.0 115 | initEpsHand: 1.0 116 | value_grads_to_pointnet: True 117 | point_cloud_input_to_value: True 118 | learning_rate: 1e-4 119 | kl_threshold: 0.02 120 | min_lr: 1e-6 121 | max_lr: 1e-4 122 | # PPO batch collection 123 | horizon_length: 10 124 | minibatch_size: 32768 125 | mini_epochs: 1 126 | # PPO loss setting 127 | clip_value: True 128 | critic_coef: 4 129 | entropy_coef: 0.0 130 | e_clip: 0.2 131 | bounds_loss_coef: 0.0001 132 | # grad clipping 133 | truncate_grads: True 134 | grad_norm: 1.0 135 | # snapshot setting 136 | save_best_after: 0 137 | save_frequency: 1250 138 | max_agent_steps: 5000000000 139 | critic_warmup_steps: -1 140 | # hora setting 141 | priv_info: False 142 | priv_info_dim: 9 143 | priv_info_embed_dim: 8 144 | proprio_adapt: False 145 | useMemoryEfficientBuffer: False 146 | dapg: 147 | l1: 0.1 148 | l2: 0.999 149 | dapg_threshold: 0.002 150 | 151 | wandb: 152 | activate: True 153 | entity: himanshu_singh 154 | project: grasping 155 | -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | name: rlgpu 2 | channels: 3 | - pytorch3d 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=conda_forge 9 | - _openmp_mutex=4.5=2_kmp_llvm 10 | - absl-py=2.1.0=pyhd8ed1ab_0 11 | - aiohttp=3.7.4.post0=py37h5e8e339_1 12 | - antlr-python-runtime=4.9.3=pyhd8ed1ab_1 13 | - appdirs=1.4.4=pyhd3eb1b0_0 14 | - async-timeout=3.0.1=py_1000 15 | - attrs=23.2.0=pyh71513ae_0 16 | - backcall=0.2.0=pyh9f0ad1d_0 17 | - backports=1.0=pyhd8ed1ab_3 18 | - backports.functools_lru_cache=2.0.0=pyhd8ed1ab_0 19 | - blas=1.0=mkl 20 | - blas-devel=3.9.0=16_linux64_mkl 21 | - blinker=1.6.3=pyhd8ed1ab_0 22 | - brotli=1.0.9=h5eee18b_7 23 | - brotli-bin=1.0.9=h5eee18b_7 24 | - brotli-python=1.0.9=py37hd23a5d3_7 25 | - bzip2=1.0.8=hd590300_5 26 | - c-ares=1.28.1=hd590300_0 27 | - ca-certificates=2024.7.2=h06a4308_0 28 | - cachetools=5.3.3=pyhd8ed1ab_0 29 | - certifi=2024.2.2=pyhd8ed1ab_0 30 | - cffi=1.15.1=py37h43b0acd_1 31 | - chardet=4.0.0=py37h89c1867_3 32 | - charset-normalizer=3.3.2=pyhd8ed1ab_0 33 | - click=8.1.3=py37h89c1867_0 34 | - cloudpickle=2.0.0=pyhd3eb1b0_0 35 | - cryptography=38.0.2=py37h5994e8b_1 36 | - cudatoolkit=11.1.1=hb139c0e_13 37 | - cycler=0.11.0=pyhd3eb1b0_0 38 | - dataclasses=0.8=pyh6d0b6a4_7 39 | - dbus=1.13.18=hb2f20db_0 40 | - debugpy=1.6.3=py37hd23a5d3_0 41 | - docker-pycreds=0.4.0=pyhd3eb1b0_0 42 | - einops=0.6.1=pyhd8ed1ab_0 43 | - entrypoints=0.4=pyhd8ed1ab_0 44 | - expat=2.5.0=h6a678d5_0 45 | - filelock=3.9.0=py37h06a4308_0 46 | - fontconfig=2.14.1=h52c9d5c_1 47 | - fonttools=4.25.0=pyhd3eb1b0_0 48 | - freetype=2.12.1=h267a509_2 49 | - fvcore=0.1.5.post20221221=pyhd8ed1ab_0 50 | - giflib=5.2.1=h5eee18b_3 51 | - gitdb=4.0.7=pyhd3eb1b0_0 52 | - gitpython=3.1.30=py37h06a4308_0 53 | - glib=2.78.4=h6a678d5_0 54 | - glib-tools=2.78.4=h6a678d5_0 55 | - gmp=6.3.0=h59595ed_1 56 | - gnutls=3.6.13=h85f3911_1 57 | - google-auth=2.23.0=pyh1a96a4e_0 58 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 59 | - grpc-cpp=1.48.1=h30feacc_1 60 | - grpcio=1.48.1=py37he7b19e7_1 61 | - gst-plugins-base=1.14.1=h6a678d5_1 62 | - gstreamer=1.14.1=h5eee18b_1 63 | - hydra-core=1.3.2=pyhd8ed1ab_0 64 | - 
icu=58.2=he6710b0_3 65 | - idna=3.6=pyhd8ed1ab_0 66 | - importlib-metadata=4.11.4=py37h89c1867_0 67 | - importlib_metadata=4.11.4=hd8ed1ab_0 68 | - importlib_resources=5.2.0=pyhd3eb1b0_1 69 | - intel-openmp=2022.1.0=h9e868ea_3769 70 | - iopath=0.1.9=pyhd8ed1ab_0 71 | - ipykernel=6.16.2=pyh210e3f2_0 72 | - ipython=7.33.0=py37h89c1867_0 73 | - jedi=0.19.1=pyhd8ed1ab_0 74 | - joblib=1.1.1=py37h06a4308_0 75 | - jpeg=9b=h024ee3a_2 76 | - jupyter_client=7.4.9=pyhd8ed1ab_0 77 | - jupyter_core=4.11.1=py37h89c1867_0 78 | - kiwisolver=1.4.4=py37h6a678d5_0 79 | - lame=3.100=h166bdaf_1003 80 | - lcms2=2.12=h3be6417_0 81 | - ld_impl_linux-64=2.40=h41732ed_0 82 | - libabseil=20220623.0=cxx17_h05df665_6 83 | - libblas=3.9.0=16_linux64_mkl 84 | - libbrotlicommon=1.0.9=h5eee18b_7 85 | - libbrotlidec=1.0.9=h5eee18b_7 86 | - libbrotlienc=1.0.9=h5eee18b_7 87 | - libcblas=3.9.0=16_linux64_mkl 88 | - libffi=3.4.2=h7f98852_5 89 | - libgcc-ng=13.2.0=h807b86a_5 90 | - libgfortran-ng=13.2.0=h69a702a_5 91 | - libgfortran5=13.2.0=ha4646dd_5 92 | - libglib=2.78.4=hdc74915_0 93 | - libhwloc=2.8.0=h32351e8_1 94 | - libiconv=1.17=hd590300_2 95 | - liblapack=3.9.0=16_linux64_mkl 96 | - liblapacke=3.9.0=16_linux64_mkl 97 | - libnsl=2.0.1=hd590300_0 98 | - libpng=1.6.43=h2797004_0 99 | - libprotobuf=3.21.8=h6239696_0 100 | - libsodium=1.0.18=h36c2ea0_1 101 | - libsqlite=3.45.2=h2797004_0 102 | - libstdcxx-ng=13.2.0=h7e041cc_5 103 | - libtiff=4.2.0=h85742a9_0 104 | - libuuid=1.41.5=h5eee18b_0 105 | - libuv=1.48.0=hd590300_0 106 | - libwebp=1.2.0=h89dd481_0 107 | - libwebp-base=1.2.0=h27cfd23_0 108 | - libxcb=1.15=h7f8727e_0 109 | - libxml2=2.9.14=h74e7548_0 110 | - libzlib=1.2.13=hd590300_5 111 | - llvm-openmp=14.0.6=h9e868ea_0 112 | - lz4-c=1.9.4=h6a678d5_0 113 | - markdown=3.6=pyhd8ed1ab_0 114 | - markupsafe=2.1.1=py37h540881e_1 115 | - matplotlib=3.5.3=py37h06a4308_0 116 | - matplotlib-base=3.5.3=py37hf590b9c_0 117 | - matplotlib-inline=0.1.7=pyhd8ed1ab_0 118 | - mkl=2022.1.0=hc2b9512_224 119 | - mkl-devel=2022.1.0=h66538d2_224 120 | - mkl-include=2022.1.0=h06a4308_224 121 | - multidict=6.0.2=py37h540881e_1 122 | - munkres=1.1.4=py_0 123 | - ncurses=6.4.20240210=h59595ed_0 124 | - nest-asyncio=1.6.0=pyhd8ed1ab_0 125 | - nettle=3.6=he412f7d_0 126 | - numpy=1.21.6=py37h976b520_0 127 | - oauthlib=3.2.2=pyhd8ed1ab_0 128 | - olefile=0.47=pyhd8ed1ab_0 129 | - omegaconf=2.3.0=pyhd8ed1ab_0 130 | - openh264=2.1.1=h780b84a_0 131 | - openssl=3.2.1=hd590300_1 132 | - packaging=22.0=py37h06a4308_0 133 | - parso=0.8.4=pyhd8ed1ab_0 134 | - pathtools=0.1.2=pyhd3eb1b0_1 135 | - pcre2=10.42=hebb0a14_0 136 | - pexpect=4.9.0=pyhd8ed1ab_0 137 | - pickleshare=0.7.5=py_1003 138 | - pip=24.0=pyhd8ed1ab_0 139 | - portalocker=2.3.0=py37h06a4308_0 140 | - prompt-toolkit=3.0.42=pyha770c72_0 141 | - psutil=5.9.0=py37h5eee18b_0 142 | - ptyprocess=0.7.0=pyhd3deb0d_0 143 | - pyasn1=0.5.1=pyhd8ed1ab_0 144 | - pyasn1-modules=0.3.0=pyhd8ed1ab_0 145 | - pycparser=2.21=pyhd8ed1ab_0 146 | - pygments=2.17.2=pyhd8ed1ab_0 147 | - pyjwt=2.8.0=pyhd8ed1ab_1 148 | - pyopenssl=23.2.0=pyhd8ed1ab_1 149 | - pyparsing=3.0.9=py37h06a4308_0 150 | - pyqt=5.6.0=py37h22d08a2_6 151 | - pysocks=1.7.1=py37h89c1867_5 152 | - python=3.7.12=hf930737_100_cpython 153 | - python-dateutil=2.8.2=pyhd3eb1b0_0 154 | - python_abi=3.7=4_cp37m 155 | - pytorch=1.8.1=py3.7_cuda11.1_cudnn8.0.5_0 156 | - pytorch3d=0.7.0=py37_cu111_pyt181 157 | - pyu2f=0.1.5=pyhd8ed1ab_0 158 | - pyyaml=6.0=py37h540881e_4 159 | - pyzmq=24.0.1=py37h0c0c2a8_0 160 | - qt=5.6.3=h8bf5577_3 161 | - 
re2=2022.06.01=h27087fc_1 162 | - readline=8.2=h8228510_1 163 | - regex=2022.7.9=py37h5eee18b_0 164 | - requests=2.31.0=pyhd8ed1ab_0 165 | - requests-oauthlib=2.0.0=pyhd8ed1ab_0 166 | - rsa=4.9=pyhd8ed1ab_0 167 | - ruamel=1.0=py37h06a4308_2 168 | - ruamel.yaml=0.17.21=py37h5eee18b_0 169 | - ruamel.yaml.clib=0.2.6=py37h5eee18b_1 170 | - scipy=1.7.3=py37hf2a6cf1_0 171 | - sentry-sdk=1.9.0=py37h06a4308_0 172 | - setproctitle=1.2.2=py37h27cfd23_1004 173 | - setuptools=69.0.3=pyhd8ed1ab_0 174 | - sip=4.18.1=py37h295c915_2 175 | - six=1.16.0=pyh6c4a22f_0 176 | - smmap=4.0.0=pyhd3eb1b0_0 177 | - sqlite=3.45.2=h2c6b66d_0 178 | - tabulate=0.8.10=py37h06a4308_0 179 | - tbb=2021.8.0=hdb19cb5_0 180 | - tensorboard=2.11.2=pyhd8ed1ab_0 181 | - tensorboard-data-server=0.6.1=py37h52d8a92_0 182 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0 183 | - termcolor=2.1.0=py37h06a4308_0 184 | - tk=8.6.13=noxft_h4845f30_101 185 | - tokenizers=0.13.1=py37hfb4b0a8_0 186 | - torchvision=0.9.1=py37_cu111 187 | - tornado=6.2=py37h5eee18b_0 188 | - tqdm=4.64.1=py37h06a4308_0 189 | - traitlets=5.9.0=pyhd8ed1ab_0 190 | - transformers=4.24.0=py37h06a4308_0 191 | - typing-extensions=4.7.1=hd8ed1ab_0 192 | - typing_extensions=4.7.1=pyha770c72_0 193 | - urllib3=1.26.18=pyhd8ed1ab_0 194 | - wcwidth=0.2.10=pyhd8ed1ab_0 195 | - werkzeug=2.2.3=pyhd8ed1ab_0 196 | - wheel=0.42.0=pyhd8ed1ab_0 197 | - xlrd=2.0.1=pyhd3eb1b0_1 198 | - xz=5.2.6=h166bdaf_0 199 | - yacs=0.1.6=pyhd3eb1b0_1 200 | - yaml=0.2.5=h7f98852_2 201 | - yarl=1.7.2=py37h540881e_2 202 | - zeromq=4.3.5=h59595ed_1 203 | - zipp=3.15.0=pyhd8ed1ab_0 204 | - zlib=1.2.13=hd590300_5 205 | - zstd=1.4.9=haebb681_0 206 | - pip: 207 | - backports-cached-property==1.0.2 208 | - decorator==4.4.2 209 | - diffusers==0.21.4 210 | - docstring-parser==0.16 211 | - eval-type-backport==0.1.3 212 | - ffmpeg==1.4 213 | - freetype-py==2.4.0 214 | - fsspec==2023.1.0 215 | - gym==0.23.1 216 | - gym-notices==0.0.8 217 | - h5py==3.8.0 218 | - huggingface-hub==0.16.4 219 | - imageio==2.19.2 220 | - imageio-ffmpeg==0.4.9 221 | - jinja2==3.1.4 222 | - lxml==5.2.2 223 | - markdown-it-py==2.2.0 224 | - mdurl==0.1.2 225 | - mediapy==1.1.2 226 | - moviepy==1.0.3 227 | - natsort==8.4.0 228 | - networkx==2.2 229 | - ninja==1.11.1.1 230 | - nltk==3.8.1 231 | - numexpr==2.8.6 232 | - opencv-python==4.9.0.80 233 | - palettable==3.3.3 234 | - pandas==1.3.5 235 | - pillow==9.5.0 236 | - platformdirs==4.0.0 237 | - proglog==0.1.10 238 | - promise==2.3 239 | - protobuf==3.20.3 240 | - pycollada==0.6 241 | - pyglet==2.0.10 242 | - pyopengl==3.1.0 243 | - pyrender==0.1.45 244 | - pysdf==0.1.9 245 | - pytz==2024.1 246 | - pyvirtualdisplay==3.0 247 | - rich==13.7.1 248 | - rl-games==1.6.1 249 | - safetensors==0.4.3 250 | - scikit-learn==1.0.2 251 | - seaborn==0.12.2 252 | - sentence-transformers==2.2.2 253 | - sentencepiece==0.2.0 254 | - shortuuid==1.0.13 255 | - shtab==1.7.1 256 | - tables==3.7.0 257 | - tensorboardx==2.6.2.2 258 | - threadpoolctl==3.1.0 259 | - transforms3d==0.4.1 260 | - trimesh==3.23.5 261 | - tyro==0.8.4 262 | - urdfpy==0.0.22 263 | - wandb==0.17.0 264 | - warp-lang==0.10.1 265 | prefix: /home/himanshu/anaconda3/envs/rlgpu 266 | -------------------------------------------------------------------------------- /imgs/approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/imgs/approach.png -------------------------------------------------------------------------------- 
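Note on the environment file above: env.yml defines a conda environment named rlgpu. It can usually be recreated with "conda env create -f env.yml" followed by "conda activate rlgpu"; Isaac Gym itself is not listed in the file and has to be installed separately into that environment from NVIDIA's Isaac Gym preview release (the scripts below import isaacgym before torch). The trailing prefix: line records the original author's install path and may need to be edited or removed on other machines.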
/scripts/finetune.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | import os 3 | import hydra 4 | import datetime 5 | from termcolor import cprint 6 | from omegaconf import DictConfig, OmegaConf 7 | from hydra.utils import to_absolute_path 8 | import wandb 9 | from algo.ppo_transformer.ppo_transformer import PPOTransformer 10 | from tasks import isaacgym_task_map 11 | from utils.reformat import omegaconf_to_dict, print_dict 12 | from utils.utils import set_np_formatting, set_seed, git_hash, git_diff_config 13 | from utils.logger import Logger 14 | import torch 15 | import torch.distributed as dist 16 | import torch.multiprocessing as mp 17 | 18 | def main(rank, world_size, config): 19 | 20 | print(config.task_name) 21 | if world_size > 1: 22 | dist.init_process_group("nccl", rank=rank, world_size=world_size) 23 | global_rank = rank 24 | seed = config.seed + global_rank 25 | else: 26 | global_rank = rank 27 | seed = config.seed 28 | 29 | if config.checkpoint: 30 | config.checkpoint = to_absolute_path(config.checkpoint) 31 | 32 | # set numpy formatting for printing only 33 | set_np_formatting() 34 | 35 | # sets seed. if seed is -1 will pick a random one 36 | _ = set_seed(seed) 37 | 38 | print(f"global_rank = {global_rank} seed = {seed}") 39 | 40 | if config.wandb_activate and not config.test and (global_rank == 0 or world_size ==1): 41 | wandb_logger = wandb.init(project=config.wandb_project, name=config.wandb_name, config=omegaconf_to_dict(config)) 42 | else: 43 | wandb_logger=None 44 | 45 | if (global_rank == 0 or world_size == 1): 46 | output_dif = os.path.join('outputs', config.wandb_name) 47 | logger = Logger(output_dif, summary_writer=wandb_logger) 48 | else: 49 | logger = None 50 | 51 | cprint('Start Building the Environment', 'green', attrs=['bold']) 52 | 53 | 54 | if config.num_gpus > 1: 55 | rl_device = f'cuda:{global_rank}' 56 | sim_device = f'cuda:{global_rank}' 57 | graphics_id = global_rank 58 | else: 59 | rl_device = config.rl_device 60 | sim_device = config.sim_device 61 | graphics_id = config.graphics_device_id 62 | 63 | env = isaacgym_task_map[config.task_name]( 64 | cfg=omegaconf_to_dict(config.task), 65 | rl_device = rl_device, 66 | sim_device=sim_device, 67 | graphics_device_id=graphics_id, 68 | headless=config.headless, 69 | virtual_screen_capture=config.capture_video, 70 | force_render=config.force_render, 71 | ) 72 | 73 | #for debugging 74 | if config.train.algo == 'PPOTransformer': 75 | if env.use_obs_as_prop: 76 | config.pretrain.model.proprio_dim = env.full_state_size 77 | config.train.network = config.pretrain.model 78 | config.task.env.stage2_hist_len = config.pretrain.model.context_length 79 | # Load the model to finetune 80 | 81 | 82 | agent = eval(config.train.algo)(env, config=config,logger=logger, rank=global_rank) 83 | 84 | if config.test: 85 | # agent.restore_test(config.train.load_path) 86 | assert config.checkpoint is not None 87 | print(config.checkpoint) 88 | #agent.model.actor.load_state_dict(torch.load(config.checkpoint)) 89 | agent.restore_test(config.checkpoint) 90 | #breakpoint() 91 | agent.test(name=config.wandb_name) 92 | else: 93 | if rank <= 0: 94 | date = str(datetime.datetime.now().strftime('%m%d%H')) 95 | if config.wandb_activate: 96 | pid = os.getpid() 97 | wandb.log({'pid': pid}) 98 | #cprint(git_diff_config('./'),color='green',attrs=['bold']) 99 | #os.system(f'git diff HEAD > {output_dif}/gitdiff.patch') 100 | #with open(os.path.join(output_dif, 
f'config_{date}_{git_hash()}.yaml'), 'w') as f: 101 | # f.write(OmegaConf.to_yaml(config)) 102 | 103 | if config.train.load_path == '': 104 | cprint("Train model from scratch", 'green', attrs=['bold']) 105 | agent.train() 106 | else: 107 | agent.restore_train(config.train.load_path) 108 | cprint("Loaded actor model from: " + config.train.load_path, 'green', attrs=['bold']) 109 | agent.train() 110 | 111 | if config.wandb_activate and (global_rank==0 or world_size==1): 112 | wandb.finish() 113 | 114 | 115 | @hydra.main(config_name='config', config_path='../cfg/') 116 | def main_multi_gpu(config: DictConfig): 117 | if config.test: 118 | # single gpu testing only! 119 | config.num_gpus = 1 120 | world_size = config.num_gpus 121 | if world_size > 1: 122 | mp.spawn(main, 123 | args=(world_size, config), 124 | nprocs=world_size, 125 | join=True) 126 | else: 127 | rank = 0 #config.sim_device.split(":")[1] 128 | main(rank, 1, config) 129 | 130 | 131 | if __name__ == '__main__': 132 | os.environ["MASTER_ADDR"] = "localhost" 133 | #randomize port address 134 | 135 | os.environ["MASTER_PORT"] = "29435" 136 | main_multi_gpu() 137 | -------------------------------------------------------------------------------- /scripts/finetune/finetune_cabinet.sh: -------------------------------------------------------------------------------- 1 | cmd="python scripts/finetune.py num_gpus=8 \ 2 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 3 | task=AllegroXarmCabinet \ 4 | train.algo=PPOTransformer \ 5 | train.ppo.initEpsHand=0.5 \ 6 | train.ppo.initEpsArm=0.5 \ 7 | train.ppo.value_grads_to_pointnet=False \ 8 | train.ppo.critic_warmup_steps=200 \ 9 | train.ppo.learning_rate=1e-5 \ 10 | wandb_activate=True wandb_name=AllegroXarmCabinet_finetune_datamix_pretraining_initeps_0.5 \ 11 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 12 | train.ppo.minibatch_size=512 num_envs=512 \ 13 | seed=-1" 14 | 15 | echo $cmd 16 | eval $cmd 17 | -------------------------------------------------------------------------------- /scripts/finetune/finetune_grasp.sh: -------------------------------------------------------------------------------- 1 | cmd="python scripts/finetune.py num_gpus=4 \ 2 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 3 | task=AllegroXarmNew \ 4 | train.algo=PPOTransformer \ 5 | train.ppo.initEpsHand=0.1 \ 6 | train.ppo.initEpsArm=0.1 \ 7 | train.ppo.learning_rate=1e-5 \ 8 | train.ppo.value_grads_to_pointnet=False \ 9 | train.ppo.critic_warmup_steps=200 \ 10 | wandb_activate=True wandb_name=AllegroXarmGrasping_finetune_datamix_pretraining\ 11 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 12 | train.ppo.minibatch_size=512 num_envs=512 \ 13 | seed=-1" 14 | 15 | echo $cmd 16 | eval $cmd 17 | #algo/pretrained/models/Policy_noise01_l4h4_ctx_16_shift0_scaled_inputs_new_setup/dt_17-04-2024_23-42-00/model_step_711071.pt 18 | -------------------------------------------------------------------------------- /scripts/finetune/finetune_throw.sh: -------------------------------------------------------------------------------- 1 | # cmd="python scripts/finetune.py num_gpus=8 \ 2 | # checkpoint="algo/pretrained/models/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 3 | # task=AllegroXarmThrowing \ 4 | # train.algo=PPOTransformer \ 5 | # train.ppo.value_grads_to_pointnet=False \ 6 | # train.ppo.critic_warmup_steps=200 \ 7 | # train.ppo.learning_rate=1e-5 \ 
8 | # train.ppo.initEpsHand=0.1 \ 9 | # train.ppo.initEpsArm=0.1 \ 10 | # wandb_activate=True wandb_name=AllegroXarmThrowing_finetune_datamix_pretraining_eps_20 \ 11 | # pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 12 | # train.ppo.minibatch_size=512 num_envs=512 \ 13 | # seed=20" 14 | 15 | # echo $cmd 16 | # eval $cmd 17 | cmd="python scripts/finetune.py num_gpus=3 \ 18 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\ 19 | task=AllegroXarmThrowing \ 20 | train.algo=PPOTransformer \ 21 | train.ppo.value_grads_to_pointnet=False \ 22 | train.ppo.critic_warmup_steps=200 \ 23 | train.ppo.learning_rate=1e-5 \ 24 | train.ppo.initEpsHand=0.1 \ 25 | train.ppo.initEpsArm=0.1 \ 26 | wandb_activate=True wandb_name=AllegroXarmThrowing_noobj_pretraining \ 27 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 28 | train.ppo.minibatch_size=1365 num_envs=1365 \ 29 | seed=-1" 30 | 31 | echo $cmd 32 | eval $cmd 33 | -------------------------------------------------------------------------------- /scripts/pretrain.py: -------------------------------------------------------------------------------- 1 | import isaacgym 2 | from tasks import isaacgym_task_map 3 | import torch 4 | from torch.utils.data import DataLoader 5 | from omegaconf import DictConfig, OmegaConf 6 | from termcolor import cprint 7 | import wandb 8 | from torch.optim import Adam, AdamW 9 | from algo.pretrained.trainer import RobotTrainer 10 | import wandb 11 | from algo.pretrained.robot_transformer_ar import RobotTransformerAR 12 | from algo.pretrained.robot_dataset import RobotDataset , collate_fn 13 | import os 14 | from datetime import datetime 15 | import json 16 | import hydra 17 | from utils.reformat import omegaconf_to_dict, print_dict 18 | from utils.utils import set_np_formatting, set_seed 19 | from utils.logger import Logger 20 | import random 21 | import numpy as np 22 | from torch.optim.lr_scheduler import CosineAnnealingLR 23 | import imageio 24 | 25 | @hydra.main(config_name='config', config_path='../cfg/') 26 | def main(config: DictConfig): 27 | 28 | 29 | device = config.pretrain.device 30 | config.seed = set_seed(config.seed) 31 | 32 | capture_video = config.task.env.enableVideoLog 33 | 34 | if config.pretrain.wandb_activate: 35 | wandb.init(project="manipulation-pretraining", 36 | name=config.pretrain.wandb_name, 37 | config=omegaconf_to_dict(config)) 38 | 39 | tmodel = RobotTransformerAR 40 | 41 | if config.pretrain.test: 42 | 43 | model = tmodel( 44 | cfg=config 45 | ) 46 | 47 | model = model.to(device) 48 | 49 | model.eval() 50 | 51 | assert config.pretrain.checkpoint != '' 52 | # set numpy formatting for printing only 53 | set_np_formatting() 54 | 55 | 56 | if config.pretrain.wandb_activate: 57 | wandb_logger = wandb.init(project=config.wandb_project, 58 | name=config.pretrain.wandb_name, 59 | entity=config.wandb_entity, 60 | config=omegaconf_to_dict(config), 61 | sync_tensorboard=True) 62 | else: 63 | wandb_logger=None 64 | 65 | output_dif = os.path.join('outputs', config.wandb_name) 66 | logger = Logger(output_dif, summary_writer=wandb_logger) 67 | 68 | cprint('Start Building the Environment', 'green', attrs=['bold']) 69 | 70 | env = isaacgym_task_map[config.task_name]( 71 | cfg=omegaconf_to_dict(config.task), 72 | pretrain_cfg=omegaconf_to_dict(config.pretrain), 73 | rl_device = config.rl_device, 74 | sim_device=config.sim_device, 75 | graphics_device_id=config.graphics_device_id, 76 | headless=config.headless, 77 | 
virtual_screen_capture=config.capture_video, 78 | force_render=config.force_render 79 | ) 80 | 81 | model.load_state_dict(torch.load(config.pretrain.checkpoint,map_location=device)) 82 | 83 | cprint(f"Model loaded from {config.pretrain.checkpoint}", color='green', attrs=['bold']) 84 | 85 | model.run_multi_env(env, cfg=config) 86 | 87 | return 88 | 89 | else: 90 | 91 | if config.pretrain.wandb_activate: 92 | wandb_logger = wandb.init(project=config.wandb_project, name=config.wandb_name, 93 | entity=config.wandb_entity, config=omegaconf_to_dict(config)) 94 | else: 95 | wandb_logger=None 96 | 97 | train_dataset = RobotDataset(cfg=config, root=config.pretrain.training.root_dir) 98 | val_dataset = RobotDataset(cfg=config, root=config.pretrain.validation.root_dir) 99 | 100 | max_ep_len = max(train_dataset.max_ep_len, val_dataset.max_ep_len) 101 | 102 | cprint(f"Dataloader built", color='green', attrs=['bold']) 103 | 104 | model = tmodel( 105 | cfg=config, 106 | max_ep_len=max_ep_len 107 | ) 108 | 109 | model = model.to(device) 110 | 111 | if config.pretrain.training.model_save_dir is not None: 112 | save_dir = config.pretrain.training.model_save_dir 113 | # Create the saving directory using the wandb name and the date and time 114 | os.makedirs(save_dir, exist_ok=True) 115 | #get date and time 116 | now = datetime.now() 117 | dt_string = now.strftime("%d-%m-%Y_%H-%M-%S") 118 | experiment_folder = os.path.join(save_dir, f'{config.pretrain.wandb_name}', f'dt_{dt_string}') 119 | # create the experiment folder if not exists 120 | os.makedirs(experiment_folder, exist_ok=True) 121 | json.dump(OmegaConf.to_container(config), open(os.path.join(experiment_folder, 'config.json'), 'w')) 122 | logger = Logger(experiment_folder, summary_writer=wandb_logger) 123 | 124 | else: 125 | save_dir = None 126 | logger = None 127 | 128 | cprint(f"Model built", color='green', attrs=['bold']) 129 | 130 | if config.pretrain.training.load_checkpoint: 131 | assert os.path.exists(config.pretrain.checkpoint), f"Checkpoint {config.pretrain.checkpoint} does not exist" 132 | model.load_state_dict(torch.load(config.pretrain.checkpoint,map_location=device)) 133 | model.train() 134 | cprint(f"Model loaded from {config.pretrain.checkpoint}", color='green', attrs=['bold']) 135 | 136 | scheduler = None #CosineAnnealingLR(optimizer, T_max=10000, eta_min=1e-6) 137 | optimizer = AdamW(model.parameters(), lr=config.pretrain.training.lr, weight_decay=config.pretrain.training.weight_decay) 138 | loss_fn = torch.nn.L1Loss() #torch.nn.MSELoss() 139 | 140 | trainer = RobotTrainer( 141 | model = model, 142 | optimizer = optimizer, 143 | scheduler = scheduler, 144 | train_dataset = train_dataset, 145 | val_dataset = val_dataset, 146 | collate_fn=collate_fn, 147 | loss_fn = loss_fn, 148 | model_save_dir = experiment_folder, 149 | logger = logger, 150 | config=config 151 | ) 152 | 153 | if capture_video: 154 | assert config.pretrain.wandb_activate, "Video capture requires wandb activation" 155 | # create the environment to capture the video 156 | env = isaacgym_task_map[config.task_name]( 157 | cfg=omegaconf_to_dict(config.task), 158 | pretrain_cfg=omegaconf_to_dict(config.pretrain), 159 | rl_device = config.pretrain.device, 160 | sim_device=config.pretrain.device, 161 | graphics_device_id=config.graphics_device_id, 162 | headless=config.headless, 163 | virtual_screen_capture=config.capture_video, 164 | force_render=config.force_render 165 | ) 166 | 167 | for i in range(config.pretrain.training.num_epochs): 168 | cprint("Training iteration 
{}".format(i), color='magenta', attrs=['bold']) 169 | outputs = trainer.train_epoch(iter_num=i, 170 | print_logs=True) 171 | if config.pretrain.wandb_activate: 172 | wandb.log(outputs, commit=True) 173 | 174 | 175 | if capture_video: 176 | fps = int(1/(config.task.sim.dt*config.task.env.controlFrequencyInv)) 177 | print(f"Capturing video from simulation") 178 | env.start_video_recording() 179 | info_dict = model.run_multi_env(env, cfg=config) 180 | video_frames = env.stop_video_recording() 181 | video_path = os.path.join(experiment_folder, f'{config.pretrain.wandb_name}_video.mp4') 182 | video_frames = [np.array(frame.detach().cpu()).astype(np.uint8) for frame in video_frames] 183 | imageio.mimsave(video_path, video_frames, fps=fps) 184 | env.video_frames = [] 185 | 186 | 187 | 188 | if __name__ == '__main__': 189 | main() 190 | -------------------------------------------------------------------------------- /scripts/pretrain.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | DATADIR=$1 4 | CMD="python scripts/pretrain.py num_gpus=4 headless=True \ 5 | track_pose=False get_target_reference=False num_envs=25 \ 6 | pc_input=True pipeline=cuda rl_device=cuda:0 sim_device=cuda:0 \ 7 | pretrain.training.root_dir=$DATADIR/train \ 8 | pretrain.validation.root_dir=$DATADIR/val pretrain.wandb_activate=True \ 9 | pretrain.wandb_name=Policy_noise01_l4h4_ctx_16_data_mix_simrob seed=-1 \ 10 | task.env.enableVideoLog=True \ 11 | task.env.episodeLength=400" 12 | 13 | echo $CMD 14 | eval $CMD 15 | -------------------------------------------------------------------------------- /scripts/run_policy.sh: -------------------------------------------------------------------------------- 1 | POLICY=$1 #"outputs/AllegroXarmGrasping_scratch_vel_control/2024-05-29_00-49/stage1_nn/ep_41700_step_1708M_reward_1876.28.pth" 2 | cmd="python scripts/finetune.py num_gpus=1 \ 3 | task=AllegroXarmNew test=True headless=False \ 4 | checkpoint=$POLICY \ 5 | train.algo=PPOTransformer \ 6 | wandb_activate=False wandb_name=AllegroXarmGrasping_Finetuned \ 7 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \ 8 | train.ppo.minibatch_size=16 num_envs=16 \ 9 | task.env.episodeLength=600 \ 10 | task.env.maxConsecutiveSuccesses=1 \ 11 | pc_input=True \ 12 | seed=-1" 13 | 14 | echo $cmd 15 | eval $cmd 16 | -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 
17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | 31 | from tasks.xarm_grasping_new import AllegroXarmGraspingNew 32 | from tasks.xarm_throwing import AllegroXarmThrowing 33 | from tasks.xarm_cabinet import AllegroXarmCabinet 34 | 35 | 36 | # Mappings from strings to environments 37 | isaacgym_task_map = { 38 | "AllegroXarmNew": AllegroXarmGraspingNew, 39 | "AllegroXarmThrowing": AllegroXarmThrowing, 40 | "AllegroXarmCabinet": AllegroXarmCabinet 41 | # "XarmReaching" : XarmReaching 42 | } 43 | -------------------------------------------------------------------------------- /tasks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/allegro_kuka_grasping.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/allegro_kuka_grasping.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/allegro_kuka_grasping.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/allegro_kuka_grasping.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/torch_jit_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/torch_jit_utils.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/torch_jit_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/torch_jit_utils.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_cabinet.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_cabinet.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_cabinet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_cabinet.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_debug.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_debug.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_debug.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_debug.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_new.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_new.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_new.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_new.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_grasping_real.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_real.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_throwing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_throwing.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/__pycache__/xarm_throwing.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_throwing.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
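Referring back to the task registry in tasks/__init__.py above: each entry of isaacgym_task_map maps a task name from the Hydra config onto a task class. The sketch below shows how the training scripts construct an environment from that map (the constructor arguments are copied from scripts/finetune.py; the config paths are assumptions based on cfg/config.yaml, so treat this as illustrative rather than a supported entry point).

import isaacgym  # the scripts import isaacgym before torch
import hydra
from omegaconf import DictConfig
from tasks import isaacgym_task_map
from utils.reformat import omegaconf_to_dict

@hydra.main(config_name='config', config_path='../cfg/')
def make_env(config: DictConfig):
    # Look up the task class by name and build the vectorized Isaac Gym task,
    # exactly as scripts/finetune.py does before handing it to PPOTransformer.
    env = isaacgym_task_map[config.task_name](
        cfg=omegaconf_to_dict(config.task),
        rl_device=config.rl_device,
        sim_device=config.sim_device,
        graphics_device_id=config.graphics_device_id,
        headless=config.headless,
        virtual_screen_capture=config.capture_video,
        force_render=config.force_render,
    )
    return env

if __name__ == '__main__':
    make_env()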
28 | -------------------------------------------------------------------------------- /tasks/base/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/base/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/base/__pycache__/vec_task.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/vec_task.cpython-37.pyc -------------------------------------------------------------------------------- /tasks/base/__pycache__/vec_task.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/vec_task.cpython-38.pyc -------------------------------------------------------------------------------- /tasks/xarm7_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
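tasks/xarm7_utils.py below bundles the xArm DOF parameters with a success-tolerance curriculum. As a quick worked example of the interp_0_1 helper defined further down (the numbers here are made up purely for illustration): with initial_tolerance=0.10 and target_tolerance=0.01, a current tolerance of 0.055 maps to (0.10 - 0.055) / (0.10 - 0.01) = 0.5, i.e. the curriculum is halfway from the starting tolerance to the target.

# Made-up tolerances, only to illustrate interp_0_1 as defined later in this file:
# interp_0_1(x_curr, x_initial, x_target) = (x_initial - x_curr) / (x_initial - x_target)
initial, target = 0.10, 0.01
print(round((initial - 0.10) / (initial - target), 3))   # 0.0 -> curriculum just started
print(round((initial - 0.055) / (initial - target), 3))  # 0.5 -> halfway to the target tolerance
print(round((initial - 0.01) / (initial - target), 3))   # 1.0 -> curriculum completed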
29 | 30 | from __future__ import annotations 31 | 32 | from dataclasses import dataclass 33 | from typing import Tuple, Dict, List 34 | 35 | from torch import Tensor 36 | 37 | 38 | @dataclass 39 | class DofParameters: 40 | """Joint/dof parameters.""" 41 | xarm_stiffness: float 42 | xarm_effort: List[float] # separate per DOF 43 | xarm_damping: float 44 | xarm_velocity: float 45 | xarm_armature: float 46 | dof_friction: float 47 | 48 | @staticmethod 49 | def from_cfg(cfg: Dict) -> DofParameters: 50 | return DofParameters( 51 | xarm_stiffness=cfg["env"]["kukaStiffness"], 52 | xarm_effort=cfg["env"]["kukaEffort"], 53 | xarm_damping=cfg["env"]["kukaDamping"], 54 | xarm_velocity=cfg["env"]["kukaVelocity"], 55 | xarm_armature=cfg["env"]["kukaArmature"], 56 | dof_friction=cfg["env"]["dofFriction"], 57 | ) 58 | 59 | 60 | def populate_dof_properties(arm_dof_props, params: DofParameters, arm_dofs: int) -> None: 61 | assert len(arm_dof_props["stiffness"]) == arm_dofs 62 | 63 | arm_dof_props["stiffness"].fill(params.xarm_stiffness) 64 | 65 | assert len(params.xarm_effort) == arm_dofs 66 | arm_dof_props["effort"] = params.xarm_effort 67 | arm_dof_props["velocity"] = params.xarm_velocity 68 | arm_dof_props["damping"].fill(params.xarm_damping) 69 | 70 | if params.dof_friction >= 0: 71 | arm_dof_props["friction"].fill(params.dof_friction) 72 | 73 | arm_dof_props["armature"].fill(params.xarm_armature) 74 | 75 | def tolerance_curriculum( 76 | last_curriculum_update: int, 77 | frames_since_restart: int, 78 | curriculum_interval: int, 79 | prev_episode_successes: Tensor, 80 | success_tolerance: float, 81 | initial_tolerance: float, 82 | target_tolerance: float, 83 | tolerance_curriculum_increment: float, 84 | ) -> Tuple[float, int]: 85 | """ 86 | Returns: new tolerance, new last_curriculum_update 87 | """ 88 | if frames_since_restart - last_curriculum_update < curriculum_interval: 89 | return success_tolerance, last_curriculum_update 90 | 91 | mean_successes_per_episode = prev_episode_successes.mean() 92 | if mean_successes_per_episode < 3.0: 93 | # this policy is not good enough with the previous tolerance value, keep training for now... 94 | return success_tolerance, last_curriculum_update 95 | 96 | # decrease the tolerance now 97 | success_tolerance *= tolerance_curriculum_increment 98 | success_tolerance = min(success_tolerance, initial_tolerance) 99 | success_tolerance = max(success_tolerance, target_tolerance) 100 | 101 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}") 102 | 103 | last_curriculum_update = frames_since_restart 104 | return success_tolerance, last_curriculum_update 105 | 106 | 107 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float: 108 | """ 109 | Outputs 1 when x_curr == x_target (curriculum completed) 110 | Outputs 0 when x_curr == x_initial (just started training) 111 | Interpolates value in between. 112 | """ 113 | span = x_initial - x_target 114 | return (x_initial - x_curr) / span 115 | 116 | 117 | def tolerance_successes_objective( 118 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor 119 | ) -> Tensor: 120 | """ 121 | Objective for the PBT. This basically prioritizes tolerance over everything else when we 122 | execute the curriculum, after that it's just #successes. 
123 | """ 124 | # this grows from 0 to 1 as we reach the target tolerance 125 | if initial_tolerance > target_tolerance: 126 | # makeshift unit tests: 127 | eps = 1e-5 128 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps 129 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps 130 | mid_tolerance = (initial_tolerance + target_tolerance) / 2 131 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps 132 | 133 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance) 134 | else: 135 | tolerance_objective = 1.0 136 | 137 | if success_tolerance > target_tolerance: 138 | # add succeses with a small coefficient to differentiate between policies at the beginning of training 139 | # increment in tolerance improvement should always give higher value than higher successes with the 140 | # previous tolerance, that's why this coefficient is very small 141 | true_objective = (successes * 0.01) + tolerance_objective 142 | else: 143 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross 144 | # the threshold 145 | true_objective = successes + tolerance_objective 146 | 147 | return true_objective 148 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__init__.py -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/allegro_kuka_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/allegro_kuka_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/allegro_kuka_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/allegro_kuka_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/hand_arm_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/hand_arm_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/hand_arm_utils.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/hand_arm_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/misc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/misc.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pytorch_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/pytorch_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pytorch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/pytorch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/randomization_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/randomization_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/randomization_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/randomization_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/reformat.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/reformat.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/reformat.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/reformat.cpython-38.pyc 
-------------------------------------------------------------------------------- /utils/__pycache__/torch_jit_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/torch_jit_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/urdf_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/urdf_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/urdf_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/urdf_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/warmup_scheduler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/warmup_scheduler.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/warmup_scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/warmup_scheduler.cpython-38.pyc -------------------------------------------------------------------------------- /utils/allegro_kuka_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 
18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | from __future__ import annotations 31 | 32 | from dataclasses import dataclass 33 | from typing import Tuple, Dict, List 34 | 35 | from torch import Tensor 36 | 37 | 38 | @dataclass 39 | class DofParameters: 40 | """Joint/dof parameters.""" 41 | allegro_stiffness: float 42 | kuka_stiffness: float 43 | allegro_effort: float 44 | allegro_velocity: float 45 | kuka_effort: List[float] # separate per DOF 46 | allegro_damping: float 47 | kuka_damping: float 48 | kuka_velocity: float 49 | dof_friction: float 50 | allegro_armature: float 51 | kuka_armature: float 52 | 53 | @staticmethod 54 | def from_cfg(cfg: Dict) -> DofParameters: 55 | return DofParameters( 56 | allegro_stiffness=cfg["env"]["allegroStiffness"], 57 | kuka_stiffness=cfg["env"]["kukaStiffness"], 58 | allegro_effort=cfg["env"]["allegroEffort"], 59 | allegro_velocity=cfg["env"]["allegroVelocity"], 60 | kuka_effort=cfg["env"]["kukaEffort"], 61 | allegro_damping=cfg["env"]["allegroDamping"], 62 | kuka_damping=cfg["env"]["kukaDamping"], 63 | kuka_velocity=cfg["env"]["kukaVelocity"], 64 | dof_friction=cfg["env"]["dofFriction"], 65 | allegro_armature=cfg["env"]["allegroArmature"], 66 | kuka_armature=cfg["env"]["kukaArmature"], 67 | ) 68 | 69 | 70 | def populate_dof_properties(hand_arm_dof_props, params: DofParameters, arm_dofs: int, hand_dofs: int) -> None: 71 | assert len(hand_arm_dof_props["stiffness"]) == arm_dofs + hand_dofs 72 | 73 | hand_arm_dof_props["stiffness"][0:arm_dofs].fill(params.kuka_stiffness) 74 | hand_arm_dof_props["stiffness"][arm_dofs:].fill(params.allegro_stiffness) 75 | 76 | assert len(params.kuka_effort) == arm_dofs 77 | hand_arm_dof_props["effort"][0:arm_dofs] = params.kuka_effort 78 | hand_arm_dof_props["effort"][arm_dofs:].fill(params.allegro_effort) 79 | 80 | hand_arm_dof_props["velocity"][0:arm_dofs] = params.kuka_velocity 81 | hand_arm_dof_props["velocity"][arm_dofs:].fill(params.allegro_velocity) 82 | 83 | hand_arm_dof_props["damping"][0:arm_dofs].fill(params.kuka_damping) 84 | hand_arm_dof_props["damping"][arm_dofs:].fill(params.allegro_damping) 85 | 86 | if params.dof_friction >= 0: 87 | hand_arm_dof_props["friction"].fill(params.dof_friction) 88 | 89 | hand_arm_dof_props["armature"][0:arm_dofs].fill(params.kuka_armature) 90 | hand_arm_dof_props["armature"][arm_dofs:].fill(params.allegro_armature) 91 | 92 | 93 | def tolerance_curriculum( 94 | last_curriculum_update: int, 95 | frames_since_restart: int, 96 | curriculum_interval: int, 97 | prev_episode_successes: Tensor, 98 | success_tolerance: float, 99 | initial_tolerance: float, 100 | target_tolerance: float, 101 | tolerance_curriculum_increment: float, 102 | ) -> Tuple[float, int]: 103 | """ 104 | Returns: new tolerance, new 
last_curriculum_update 105 | """ 106 | if frames_since_restart - last_curriculum_update < curriculum_interval: 107 | return success_tolerance, last_curriculum_update 108 | 109 | mean_successes_per_episode = prev_episode_successes.mean() 110 | if mean_successes_per_episode < 3.0: 111 | # this policy is not good enough with the previous tolerance value, keep training for now... 112 | return success_tolerance, last_curriculum_update 113 | 114 | # decrease the tolerance now 115 | success_tolerance *= tolerance_curriculum_increment 116 | success_tolerance = min(success_tolerance, initial_tolerance) 117 | success_tolerance = max(success_tolerance, target_tolerance) 118 | 119 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}") 120 | 121 | last_curriculum_update = frames_since_restart 122 | return success_tolerance, last_curriculum_update 123 | 124 | 125 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float: 126 | """ 127 | Outputs 1 when x_curr == x_target (curriculum completed) 128 | Outputs 0 when x_curr == x_initial (just started training) 129 | Interpolates value in between. 130 | """ 131 | span = x_initial - x_target 132 | return (x_initial - x_curr) / span 133 | 134 | 135 | def tolerance_successes_objective( 136 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor 137 | ) -> Tensor: 138 | """ 139 | Objective for the PBT. This basically prioritizes tolerance over everything else when we 140 | execute the curriculum, after that it's just #successes. 141 | """ 142 | # this grows from 0 to 1 as we reach the target tolerance 143 | if initial_tolerance > target_tolerance: 144 | # makeshift unit tests: 145 | eps = 1e-5 146 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps 147 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps 148 | mid_tolerance = (initial_tolerance + target_tolerance) / 2 149 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps 150 | 151 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance) 152 | else: 153 | tolerance_objective = 1.0 154 | 155 | if success_tolerance > target_tolerance: 156 | # add succeses with a small coefficient to differentiate between policies at the beginning of training 157 | # increment in tolerance improvement should always give higher value than higher successes with the 158 | # previous tolerance, that's why this coefficient is very small 159 | true_objective = (successes * 0.01) + tolerance_objective 160 | else: 161 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross 162 | # the threshold 163 | true_objective = successes + tolerance_objective 164 | 165 | return true_objective 166 | -------------------------------------------------------------------------------- /utils/camera.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 300, 3 | "height": 300, 4 | "fx": 267.9263610839844, 5 | "fy": 267.9263610839844, 6 | "pose": [ 7 | 0.5531, 8 | -0.0643, 9 | 0.4484 10 | ], 11 | "R": [ 12 | [ 13 | 0.0347, 14 | 0.4223, 15 | -0.9058 16 | ], 17 | [ 18 | 0.9993, 19 | -0.0294, 20 | 0.0245 21 | ], 22 | [ 23 | -0.0163, 24 | -0.9060, 25 | -0.4231 26 | ] 27 | ] 28 | } -------------------------------------------------------------------------------- /utils/camera2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "width": 300, 3 | "height": 300, 4 | "fx": 267.5335, 5 | "fy": 267.5335, 6 | "pose": [ 7 | 0.5545, 8 | -0.0563, 9 | 0.4281 10 | ], 11 | "R": [ 12 | [ 13 | 0.0285, 14 | 0.4060, 15 | -0.9134 16 | ], 17 | [ 18 | 0.9993, 19 | -0.0355, 20 | 0.0154 21 | ], 22 | [ 23 | -0.0261, 24 | -0.9132, 25 | -0.4068 26 | ] 27 | ] 28 | } -------------------------------------------------------------------------------- /utils/hand_arm_utils.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | 30 | from __future__ import annotations 31 | 32 | from dataclasses import dataclass 33 | from typing import Tuple, Dict, List 34 | 35 | from torch import Tensor 36 | 37 | 38 | @dataclass 39 | class DofParameters: 40 | """Joint/dof parameters.""" 41 | hand_stiffness: float 42 | arm_stiffness: float 43 | hand_effort: float 44 | hand_velocity: float 45 | arm_effort: List[float] # separate per DOF 46 | hand_damping: float 47 | arm_damping: float 48 | arm_velocity: float 49 | dof_friction: float 50 | hand_armature: float 51 | arm_armature: float 52 | 53 | @staticmethod 54 | def from_cfg(cfg: Dict) -> DofParameters: 55 | return DofParameters( 56 | hand_stiffness=cfg["env"]["handStiffness"], 57 | arm_stiffness=cfg["env"]["armStiffness"], 58 | hand_effort=cfg["env"]["handEffort"], 59 | hand_velocity=cfg["env"]["handVelocity"], 60 | arm_effort=cfg["env"]["armEffort"], 61 | hand_damping=cfg["env"]["handDamping"], 62 | arm_damping=cfg["env"]["armDamping"], 63 | arm_velocity=cfg["env"]["armVelocity"], 64 | dof_friction=cfg["env"]["dofFriction"], 65 | hand_armature=cfg["env"]["handArmature"], 66 | arm_armature=cfg["env"]["armArmature"], 67 | ) 68 | 69 | 70 | def populate_dof_properties(hand_arm_dof_props, params: DofParameters, arm_dofs: int, hand_dofs: int) -> None: 71 | assert len(hand_arm_dof_props["stiffness"]) == arm_dofs + hand_dofs 72 | 73 | hand_arm_dof_props["stiffness"][0:arm_dofs].fill(params.arm_stiffness) 74 | hand_arm_dof_props["stiffness"][arm_dofs:].fill(params.hand_stiffness) 75 | 76 | assert len(params.arm_effort) == arm_dofs 77 | hand_arm_dof_props["effort"][0:arm_dofs] = params.arm_effort 78 | hand_arm_dof_props["effort"][arm_dofs:].fill(params.hand_effort) 79 | 80 | hand_arm_dof_props["velocity"][0:arm_dofs] = params.arm_velocity 81 | hand_arm_dof_props["velocity"][arm_dofs:].fill(params.hand_velocity) 82 | 83 | hand_arm_dof_props["damping"][0:arm_dofs].fill(params.arm_damping) 84 | hand_arm_dof_props["damping"][arm_dofs:].fill(params.hand_damping) 85 | 86 | if params.dof_friction >= 0: 87 | hand_arm_dof_props["friction"].fill(params.dof_friction) 88 | 89 | hand_arm_dof_props["armature"][0:arm_dofs].fill(params.arm_armature) 90 | hand_arm_dof_props["armature"][arm_dofs:].fill(params.hand_armature) 91 | 92 | 93 | def tolerance_curriculum( 94 | last_curriculum_update: int, 95 | frames_since_restart: int, 96 | curriculum_interval: int, 97 | prev_episode_successes: Tensor, 98 | success_tolerance: float, 99 | initial_tolerance: float, 100 | target_tolerance: float, 101 | tolerance_curriculum_increment: float, 102 | ) -> Tuple[float, int]: 103 | """ 104 | Returns: new tolerance, new last_curriculum_update 105 | """ 106 | if frames_since_restart - last_curriculum_update < curriculum_interval: 107 | return success_tolerance, last_curriculum_update 108 | 109 | mean_successes_per_episode = prev_episode_successes.mean() 110 | if mean_successes_per_episode < 3.0: 111 | # this policy is not good enough with the previous tolerance value, keep training for now... 
112 | return success_tolerance, last_curriculum_update 113 | 114 | # decrease the tolerance now 115 | success_tolerance *= tolerance_curriculum_increment 116 | success_tolerance = min(success_tolerance, initial_tolerance) 117 | success_tolerance = max(success_tolerance, target_tolerance) 118 | 119 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}") 120 | 121 | last_curriculum_update = frames_since_restart 122 | return success_tolerance, last_curriculum_update 123 | 124 | 125 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float: 126 | """ 127 | Outputs 1 when x_curr == x_target (curriculum completed) 128 | Outputs 0 when x_curr == x_initial (just started training) 129 | Interpolates value in between. 130 | """ 131 | span = x_initial - x_target 132 | return (x_initial - x_curr) / span 133 | 134 | 135 | def tolerance_successes_objective( 136 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor 137 | ) -> Tensor: 138 | """ 139 | Objective for the PBT. This basically prioritizes tolerance over everything else when we 140 | execute the curriculum, after that it's just #successes. 141 | """ 142 | # this grows from 0 to 1 as we reach the target tolerance 143 | if initial_tolerance > target_tolerance: 144 | # makeshift unit tests: 145 | eps = 1e-5 146 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps 147 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps 148 | mid_tolerance = (initial_tolerance + target_tolerance) / 2 149 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps 150 | 151 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance) 152 | else: 153 | tolerance_objective = 1.0 154 | 155 | if success_tolerance > target_tolerance: 156 | # add succeses with a small coefficient to differentiate between policies at the beginning of training 157 | # increment in tolerance improvement should always give higher value than higher successes with the 158 | # previous tolerance, that's why this coefficient is very small 159 | true_objective = (successes * 0.01) + tolerance_objective 160 | else: 161 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross 162 | # the threshold 163 | true_objective = successes + tolerance_objective 164 | 165 | return true_objective 166 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tkinter import commondialog 3 | import numpy as np 4 | import wandb 5 | from PIL import Image 6 | import cv2 7 | class Logger: 8 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 9 | self._log_dir = log_dir 10 | print('########################') 11 | print('logging outputs to ', log_dir) 12 | print('########################') 13 | self._n_logged_samples = n_logged_samples 14 | self._summ_writer = summary_writer 15 | 16 | def flush(self): 17 | self._summ_writer.flush() 18 | return 19 | 20 | def log_scalar(self, scalar, name, step_, commit=False): 21 | if self._summ_writer: 22 | self._summ_writer.log({'{}'.format(name): scalar}, step=step_) #, commit=commit) 23 | 24 | def log_scalars(self, scalar_dict, group_name, step, phase, commit=True): 25 | """Will log all scalars in the same plot.""" 26 | if 
self._summ_writer: 27 | self._summ_writer.log({'{}/{}'.format(group_name, phase): scalar_dict}, step=step) # Not sure if this will work! 28 | #self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 29 | 30 | def log_image(self, image, name, step, commit=False): 31 | assert(len(image.shape) == 3) # [C, H, W] 32 | image = wandb.Image(image, caption=f"{name}", step=step, commit=commit) 33 | #self._summ_writer.add_image('{}'.format(name), image, step) 34 | 35 | # TODO: Add more logging as needed 36 | def log_gifs(self,imgs,name="gif",commit=False): 37 | 38 | images = [Image.fromarray(image.cpu().numpy().astype(np.uint8)) for image in imgs] 39 | wandb.log({name: [wandb.Image(image) for image in images]}) 40 | 41 | def log_video(self,imgs,name="video", step=0, commit=False, fps=15): 42 | 43 | frames = [img.cpu().numpy().astype(np.uint8) for img in imgs] 44 | frames = np.array(frames) # [T, H, W, C] 45 | frames = np.transpose(frames, (0, 3, 1, 2)) # [T, C, H, W] 46 | 47 | print("here") 48 | wandb.log({ 49 | name: wandb.Video(frames, fps=fps, format='mp4'), 50 | }, step=step) 51 | 52 | print("here2") 53 | 54 | #def log_video(self, video_frames, name, step, fps=10): 55 | # assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 56 | # self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 57 | 58 | #def log_trajs_as_videos(self, trajs, step, max_videos_to_save=2, fps=10, video_title='video'): 59 | 60 | # # reshape the rollouts 61 | # videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in trajs] 62 | 63 | # # max rollout length 64 | # max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 65 | # max_length = videos[0].shape[0] 66 | # for i in range(max_videos_to_save): 67 | # if videos[i].shape[0]>max_length: 68 | # max_length = videos[i].shape[0] 69 | 70 | # # pad rollouts to all be same length 71 | # for i in range(max_videos_to_save): 72 | # if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 
83 | # self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 84 | 85 | #def log_figure(self, figure, name, step, phase): 86 | # """figure: matplotlib.pyplot figure handle""" 87 | # self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 88 | 89 | #def log_graph(self, array, name, step, phase): 90 | # """figure: matplotlib.pyplot figure handle""" 91 | # im = plot_graph(array) 92 | # self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 93 | 94 | #def dump_scalars(self, log_path=None): 95 | # log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 96 | # self._summ_writer.export_scalars_to_json(log_path) 97 | 98 | def log_dict(self, logs, itr, verbose=True): 99 | if self._summ_writer: 100 | for key, value in logs.items(): 101 | if verbose: 102 | print("{} : {}".format(key, value)) 103 | self.log_scalar(value, key, itr) 104 | -------------------------------------------------------------------------------- /utils/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from copy import deepcopy 4 | 5 | device=None 6 | 7 | 8 | def from_numpy(*args, **kwargs): 9 | return torch.from_numpy(*args, **kwargs).float().to(device) 10 | 11 | 12 | def to_numpy(tensor): 13 | return tensor.to('cpu').detach().numpy() 14 | 15 | 16 | def to_torch(element,device): 17 | 18 | if isinstance(element,dict): 19 | 20 | new_element = deepcopy(element) 21 | for key in element: 22 | new_element[key] = to_torch(element[key],device) 23 | return new_element 24 | 25 | elif isinstance(element,list): 26 | try: 27 | return torch.tensor(element).float().to(device) 28 | except: 29 | return element 30 | 31 | elif isinstance(element,np.ndarray): 32 | return torch.from_numpy(element).float().to(device) 33 | 34 | else: 35 | return element 36 | 37 | -------------------------------------------------------------------------------- /utils/randomization_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from termcolor import cprint 3 | 4 | 5 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 6 | def randomize_table_z(gym,env_ptr,table_handle,table_rand_config): 7 | #does not work. need to change table position differently. 
8 | fr_z = np.random.uniform(table_rand_config['lower'],table_rand_config['upper']) 9 | prop = gym.get_actor_rigid_body_properties(env_ptr, table_handle) 10 | assert len(prop) == 1 11 | print(fr_z) 12 | obj_com = prop[0].com.z*fr_z 13 | prop[0].com.z = obj_com 14 | gym.set_actor_rigid_body_properties(env_ptr, table_handle, prop) 15 | 16 | 17 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 18 | def randomize_object_scale(gym,env_ptr,object_handle,object_rand_config): 19 | 20 | scale = np.random.uniform(object_rand_config['lower'], object_rand_config['upper']) 21 | gym.set_actor_scale(env_ptr, object_handle,scale) 22 | return scale 23 | 24 | 25 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 26 | def randomize_object_mass(gym,env_ptr,object_handle,objmass_rand_config): 27 | 28 | prop = gym.get_actor_rigid_body_properties(env_ptr, object_handle) 29 | ret = [] 30 | for p in prop: 31 | fr = np.random.uniform(objmass_rand_config['lower'], objmass_rand_config['upper']) 32 | p.mass = p.mass*fr 33 | p.inertia.x = p.inertia.x*fr 34 | p.inertia.y = p.inertia.y*fr 35 | p.inertia.z = p.inertia.z*fr 36 | ret.append(p.mass) 37 | 38 | gym.set_actor_rigid_body_properties(env_ptr, object_handle, prop) 39 | 40 | return ret 41 | 42 | 43 | ##CAN ONLY BE DONE AT THE START OF SIMULATION## 44 | def randomize_friction(gym,env_ptr,handle,rand_friction_config): 45 | 46 | rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 47 | rest = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 48 | props = gym.get_actor_rigid_shape_properties(env_ptr, handle) 49 | friction = [] 50 | restitution = [] 51 | for p in props: 52 | p.friction = rand_friction*p.friction 53 | p.restitution = rest*p.restitution 54 | friction.append(p.friction) 55 | restitution.append(p.restitution) 56 | 57 | gym.set_actor_rigid_shape_properties(env_ptr, handle, props) 58 | 59 | return friction,restitution 60 | 61 | # def randomize_friction(gym,env_ptr,hand_handle,object_handle,rand_friction_config): 62 | 63 | # rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 64 | # obj_restitution = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 65 | # hand_props = gym.get_actor_rigid_shape_properties(env_ptr, hand_handle) 66 | # hand_friction = [] 67 | # hand_restitution = [] 68 | # for p in hand_props: 69 | # p.friction = rand_friction 70 | # p.restitution = obj_restitution 71 | # hand_friction.append(p.friction) 72 | # hand_restitution.append(p.restitution) 73 | 74 | # gym.set_actor_rigid_shape_properties(env_ptr, hand_handle, hand_props) 75 | 76 | 77 | # rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 78 | # obj_rest = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper']) 79 | # obj_friction = [] 80 | # obj_restitution = [] 81 | # obj_props = gym.get_actor_rigid_shape_properties(env_ptr, object_handle) 82 | # for p in obj_props: 83 | # p.friction = rand_friction*p.friction 84 | # p.restitution = obj_rest*p.restitution 85 | # obj_friction.append(p.friction) 86 | # obj_restitution.append(p.restitution) 87 | 88 | # gym.set_actor_rigid_shape_properties(env_ptr, object_handle, obj_props) 89 | 90 | # return hand_friction, hand_restitution, obj_friction, obj_restitution #not sure if just one value can influence the full policy but okay for now. 
91 | 92 | 93 | # def randomize_object_position(env): 94 | # "already randomized in code" 95 | # pass 96 | 97 | # def randomize_robot_damping(env): 98 | # pass 99 | 100 | # def randomize_robot_stiffness(env): 101 | # pass 102 | 103 | -------------------------------------------------------------------------------- /utils/reformat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | from omegaconf import DictConfig, OmegaConf 30 | from typing import Dict 31 | 32 | def omegaconf_to_dict(d: DictConfig)->Dict: 33 | """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation.""" 34 | ret = {} 35 | for k, v in d.items(): 36 | if isinstance(v, DictConfig): 37 | ret[k] = omegaconf_to_dict(v) 38 | else: 39 | ret[k] = v 40 | return ret 41 | 42 | def print_dict(val, nesting: int = -4, start: bool = True): 43 | """Outputs a nested dictionory.""" 44 | if type(val) == dict: 45 | if not start: 46 | print('') 47 | nesting += 4 48 | for k in val: 49 | print(nesting * ' ', end='') 50 | print(k, end=': ') 51 | print_dict(val[k], nesting, start=False) 52 | else: 53 | print(val) 54 | 55 | # EOF 56 | -------------------------------------------------------------------------------- /utils/rna_util.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) 2018-2023, NVIDIA Corporation 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | from __future__ import print_function 31 | 32 | import torch 33 | import torch.nn as nn 34 | import torch.nn.functional as F 35 | 36 | 37 | class RandomNetworkAdversary(nn.Module): 38 | 39 | def __init__(self, num_envs, in_dims, out_dims, softmax_bins, device): 40 | super(RandomNetworkAdversary, self).__init__() 41 | 42 | """ 43 | Class to add random action to the action generated by the policy. 44 | The output is binned to 32 bins per channel and we do softmax over 45 | these bins to figure out the most likely joint angle. 46 | 47 | Note: OpenAI et al. 2019 found out that if they used a continuous space 48 | and a tanh non-linearity, actions would always be close to 0. 49 | Section B.3 https://arxiv.org/abs/1910.07113 50 | 51 | Q: Why do we need dropouts here? 52 | 53 | A: If we were using a CPU-based simulator as in OpenAI et al. 2019, we 54 | will use a different RNA network for different CPU. However, 55 | this is not feasible for a GPU-based simulator as that would mean 56 | creating N_envs RNA networks which will overwhelm the GPU-memory. 57 | Therefore, dropout is a nice approximation of this by re-sampling 58 | weights of the same neural network for each different env on the GPU. 
59 | """ 60 | 61 | self.in_dims = in_dims 62 | self.out_dims = out_dims 63 | self.softmax_bins = softmax_bins 64 | self.num_envs = num_envs 65 | 66 | self.device = device 67 | 68 | self.num_feats1 = 512 69 | self.num_feats2 = 1024 70 | 71 | # Sampling random probablities for dropout masks 72 | dropout_probs = torch.rand((2, )) 73 | 74 | # Setting up the RNA neural network here 75 | 76 | # First layer 77 | 78 | self.fc1 = nn.Linear(in_dims, self.num_feats1).to(self.device) 79 | 80 | self.dropout_masks1 = torch.bernoulli(torch.ones((self.num_envs, \ 81 | self.num_feats1)), p=dropout_probs[0]).to(self.device) 82 | 83 | self.fc1_1 = nn.Linear(self.num_feats1, self.num_feats1).to(self.device) 84 | 85 | # Second layer 86 | self.fc2 = nn.Linear(self.num_feats1, self.num_feats2).to(self.device) 87 | 88 | self.dropout_masks2 = torch.bernoulli(torch.ones((self.num_envs, \ 89 | self.num_feats2)), p=dropout_probs[1]).to(self.device) 90 | 91 | self.fc2_1 = nn.Linear(self.num_feats2, self.num_feats2).to(self.device) 92 | 93 | # Last layer 94 | self.fc3 = nn.Linear(self.num_feats2, out_dims*softmax_bins).to(self.device) 95 | 96 | # This is needed to reset weights and dropout masks 97 | self._refresh() 98 | 99 | def _refresh(self): 100 | 101 | self._init_weights() 102 | self.eval() 103 | self.refresh_dropout_masks() 104 | 105 | def _init_weights(self): 106 | 107 | print('initialising weights for random network') 108 | 109 | nn.init.kaiming_uniform_(self.fc1.weight) 110 | nn.init.kaiming_uniform_(self.fc1_1.weight) 111 | nn.init.kaiming_uniform_(self.fc2.weight) 112 | nn.init.kaiming_uniform_(self.fc2_1.weight) 113 | nn.init.kaiming_uniform_(self.fc3.weight) 114 | 115 | return 116 | 117 | def refresh_dropout_masks(self): 118 | 119 | dropout_probs = torch.rand((2, )) 120 | 121 | self.dropout_masks1 = torch.bernoulli(torch.ones((self.num_envs, self.num_feats1)), \ 122 | p=dropout_probs[0]).to(self.dropout_masks1.device) 123 | 124 | self.dropout_masks2 = torch.bernoulli(torch.ones((self.num_envs, self.num_feats2)), \ 125 | p=dropout_probs[1]).to(self.dropout_masks2.device) 126 | 127 | return 128 | 129 | def forward(self, x): 130 | 131 | x = self.fc1(x) 132 | x = F.relu(x) 133 | x = self.fc1_1(x) 134 | x = self.dropout_masks1 * x 135 | 136 | x = self.fc2(x) 137 | x = F.relu(x) 138 | x = self.fc2_1(x) 139 | x = self.dropout_masks2 * x 140 | 141 | x = self.fc3(x) 142 | 143 | x = x.view(-1, self.out_dims, self.softmax_bins) 144 | output = F.softmax(x, dim=-1) 145 | 146 | # We have discretised the joint angles into bins 147 | # Now we pick up the bin for each joint angle 148 | # corresponding to the highest softmax value / prob. 
149 | 150 | return output 151 | 152 | 153 | if __name__ == "__main__": 154 | 155 | num_envs = 1024 156 | RNA = RandomNetworkAdversary(num_envs=num_envs, in_dims=16, out_dims=16, softmax_bins=32, device='cuda') 157 | 158 | x = torch.tensor(torch.randn(num_envs, 16).to(RNA.device)) 159 | y = RNA(x) 160 | import ipdb; ipdb.set_trace() 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /utils/urdf_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def read_xml(filename): 3 | import xml.etree.ElementTree as Et 4 | root = Et.parse(filename).getroot() 5 | return root 6 | 7 | 8 | def get_link_meshes_from_urdf(urdf_file,link_names): 9 | root = read_xml(urdf_file) 10 | link_meshfiles =[] 11 | for link_name in link_names: 12 | for link in root.findall('link'): 13 | if link.attrib['name'] == link_name: 14 | for mesh in link.findall('visual/geometry/mesh'): 15 | link_meshfiles.append(mesh.attrib['filename']) 16 | 17 | assert len(link_meshfiles) == len(link_names) 18 | return link_meshfiles 19 | 20 | 21 | def load_asset_files_public(asset_root): 22 | import os 23 | folder_name = 'pybullet-URDF-models/urdf_models/models' 24 | asset_files = {} 25 | 26 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)): 27 | 28 | for file in files: 29 | if file.endswith("model.urdf"): 30 | obj_name = root.split('/')[-1] 31 | dir = root[len(asset_root)+1:] 32 | asset_files[obj_name]=os.path.join(dir, file) 33 | 34 | return asset_files 35 | 36 | 37 | 38 | 39 | def load_asset_files_ycb(asset_root,folder_name='ycb_real_inertia'): 40 | 41 | import os 42 | asset_files = {} 43 | 44 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)): 45 | 46 | for file in files: 47 | if file.endswith(".urdf"): 48 | obj_name = file.split('.')[0] 49 | dir = root[len(asset_root)+1:] 50 | asset_files[obj_name]={} 51 | asset_files[obj_name]['urdf']=os.path.join(dir, file) 52 | asset_files[obj_name]['mesh']=os.path.join(dir, file.split('.')[0]+'/google_16k/textured.obj') 53 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['mesh'])) 54 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['urdf'])) 55 | 56 | return asset_files 57 | 58 | def load_asset_files_ycb_lowmem(asset_root,folder_name='ycb_real_inertia'): 59 | import os 60 | asset_files = {} 61 | 62 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)): 63 | 64 | for file in files: 65 | if file.endswith(".urdf"): 66 | obj_name = file.split('.')[0] 67 | number = obj_name.split('_')[0] 68 | print(obj_name,number) 69 | if number in ['070-a','070-b','072','036','032','029','048','027','019','032','026']: 70 | dir = root[len(asset_root)+1:] 71 | asset_files[obj_name]={} 72 | asset_files[obj_name]['urdf']=os.path.join(dir, file) 73 | asset_files[obj_name]['mesh']=os.path.join(dir, file.split('.')[0]+'/google_16k/textured.obj') 74 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['mesh'])) 75 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['urdf'])) 76 | 77 | return asset_files 78 | 79 | 80 | def fix_ycb_scale(asset_root): 81 | import os 82 | import shutil 83 | import xml.etree.ElementTree as Et 84 | folder_name = 'ycb' 85 | new_folder_name = 'ycb_scaled' 86 | if not os.path.exists(os.path.join(asset_root,new_folder_name)): 87 | shutil.copytree(os.path.join(asset_root,folder_name), os.path.join(asset_root,new_folder_name)) 88 | 89 | for root, 
dirs, files in os.walk(os.path.join(asset_root,new_folder_name)): 90 | for file in files: 91 | if file.endswith(".urdf"): 92 | filepath = os.path.join(root, file) 93 | urdf = read_xml(filepath) 94 | for mesh in urdf.findall(f'.//collision/geometry/'): 95 | mesh.attrib['scale']='1 1 1' 96 | for mesh in urdf.findall(f'.//visual/geometry/'): 97 | mesh.attrib['scale']='1 1 1' 98 | 99 | new_xml = Et.ElementTree() 100 | new_xml._setroot(urdf) 101 | with open(filepath, "wb") as f: 102 | new_xml.write(f) 103 | 104 | return 105 | 106 | 107 | 108 | 109 | 110 | def get_vol_ratio(scale1,scale2): 111 | nums1 = [float(s) for s in scale1.split(' ')] 112 | nums2 = [float(s) for s in scale2.split(' ')] 113 | nums1 = np.array(nums1) 114 | nums2 = np.array(nums2) 115 | return np.prod(nums1)/np.prod(nums2) 116 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2023, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | # python 30 | #import pwd 31 | import getpass 32 | import tempfile 33 | import time 34 | from collections import OrderedDict 35 | from os.path import join 36 | 37 | import numpy as np 38 | import torch 39 | import random 40 | import os 41 | import subprocess 42 | import shlex 43 | 44 | def retry(times, exceptions): 45 | """ 46 | Retry Decorator https://stackoverflow.com/a/64030200/1645784 47 | Retries the wrapped function/method `times` times if the exceptions listed 48 | in ``exceptions`` are thrown 49 | :param times: The number of times to repeat the wrapped function/method 50 | :type times: Int 51 | :param exceptions: Lists of exceptions that trigger a retry attempt 52 | :type exceptions: Tuple of Exceptions 53 | """ 54 | def decorator(func): 55 | def newfn(*args, **kwargs): 56 | attempt = 0 57 | while attempt < times: 58 | try: 59 | return func(*args, **kwargs) 60 | except exceptions: 61 | print(f'Exception thrown when attempting to run {func}, attempt {attempt} out of {times}') 62 | time.sleep(min(2 ** attempt, 30)) 63 | attempt += 1 64 | 65 | return func(*args, **kwargs) 66 | return newfn 67 | return decorator 68 | 69 | 70 | def flatten_dict(d, prefix='', separator='.'): 71 | res = dict() 72 | for key, value in d.items(): 73 | if isinstance(value, (dict, OrderedDict)): 74 | res.update(flatten_dict(value, prefix + key + separator, separator)) 75 | else: 76 | res[prefix + key] = value 77 | 78 | return res 79 | 80 | 81 | def set_np_formatting(): 82 | """ formats numpy print """ 83 | np.set_printoptions(edgeitems=30, infstr='inf', 84 | linewidth=4000, nanstr='nan', precision=2, 85 | suppress=False, threshold=10000, formatter=None) 86 | 87 | 88 | def set_seed(seed, torch_deterministic=False, rank=0): 89 | """ set seed across modules """ 90 | if seed == -1 and torch_deterministic: 91 | seed = 42 + rank 92 | elif seed == -1: 93 | seed = np.random.randint(0, 10000) 94 | else: 95 | seed = seed + rank 96 | 97 | print("Setting seed: {}".format(seed)) 98 | 99 | random.seed(seed) 100 | np.random.seed(seed) 101 | torch.manual_seed(seed) 102 | os.environ['PYTHONHASHSEED'] = str(seed) 103 | torch.cuda.manual_seed(seed) 104 | torch.cuda.manual_seed_all(seed) 105 | 106 | if torch_deterministic: 107 | # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility 108 | os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' 109 | torch.backends.cudnn.benchmark = False 110 | torch.backends.cudnn.deterministic = True 111 | torch.use_deterministic_algorithms(True) 112 | else: 113 | torch.backends.cudnn.benchmark = True 114 | torch.backends.cudnn.deterministic = False 115 | 116 | return seed 117 | 118 | def nested_dict_set_attr(d, key, val): 119 | pre, _, post = key.partition('.') 120 | if post: 121 | nested_dict_set_attr(d[pre], post, val) 122 | else: 123 | d[key] = val 124 | 125 | def nested_dict_get_attr(d, key): 126 | pre, _, post = key.partition('.') 127 | if post: 128 | return nested_dict_get_attr(d[pre], post) 129 | else: 130 | return d[key] 131 | 132 | def ensure_dir_exists(path): 133 | if not os.path.exists(path): 134 | os.makedirs(path) 135 | return path 136 | 137 | 138 | def safe_ensure_dir_exists(path): 139 | """Should be safer in multi-treaded environment.""" 140 | try: 141 | return ensure_dir_exists(path) 142 | except FileExistsError: 143 | return path 144 | 145 | 146 | def get_username(): 147 | uid = os.getuid() 148 | try: 149 | return getpass.getuser() 150 | except KeyError: 151 | # worst case scenario - let's just use uid 152 | return str(uid) 153 | 154 | 155 | def 
project_tmp_dir(): 156 | tmp_dir_name = f'ige_{get_username()}' 157 | return safe_ensure_dir_exists(join(tempfile.gettempdir(), tmp_dir_name)) 158 | 159 | # EOF 160 | 161 | 162 | def git_hash(): 163 | cmd = 'git log -n 1 --pretty="%h"' 164 | ret = subprocess.check_output(shlex.split(cmd)).strip() 165 | if isinstance(ret, bytes): 166 | ret = ret.decode() 167 | return ret 168 | 169 | 170 | def git_diff_config(name): 171 | cmd = f'git diff --unified=0 {name}' 172 | ret = subprocess.check_output(shlex.split(cmd)).strip() 173 | if isinstance(ret, bytes): 174 | ret = ret.decode() 175 | return ret 176 | 177 | 178 | -------------------------------------------------------------------------------- /utils/wandb_utils.py: -------------------------------------------------------------------------------- 1 | from rl_games.common.algo_observer import AlgoObserver 2 | 3 | from utils.utils import retry 4 | from utils.reformat import omegaconf_to_dict 5 | 6 | 7 | class WandbAlgoObserver(AlgoObserver): 8 | """Need this to propagate the correct experiment name after initialization.""" 9 | 10 | def __init__(self, cfg): 11 | super().__init__() 12 | self.cfg = cfg 13 | 14 | def before_init(self, base_name, config, experiment_name): 15 | """ 16 | Must call initialization of Wandb before RL-games summary writer is initialized, otherwise 17 | sync_tensorboard does not work. 18 | """ 19 | 20 | import wandb 21 | 22 | wandb_unique_id = f"uid_{experiment_name}" 23 | print(f"Wandb using unique id {wandb_unique_id}") 24 | 25 | cfg = self.cfg 26 | 27 | # this can fail occasionally, so we try a couple more times 28 | @retry(3, exceptions=(Exception,)) 29 | def init_wandb(): 30 | wandb.init( 31 | project=cfg.wandb_project, 32 | entity=cfg.wandb_entity, 33 | group=cfg.wandb_group, 34 | tags=cfg.wandb_tags, 35 | sync_tensorboard=True, 36 | id=wandb_unique_id, 37 | name=experiment_name, 38 | resume=True, 39 | settings=wandb.Settings(start_method='fork'), 40 | ) 41 | 42 | if cfg.wandb_logcode_dir: 43 | wandb.run.log_code(root=cfg.wandb_logcode_dir) 44 | print('wandb running directory........', wandb.run.dir) 45 | 46 | print('Initializing WandB...') 47 | try: 48 | init_wandb() 49 | except Exception as exc: 50 | print(f'Could not initialize WandB! {exc}') 51 | 52 | if isinstance(self.cfg, dict): 53 | wandb.config.update(self.cfg, allow_val_change=True) 54 | else: 55 | wandb.config.update(omegaconf_to_dict(self.cfg), allow_val_change=True) 56 | -------------------------------------------------------------------------------- /utils/warmup_scheduler.py: -------------------------------------------------------------------------------- 1 | class WarmupScheduler: 2 | def __init__(self, optimizer, target_lr,initial_lr=1e-7,warmup_steps=25): 3 | self.optimizer = optimizer 4 | self.warmup_steps = warmup_steps 5 | self.initial_lr = initial_lr 6 | self.target_lr = target_lr 7 | self.current_step = 0 8 | 9 | def step(self): 10 | if self.current_step < self.warmup_steps: 11 | # Linearly increase the learning rate 12 | lr = (self.target_lr - self.initial_lr) * (self.current_step / self.warmup_steps) + self.initial_lr 13 | # Apply the learning rate to the optimizer 14 | for param_group in self.optimizer.param_groups: 15 | param_group['lr'] = lr 16 | # Increment the step count 17 | self.current_step += 1 18 | --------------------------------------------------------------------------------
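A minimal usage sketch for the WarmupScheduler defined above (the toy model, optimizer, target learning rate, and step counts are illustrative assumptions, not values taken from this repository's training scripts; the import path is assumed from the file location utils/warmup_scheduler.py):

import torch

from utils.warmup_scheduler import WarmupScheduler  # assumed import path for the class above

# Toy model and optimizer; start at the warmup's initial learning rate.
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-7)

# Ramp the learning rate linearly from 1e-7 to 3e-4 over the first 25 calls to step().
scheduler = WarmupScheduler(optimizer, target_lr=3e-4, initial_lr=1e-7, warmup_steps=25)

for step in range(100):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 10)).pow(2).mean()  # dummy objective
    loss.backward()
    optimizer.step()
    scheduler.step()  # becomes a no-op once the 25 warmup steps are exhausted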