├── .gitiginore
├── .gitignore
├── README.md
├── algo
│   ├── models
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── imagenet_depth_encoder.cpython-37.pyc
│   │   │   ├── models.cpython-37.pyc
│   │   │   ├── models.cpython-38.pyc
│   │   │   ├── models_priv.cpython-37.pyc
│   │   │   ├── models_priv.cpython-38.pyc
│   │   │   ├── observation_encoder.cpython-37.pyc
│   │   │   ├── proprio_depth_transformer.cpython-37.pyc
│   │   │   ├── proprio_depth_transformer.cpython-38.pyc
│   │   │   ├── proprio_embd_transformer.cpython-37.pyc
│   │   │   ├── proprio_mvp_rgb_transformer.cpython-37.pyc
│   │   │   ├── proprio_r3m_rgb_transformer.cpython-37.pyc
│   │   │   ├── proprio_vip_transformer.cpython-37.pyc
│   │   │   ├── proprio_vit_transformer.cpython-37.pyc
│   │   │   ├── pt_actor_critic.cpython-37.pyc
│   │   │   ├── rt_actor_critic.cpython-37.pyc
│   │   │   ├── rt_embed_actor_critic.cpython-37.pyc
│   │   │   ├── running_mean_std.cpython-37.pyc
│   │   │   ├── running_mean_std.cpython-38.pyc
│   │   │   ├── vision_encoder.cpython-37.pyc
│   │   │   └── vision_encoder.cpython-38.pyc
│   │   ├── models.py
│   │   ├── models_priv.py
│   │   ├── rt_actor_critic.py
│   │   └── running_mean_std.py
│   ├── ppo_transformer
│   │   ├── __pycache__
│   │   │   ├── experience.cpython-37.pyc
│   │   │   ├── mem_eff_experience.cpython-37.pyc
│   │   │   ├── ppo_transformer.cpython-37.pyc
│   │   │   └── ppobc_transformer.cpython-37.pyc
│   │   ├── experience.py
│   │   └── ppo_transformer.py
│   └── pretrained
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-37.pyc
│       │   ├── policy_transformer.cpython-37.pyc
│       │   ├── robot_transformer.cpython-37.pyc
│       │   ├── robot_transformer_ar.cpython-37.pyc
│       │   └── transformer.cpython-37.pyc
│       ├── dataset.py
│       ├── depth_trainer.py
│       ├── depth_trainer_multigpu.py
│       ├── robot_dataset.py
│       ├── robot_transformer_ar.py
│       ├── trainer.py
│       └── transformer.py
├── cfg
│   ├── config.yaml
│   ├── launcher
│   │   └── default.yaml
│   ├── pretrain
│   │   ├── AllegroXarmCabinet.yaml
│   │   ├── AllegroXarmNew.yaml
│   │   └── AllegroXarmThrowing.yaml
│   ├── task
│   │   ├── AllegroXarmCabinet.yaml
│   │   ├── AllegroXarmNew.yaml
│   │   └── AllegroXarmThrowing.yaml
│   └── train
│       ├── AllegroXarmCabinetPPO.yaml
│       ├── AllegroXarmNewPPO.yaml
│       └── AllegroXarmThrowingPPO.yaml
├── env.yml
├── imgs
│   └── approach.png
├── scripts
│   ├── finetune.py
│   ├── finetune
│   │   ├── finetune_cabinet.sh
│   │   ├── finetune_grasp.sh
│   │   └── finetune_throw.sh
│   ├── pretrain.py
│   ├── pretrain.sh
│   └── run_policy.sh
├── tasks
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   ├── allegro_kuka_grasping.cpython-37.pyc
│   │   ├── allegro_kuka_grasping.cpython-38.pyc
│   │   ├── torch_jit_utils.cpython-37.pyc
│   │   ├── torch_jit_utils.cpython-38.pyc
│   │   ├── xarm_cabinet.cpython-37.pyc
│   │   ├── xarm_cabinet.cpython-38.pyc
│   │   ├── xarm_grasping.cpython-37.pyc
│   │   ├── xarm_grasping.cpython-38.pyc
│   │   ├── xarm_grasping_debug.cpython-37.pyc
│   │   ├── xarm_grasping_debug.cpython-38.pyc
│   │   ├── xarm_grasping_new.cpython-37.pyc
│   │   ├── xarm_grasping_new.cpython-38.pyc
│   │   ├── xarm_grasping_real.cpython-37.pyc
│   │   ├── xarm_throwing.cpython-37.pyc
│   │   └── xarm_throwing.cpython-38.pyc
│   ├── base
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── vec_task.cpython-37.pyc
│   │   │   └── vec_task.cpython-38.pyc
│   │   └── vec_task.py
│   ├── torch_jit_utils.py
│   ├── xarm7_utils.py
│   ├── xarm_cabinet.py
│   ├── xarm_grasping_new.py
│   └── xarm_throwing.py
└── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   ├── __init__.cpython-38.pyc
    │   ├── allegro_kuka_utils.cpython-37.pyc
    │   ├── allegro_kuka_utils.cpython-38.pyc
    │   ├── hand_arm_utils.cpython-37.pyc
    │   ├── hand_arm_utils.cpython-38.pyc
    │   ├── logger.cpython-37.pyc
    │   ├── logger.cpython-38.pyc
    │   ├── misc.cpython-37.pyc
    │   ├── misc.cpython-38.pyc
    │   ├── pytorch_utils.cpython-37.pyc
    │   ├── pytorch_utils.cpython-38.pyc
    │   ├── randomization_utils.cpython-37.pyc
    │   ├── randomization_utils.cpython-38.pyc
    │   ├── reformat.cpython-37.pyc
    │   ├── reformat.cpython-38.pyc
    │   ├── torch_jit_utils.cpython-37.pyc
    │   ├── urdf_utils.cpython-37.pyc
    │   ├── urdf_utils.cpython-38.pyc
    │   ├── utils.cpython-37.pyc
    │   ├── utils.cpython-38.pyc
    │   ├── warmup_scheduler.cpython-37.pyc
    │   └── warmup_scheduler.cpython-38.pyc
    ├── allegro_kuka_utils.py
    ├── camera.json
    ├── camera2.json
    ├── dr_utils.py
    ├── hand_arm_utils.py
    ├── logger.py
    ├── misc.py
    ├── pytorch_utils.py
    ├── randomization_utils.py
    ├── reformat.py
    ├── rlgames_utils.py
    ├── rna_util.py
    ├── torch_jit_utils.py
    ├── urdf_utils.py
    ├── utils.py
    ├── wandb_utils.py
    └── warmup_scheduler.py
/.gitiginore:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.zip
2 | outputs/
3 | assets/
4 | */*/.pyc
5 | *.pyc
6 | __pycache__/
7 | */__pycache__/
8 | wandb/
9 | *.log
10 | algo/pretrained/models/*
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hand-object Interaction Pretraining from Videos
2 |
3 | This repo contains the code for the paper [Hand-Object Interaction Pretraining from Videos](https://hgaurav2k.github.io/hop/pdf/manuscript.pdf).
4 |
5 |
6 |
7 | For a brief overview, check out the project [webpage](https://hgaurav2k.github.io/hop)!
8 |
9 |
10 |
11 |
12 | For any questions, please contact [Himanshu Gaurav Singh](https://hgaurav2k.github.io/).
13 |
14 |
15 | ## Setup
16 |
17 | * Create a conda environment using `conda env create -f env.yml`.
18 | * Install [IsaacGym](https://developer.nvidia.com/isaac-gym) in this environment.
19 | * Download the [asset](https://drive.google.com/drive/folders/1BE3lg8k1kssGxojtL0OkQLscSAkbpNzS?usp=sharing) folder and place it in the root directory (see the sketch below).
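
A combined sketch of these steps, assuming the standard conda workflow; the environment name below and the `assets/` folder name are assumptions, so check `env.yml` and the downloaded folder for the actual names:

```bash
conda env create -f env.yml
conda activate hop   # assumed environment name; use the one defined in env.yml
# Install IsaacGym into this environment following NVIDIA's instructions,
# then place the downloaded asset folder at the repository root, e.g. ./assets
```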
20 |
21 | ## Running the code
22 |
23 | ### Pretraining
24 |
25 |
26 | * Download the hand-object interaction dataset from [here](https://drive.google.com/file/d/12-xghxt0rf_0xDo5SMdrRBnNr7LWJ02Y/view?usp=drive_link). Extract it using `tar -xf hoi_pretraining_data.tar.xz` and place it under the root directory.
27 | * Run `bash scripts/pretrain.sh` (see the sketch below).
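
A minimal sketch of the two steps above, assuming the archive has been downloaded into the repository root:

```bash
tar -xf hoi_pretraining_data.tar.xz   # extracts the hand-object interaction dataset
bash scripts/pretrain.sh              # launches pretraining (see scripts/pretrain.sh for the exact arguments)
```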
28 |
29 | ### Finetuning
30 |
31 |
32 | * Download the pretrained checkpoint from [here](https://drive.google.com/file/d/10zYrzPK8T-1zB8dqB5o2MfK_iF0Uda_f/view?usp=sharing). You can also use your own trained checkpoint.
33 | * For your choice of `task`, run `bash scripts/finetune/finetune_{task}.sh` (see the examples below).
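
The task-specific scripts available under `scripts/finetune/` are listed below; the task-to-script mapping is inferred from the file names, and each script is assumed to point at the downloaded (or your own) pretrained checkpoint:

```bash
bash scripts/finetune/finetune_grasp.sh     # grasping
bash scripts/finetune/finetune_cabinet.sh   # cabinet opening
bash scripts/finetune/finetune_throw.sh     # throwing
```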
34 |
36 |
37 |
38 | ### Visualising trained policies
39 |
40 | * Run `bash scripts/run_policy.sh`.
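
For example (which checkpoint and task get visualised is assumed to be configured inside the script):

```bash
bash scripts/run_policy.sh   # roll out and render a trained policy
```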
41 |
42 |
43 | ## Citation
44 |
45 |
46 | ## Acknowledgment
47 | This work was supported by the DARPA Machine Common Sense program, the DARPA Transfer from Imprecise and Abstract Models to Autonomous Technologies (TIAMAT) program, and by the ONR MURI award N00014-21-1-2801. This work was also funded by ONR MURI N00014-22-1-2773. We thank Adhithya Iyer for assistance with teleoperation systems, Phillip Wu for setting up the real robot, and Raven Huang, Jathushan Rajasegaran, and Yutong Bai for helpful discussions.
48 |
--------------------------------------------------------------------------------
/algo/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__init__.py
--------------------------------------------------------------------------------
/algo/models/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/imagenet_depth_encoder.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/imagenet_depth_encoder.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/models.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/models.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/models.cpython-38.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/models_priv.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/models_priv.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/models_priv.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/models_priv.cpython-38.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/observation_encoder.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/observation_encoder.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_depth_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_depth_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_depth_transformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_depth_transformer.cpython-38.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_embd_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_embd_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_mvp_rgb_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_mvp_rgb_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_r3m_rgb_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_r3m_rgb_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_vip_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_vip_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/proprio_vit_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/proprio_vit_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/pt_actor_critic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/pt_actor_critic.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/rt_actor_critic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/rt_actor_critic.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/rt_embed_actor_critic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/rt_embed_actor_critic.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/running_mean_std.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/running_mean_std.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/running_mean_std.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/running_mean_std.cpython-38.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/vision_encoder.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/vision_encoder.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/models/__pycache__/vision_encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/models/__pycache__/vision_encoder.cpython-38.pyc
--------------------------------------------------------------------------------
/algo/models/models.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # In-Hand Object Rotation via Rapid Motor Adaptation
3 | # https://arxiv.org/abs/2210.04887
4 | # Copyright (c) 2022 Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | import copy
13 |
14 |
15 | class SavingModel(nn.Module):
16 | "Saves the two models (runnig_mean_std and actor_critic) required for infence and simplifies TT code"
17 | def __init__(self, actor_critic_model, running_std_model):
18 | super(SavingModel, self).__init__()
19 | self.actor_critic_model = copy.deepcopy(actor_critic_model)
20 | self.running_std_model = copy.deepcopy(running_std_model)
21 | self.running_std_model.eval()
22 |
23 | def forward(self, x):
24 | x = self.running_std_model(x)
25 | input_dict = {'obs': x}
26 | mu = self.actor_critic_model.infer_action(input_dict)
27 | return mu
28 |
29 | class MLP(nn.Module):
30 | def __init__(self, units, input_size):
31 | super(MLP, self).__init__()
32 | layers = []
33 | for output_size in units:
34 | layers.append(nn.Linear(input_size, output_size))
35 | layers.append(nn.ELU())
36 | input_size = output_size
37 | self.mlp = nn.Sequential(*layers)
38 |
39 | def forward(self, x):
40 | return self.mlp(x)
41 |
42 |
43 | class ProprioAdaptTConv(nn.Module):
44 | def __init__(self):
45 | super(ProprioAdaptTConv, self).__init__()
46 | self.channel_transform = nn.Sequential(
47 | nn.Linear(16 + 16, 32),
48 | nn.ReLU(inplace=True),
49 | nn.Linear(32, 32),
50 | nn.ReLU(inplace=True),
51 | )
52 | self.temporal_aggregation = nn.Sequential(
53 | nn.Conv1d(32, 32, (9,), stride=(2,)),
54 | nn.ReLU(inplace=True),
55 | nn.Conv1d(32, 32, (5,), stride=(1,)),
56 | nn.ReLU(inplace=True),
57 | nn.Conv1d(32, 32, (5,), stride=(1,)),
58 | nn.ReLU(inplace=True),
59 | )
60 | self.low_dim_proj = nn.Linear(32 * 3, 8)
61 |
62 | def forward(self, x):
63 | x = self.channel_transform(x) # (N, 50, 32)
64 | x = x.permute((0, 2, 1)) # (N, 32, 50)
65 | x = self.temporal_aggregation(x) # (N, 32, 3)
66 | x = self.low_dim_proj(x.flatten(1))
67 | return x
68 |
69 |
70 | class ActorCritic(nn.Module):
71 | def __init__(self, kwargs):
72 | nn.Module.__init__(self)
73 | actions_num = kwargs.pop('actions_num')
74 | input_shape = kwargs.pop('input_shape')
75 | self.units = kwargs.pop('actor_units')
76 | mlp_input_shape = input_shape
77 |
78 | out_size = self.units[-1]
79 |
80 | self.actor_mlp = MLP(units=self.units, input_size=mlp_input_shape)
81 | self.value = torch.nn.Linear(out_size, 1)
82 | self.mu = torch.nn.Linear(out_size, actions_num)
83 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True)
84 |
85 | for m in self.modules():
86 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
87 | fan_out = m.kernel_size[0] * m.out_channels
88 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out))
89 | if getattr(m, 'bias', None) is not None:
90 | torch.nn.init.zeros_(m.bias)
91 | if isinstance(m, nn.Linear):
92 | if getattr(m, 'bias', None) is not None:
93 | torch.nn.init.zeros_(m.bias)
94 | nn.init.constant_(self.sigma, 0)
95 |
96 | @torch.no_grad()
97 | def get_action(self, obs_dict):
98 | # used specifically to collect samples during training
99 | # it contains exploration, so it needs to sample from the distribution
100 | mu, logstd, value = self._actor_critic(obs_dict)
101 | sigma = torch.exp(logstd)
102 | distr = torch.distributions.Normal(mu, sigma)
103 | selected_action = distr.sample()
104 | result = {
105 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd),
106 | 'values': value,
107 | 'actions': selected_action,
108 | 'mus': mu,
109 | 'sigmas': sigma,
110 | }
111 | return result
112 |
113 | @torch.no_grad()
114 | def infer_action(self, obs_dict):
115 | # used during inference
116 | mu, _, _= self._actor_critic(obs_dict)
117 | return mu
118 |
119 | def _actor_critic(self, obs_dict):
120 | obs = obs_dict['obs']
121 | x = self.actor_mlp(obs)
122 | value = self.value(x)
123 | mu = self.mu(x)
124 | sigma = self.sigma
125 | return mu, mu * 0 + sigma, value
126 |
127 | def forward(self, input_dict):
128 | mu,logstd,value = self._actor_critic(input_dict)
129 | sigma = torch.exp(logstd)
130 | prev_actions = input_dict.get('prev_actions', mu.clone())
131 | distr = torch.distributions.Normal(mu, sigma)
132 | entropy = distr.entropy().sum(dim=-1)
133 | prev_neglogp = -distr.log_prob(prev_actions).sum(1)
134 |
135 | result = {
136 | 'prev_neglogp': torch.squeeze(prev_neglogp),
137 | 'values': value,
138 | 'entropy': entropy,
139 | 'mus': mu,
140 | 'sigmas': sigma
141 | }
142 |
143 | return result
144 |
145 |
146 |
147 | class PointNetActorCritic(nn.Module):
148 |
149 | def __init__(self, kwargs):
150 | nn.Module.__init__(self)
151 | actions_num = kwargs.pop('actions_num')
152 | input_shape = kwargs.pop('input_shape')
153 | self.units = kwargs.pop('actor_units')
154 | self.pc_out_dim = kwargs.pop('point_cloud_out_dim')
155 | self.pc_begin, self.pc_end = kwargs.pop('point_cloud_index')
156 | self.pc_num = kwargs.pop('point_cloud_num')
157 |
158 | mlp_input_shape = input_shape
159 | out_size = self.units[-1]
160 |
161 | self.point_net = nn.Sequential(
162 | nn.Linear(3,self.pc_out_dim),
163 | nn.ELU(inplace=True),
164 | nn.Linear(self.pc_out_dim,self.pc_out_dim),
165 | nn.ELU(inplace=True),
166 | nn.Linear(self.pc_out_dim,self.pc_out_dim),
167 | nn.MaxPool2d((self.pc_num,1))
168 | )
169 |
170 | self.actor_mlp = MLP(units=self.units, input_size=self.pc_begin + self.pc_out_dim)
171 | self.obs_end_actor = self.pc_begin + self.pc_out_dim
172 | self.value = MLP(units=self.units, input_size=mlp_input_shape)
173 | self.value_final = nn.Linear(out_size, 1)
174 | # self.value = nn.Linear(out_size, 1)
175 | self.mu = nn.Linear(out_size, actions_num)
176 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True)
177 |
178 | for m in self.modules():
179 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
180 | fan_out = m.kernel_size[0] * m.out_channels
181 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out))
182 | if getattr(m, 'bias', None) is not None:
183 | torch.nn.init.zeros_(m.bias)
184 | if isinstance(m, nn.Linear):
185 | if getattr(m, 'bias', None) is not None:
186 | torch.nn.init.zeros_(m.bias)
187 | nn.init.constant_(self.sigma, 0)
188 |
189 | @torch.no_grad()
190 | def get_action(self, obs_dict):
191 | # used specifically to collect samples during training
192 | # it contains exploration, so it needs to sample from the distribution
193 | mu, logstd, value = self._actor_critic(obs_dict)
194 | sigma = torch.exp(logstd)
195 | distr = torch.distributions.Normal(mu, sigma)
196 | selected_action = distr.sample()
197 | result = {
198 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd),
199 | 'values': value,
200 | 'actions': selected_action,
201 | 'mus': mu,
202 | 'sigmas': sigma,
203 | }
204 | return result
205 |
206 | @torch.no_grad()
207 | def infer_action(self, obs_dict):
208 | # used during inference
209 | mu, _, _= self._actor_critic(obs_dict)
210 | return mu
211 |
212 | def _actor_critic(self, obs_dict):
213 |
214 | obs = obs_dict['obs']
215 | pc_info = obs[:,self.pc_begin:self.pc_end].reshape(-1,self.pc_num,3)
216 | pc_rep = self.point_net(pc_info).squeeze(1)
217 | obs = torch.cat([obs[:,:self.pc_begin],pc_rep,obs[:,self.pc_end:]],dim=1)
218 | x = self.actor_mlp(obs[:,:self.obs_end_actor])
219 | value_h = self.value(obs)
220 | value = self.value_final(value_h)
221 | mu = self.mu(x)
222 | sigma = self.sigma
223 | return mu, mu * 0 + sigma, value
224 |
225 | def forward(self, input_dict):
226 | prev_actions = input_dict.get('prev_actions', None)
227 | mu,logstd,value = self._actor_critic(input_dict)
228 | sigma = torch.exp(logstd)
229 | distr = torch.distributions.Normal(mu, sigma)
230 | entropy = distr.entropy().sum(dim=-1)
231 | prev_neglogp = -distr.log_prob(prev_actions).sum(1)
232 | result = {
233 | 'prev_neglogp': torch.squeeze(prev_neglogp),
234 | 'values': value,
235 | 'entropy': entropy,
236 | 'mus': mu,
237 | 'sigmas': sigma
238 | }
239 | return result
240 |
--------------------------------------------------------------------------------
/algo/models/models_priv.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # In-Hand Object Rotation via Rapid Motor Adaptation
3 | # https://arxiv.org/abs/2210.04887
4 | # Copyright (c) 2022 Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 |
13 |
14 | class MLP(nn.Module):
15 | def __init__(self, units, input_size):
16 | super(MLP, self).__init__()
17 | layers = []
18 | for output_size in units:
19 | layers.append(nn.Linear(input_size, output_size))
20 | layers.append(nn.ELU())
21 | input_size = output_size
22 | self.mlp = nn.Sequential(*layers)
23 |
24 | def forward(self, x):
25 | return self.mlp(x)
26 |
27 |
28 | class ProprioAdaptTConv(nn.Module):
29 | def __init__(self):
30 | super(ProprioAdaptTConv, self).__init__()
31 | self.channel_transform = nn.Sequential(
32 | nn.Linear(16 + 16, 32),
33 | nn.ReLU(inplace=True),
34 | nn.Linear(32, 32),
35 | nn.ReLU(inplace=True),
36 | )
37 | self.temporal_aggregation = nn.Sequential(
38 | nn.Conv1d(32, 32, (9,), stride=(2,)),
39 | nn.ReLU(inplace=True),
40 | nn.Conv1d(32, 32, (5,), stride=(1,)),
41 | nn.ReLU(inplace=True),
42 | nn.Conv1d(32, 32, (5,), stride=(1,)),
43 | nn.ReLU(inplace=True),
44 | )
45 | self.low_dim_proj = nn.Linear(32 * 3, 8)
46 |
47 | def forward(self, x):
48 | x = self.channel_transform(x) # (N, 50, 32)
49 | x = x.permute((0, 2, 1)) # (N, 32, 50)
50 | x = self.temporal_aggregation(x) # (N, 32, 3)
51 | x = self.low_dim_proj(x.flatten(1))
52 | return x
53 |
54 |
55 | class ActorCritic(nn.Module):
56 | def __init__(self, kwargs):
57 | nn.Module.__init__(self)
58 | actions_num = kwargs.pop('actions_num')
59 | input_shape = kwargs.pop('input_shape')
60 | self.units = kwargs.pop('actor_units')
61 | self.priv_mlp = kwargs.pop('priv_mlp_units')
62 | mlp_input_shape = input_shape[0]
63 |
64 | out_size = self.units[-1]
65 | self.priv_info = kwargs['priv_info']
66 | self.priv_info_stage2 = kwargs['proprio_adapt']
67 | if self.priv_info:
68 | mlp_input_shape += self.priv_mlp[-1]
69 | self.env_mlp = MLP(units=self.priv_mlp, input_size=kwargs['priv_info_dim'])
70 |
71 | if self.priv_info_stage2:
72 | self.adapt_tconv = ProprioAdaptTConv()
73 |
74 | self.actor_mlp = MLP(units=self.units, input_size=mlp_input_shape)
75 | self.value = torch.nn.Linear(out_size, 1)
76 | self.mu = torch.nn.Linear(out_size, actions_num)
77 | self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True)
78 |
79 | for m in self.modules():
80 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
81 | fan_out = m.kernel_size[0] * m.out_channels
82 | m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out))
83 | if getattr(m, 'bias', None) is not None:
84 | torch.nn.init.zeros_(m.bias)
85 | if isinstance(m, nn.Linear):
86 | if getattr(m, 'bias', None) is not None:
87 | torch.nn.init.zeros_(m.bias)
88 | nn.init.constant_(self.sigma, 0)
89 |
90 | @torch.no_grad()
91 | def get_action(self, obs_dict):
92 | # used specifically to collect samples during training
93 | # it contains exploration, so it needs to sample from the distribution
94 | mu, logstd, value, _, _ = self._actor_critic(obs_dict)
95 | sigma = torch.exp(logstd)
96 | distr = torch.distributions.Normal(mu, sigma)
97 | selected_action = distr.sample()
98 | result = {
99 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd),
100 | 'values': value,
101 | 'actions': selected_action,
102 | 'mus': mu,
103 | 'sigmas': sigma,
104 | }
105 | return result
106 |
107 | @torch.no_grad()
108 | def get_action_sample(self, obs_dict):
109 | # used for testing
110 | mu, logstd, value, _, _ = self._actor_critic(obs_dict)
111 | return mu
112 |
113 | def _actor_critic(self, obs_dict):
114 | obs = obs_dict['obs']
115 | extrin, extrin_gt = None, None
116 | if self.priv_info:
117 | if self.priv_info_stage2:
118 | extrin = self.adapt_tconv(obs_dict['proprio_hist'])
119 | # during supervised training, extrin has gt label
120 | extrin_gt = self.env_mlp(obs_dict['priv_info']) if 'priv_info' in obs_dict else extrin
121 | extrin_gt = torch.tanh(extrin_gt)
122 | extrin = torch.tanh(extrin)
123 | obs = torch.cat([obs, extrin], dim=-1)
124 | else:
125 | extrin = self.env_mlp(obs_dict['priv_info'])
126 | extrin = torch.tanh(extrin)
127 | obs = torch.cat([obs, extrin], dim=-1)
128 |
129 | x = self.actor_mlp(obs)
130 | value = self.value(x)
131 | mu = self.mu(x)
132 | sigma = self.sigma
133 | return mu, mu * 0 + sigma, value, extrin, extrin_gt
134 |
135 | def forward(self, input_dict):
136 | prev_actions = input_dict.get('prev_actions', None)
137 | rst = self._actor_critic(input_dict)
138 | mu, logstd, value, extrin, extrin_gt = rst
139 | sigma = torch.exp(logstd)
140 | distr = torch.distributions.Normal(mu, sigma)
141 | entropy = distr.entropy().sum(dim=-1)
142 | prev_neglogp = -distr.log_prob(prev_actions).sum(1)
143 | result = {
144 | 'prev_neglogp': torch.squeeze(prev_neglogp),
145 | 'values': value,
146 | 'entropy': entropy,
147 | 'mus': mu,
148 | 'sigmas': sigma,
149 | 'extrin': extrin,
150 | 'extrin_gt': extrin_gt,
151 | }
152 | return result
153 |
--------------------------------------------------------------------------------
/algo/models/rt_actor_critic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import copy
6 | from algo.pretrained.robot_transformer_ar import RobotTransformerAR
7 | from algo.models.models import MLP
8 |
9 | class RTActorCritic(nn.Module):
10 |
11 | def __init__(self, config, network_config, device, kwargs):
12 |
13 | nn.Module.__init__(self)
14 | self.network_config = network_config
15 | self.device = device
16 | actions_num =self.network_config.action_dim
17 | input_shape = kwargs.pop('value_input_shape')
18 |
19 | self.pc_to_value = config.train.ppo.point_cloud_input_to_value
20 | if config.get('pc_input', False) and self.pc_to_value:
21 | self.pc_begin, self.pc_end = kwargs.pop('point_cloud_index')
22 |
23 | self.value_grads_to_pointnet = config.train.ppo.value_grads_to_pointnet
24 | self.pc_num = self.network_config.pc_num
25 | self.scale_proprio = self.network_config.scale_proprio
26 | self.scale_action = self.network_config.scale_action
27 |
28 |
29 | mlp_input_shape = input_shape
30 |
31 |
32 | self.limits = {'upper': torch.tensor([6.2832, 2.0944, 6.2832, 3.9270, 6.2832, 3.1416, 6.2832, 0.4700, 1.6100, 1.7090, 1.6180, 1.3960,
33 | 1.1630, 1.6440, 1.7190, 0.4700, 1.6100, 1.7090, 1.6180, 0.4700, 1.6100, 1.7090, 1.6180],
34 | requires_grad=False, dtype=torch.float32, device=self.device),
35 | 'lower': torch.tensor([-6.2832, -2.0590, -6.2832, -0.1920, -6.2832, -1.6930, -6.2832, -0.4700, -0.1960, -0.1740, -0.2270,
36 | 0.2630, -0.1050, -0.1890, -0.1620, -0.4700, -0.1960, -0.1740, -0.2270, -0.4700, -0.1960, -0.1740, -0.2270]
37 | ,requires_grad=False, dtype=torch.float32, device=self.device)}
38 |
39 |
40 | self.actor = RobotTransformerAR(
41 | cfg= config)
42 |
43 |
44 | self.value_fn = nn.Sequential(
45 | nn.Linear(mlp_input_shape,512),
46 | nn.ELU(inplace=True),
47 | nn.Linear(512,256),
48 | nn.ELU(inplace=True),
49 | nn.Linear(256,128),
50 | nn.ELU(inplace=True),
51 | nn.Linear(128, 1)
52 | ) #check this
53 |
54 | self.logstd = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32))
55 | #backbone sharing between value and critic? can this be implemented here in some way?
56 | #not doing for now
57 | nn.init.constant_(self.logstd[:7], torch.log(torch.tensor(kwargs['init_eps_arm'])))
58 | nn.init.constant_(self.logstd[7:], torch.log(torch.tensor(kwargs['init_eps_hand'])))
59 |
60 | def scale_q(self, q):
61 | """
62 | Scale the proprioceptive data to be between -1 and 1.
63 | """
64 | q = (q - self.limits['lower'].view((1,-1))) / (self.limits['upper'] - self.limits['lower'])
65 | q = 2 * q - 1
66 | return q
67 |
68 | @torch.no_grad()
69 | def get_action(self, obs_dict):
70 | # used specifically to collect samples during training
71 | # it contains exploration, so it needs to sample from the distribution
72 | mu, value = self._actor_critic(obs_dict)
73 | sigma = torch.exp(self.logstd)
74 | distr = torch.distributions.Normal(mu, sigma)
75 | selected_action = distr.sample()
76 | result = {
77 | 'neglogpacs': -distr.log_prob(selected_action).sum(1), # self.neglogp(selected_action, mu, sigma, logstd),
78 | 'values': value,
79 | 'actions': selected_action,
80 | 'mus': mu,
81 | 'sigmas': sigma,
82 | }
83 | return result
84 |
85 | @torch.no_grad()
86 | def infer_action(self, obs_dict):
87 | # used during inference
88 | mu, _ = self._actor_critic(obs_dict)
89 | return mu
90 |
91 | def _actor_critic(self, obs_dict):
92 |
93 | #what to do with the value network?
94 | obs = obs_dict['obs']
95 |
96 | proprio_hist = obs_dict['proprio_buf']
97 |
98 | if self.scale_proprio:
99 | proprio_hist = self.scale_q(proprio_hist) #scale proprio hist
100 |
101 | pc_hist = obs_dict['pc_buf'] #this is normalized
102 |
103 |
104 | attention_mask = obs_dict['attn_mask']
105 | timesteps = obs_dict['timesteps']
106 |
107 | if self.actor.cfg:
108 | action_hist = obs_dict['action_buf']
109 | action_hist = torch.cat((action_hist, torch.zeros_like(action_hist[:,:1,:])), dim=1)
110 | else:
111 | action_hist=None
112 |
113 | res_dict, pc_embed = self.actor(proprio_hist, pc_hist, action_hist, timesteps.long(), attention_mask)
114 |
115 | # Value function should reuse features?
116 |
117 | if not self.value_grads_to_pointnet:
118 | pc_embed = pc_embed.detach()
119 |
120 | if self.pc_to_value:
121 | obs = torch.cat([obs[:,:self.pc_begin],pc_embed[:,-1],obs[:,self.pc_end:]],dim=1)
122 | value = self.value_fn(obs)
123 |
124 | mu = res_dict['action'][:,-1]  # sigma in the previous policy was independent of observations
125 |
126 | if not self.scale_action:
127 | mu = self.scale_q(mu)
128 |
129 | return mu, value
130 |
131 | def forward(self, input_dict):
132 |
133 | prev_actions = input_dict.get('prev_actions', None)
134 | mu, value = self._actor_critic(input_dict)
135 | sigma = torch.exp(self.logstd)
136 | distr = torch.distributions.Normal(mu, sigma)
137 | entropy = distr.entropy().sum(dim=-1)
138 | prev_neglogp = -distr.log_prob(prev_actions).sum(1)
139 | result = {
140 | 'prev_neglogp': torch.squeeze(prev_neglogp),
141 | 'values': value,
142 | 'entropy': entropy,
143 | 'mus': mu,
144 | 'sigmas': sigma
145 | }
146 | return result
147 |
--------------------------------------------------------------------------------
/algo/models/running_mean_std.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # In-Hand Object Rotation via Rapid Motor Adaptation
3 | # https://arxiv.org/abs/2210.04887
4 | # Copyright (c) 2022 Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 | # Based on: IsaacGymEnvs
8 | # Copyright (c) 2018-2022, NVIDIA Corporation
9 | # Licensed under the BSD 3-Clause License
10 | # https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/
11 | # --------------------------------------------------------
12 |
13 | import torch
14 | import torch.nn as nn
15 | import numpy as np
16 |
17 | class RunningMeanStd(nn.Module):
18 | def __init__(self, insize, epsilon=1e-05, per_channel=False, norm_only=False):
19 | super(RunningMeanStd, self).__init__()
20 | print('RunningMeanStd: ', insize)
21 | self.insize = insize
22 | self.epsilon = epsilon
23 |
24 | self.norm_only = norm_only
25 | self.per_channel = per_channel
26 | if per_channel:
27 | if len(self.insize) == 3:
28 | self.axis = [0,1,2]
29 | if len(self.insize) == 2:
30 | self.axis = [0,1] #make this 0 and 1?
31 | if len(self.insize) == 1:
32 | self.axis = [0]
33 | self.in_size = self.insize[-1]
34 | else:
35 | self.axis = [0]
36 | self.in_size = insize
37 |
38 | self.register_buffer('running_mean', torch.zeros(self.in_size, dtype = torch.float64))
39 | self.register_buffer('running_var', torch.ones(self.in_size, dtype = torch.float64))
40 | self.register_buffer('count', torch.ones((), dtype = torch.float64))
41 |
42 | def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count):
43 | delta = batch_mean - mean
44 | tot_count = count + batch_count
45 |
46 | new_mean = mean + delta * batch_count / tot_count
47 | m_a = var * count
48 | m_b = batch_var * batch_count
49 | M2 = m_a + m_b + delta**2 * count * batch_count / tot_count
50 | new_var = M2 / tot_count
51 | new_count = tot_count
52 | return new_mean, new_var, new_count
53 |
54 | def forward(self, input, unnorm=False):
55 | if self.training:
56 | mean = input.mean(self.axis) # along channel axis
57 | var = input.var(self.axis)
58 | self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(self.running_mean, self.running_var, self.count,
59 | mean, var, input.size()[0] )
60 |
61 | # change shape
62 | if self.per_channel:
63 | if len(self.insize) == 3:
64 | current_mean = self.running_mean.view([1, 1, 1, self.in_size]).expand_as(input)
65 | current_var = self.running_var.view([1, 1, 1, self.in_size]).expand_as(input)
66 | if len(self.insize) == 2:
67 | current_mean = self.running_mean.view([1, 1, self.in_size]).expand_as(input)
68 | current_var = self.running_var.view([1, 1, self.in_size]).expand_as(input)
69 | if len(self.insize) == 1:
70 | current_mean = self.running_mean.view([1, self.in_size]).expand_as(input)
71 | current_var = self.running_var.view([1, self.in_size]).expand_as(input)
72 | else:
73 | current_mean = self.running_mean
74 | current_var = self.running_var
75 | # get output
76 |
77 |
78 | if unnorm:
79 | y = torch.clamp(input, min=-5.0, max=5.0)
80 | y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float()
81 | else:
82 | if self.norm_only:
83 | y = input/ torch.sqrt(current_var.float() + self.epsilon)
84 | else:
85 | y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon)
86 | y = torch.clamp(y, min=-5.0, max=5.0)
87 | return y
88 |
--------------------------------------------------------------------------------
/algo/ppo_transformer/__pycache__/experience.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/experience.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/ppo_transformer/__pycache__/mem_eff_experience.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/mem_eff_experience.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/ppo_transformer/__pycache__/ppo_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/ppo_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/ppo_transformer/__pycache__/ppobc_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/ppo_transformer/__pycache__/ppobc_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/ppo_transformer/experience.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # In-Hand Object Rotation via Rapid Motor Adaptation
3 | # https://arxiv.org/abs/2210.04887
4 | # Copyright (c) 2022 Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 | # Based on: RLGames
8 | # Copyright (c) 2019 Denys88
9 | # Licensed under the MIT License
10 | # https://github.com/Denys88/rl_games/
11 | # --------------------------------------------------------
12 |
13 | import gym
14 | import torch
15 | from torch.utils.data import Dataset
16 | import utils.pytorch_utils as ptu
17 | from termcolor import cprint
18 |
19 | def transform_op(arr):
20 | """
21 | swap and then flatten axes 0 and 1
22 | """
23 | if arr is None:
24 | return arr
25 | s = arr.size()
26 | return arr.transpose(0, 1).reshape(s[0] * s[1], *s[2:])
27 |
28 |
29 | class ExperienceBuffer(Dataset):
30 | def __init__(self, num_envs,
31 | horizon_length,
32 | batch_size,
33 | minibatch_size,
34 | num_gradient_steps,
35 | obs_dim,
36 | proprio_dim,
37 | act_dim,
38 | pc_num,
39 | ctx_len,
40 | device):
41 |
42 | self.device = device
43 | self.num_envs = num_envs
44 | self.max_ep_len = horizon_length
45 |
46 | self.data_dict = None
47 | self.obs_dim = obs_dim
48 | self.proprio_dim = proprio_dim
49 | self.act_dim = act_dim
50 | self.ctx_len = ctx_len
51 | self.pc_num = pc_num
52 | self.storage_dict = {
53 | 'obses': torch.zeros((self.max_ep_len, self.num_envs, self.obs_dim), dtype=torch.float32, device=self.device),
54 | 'proprio_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len, self.proprio_dim),dtype=torch.float32, device=self.device),
55 | 'pc_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len, self.pc_num,3),dtype=torch.float32, device=self.device),
56 | 'action_buf': torch.zeros((self.max_ep_len,self.num_envs, self.ctx_len-1, self.act_dim),dtype=torch.float32, device=self.device),
57 | # 'priv_info': torch.zeros((self.self.max_ep_len, self.num_envs, self.priv_dim), dtype=torch.float32, device=self.device),
58 | 'attn_mask': torch.zeros((self.max_ep_len, self.num_envs, self.ctx_len), dtype=torch.float32, device=self.device),
59 | 'timesteps': -1*torch.ones((self.max_ep_len, self.num_envs, self.ctx_len), dtype=torch.float32, device=self.device),
60 | 'rewards': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device),
61 | 'values': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device),
62 | 'neglogpacs': torch.zeros((self.max_ep_len, self.num_envs), dtype=torch.float32, device=self.device),
63 | 'dones': torch.zeros((self.max_ep_len, self.num_envs), dtype=torch.uint8, device=self.device),
64 | 'actions': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device),
65 | 'mus': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device),
66 | 'sigmas': torch.zeros((self.max_ep_len, self.num_envs, self.act_dim), dtype=torch.float32, device=self.device),
67 | 'returns': torch.zeros((self.max_ep_len, self.num_envs, 1), dtype=torch.float32, device=self.device),
68 | }
69 |
70 | self.batch_size = batch_size
71 | self.length = self.num_gradient_steps = num_gradient_steps
72 |
73 | if self.length < self.max_ep_len:
74 | cprint('Warning: length of buffer is less than max_ep_len, full data is not getting used', 'red')
75 | self.minibatch_size = minibatch_size
76 |
77 | def __len__(self):
78 | return self.length
79 |
80 | def __getitem__(self, idx):
81 | start = idx * self.minibatch_size
82 | end = (idx + 1) * self.minibatch_size
83 |
84 | self.last_range = (start, end)
85 | input_dict = {}
86 | for k, v in self.data_dict.items():
87 | if type(v) is dict:
88 | v_dict = {kd: vd[start:end] for kd, vd in v.items()}
89 | input_dict[k] = v_dict
90 | else:
91 | input_dict[k] = v[start:end]
92 |
93 | return input_dict['values'], input_dict['neglogpacs'], input_dict['advantages'], input_dict['mus'], \
94 | input_dict['sigmas'], input_dict['returns'], input_dict['actions'], \
95 | input_dict['obses'], input_dict['proprio_buf'], input_dict['pc_buf'], input_dict['action_buf'], \
96 | input_dict['attn_mask'], input_dict['timesteps']
97 |
98 |
99 | def update_mu_sigma(self, mu, sigma):
100 | start = self.last_range[0]
101 | end = self.last_range[1]
102 | self.data_dict['mus'][start:end] = mu
103 | self.data_dict['sigmas'][start:end] = sigma
104 |
105 | def update_data(self, name, index, val):
106 | if type(val) is dict:
107 | for k, v in val.items():
108 | self.storage_dict[name][k][index,:] = v
109 | else:
110 | self.storage_dict[name][index,:] = val
111 |
112 | def compute_return(self, last_values, gamma, tau):
113 | last_gae_lam = 0
114 | mb_advs = torch.zeros_like(self.storage_dict['rewards'])
115 | for t in reversed(range(self.max_ep_len)):
116 | if t == self.max_ep_len - 1:
117 | next_values = last_values
118 | else:
119 | next_values = self.storage_dict['values'][t + 1]
120 | next_nonterminal = 1.0 - self.storage_dict['dones'].float()[t]
121 | next_nonterminal = next_nonterminal.unsqueeze(1)
122 | delta = self.storage_dict['rewards'][t] + gamma * next_values * next_nonterminal - self.storage_dict['values'][t]
123 | mb_advs[t] = last_gae_lam = delta + gamma * tau * next_nonterminal * last_gae_lam
124 | self.storage_dict['returns'][t, :] = mb_advs[t] + self.storage_dict['values'][t]  # GAE: return = advantage + value baseline
125 |
126 | def prepare_training(self):
127 | self.data_dict = {}
128 | for k, v in self.storage_dict.items():
129 | self.data_dict[k] = transform_op(v)
130 | advantages = self.data_dict['returns'] - self.data_dict['values']
131 | self.data_dict['advantages'] = ((advantages - advantages.mean()) / (advantages.std() + 1e-8)).squeeze(1)
132 | return self.data_dict
133 |
134 |
135 |
136 | def get_info(self):
137 | buffer_info = {
138 | 'AverageReward' : ptu.to_numpy(self.storage_dict['rewards'].mean()),
139 | 'AverageReturn' : ptu.to_numpy(self.storage_dict['returns'].mean()),
140 | }
141 |
142 | return buffer_info
143 |
--------------------------------------------------------------------------------
/algo/pretrained/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__init__.py
--------------------------------------------------------------------------------
/algo/pretrained/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/pretrained/__pycache__/policy_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/policy_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/pretrained/__pycache__/robot_transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/robot_transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/pretrained/__pycache__/robot_transformer_ar.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/robot_transformer_ar.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/pretrained/__pycache__/transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/algo/pretrained/__pycache__/transformer.cpython-37.pyc
--------------------------------------------------------------------------------
/algo/pretrained/dataset.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import Dataset, DataLoader
3 | import os
4 | import pickle as pkl
5 | from termcolor import cprint
6 | class TrajectoryDataset(Dataset):
7 |
8 | def __init__(self, root,ctx_length=64,device='cuda'):
9 | """
10 | Args:
11 | data (Any): Your dataset (e.g., images, files, tensors).
12 | targets (Any): The labels or targets associated with your data.
13 | transform (callable, optional): Optional transform to be applied on a sample.
14 | """
15 | super(TrajectoryDataset, self).__init__()
16 | self.root = root
17 | self.device = device
18 | #assuming not many files in the directory
19 | self.episodes = [pkl.load(open(os.path.join(root,episode),'rb')) for episode in os.listdir(root)]
20 | self.ctx = ctx_length
21 | self.ep_lens = torch.tensor([(len(episode)- self.ctx+1) for episode in self.episodes])
22 | self.cumsum = torch.cumsum(self.ep_lens,0)
23 | self.visualise()
24 |
25 | def visualise(self):
26 | """
27 | Print summary statistics of the dataset (episode count, example count, proprio and action dimensions).
28 | """
29 | cprint(f"Number of episodes: {len(self.episodes)}",color='green',attrs=['bold'])
30 | cprint(f"Number of examples: {torch.sum(self.ep_lens)}",color='green',attrs=['bold'])
31 | cprint(f"Proprio dimension: {len(self.episodes[0]['robot_state'][0])}",color='green',attrs=['bold'])
32 | cprint(f"Action dimension: {len(self.episodes[0]['action'][0])}",color='green',attrs=['bold'])
33 |
34 | def __len__(self):
35 | """Returns the size of the dataset."""
36 | return torch.sum(self.ep_lens).item()
37 |
38 | def __getitem__(self, index):
39 | """
40 | Generates one sample of data.
41 |
42 | Args:
43 | index (int): The index of the item in the dataset
44 |
45 | Returns:
46 | dict: A dict with 'state', 'action', and 'timesteps' tensors covering a ctx_length-step window starting at the given index.
48 | """
49 |
50 | ep_idx = torch.searchsorted(self.cumsum, index, right=True)
51 | ep = self.episodes[ep_idx]
52 | idx = index - torch.sum(self.ep_lens[:ep_idx])
53 | return {
54 | 'state': torch.tensor(ep['robot_state'][idx:idx+self.ctx]).to(self.device),
55 | 'action': torch.tensor(ep['action'][idx:idx+self.ctx]).to(self.device),
56 | 'timesteps': torch.arange(idx, idx+self.ctx).to(self.device),
57 | }
58 |
59 |
60 |
61 | def collate_fn(batch):
62 |
63 | state = torch.stack([item['state'] for item in batch])
64 | action = torch.stack([item['action'] for item in batch])
65 | timesteps = torch.stack([item['timesteps'] for item in batch])
66 | attention_mask = None
67 |
68 | return state, action, timesteps, attention_mask
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/algo/pretrained/depth_trainer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | import time
5 | from torch.utils.data import DataLoader
6 | import os
7 | from datetime import datetime
8 | import wandb
9 | import tqdm
10 | from torch.nn.parallel import DistributedDataParallel as DDP
11 | from termcolor import cprint
12 | class DepthTrainer:
13 |
14 | def __init__(self,
15 | model,
16 | collate_fn,
17 | optimizer,
18 | loss_fn,
19 | model_save_dir,
20 | train_dataloader,
21 | val_dataset=None,
22 | config=None,
23 | scheduler=None,
24 | eval_fns=None,
25 | logger=None,
26 | rank=0,
27 | world_size=1,
28 | device='cuda'):
29 |
30 | self.model = model
31 | self.device = device
32 | self.optimizer = optimizer
33 | self.batch_size = config.pretrain.training.batch_size
34 | self.val_dataset = val_dataset
35 | self.collate_fn = collate_fn
36 | self.loss_fn = loss_fn
37 | self.scheduler = scheduler
38 | self.save_dir = model_save_dir
39 | self.rank = rank
40 | self.world_size = world_size
41 | self.eval_fns = [] if eval_fns is None else eval_fns
42 | self.diagnostics = dict()
43 | self.logger = logger
44 | self.saved_model_number = 0
45 | self.add_proprio_noise = config.pretrain.training.add_proprio_noise
46 | self.add_action_noise = config.pretrain.training.add_action_noise
47 | num_workers = config.pretrain.training.num_workers #add this to bash file
48 | self.log_freq = config.pretrain.training.log_freq
49 | self.model_save_freq = config.pretrain.training.model_save_freq
50 | # create a dataloader
51 | self.train_dataloader = train_dataloader
52 |
53 | self.start_time = time.time()
54 |
55 | def train_epoch(self, iter_num=0, print_logs=False):
56 |
57 | train_losses = []
58 | train_losses_action = []
59 | logs = dict()
60 |
61 | train_start = time.time()
62 |
63 | self.model.train()
64 |
65 | for i, batch in enumerate(tqdm.tqdm(self.train_dataloader)):
66 |
67 | proprio, depth , actions, timesteps, attention_mask = batch
68 | batch = proprio.to(self.device), depth.to(self.device), \
69 | actions.to(self.device), timesteps.to(self.device), \
70 | attention_mask.to(self.device) if attention_mask is not None else None
71 |
72 | train_loss = self.train_step(batch)
73 |
74 | train_losses_action.append(train_loss['action'])
75 | train_losses.append(train_loss['full'])
76 |
77 | if self.scheduler is not None:
78 | self.scheduler.step()
79 |
80 | if self.logger is not None and i % self.log_freq == 0:
81 | logs['time/training'] = time.time() - train_start
82 | logs['time/total'] = time.time() - self.start_time
83 | logs['optimizer/lr'] = self.optimizer.param_groups[0]['lr']
84 | global_step = iter_num * len(self.train_dataloader) + i
85 | self.logger.log_dict(logs, global_step)
86 | logs['training/train_loss_mean'] = np.mean(train_losses)
87 | logs['training/train_loss_std'] = np.std(train_losses)
88 | logs['training/train_loss_action_mean'] = np.mean(train_losses_action)
89 | logs['training/train_loss_action_std'] = np.std(train_losses_action)
90 |
91 | global_step = iter_num * len(self.train_dataloader) + i
92 | if self.save_dir is not None and global_step % self.model_save_freq == 0:
93 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt'))
94 | self.saved_model_number += 1
95 |
96 | #if self.save_dir is not None and global_step % self.model_save_freq == 0:
97 | #torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt'))
98 |
99 | if print_logs and i % self.log_freq == 0:
100 | for k in self.diagnostics:
101 | logs[k] = self.diagnostics[k]
102 | print('=' * 80)
103 | print(f'Iteration {iter_num}')
104 | for k, v in logs.items():
105 | print(f'{k}: {v}')
106 |
107 | return logs
108 |
109 | def train_step(self,batch):
110 |
111 | proprio, depth, actions, timesteps, attention_mask = batch
112 |
113 |
114 |
115 | action_target = torch.clone(actions)
116 |
117 | if self.add_proprio_noise:
118 | noise = torch.zeros_like(proprio)
119 | noise[...,:7] = torch.randn_like(proprio[...,:7])*0.1 #self.noise_arm
120 | noise[...,7:] = torch.randn_like(proprio[...,7:])*0.1 #self.noise_hand
121 | proprio = proprio + noise
122 |
123 |
124 | action_preds, _ = self.model.forward(proprio,depth,timesteps,attention_mask)
125 |
126 | act_dim = action_preds.shape[2]
127 |
128 | if attention_mask is not None:
129 | action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0]
130 | action_target = action_target.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0]
131 |
132 |
133 | loss_action = self.loss_fn(action_preds, action_target)
134 |
135 | loss = loss_action
136 |
137 | self.optimizer.zero_grad()
138 | loss.backward()
139 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), .25)
140 | self.optimizer.step()
141 |
142 | with torch.no_grad():
143 | self.diagnostics['training/action_error'] = loss_action.detach().cpu().item()
144 |
145 | return_dict = {'action': loss_action.detach().cpu().item(),
146 | 'full': loss.detach().cpu().item()
147 | }
148 |
149 | return return_dict
150 |
151 |
--------------------------------------------------------------------------------
/algo/pretrained/depth_trainer_multigpu.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | import time
5 | from torch.utils.data import DataLoader
6 | import os
7 | from datetime import datetime
8 | import wandb
9 | import tqdm
10 | from torch.nn.parallel import DistributedDataParallel as DDP
11 | from termcolor import cprint
12 |
13 | class MultiGPUTrainer:
14 |
15 | def __init__(self,
16 | model,
17 | train_dataset,
18 | collate_fn,
19 | loss_fn,
20 | model_save_dir,
21 | rank,
22 | world_size,
23 | val_dataset=None,
24 | config=None,
25 | scheduler=None,
26 | eval_fns=None,
27 | logger=None,
28 | device='cuda'):
29 |
30 | self.model = model
31 | self.rank = rank
32 | self.world_size = world_size
33 | if self.world_size > 1:
34 | self.device = f'cuda:{self.rank}'
35 | self.model = self.model.to(self.device)
36 | self.ddp_model = DDP(self.model, device_ids=[self.rank], output_device=self.rank)
37 | self.optimizer = torch.optim.Adam(self.ddp_model.parameters(), lr=config.pretrain.training.lr*config.num_gpus,weight_decay=config.pretrain.training.weight_decay)
38 | else:
39 | self.device = device
40 | self.model = self.model.to(self.device)
41 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.pretrain.training.lr,weight_decay=config.pretrain.training.weight_decay)
42 |
43 | self.batch_size = config.pretrain.training.batch_size
44 | self.train_dataset = train_dataset
45 | self.val_dataset = val_dataset
46 | self.collate_fn = collate_fn
47 | self.loss_fn = loss_fn
48 | self.scheduler = scheduler
49 | self.save_dir = model_save_dir
50 | self.eval_fns = [] if eval_fns is None else eval_fns
51 | self.diagnostics = dict()
52 | self.logger = logger
53 |
54 | self.saved_model_number = 0
55 | self.action_input = config.pretrain.model.action_input
56 | self.add_proprio_noise = config.pretrain.training.add_proprio_noise
57 | self.add_action_noise = config.pretrain.training.add_action_noise
58 | self.num_workers = config.pretrain.training.num_workers
59 | self.log_freq = config.pretrain.training.log_freq
60 | self.noise_arm = config.pretrain.training.noise_arm
61 | self.noise_hand = config.pretrain.training.noise_hand
62 | self.model_save_freq = config.pretrain.training.model_save_freq
63 |
64 |
65 | if self.world_size > 1:
66 | sampler = torch.utils.data.distributed.DistributedSampler(self.train_dataset, num_replicas=world_size, rank=rank)
67 | self.train_dataloader = DataLoader(self.train_dataset,
68 | batch_size=self.batch_size,
69 | num_workers=self.num_workers,
70 | collate_fn=self.collate_fn,
71 | sampler=sampler)
72 | if self.val_dataset is not None:
73 | sampler = torch.utils.data.distributed.DistributedSampler(self.val_dataset,
74 | num_replicas=world_size,
75 | rank=rank)
76 |
77 | self.val_dataloader = DataLoader(self.val_dataset,
78 | batch_size=self.batch_size,
79 | num_workers=self.num_workers,
80 | collate_fn=self.collate_fn,
81 | sampler=sampler)
82 | else:
83 | # create a dataloader
84 | print('Creating dataloader')
85 | self.train_dataloader = DataLoader(self.train_dataset,
86 | batch_size=self.batch_size,
87 | num_workers=self.num_workers,
88 | shuffle=True,
89 | collate_fn=self.collate_fn)
90 |
91 | if self.val_dataset is not None:
92 | self.val_dataloader = DataLoader(self.val_dataset,
93 | batch_size=self.batch_size,
94 | num_workers=self.num_workers,
95 | shuffle=False,
96 | collate_fn=self.collate_fn)
97 |
98 | self.start_time = time.time()
99 |
100 | def train_epoch(self, iter_num=0, print_logs=False):
101 |
102 | train_losses, train_losses_action = [], []
103 | logs = dict()
104 |
105 | train_start = time.time()
106 |
107 | if self.world_size > 1:
108 | self.ddp_model.train()
109 | self.model.train()
110 |
111 | if self.world_size > 1:
112 | self.train_dataloader.sampler.set_epoch(iter_num)
113 |
114 | for i, batch in enumerate(tqdm.tqdm(self.train_dataloader)):
115 |
116 | proprio, depth, actions, timesteps, attention_mask = batch
117 | batch = proprio.to(self.device), depth.to(self.device), \
118 | actions.to(self.device), timesteps.to(self.device), \
119 | attention_mask.to(self.device) if attention_mask is not None else None
120 |
121 |
123 | train_loss = self.train_step(batch)
124 |
126 |
127 | train_losses_action.append(train_loss['action'])
128 | train_losses.append(train_loss['full'])
129 |
130 | if self.scheduler is not None:
131 | self.scheduler.step()
132 |
133 |
134 | if self.world_size > 1:
135 | torch.distributed.barrier()
136 |
137 | if self.logger is not None and i % self.log_freq == 0 and (self.world_size == 1 or self.rank==0):
138 | logs['time/training'] = time.time() - train_start
139 | logs['time/total'] = time.time() - self.start_time
140 | logs['optimizer/lr'] = self.optimizer.param_groups[0]['lr']
141 | logs['training/train_loss_mean'] = np.mean(train_losses)
142 | logs['training/train_loss_std'] = np.std(train_losses)
143 | logs['training/train_loss_action_mean'] = np.mean(train_losses_action)
144 | logs['training/train_loss_action_std'] = np.std(train_losses_action)
145 | global_step = iter_num * len(self.train_dataloader) + i
146 | self.logger.log_dict(logs, global_step)
147 |
148 | if self.save_dir is not None and i % self.model_save_freq == 0 and (self.world_size == 1 or self.rank==0):
149 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, 'last.pt'))
150 | self.saved_model_number += 1
151 |
152 | if self.save_dir is not None and i % 5000 == 0 and (self.world_size == 1 or self.rank==0):
153 | global_step = iter_num * len(self.train_dataloader) + i
154 | torch.save(self.model.state_dict(), os.path.join(self.save_dir, f'model_step_{global_step}.pt'))
155 |
156 | if print_logs and i % self.log_freq == 0 and (self.world_size == 1 or self.rank==0):
157 | for k in self.diagnostics:
158 | logs[k] = self.diagnostics[k]
159 | print('=' * 80)
160 | print(f'Iteration {iter_num}')
161 | for k, v in logs.items():
162 | print(f'{k}: {v}')
163 | return logs
164 |
165 | def train_step(self, batch):
166 |
167 | proprio, depth, actions, timesteps, attention_mask = batch
168 |
169 | action_target = torch.clone(actions)
170 |
171 | if self.add_proprio_noise:
172 | noise = torch.zeros_like(proprio)
173 | noise[...,:7] = torch.randn_like(proprio[...,:7])*self.noise_arm
174 | noise[...,7:] = torch.randn_like(proprio[...,7:])*self.noise_hand
175 | proprio = proprio + noise
176 |
177 |
178 | if self.world_size > 1:
179 | action_preds, _ = self.ddp_model.forward(
180 | proprio, depth, timesteps=timesteps, attention_mask=attention_mask,)
181 |
182 | else:
183 | action_preds, _ = self.model.forward(
184 | proprio, depth, timesteps=timesteps, attention_mask=attention_mask,)
185 |
186 |
187 | act_dim = action_preds.shape[2]
188 |
189 | if attention_mask is not None:
190 | action_preds = action_preds.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0]
191 | action_target = action_target.reshape(-1, act_dim)[attention_mask.reshape(-1) > 0]
192 |
193 |
194 | loss_action = self.loss_fn(action_preds, action_target)
195 | loss = loss_action
196 |
197 |
198 | self.optimizer.zero_grad()
199 | loss.backward()
200 | if self.world_size > 1:
201 | torch.nn.utils.clip_grad_norm_(self.ddp_model.parameters(), .25)
202 | else:
203 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), .25)
204 |
205 | self.optimizer.step()
206 |
207 | with torch.no_grad():
208 | self.diagnostics['training/action_error'] = loss_action.detach().cpu().item()
209 |
210 | return_dict = {'action': loss_action.detach().cpu().item(),
211 | 'full': loss.detach().cpu().item()
212 | }
213 |
214 | return return_dict
215 |
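A minimal launch sketch for this MultiGPUTrainer, assuming one process per GPU started with torch.multiprocessing and the NCCL backend; build_model, build_dataset, collate_fn, the MSE loss and cfg are placeholders for illustration, not repository code.

    import os
    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp

    def run(rank, world_size, cfg):
        # one process per GPU; the trainer itself wraps the model in DDP when world_size > 1
        os.environ.setdefault('MASTER_ADDR', 'localhost')
        os.environ.setdefault('MASTER_PORT', '29500')
        dist.init_process_group('nccl', rank=rank, world_size=world_size)
        model, dataset = build_model(cfg), build_dataset(cfg)   # hypothetical helpers
        trainer = MultiGPUTrainer(model, dataset, collate_fn, torch.nn.MSELoss(),
                                  cfg.pretrain.training.model_save_dir,
                                  rank, world_size, config=cfg)
        for epoch in range(cfg.pretrain.training.num_epochs):
            trainer.train_epoch(iter_num=epoch, print_logs=(rank == 0))
        dist.destroy_process_group()

    if __name__ == '__main__':
        # cfg would be composed elsewhere (e.g. via Hydra) before spawning workers
        world_size = torch.cuda.device_count()
        mp.spawn(run, args=(world_size, cfg), nprocs=world_size)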
--------------------------------------------------------------------------------
/algo/pretrained/robot_dataset.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import Dataset, DataLoader
3 | import os
4 | import pickle as pkl
5 | from termcolor import cprint
6 | import numpy as np
7 | class RobotDataset(Dataset):
8 |
9 | def __init__(self, root=None, cfg=None):
10 | """
11 | Args:
12 | data (Any): Your dataset (e.g., images, files, tensors).
13 | targets (Any): The labels or targets associated with your data.
14 | transform (callable, optional): Optional transform to be applied on a sample.
15 | """
16 | assert root is not None, "Please provide the root directory of the dataset"
17 | assert os.path.exists(root), f"The directory {root} does not exist"
18 | super(RobotDataset, self).__init__()
19 | self.root = root
20 | print(f"Loading dataset from {root}")
21 | self.device = cfg.pretrain.device
22 | self.ctx = cfg.pretrain.model.context_length
23 | self.scale_action = cfg.pretrain.model.scale_action
24 | self.scale_proprio = cfg.pretrain.model.scale_proprio
25 | # set variable to store the episodes
26 | self.episodes_npy = []
27 | self.ep_lens = []
28 | # control period used to convert residual targets into velocities when use_residuals is set
29 | self.dt = np.float32(cfg.pretrain.training.dt)  # e.g. 0.05 s -> 20 Hz
30 | self.use_residuals = cfg.pretrain.training.use_residuals
31 | # get all subject folders (depth 1) under the root directory
32 | subjects_dir = [os.path.join(root,episode) for episode in os.listdir(root) if os.path.isdir(os.path.join(root,episode))]
33 | # get all episode folders (depth 2) inside each subject folder
34 | self.episodes_dir = [os.path.join(subject,episode) for subject in subjects_dir for episode in os.listdir(subject) if os.path.isdir(os.path.join(subject,episode))]
35 | self.episodes_dir = sorted(self.episodes_dir)
36 |
37 | assert len(self.episodes_dir) > 0, f"No episodes found in the directory {root}"
38 | # load all the episodes
39 | for episode in self.episodes_dir:
40 | self.load_episode_fnames(episode)
41 |
42 | assert len(self.episodes_npy) > 0, f"No trajectories found in the directory {root}"
43 | # save the min, max, and mean of the episode lengths
44 | self.min_ep_len = np.min(self.ep_lens)
45 | self.max_ep_len = np.max(self.ep_lens)
46 | self.mean_ep_len = np.mean(self.ep_lens)
47 | cprint(f"Min episode length: {self.min_ep_len}, Max episode length: {self.max_ep_len}, Mean episode length: {self.mean_ep_len}",color='cyan',attrs=['bold'])
48 | self.ep_lens = torch.tensor(self.ep_lens)
49 | self.cumsum = torch.cumsum(self.ep_lens,0)
50 | self.visualise()
51 |
52 | # IG lower and upper limits
53 | self.limits = {'upper': [6.2832, 2.0944, 6.2832, 3.9270, 6.2832, 3.1416, 6.2832, 0.4700, 1.6100, 1.7090, 1.6180, 1.3960,
54 | 1.1630, 1.6440, 1.7190, 0.4700, 1.6100, 1.7090, 1.6180, 0.4700, 1.6100, 1.7090, 1.6180],
55 | 'lower': [-6.2832, -2.0590, -6.2832, -0.1920, -6.2832, -1.6930, -6.2832, -0.4700, -0.1960, -0.1740, -0.2270,
56 | 0.2630, -0.1050, -0.1890, -0.1620, -0.4700, -0.1960, -0.1740, -0.2270, -0.4700, -0.1960, -0.1740, -0.2270]}
57 |
58 |
59 | self.limits['upper'] = np.array(self.limits['upper']).astype(np.float32)
60 | self.limits['lower'] = np.array(self.limits['lower']).astype(np.float32)
61 |
62 |
63 | def load_episode_fnames(self, episode_dir:str):
64 | """
65 | Load the episodes filenames.
66 | """
67 | for episode_fname in sorted(os.listdir(episode_dir)):
68 | # skip files that are not .npy episode files
69 | if not episode_fname.endswith('.npy'):
70 | continue
71 | ep = np.load(os.path.join(episode_dir,episode_fname), allow_pickle=True).item()
72 | self.episodes_npy.append(ep)
73 | # load the file and get the length
74 | eplen = len(ep['robot_qpos']) - self.ctx + 1
75 |
76 | assert eplen > 0, f"Episode length is less than the context length {self.ctx}"
77 |
78 | self.ep_lens.append(eplen)
79 |
80 | def scale_q(self, q):
81 | """
82 | Scale the proprioceptive data to be between -1 and 1.
83 | """
84 | q = (q - self.limits['lower']) / (self.limits['upper'] - self.limits['lower'])
85 | q = 2 * q - 1
86 | return q
87 |
88 | def change_order(self, q):
89 | IG_mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 20, 21, 22, 11, 12, 13, 14, 15, 16, 17, 18]
90 | return q[:,IG_mapping]
91 |
92 | def visualise(self):
93 | """
94 | Visualise the dataset.
95 | """
96 | cprint(f"Number of episodes: {len(self.episodes_npy)}",color='green',attrs=['bold'])
97 | cprint(f"Number of examples: {torch.sum(self.ep_lens)}",color='green',attrs=['bold'])
98 | # Load the first episode to get the dimension of the proprio and action
99 | ep = self.episodes_npy[0]
100 | cprint(f"Proprio dimension: {len(ep['robot_qpos'][0])}",color='green',attrs=['bold'])
101 | cprint(f"Action dimension: {len(ep['target_qpos'][0])}",color='green',attrs=['bold'])
102 |
103 | def __len__(self):
104 | """Returns the size of the dataset."""
105 | return torch.sum(self.ep_lens).item()
106 |
107 | def __getitem__(self, index):
108 | """
109 | Generates one sample of data.
110 |
111 | Args:
112 | index (int): The index of the item in the dataset
113 |
114 | Returns:
115 | dict: A window of length `context_length` from one episode, with keys
116 | 'proprio', 'action', 'obj_pc' and 'timesteps'.
117 | """
118 |
119 | # map the flat index to an episode and the offset of the window within that episode
120 | ep_idx = torch.searchsorted(self.cumsum, index, right=True)
121 | idx = index - torch.sum(self.ep_lens[:ep_idx])
122 | ep = self.episodes_npy[ep_idx]
123 | action_npy = np.stack(ep['target_qpos'][idx:idx+self.ctx])
124 | proprio_npy = np.stack(ep['robot_qpos'][idx:idx+self.ctx])
125 | # Put in IG order
126 | action = self.change_order(action_npy)
127 | proprio = self.change_order(proprio_npy)
128 | # Optionally predict residual targets: the per-step change in target_qpos,
129 | # with the first step taken relative to the current proprio state
130 | if self.use_residuals:
131 | action_res = np.concatenate([np.zeros((1,action.shape[1])), np.diff(action, axis=0)], axis=0)
132 | action_res[0] = action[0] - proprio[0]
133 | action_res = action_res.astype(np.float32)
134 | action = action_res / self.dt
135 |
136 | if self.scale_proprio:
137 | proprio = self.scale_q(proprio)
138 | if self.scale_action:
139 | action = self.scale_q(action)
140 |
141 | obj_pc = np.stack(ep['object_pc'][idx:idx+self.ctx])
142 |
143 | return {
144 | 'proprio': proprio,
145 | 'action': action,
146 | 'obj_pc': obj_pc,
147 | 'timesteps': np.arange(self.ctx),
148 | }
149 |
150 |
151 | def collate_fn(batch):
152 |
153 | proprio = np.stack([item['proprio'] for item in batch])
154 | object_pc = np.stack([item['obj_pc'] for item in batch])
155 | action = np.stack([item['action'] for item in batch])
156 | timesteps = np.stack([item['timesteps'] for item in batch])
157 | attention_mask = None
158 |
159 | proprio = torch.tensor(proprio, dtype=torch.float32, requires_grad=False)
160 | object_pc = torch.tensor(object_pc, dtype=torch.float32, requires_grad=False)
161 | action = torch.tensor(action, dtype=torch.float32, requires_grad=False)
162 | timesteps = torch.tensor(timesteps, dtype=torch.long, requires_grad=False)
163 |
164 | return proprio, object_pc, action, timesteps, attention_mask
165 |
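A minimal usage sketch for the RobotDataset and collate_fn defined above; here cfg stands for a Hydra config carrying the pretrain section from cfg/pretrain/*.yaml, and the obj_pc shape comment is an assumption based on pc_num in that config.

    from torch.utils.data import DataLoader

    dataset = RobotDataset(root=cfg.pretrain.training.root_dir, cfg=cfg)   # e.g. retarget_data/train
    loader = DataLoader(dataset,
                        batch_size=cfg.pretrain.training.batch_size,
                        shuffle=True,
                        num_workers=cfg.pretrain.training.num_workers,
                        collate_fn=collate_fn)
    proprio, obj_pc, action, timesteps, attention_mask = next(iter(loader))
    # proprio/action: (batch, context_length, 23); timesteps: (batch, context_length)
    # obj_pc: likely (batch, context_length, pc_num, 3); attention_mask is None for this fixed-length dataset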
--------------------------------------------------------------------------------
/cfg/config.yaml:
--------------------------------------------------------------------------------
1 |
2 | # Task name - used to pick the class to load
3 | task_name: ${task.name}
4 | teacher_mode: False
5 | pc_input: True
6 | #shape
7 | shape: ""
8 | # if set to positive integer, overrides the default number of environments
9 | num_envs: 4096
10 | # seed - set to -1 to choose random seed
11 | seed: 0
12 | # set to True for deterministic performance
13 | torch_deterministic: False
14 |
15 | # set the maximum number of learning iterations to train for. overrides default per-environment setting
16 | max_iterations: ''
17 |
18 | ## Device config
19 | # 'physx' or 'flex'
20 | physics_engine: 'physx'
21 | # whether to use cpu or gpu pipeline
22 | pipeline: 'cpu'
23 | num_gpus: 1 # if 1, it will only use the gpu indicated below. Otherwise it will use num_gpus in order starting from zero (ignoring the gpu config below)
24 | # device for running physics simulation
25 | sim_device: 'cpu'
26 | # device to run RL
27 | rl_device: 'cpu'
28 | graphics_device_id: 0
29 |
30 | ## PhysX arguments
31 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
32 | solver_type: 1 # 0: pgs, 1: tgs
33 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread
34 |
35 |
36 | # RLGames Arguments
37 | # test - if set, run policy in inference mode (requires setting checkpoint to load)
38 | test: False
39 | track_pose: False
40 | get_target_reference: False
41 | get_target_traj: False
42 | # save_jit - if True, saves a TorchScript (JIT) export for execution on a real robot
43 | save_jit: False
44 | # used to set checkpoint path
45 | checkpoint: ''
46 | dagger_checkpoint: ''
47 | # set sigma when restoring network
48 | sigma: ''
49 | # set to True to use multi-gpu training
50 | multi_gpu: False
51 |
52 | wandb_activate: False
53 | wandb_group: ''
54 | wandb_name: AllegroKukaGraspingTest
55 | wandb_entity: 'himanshu_singh'
56 | wandb_project: 'isaacgym'
57 | wandb_tags: []
58 | wandb_logcode_dir: ''
59 |
60 | capture_video: False
61 | capture_video_freq: 1464
62 | capture_video_len: 100
63 | force_render: True
64 |
65 | # disables rendering
66 | headless: True
67 |
68 | # set default task and default training config based on task
69 | defaults:
70 | - _self_
71 | - task: AllegroXarmNew
72 | - train: ${task}PPO
73 | - pretrain: ${task}
74 |
75 | # set the directory where the output files get saved
76 | hydra:
77 | output_subdir: null
78 | run:
79 | dir: .
80 |
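A minimal sketch (not from the repository) of composing this config programmatically with recent Hydra's compose API; the config_path, the override names and the printed fields are assumptions based on the defaults list and the group files elsewhere in cfg/.

    from hydra import initialize, compose

    with initialize(version_base=None, config_path='cfg'):   # path relative to the calling file
        cfg = compose(config_name='config',
                      overrides=['task=AllegroXarmThrowing', 'num_envs=1024', 'headless=False'])
    print(cfg.task.name, cfg.train.algo, cfg.pretrain.model.context_length)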
--------------------------------------------------------------------------------
/cfg/launcher/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/cfg/launcher/default.yaml
--------------------------------------------------------------------------------
/cfg/pretrain/AllegroXarmCabinet.yaml:
--------------------------------------------------------------------------------
1 | device: cuda:0
2 | wandb_name: "Pretrain_residuals_16ctx"
3 | model:
4 | proprio_dim: 23
5 | action_dim: 23
6 | pc_num: 100
7 | hidden_dim: 192
8 | max_ep_len: 4096
9 | max_length: null
10 | action_tanh: false
11 | context_length: 16
12 | n_layer: 4
13 | n_head: 4
14 | attn_pdrop: 0.0
15 | resid_pdrop: 0.0
16 | embd_pdrop: 0.0
17 | action_input: False
18 | scale_proprio: True
19 | full_autoregressive: True
20 | scale_action: True #these settings are for the working PolicyTransformer model
21 | test: False
22 | wandb_activate: False
23 | checkpoint: ''
24 | groundtruth_policy: ''
25 | load_trajectory: ''
26 | training:
27 | batch_size: 512
28 | modality_aligned: False
29 | use_pc_loss: False
30 | use_proprio_loss: False
31 | num_epochs: 100
32 | use_residuals: False
33 | num_workers: 16
34 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated)
35 | lr: 0.0001
36 | add_proprio_noise: True
37 | add_action_noise: True
38 | noise_arm: 0.1
39 | noise_hand: 0.1
40 | add_data_driven_noise: False
41 | weight_decay: 0.01
42 | log_freq: 1000
43 | model_save_freq: 1000
44 | time_shift: 0
45 | model_save_dir: algo/pretrained/models
46 | root_dir: retarget_data/train
47 | load_checkpoint: False
48 | checkpoint_path: ''
49 | validation:
50 | root_dir: retarget_data/val
51 |
--------------------------------------------------------------------------------
/cfg/pretrain/AllegroXarmNew.yaml:
--------------------------------------------------------------------------------
1 | device: cuda:0
2 | wandb_name: "Pretrain_residuals_16ctx"
3 | model:
4 | proprio_dim: 23
5 | action_dim: 23
6 | pc_num: 100
7 | hidden_dim: 192
8 | max_ep_len: 4096
9 | max_length: null
10 | action_tanh: false
11 | context_length: 16
12 | n_layer: 4
13 | n_head: 4
14 | attn_pdrop: 0.0
15 | resid_pdrop: 0.0
16 | embd_pdrop: 0.0
17 | action_input: False
18 | scale_proprio: True
19 | full_autoregressive: True
20 | use_imagenet: False
21 | use_vit: False
22 | use_diffusion_policy: False
23 | use_r3m: False
24 | use_mvp_rgb: False
25 | use_r3m_depth: False
26 | use_vip: False
27 | diffusion_policy_horizon: null
28 | cache_all: False
29 | scale_action: True #these settings are for the working PolicyTransformer model
30 | test: False
31 | wandb_activate: False
32 | checkpoint: ''
33 | groundtruth_policy: ''
34 | load_trajectory: ''
35 | training:
36 | finetune_layernorm: False
37 | finetune_lastlayer: False
38 | batch_size: 256
39 | modality_aligned: False
40 | use_pc_loss: False
41 | use_proprio_loss: False
42 | num_epochs: 100
43 | use_residuals: False
44 | num_workers: 16
45 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated)
46 | lr: 0.0001
47 | add_proprio_noise: True
48 | add_action_noise: True
49 | noise_arm: 0.1
50 | noise_hand: 0.1
51 | add_data_driven_noise: False
52 | weight_decay: 0.01
53 | log_freq: 3000
54 | model_save_freq: 10000
55 | time_shift: 0
56 | model_save_dir: algo/pretrained/models
57 | root_dir: retarget_data/train
58 | load_checkpoint: False
59 | checkpoint_path: ''
60 | validation:
61 | root_dir: retarget_data/val
62 |
63 | diffusion:
64 | num_inference_steps: 100
65 |
--------------------------------------------------------------------------------
/cfg/pretrain/AllegroXarmThrowing.yaml:
--------------------------------------------------------------------------------
1 | device: cuda:0
2 | wandb_name: "Pretrain_residuals_16ctx"
3 | model:
4 | proprio_dim: 23
5 | action_dim: 23
6 | pc_num: 100
7 | hidden_dim: 192
8 | max_ep_len: 4096
9 | max_length: null
10 | action_tanh: false
11 | context_length: 16
12 | n_layer: 4
13 | n_head: 4
14 | attn_pdrop: 0.0
15 | resid_pdrop: 0.0
16 | embd_pdrop: 0.0
17 | action_input: False
18 | scale_proprio: True
19 | full_autoregressive: True
20 | scale_action: True #these settings are for the working PolicyTransformer model
21 | test: False
22 | wandb_activate: False
23 | checkpoint: ''
24 | groundtruth_policy: ''
25 | load_trajectory: ''
26 | training:
27 | batch_size: 512
28 | modality_aligned: False
29 | use_pc_loss: False
30 | use_proprio_loss: False
31 | num_epochs: 100
32 | use_residuals: False
33 | num_workers: 16
34 | dt: 0.05 # 20Hz, control frequency to scale the action (if activated)
35 | lr: 0.0001
36 | add_proprio_noise: True
37 | add_action_noise: True
38 | noise_arm: 0.1
39 | noise_hand: 0.1
40 | add_data_driven_noise: False
41 | weight_decay: 0.01
42 | log_freq: 1000
43 | model_save_freq: 1000
44 | time_shift: 0
45 | model_save_dir: algo/pretrained/models
46 | root_dir: retarget_data/train
47 | load_checkpoint: False
48 | checkpoint_path: ''
49 | validation:
50 | root_dir: retarget_data/val
51 |
--------------------------------------------------------------------------------
/cfg/task/AllegroXarmCabinet.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 | - _self_
3 |
4 | name: AllegroXarmCabinet
5 |
6 | physics_engine: ${..physics_engine}
7 | asset_root: '../assets'
8 |
9 |
10 | env:
11 | subtask: ""
12 | throw_far: False
13 | bucket_in_front: False
14 | use_leap: False
15 | use_allegro: True
16 | urdfFolder: "ycb_real_inertia"
17 | # if given, will override the device setting in gym.
18 | #numEnvs: ${resolve_default:8192,${...num_envs}}
19 | numEnvs: ${...num_envs}
20 | envSpacing: 1.2
21 | episodeLength: 600 #change
22 | tablePosey: -0.15
23 | tablePosez: 0.023
24 | enableDebugVis: False
25 | enableVideoLog: False
26 | videoLogIdx: 0
27 | videoLogFreq: 20
28 | evalStats: False # extra evaluation-time statistics
29 | doSimpleObjects: True
30 | doVerySimpleObjects: False
31 | doDexYcbObjects: False
32 | useSavedInitPose: False
33 | limitArmDeltaTarget: True
34 | useRandomInitRot: False
35 | addZerosInPrivBuf: False
36 | usePoseRewardUnlifted: False
37 | usePoseRewardLifted: False
38 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"]
39 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"]
40 | initPoseVersion: v16
41 | useDIPFinger: False
42 | lowmem: False
43 | input_priv: True
44 | enableVhacd: True
45 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026']
46 | simpleObjects: ['002', '011', '036', '010', '025', '024', '005', '007']
47 |
48 | verysimpleObjects: ['002']
49 | DexYcbObjects: ['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061']
50 |
51 | clampAbsObservations: 10.0
52 | useOldActionSpace: False
53 | clampArmTarget: False
54 |
55 | stiffnessScale: 1.0
56 | forceLimitScale: 1.0
57 | useRelativeControl: False
58 | dofSpeedScale: 1.0
59 | actionsMovingAverage: 1.0
60 | controlFrequencyInv: 6 # 20 Hz
61 | jointVelocityLimit: 0.5
62 |
63 | resetPositionNoiseX: 0.1
64 | resetPositionNoiseY: 0.1
65 | resetPositionNoiseZ: 0.02
66 | resetRotationNoise: 1.0
67 | resetDofPosRandomIntervalFingers: 0.1
68 | resetDofPosRandomIntervalArm: 0.1
69 | resetDofVelRandomInterval: 0.
70 |
71 |
72 | pointCloudScale: 0.01
73 | # Random forces applied to the object
74 | forceScale: 0.0
75 | forceProbRange: [0.001, 0.1]
76 | forceDecay: 0.99
77 | forceDecayInterval: 0.08
78 |
79 | resetOnArmCollision: False
80 | ArmTableCollisionThreshold: 10
81 | resetOnCollision: False
82 | ContactForceThreshold: 50
83 | resetOnFingerCrash: False
84 | FingerClearanceThreshold: 0.050
85 |
86 | liftingRewScale: 20.0
87 | goalHeight: 0.45
88 | handJointRewCoeff: 1 #work on this
89 | liftingBonus: 300.0
90 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus
91 | keypointRewScale: 200.0
92 | useFingertipReward: True
93 | usePalmReward: False
94 | useLiftingReward: True
95 | useKeypointReward: True
96 | distanceDeltaRewScale: 50.0
97 | useFingertipShapeDistReward: False
98 | useHandJointPoseRew: False
99 |
100 |
101 | handleDistRewardScale: 0.0
102 | aroundHandleRewardScale: 0.0
103 | openBonusRewardScale: 2.0
104 | goalDistRewardScale: 6.0
105 | openPoseRewardScale: 0.0
106 | goalBonusRewardScale: 2.0
107 | actionPenaltyScale: 0.01
108 | fingerDistRewardScale: 0.04
109 | thumbDistRewardScale: 0.08
110 |
111 | reachGoalBonus: 1000.0
112 | kukaActionsPenaltyScale: 0.003
113 | allegroActionsPenaltyScale: 0.0003
114 | fallDistance: 0.24
115 | fallPenalty: 0.0
116 |
117 | privilegedActions: False
118 | privilegedActionsTorque: 0.02
119 |
120 | # Physics v1, pretty much default settings we used from the start of the project
121 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used
122 |
123 | # gain of PD controller.
124 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements
125 | armStiffness: 1000 #40.0
126 | handVelocity: 10.0
127 | armVelocity: 10.0
128 |
129 | handEffort: 0.35 # this is what was used in sim-to-real experiment. Motor torque in Newton*meters
130 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2
131 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2
132 |
133 | handDamping: 5 #increasing damping leads to less local oscillatory movement
134 | armDamping: 100 #5
135 |
136 | handArmature: 0
137 | armArmature: 0
138 |
139 | keypointScale: 1.5
140 | objectBaseSize: 0.05
141 | numPointCloud: 100
142 |
143 | randomizeObjectDimensions: True
144 | withSmallCuboids: True
145 | withBigCuboids: True
146 | withSticks: True
147 |
148 | objectType: "" #changing to ball only for now
149 | observationType: "full_state"
150 | successTolerance: 0.075
151 | targetSuccessTolerance: 0.01
152 | toleranceCurriculumIncrement: 0.9 # multiplicative
153 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps
154 | maxConsecutiveSuccesses: 2
155 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success
156 |
157 | saveStates: False
158 | saveStatesFile: "rootTensorsDofStates.bin"
159 |
160 | loadInitialStates: False
161 | loadStatesFile: "rootTensorsDofStates.bin"
162 | enableProprioHistory: True
163 | useObsAsProp: False
164 | enableActionHistory: True
165 | enableAttnMask: True
166 | enablePointCloud: True
167 | enableCameraSensors: False
168 | # set to True if you use camera sensors in the environment
169 | rgbd_camera:
170 | enable_depth: False
171 | enable_rgb: False
172 | render_slowness: 1
173 | camera_width: 60
174 | camera_height: 60
175 | buffer_width: 60
176 | buffer_height: 60
177 | fov: 60
178 | ss: 2
179 | num_cameras: 1
180 | intrinsics: 'utils/camera.json'
181 | randomize_camera_pose: 0.04 #in meters
182 | randomize_camera_rot: 5 #in degrees
183 | cam0:
184 | #pos: [0.20, -0.55, 0.65]
185 | #pos: [0.0, -0.31, 0.49]
186 | #pos: [0.12, -0.31, 0.55]
187 | pos: [0.12, -0.35, 0.60]
188 | target: [0.10, -0.25, 0.45]
189 | cam1:
190 | pos: [0.50, -0.15, 0.65]
191 | target: [0.0, -0.15, 0.6]
192 | wrist_camera: False
193 |
194 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet
195 |
196 | asset:
197 | # This was the original kuka_allegro asset.
198 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too
199 | # high in general. But in turn this leads to smoother movements and better looking behaviors.
200 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which
201 | # gives a bit more FPS.
202 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf"
203 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf"
204 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf"
205 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf"
206 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf"
207 | # This is the URDF which has more accurate collision shapes and weights.
208 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and
209 | # fingers which leads to faster training (better sample efficiency). But overall the resulting
210 | # behaviors look too fast and a bit unrealistic.
211 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not
212 | # lead to behaviors that would be worse for sim-to-real experiments. Most likely the problem is elsewhere,
213 | # for example the max torques might be too high, or the armature of the motors is too low.
214 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project.
215 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf"
216 |
217 | task:
218 |
219 | do_random_resets: False
220 |
221 | domain_randomization:
222 | randomize_friction: False
223 | friction_lower_limit: 0.6
224 | friction_upper_limit: 1.2
225 |
226 | randomize_object_mass: False
227 | mass_lower_limit: 0.8
228 | mass_upper_limit: 1.2
229 |
230 | randomize_object_com: False
231 | com_lower_limit: -0.05
232 | com_upper_limit: 0.05
233 |
234 | randomize_table_position: False
235 | table_y_lower: 0.45
236 | table_y_upper: 0.55
237 | table_z_lower: 0.01
238 | table_z_upper: 0.05
239 |
240 | randomize_table_friction: False
241 | table_friction_lower_limit: 0.6
242 | table_friction_upper_limit: 1.2
243 |
244 |
245 | sim:
246 | substeps: 2
247 | dt: 0.00833 # 1/120
248 | up_axis: "z"
249 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"}
250 | num_client_threads: 8
251 |
252 | gravity: [0.0, 0.0, -9.81]
253 | physx:
254 | num_threads: 6
255 | solver_type: 1 # 0: pgs, 1: tgs
256 | num_position_iterations: 8
257 | num_velocity_iterations: 0
258 |
259 | max_gpu_contact_pairs: 8388608 # 8*1024*1024
260 | num_subscenes: ${....num_subscenes}
261 | contact_offset: 0.002
262 | rest_offset: 0.0
263 | bounce_threshold_velocity: 0.2
264 | max_depenetration_velocity: 1000.0
265 | default_buffer_size_multiplier: 25.0
266 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
267 |
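A quick check of the control-rate arithmetic implied by this file, using sim.dt and env.controlFrequencyInv from above; the variable names below are illustrative, not part of the config.

    sim_dt = 0.00833              # physics step, roughly 1/120 s
    control_frequency_inv = 6     # the policy acts once every 6 physics steps
    control_dt = sim_dt * control_frequency_inv   # ~0.05 s, i.e. ~20 Hz, matching the "20 Hz" comment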
--------------------------------------------------------------------------------
/cfg/task/AllegroXarmNew.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 | - _self_
3 |
4 | name: AllegroXarmNew
5 |
6 | physics_engine: ${..physics_engine}
7 | asset_root: '../assets'
8 |
9 |
10 | env:
11 | subtask: ""
12 | use_leap: False
13 | use_allegro: True
14 | urdfFolder: "ycb_real_inertia"
15 | # if given, will override the device setting in gym.
16 | #numEnvs: ${resolve_default:8192,${...num_envs}}
17 | numEnvs: ${...num_envs}
18 | envSpacing: 1.2
19 | episodeLength: 600 #change
20 | tablePosey: -0.15
21 | tablePosez: 0.023
22 | enableDebugVis: False
23 | enableVideoLog: False
24 | videoLogIdx: 0
25 | videoLogFreq: 20
26 | evalStats: False # extra evaluation-time statistics
27 | doSimpleObjects: True
28 | doVerySimpleObjects: False
29 | doDexYcbObjects: False
30 | useSavedInitPose: False
31 | limitArmDeltaTarget: True
32 | useRandomInitRot: False
33 | addZerosInPrivBuf: False
34 | usePoseRewardUnlifted: False
35 | usePoseRewardLifted: False
36 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"]
37 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"]
38 | initPoseVersion: v16
39 | useDIPFinger: False
40 | lowmem: False
41 | input_priv: True
42 | enableVhacd: True
43 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026']
44 | simpleObjects: ['002', '036', '010', '025', '024', '005'] #['021', '035', '036', '019'] #
45 |
46 | verysimpleObjects: ['002']
47 | DexYcbObjects: ['035','003','004','007','008','009','011', '021','037','040','051','052','061'] #['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061']
48 |
49 | clampAbsObservations: 10.0
50 | useOldActionSpace: False
51 | clampArmTarget: False
52 |
53 | stiffnessScale: 1.0
54 | forceLimitScale: 1.0
55 | useRelativeControl: False
56 | dofSpeedScale: 1.0
57 | actionsMovingAverage: 1.0
58 | controlFrequencyInv: 6 # 20 Hz
59 | jointVelocityLimit: 0.5
60 |
61 | resetPositionNoiseX: 0.1
62 | resetPositionNoiseY: 0.1
63 | resetPositionNoiseZ: 0.02
64 | resetRotationNoise: 1.0
65 | resetDofPosRandomIntervalFingers: 0.1
66 | resetDofPosRandomIntervalArm: 0.1
67 | resetDofVelRandomInterval: 0.
68 |
69 |
70 | pointCloudScale: 0.01
71 | # Random forces applied to the object
72 | forceScale: 0.0
73 | forceProbRange: [0.8, 0.8]
74 | forceDecay: 0.99
75 | forceDecayInterval: 0.08
76 |
77 | resetOnArmCollision: False
78 | ArmTableCollisionThreshold: 10
79 | resetOnCollision: False
80 | ContactForceThreshold: 50
81 | resetOnFingerCrash: False
82 | FingerClearanceThreshold: 0.050
83 |
84 | liftingRewScale: 20.0
85 | goalHeight: 0.45
86 | handJointRewCoeff: 1 #work on this
87 | liftingBonus: 300.0
88 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus
89 | keypointRewScale: 200.0
90 | useFingertipReward: True
91 | usePalmReward: False
92 | useLiftingReward: True
93 | useKeypointReward: True
94 | distanceDeltaRewScale: 50.0
95 | useFingertipShapeDistReward: False
96 | useHandJointPoseRew: False
97 |
98 | reachGoalBonus: 1000.0
99 | kukaActionsPenaltyScale: 0.003
100 | allegroActionsPenaltyScale: 0.0003
101 | fallDistance: 0.24
102 | fallPenalty: 0.0
103 |
104 | privilegedActions: False
105 | privilegedActionsTorque: 0.02
106 |
107 | # Physics v1, pretty much default settings we used from the start of the project
108 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used
109 |
110 | # gain of PD controller.
111 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements
112 | armStiffness: 1000 #40.0
113 | handVelocity: 10.0
114 | armVelocity: 10.0
115 |
116 | handEffort: 0.35 # this is what was used in sim-to-real experiment. Motor torque in Newton*meters
117 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2
118 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2
119 |
120 | handDamping: 5 #increasing damping leads to less local oscillatory movement
121 | armDamping: 100 #5
122 |
123 | handArmature: 0
124 | armArmature: 0
125 |
126 | keypointScale: 1.5
127 | objectBaseSize: 0.05
128 | numPointCloud: 100
129 |
130 | randomizeObjectDimensions: True
131 | withSmallCuboids: True
132 | withBigCuboids: True
133 | withSticks: True
134 |
135 | objectType: "" #changing to ball only for now
136 | observationType: "full_state"
137 | successTolerance: 0.075
138 | targetSuccessTolerance: 0.01
139 | toleranceCurriculumIncrement: 0.9 # multiplicative
140 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps
141 | maxConsecutiveSuccesses: 2
142 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success
143 |
144 | saveStates: False
145 | saveStatesFile: "rootTensorsDofStates.bin"
146 |
147 | loadInitialStates: False
148 | loadStatesFile: "rootTensorsDofStates.bin"
149 | enableProprioHistory: True
150 | useObsAsProp: False
151 | enableActionHistory: True
152 | enableAttnMask: True
153 | enablePointCloud: True
154 | enableCameraSensors: False
155 | # set to True if you use camera sensors in the environment
156 | rgbd_camera:
157 | enable_depth: False
158 | enable_rgb: False
159 | render_slowness: 1
160 | camera_width: 60
161 | camera_height: 60
162 | buffer_width: 60
163 | buffer_height: 60
164 | fov: 60
165 | ss: 2
166 | num_cameras: 1
167 | intrinsics: 'utils/camera2.json'
168 | randomize_camera_pose: 0.04 #in meters
169 | randomize_camera_rot: 5 #in degrees
170 | cam0:
171 | #pos: [0.20, -0.55, 0.65]
172 | #pos: [0.0, -0.31, 0.49]
173 | #pos: [0.12, -0.31, 0.55]
174 | pos: [0.12, -0.35, 0.60]
175 | target: [0.10, -0.25, 0.45]
176 | cam1:
177 | pos: [0.50, -0.15, 0.65]
178 | target: [0.0, -0.15, 0.6]
179 | wrist_camera: False
180 |
181 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet
182 |
183 | asset:
184 | # This was the original kuka_allegro asset.
185 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too
186 | # high in general. But in turn this leads to smoother movements and better looking behaviors.
187 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which
188 | # gives a bit more FPS.
189 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf"
190 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf"
191 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf"
192 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf"
193 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf"
194 | # This is the URDF which has more accurate collision shapes and weights.
195 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and
196 | # fingers which leads to faster training (better sample efficiency). But overall the resulting
197 | # behaviors look too fast and a bit unrealistic.
198 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not
199 | # lead to behaviors that would be worse for sim-to-real experiments. Most likely the problem is elsewhere,
200 | # for example the max torques might be too high, or the armature of the motors is too low.
201 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project.
202 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf"
203 |
204 | task:
205 |
206 | do_random_resets: False
207 |
208 | domain_randomization:
209 | randomize_friction: False
210 | friction_lower_limit: 0.6
211 | friction_upper_limit: 1.2
212 |
213 | randomize_object_mass: False
214 | mass_lower_limit: 0.8
215 | mass_upper_limit: 1.2
216 |
217 | randomize_object_com: False
218 | com_lower_limit: -0.05
219 | com_upper_limit: 0.05
220 |
221 | randomize_table_position: False
222 | table_rnd_y: 0.02
223 | table_rnd_z: 0.02
224 | table_rnd_x: 0.02
225 |
226 | randomize_table_friction: False
227 | table_friction_lower_limit: 0.6
228 | table_friction_upper_limit: 1.2
229 |
230 |
231 | sim:
232 | substeps: 2
233 | dt: 0.00833 # 1/120
234 | up_axis: "z"
235 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"}
236 | num_client_threads: 8
237 |
238 | gravity: [0.0, 0.0, -9.81]
239 | physx:
240 | num_threads: 6
241 | solver_type: 1 # 0: pgs, 1: tgs
242 | num_position_iterations: 8
243 | num_velocity_iterations: 0
244 |
245 | max_gpu_contact_pairs: 8388608 # 8*1024*1024
246 | num_subscenes: ${....num_subscenes}
247 | contact_offset: 0.002
248 | rest_offset: 0.0
249 | bounce_threshold_velocity: 0.2
250 | max_depenetration_velocity: 1000.0
251 | default_buffer_size_multiplier: 25.0
252 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
253 |
--------------------------------------------------------------------------------
/cfg/task/AllegroXarmThrowing.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 | - _self_
3 |
4 | name: AllegroXarmThrowing
5 |
6 | physics_engine: ${..physics_engine}
7 | asset_root: '../assets'
8 |
9 |
10 | env:
11 | subtask: ""
12 | throw_far: False
13 | bucket_in_front: False
14 | use_leap: False
15 | use_allegro: True
16 | urdfFolder: "ycb_real_inertia"
17 | # if given, will override the device setting in gym.
18 | #numEnvs: ${resolve_default:8192,${...num_envs}}
19 | numEnvs: ${...num_envs}
20 | envSpacing: 1.2
21 | episodeLength: 600 #change
22 | tablePosey: -0.15
23 | tablePosez: 0.023
24 | enableDebugVis: False
25 | enableVideoLog: False
26 | videoLogIdx: 0
27 | videoLogFreq: 20
28 | evalStats: False # extra evaluation-time statistics
29 | doSimpleObjects: True
30 | doVerySimpleObjects: False
31 | doDexYcbObjects: False
32 | useSavedInitPose: False
33 | limitArmDeltaTarget: True
34 | useRandomInitRot: False
35 | addZerosInPrivBuf: False
36 | usePoseRewardUnlifted: False
37 | usePoseRewardLifted: False
38 | leapFingers: ["fingertip", "fingertip_2", "fingertip_3", "thumb_fingertip"]
39 | leapDIP: ["dip", "dip_2", "dip_3", "thumb_dip"]
40 | initPoseVersion: v16
41 | useDIPFinger: False
42 | lowmem: False
43 | input_priv: True
44 | enableVhacd: True
45 | vhacdObjects: ['070-a','070-b','072','036','032','029','048','027','019','032','026']
46 | simpleObjects: ['002', '011', '036', '010', '025', '024', '005', '007']
47 |
48 | verysimpleObjects: ['002']
49 | DexYcbObjects: ['035','003','004','005','007','008','009','010','011', '021','024','025','002','036','037','040','051','052','061']
50 |
51 | clampAbsObservations: 10.0
52 | useOldActionSpace: False
53 | clampArmTarget: False
54 |
55 | stiffnessScale: 1.0
56 | forceLimitScale: 1.0
57 | useRelativeControl: False
58 | dofSpeedScale: 1.0
59 | actionsMovingAverage: 1.0
60 | controlFrequencyInv: 6 # 20 Hz
61 | jointVelocityLimit: 0.5
62 |
63 | resetPositionNoiseX: 0.1
64 | resetPositionNoiseY: 0.1
65 | resetPositionNoiseZ: 0.02
66 | resetRotationNoise: 1.0
67 | resetDofPosRandomIntervalFingers: 0.1
68 | resetDofPosRandomIntervalArm: 0.1
69 | resetDofVelRandomInterval: 0.
70 |
71 |
72 | pointCloudScale: 0.01
73 | # Random forces applied to the object
74 | forceScale: 0.0
75 | forceProbRange: [0.001, 0.1]
76 | forceDecay: 0.99
77 | forceDecayInterval: 0.08
78 |
79 | resetOnArmCollision: False
80 | ArmTableCollisionThreshold: 10
81 | resetOnCollision: False
82 | ContactForceThreshold: 50
83 | resetOnFingerCrash: False
84 | FingerClearanceThreshold: 0.050
85 |
86 | liftingRewScale: 20.0
87 | goalHeight: 0.45
88 | handJointRewCoeff: 1 #work on this
89 | liftingBonus: 300.0
90 | liftingBonusThreshold: 0.10 # when the object is lifted this distance (in meters) above the table, the agent gets the lifting bonus
91 | keypointRewScale: 200.0
92 | useFingertipReward: True
93 | usePalmReward: False
94 | useLiftingReward: True
95 | useKeypointReward: True
96 | distanceDeltaRewScale: 50.0
97 | useFingertipShapeDistReward: False
98 | useHandJointPoseRew: False
99 |
100 | reachGoalBonus: 1000.0
101 | kukaActionsPenaltyScale: 0.003
102 | allegroActionsPenaltyScale: 0.0003
103 | fallDistance: 0.24
104 | fallPenalty: 0.0
105 |
106 | privilegedActions: False
107 | privilegedActionsTorque: 0.02
108 |
109 | # Physics v1, pretty much default settings we used from the start of the project
110 | dofFriction: 1.0 # negative values are ignored and the default friction from URDF file is used
111 |
112 | # gain of PD controller.
113 | handStiffness: 40.0 #increasing stiffness leads to stiffer movements
114 | armStiffness: 1000 #40.0
115 | handVelocity: 10.0
116 | armVelocity: 10.0
117 |
118 | handEffort: 0.35 # this is what was used in sim-to-real experiment. Motor torque in Newton*meters
119 | # armEffort: [300, 300, 300, 300, 300, 300, 300] # see Physics v2
120 | armEffort: [500, 500, 500, 500, 500, 500, 500] # see Physics v2
121 |
122 | handDamping: 5 #increasing damping leads to less local oscillatory movement
123 | armDamping: 100 #5
124 |
125 | handArmature: 0
126 | armArmature: 0
127 |
128 | keypointScale: 1.5
129 | objectBaseSize: 0.05
130 | numPointCloud: 100
131 |
132 | randomizeObjectDimensions: True
133 | withSmallCuboids: True
134 | withBigCuboids: True
135 | withSticks: True
136 |
137 | objectType: "" #changing to ball only for now
138 | observationType: "full_state"
139 | successTolerance: 0.075
140 | targetSuccessTolerance: 0.01
141 | toleranceCurriculumIncrement: 0.9 # multiplicative
142 | toleranceCurriculumInterval: 3000 # in env steps across all agents, with 8192 this is 3000 * 8192 = 24.6M env steps
143 | maxConsecutiveSuccesses: 2
144 | successSteps: 50 # how many steps we should be within the tolerance before we declare a success
145 |
146 | saveStates: False
147 | saveStatesFile: "rootTensorsDofStates.bin"
148 |
149 | loadInitialStates: False
150 | loadStatesFile: "rootTensorsDofStates.bin"
151 | enableProprioHistory: True
152 | useObsAsProp: False
153 | enableActionHistory: True
154 | enableAttnMask: True
155 | enablePointCloud: True
156 | enableCameraSensors: False
157 | # set to True if you use camera sensors in the environment
158 | rgbd_camera:
159 | enable_depth: False
160 | enable_rgb: False
161 | render_slowness: 1
162 | camera_width: 60
163 | camera_height: 60
164 | buffer_width: 60
165 | buffer_height: 60
166 | fov: 60
167 | ss: 2
168 | num_cameras: 1
169 | intrinsics: 'utils/camera.json'
170 | randomize_camera_pose: 0.04 #in meters
171 | randomize_camera_rot: 5 #in degrees
172 | cam0:
173 | #pos: [0.20, -0.55, 0.65]
174 | #pos: [0.0, -0.31, 0.49]
175 | #pos: [0.12, -0.31, 0.55]
176 | pos: [0.12, -0.35, 0.60]
177 | target: [0.10, -0.25, 0.45]
178 | cam1:
179 | pos: [0.50, -0.15, 0.65]
180 | target: [0.0, -0.15, 0.6]
181 | wrist_camera: False
182 |
183 | stage2_hist_len: 16 # 3 seconds of history #GRU history not yet
184 |
185 | asset:
186 | # This was the original kuka_allegro asset.
187 | # This URDF has some issues, i.e. weights of fingers are too high and the mass of the Allegro hand is too
188 | # high in general. But in turn this leads to smoother movements and better looking behaviors.
189 | # Additionally, collision shapes of fingertips are more primitive (just rough convex hulls), which
190 | # gives a bit more FPS.
191 | kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_touch_sensor.urdf"
192 | FrankAllegro: "urdf/franka_description/allegro_hand_description/franka_panda_allegro.urdf"
193 | # Xarm7_allegro: 'new_asset/xarm7_description_new/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf"
194 | Xarm7_allegro: 'urdf/xarm7_allegro_vertical/xarm7_allegro.urdf' #"urdf/xarm7_color.urdf" #"urdf/xarm7_hand.urdf"
195 | Xarm7_leap_hand: "urdf/xarm7_leap.urdf"
196 | # This is the URDF which has more accurate collision shapes and weights.
197 | # I believe since the hand is much lighter, the policy has more control over the movement of both arm and
198 | # fingers which leads to faster training (better sample efficiency). But overall the resulting
199 | # behaviors look too fast and a bit unrealistic.
200 | # For sim-to-real experiments this needs to be addressed. Overall, v2 is a "Better" URDF, and it should not
201 | # lead to behaviors that would be worse for sim-to-real experiments. Most likely the problem is elsewhere,
202 | # for example the max torques might be too high, or the armature of the motors is too low.
203 | # The exercise of finding the right URDF and other parameters is left for the sim-to-real part of the project.
204 | # kukaAllegro: "urdf/kuka_allegro_description/kuka_allegro_v2.urdf"
205 |
206 | task:
207 |
208 | do_random_resets: False
209 |
210 | domain_randomization:
211 | randomize_friction: False
212 | friction_lower_limit: 0.6
213 | friction_upper_limit: 1.2
214 |
215 | randomize_object_mass: False
216 | mass_lower_limit: 0.8
217 | mass_upper_limit: 1.2
218 |
219 | randomize_object_com: False
220 | com_lower_limit: -0.05
221 | com_upper_limit: 0.05
222 |
223 | randomize_table_position: False
224 | table_y_lower: 0.45
225 | table_y_upper: 0.55
226 | table_z_lower: 0.01
227 | table_z_upper: 0.05
228 |
229 | randomize_table_friction: False
230 | table_friction_lower_limit: 0.6
231 | table_friction_upper_limit: 1.2
232 |
233 |
234 | sim:
235 | substeps: 2
236 | dt: 0.00833 # 1/120
237 | up_axis: "z"
238 | use_gpu_pipeline: True #${eq:${...pipeline},"gpu"}
239 | num_client_threads: 8
240 |
241 | gravity: [0.0, 0.0, -9.81]
242 | physx:
243 | num_threads: 6
244 | solver_type: 1 # 0: pgs, 1: tgs
245 | num_position_iterations: 8
246 | num_velocity_iterations: 0
247 |
248 | max_gpu_contact_pairs: 8388608 # 8*1024*1024
249 | num_subscenes: ${....num_subscenes}
250 | contact_offset: 0.002
251 | rest_offset: 0.0
252 | bounce_threshold_velocity: 0.2
253 | max_depenetration_velocity: 1000.0
254 | default_buffer_size_multiplier: 25.0
255 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
256 |
--------------------------------------------------------------------------------
/cfg/train/AllegroXarmCabinetPPO.yaml:
--------------------------------------------------------------------------------
1 | seed: ${..seed}
2 | algo: PPO
3 | network:
4 | mlp:
5 | units: [512, 256, 128]
6 | priv_mlp:
7 | units: [256, 128, 8]
8 |
9 | pc_mlp:
10 | out_dim: 64
11 | units: [64,64]
12 |
13 | load_path: ${..checkpoint} # path to the checkpoint to load
14 |
15 | ppo:
16 | output_name: 'debug'
17 | normalize_input: True
18 | normalize_value: True
19 | normalize_pc: False
20 | normalize_proprio_hist: False
21 | value_bootstrap: True
22 | num_actors: ${...task.env.numEnvs}
23 | num_gradient_steps: ${...train.ppo.horizon_length}
24 | normalize_advantage: True
25 | gamma: 0.99
26 | tau: 0.95
27 | initEpsArm: 1.0
28 | initEpsHand: 1.0
29 | value_grads_to_pointnet: True
30 | point_cloud_input_to_value: False
31 | learning_rate: 1e-4
32 | kl_threshold: 0.02
33 | min_lr: 1e-6
34 | max_lr: 1e-4
35 | # PPO batch collection
36 | horizon_length: 10
37 | minibatch_size: 32768
38 | mini_epochs: 1
39 | # PPO loss setting
40 | clip_value: True
41 | critic_coef: 4
42 | entropy_coef: 0.0
43 | e_clip: 0.2
44 | bounds_loss_coef: 0.0001
45 | # grad clipping
46 | truncate_grads: True
47 | grad_norm: 1.0
48 | # snapshot setting
49 | save_best_after: 0
50 | save_frequency: 1250
51 | max_agent_steps: 5000000000
52 | critic_warmup_steps: -1
53 | # hora setting
54 | priv_info: False
55 | priv_info_dim: 9
56 | priv_info_embed_dim: 8
57 | proprio_adapt: False
58 | useMemoryEfficientBuffer: False
59 | dapg:
60 | l1: 0.1
61 | l2: 0.999
62 | dapg_threshold: 0.002
63 |
64 | wandb:
65 | activate: True
66 | entity: himanshu_singh
67 | project: grasping
68 |
--------------------------------------------------------------------------------
/cfg/train/AllegroXarmNewPPO.yaml:
--------------------------------------------------------------------------------
1 | # params:
2 | # seed: ${...seed}
3 |
4 | # algo:
5 | # name: a2c_continuous
6 |
7 | # model:
8 | # name: continuous_a2c_logstd
9 |
10 | # network:
11 | # name: a2c_pointnet
12 | # separate: False
13 |
14 | # space:
15 | # continuous:
16 | # mu_activation: None
17 | # sigma_activation: None
18 | # mu_init:
19 | # name: default
20 | # sigma_init:
21 | # name: const_initializer
22 | # val: 0
23 | # fixed_sigma: True
24 |
25 | # mlp:
26 | # units: [1024, 1024, 512, 512]
27 | # activation: elu
28 | # d2rl: False
29 | # initializer:
30 | # name: default
31 | # regularizer:
32 | # name: None
33 |
34 | # load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
35 | # load_path: ${...checkpoint} # path to the checkpoint to load
36 |
37 | # config:
38 | # name: ${resolve_default:AllegroKukaPPO,${....experiment}}
39 | # # full_experiment_name: ${.name}
40 | # env_name: rlgpu
41 | # multi_gpu: ${....multi_gpu}
42 | # ppo: True
43 | # mixed_precision: True
44 | # normalize_input: True
45 | # normalize_value: True
46 | # normalize_advantage: True
47 | # reward_shaper:
48 | # scale_value: 0.01
49 |
50 | # num_actors: ${....task.env.numEnvs}
51 | # gamma: 0.99
52 | # tau: 0.95
53 | # learning_rate: 1e-4
54 | # lr_schedule: adaptive
55 | # schedule_type: standard
56 | # kl_threshold: 0.016
57 | # score_to_win: 1000000
58 | # max_epochs: 100000
59 | # max_frames: 10_000_000_000
60 | # save_best_after: 100
61 | # save_frequency: 5000
62 | # print_stats: True
63 | # grad_norm: 1.0
64 | # entropy_coef: 0.0
65 | # truncate_grads: True
66 | # e_clip: 0.1
67 | # minibatch_size: 8192
68 | # mini_epochs: 4
69 | # critic_coef: 4.0
70 | # clip_value: True
71 | # horizon_length: 16
72 | # seq_length: 16
73 |
74 | # # SampleFactory currently gives better results without bounds loss but I don't think this loss matters too much
75 | # # bounds_loss_coef: 0.0
76 | # bounds_loss_coef: 0.0001
77 |
78 | # # optimize summaries to prevent tf.event files from growing to gigabytes
79 | # defer_summaries_sec: 5
80 | # summaries_interval_sec_min: 5
81 | # summaries_interval_sec_max: 300
82 |
83 | # player:
84 | # #render: True
85 | # deterministic: False # be careful there's a typo in older versions of rl_games in this parameter name ("determenistic")
86 | # games_num: 100000
87 | # print_stats: False
88 | seed: ${..seed}
89 | algo: PPOTransformer
90 | network:
91 | mlp:
92 | units: [512, 256, 128]
93 | priv_mlp:
94 | units: [256, 128, 8]
95 |
96 | pc_mlp:
97 | out_dim: 64
98 | units: [64,64]
99 |
100 | load_path: ${..checkpoint} # path to the checkpoint to load
101 |
102 | ppo:
103 | output_name: 'debug'
104 | normalize_input: True
105 | normalize_value: True
106 | normalize_pc: False
107 | normalize_proprio_hist: False
108 | value_bootstrap: True
109 | num_actors: ${...task.env.numEnvs}
110 | num_gradient_steps: ${...train.ppo.horizon_length}
111 | normalize_advantage: True
112 | gamma: 0.99
113 | tau: 0.95
114 | initEpsArm: 1.0
115 | initEpsHand: 1.0
116 | value_grads_to_pointnet: True
117 | point_cloud_input_to_value: True
118 | learning_rate: 1e-4
119 | kl_threshold: 0.02
120 | min_lr: 1e-6
121 | max_lr: 1e-4
122 | # PPO batch collection
123 | horizon_length: 10
124 | minibatch_size: 4096
125 | mini_epochs: 1
126 | # PPO loss setting
127 | clip_value: True
128 | critic_coef: 4
129 | entropy_coef: 0.0
130 | e_clip: 0.2
131 | bounds_loss_coef: 0.0001
132 | # grad clipping
133 | truncate_grads: True
134 | grad_norm: 1.0
135 | # snapshot setting
136 | save_best_after: 0
137 | save_frequency: 1250
138 | max_agent_steps: 5000000000
139 | critic_warmup_steps: -1
140 | # hora setting
141 | priv_info: False
142 | priv_info_dim: 9
143 | priv_info_embed_dim: 8
144 | proprio_adapt: False
145 | useMemoryEfficientBuffer: False
146 | dapg:
147 | l1: 0.1
148 | l2: 0.999
149 | dapg_threshold: 0.002
150 |
151 | wandb:
152 | activate: True
153 | entity: himanshu_singh
154 | project: grasping
155 |
--------------------------------------------------------------------------------
/cfg/train/AllegroXarmThrowingPPO.yaml:
--------------------------------------------------------------------------------
1 | # params:
2 | # seed: ${...seed}
3 |
4 | # algo:
5 | # name: a2c_continuous
6 |
7 | # model:
8 | # name: continuous_a2c_logstd
9 |
10 | # network:
11 | # name: a2c_pointnet
12 | # separate: False
13 |
14 | # space:
15 | # continuous:
16 | # mu_activation: None
17 | # sigma_activation: None
18 | # mu_init:
19 | # name: default
20 | # sigma_init:
21 | # name: const_initializer
22 | # val: 0
23 | # fixed_sigma: True
24 |
25 | # mlp:
26 | # units: [1024, 1024, 512, 512]
27 | # activation: elu
28 | # d2rl: False
29 | # initializer:
30 | # name: default
31 | # regularizer:
32 | # name: None
33 |
34 | # load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
35 | # load_path: ${...checkpoint} # path to the checkpoint to load
36 |
37 | # config:
38 | # name: ${resolve_default:AllegroKukaPPO,${....experiment}}
39 | # # full_experiment_name: ${.name}
40 | # env_name: rlgpu
41 | # multi_gpu: ${....multi_gpu}
42 | # ppo: True
43 | # mixed_precision: True
44 | # normalize_input: True
45 | # normalize_value: True
46 | # normalize_advantage: True
47 | # reward_shaper:
48 | # scale_value: 0.01
49 |
50 | # num_actors: ${....task.env.numEnvs}
51 | # gamma: 0.99
52 | # tau: 0.95
53 | # learning_rate: 1e-4
54 | # lr_schedule: adaptive
55 | # schedule_type: standard
56 | # kl_threshold: 0.016
57 | # score_to_win: 1000000
58 | # max_epochs: 100000
59 | # max_frames: 10_000_000_000
60 | # save_best_after: 100
61 | # save_frequency: 5000
62 | # print_stats: True
63 | # grad_norm: 1.0
64 | # entropy_coef: 0.0
65 | # truncate_grads: True
66 | # e_clip: 0.1
67 | # minibatch_size: 8192
68 | # mini_epochs: 4
69 | # critic_coef: 4.0
70 | # clip_value: True
71 | # horizon_length: 16
72 | # seq_length: 16
73 |
74 | # # SampleFactory currently gives better results without bounds loss but I don't think this loss matters too much
75 | # # bounds_loss_coef: 0.0
76 | # bounds_loss_coef: 0.0001
77 |
78 | # # optimize summaries to prevent tf.event files from growing to gigabytes
79 | # defer_summaries_sec: 5
80 | # summaries_interval_sec_min: 5
81 | # summaries_interval_sec_max: 300
82 |
83 | # player:
84 | # #render: True
85 | # deterministic: False # be careful there's a typo in older versions of rl_games in this parameter name ("determenistic")
86 | # games_num: 100000
87 | # print_stats: False
88 | seed: ${..seed}
89 | algo: PPO
90 | network:
91 | mlp:
92 | units: [512, 256, 128]
93 | priv_mlp:
94 | units: [256, 128, 8]
95 |
96 | pc_mlp:
97 | out_dim: 64
98 | units: [64,64]
99 |
100 | load_path: ${..checkpoint} # path to the checkpoint to load
101 |
102 | ppo:
103 | output_name: 'debug'
104 | normalize_input: True
105 | normalize_value: True
106 | normalize_pc: False
107 | normalize_proprio_hist: False
108 | value_bootstrap: True
109 | num_actors: ${...task.env.numEnvs}
110 | num_gradient_steps: ${...train.ppo.horizon_length}
111 | normalize_advantage: True
112 | gamma: 0.99
113 | tau: 0.95
114 | initEpsArm: 1.0
115 | initEpsHand: 1.0
116 | value_grads_to_pointnet: True
117 | point_cloud_input_to_value: True
118 | learning_rate: 1e-4
119 | kl_threshold: 0.02
120 | min_lr: 1e-6
121 | max_lr: 1e-4
122 | # PPO batch collection
123 | horizon_length: 10
124 | minibatch_size: 32768
125 | mini_epochs: 1
126 | # PPO loss setting
127 | clip_value: True
128 | critic_coef: 4
129 | entropy_coef: 0.0
130 | e_clip: 0.2
131 | bounds_loss_coef: 0.0001
132 | # grad clipping
133 | truncate_grads: True
134 | grad_norm: 1.0
135 | # snapshot setting
136 | save_best_after: 0
137 | save_frequency: 1250
138 | max_agent_steps: 5000000000
139 | critic_warmup_steps: -1
140 | # hora setting
141 | priv_info: False
142 | priv_info_dim: 9
143 | priv_info_embed_dim: 8
144 | proprio_adapt: False
145 | useMemoryEfficientBuffer: False
146 | dapg:
147 | l1: 0.1
148 | l2: 0.999
149 | dapg_threshold: 0.002
150 |
151 | wandb:
152 | activate: True
153 | entity: himanshu_singh
154 | project: grasping
155 |
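156 | # Note (sketch): this file sets algo: PPO and a large minibatch_size; scripts/finetune/finetune_throw.sh
157 | # overrides both from the command line (train.algo=PPOTransformer, train.ppo.minibatch_size=1365).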
--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
1 | name: rlgpu
2 | channels:
3 | - pytorch3d
4 | - pytorch
5 | - conda-forge
6 | - defaults
7 | dependencies:
8 | - _libgcc_mutex=0.1=conda_forge
9 | - _openmp_mutex=4.5=2_kmp_llvm
10 | - absl-py=2.1.0=pyhd8ed1ab_0
11 | - aiohttp=3.7.4.post0=py37h5e8e339_1
12 | - antlr-python-runtime=4.9.3=pyhd8ed1ab_1
13 | - appdirs=1.4.4=pyhd3eb1b0_0
14 | - async-timeout=3.0.1=py_1000
15 | - attrs=23.2.0=pyh71513ae_0
16 | - backcall=0.2.0=pyh9f0ad1d_0
17 | - backports=1.0=pyhd8ed1ab_3
18 | - backports.functools_lru_cache=2.0.0=pyhd8ed1ab_0
19 | - blas=1.0=mkl
20 | - blas-devel=3.9.0=16_linux64_mkl
21 | - blinker=1.6.3=pyhd8ed1ab_0
22 | - brotli=1.0.9=h5eee18b_7
23 | - brotli-bin=1.0.9=h5eee18b_7
24 | - brotli-python=1.0.9=py37hd23a5d3_7
25 | - bzip2=1.0.8=hd590300_5
26 | - c-ares=1.28.1=hd590300_0
27 | - ca-certificates=2024.7.2=h06a4308_0
28 | - cachetools=5.3.3=pyhd8ed1ab_0
29 | - certifi=2024.2.2=pyhd8ed1ab_0
30 | - cffi=1.15.1=py37h43b0acd_1
31 | - chardet=4.0.0=py37h89c1867_3
32 | - charset-normalizer=3.3.2=pyhd8ed1ab_0
33 | - click=8.1.3=py37h89c1867_0
34 | - cloudpickle=2.0.0=pyhd3eb1b0_0
35 | - cryptography=38.0.2=py37h5994e8b_1
36 | - cudatoolkit=11.1.1=hb139c0e_13
37 | - cycler=0.11.0=pyhd3eb1b0_0
38 | - dataclasses=0.8=pyh6d0b6a4_7
39 | - dbus=1.13.18=hb2f20db_0
40 | - debugpy=1.6.3=py37hd23a5d3_0
41 | - docker-pycreds=0.4.0=pyhd3eb1b0_0
42 | - einops=0.6.1=pyhd8ed1ab_0
43 | - entrypoints=0.4=pyhd8ed1ab_0
44 | - expat=2.5.0=h6a678d5_0
45 | - filelock=3.9.0=py37h06a4308_0
46 | - fontconfig=2.14.1=h52c9d5c_1
47 | - fonttools=4.25.0=pyhd3eb1b0_0
48 | - freetype=2.12.1=h267a509_2
49 | - fvcore=0.1.5.post20221221=pyhd8ed1ab_0
50 | - giflib=5.2.1=h5eee18b_3
51 | - gitdb=4.0.7=pyhd3eb1b0_0
52 | - gitpython=3.1.30=py37h06a4308_0
53 | - glib=2.78.4=h6a678d5_0
54 | - glib-tools=2.78.4=h6a678d5_0
55 | - gmp=6.3.0=h59595ed_1
56 | - gnutls=3.6.13=h85f3911_1
57 | - google-auth=2.23.0=pyh1a96a4e_0
58 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
59 | - grpc-cpp=1.48.1=h30feacc_1
60 | - grpcio=1.48.1=py37he7b19e7_1
61 | - gst-plugins-base=1.14.1=h6a678d5_1
62 | - gstreamer=1.14.1=h5eee18b_1
63 | - hydra-core=1.3.2=pyhd8ed1ab_0
64 | - icu=58.2=he6710b0_3
65 | - idna=3.6=pyhd8ed1ab_0
66 | - importlib-metadata=4.11.4=py37h89c1867_0
67 | - importlib_metadata=4.11.4=hd8ed1ab_0
68 | - importlib_resources=5.2.0=pyhd3eb1b0_1
69 | - intel-openmp=2022.1.0=h9e868ea_3769
70 | - iopath=0.1.9=pyhd8ed1ab_0
71 | - ipykernel=6.16.2=pyh210e3f2_0
72 | - ipython=7.33.0=py37h89c1867_0
73 | - jedi=0.19.1=pyhd8ed1ab_0
74 | - joblib=1.1.1=py37h06a4308_0
75 | - jpeg=9b=h024ee3a_2
76 | - jupyter_client=7.4.9=pyhd8ed1ab_0
77 | - jupyter_core=4.11.1=py37h89c1867_0
78 | - kiwisolver=1.4.4=py37h6a678d5_0
79 | - lame=3.100=h166bdaf_1003
80 | - lcms2=2.12=h3be6417_0
81 | - ld_impl_linux-64=2.40=h41732ed_0
82 | - libabseil=20220623.0=cxx17_h05df665_6
83 | - libblas=3.9.0=16_linux64_mkl
84 | - libbrotlicommon=1.0.9=h5eee18b_7
85 | - libbrotlidec=1.0.9=h5eee18b_7
86 | - libbrotlienc=1.0.9=h5eee18b_7
87 | - libcblas=3.9.0=16_linux64_mkl
88 | - libffi=3.4.2=h7f98852_5
89 | - libgcc-ng=13.2.0=h807b86a_5
90 | - libgfortran-ng=13.2.0=h69a702a_5
91 | - libgfortran5=13.2.0=ha4646dd_5
92 | - libglib=2.78.4=hdc74915_0
93 | - libhwloc=2.8.0=h32351e8_1
94 | - libiconv=1.17=hd590300_2
95 | - liblapack=3.9.0=16_linux64_mkl
96 | - liblapacke=3.9.0=16_linux64_mkl
97 | - libnsl=2.0.1=hd590300_0
98 | - libpng=1.6.43=h2797004_0
99 | - libprotobuf=3.21.8=h6239696_0
100 | - libsodium=1.0.18=h36c2ea0_1
101 | - libsqlite=3.45.2=h2797004_0
102 | - libstdcxx-ng=13.2.0=h7e041cc_5
103 | - libtiff=4.2.0=h85742a9_0
104 | - libuuid=1.41.5=h5eee18b_0
105 | - libuv=1.48.0=hd590300_0
106 | - libwebp=1.2.0=h89dd481_0
107 | - libwebp-base=1.2.0=h27cfd23_0
108 | - libxcb=1.15=h7f8727e_0
109 | - libxml2=2.9.14=h74e7548_0
110 | - libzlib=1.2.13=hd590300_5
111 | - llvm-openmp=14.0.6=h9e868ea_0
112 | - lz4-c=1.9.4=h6a678d5_0
113 | - markdown=3.6=pyhd8ed1ab_0
114 | - markupsafe=2.1.1=py37h540881e_1
115 | - matplotlib=3.5.3=py37h06a4308_0
116 | - matplotlib-base=3.5.3=py37hf590b9c_0
117 | - matplotlib-inline=0.1.7=pyhd8ed1ab_0
118 | - mkl=2022.1.0=hc2b9512_224
119 | - mkl-devel=2022.1.0=h66538d2_224
120 | - mkl-include=2022.1.0=h06a4308_224
121 | - multidict=6.0.2=py37h540881e_1
122 | - munkres=1.1.4=py_0
123 | - ncurses=6.4.20240210=h59595ed_0
124 | - nest-asyncio=1.6.0=pyhd8ed1ab_0
125 | - nettle=3.6=he412f7d_0
126 | - numpy=1.21.6=py37h976b520_0
127 | - oauthlib=3.2.2=pyhd8ed1ab_0
128 | - olefile=0.47=pyhd8ed1ab_0
129 | - omegaconf=2.3.0=pyhd8ed1ab_0
130 | - openh264=2.1.1=h780b84a_0
131 | - openssl=3.2.1=hd590300_1
132 | - packaging=22.0=py37h06a4308_0
133 | - parso=0.8.4=pyhd8ed1ab_0
134 | - pathtools=0.1.2=pyhd3eb1b0_1
135 | - pcre2=10.42=hebb0a14_0
136 | - pexpect=4.9.0=pyhd8ed1ab_0
137 | - pickleshare=0.7.5=py_1003
138 | - pip=24.0=pyhd8ed1ab_0
139 | - portalocker=2.3.0=py37h06a4308_0
140 | - prompt-toolkit=3.0.42=pyha770c72_0
141 | - psutil=5.9.0=py37h5eee18b_0
142 | - ptyprocess=0.7.0=pyhd3deb0d_0
143 | - pyasn1=0.5.1=pyhd8ed1ab_0
144 | - pyasn1-modules=0.3.0=pyhd8ed1ab_0
145 | - pycparser=2.21=pyhd8ed1ab_0
146 | - pygments=2.17.2=pyhd8ed1ab_0
147 | - pyjwt=2.8.0=pyhd8ed1ab_1
148 | - pyopenssl=23.2.0=pyhd8ed1ab_1
149 | - pyparsing=3.0.9=py37h06a4308_0
150 | - pyqt=5.6.0=py37h22d08a2_6
151 | - pysocks=1.7.1=py37h89c1867_5
152 | - python=3.7.12=hf930737_100_cpython
153 | - python-dateutil=2.8.2=pyhd3eb1b0_0
154 | - python_abi=3.7=4_cp37m
155 | - pytorch=1.8.1=py3.7_cuda11.1_cudnn8.0.5_0
156 | - pytorch3d=0.7.0=py37_cu111_pyt181
157 | - pyu2f=0.1.5=pyhd8ed1ab_0
158 | - pyyaml=6.0=py37h540881e_4
159 | - pyzmq=24.0.1=py37h0c0c2a8_0
160 | - qt=5.6.3=h8bf5577_3
161 | - re2=2022.06.01=h27087fc_1
162 | - readline=8.2=h8228510_1
163 | - regex=2022.7.9=py37h5eee18b_0
164 | - requests=2.31.0=pyhd8ed1ab_0
165 | - requests-oauthlib=2.0.0=pyhd8ed1ab_0
166 | - rsa=4.9=pyhd8ed1ab_0
167 | - ruamel=1.0=py37h06a4308_2
168 | - ruamel.yaml=0.17.21=py37h5eee18b_0
169 | - ruamel.yaml.clib=0.2.6=py37h5eee18b_1
170 | - scipy=1.7.3=py37hf2a6cf1_0
171 | - sentry-sdk=1.9.0=py37h06a4308_0
172 | - setproctitle=1.2.2=py37h27cfd23_1004
173 | - setuptools=69.0.3=pyhd8ed1ab_0
174 | - sip=4.18.1=py37h295c915_2
175 | - six=1.16.0=pyh6c4a22f_0
176 | - smmap=4.0.0=pyhd3eb1b0_0
177 | - sqlite=3.45.2=h2c6b66d_0
178 | - tabulate=0.8.10=py37h06a4308_0
179 | - tbb=2021.8.0=hdb19cb5_0
180 | - tensorboard=2.11.2=pyhd8ed1ab_0
181 | - tensorboard-data-server=0.6.1=py37h52d8a92_0
182 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
183 | - termcolor=2.1.0=py37h06a4308_0
184 | - tk=8.6.13=noxft_h4845f30_101
185 | - tokenizers=0.13.1=py37hfb4b0a8_0
186 | - torchvision=0.9.1=py37_cu111
187 | - tornado=6.2=py37h5eee18b_0
188 | - tqdm=4.64.1=py37h06a4308_0
189 | - traitlets=5.9.0=pyhd8ed1ab_0
190 | - transformers=4.24.0=py37h06a4308_0
191 | - typing-extensions=4.7.1=hd8ed1ab_0
192 | - typing_extensions=4.7.1=pyha770c72_0
193 | - urllib3=1.26.18=pyhd8ed1ab_0
194 | - wcwidth=0.2.10=pyhd8ed1ab_0
195 | - werkzeug=2.2.3=pyhd8ed1ab_0
196 | - wheel=0.42.0=pyhd8ed1ab_0
197 | - xlrd=2.0.1=pyhd3eb1b0_1
198 | - xz=5.2.6=h166bdaf_0
199 | - yacs=0.1.6=pyhd3eb1b0_1
200 | - yaml=0.2.5=h7f98852_2
201 | - yarl=1.7.2=py37h540881e_2
202 | - zeromq=4.3.5=h59595ed_1
203 | - zipp=3.15.0=pyhd8ed1ab_0
204 | - zlib=1.2.13=hd590300_5
205 | - zstd=1.4.9=haebb681_0
206 | - pip:
207 | - backports-cached-property==1.0.2
208 | - decorator==4.4.2
209 | - diffusers==0.21.4
210 | - docstring-parser==0.16
211 | - eval-type-backport==0.1.3
212 | - ffmpeg==1.4
213 | - freetype-py==2.4.0
214 | - fsspec==2023.1.0
215 | - gym==0.23.1
216 | - gym-notices==0.0.8
217 | - h5py==3.8.0
218 | - huggingface-hub==0.16.4
219 | - imageio==2.19.2
220 | - imageio-ffmpeg==0.4.9
221 | - jinja2==3.1.4
222 | - lxml==5.2.2
223 | - markdown-it-py==2.2.0
224 | - mdurl==0.1.2
225 | - mediapy==1.1.2
226 | - moviepy==1.0.3
227 | - natsort==8.4.0
228 | - networkx==2.2
229 | - ninja==1.11.1.1
230 | - nltk==3.8.1
231 | - numexpr==2.8.6
232 | - opencv-python==4.9.0.80
233 | - palettable==3.3.3
234 | - pandas==1.3.5
235 | - pillow==9.5.0
236 | - platformdirs==4.0.0
237 | - proglog==0.1.10
238 | - promise==2.3
239 | - protobuf==3.20.3
240 | - pycollada==0.6
241 | - pyglet==2.0.10
242 | - pyopengl==3.1.0
243 | - pyrender==0.1.45
244 | - pysdf==0.1.9
245 | - pytz==2024.1
246 | - pyvirtualdisplay==3.0
247 | - rich==13.7.1
248 | - rl-games==1.6.1
249 | - safetensors==0.4.3
250 | - scikit-learn==1.0.2
251 | - seaborn==0.12.2
252 | - sentence-transformers==2.2.2
253 | - sentencepiece==0.2.0
254 | - shortuuid==1.0.13
255 | - shtab==1.7.1
256 | - tables==3.7.0
257 | - tensorboardx==2.6.2.2
258 | - threadpoolctl==3.1.0
259 | - transforms3d==0.4.1
260 | - trimesh==3.23.5
261 | - tyro==0.8.4
262 | - urdfpy==0.0.22
263 | - wandb==0.17.0
264 | - warp-lang==0.10.1
265 | prefix: /home/himanshu/anaconda3/envs/rlgpu
266 |
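267 | # Note (assumed usage): the environment can typically be created with `conda env create -f env.yml`
268 | # and activated with `conda activate rlgpu`; the prefix above reflects the original author's local
269 | # Anaconda install and is ignored when the file is used on another machine.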
--------------------------------------------------------------------------------
/imgs/approach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/imgs/approach.png
--------------------------------------------------------------------------------
/scripts/finetune.py:
--------------------------------------------------------------------------------
1 | import isaacgym
2 | import os
3 | import hydra
4 | import datetime
5 | from termcolor import cprint
6 | from omegaconf import DictConfig, OmegaConf
7 | from hydra.utils import to_absolute_path
8 | import wandb
9 | from algo.ppo_transformer.ppo_transformer import PPOTransformer
10 | from tasks import isaacgym_task_map
11 | from utils.reformat import omegaconf_to_dict, print_dict
12 | from utils.utils import set_np_formatting, set_seed, git_hash, git_diff_config
13 | from utils.logger import Logger
14 | import torch
15 | import torch.distributed as dist
16 | import torch.multiprocessing as mp
17 |
18 | def main(rank, world_size, config):
19 |
20 | print(config.task_name)
21 | if world_size > 1:
22 | dist.init_process_group("nccl", rank=rank, world_size=world_size)
23 | global_rank = rank
24 | seed = config.seed + global_rank
25 | else:
26 | global_rank = rank
27 | seed = config.seed
28 |
29 | if config.checkpoint:
30 | config.checkpoint = to_absolute_path(config.checkpoint)
31 |
32 | # set numpy formatting for printing only
33 | set_np_formatting()
34 |
35 | # sets seed. if seed is -1 will pick a random one
36 | _ = set_seed(seed)
37 |
38 | print(f"global_rank = {global_rank} seed = {seed}")
39 |
40 | if config.wandb_activate and not config.test and (global_rank == 0 or world_size ==1):
41 | wandb_logger = wandb.init(project=config.wandb_project, name=config.wandb_name, config=omegaconf_to_dict(config))
42 | else:
43 | wandb_logger=None
44 |
45 | if (global_rank == 0 or world_size == 1):
46 | output_dir = os.path.join('outputs', config.wandb_name)
47 | logger = Logger(output_dir, summary_writer=wandb_logger)
48 | else:
49 | logger = None
50 |
51 | cprint('Start Building the Environment', 'green', attrs=['bold'])
52 |
53 |
54 | if config.num_gpus > 1:
55 | rl_device = f'cuda:{global_rank}'
56 | sim_device = f'cuda:{global_rank}'
57 | graphics_id = global_rank
58 | else:
59 | rl_device = config.rl_device
60 | sim_device = config.sim_device
61 | graphics_id = config.graphics_device_id
62 |
63 | env = isaacgym_task_map[config.task_name](
64 | cfg=omegaconf_to_dict(config.task),
65 | rl_device = rl_device,
66 | sim_device=sim_device,
67 | graphics_device_id=graphics_id,
68 | headless=config.headless,
69 | virtual_screen_capture=config.capture_video,
70 | force_render=config.force_render,
71 | )
72 |
73 | #for debugging
74 | if config.train.algo == 'PPOTransformer':
75 | if env.use_obs_as_prop:
76 | config.pretrain.model.proprio_dim = env.full_state_size
77 | config.train.network = config.pretrain.model
78 | config.task.env.stage2_hist_len = config.pretrain.model.context_length
79 | # Load the model to finetune
80 |
81 |
82 | agent = eval(config.train.algo)(env, config=config,logger=logger, rank=global_rank)
83 |
84 | if config.test:
85 | # agent.restore_test(config.train.load_path)
86 | assert config.checkpoint is not None
87 | print(config.checkpoint)
88 | #agent.model.actor.load_state_dict(torch.load(config.checkpoint))
89 | agent.restore_test(config.checkpoint)
90 | #breakpoint()
91 | agent.test(name=config.wandb_name)
92 | else:
93 | if rank <= 0:
94 | date = str(datetime.datetime.now().strftime('%m%d%H'))
95 | if config.wandb_activate:
96 | pid = os.getpid()
97 | wandb.log({'pid': pid})
98 | #cprint(git_diff_config('./'),color='green',attrs=['bold'])
99 | #os.system(f'git diff HEAD > {output_dir}/gitdiff.patch')
100 | #with open(os.path.join(output_dir, f'config_{date}_{git_hash()}.yaml'), 'w') as f:
101 | # f.write(OmegaConf.to_yaml(config))
102 |
103 | if config.train.load_path == '':
104 | cprint("Train model from scratch", 'green', attrs=['bold'])
105 | agent.train()
106 | else:
107 | agent.restore_train(config.train.load_path)
108 | cprint("Loaded actor model from: " + config.train.load_path, 'green', attrs=['bold'])
109 | agent.train()
110 |
111 | if config.wandb_activate and (global_rank==0 or world_size==1):
112 | wandb.finish()
113 |
114 |
115 | @hydra.main(config_name='config', config_path='../cfg/')
116 | def main_multi_gpu(config: DictConfig):
117 | if config.test:
118 | # single gpu testing only!
119 | config.num_gpus = 1
120 | world_size = config.num_gpus
121 | if world_size > 1:
122 | mp.spawn(main,
123 | args=(world_size, config),
124 | nprocs=world_size,
125 | join=True)
126 | else:
127 | rank = 0 #config.sim_device.split(":")[1]
128 | main(rank, 1, config)
129 |
130 |
131 | if __name__ == '__main__':
132 | os.environ["MASTER_ADDR"] = "localhost"
133 | #randomize port address
134 |
135 | os.environ["MASTER_PORT"] = "29435"
136 | main_multi_gpu()
137 |
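138 | # Example invocation (sketch; see scripts/finetune/*.sh and scripts/run_policy.sh for full commands):
139 | #   python scripts/finetune.py task=AllegroXarmNew train.algo=PPOTransformer \
140 | #       checkpoint=<path/to/pretrained_or_finetuned_checkpoint> num_gpus=1 seed=-1
141 | # Passing test=True together with checkpoint=<...> evaluates a policy instead of training it.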
--------------------------------------------------------------------------------
/scripts/finetune/finetune_cabinet.sh:
--------------------------------------------------------------------------------
1 | cmd="python scripts/finetune.py num_gpus=8 \
2 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\
3 | task=AllegroXarmCabinet \
4 | train.algo=PPOTransformer \
5 | train.ppo.initEpsHand=0.5 \
6 | train.ppo.initEpsArm=0.5 \
7 | train.ppo.value_grads_to_pointnet=False \
8 | train.ppo.critic_warmup_steps=200 \
9 | train.ppo.learning_rate=1e-5 \
10 | wandb_activate=True wandb_name=AllegroXarmCabinet_finetune_datamix_pretraining_initeps_0.5 \
11 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \
12 | train.ppo.minibatch_size=512 num_envs=512 \
13 | seed=-1"
14 |
15 | echo $cmd
16 | eval $cmd
17 |
--------------------------------------------------------------------------------
/scripts/finetune/finetune_grasp.sh:
--------------------------------------------------------------------------------
1 | cmd="python scripts/finetune.py num_gpus=4 \
2 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\
3 | task=AllegroXarmNew \
4 | train.algo=PPOTransformer \
5 | train.ppo.initEpsHand=0.1 \
6 | train.ppo.initEpsArm=0.1 \
7 | train.ppo.learning_rate=1e-5 \
8 | train.ppo.value_grads_to_pointnet=False \
9 | train.ppo.critic_warmup_steps=200 \
10 | wandb_activate=True wandb_name=AllegroXarmGrasping_finetune_datamix_pretraining \
11 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \
12 | train.ppo.minibatch_size=512 num_envs=512 \
13 | seed=-1"
14 |
15 | echo $cmd
16 | eval $cmd
17 | #algo/pretrained/models/Policy_noise01_l4h4_ctx_16_shift0_scaled_inputs_new_setup/dt_17-04-2024_23-42-00/model_step_711071.pt
18 |
--------------------------------------------------------------------------------
/scripts/finetune/finetune_throw.sh:
--------------------------------------------------------------------------------
1 | # cmd="python scripts/finetune.py num_gpus=8 \
2 | # checkpoint="algo/pretrained/models/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\
3 | # task=AllegroXarmThrowing \
4 | # train.algo=PPOTransformer \
5 | # train.ppo.value_grads_to_pointnet=False \
6 | # train.ppo.critic_warmup_steps=200 \
7 | # train.ppo.learning_rate=1e-5 \
8 | # train.ppo.initEpsHand=0.1 \
9 | # train.ppo.initEpsArm=0.1 \
10 | # wandb_activate=True wandb_name=AllegroXarmThrowing_finetune_datamix_pretraining_eps_20 \
11 | # pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \
12 | # train.ppo.minibatch_size=512 num_envs=512 \
13 | # seed=20"
14 |
15 | # echo $cmd
16 | # eval $cmd
17 | cmd="python scripts/finetune.py num_gpus=3 \
18 | checkpoint="outputs/Policy_noise01_l4h4_ctx_16_data_mix_simrob/dt_25-05-2024_07-02-31/model_step_831207.pt"\
19 | task=AllegroXarmThrowing \
20 | train.algo=PPOTransformer \
21 | train.ppo.value_grads_to_pointnet=False \
22 | train.ppo.critic_warmup_steps=200 \
23 | train.ppo.learning_rate=1e-5 \
24 | train.ppo.initEpsHand=0.1 \
25 | train.ppo.initEpsArm=0.1 \
26 | wandb_activate=True wandb_name=AllegroXarmThrowing_noobj_pretraining \
27 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \
28 | train.ppo.minibatch_size=1365 num_envs=1365 \
29 | seed=-1"
30 |
31 | echo $cmd
32 | eval $cmd
33 |
--------------------------------------------------------------------------------
/scripts/pretrain.py:
--------------------------------------------------------------------------------
1 | import isaacgym
2 | from tasks import isaacgym_task_map
3 | import torch
4 | from torch.utils.data import DataLoader
5 | from omegaconf import DictConfig, OmegaConf
6 | from termcolor import cprint
7 | import wandb
8 | from torch.optim import Adam, AdamW
9 | from algo.pretrained.trainer import RobotTrainer
10 | import wandb
11 | from algo.pretrained.robot_transformer_ar import RobotTransformerAR
12 | from algo.pretrained.robot_dataset import RobotDataset , collate_fn
13 | import os
14 | from datetime import datetime
15 | import json
16 | import hydra
17 | from utils.reformat import omegaconf_to_dict, print_dict
18 | from utils.utils import set_np_formatting, set_seed
19 | from utils.logger import Logger
20 | import random
21 | import numpy as np
22 | from torch.optim.lr_scheduler import CosineAnnealingLR
23 | import imageio
24 |
25 | @hydra.main(config_name='config', config_path='../cfg/')
26 | def main(config: DictConfig):
27 |
28 |
29 | device = config.pretrain.device
30 | config.seed = set_seed(config.seed)
31 |
32 | capture_video = config.task.env.enableVideoLog
33 |
34 | if config.pretrain.wandb_activate:
35 | wandb.init(project="manipulation-pretraining",
36 | name=config.pretrain.wandb_name,
37 | config=omegaconf_to_dict(config))
38 |
39 | tmodel = RobotTransformerAR
40 |
41 | if config.pretrain.test:
42 |
43 | model = tmodel(
44 | cfg=config
45 | )
46 |
47 | model = model.to(device)
48 |
49 | model.eval()
50 |
51 | assert config.pretrain.checkpoint != ''
52 | # set numpy formatting for printing only
53 | set_np_formatting()
54 |
55 |
56 | if config.pretrain.wandb_activate:
57 | wandb_logger = wandb.init(project=config.wandb_project,
58 | name=config.pretrain.wandb_name,
59 | entity=config.wandb_entity,
60 | config=omegaconf_to_dict(config),
61 | sync_tensorboard=True)
62 | else:
63 | wandb_logger=None
64 |
65 | output_dir = os.path.join('outputs', config.wandb_name)
66 | logger = Logger(output_dir, summary_writer=wandb_logger)
67 |
68 | cprint('Start Building the Environment', 'green', attrs=['bold'])
69 |
70 | env = isaacgym_task_map[config.task_name](
71 | cfg=omegaconf_to_dict(config.task),
72 | pretrain_cfg=omegaconf_to_dict(config.pretrain),
73 | rl_device = config.rl_device,
74 | sim_device=config.sim_device,
75 | graphics_device_id=config.graphics_device_id,
76 | headless=config.headless,
77 | virtual_screen_capture=config.capture_video,
78 | force_render=config.force_render
79 | )
80 |
81 | model.load_state_dict(torch.load(config.pretrain.checkpoint,map_location=device))
82 |
83 | cprint(f"Model loaded from {config.pretrain.checkpoint}", color='green', attrs=['bold'])
84 |
85 | model.run_multi_env(env, cfg=config)
86 |
87 | return
88 |
89 | else:
90 |
91 | if config.pretrain.wandb_activate:
92 | wandb_logger = wandb.init(project=config.wandb_project, name=config.wandb_name,
93 | entity=config.wandb_entity, config=omegaconf_to_dict(config))
94 | else:
95 | wandb_logger=None
96 |
97 | train_dataset = RobotDataset(cfg=config, root=config.pretrain.training.root_dir)
98 | val_dataset = RobotDataset(cfg=config, root=config.pretrain.validation.root_dir)
99 |
100 | max_ep_len = max(train_dataset.max_ep_len, val_dataset.max_ep_len)
101 |
102 | cprint(f"Dataloader built", color='green', attrs=['bold'])
103 |
104 | model = tmodel(
105 | cfg=config,
106 | max_ep_len=max_ep_len
107 | )
108 |
109 | model = model.to(device)
110 |
111 | if config.pretrain.training.model_save_dir is not None:
112 | save_dir = config.pretrain.training.model_save_dir
113 | # Create the saving directory using the wandb name and the date and time
114 | os.makedirs(save_dir, exist_ok=True)
115 | #get date and time
116 | now = datetime.now()
117 | dt_string = now.strftime("%d-%m-%Y_%H-%M-%S")
118 | experiment_folder = os.path.join(save_dir, f'{config.pretrain.wandb_name}', f'dt_{dt_string}')
119 | # create the experiment folder if not exists
120 | os.makedirs(experiment_folder, exist_ok=True)
121 | json.dump(OmegaConf.to_container(config), open(os.path.join(experiment_folder, 'config.json'), 'w'))
122 | logger = Logger(experiment_folder, summary_writer=wandb_logger)
123 |
124 | else:
125 | save_dir = experiment_folder = None  # keep experiment_folder defined for the trainer below
126 | logger = None
127 |
128 | cprint(f"Model built", color='green', attrs=['bold'])
129 |
130 | if config.pretrain.training.load_checkpoint:
131 | assert os.path.exists(config.pretrain.checkpoint), f"Checkpoint {config.pretrain.checkpoint} does not exist"
132 | model.load_state_dict(torch.load(config.pretrain.checkpoint,map_location=device))
133 | model.train()
134 | cprint(f"Model loaded from {config.pretrain.checkpoint}", color='green', attrs=['bold'])
135 |
136 | scheduler = None #CosineAnnealingLR(optimizer, T_max=10000, eta_min=1e-6)
137 | optimizer = AdamW(model.parameters(), lr=config.pretrain.training.lr, weight_decay=config.pretrain.training.weight_decay)
138 | loss_fn = torch.nn.L1Loss() #torch.nn.MSELoss()
139 |
140 | trainer = RobotTrainer(
141 | model = model,
142 | optimizer = optimizer,
143 | scheduler = scheduler,
144 | train_dataset = train_dataset,
145 | val_dataset = val_dataset,
146 | collate_fn=collate_fn,
147 | loss_fn = loss_fn,
148 | model_save_dir = experiment_folder,
149 | logger = logger,
150 | config=config
151 | )
152 |
153 | if capture_video:
154 | assert config.pretrain.wandb_activate, "Video capture requires wandb activation"
155 | # create the environment to capture the video
156 | env = isaacgym_task_map[config.task_name](
157 | cfg=omegaconf_to_dict(config.task),
158 | pretrain_cfg=omegaconf_to_dict(config.pretrain),
159 | rl_device = config.pretrain.device,
160 | sim_device=config.pretrain.device,
161 | graphics_device_id=config.graphics_device_id,
162 | headless=config.headless,
163 | virtual_screen_capture=config.capture_video,
164 | force_render=config.force_render
165 | )
166 |
167 | for i in range(config.pretrain.training.num_epochs):
168 | cprint("Training iteration {}".format(i), color='magenta', attrs=['bold'])
169 | outputs = trainer.train_epoch(iter_num=i,
170 | print_logs=True)
171 | if config.pretrain.wandb_activate:
172 | wandb.log(outputs, commit=True)
173 |
174 |
175 | if capture_video:
176 | fps = int(1/(config.task.sim.dt*config.task.env.controlFrequencyInv))
177 | print(f"Capturing video from simulation")
178 | env.start_video_recording()
179 | info_dict = model.run_multi_env(env, cfg=config)
180 | video_frames = env.stop_video_recording()
181 | video_path = os.path.join(experiment_folder, f'{config.pretrain.wandb_name}_video.mp4')
182 | video_frames = [np.array(frame.detach().cpu()).astype(np.uint8) for frame in video_frames]
183 | imageio.mimsave(video_path, video_frames, fps=fps)
184 | env.video_frames = []
185 |
186 |
187 |
188 | if __name__ == '__main__':
189 | main()
190 |
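191 | # Example invocation (sketch): training is normally launched through scripts/pretrain.sh with a dataset
192 | # directory; to roll out a pretrained checkpoint in simulation instead, something like
193 | #   python scripts/pretrain.py pretrain.test=True pretrain.checkpoint=<path/to/model_step_*.pt>
194 | # hits the evaluation branch above (the exact flags depend on cfg/pretrain/*.yaml).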
--------------------------------------------------------------------------------
/scripts/pretrain.sh:
--------------------------------------------------------------------------------
1 |
2 |
3 | DATADIR=$1
4 | CMD="python scripts/pretrain.py num_gpus=4 headless=True \
5 | track_pose=False get_target_reference=False num_envs=25 \
6 | pc_input=True pipeline=cuda rl_device=cuda:0 sim_device=cuda:0 \
7 | pretrain.training.root_dir=$DATADIR/train \
8 | pretrain.validation.root_dir=$DATADIR/val pretrain.wandb_activate=True \
9 | pretrain.wandb_name=Policy_noise01_l4h4_ctx_16_data_mix_simrob seed=-1 \
10 | task.env.enableVideoLog=True \
11 | task.env.episodeLength=400"
12 |
13 | echo $CMD
14 | eval $CMD
15 |
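16 | # Usage (sketch): pass the dataset root as the first argument, e.g.
17 | #   bash scripts/pretrain.sh /path/to/dataset
18 | # where the directory is expected to contain train/ and val/ subfolders
19 | # (used above as pretrain.training.root_dir and pretrain.validation.root_dir).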
--------------------------------------------------------------------------------
/scripts/run_policy.sh:
--------------------------------------------------------------------------------
1 | POLICY=$1 #"outputs/AllegroXarmGrasping_scratch_vel_control/2024-05-29_00-49/stage1_nn/ep_41700_step_1708M_reward_1876.28.pth"
2 | cmd="python scripts/finetune.py num_gpus=1 \
3 | task=AllegroXarmNew test=True headless=False \
4 | checkpoint=$POLICY \
5 | train.algo=PPOTransformer \
6 | wandb_activate=False wandb_name=AllegroXarmGrasping_Finetuned \
7 | pipeline=gpu rl_device=cuda:0 sim_device=cuda:0 \
8 | train.ppo.minibatch_size=16 num_envs=16 \
9 | task.env.episodeLength=600 \
10 | task.env.maxConsecutiveSuccesses=1 \
11 | pc_input=True \
12 | seed=-1"
13 |
14 | echo $cmd
15 | eval $cmd
16 |
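17 | # Usage (sketch): pass the checkpoint to evaluate as the first argument, e.g.
18 | #   bash scripts/run_policy.sh outputs/<run_name>/stage1_nn/<checkpoint>.pth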
--------------------------------------------------------------------------------
/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018-2023, NVIDIA Corporation
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 |
30 |
31 | from tasks.xarm_grasping_new import AllegroXarmGraspingNew
32 | from tasks.xarm_throwing import AllegroXarmThrowing
33 | from tasks.xarm_cabinet import AllegroXarmCabinet
34 |
35 |
36 | # Mappings from strings to environments
37 | isaacgym_task_map = {
38 | "AllegroXarmNew": AllegroXarmGraspingNew,
39 | "AllegroXarmThrowing": AllegroXarmThrowing,
40 | "AllegroXarmCabinet": AllegroXarmCabinet
41 | # "XarmReaching" : XarmReaching
42 | }
43 |
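44 | # Example (sketch): the training scripts resolve the environment class from this map, roughly
45 | #   env = isaacgym_task_map[config.task_name](cfg=omegaconf_to_dict(config.task), ...)
46 | # with config.task_name set to one of "AllegroXarmNew", "AllegroXarmThrowing" or "AllegroXarmCabinet".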
--------------------------------------------------------------------------------
/tasks/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/allegro_kuka_grasping.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/allegro_kuka_grasping.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/allegro_kuka_grasping.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/allegro_kuka_grasping.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/torch_jit_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/torch_jit_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/torch_jit_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/torch_jit_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_cabinet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_cabinet.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_cabinet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_cabinet.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping_debug.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_debug.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping_debug.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_debug.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping_new.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_new.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping_new.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_new.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_grasping_real.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_grasping_real.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_throwing.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_throwing.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/__pycache__/xarm_throwing.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/__pycache__/xarm_throwing.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/base/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018-2023, NVIDIA Corporation
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/tasks/base/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/base/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/base/__pycache__/vec_task.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/vec_task.cpython-37.pyc
--------------------------------------------------------------------------------
/tasks/base/__pycache__/vec_task.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/tasks/base/__pycache__/vec_task.cpython-38.pyc
--------------------------------------------------------------------------------
/tasks/xarm7_utils.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) 2018-2023, NVIDIA Corporation
3 | # All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | from __future__ import annotations
31 |
32 | from dataclasses import dataclass
33 | from typing import Tuple, Dict, List
34 |
35 | from torch import Tensor
36 |
37 |
38 | @dataclass
39 | class DofParameters:
40 | """Joint/dof parameters."""
41 | xarm_stiffness: float
42 | xarm_effort: List[float] # separate per DOF
43 | xarm_damping: float
44 | xarm_velocity: float
45 | xarm_armature: float
46 | dof_friction: float
47 |
48 | @staticmethod
49 | def from_cfg(cfg: Dict) -> DofParameters:
50 | return DofParameters(
51 | xarm_stiffness=cfg["env"]["kukaStiffness"],
52 | xarm_effort=cfg["env"]["kukaEffort"],
53 | xarm_damping=cfg["env"]["kukaDamping"],
54 | xarm_velocity=cfg["env"]["kukaVelocity"],
55 | xarm_armature=cfg["env"]["kukaArmature"],
56 | dof_friction=cfg["env"]["dofFriction"],
57 | )
58 |
59 |
60 | def populate_dof_properties(arm_dof_props, params: DofParameters, arm_dofs: int) -> None:
61 | assert len(arm_dof_props["stiffness"]) == arm_dofs
62 |
63 | arm_dof_props["stiffness"].fill(params.xarm_stiffness)
64 |
65 | assert len(params.xarm_effort) == arm_dofs
66 | arm_dof_props["effort"] = params.xarm_effort
67 | arm_dof_props["velocity"] = params.xarm_velocity
68 | arm_dof_props["damping"].fill(params.xarm_damping)
69 |
70 | if params.dof_friction >= 0:
71 | arm_dof_props["friction"].fill(params.dof_friction)
72 |
73 | arm_dof_props["armature"].fill(params.xarm_armature)
74 |
75 | def tolerance_curriculum(
76 | last_curriculum_update: int,
77 | frames_since_restart: int,
78 | curriculum_interval: int,
79 | prev_episode_successes: Tensor,
80 | success_tolerance: float,
81 | initial_tolerance: float,
82 | target_tolerance: float,
83 | tolerance_curriculum_increment: float,
84 | ) -> Tuple[float, int]:
85 | """
86 | Returns: new tolerance, new last_curriculum_update
87 | """
88 | if frames_since_restart - last_curriculum_update < curriculum_interval:
89 | return success_tolerance, last_curriculum_update
90 |
91 | mean_successes_per_episode = prev_episode_successes.mean()
92 | if mean_successes_per_episode < 3.0:
93 | # this policy is not good enough with the previous tolerance value, keep training for now...
94 | return success_tolerance, last_curriculum_update
95 |
96 | # decrease the tolerance now
97 | success_tolerance *= tolerance_curriculum_increment
98 | success_tolerance = min(success_tolerance, initial_tolerance)
99 | success_tolerance = max(success_tolerance, target_tolerance)
100 |
101 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}")
102 |
103 | last_curriculum_update = frames_since_restart
104 | return success_tolerance, last_curriculum_update
105 |
106 |
107 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float:
108 | """
109 | Outputs 1 when x_curr == x_target (curriculum completed)
110 | Outputs 0 when x_curr == x_initial (just started training)
111 | Interpolates value in between.
112 | """
113 | span = x_initial - x_target
114 | return (x_initial - x_curr) / span
115 |
116 |
117 | def tolerance_successes_objective(
118 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor
119 | ) -> Tensor:
120 | """
121 | Objective for the PBT. This basically prioritizes tolerance over everything else when we
122 | execute the curriculum, after that it's just #successes.
123 | """
124 | # this grows from 0 to 1 as we reach the target tolerance
125 | if initial_tolerance > target_tolerance:
126 | # makeshift unit tests:
127 | eps = 1e-5
128 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps
129 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps
130 | mid_tolerance = (initial_tolerance + target_tolerance) / 2
131 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps
132 |
133 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance)
134 | else:
135 | tolerance_objective = 1.0
136 |
137 | if success_tolerance > target_tolerance:
138 | # add successes with a small coefficient to differentiate between policies at the beginning of training;
139 | # an increment in tolerance improvement should always give a higher value than more successes at the
140 | # previous tolerance, which is why this coefficient is very small
141 | true_objective = (successes * 0.01) + tolerance_objective
142 | else:
143 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross
144 | # the threshold
145 | true_objective = successes + tolerance_objective
146 |
147 | return true_objective
148 |
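149 | # Worked example (sketch, hypothetical numbers): with initial_tolerance=0.4 and target_tolerance=0.1,
150 | # a current success_tolerance of 0.25 gives interp_0_1(0.25, 0.4, 0.1) = (0.4 - 0.25) / (0.4 - 0.1) = 0.5,
151 | # so tolerance_successes_objective returns 0.01 * successes + 0.5 until the target tolerance is reached.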
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__init__.py
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/allegro_kuka_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/allegro_kuka_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/allegro_kuka_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/allegro_kuka_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/hand_arm_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/hand_arm_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/hand_arm_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/hand_arm_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/logger.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/logger.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/logger.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/logger.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/misc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/misc.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/misc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/misc.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/pytorch_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/pytorch_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/pytorch_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/pytorch_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/randomization_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/randomization_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/randomization_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/randomization_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/reformat.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/reformat.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/reformat.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/reformat.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/torch_jit_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/torch_jit_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/urdf_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/urdf_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/urdf_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/urdf_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/warmup_scheduler.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/warmup_scheduler.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/warmup_scheduler.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hgaurav2k/hop/5114512c9b1f5ac6625bbeb57e316064fc9d80da/utils/__pycache__/warmup_scheduler.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/allegro_kuka_utils.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) 2018-2023, NVIDIA Corporation
3 | # All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | from __future__ import annotations
31 |
32 | from dataclasses import dataclass
33 | from typing import Tuple, Dict, List
34 |
35 | from torch import Tensor
36 |
37 |
38 | @dataclass
39 | class DofParameters:
40 | """Joint/dof parameters."""
41 | allegro_stiffness: float
42 | kuka_stiffness: float
43 | allegro_effort: float
44 | allegro_velocity: float
45 | kuka_effort: List[float] # separate per DOF
46 | allegro_damping: float
47 | kuka_damping: float
48 | kuka_velocity: float
49 | dof_friction: float
50 | allegro_armature: float
51 | kuka_armature: float
52 |
53 | @staticmethod
54 | def from_cfg(cfg: Dict) -> DofParameters:
55 | return DofParameters(
56 | allegro_stiffness=cfg["env"]["allegroStiffness"],
57 | kuka_stiffness=cfg["env"]["kukaStiffness"],
58 | allegro_effort=cfg["env"]["allegroEffort"],
59 | allegro_velocity=cfg["env"]["allegroVelocity"],
60 | kuka_effort=cfg["env"]["kukaEffort"],
61 | allegro_damping=cfg["env"]["allegroDamping"],
62 | kuka_damping=cfg["env"]["kukaDamping"],
63 | kuka_velocity=cfg["env"]["kukaVelocity"],
64 | dof_friction=cfg["env"]["dofFriction"],
65 | allegro_armature=cfg["env"]["allegroArmature"],
66 | kuka_armature=cfg["env"]["kukaArmature"],
67 | )
68 |
69 |
70 | def populate_dof_properties(hand_arm_dof_props, params: DofParameters, arm_dofs: int, hand_dofs: int) -> None:
71 | assert len(hand_arm_dof_props["stiffness"]) == arm_dofs + hand_dofs
72 |
73 | hand_arm_dof_props["stiffness"][0:arm_dofs].fill(params.kuka_stiffness)
74 | hand_arm_dof_props["stiffness"][arm_dofs:].fill(params.allegro_stiffness)
75 |
76 | assert len(params.kuka_effort) == arm_dofs
77 | hand_arm_dof_props["effort"][0:arm_dofs] = params.kuka_effort
78 | hand_arm_dof_props["effort"][arm_dofs:].fill(params.allegro_effort)
79 |
80 | hand_arm_dof_props["velocity"][0:arm_dofs] = params.kuka_velocity
81 | hand_arm_dof_props["velocity"][arm_dofs:].fill(params.allegro_velocity)
82 |
83 | hand_arm_dof_props["damping"][0:arm_dofs].fill(params.kuka_damping)
84 | hand_arm_dof_props["damping"][arm_dofs:].fill(params.allegro_damping)
85 |
86 | if params.dof_friction >= 0:
87 | hand_arm_dof_props["friction"].fill(params.dof_friction)
88 |
89 | hand_arm_dof_props["armature"][0:arm_dofs].fill(params.kuka_armature)
90 | hand_arm_dof_props["armature"][arm_dofs:].fill(params.allegro_armature)
91 |
92 |
93 | def tolerance_curriculum(
94 | last_curriculum_update: int,
95 | frames_since_restart: int,
96 | curriculum_interval: int,
97 | prev_episode_successes: Tensor,
98 | success_tolerance: float,
99 | initial_tolerance: float,
100 | target_tolerance: float,
101 | tolerance_curriculum_increment: float,
102 | ) -> Tuple[float, int]:
103 | """
104 | Returns: new tolerance, new last_curriculum_update
105 | """
106 | if frames_since_restart - last_curriculum_update < curriculum_interval:
107 | return success_tolerance, last_curriculum_update
108 |
109 | mean_successes_per_episode = prev_episode_successes.mean()
110 | if mean_successes_per_episode < 3.0:
111 | # this policy is not good enough with the previous tolerance value, keep training for now...
112 | return success_tolerance, last_curriculum_update
113 |
114 | # decrease the tolerance now
115 | success_tolerance *= tolerance_curriculum_increment
116 | success_tolerance = min(success_tolerance, initial_tolerance)
117 | success_tolerance = max(success_tolerance, target_tolerance)
118 |
119 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}")
120 |
121 | last_curriculum_update = frames_since_restart
122 | return success_tolerance, last_curriculum_update
123 |
124 |
125 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float:
126 | """
127 | Outputs 1 when x_curr == x_target (curriculum completed)
128 | Outputs 0 when x_curr == x_initial (just started training)
129 | Interpolates value in between.
130 | """
131 | span = x_initial - x_target
132 | return (x_initial - x_curr) / span
133 |
134 |
135 | def tolerance_successes_objective(
136 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor
137 | ) -> Tensor:
138 | """
139 | Objective for the PBT. This basically prioritizes tolerance over everything else when we
140 | execute the curriculum, after that it's just #successes.
141 | """
142 | # this grows from 0 to 1 as we reach the target tolerance
143 | if initial_tolerance > target_tolerance:
144 | # makeshift unit tests:
145 | eps = 1e-5
146 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps
147 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps
148 | mid_tolerance = (initial_tolerance + target_tolerance) / 2
149 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps
150 |
151 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance)
152 | else:
153 | tolerance_objective = 1.0
154 |
155 | if success_tolerance > target_tolerance:
156 |         # add successes with a small coefficient to differentiate between policies at the beginning of training
157 |         # an improvement in tolerance should always be worth more than extra successes at the
158 |         # previous tolerance, which is why this coefficient is very small
159 | true_objective = (successes * 0.01) + tolerance_objective
160 | else:
161 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross
162 | # the threshold
163 | true_objective = successes + tolerance_objective
164 |
165 | return true_objective
166 |
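A minimal usage sketch of the DOF helpers above (not part of the repository): it assumes the repo root is on PYTHONPATH, uses placeholder cfg values, and emulates the Isaac Gym DOF-property array with a dict of numpy arrays, since the real hand_arm_dof_props would come from the simulator:

    import numpy as np
    from utils.allegro_kuka_utils import DofParameters, populate_dof_properties

    ARM_DOFS, HAND_DOFS = 7, 16
    cfg = {"env": {
        "allegroStiffness": 3.0, "kukaStiffness": 40.0,
        "allegroEffort": 0.5, "allegroVelocity": 2.0,
        "kukaEffort": [300.0] * ARM_DOFS, "allegroDamping": 0.1,
        "kukaDamping": 5.0, "kukaVelocity": 1.5,
        "dofFriction": -1.0, "allegroArmature": 0.0, "kukaArmature": 0.0,
    }}
    params = DofParameters.from_cfg(cfg)

    # stand-in for the simulator's per-DOF property array
    props = {k: np.zeros(ARM_DOFS + HAND_DOFS, dtype=np.float32)
             for k in ("stiffness", "effort", "velocity", "damping", "friction", "armature")}
    populate_dof_properties(props, params, ARM_DOFS, HAND_DOFS)
    print(props["stiffness"])   # first 7 entries 40.0 (arm), remaining 16 entries 3.0 (hand)
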
--------------------------------------------------------------------------------
/utils/camera.json:
--------------------------------------------------------------------------------
1 | {
2 | "width": 300,
3 | "height": 300,
4 | "fx": 267.9263610839844,
5 | "fy": 267.9263610839844,
6 | "pose": [
7 | 0.5531,
8 | -0.0643,
9 | 0.4484
10 | ],
11 | "R": [
12 | [
13 | 0.0347,
14 | 0.4223,
15 | -0.9058
16 | ],
17 | [
18 | 0.9993,
19 | -0.0294,
20 | 0.0245
21 | ],
22 | [
23 | -0.0163,
24 | -0.9060,
25 | -0.4231
26 | ]
27 | ]
28 | }
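For reference, a hedged sketch (not from the repository) of how these values could be assembled into a pinhole camera model; it assumes the principal point sits at the image centre and that "R"/"pose" give the camera orientation and position in the world frame, which should be verified against the code that consumes this file:

    import json
    import numpy as np

    with open("utils/camera.json") as f:   # assumes running from the repository root
        cam = json.load(f)

    # Pinhole intrinsics; placing the principal point at the image centre is an assumption.
    K = np.array([[cam["fx"], 0.0, cam["width"] / 2.0],
                  [0.0, cam["fy"], cam["height"] / 2.0],
                  [0.0, 0.0, 1.0]])

    R = np.array(cam["R"])      # assumed camera-to-world rotation
    t = np.array(cam["pose"])   # assumed camera position in the world frame

    p_world = np.array([0.5, 0.0, 0.1])    # some point in front of the robot
    p_cam = R.T @ (p_world - t)            # world -> camera frame under the assumed convention
    u, v, _ = K @ (p_cam / p_cam[2])       # pixel coordinates
    print(u, v)
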
--------------------------------------------------------------------------------
/utils/camera2.json:
--------------------------------------------------------------------------------
1 | {
2 | "width": 300,
3 | "height": 300,
4 | "fx": 267.5335,
5 | "fy": 267.5335,
6 | "pose": [
7 | 0.5545,
8 | -0.0563,
9 | 0.4281
10 | ],
11 | "R": [
12 | [
13 | 0.0285,
14 | 0.4060,
15 | -0.9134
16 | ],
17 | [
18 | 0.9993,
19 | -0.0355,
20 | 0.0154
21 | ],
22 | [
23 | -0.0261,
24 | -0.9132,
25 | -0.4068
26 | ]
27 | ]
28 | }
--------------------------------------------------------------------------------
/utils/hand_arm_utils.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) 2018-2023, NVIDIA Corporation
3 | # All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | from __future__ import annotations
31 |
32 | from dataclasses import dataclass
33 | from typing import Tuple, Dict, List
34 |
35 | from torch import Tensor
36 |
37 |
38 | @dataclass
39 | class DofParameters:
40 | """Joint/dof parameters."""
41 | hand_stiffness: float
42 | arm_stiffness: float
43 | hand_effort: float
44 | hand_velocity: float
45 | arm_effort: List[float] # separate per DOF
46 | hand_damping: float
47 | arm_damping: float
48 | arm_velocity: float
49 | dof_friction: float
50 | hand_armature: float
51 | arm_armature: float
52 |
53 | @staticmethod
54 | def from_cfg(cfg: Dict) -> DofParameters:
55 | return DofParameters(
56 | hand_stiffness=cfg["env"]["handStiffness"],
57 | arm_stiffness=cfg["env"]["armStiffness"],
58 | hand_effort=cfg["env"]["handEffort"],
59 | hand_velocity=cfg["env"]["handVelocity"],
60 | arm_effort=cfg["env"]["armEffort"],
61 | hand_damping=cfg["env"]["handDamping"],
62 | arm_damping=cfg["env"]["armDamping"],
63 | arm_velocity=cfg["env"]["armVelocity"],
64 | dof_friction=cfg["env"]["dofFriction"],
65 | hand_armature=cfg["env"]["handArmature"],
66 | arm_armature=cfg["env"]["armArmature"],
67 | )
68 |
69 |
70 | def populate_dof_properties(hand_arm_dof_props, params: DofParameters, arm_dofs: int, hand_dofs: int) -> None:
71 | assert len(hand_arm_dof_props["stiffness"]) == arm_dofs + hand_dofs
72 |
73 | hand_arm_dof_props["stiffness"][0:arm_dofs].fill(params.arm_stiffness)
74 | hand_arm_dof_props["stiffness"][arm_dofs:].fill(params.hand_stiffness)
75 |
76 | assert len(params.arm_effort) == arm_dofs
77 | hand_arm_dof_props["effort"][0:arm_dofs] = params.arm_effort
78 | hand_arm_dof_props["effort"][arm_dofs:].fill(params.hand_effort)
79 |
80 | hand_arm_dof_props["velocity"][0:arm_dofs] = params.arm_velocity
81 | hand_arm_dof_props["velocity"][arm_dofs:].fill(params.hand_velocity)
82 |
83 | hand_arm_dof_props["damping"][0:arm_dofs].fill(params.arm_damping)
84 | hand_arm_dof_props["damping"][arm_dofs:].fill(params.hand_damping)
85 |
86 | if params.dof_friction >= 0:
87 | hand_arm_dof_props["friction"].fill(params.dof_friction)
88 |
89 | hand_arm_dof_props["armature"][0:arm_dofs].fill(params.arm_armature)
90 | hand_arm_dof_props["armature"][arm_dofs:].fill(params.hand_armature)
91 |
92 |
93 | def tolerance_curriculum(
94 | last_curriculum_update: int,
95 | frames_since_restart: int,
96 | curriculum_interval: int,
97 | prev_episode_successes: Tensor,
98 | success_tolerance: float,
99 | initial_tolerance: float,
100 | target_tolerance: float,
101 | tolerance_curriculum_increment: float,
102 | ) -> Tuple[float, int]:
103 | """
104 | Returns: new tolerance, new last_curriculum_update
105 | """
106 | if frames_since_restart - last_curriculum_update < curriculum_interval:
107 | return success_tolerance, last_curriculum_update
108 |
109 | mean_successes_per_episode = prev_episode_successes.mean()
110 | if mean_successes_per_episode < 3.0:
111 | # this policy is not good enough with the previous tolerance value, keep training for now...
112 | return success_tolerance, last_curriculum_update
113 |
114 | # decrease the tolerance now
115 | success_tolerance *= tolerance_curriculum_increment
116 | success_tolerance = min(success_tolerance, initial_tolerance)
117 | success_tolerance = max(success_tolerance, target_tolerance)
118 |
119 | print(f"Prev episode successes: {mean_successes_per_episode}, success tolerance: {success_tolerance}")
120 |
121 | last_curriculum_update = frames_since_restart
122 | return success_tolerance, last_curriculum_update
123 |
124 |
125 | def interp_0_1(x_curr: float, x_initial: float, x_target: float) -> float:
126 | """
127 | Outputs 1 when x_curr == x_target (curriculum completed)
128 | Outputs 0 when x_curr == x_initial (just started training)
129 | Interpolates value in between.
130 | """
131 | span = x_initial - x_target
132 | return (x_initial - x_curr) / span
133 |
134 |
135 | def tolerance_successes_objective(
136 | success_tolerance: float, initial_tolerance: float, target_tolerance: float, successes: Tensor
137 | ) -> Tensor:
138 | """
139 | Objective for the PBT. This basically prioritizes tolerance over everything else when we
140 | execute the curriculum, after that it's just #successes.
141 | """
142 | # this grows from 0 to 1 as we reach the target tolerance
143 | if initial_tolerance > target_tolerance:
144 | # makeshift unit tests:
145 | eps = 1e-5
146 | assert abs(interp_0_1(initial_tolerance, initial_tolerance, target_tolerance)) < eps
147 | assert abs(interp_0_1(target_tolerance, initial_tolerance, target_tolerance) - 1.0) < eps
148 | mid_tolerance = (initial_tolerance + target_tolerance) / 2
149 | assert abs(interp_0_1(mid_tolerance, initial_tolerance, target_tolerance) - 0.5) < eps
150 |
151 | tolerance_objective = interp_0_1(success_tolerance, initial_tolerance, target_tolerance)
152 | else:
153 | tolerance_objective = 1.0
154 |
155 | if success_tolerance > target_tolerance:
156 |         # add successes with a small coefficient to differentiate between policies at the beginning of training
157 |         # an improvement in tolerance should always be worth more than extra successes at the
158 |         # previous tolerance, which is why this coefficient is very small
159 | true_objective = (successes * 0.01) + tolerance_objective
160 | else:
161 | # basically just the successes + tolerance objective so that true_objective never decreases when we cross
162 | # the threshold
163 | true_objective = successes + tolerance_objective
164 |
165 | return true_objective
166 |
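A small numeric sketch (not from the repository, all numbers made up) of how the tolerance curriculum and the PBT objective above behave:

    import torch
    from utils.hand_arm_utils import tolerance_curriculum, tolerance_successes_objective

    tolerance, last_update = 0.10, 0                  # start at the initial (loose) tolerance
    initial_tol, target_tol, increment = 0.10, 0.01, 0.9

    successes = torch.full((64,), 4.0)                # pretend every env averaged ~4 successes per episode
    tolerance, last_update = tolerance_curriculum(
        last_curriculum_update=last_update, frames_since_restart=1_000_000,
        curriculum_interval=500_000, prev_episode_successes=successes,
        success_tolerance=tolerance, initial_tolerance=initial_tol,
        target_tolerance=target_tol, tolerance_curriculum_increment=increment,
    )
    # tolerance shrank from 0.10 to 0.09: the interval elapsed and mean successes >= 3

    obj = tolerance_successes_objective(tolerance, initial_tol, target_tol, successes)
    print(tolerance, obj.mean().item())   # successes only count with weight 0.01 until target_tol is reached
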
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import wandb
4 | from PIL import Image
5 | import cv2
6 |
7 | class Logger:
8 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
9 | self._log_dir = log_dir
10 | print('########################')
11 | print('logging outputs to ', log_dir)
12 | print('########################')
13 | self._n_logged_samples = n_logged_samples
14 | self._summ_writer = summary_writer
15 |
16 | def flush(self):
17 | self._summ_writer.flush()
18 | return
19 |
20 | def log_scalar(self, scalar, name, step_, commit=False):
21 | if self._summ_writer:
22 | self._summ_writer.log({'{}'.format(name): scalar}, step=step_) #, commit=commit)
23 |
24 | def log_scalars(self, scalar_dict, group_name, step, phase, commit=True):
25 | """Will log all scalars in the same plot."""
26 | if self._summ_writer:
27 | self._summ_writer.log({'{}/{}'.format(group_name, phase): scalar_dict}, step=step) # Not sure if this will work!
28 | #self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step)
29 |
30 | def log_image(self, image, name, step, commit=False):
31 | assert(len(image.shape) == 3) # [C, H, W]
32 |         if self._summ_writer:  # wandb.Image takes no step/commit kwargs; build the image and log it explicitly
33 |             self._summ_writer.log({name: wandb.Image(image, caption=f"{name}")}, step=step, commit=commit)
34 |
35 | # TODO: Add more logging as needed
36 | def log_gifs(self,imgs,name="gif",commit=False):
37 |
38 | images = [Image.fromarray(image.cpu().numpy().astype(np.uint8)) for image in imgs]
39 | wandb.log({name: [wandb.Image(image) for image in images]})
40 |
41 | def log_video(self,imgs,name="video", step=0, commit=False, fps=15):
42 |
43 | frames = [img.cpu().numpy().astype(np.uint8) for img in imgs]
44 | frames = np.array(frames) # [T, H, W, C]
45 | frames = np.transpose(frames, (0, 3, 1, 2)) # [T, C, H, W]
46 |
47 | print("here")
48 | wandb.log({
49 | name: wandb.Video(frames, fps=fps, format='mp4'),
50 | }, step=step)
51 |
52 | print("here2")
53 |
54 | #def log_video(self, video_frames, name, step, fps=10):
55 | # assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
56 | # self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps)
57 |
58 | #def log_trajs_as_videos(self, trajs, step, max_videos_to_save=2, fps=10, video_title='video'):
59 |
60 | # # reshape the rollouts
61 | # videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in trajs]
62 |
63 | # # max rollout length
64 | # max_videos_to_save = np.min([max_videos_to_save, len(videos)])
65 | # max_length = videos[0].shape[0]
66 | # for i in range(max_videos_to_save):
67 | # if videos[i].shape[0]>max_length:
68 | # max_length = videos[i].shape[0]
69 |
70 | # # pad rollouts to all be same length
71 | # for i in range(max_videos_to_save):
72 |     #     if videos[i].shape[0] < max_length:
73 |     #         padding = np.tile([videos[i][-1]], (max_length - videos[i].shape[0], 1, 1, 1))
74 |     #         videos[i] = np.concatenate([videos[i], padding], 0)
75 |
76 |     # # log videos to tensorboard event file
77 |     # videos = np.stack(videos[:max_videos_to_save], 0)
78 |     # self.log_video(videos, video_title, step, fps=fps)
79 |
80 |     #def log_figures(self, figure, name, step, phase):
81 |     #    """figure: matplotlib.pyplot figure handle"""
82 |     #    assert figure.shape[0] > 0, "Figure logging requires input shape [batch x figures]!"
83 | # self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)
84 |
85 | #def log_figure(self, figure, name, step, phase):
86 | # """figure: matplotlib.pyplot figure handle"""
87 | # self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)
88 |
89 | #def log_graph(self, array, name, step, phase):
90 | # """figure: matplotlib.pyplot figure handle"""
91 | # im = plot_graph(array)
92 | # self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)
93 |
94 | #def dump_scalars(self, log_path=None):
95 | # log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path
96 | # self._summ_writer.export_scalars_to_json(log_path)
97 |
98 | def log_dict(self, logs, itr, verbose=True):
99 | if self._summ_writer:
100 | for key, value in logs.items():
101 | if verbose:
102 | print("{} : {}".format(key, value))
103 | self.log_scalar(value, key, itr)
104 |
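A minimal usage sketch (not from the repository): the class expects a wandb-like object as summary_writer, so passing the wandb module itself works; mode="disabled" keeps the sketch runnable without a wandb account, and it assumes the repo root is on PYTHONPATH:

    import wandb
    from utils.logger import Logger

    wandb.init(project="hop-example", mode="disabled")
    logger = Logger(log_dir="/tmp/hop_logs", summary_writer=wandb)

    logger.log_scalar(0.42, "train/reward", step_=0)
    logger.log_dict({"train/loss": 1.3, "train/kl": 0.02}, itr=1)
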
--------------------------------------------------------------------------------
/utils/pytorch_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from copy import deepcopy
4 |
5 | device=None
6 |
7 |
8 | def from_numpy(*args, **kwargs):
9 | return torch.from_numpy(*args, **kwargs).float().to(device)
10 |
11 |
12 | def to_numpy(tensor):
13 | return tensor.to('cpu').detach().numpy()
14 |
15 |
16 | def to_torch(element,device):
17 |
18 | if isinstance(element,dict):
19 |
20 | new_element = deepcopy(element)
21 | for key in element:
22 | new_element[key] = to_torch(element[key],device)
23 | return new_element
24 |
25 | elif isinstance(element,list):
26 | try:
27 | return torch.tensor(element).float().to(device)
28 |         except (TypeError, ValueError, RuntimeError):
29 | return element
30 |
31 | elif isinstance(element,np.ndarray):
32 | return torch.from_numpy(element).float().to(device)
33 |
34 | else:
35 | return element
36 |
37 |
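A quick sketch (not from the repository, assuming the repo root is on PYTHONPATH) of to_torch on a nested observation dict:

    import numpy as np
    from utils.pytorch_utils import to_torch

    obs = {
        "proprio": np.zeros((4, 23), dtype=np.float32),
        "depth": [[0.1, 0.2], [0.3, 0.4]],
        "meta": {"object_name": "mug", "scale": 1.0},
    }
    batch = to_torch(obs, device="cpu")
    # array/list leaves become float tensors; strings and plain scalars pass through unchanged
    print(type(batch["proprio"]), type(batch["depth"]), batch["meta"]["object_name"])
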
--------------------------------------------------------------------------------
/utils/randomization_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from termcolor import cprint
3 |
4 |
5 | ##CAN ONLY BE DONE AT THE START OF SIMULATION##
6 | def randomize_table_z(gym,env_ptr,table_handle,table_rand_config):
7 | #does not work. need to change table position differently.
8 | fr_z = np.random.uniform(table_rand_config['lower'],table_rand_config['upper'])
9 | prop = gym.get_actor_rigid_body_properties(env_ptr, table_handle)
10 | assert len(prop) == 1
11 | print(fr_z)
12 | obj_com = prop[0].com.z*fr_z
13 | prop[0].com.z = obj_com
14 | gym.set_actor_rigid_body_properties(env_ptr, table_handle, prop)
15 |
16 |
17 | ##CAN ONLY BE DONE AT THE START OF SIMULATION##
18 | def randomize_object_scale(gym,env_ptr,object_handle,object_rand_config):
19 |
20 | scale = np.random.uniform(object_rand_config['lower'], object_rand_config['upper'])
21 | gym.set_actor_scale(env_ptr, object_handle,scale)
22 | return scale
23 |
24 |
25 | ##CAN ONLY BE DONE AT THE START OF SIMULATION##
26 | def randomize_object_mass(gym,env_ptr,object_handle,objmass_rand_config):
27 |
28 | prop = gym.get_actor_rigid_body_properties(env_ptr, object_handle)
29 | ret = []
30 | for p in prop:
31 | fr = np.random.uniform(objmass_rand_config['lower'], objmass_rand_config['upper'])
32 | p.mass = p.mass*fr
33 | p.inertia.x = p.inertia.x*fr
34 | p.inertia.y = p.inertia.y*fr
35 | p.inertia.z = p.inertia.z*fr
36 | ret.append(p.mass)
37 |
38 | gym.set_actor_rigid_body_properties(env_ptr, object_handle, prop)
39 |
40 | return ret
41 |
42 |
43 | ##CAN ONLY BE DONE AT THE START OF SIMULATION##
44 | def randomize_friction(gym,env_ptr,handle,rand_friction_config):
45 |
46 | rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper'])
47 | rest = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper'])
48 | props = gym.get_actor_rigid_shape_properties(env_ptr, handle)
49 | friction = []
50 | restitution = []
51 | for p in props:
52 | p.friction = rand_friction*p.friction
53 | p.restitution = rest*p.restitution
54 | friction.append(p.friction)
55 | restitution.append(p.restitution)
56 |
57 | gym.set_actor_rigid_shape_properties(env_ptr, handle, props)
58 |
59 | return friction,restitution
60 |
61 | # def randomize_friction(gym,env_ptr,hand_handle,object_handle,rand_friction_config):
62 |
63 | # rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper'])
64 | # obj_restitution = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper'])
65 | # hand_props = gym.get_actor_rigid_shape_properties(env_ptr, hand_handle)
66 | # hand_friction = []
67 | # hand_restitution = []
68 | # for p in hand_props:
69 | # p.friction = rand_friction
70 | # p.restitution = obj_restitution
71 | # hand_friction.append(p.friction)
72 | # hand_restitution.append(p.restitution)
73 |
74 | # gym.set_actor_rigid_shape_properties(env_ptr, hand_handle, hand_props)
75 |
76 |
77 | # rand_friction = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper'])
78 | # obj_rest = np.random.uniform(rand_friction_config['lower'], rand_friction_config['upper'])
79 | # obj_friction = []
80 | # obj_restitution = []
81 | # obj_props = gym.get_actor_rigid_shape_properties(env_ptr, object_handle)
82 | # for p in obj_props:
83 | # p.friction = rand_friction*p.friction
84 | # p.restitution = obj_rest*p.restitution
85 | # obj_friction.append(p.friction)
86 | # obj_restitution.append(p.restitution)
87 |
88 | # gym.set_actor_rigid_shape_properties(env_ptr, object_handle, obj_props)
89 |
90 | # return hand_friction, hand_restitution, obj_friction, obj_restitution #not sure if just one value can influence the full policy but okay for now.
91 |
92 |
93 | # def randomize_object_position(env):
94 | # "already randomized in code"
95 | # pass
96 |
97 | # def randomize_robot_damping(env):
98 | # pass
99 |
100 | # def randomize_robot_stiffness(env):
101 | # pass
102 |
103 |
--------------------------------------------------------------------------------
/utils/reformat.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018-2023, NVIDIA Corporation
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | from omegaconf import DictConfig, OmegaConf
30 | from typing import Dict
31 |
32 | def omegaconf_to_dict(d: DictConfig)->Dict:
33 | """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation."""
34 | ret = {}
35 | for k, v in d.items():
36 | if isinstance(v, DictConfig):
37 | ret[k] = omegaconf_to_dict(v)
38 | else:
39 | ret[k] = v
40 | return ret
41 |
42 | def print_dict(val, nesting: int = -4, start: bool = True):
43 |     """Prints a nested dictionary."""
44 | if type(val) == dict:
45 | if not start:
46 | print('')
47 | nesting += 4
48 | for k in val:
49 | print(nesting * ' ', end='')
50 | print(k, end=': ')
51 | print_dict(val[k], nesting, start=False)
52 | else:
53 | print(val)
54 |
55 | # EOF
56 |
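Example (an illustrative sketch; the config keys here are made up) of round-tripping an OmegaConf config through the helpers above:

    from omegaconf import OmegaConf
    from utils.reformat import omegaconf_to_dict, print_dict

    cfg = OmegaConf.create({
        "task": {"name": "AllegroXarmNew", "env": {"numEnvs": 8192, "episodeLength": 300}},
        "seed": 42,
    })
    cfg_dict = omegaconf_to_dict(cfg)   # plain nested dict
    print_dict(cfg_dict)
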
--------------------------------------------------------------------------------
/utils/rna_util.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) 2018-2023, NVIDIA Corporation
3 | # All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | from __future__ import print_function
31 |
32 | import torch
33 | import torch.nn as nn
34 | import torch.nn.functional as F
35 |
36 |
37 | class RandomNetworkAdversary(nn.Module):
38 |
39 | def __init__(self, num_envs, in_dims, out_dims, softmax_bins, device):
40 | super(RandomNetworkAdversary, self).__init__()
41 |
42 | """
43 | Class to add random action to the action generated by the policy.
44 | The output is binned to 32 bins per channel and we do softmax over
45 | these bins to figure out the most likely joint angle.
46 |
47 | Note: OpenAI et al. 2019 found out that if they used a continuous space
48 | and a tanh non-linearity, actions would always be close to 0.
49 | Section B.3 https://arxiv.org/abs/1910.07113
50 |
51 | Q: Why do we need dropouts here?
52 |
53 |         A: If we were using a CPU-based simulator as in OpenAI et al. 2019, we
54 |         would use a different RNA network for each CPU worker. However,
55 |         this is not feasible for a GPU-based simulator, as it would mean
56 |         creating N_envs RNA networks, which would overwhelm GPU memory.
57 |         Therefore, dropout is a nice approximation of this: re-sampling
58 |         dropout masks of the same network gives each env its own subnetwork on the GPU.
59 | """
60 |
61 | self.in_dims = in_dims
62 | self.out_dims = out_dims
63 | self.softmax_bins = softmax_bins
64 | self.num_envs = num_envs
65 |
66 | self.device = device
67 |
68 | self.num_feats1 = 512
69 | self.num_feats2 = 1024
70 |
71 |         # Sample random probabilities for the dropout masks
72 | dropout_probs = torch.rand((2, ))
73 |
74 | # Setting up the RNA neural network here
75 |
76 | # First layer
77 |
78 | self.fc1 = nn.Linear(in_dims, self.num_feats1).to(self.device)
79 |
80 | self.dropout_masks1 = torch.bernoulli(torch.ones((self.num_envs, \
81 | self.num_feats1)), p=dropout_probs[0]).to(self.device)
82 |
83 | self.fc1_1 = nn.Linear(self.num_feats1, self.num_feats1).to(self.device)
84 |
85 | # Second layer
86 | self.fc2 = nn.Linear(self.num_feats1, self.num_feats2).to(self.device)
87 |
88 | self.dropout_masks2 = torch.bernoulli(torch.ones((self.num_envs, \
89 | self.num_feats2)), p=dropout_probs[1]).to(self.device)
90 |
91 | self.fc2_1 = nn.Linear(self.num_feats2, self.num_feats2).to(self.device)
92 |
93 | # Last layer
94 | self.fc3 = nn.Linear(self.num_feats2, out_dims*softmax_bins).to(self.device)
95 |
96 | # This is needed to reset weights and dropout masks
97 | self._refresh()
98 |
99 | def _refresh(self):
100 |
101 | self._init_weights()
102 | self.eval()
103 | self.refresh_dropout_masks()
104 |
105 | def _init_weights(self):
106 |
107 | print('initialising weights for random network')
108 |
109 | nn.init.kaiming_uniform_(self.fc1.weight)
110 | nn.init.kaiming_uniform_(self.fc1_1.weight)
111 | nn.init.kaiming_uniform_(self.fc2.weight)
112 | nn.init.kaiming_uniform_(self.fc2_1.weight)
113 | nn.init.kaiming_uniform_(self.fc3.weight)
114 |
115 | return
116 |
117 | def refresh_dropout_masks(self):
118 |
119 | dropout_probs = torch.rand((2, ))
120 |
121 | self.dropout_masks1 = torch.bernoulli(torch.ones((self.num_envs, self.num_feats1)), \
122 | p=dropout_probs[0]).to(self.dropout_masks1.device)
123 |
124 | self.dropout_masks2 = torch.bernoulli(torch.ones((self.num_envs, self.num_feats2)), \
125 | p=dropout_probs[1]).to(self.dropout_masks2.device)
126 |
127 | return
128 |
129 | def forward(self, x):
130 |
131 | x = self.fc1(x)
132 | x = F.relu(x)
133 | x = self.fc1_1(x)
134 | x = self.dropout_masks1 * x
135 |
136 | x = self.fc2(x)
137 | x = F.relu(x)
138 | x = self.fc2_1(x)
139 | x = self.dropout_masks2 * x
140 |
141 | x = self.fc3(x)
142 |
143 | x = x.view(-1, self.out_dims, self.softmax_bins)
144 | output = F.softmax(x, dim=-1)
145 |
146 | # We have discretised the joint angles into bins
147 | # Now we pick up the bin for each joint angle
148 | # corresponding to the highest softmax value / prob.
149 |
150 | return output
151 |
152 |
153 | if __name__ == "__main__":
154 |
155 | num_envs = 1024
156 | RNA = RandomNetworkAdversary(num_envs=num_envs, in_dims=16, out_dims=16, softmax_bins=32, device='cuda')
157 |
158 |     x = torch.randn(num_envs, 16, device=RNA.device)
159 | y = RNA(x)
160 | import ipdb; ipdb.set_trace()
161 |
162 |
163 |
164 |
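One way (a sketch, not defined in this file: the linear bin-to-offset mapping and the delta value below are assumptions) to turn the per-joint softmax over bins into an additive action perturbation:

    import torch
    from utils.rna_util import RandomNetworkAdversary

    num_envs, n_joints, n_bins = 8, 16, 32
    rna = RandomNetworkAdversary(num_envs=num_envs, in_dims=n_joints,
                                 out_dims=n_joints, softmax_bins=n_bins, device="cpu")

    policy_actions = torch.zeros(num_envs, n_joints)
    bin_probs = rna(policy_actions)          # [num_envs, n_joints, n_bins]
    bin_idx = bin_probs.argmax(dim=-1)       # most likely bin per joint

    # Assumed mapping of bin index onto a symmetric range [-delta, delta]; tune delta per robot.
    delta = 0.05
    perturbation = -delta + 2 * delta * bin_idx.float() / (n_bins - 1)
    perturbed_actions = policy_actions + perturbation
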
--------------------------------------------------------------------------------
/utils/urdf_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | def read_xml(filename):
3 | import xml.etree.ElementTree as Et
4 | root = Et.parse(filename).getroot()
5 | return root
6 |
7 |
8 | def get_link_meshes_from_urdf(urdf_file,link_names):
9 | root = read_xml(urdf_file)
10 | link_meshfiles =[]
11 | for link_name in link_names:
12 | for link in root.findall('link'):
13 | if link.attrib['name'] == link_name:
14 | for mesh in link.findall('visual/geometry/mesh'):
15 | link_meshfiles.append(mesh.attrib['filename'])
16 |
17 | assert len(link_meshfiles) == len(link_names)
18 | return link_meshfiles
19 |
20 |
21 | def load_asset_files_public(asset_root):
22 | import os
23 | folder_name = 'pybullet-URDF-models/urdf_models/models'
24 | asset_files = {}
25 |
26 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)):
27 |
28 | for file in files:
29 | if file.endswith("model.urdf"):
30 | obj_name = root.split('/')[-1]
31 | dir = root[len(asset_root)+1:]
32 | asset_files[obj_name]=os.path.join(dir, file)
33 |
34 | return asset_files
35 |
36 |
37 |
38 |
39 | def load_asset_files_ycb(asset_root,folder_name='ycb_real_inertia'):
40 |
41 | import os
42 | asset_files = {}
43 |
44 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)):
45 |
46 | for file in files:
47 | if file.endswith(".urdf"):
48 | obj_name = file.split('.')[0]
49 | dir = root[len(asset_root)+1:]
50 | asset_files[obj_name]={}
51 | asset_files[obj_name]['urdf']=os.path.join(dir, file)
52 | asset_files[obj_name]['mesh']=os.path.join(dir, file.split('.')[0]+'/google_16k/textured.obj')
53 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['mesh']))
54 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['urdf']))
55 |
56 | return asset_files
57 |
58 | def load_asset_files_ycb_lowmem(asset_root,folder_name='ycb_real_inertia'):
59 | import os
60 | asset_files = {}
61 |
62 | for root, dirs, files in os.walk(os.path.join(asset_root,folder_name)):
63 |
64 | for file in files:
65 | if file.endswith(".urdf"):
66 | obj_name = file.split('.')[0]
67 | number = obj_name.split('_')[0]
68 | print(obj_name,number)
69 | if number in ['070-a','070-b','072','036','032','029','048','027','019','032','026']:
70 | dir = root[len(asset_root)+1:]
71 | asset_files[obj_name]={}
72 | asset_files[obj_name]['urdf']=os.path.join(dir, file)
73 | asset_files[obj_name]['mesh']=os.path.join(dir, file.split('.')[0]+'/google_16k/textured.obj')
74 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['mesh']))
75 | assert os.path.exists(os.path.join(asset_root,asset_files[obj_name]['urdf']))
76 |
77 | return asset_files
78 |
79 |
80 | def fix_ycb_scale(asset_root):
81 | import os
82 | import shutil
83 | import xml.etree.ElementTree as Et
84 | folder_name = 'ycb'
85 | new_folder_name = 'ycb_scaled'
86 | if not os.path.exists(os.path.join(asset_root,new_folder_name)):
87 | shutil.copytree(os.path.join(asset_root,folder_name), os.path.join(asset_root,new_folder_name))
88 |
89 | for root, dirs, files in os.walk(os.path.join(asset_root,new_folder_name)):
90 | for file in files:
91 | if file.endswith(".urdf"):
92 | filepath = os.path.join(root, file)
93 | urdf = read_xml(filepath)
94 | for mesh in urdf.findall(f'.//collision/geometry/'):
95 | mesh.attrib['scale']='1 1 1'
96 | for mesh in urdf.findall(f'.//visual/geometry/'):
97 | mesh.attrib['scale']='1 1 1'
98 |
99 | new_xml = Et.ElementTree()
100 | new_xml._setroot(urdf)
101 | with open(filepath, "wb") as f:
102 | new_xml.write(f)
103 |
104 | return
105 |
106 |
107 |
108 |
109 |
110 | def get_vol_ratio(scale1,scale2):
111 | nums1 = [float(s) for s in scale1.split(' ')]
112 | nums2 = [float(s) for s in scale2.split(' ')]
113 | nums1 = np.array(nums1)
114 | nums2 = np.array(nums2)
115 | return np.prod(nums1)/np.prod(nums2)
116 |
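A small sketch (not from the repository) exercising the URDF helpers on an inline toy file; the URDF content below is a stand-in, not one of the repo's assets:

    import os
    import tempfile
    from utils.urdf_utils import get_link_meshes_from_urdf, get_vol_ratio

    toy_urdf = """<?xml version="1.0"?>
    <robot name="toy">
      <link name="base_link">
        <visual><geometry><mesh filename="meshes/base.obj"/></geometry></visual>
      </link>
    </robot>"""

    with tempfile.NamedTemporaryFile("w", suffix=".urdf", delete=False) as f:
        f.write(toy_urdf)
        path = f.name

    print(get_link_meshes_from_urdf(path, ["base_link"]))   # ['meshes/base.obj']
    print(get_vol_ratio("1 1 1", "0.5 0.5 0.5"))            # 8.0
    os.remove(path)
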
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018-2023, NVIDIA Corporation
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | # python
30 | #import pwd
31 | import getpass
32 | import tempfile
33 | import time
34 | from collections import OrderedDict
35 | from os.path import join
36 |
37 | import numpy as np
38 | import torch
39 | import random
40 | import os
41 | import subprocess
42 | import shlex
43 |
44 | def retry(times, exceptions):
45 | """
46 | Retry Decorator https://stackoverflow.com/a/64030200/1645784
47 | Retries the wrapped function/method `times` times if the exceptions listed
48 | in ``exceptions`` are thrown
49 | :param times: The number of times to repeat the wrapped function/method
50 | :type times: Int
51 | :param exceptions: Lists of exceptions that trigger a retry attempt
52 | :type exceptions: Tuple of Exceptions
53 | """
54 | def decorator(func):
55 | def newfn(*args, **kwargs):
56 | attempt = 0
57 | while attempt < times:
58 | try:
59 | return func(*args, **kwargs)
60 | except exceptions:
61 | print(f'Exception thrown when attempting to run {func}, attempt {attempt} out of {times}')
62 | time.sleep(min(2 ** attempt, 30))
63 | attempt += 1
64 |
65 | return func(*args, **kwargs)
66 | return newfn
67 | return decorator
68 |
69 |
70 | def flatten_dict(d, prefix='', separator='.'):
71 | res = dict()
72 | for key, value in d.items():
73 | if isinstance(value, (dict, OrderedDict)):
74 | res.update(flatten_dict(value, prefix + key + separator, separator))
75 | else:
76 | res[prefix + key] = value
77 |
78 | return res
79 |
80 |
81 | def set_np_formatting():
82 | """ formats numpy print """
83 | np.set_printoptions(edgeitems=30, infstr='inf',
84 | linewidth=4000, nanstr='nan', precision=2,
85 | suppress=False, threshold=10000, formatter=None)
86 |
87 |
88 | def set_seed(seed, torch_deterministic=False, rank=0):
89 | """ set seed across modules """
90 | if seed == -1 and torch_deterministic:
91 | seed = 42 + rank
92 | elif seed == -1:
93 | seed = np.random.randint(0, 10000)
94 | else:
95 | seed = seed + rank
96 |
97 | print("Setting seed: {}".format(seed))
98 |
99 | random.seed(seed)
100 | np.random.seed(seed)
101 | torch.manual_seed(seed)
102 | os.environ['PYTHONHASHSEED'] = str(seed)
103 | torch.cuda.manual_seed(seed)
104 | torch.cuda.manual_seed_all(seed)
105 |
106 | if torch_deterministic:
107 | # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
108 | os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
109 | torch.backends.cudnn.benchmark = False
110 | torch.backends.cudnn.deterministic = True
111 | torch.use_deterministic_algorithms(True)
112 | else:
113 | torch.backends.cudnn.benchmark = True
114 | torch.backends.cudnn.deterministic = False
115 |
116 | return seed
117 |
118 | def nested_dict_set_attr(d, key, val):
119 | pre, _, post = key.partition('.')
120 | if post:
121 | nested_dict_set_attr(d[pre], post, val)
122 | else:
123 | d[key] = val
124 |
125 | def nested_dict_get_attr(d, key):
126 | pre, _, post = key.partition('.')
127 | if post:
128 | return nested_dict_get_attr(d[pre], post)
129 | else:
130 | return d[key]
131 |
132 | def ensure_dir_exists(path):
133 | if not os.path.exists(path):
134 | os.makedirs(path)
135 | return path
136 |
137 |
138 | def safe_ensure_dir_exists(path):
139 |     """Should be safer in a multi-threaded environment."""
140 | try:
141 | return ensure_dir_exists(path)
142 | except FileExistsError:
143 | return path
144 |
145 |
146 | def get_username():
147 | uid = os.getuid()
148 | try:
149 | return getpass.getuser()
150 | except KeyError:
151 | # worst case scenario - let's just use uid
152 | return str(uid)
153 |
154 |
155 | def project_tmp_dir():
156 | tmp_dir_name = f'ige_{get_username()}'
157 | return safe_ensure_dir_exists(join(tempfile.gettempdir(), tmp_dir_name))
158 |
159 | # EOF
160 |
161 |
162 | def git_hash():
163 | cmd = 'git log -n 1 --pretty="%h"'
164 | ret = subprocess.check_output(shlex.split(cmd)).strip()
165 | if isinstance(ret, bytes):
166 | ret = ret.decode()
167 | return ret
168 |
169 |
170 | def git_diff_config(name):
171 | cmd = f'git diff --unified=0 {name}'
172 | ret = subprocess.check_output(shlex.split(cmd)).strip()
173 | if isinstance(ret, bytes):
174 | ret = ret.decode()
175 | return ret
176 |
177 |
178 |
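A short usage sketch (not from the repository) of the retry decorator above; the flaky function here is artificial:

    from utils.utils import retry

    calls = {"n": 0}

    @retry(times=3, exceptions=(ConnectionError,))
    def flaky_upload():
        calls["n"] += 1
        if calls["n"] < 3:
            raise ConnectionError("transient failure")
        return "ok"

    print(flaky_upload(), "after", calls["n"], "attempts")   # succeeds on the third try
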
--------------------------------------------------------------------------------
/utils/wandb_utils.py:
--------------------------------------------------------------------------------
1 | from rl_games.common.algo_observer import AlgoObserver
2 |
3 | from utils.utils import retry
4 | from utils.reformat import omegaconf_to_dict
5 |
6 |
7 | class WandbAlgoObserver(AlgoObserver):
8 | """Need this to propagate the correct experiment name after initialization."""
9 |
10 | def __init__(self, cfg):
11 | super().__init__()
12 | self.cfg = cfg
13 |
14 | def before_init(self, base_name, config, experiment_name):
15 | """
16 | Must call initialization of Wandb before RL-games summary writer is initialized, otherwise
17 | sync_tensorboard does not work.
18 | """
19 |
20 | import wandb
21 |
22 | wandb_unique_id = f"uid_{experiment_name}"
23 | print(f"Wandb using unique id {wandb_unique_id}")
24 |
25 | cfg = self.cfg
26 |
27 | # this can fail occasionally, so we try a couple more times
28 | @retry(3, exceptions=(Exception,))
29 | def init_wandb():
30 | wandb.init(
31 | project=cfg.wandb_project,
32 | entity=cfg.wandb_entity,
33 | group=cfg.wandb_group,
34 | tags=cfg.wandb_tags,
35 | sync_tensorboard=True,
36 | id=wandb_unique_id,
37 | name=experiment_name,
38 | resume=True,
39 | settings=wandb.Settings(start_method='fork'),
40 | )
41 |
42 | if cfg.wandb_logcode_dir:
43 | wandb.run.log_code(root=cfg.wandb_logcode_dir)
44 | print('wandb running directory........', wandb.run.dir)
45 |
46 | print('Initializing WandB...')
47 | try:
48 | init_wandb()
49 | except Exception as exc:
50 | print(f'Could not initialize WandB! {exc}')
51 |
52 | if isinstance(self.cfg, dict):
53 | wandb.config.update(self.cfg, allow_val_change=True)
54 | else:
55 | wandb.config.update(omegaconf_to_dict(self.cfg), allow_val_change=True)
56 |
--------------------------------------------------------------------------------
/utils/warmup_scheduler.py:
--------------------------------------------------------------------------------
1 | class WarmupScheduler:
2 | def __init__(self, optimizer, target_lr,initial_lr=1e-7,warmup_steps=25):
3 | self.optimizer = optimizer
4 | self.warmup_steps = warmup_steps
5 | self.initial_lr = initial_lr
6 | self.target_lr = target_lr
7 | self.current_step = 0
8 |
9 | def step(self):
10 | if self.current_step < self.warmup_steps:
11 | # Linearly increase the learning rate
12 | lr = (self.target_lr - self.initial_lr) * (self.current_step / self.warmup_steps) + self.initial_lr
13 | # Apply the learning rate to the optimizer
14 | for param_group in self.optimizer.param_groups:
15 | param_group['lr'] = lr
16 | # Increment the step count
17 | self.current_step += 1
18 |
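A usage sketch (not from the repository) with a dummy torch optimizer; note that after warmup_steps calls the learning rate plateaus one increment below target_lr, so callers typically hand over to another scheduler or set the target rate explicitly afterwards:

    import torch
    from utils.warmup_scheduler import WarmupScheduler

    param = torch.nn.Parameter(torch.zeros(1))
    opt = torch.optim.Adam([param], lr=1e-7)
    sched = WarmupScheduler(opt, target_lr=3e-4, initial_lr=1e-7, warmup_steps=25)

    for step in range(30):
        # ... forward/backward/opt.step() would go here ...
        sched.step()
        if step % 10 == 0:
            print(step, opt.param_groups[0]["lr"])
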
--------------------------------------------------------------------------------